zookeeper服务端源码深入分析

  • 从启动脚本寻找入口
  • QuorumPeerMain入口
  • ZooKeeperServerMain
  • initializeAndRun单机模式初始化配置信息并运行
  • runFromConfig根据配置信息运行服务器
  • NIOServerCnxnFactory服务端socket
  • configure
  • startup
  • start()启动线程NIOServerCnxnFactory的Thread
  • zks.startdata()加载数据
  • loadData()
  • ZKDatabase.loadDataBase
  • FileTxnSnapLog.restore将快照加到DataTree
  • zks.startup()设置请求处理器
  • setupRequestProcessors()建立请求处理器链
  • 回到NIOServerCnxnFactory.run()
  • NIOServerCnxn
  • doIO执行读写
  • readPayload
  • readRequest()读取请求
  • ZooKeeperServer
  • processPacket
  • submitRequest交由请求处理器处理数据
  • 请求处理链
  • PrepRequestProcessor处理请求放入队列
  • run()
  • pRequest判断请求类型
  • pRequest2Txn根据请求类型执行操作
  • addChangeRecord添加修改记录到队列
  • SyncRequestProcessor持久化(快照)操作
  • processRequest
  • run()
  • FinalRequestProcessor从队列中拿出数据进行内存操作
  • processRequest
  • ProcessTxnResult
  • DataTree.processTxn根据事务来操作数据
  • createNode放入ConcurrentHashMap
  • 返回结果


从启动脚本寻找入口

与客户端启动类似,服务端肯定也会有一个启动类作为入口。
例如zkServer.cmd脚本文件

REM Keep the environment changes made below local to this script.
setlocal
REM Run zkEnv.cmd located in this script's own directory (%~dp0).
call "%~dp0zkEnv.cmd"

REM Main class started by the java command below; %ZOOCFG% is passed as its
REM first argument, followed by any arguments given to this script (%*).
set ZOOMAIN=org.apache.zookeeper.server.quorum.QuorumPeerMain
echo on
java "-Dzookeeper.log.dir=%ZOO_LOG_DIR%" "-Dzookeeper.root.logger=%ZOO_LOG4J_PROP%" -cp "%CLASSPATH%" %ZOOMAIN% "%ZOOCFG%" %*

endlocal

可以看到启动类是QuorumPeerMain;与zkCli.cmd脚本文件相比,启动命令中多了一个参数"%ZOOCFG%"。根据call "%~dp0zkEnv.cmd"可知,这个变量定义在zkEnv.cmd中:

REM Config directory, log directory and log4j setting; the paths are built
REM from this script's own directory (%~dp0).
set ZOOCFGDIR=%~dp0%..\conf
set ZOO_LOG_DIR=%~dp0%..
set ZOO_LOG4J_PROP=INFO,CONSOLE

REM for sanity sake assume Java 1.6
REM see: http://java.sun.com/javase/6/docs/technotes/tools/windows/java.html

REM add the zoocfg dir to classpath
set CLASSPATH=%ZOOCFGDIR%

REM make it work in the release
SET CLASSPATH=%~dp0..\*;%~dp0..\lib\*;%CLASSPATH%

REM make it work for developers
SET CLASSPATH=%~dp0..\build\classes;%~dp0..\build\lib\*;%CLASSPATH%

REM Path of the configuration file (zoo.cfg inside the config directory).
set ZOOCFG=%ZOOCFGDIR%\zoo.cfg

在zkEnv.cmd的最后一行可以看到ZOOCFG的定义,
说明服务端是通过配置文件zoo.cfg来启动的。
下面是单机服务器的基础配置:

#基础时间单元
tickTime=2000
#Follower同步Leader的限制时间,initLimit * tickTime,随ZK集群数据量适当增加值
initLimit=10
#心跳检测机制,来检测机器的存活状态,syncLimit * tickTime
syncLimit=5
#数据目录(存放快照;未单独配置dataLogDir时,事务日志也存放在这里)
dataDir=D:/tools/zookeeper-3.4.6/data
#端口
clientPort=2181

QuorumPeerMain入口

/**
 * Server entry point: creates a QuorumPeerMain and delegates to
 * initializeAndRun(args), then exits the JVM with status 0.
 * (The exception handling marked with "." is elided in this excerpt.)
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    // args carries the value of the ZOOCFG variable set by the start script
    QuorumPeerMain main = new QuorumPeerMain();
    try {
        // initialize from the configuration and run
        main.initializeAndRun(args);
    } catch (IllegalArgumentException e) {
       .
       .
       
    }
    LOG.info("Exiting normally");
    System.exit(0);
}

初始化配置信息并运行

/**
 * Parses the configuration (when exactly one argument is given), starts the
 * data-directory purge task, and then runs either in quorum mode or in
 * standalone mode.
 *
 * @param args command-line arguments; a single argument is treated as the
 *             path of the configuration file
 * @throws ConfigException declared for configuration parsing
 * @throws IOException     declared for the underlying I/O
 */
protected void initializeAndRun(String[] args) throws ConfigException, IOException {
    // configuration holder used for quorum (cluster) mode
    QuorumPeerConfig quorumConfig = new QuorumPeerConfig();
    final boolean singleArgument = args.length == 1;
    if (singleArgument) {
        // parse the configuration file
        quorumConfig.parse(args[0]);
    }

    // Start and schedule the purge task
    new DatadirCleanupManager(
            quorumConfig.getDataDir(),
            quorumConfig.getDataLogDir(),
            quorumConfig.getSnapRetainCount(),
            quorumConfig.getPurgeInterval()).start();

    final boolean quorumDefined = singleArgument && quorumConfig.servers.size() > 0;
    if (!quorumDefined) {
        LOG.warn("Either no config or no quorum defined in config, running "
                + " in standalone mode");
        // standalone mode
        ZooKeeperServerMain.main(args);
        return;
    }
    // quorum (cluster) mode
    runFromConfig(quorumConfig);
}

这里先只看单机模式的服务端

ZooKeeperServerMain

/**
 * Standalone entry point: creates a ZooKeeperServerMain and delegates to
 * initializeAndRun(args), then exits the JVM with status 0.
 * (The exception handling marked with "." is elided in this excerpt.)
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    ZooKeeperServerMain main = new ZooKeeperServerMain();
    try {
        // standalone mode: initialize from the configuration and run
        main.initializeAndRun(args);
    } catch (IllegalArgumentException e) {
       .
       .
    }
    LOG.info("Exiting normally");
    System.exit(0);
}

initializeAndRun单机模式初始化配置信息并运行

/**
 * Standalone initialization: registers the log4j MBeans (best effort),
 * fills a ServerConfig from the arguments and runs the server from it.
 *
 * @param args a single argument is parsed as a configuration file path;
 *             any other argument count is handed to ServerConfig.parse(String[])
 * @throws ConfigException declared for configuration parsing
 * @throws IOException     declared for the underlying I/O
 */
protected void initializeAndRun(String[] args) throws ConfigException, IOException {
    try {
        ManagedUtil.registerLog4jMBeans();
    } catch (JMException jmxFailure) {
        // a JMX registration failure is only logged; startup continues
        LOG.warn("Unable to register log4j JMX control", jmxFailure);
    }

    // configuration holder used for standalone mode
    ServerConfig serverConfig = new ServerConfig();
    if (args.length != 1) {
        serverConfig.parse(args);
    } else {
        // parse the configuration file into the config object
        serverConfig.parse(args[0]);
    }

    // run the server with the parsed configuration
    runFromConfig(serverConfig);
}

runFromConfig根据配置信息运行服务器

/**
 * Builds a ZooKeeperServer from the given configuration, starts the
 * connection factory, and then blocks on a latch until shutdown is signalled.
 * The FileTxnSnapLog is always closed in the finally block.
 *
 * @param config parsed standalone server configuration
 * @throws IOException declared by the method; not caught here
 */
public void runFromConfig(ServerConfig config) throws IOException {
    LOG.info("Starting server");
    FileTxnSnapLog txnLog = null;
    try {
        // the server object
        final ZooKeeperServer zkServer = new ZooKeeperServer();
        final CountDownLatch shutdownLatch = new CountDownLatch(1);
        zkServer.registerServerShutdownHandler(
                new ZooKeeperServerShutdownHandler(shutdownLatch));
        // helper built on the transaction-log directory and the data (snapshot) directory
        txnLog = new FileTxnSnapLog(new File(config.dataLogDir), new File(
                config.dataDir));
        txnLog.setServerStats(zkServer.serverStats());
        zkServer.setTxnLogFactory(txnLog);
        zkServer.setTickTime(config.tickTime);
        zkServer.setMinSessionTimeout(config.minSessionTimeout);
        zkServer.setMaxSessionTimeout(config.maxSessionTimeout);
        // obtain the connection factory through a static factory method
        cnxnFactory = ServerCnxnFactory.createFactory();
        // set up the server socket; per the article the default factory is
        // NIOServerCnxnFactory, which runs as a thread
        cnxnFactory.configure(config.getClientPortAddress(),
                config.getMaxClientCnxns());
        cnxnFactory.startup(zkServer);
        // Watch status of ZooKeeper server. It will do a graceful shutdown
        // if the server is not running or hits an internal error.
        shutdownLatch.await();
        shutdown();

        cnxnFactory.join();
        if (zkServer.canShutdown()) {
            zkServer.shutdown(true);
        }
    } catch (InterruptedException e) {
        // warn, but generally this is ok
        LOG.warn("Server interrupted", e);
    } finally {
        if (txnLog != null) {
            txnLog.close();
        }
    }
}

NIOServerCnxnFactory服务端socket

建立socket,默认是NIOServerCnxnFactory

configure

/**
 * Prepares the factory: creates (but does not start) its worker thread,
 * opens a non-blocking server socket channel bound to addr, and registers
 * it with the selector for OP_ACCEPT.
 *
 * @param addr  address to bind the server socket to
 * @param maxcc value stored in maxClientCnxns
 * @throws IOException if opening, binding or registering the channel fails
 */
public void configure(InetSocketAddress addr, int maxcc) throws IOException {
    configureSaslLogin();

    // wrap this object in a thread (this is passed as the thread's target)
    thread = new ZooKeeperThread(this, "NIOServerCxn.Factory:" + addr);
    // Java threads are either user threads or daemon threads:
    // Thread.setDaemon(false) makes a user thread, Thread.setDaemon(true) a
    // daemon thread. A thread that never calls setDaemon inherits the daemon
    // status of the thread that created it (so it is a user thread when
    // created from main).
    // The JVM exits once only daemon threads remain; as long as at least one
    // user thread is alive the JVM keeps running. The garbage collector is
    // the classic example of a daemon thread: it runs in the background and
    // has nothing left to do once no application thread is running.

    // So this thread is tied to the JVM's lifetime: when it is the only one
    // left there is no point in keeping the process alive.
    thread.setDaemon(true);
    maxClientCnxns = maxcc;
    this.ss = ServerSocketChannel.open();
    ss.socket().setReuseAddress(true);
    LOG.info("binding to port " + addr);
    ss.socket().bind(addr);
    ss.configureBlocking(false);
    ss.register(selector, SelectionKey.OP_ACCEPT);
}

startup

/**
 * Starts the factory and brings up the given server: starts the thread,
 * attaches the server, loads its data, then starts the server itself.
 *
 * @param zks the server to attach and start
 * @throws IOException          declared for data loading
 * @throws InterruptedException declared for data loading
 */
public void startup(ZooKeeperServer zks) throws IOException,InterruptedException {
    // start the thread
    start();
    // attach the ZooKeeperServer to this factory
    setZooKeeperServer(zks);
    // load the data
    zks.startdata();
    // set up the request processor chain
    zks.startup();
}

start()启动线程NIOServerCnxnFactory的Thread

start()就是启动线程,线程启动后执行的是下面的run()方法:

/**
 * Selector loop of the connection factory. While the server socket is open
 * it waits up to one second for ready keys, accepts new connections
 * (rejecting those over the per-address limit) and hands read/write
 * readiness to the NIOServerCnxn attached to the key. Exceptions thrown
 * inside one iteration are logged and the loop continues.
 */
public void run() {
    // keep looping as long as the server socket has not been closed
    // (the selector is plain Java NIO; focus on the core logic below)
    while (!ss.socket().isClosed()) {
        try {
            selector.select(1000);
            Set<SelectionKey> selected;
            synchronized (this) {
                selected = selector.selectedKeys();
            }
            // work on a shuffled copy of the selected keys
            ArrayList<SelectionKey> selectedList = new ArrayList<SelectionKey>(
                    selected);
            Collections.shuffle(selectedList);
            for (SelectionKey k : selectedList) {
                if ((k.readyOps() & SelectionKey.OP_ACCEPT) != 0) {
                    // accept the incoming connection
                    SocketChannel sc = ((ServerSocketChannel) k
                            .channel()).accept();
                    InetAddress ia = sc.socket().getInetAddress();
                    int cnxncount = getClientCnxnCount(ia);
                    // a limit of 0 or less disables the per-address check
                    if (maxClientCnxns > 0 && cnxncount >= maxClientCnxns){
                        LOG.warn("Too many connections from " + ia
                                 + " - max is " + maxClientCnxns );
                        sc.close();
                    } else {
                        LOG.info("Accepted socket connection from "
                                 + sc.socket().getRemoteSocketAddress());
                        sc.configureBlocking(false);
                        SelectionKey sk = sc.register(selector,
                                SelectionKey.OP_READ);

                        // create the connection object and attach it to the key
                        NIOServerCnxn cnxn = createConnection(sc, sk);
                        sk.attach(cnxn);
                        addCnxn(cnxn);
                    }
                } else if ((k.readyOps() & (SelectionKey.OP_READ | SelectionKey.OP_WRITE)) != 0) {
                    // data on an established connection (per the article,
                    // periodic client pings also arrive here)
                    NIOServerCnxn c = (NIOServerCnxn) k.attachment();
                    // let the connection perform the read/write
                    c.doIO(k);
                } else {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Unexpected ops in select "
                                  + k.readyOps());
                    }
                }
            }
            selected.clear();
        } catch (RuntimeException e) {
            LOG.warn("Ignoring unexpected runtime exception", e);
        } catch (Exception e) {
            LOG.warn("Ignoring exception", e);
        }
    }
    closeAll();
    LOG.info("NIOServerCnxn factory exited run method");
}

zks.startdata()加载数据

/**
 * Makes sure a ZKDatabase exists and has been loaded.
 *
 * @throws IOException          declared for data loading
 * @throws InterruptedException declared for data loading
 */
public void startdata() throws IOException, InterruptedException {
    // create the database lazily on top of txnLogFactory (the txn-log/snapshot helper)
    if (null == zkDb) {
        zkDb = new ZKDatabase(this.txnLogFactory);
    }
    // already initialized: nothing to load
    if (zkDb.isInitialized()) {
        return;
    }
    // not initialized yet: load the data
    loadData();
}
loadData()
/**
 * Sets the server zxid from the database (loading the database first when
 * it is not initialized yet), marks the data tree as initialized, and kills
 * every session that has no entry in the sessions-with-timeouts map.
 *
 * @throws IOException          declared for loading the database
 * @throws InterruptedException declared by the method
 */
public void loadData() throws IOException, InterruptedException {
    // initialized: reuse the last processed zxid; otherwise load from disk
    long startingZxid = zkDb.isInitialized()
            ? zkDb.getDataTreeLastProcessedZxid()
            : zkDb.loadDataBase();
    setZxid(startingZxid);

    // collect sessions without a timeout entry (treated as dead)
    LinkedList<Long> expired = new LinkedList<Long>();
    for (Long sessionId : zkDb.getSessions()) {
        boolean hasTimeout = zkDb.getSessionWithTimeOuts().get(sessionId) != null;
        if (!hasTimeout) {
            expired.add(sessionId);
        }
    }

    // flag the data tree as initialized
    zkDb.setDataTreeInit(true);

    // clean up the dead sessions
    for (long sessionId : expired) {
        killSession(sessionId, zkDb.getDataTreeLastProcessedZxid());
    }
}
ZKDatabase.loadDataBase
/**
 * Restores the data tree through snapLog and marks the database initialized.
 *
 * @return the zxid returned by snapLog.restore
 * @throws IOException if restoring fails
 */
public long loadDataBase() throws IOException {
    // per the article, snapLog is the helper created in ZooKeeperServerMain.runFromConfig
    final long restoredZxid = snapLog.restore(
            dataTree, sessionsWithTimeouts, commitProposalPlaybackListener);
    // the flag is only raised after restore() returned normally
    initialized = true;
    return restoredZxid;
}
FileTxnSnapLog.restore将快照加到DataTree
/**
 * Deserializes the snapshot into the data tree, then continues with
 * fastForwardFromEdits and returns its result.
 *
 * @param dt       data tree to populate
 * @param sessions session map to populate
 * @param listener playback listener handed to fastForwardFromEdits
 * @return the value returned by fastForwardFromEdits
 * @throws IOException if reading fails
 */
public long restore(DataTree dt, Map<Long, Integer> sessions, PlayBackListener listener) throws IOException {
    // step 1: load the snapshot into the DataTree
    snapLog.deserialize(dt, sessions);
    // step 2: continue from the edits
    final long highestZxid = fastForwardFromEdits(dt, sessions, listener);
    return highestZxid;
}

zks.startup()设置请求处理器

/**
 * Starts the server: ensures a session tracker exists and starts it, builds
 * the request processor chain, registers JMX, switches the state to RUNNING
 * and wakes up every thread waiting on this object's monitor.
 */
public synchronized void startup() {
    if (sessionTracker == null) {
        // create the session tracker
        createSessionTracker();
    }
    // start the session tracker (per the article it is a thread)
    startSessionTracker();

    // build the request processors
    setupRequestProcessors();

    registerJMX();

    setState(State.RUNNING);
    // wake up waiters on this monitor (submitRequest waits here while the state is INITIAL)
    notifyAll();
}
setupRequestProcessors()建立请求处理器链

这三个请求处理器组成一条请求链,其中PrepRequestProcessor和SyncRequestProcessor是线程(下面的代码里都调用了start()),FinalRequestProcessor不是线程
PrepRequestProcessor ———>> SyncRequestProcessor ———>> FinalRequestProcessor

/**
 * Builds the processor chain back to front
 * (PrepRequestProcessor -> SyncRequestProcessor -> FinalRequestProcessor)
 * and starts the two processors that are started explicitly.
 */
protected void setupRequestProcessors() {
    // tail of the chain: runs last
    RequestProcessor tail = new FinalRequestProcessor(this);

    // middle of the chain: wraps the tail, then gets started
    SyncRequestProcessor sync = new SyncRequestProcessor(this, tail);
    sync.start();

    // head of the chain: wraps the sync processor; published as firstProcessor before it is started
    PrepRequestProcessor prep = new PrepRequestProcessor(this, sync);
    firstProcessor = prep;
    prep.start();
}

回到NIOServerCnxnFactory.run()

具体看服务端怎么处理客户端的请求

// 接收数据,这里会间歇性的接收到客户端的ping
NIOServerCnxn c = (NIOServerCnxn) k.attachment();
// 处理请求数据
c.doIO(k);

NIOServerCnxn

doIO执行读写

这里只看怎么读数据

// 读取数据
if (k.isReadable()) {
    int rc = sock.read(incomingBuffer);
    if (rc < 0) {
        throw new EndOfStreamException(
                "Unable to read additional data from client sessionid 0x"
                + Long.toHexString(sessionId)
                + ", likely client has closed socket");
    }
    if (incomingBuffer.remaining() == 0) {
        boolean isPayload;
        if (incomingBuffer == lenBuffer) { // start of next request
            incomingBuffer.flip();
            isPayload = readLength(k);
            incomingBuffer.clear();
        } else {
            // continuation
            isPayload = true;
        }
        if (isPayload) {

            // 读取
            readPayload();
        }
        else {
            return;
        }
    }
}

readPayload

/**
 * Reads the rest of the current packet into incomingBuffer. Once the buffer
 * is full the packet is counted and dispatched (connect request before the
 * connection is initialized, ordinary request afterwards), and
 * incomingBuffer is pointed back at lenBuffer for the next packet.
 *
 * @throws IOException          on read failure, including EndOfStreamException
 *                              when the socket read returns a negative count
 * @throws InterruptedException declared by the method
 */
private void readPayload() throws IOException, InterruptedException {
    if (incomingBuffer.remaining() != 0) {
        // sock is non-blocking, so ok
        int bytesRead = sock.read(incomingBuffer);
        if (bytesRead < 0) {
            throw new EndOfStreamException(
                    "Unable to read additional data from client sessionid 0x"
                    + Long.toHexString(sessionId)
                    + ", likely client has closed socket");
        }
    }

    // packet still incomplete: wait for the next read event
    if (incomingBuffer.remaining() != 0) {
        return;
    }

    // count the received packet
    packetReceived();
    incomingBuffer.flip();
    if (initialized) {
        // connection already established: read a request
        readRequest();
    } else {
        readConnectRequest();
    }
    lenBuffer.clear();
    incomingBuffer = lenBuffer;
}

readRequest()读取请求

/**
 * Hands the buffered packet to the server for processing.
 *
 * @throws IOException declared for packet processing
 */
private void readRequest() throws IOException {
   /**
    * Per the article, zkServer is set by the constructor that runs during
    * NIOServerCnxnFactory.run() -> createConnection().
    */
   zkServer.processPacket(this, incomingBuffer);
}

ZooKeeperServer

processPacket

// 例如create命令就会走到这里来
// 构建Request
Request si = new Request(cnxn, cnxn.getSessionId(), h.getXid(),
h.getType(), incomingBuffer, cnxn.getAuthInfo());
si.setOwner(ServerCnxn.me);

// 提交请求
submitRequest(si);

submitRequest交由请求处理器处理数据

/**
 * Submits a request to the head of the processor chain. If the chain is not
 * set up yet, waits while the server state is INITIAL and fails with a
 * RuntimeException("Not started") when the server does not end up RUNNING.
 * Requests of an unknown type go to an UnimplementedRequestProcessor.
 *
 * @param si the request to submit
 */
public void submitRequest(Request si) {
    // firstProcessor is the head of the chain (a PrepRequestProcessor)
    if (firstProcessor == null) {
        synchronized (this) {
            try {
                // re-check the state at least once per second while still INITIAL
                while (state == State.INITIAL) {
                    wait(1000);
                }
            } catch (InterruptedException e) {
                LOG.warn("Unexpected interruption", e);
            }
            if (firstProcessor == null || state != State.RUNNING) {
                throw new RuntimeException("Not started");
            }
        }
    }
    try {
        touch(si.cnxn);
        // check whether the request type is valid
        boolean validpacket = Request.isValid(si.type);
        if (validpacket) {
            // hand the request to the processor chain
            firstProcessor.processRequest(si);
            if (si.cnxn != null) {
                incInProcess();
            }
        } else {
            LOG.warn("Received packet at server of unknown type " + si.type);
            new UnimplementedRequestProcessor().processRequest(si);
        }
    } catch (MissingSessionException e) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Dropping request: " + e.getMessage());
        }
    } catch (RequestProcessorException e) {
        LOG.error("Unable to process request:" + e.getMessage(), e);
    }
}

请求处理链

(图:请求处理链 PrepRequestProcessor → SyncRequestProcessor → FinalRequestProcessor)

PrepRequestProcessor处理请求放入队列

/**
 * Accepts a request by queueing it; nothing else happens on the caller's side.
 *
 * @param request the request to enqueue
 */
public void processRequest(Request request) {
    // add the request to the submittedRequests queue
    submittedRequests.add(request);
}

请求处理器在setupRequestProcessors()执行构建的时候开始运行了,所以看run()方法

run()

/**
 * Processor loop: takes requests from submittedRequests one at a time and
 * passes each to pRequest until the requestOfDeath marker is taken or an
 * exception ends the loop.
 */
public void run() {
    try {
        for (;;) {
            // blocks until a request is available
            Request next = submittedRequests.take();
            long traceMask = (next.type == OpCode.ping)
                    ? ZooTrace.CLIENT_PING_TRACE_MASK
                    : ZooTrace.CLIENT_REQUEST_TRACE_MASK;
            if (LOG.isTraceEnabled()) {
                ZooTrace.logRequest(LOG, traceMask, 'P', next, "");
            }
            // the marker request terminates the loop
            if (next == Request.requestOfDeath) {
                break;
            }
            // process the request
            pRequest(next);
        }
    } catch (RequestProcessorException rpe) {
        if (rpe.getCause() instanceof XidRolloverException) {
            LOG.info(rpe.getCause().getMessage());
        }
        handleException(this.getName(), rpe);
    } catch (Exception unexpected) {
        handleException(this.getName(), unexpected);
    }
    LOG.info("PrepRequestProcessor exited loop!");
}

pRequest判断请求类型

/**
 * Resets the request's header and txn, dispatches on the request type to
 * prepare the transaction, stamps the request with the server's zxid and
 * forwards it to the next processor.
 * (Sections marked with "." are elided in this excerpt.)
 *
 * @param request the request to prepare
 * @throws RequestProcessorException declared for request processing
 */
protected void pRequest(Request request) throws RequestProcessorException {
    request.hdr = null;
    request.txn = null;
    
    try {
        switch (request.type) {
            case OpCode.create:
            CreateRequest createRequest = new CreateRequest();
            // the zxid for this request is obtained here via zks.getNextZxid()
            pRequest2Txn(request.type, zks.getNextZxid(), request, createRequest, true);
            break;
        case OpCode.delete:
            .
            .
            break;
        }
    } catch (KeeperException e) {
        .
        .
    }
    request.zxid = zks.getZxid();

    // call the next request processor (SyncRequestProcessor, per the article)
    nextProcessor.processRequest(request);
}

pRequest2Txn根据请求类型执行操作

/**
 * Builds the transaction header for the request and, depending on the
 * operation type, validates the request and creates the transaction plus
 * the pending change records.
 * (Sections marked with "." are elided in this excerpt.)
 *
 * @param type        operation code of the request
 * @param zxid        zxid to put into the transaction header
 * @param request     the request being prepared; its hdr and txn are set here
 * @param record      record object the request payload is deserialized into
 * @param deserialize whether the payload still has to be deserialized into record
 * @throws KeeperException           on validation failures (bad path, invalid ACL,
 *                                   existing node, ephemeral parent, ...)
 * @throws IOException               declared for deserialization
 * @throws RequestProcessorException declared by the method
 */
protected void pRequest2Txn(int type, long zxid, Request request, Record record, boolean deserialize)
        throws KeeperException, IOException, RequestProcessorException
    {
    request.hdr = new TxnHeader(request.sessionId, request.cxid, zxid,
                                Time.currentWallTime(), type);

    switch (type) {
        case OpCode.create: // node creation
            zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
            CreateRequest createRequest = (CreateRequest)record;   
            if(deserialize)
                ByteBufferInputStream.byteBuffer2Record(request.request, createRequest);
            String path = createRequest.getPath();
            int lastSlash = path.lastIndexOf('/');
            // reject paths without '/' or containing a NUL character
            if (lastSlash == -1 || path.indexOf('\0') != -1 || failCreate) {
                LOG.info("Invalid path " + path + " with session 0x" +
                        Long.toHexString(request.sessionId));
                throw new KeeperException.BadArgumentsException(path);
            }
            List<ACL> listACL = removeDuplicates(createRequest.getAcl());
            if (!fixupACL(request.authInfo, listACL)) {
                throw new KeeperException.InvalidACLException(path);
            }
            String parentPath = path.substring(0, lastSlash);
            // fetch the latest change record of the parent node
            ChangeRecord parentRecord = getRecordForPath(parentPath);

            checkACL(zks, parentRecord.acl, ZooDefs.Perms.CREATE,
                    request.authInfo);
            // cversion: the number of changes made to the children of this znode
            int parentCVersion = parentRecord.stat.getCversion();
            CreateMode createMode =
                CreateMode.fromFlag(createRequest.getFlags());
            // sequential node: append the parent's cversion as a zero-padded
            // 10-digit suffix to the path
            if (createMode.isSequential()) {
                path = path + String.format(Locale.ENGLISH, "%010d", parentCVersion);
            }
            validatePath(path, request.sessionId);
            try {
                if (getRecordForPath(path) != null) {
                    throw new KeeperException.NodeExistsException(path);
                }
            } catch (KeeperException.NoNodeException e) {
                // ignore this one
            }
            // ephemeralOwner: for an ephemeral znode this is the session id of
            // its owner; for a non-ephemeral znode the field is zero.
            // A non-zero ephemeralOwner on the parent therefore means the
            // parent is ephemeral.
            boolean ephemeralParent = parentRecord.stat.getEphemeralOwner() != 0;
            if (ephemeralParent) {
                // an ephemeral parent cannot have children
                throw new KeeperException.NoChildrenForEphemeralsException(path);
            }
            // increment the parent's cversion
            int newCversion = parentRecord.stat.getCversion()+1;
            // build the transaction
            request.txn = new CreateTxn(path, createRequest.getData(),
                    listACL,
                    createMode.isEphemeral(), newCversion);
            StatPersisted s = new StatPersisted();
            if (createMode.isEphemeral()) {
                s.setEphemeralOwner(request.sessionId);
            }
            parentRecord = parentRecord.duplicate(request.hdr.getZxid());
            parentRecord.childCount++;
            parentRecord.stat.setCversion(newCversion);
            // queue the change records (parent first, then the new node);
            // they are consumed later in the chain
            addChangeRecord(parentRecord);
            addChangeRecord(new ChangeRecord(request.hdr.getZxid(), path, s,
                    0, listACL));
            break;
        case OpCode.delete:
            .
            .
        default:
            LOG.error("Invalid OpCode: {} received by PrepRequestProcessor", type);
    }
}

addChangeRecord添加修改记录到队列

/**
 * Records a pending change: appends it to zks.outstandingChanges and makes
 * it the entry for its path in zks.outstandingChangesForPath.
 *
 * @param c the change record to register
 */
void addChangeRecord(ChangeRecord c) {
    // both collections are updated under the outstandingChanges lock
    synchronized (zks.outstandingChanges) {
        zks.outstandingChanges.add(c);
        zks.outstandingChangesForPath.put(c.path, c);
    }
}

SyncRequestProcessor持久化(快照)操作

// 调用SyncRequestProcessor
nextProcessor.processRequest(request);

processRequest

/**
 * Accepts a request by queueing it; nothing else happens on the caller's side.
 *
 * @param request the request to enqueue
 */
public void processRequest(Request request) {
    // add the request to the queuedRequests queue
    queuedRequests.add(request);
}

run()

/**
 * Processor loop: takes requests from queuedRequests, appends them to the
 * transaction log, periodically rolls the log and starts a snapshot thread,
 * and collects appended requests in toFlush until they are flushed.
 * The loop ends when the requestOfDeath marker is taken or a Throwable occurs.
 */
public void run() {
    try {
        int logCount = 0;
        // randomize the roll threshold within [0, snapCount/2)
        setRandRoll(r.nextInt(snapCount/2));
        while (true) {
            Request si = null;
            if (toFlush.isEmpty()) { // nothing is waiting to be flushed
                si = queuedRequests.take(); // block until a request arrives
            } else {
                si = queuedRequests.poll();
                // no request available right now: flush what is pending
                if (si == null) { // the queue is currently empty
                    flush(toFlush); // flush the pending requests
                    continue;
                }
            }
            if (si == requestOfDeath) {
                break;
            }
            if (si != null) {
                // track the number of records written to the log
                // append to the transaction log first; the steps below only
                // run when append() returns true
                // (one transaction log file holds many requests)
                if (zks.getZKDatabase().append(si)) {
                    // count every appended request
                    logCount++;
                    // once logCount exceeds the threshold, roll the log and
                    // take a snapshot
                    if (logCount > (snapCount / 2 + randRoll)) {
                        setRandRoll(r.nextInt(snapCount/2)); // pick a new random offset for the next round
                        // roll the log
                        zks.getZKDatabase().rollLog(); // continue the transaction log in a new file
                        // take a snapshot
                        if (snapInProcess != null && snapInProcess.isAlive()) { // a snapshot is still running
                            LOG.warn("Too busy to snap, skipping");
                        } else {
                            // no snapshot thread is running: start one; its
                            // run() has no loop, so it snapshots exactly once
                            snapInProcess = new ZooKeeperThread("Snapshot Thread") {
                                    public void run() {
                                        try {
                                            zks.takeSnapshot();
                                        } catch(Exception e) {
                                            LOG.warn("Unexpected exception", e);
                                        }
                                    }
                                };
                            snapInProcess.start();
                        }
                        logCount = 0;
                    }
                } else if (toFlush.isEmpty()) {
                    // append() returned false and nothing is pending: pass the
                    // request straight on to the next processor
                    if (nextProcessor != null) {
                        nextProcessor.processRequest(si);
                        if (nextProcessor instanceof Flushable) {
                            ((Flushable)nextProcessor).flush();
                        }
                    }
                    continue;
                }
                // queue the request until the next flush
                toFlush.add(si);
                // flush once more than 1000 requests are pending
                // (per the article, flush() also forwards the flushed requests
                // to nextProcessor)
                if (toFlush.size() > 1000) {
                    flush(toFlush);
                }
            }
        }
    } catch (Throwable t) {
        handleException(this.getName(), t);
        running = false;
    }
    LOG.info("SyncRequestProcessor exited!");
}

FinalRequestProcessor从队列中拿出数据进行内存操作

processRequest

这是三个处理器里面唯一一个不是线程的处理器

// 从修改记录列表中循环提交修改,包括处理事务
synchronized (zks.outstandingChanges) {
    while (!zks.outstandingChanges.isEmpty()
            && zks.outstandingChanges.get(0).zxid <= request.zxid) {
        ChangeRecord cr = zks.outstandingChanges.remove(0);
        if (cr.zxid < request.zxid) {
            LOG.warn("Zxid outstanding "
                    + cr.zxid
                    + " is less than current " + request.zxid);
        }
        if (zks.outstandingChangesForPath.get(cr.path) == cr) {
            zks.outstandingChangesForPath.remove(cr.path);
        }
    }
    if (request.hdr != null) {
       TxnHeader hdr = request.hdr;
       Record txn = request.txn;
       // 处理请求
       rc = zks.processTxn(hdr, txn);
    }
    // do not add non quorum packets to the queue.
    // 添加提交历史
    if (Request.isQuorum(request.type)) {
        zks.getZKDatabase().addCommittedProposal(request);
    }
}

ProcessTxnResult

// 处理请求
rc = getZKDatabase().processTxn(hdr, txn);

DataTree.processTxn根据事务来操作数据

例如

case OpCode.create:
     CreateTxn createTxn = (CreateTxn) txn;
     rc.path = createTxn.getPath();
     // 创建节点,但是这里都是操作的内存中的数据
     createNode(
             createTxn.getPath(),
             createTxn.getData(),
             createTxn.getAcl(),
             createTxn.getEphemeral() ? header.getClientId() : 0,
             createTxn.getParentCVersion(),
             header.getZxid(), header.getTime());
     break;

createNode放入ConcurrentHashMap

创建节点后,触发对应的监听器

/**
 * Creates a node in the in-memory tree: builds its stat, links it under its
 * parent, stores it in the nodes map, tracks it per session when it is
 * ephemeral, updates quota bookkeeping and finally triggers the data and
 * child watches.
 *
 * @param path           full path of the node to create
 * @param data           node data; may be null (counted as 0 bytes for quotas)
 * @param acl            ACL list, converted through aclCache
 * @param ephemeralOwner owner session id; 0 means the node is not ephemeral
 * @param parentCVersion cversion to set on the parent; -1 means "parent's
 *                       current cversion + 1"
 * @param zxid           zxid used for czxid, mzxid and pzxid
 * @param time           timestamp used for ctime and mtime
 * @return the path of the created node
 * @throws KeeperException.NoNodeException     if the parent is not in the nodes map
 * @throws KeeperException.NodeExistsException if the parent already has a child
 *                                             with this name
 */
public String  createNode(String path, byte data[], List<ACL> acl,
        long ephemeralOwner, int parentCVersion, long zxid, long time)
         throws KeeperException.NoNodeException,
         KeeperException.NodeExistsException {
    int lastSlash = path.lastIndexOf('/');
    String parentName = path.substring(0, lastSlash);
    String childName = path.substring(lastSlash + 1);
    StatPersisted stat = new StatPersisted();
    stat.setCtime(time);
    stat.setMtime(time);
    stat.setCzxid(zxid);
    stat.setMzxid(zxid);
    stat.setPzxid(zxid);
    stat.setVersion(0);
    stat.setAversion(0);
    stat.setEphemeralOwner(ephemeralOwner);
    DataNode parent = nodes.get(parentName);
    if (parent == null) {
        throw new KeeperException.NoNodeException();
    }
    // all changes to the parent and the insertion happen under the parent's lock
    synchronized (parent) {
        Set<String> children = parent.getChildren();
        if (children.contains(childName)) {
            throw new KeeperException.NodeExistsException();
        }
        
        if (parentCVersion == -1) {
            parentCVersion = parent.stat.getCversion();
            parentCVersion++;
        }    
        parent.stat.setCversion(parentCVersion);
        parent.stat.setPzxid(zxid);
        Long longval = aclCache.convertAcls(acl);
        DataNode child = new DataNode(parent, data, longval, stat);
        parent.addChild(childName);
        // nodes is a ConcurrentHashMap (per the article)
        nodes.put(path, child);
        // ephemeral node: remember its path under the owning session
        if (ephemeralOwner != 0) {
            HashSet<String> list = ephemerals.get(ephemeralOwner);
            if (list == null) {
                list = new HashSet<String>();
                ephemerals.put(ephemeralOwner, list);
            }
            synchronized (list) {
                list.add(path);
            }
        }
    }
    // now check if its one of the zookeeper node child
    if (parentName.startsWith(quotaZookeeper)) {
        // now check if its the limit node
        if (Quotas.limitNode.equals(childName)) {
            // this is the limit node
            // get the parent and add it to the trie
            pTrie.addPath(parentName.substring(quotaZookeeper.length()));
        }
        if (Quotas.statNode.equals(childName)) {
            updateQuotaForPath(parentName
                    .substring(quotaZookeeper.length()));
        }
    }
    // also check to update the quotas for this node
    String lastPrefix;
    if((lastPrefix = getMaxPrefixWithQuota(path)) != null) {
        // ok we have some match and need to update
        updateCount(lastPrefix, 1);
        updateBytes(lastPrefix, data == null ? 0 : data.length);
    }
    // the watches are triggered here: NodeCreated on the new path and
    // NodeChildrenChanged on the parent ("/" when the parent name is empty)
    dataWatches.triggerWatch(path, Event.EventType.NodeCreated);
    childWatches.triggerWatch(parentName.equals("") ? "/" : parentName,
            Event.EventType.NodeChildrenChanged);
    return path;
}

返回结果

case OpCode.create: {
    lastOp = "CREA";
    rsp = new CreateResponse(rc.path);
    err = Code.get(rc.err);
    break;
}
// 发送命令处理完成后的结果,默认是NIOServerCnxn
cnxn.sendResponse(hdr, rsp, "response");