SolrCloud的DistributedQueue分析

      SolrCloud中采用了DistributedQueue来同步节点间的状态信息。SolrCloud中总共会在3个地方保存队列信息:

  /overseer/queue:保存每个shard的配置信息,以及状态信息(recovering,recovery_failed,active,down,sync)

  对应的生产者为:ZKController中的overseerJobQueue

  消费者:Overseer.ClusterStateUpdater中的stateUpdateQueue;

  /overseer/queue-work:保存正在处理中的消息。shard的信息会先保存到/overseer/queue下面,开始处理时被移到/overseer/queue-work中,处理完成之后再从/overseer/queue-work中删除

  生产者:stateUpdateQueue

  消费者:Overseer.ClusterStateUpdater中的workQueue

  /overseer/collection-queue-work:只有在create,delete,reload collection的时候才会用到此队列,它只保存相应的collection操作信息。待collection操作成功之后,后续的状态更新还会经过/overseer/queue和/overseer/queue-work这两个队列

  生产者:ZKController中的overseerCollectionQueue

  消费者:OverseerCollectionProcessor中的workQueue

  DistributedQueue源码:

package org.apache.solr.cloud;
 
 import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.TreeMap;
 import java.util.concurrent.CountDownLatch;
 
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
 import org.apache.zookeeper.ZooDefs;
 import org.apache.zookeeper.data.ACL;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
  * zookeeper可以通过Sequence Nodes来实现分布式队列
  * 采用sequential在client在申请创建该节点时,zk会自动在节点路径末尾添加递增序号,
  */
 public class DistributedQueue {
     
   private static final Logger LOG = LoggerFactory.getLogger(DistributedQueue.class);
   
   private final String dir;    //队列的上层访问路径
   
   private SolrZkClient zookeeper;
   
   private List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;    // 访问控制列表,这里是一个完全打开的ACL,允许任何客户端对znode进行读写
   
   private final String prefix = "qn-";    // 节点的名称前缀
   
   public DistributedQueue(SolrZkClient zookeeper, String dir, List<ACL> acl) {
     this.dir = dir;
     
     if (acl != null) {
       this.acl = acl;
     }
     this.zookeeper = zookeeper;
     
   }
   
   /**
    * 对序列号进行排序,实现分布式队列的关键,保证了消息的有序性
    */
   private TreeMap<Long,String> orderedChildren(Watcher watcher)
       throws KeeperException, InterruptedException {
     TreeMap<Long,String> orderedChildren = new TreeMap<Long,String>();
     
     List<String> childNames = null;
     try {
       childNames = zookeeper.getChildren(dir, watcher, true);    // 节点名称
     } catch (KeeperException.NoNodeException e) {
       throw e;
     }
     
     for (String childName : childNames) {
       try {
         if (!childName.regionMatches(0, prefix, 0, prefix.length())) {
           LOG.warn("Found child node with improper name: " + childName);
           continue;
         }
         String suffix = childName.substring(prefix.length());
         Long childId = new Long(suffix);        // 递增的序列号
         orderedChildren.put(childId, childName);
       } catch (NumberFormatException e) {
         LOG.warn("Found child node with improper format : " + childName + " "
             + e, e);
       }
     }
     
     return orderedChildren;
   }
   
   /**
    * 返回队首元素
    */
   public byte[] element() throws NoSuchElementException, KeeperException,
       InterruptedException {
     TreeMap<Long,String> orderedChildren;
     
     while (true) {
       try {
         orderedChildren = orderedChildren(null);
       } catch (KeeperException.NoNodeException e) {
         throw new NoSuchElementException();
       }
       if (orderedChildren.size() == 0) throw new NoSuchElementException();
       
       for (String headNode : orderedChildren.values()) {
         if (headNode != null) {
           try {
             return zookeeper.getData(dir + "/" + headNode, null, null, true);
           } catch (KeeperException.NoNodeException e) {
             // Another client removed the node first, try next
           }
         }
       }
     }
   }
   
   /**
    * 删除队首元素
    */
   public byte[] remove() throws NoSuchElementException, KeeperException,
       InterruptedException {
     TreeMap<Long,String> orderedChildren;
     // Same as for element. Should refactor this.
     while (true) {
       try {
         orderedChildren = orderedChildren(null);
       } catch (KeeperException.NoNodeException e) {
         throw new NoSuchElementException();
       }
       if (orderedChildren.size() == 0) throw new NoSuchElementException();
       
       for (String headNode : orderedChildren.values()) {
         String path = dir + "/" + headNode;
         try {
           byte[] data = zookeeper.getData(path, null, null, true);
           zookeeper.delete(path, -1, true);
           return data;
         } catch (KeeperException.NoNodeException e) {
           // Another client deleted the node first.
         }
       }
       
     }
   }
   
   /**
    * zk的watch机制,没什么特别只是添加了个日志的debug
    */
   private class LatchChildWatcher implements Watcher {
     
     CountDownLatch latch;
     
     public LatchChildWatcher() {
       latch = new CountDownLatch(1);
     }
     
     public void process(WatchedEvent event) {
       LOG.debug("Watcher fired on path: " + event.getPath() + " state: "
           + event.getState() + " type " + event.getType());
       latch.countDown();
     }
     
     public void await() throws InterruptedException {
       latch.await();
     }
   }
   
   /**
    * 出队操作
    */
   public byte[] take() throws KeeperException, InterruptedException {
     TreeMap<Long,String> orderedChildren;
     // Same as for element. Should refactor this.
     while (true) {
       LatchChildWatcher childWatcher = new LatchChildWatcher();
       try {
         orderedChildren = orderedChildren(childWatcher);
       } catch (KeeperException.NoNodeException e) {
         zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
         continue;
       }
       if (orderedChildren.size() == 0) {    //  如果orderedChildren为0的话,则等待 
         childWatcher.await();
         continue;
       }
       
       /**
        * 对于失败的delete操作,client转向处理下一个node
        */
       for (String headNode : orderedChildren.values()) {
         String path = dir + "/" + headNode;
         try {
           byte[] data = zookeeper.getData(path, null, null, true);
           zookeeper.delete(path, -1, true);
           return data;
         } catch (KeeperException.NoNodeException e) {    // 这个删除操作有可能失败,因为可能有其他的消费者已经成功的获取该znode
           // Another client deleted the node first.
         }
       }
       
       // 如果最后还没有成功的delete一个item,则在重新orderedChildren()
     }
   }
   
   /**
    * 入队操作 
    * 不需要任何的锁来保证client对同一个znode的操作有序性。由zk负责按顺序分配序列号
    */
   public boolean offer(byte[] data) throws KeeperException,
       InterruptedException {
     for (;;) {
       try {
         zookeeper.create(dir + "/" + prefix, data, acl, CreateMode.PERSISTENT_SEQUENTIAL, true);
         return true;
       } catch (KeeperException.NoNodeException e) {
         try {
           zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
         } catch (KeeperException.NodeExistsException ne) {
         //someone created it
         }
       }
     }
 
     
     
   }
   
   /**
    * 返回队首信息,如果队列为空,则返回null
    */
   public byte[] peek() throws KeeperException, InterruptedException {
     try {
       return element();
     } catch (NoSuchElementException e) {
       return null;
     }
   }
   
   /**
    * block为true的时候,如果队列为空,则会一直阻塞,直到有数据返回
    */
   public byte[] peek(boolean block) throws KeeperException, InterruptedException {
     if (!block) {
       return peek();
     }
     
     TreeMap<Long,String> orderedChildren;
     while (true) {
       LatchChildWatcher childWatcher = new LatchChildWatcher();
       try {
         orderedChildren = orderedChildren(childWatcher);
       } catch (KeeperException.NoNodeException e) {
         zookeeper.create(dir, new byte[0], acl, CreateMode.PERSISTENT, true);
         continue;
       }
       if (orderedChildren.size() == 0) {
         childWatcher.await();
         continue;
       }
       
       for (String headNode : orderedChildren.values()) {
         String path = dir + "/" + headNode;
         try {
           byte[] data = zookeeper.getData(path, null, null, true);
           return data;
         } catch (KeeperException.NoNodeException e) {
           // Another client deleted the node first.
         }
       }
     }
   }
   
   /**
    * 删除队首,如果队列为空,则返回null
    */
   public byte[] poll() throws KeeperException, InterruptedException {
     try {
       return remove();
     } catch (NoSuchElementException e) {
       return null;
     }
   }
   
   public static void main(String[] args) throws KeeperException, InterruptedException {
       SolrZkClient client = new SolrZkClient("localhost", 5*1000);
       DistributedQueue queue = new DistributedQueue(client, "/overseer/queue", null);
       queue.offer("test".getBytes());
       System.out.println(new String(queue.take()));
 }
 }
 

猜你喜欢

转载自fwuwen.iteye.com/blog/1759897