Bobo源码笔记6(结果的筛选和收集(2))

browse()函数的最后部分,是对分组计数结果的一个整理:

 // Tail of browse(): publish one FacetAccessible per facet into facetMap.
 // NOTE(review): the matching try block and the method header are outside this excerpt.
 finally
      {
         // Each facet has its own FacetHitCollector.
         for (FacetHitCollector facetCollector : facetHitCollectorList)
        {
          String name = facetCollector.facetHandler.getName();
          LinkedList<FacetCountCollector> resultcollector=null;
          // Each FacetHitCollector keeps a list of FacetCountCollectors, one per IndexReader.
          resultcollector = facetCollector._countCollectorList;
          // Fall back to the collect-all list when the regular list is missing or empty.
          if (resultcollector == null || resultcollector.size() == 0){
        	  resultcollector = facetCollector._collectAllCollectorList;
          }
          if (resultcollector!=null){
        	FacetSpec fspec = req.getFacetSpec(name);
        	assert fspec != null;
            // A single collector is published as-is; no merging wrapper is created.
            if(resultcollector.size() == 1)
            {
              facetMap.put(name, resultcollector.get(0));             
            }
            else
            {
              // NOTE(review): an empty (size 0) non-null list also reaches this branch.
              ArrayList<FacetAccessible> finalList = new ArrayList<FacetAccessible>(resultcollector.size());
              for (FacetCountCollector fc : resultcollector){
                finalList.add((FacetAccessible)fc);
              }
              // Counts gathered under several IndexReaders must be merged: counts for the same value are summed.
              CombinedFacetAccessible combinedCollector = new CombinedFacetAccessible(fspec, finalList);
              facetMap.put(name, combinedCollector);
        	}
          }
        }
      }

同一个facet下的多个IndexReader的分组计数结果要进行合并,相同的属性的计数要进行求和。这个过程是通过构造CombinedFacetAccessible(FacetSpec, List<FacetAccessible>) 来实现的,这个类的对外的功能函数是getFacets(),也就是返回最终的分组计数结果:

/**
 * Produces the final facet list of this combined accessor.
 *
 * <p>Facets are pulled from the merged iterator returned by {@link #iterator()},
 * filtered by the min hit count of {@code _fspec}, limited to its max count
 * (a non-positive max count means "no limit") and ordered according to
 * {@code _fspec.getOrderBy()}:
 * <ul>
 *   <li>{@code OrderValueAsc}: facets are taken in the order the iterator yields them;</li>
 *   <li>{@code OrderHitsDesc}: by hit count descending, ties broken by value;</li>
 *   <li>anything else: by the comparator from the custom comparator factory.</li>
 * </ul>
 *
 * @return the selected facets in their final order
 */
public List<BrowseFacet> getFacets()
{
  final int requested = _fspec.getMaxCount();
  final int limit = (requested > 0) ? requested : Integer.MAX_VALUE;
  int threshold = _fspec.getMinHitCount();
  final LinkedList<BrowseFacet> result = new LinkedList<BrowseFacet>();
  final FacetIterator merged = (FacetIterator) this.iterator();
  final boolean unbounded = (limit == Integer.MAX_VALUE);
  Comparable value = null;

  // ---- order by value: keep the iterator's own order, stop at the limit ----
  if (FacetSortSpec.OrderValueAsc.equals(_fspec.getOrderBy()))
  {
    for (int taken = 0; taken < limit && (value = merged.next(threshold)) != null; taken++)
    {
      result.add(new BrowseFacet(String.valueOf(value), merged.count));
    }
    return result;
  }

  // ---- order by hits, descending ----
  if (FacetSortSpec.OrderHitsDesc.equals(_fspec.getOrderBy()))
  {
    final Comparator<BrowseFacet> byHitsThenValue = new Comparator<BrowseFacet>()
    {
      public int compare(BrowseFacet left, BrowseFacet right)
      {
        final int byHits = right.getHitCount() - left.getHitCount();
        return (byHits != 0) ? byHits : left.getValue().compareTo(right.getValue());
      }
    };

    if (unbounded)
    {
      // No limit: collect everything that passes the threshold, then sort once.
      while ((value = merged.next(threshold)) != null)
      {
        result.add(new BrowseFacet(String.valueOf(value), merged.count));
      }
      Collections.sort(result, byHitsThenValue);
      return result;
    }

    // Limited: keep a heap of at most 'limit' entries.
    PriorityQueue heap = createPQ(limit, byHitsThenValue);
    int held = 0;
    while (held < limit && (value = merged.next(threshold)) != null)
    {
      heap.add(new BrowseFacet(String.valueOf(value), merged.count));
      held++;
    }
    // 'value' is still non-null only if the fill loop stopped because the heap is full.
    if (value != null)
    {
      BrowseFacet weakest = (BrowseFacet) heap.top();
      while (true)
      {
        // Anything that does not beat the heap top's hit count can never enter,
        // so ask the iterator to skip it outright.
        threshold = weakest.getHitCount() + 1;
        value = merged.next(threshold);
        if (value == null)
        {
          break;
        }
        // Recycle the top entry in place instead of allocating a new facet.
        weakest.setValue(String.valueOf(value));
        weakest.setHitCount(merged.count);
        weakest = (BrowseFacet) heap.updateTop();
      }
    }
    // Entries are prepended as they are popped, which reverses the pop order.
    for (int drained = 0; drained < held; drained++)
    {
      result.addFirst((BrowseFacet) heap.pop());
    }
    return result;
  }

  // ---- custom ordering (FacetSortSpec.OrderByCustom) ----
  final Comparator<BrowseFacet> custom = _fspec.getCustomComparatorFactory().newComparator();

  if (unbounded)
  {
    // Custom order without a limit: collect, then sort.
    while ((value = merged.next(threshold)) != null)
    {
      result.add(new BrowseFacet(String.valueOf(value), merged.count));
    }
    Collections.sort(result, custom);
    return result;
  }

  PriorityQueue heap = createPQ(limit, custom);
  BrowseFacet spare = new BrowseFacet();
  int held = 0;
  while (held < limit && (value = merged.next(threshold)) != null)
  {
    heap.add(new BrowseFacet(String.valueOf(value), merged.count));
    held++;
  }
  if (value != null)
  {
    while ((value = merged.next(threshold)) != null)
    {
      // Offer the candidate; whatever the queue hands back is reused for the next one.
      spare.setHitCount(merged.count);
      spare.setValue(String.valueOf(value));
      spare = (BrowseFacet) heap.insertWithOverflow(spare);
    }
  }
  // Move the heap content into the list, prepending each popped entry.
  for (int drained = 0; drained < held; drained++)
  {
    result.addFirst((BrowseFacet) heap.pop());
  }
  return result;
}

其中一行代码:

FacetIterator iter = (FacetIterator)this.iterator();

这个函数iterator()很重要:它把每个FacetAccessible.iterator()返回的分组统计结果迭代器收集到一个列表中,然后用这个列表构造new CombinedFacetIterator(List<FacetIterator>)(如果属性值是int、long、short、float、double等基本类型,则构造对应的CombinedXxxFacetIterator)。

 /**
  * Creates an iterator that merges the facet iterators of every
  * {@link FacetAccessible} in {@code _list}.
  *
  * <p>Each accessible is asked for its iterator exactly once; null iterators are
  * skipped. The type of the first collected iterator selects the combined
  * implementation (int, long, short, float, double, or the generic one). When
  * no iterator is available at all, an empty generic
  * {@code CombinedFacetIterator} is returned instead of failing on
  * {@code iterList.get(0)}.
  *
  * @return a combined iterator over all underlying facet iterators
  * @throws ClassCastException if the collected iterators are not all of the
  *         primitive type indicated by the first one
  */
 public FacetIterator iterator() {

    ArrayList<FacetIterator> iterList = new ArrayList<FacetIterator>(_list.size());
    for (FacetAccessible facetAccessor : _list)
    {
      FacetIterator iter = (FacetIterator) facetAccessor.iterator();
      if(iter != null)
        iterList.add(iter);
    }

    // Nothing to merge: avoid get(0) on an empty list and hand back an empty iterator.
    if (iterList.isEmpty())
    {
      return new CombinedFacetIterator(iterList);
    }

    // The first iterator decides which typed combiner is used; the iterators
    // collected above are reused rather than created a second time.
    FacetIterator first = iterList.get(0);
    if (first instanceof IntFacetIterator)
    {
      ArrayList<IntFacetIterator> il = new ArrayList<IntFacetIterator>(iterList.size());
      for (FacetIterator it : iterList)
      {
        il.add((IntFacetIterator) it);
      }
      return new CombinedIntFacetIterator(il, _fspec.getMinHitCount());
    }
    if (first instanceof LongFacetIterator)
    {
      ArrayList<LongFacetIterator> il = new ArrayList<LongFacetIterator>(iterList.size());
      for (FacetIterator it : iterList)
      {
        il.add((LongFacetIterator) it);
      }
      return new CombinedLongFacetIterator(il, _fspec.getMinHitCount());
    }
    if (first instanceof ShortFacetIterator)
    {
      ArrayList<ShortFacetIterator> il = new ArrayList<ShortFacetIterator>(iterList.size());
      for (FacetIterator it : iterList)
      {
        il.add((ShortFacetIterator) it);
      }
      return new CombinedShortFacetIterator(il, _fspec.getMinHitCount());
    }
    if (first instanceof FloatFacetIterator)
    {
      ArrayList<FloatFacetIterator> il = new ArrayList<FloatFacetIterator>(iterList.size());
      for (FacetIterator it : iterList)
      {
        il.add((FloatFacetIterator) it);
      }
      return new CombinedFloatFacetIterator(il, _fspec.getMinHitCount());
    }
    if (first instanceof DoubleFacetIterator)
    {
      ArrayList<DoubleFacetIterator> il = new ArrayList<DoubleFacetIterator>(iterList.size());
      for (FacetIterator it : iterList)
      {
        il.add((DoubleFacetIterator) it);
      }
      return new CombinedDoubleFacetIterator(il, _fspec.getMinHitCount());
    }
    return new CombinedFacetIterator(iterList);
  }
 

FacetAccessible有一个iterator()接口方法,它将分组计数结果由数组转换成队列,并返回对应的iterator。这里以DefaultFacetCountCollector的具体实现为例:

扫描二维码关注公众号,回复: 807497 查看本文章
  /**
   * Returns an iterator over this collector's facet counts.
   *
   * <p>The implementation is chosen from the element type reported by
   * {@code _dataCache.valArray.getType()}: Integer, Long, Short, Float and
   * Double each get their dedicated iterator; every other type gets the
   * generic {@code DefaultFacetIterator}.
   *
   * @return a facet iterator matching the value type of the data cache
   */
  public FacetIterator iterator()
  {
    final Object elementType = _dataCache.valArray.getType();

    if (elementType.equals(Integer.class))
    {
      return new DefaultIntFacetIterator((TermIntList) _dataCache.valArray, _count, _countlength, false);
    }
    if (elementType.equals(Long.class))
    {
      return new DefaultLongFacetIterator((TermLongList) _dataCache.valArray, _count, _countlength, false);
    }
    if (elementType.equals(Short.class))
    {
      return new DefaultShortFacetIterator((TermShortList) _dataCache.valArray, _count, _countlength, false);
    }
    if (elementType.equals(Float.class))
    {
      return new DefaultFloatFacetIterator((TermFloatList) _dataCache.valArray, _count, _countlength, false);
    }
    if (elementType.equals(Double.class))
    {
      return new DefaultDoubleFacetIterator((TermDoubleList) _dataCache.valArray, _count, _countlength, false);
    }
    // No specialised iterator for this type: use the generic one.
    return new DefaultFacetIterator(_dataCache.valArray, _count, _countlength, false);
  }

 可以看到是根据facet的属性值的类型来返回iterator的

那么多个IndexReader的分组计数结果是如何进行合并的呢?

/**
 * Builds a combined iterator on top of the given facet iterators.
 *
 * <p>Every source iterator is advanced once with {@code next(0)}; sources that
 * yield an element are added to the heap, sources that yield nothing are left
 * out. The current facet and count start out cleared.
 *
 * @param iterators the per-source facet iterators to merge
 */
public CombinedFacetIterator(final List<FacetIterator> iterators) {
    this._iterators = iterators;
    // One slot more than there are sources; next() reads the heap root from index 1.
    this.heap = new FacetIterator[iterators.size() + 1];
    this.size = 0;

    for (FacetIterator source : iterators) {
      if (source.next(0) == null) {
        continue; // nothing to contribute
      }
      add(source);
    }

    this.facet = null;
    this.count = 0;
  }

类CombinedFacetIterator维护着一个最小堆,堆中的元素是FacetIterator。这个最小堆以facet的属性值作为比较依据进行排列,因此属性值相同的facet必然会连续地从最小堆中弹出;将它们的count求和,最后合并成一个facet返回:

/**
 * Advances to the next merged facet whose summed count is at least
 * {@code minHits}.
 *
 * <p>Starting from the iterator at the heap root ({@code heap[1]}), entries
 * whose facet equals the current one have their counts added together. When a
 * different facet shows up, the current one is returned if its total reaches
 * {@code minHits}; otherwise merging restarts from the new facet.
 *
 * @param minHits minimum combined hit count a facet needs in order to be returned
 * @return {@code null} when the heap is already empty on entry; otherwise
 *         {@code format(facet)} (note that {@code facet} has been reset to
 *         {@code null} if the sources ran out before a facet reached {@code minHits})
 * @throws RuntimeException if the iterator at the heap root exposes a null facet
 */
public Comparable next(int minHits) {
    // Nothing left to merge.
    if(size == 0)
    {
      facet = null;
      count = 0;
      return null;
    }

    // Start with the facet currently exposed by the heap root.
    FacetIterator node = heap[1];    
    facet = node.facet;
    count = node.count;
    // Sub-iterators are asked for at least 1 hit when a minimum is requested, else 0.
    // NOTE(review): presumably because a single source may fall below minHits while the sum does not - confirm.
    int min = (minHits > 0 ? 1 : 0);
    while(true)
    {
      if(node.next(min) != null)
      {
        // re-establish the min-heap order after the root advanced
        downHeap();
        node = heap[1];
      }
      else
      {
        // the iterator at heap[1] has no more elements, so pop it off the heap
        pop();
        if(size > 0)
        {
          node = heap[1];
        }
        else
        {
          // we reached the end. check if this facet obeys the minHits
          if(count < minHits)
          {
            facet = null;
            count = 0;
          }
          break;
        }
      }
      Comparable next = node.facet;
      if (next==null) throw new RuntimeException();
      if(!next.equals(facet))// every entry of the current facet has been popped and summed
      {
        // check if this facet obeys the minHits
        if(count >= minHits)
          break;
        // else, continue iterating to the next facet
        facet = next;
        count = node.count;
      }
      else// same facet: add this source's count to the running total
      {
        count += node.count;
      }
    }
    return format(facet);
  }
 

猜你喜欢

转载自eric-gcm.iteye.com/blog/1672961