Hibernate Search索引重建源码解析

使用Hibernate Search可以很方便地实现搜索功能，一般重建全部索引时会使用下面的方法：

    // Wrap the current Hibernate session so the Hibernate Search API can be used on it.
    Session session = dao.getSession();
    FullTextSession fullTextSession = Search.getFullTextSession(session);
    try {
        // Only needs to run once; afterwards Hibernate indexes newly added records automatically.
        // startAndWait() is synchronous: the calling thread waits until the rebuild has completed.
        fullTextSession.createIndexer().startAndWait();
    } catch (InterruptedException e) {
        logger.error("buildIndex error------->", e);
        // Restore the interrupt flag so code further up the call stack can still see the interruption.
        Thread.currentThread().interrupt();
    }

由于遇到重建索引阻塞线程的情况，看了一下Hibernate Search建立索引的源码。
Hibernate Search版本:4.5.0.Final

fullTextSession调用createIndexer()方法，创建MassIndexer的实例来给数据库的记录创建索引

/**
 * Creates a {@code MassIndexer} for the given entity types.
 * The indexer is built by the {@code MassIndexerFactory} service, which is
 * looked up through the search factory's service manager.
 *
 * @param types the entity types to index
 * @return the mass indexer produced by the factory service
 */
public MassIndexer createIndexer(Class<?>... types) {
        final MutableSearchFactory searchFactory = (MutableSearchFactory) getSearchFactoryImplementor();
        final MassIndexerFactory indexerFactory = searchFactory
                .getServiceManager()
                .requestService( MassIndexerFactoryProvider.class, null );
        return indexerFactory.createMassIndexer( getSearchFactoryImplementor(), getFactory(), types );
    }

调用MassIndexerImpl的startAndWait()方法开始创建索引，并同步等待索引创建完成

    /**
     * Runs the mass indexing on the calling thread and returns once the
     * coordinator has finished.
     *
     * @throws InterruptedException if the calling thread's interrupt flag is set
     *         after the coordinator has returned
     */
    public void startAndWait() throws InterruptedException {
        // The BatchCoordinator is what starts the producer and consumer threads.
        // run() is called directly (not handed to another thread), so this call is synchronous.
        createCoordinator().run();
        final boolean interrupted = Thread.currentThread().isInterrupted();
        if ( interrupted ) {
            throw new InterruptedException();
        }
    }

    /**
     * Builds the {@code BatchCoordinator} from the settings currently held by this indexer.
     *
     * @return a new coordinator configured with this indexer's fields
     */
    protected BatchCoordinator createCoordinator() {
        final BatchCoordinator coordinator = new BatchCoordinator(
                rootEntities,
                searchFactoryImplementor,
                sessionFactory,
                typesToIndexInParallel,
                documentBuilderThreads,
                cacheMode,
                objectLoadingBatchSize,
                objectsLimit,
                optimizeAtEnd,
                purgeAtStart,
                optimizeAfterPurge,
                monitor,
                idFetchSize
        );
        return coordinator;
    }

BatchCoordinator继承了ErrorHandledRunnable，run()方法代码如下

/**
 * Template method: delegates the actual work to {@code runWithErrorHandler()}.
 * Any exception it throws is passed to the search factory's error handler,
 * after which {@code cleanUpOnError()} is invoked.
 */
public final void run() {
        final ErrorHandler handler = searchFactoryImplementor.getErrorHandler();
        try {
            // implemented by the concrete subclass
            runWithErrorHandler();
        }
        catch (Exception failure) {
            // being this an async thread we want to make sure everything is somehow reported
            final String message = log.massIndexerUnexpectedErrorMessage();
            handler.handleException( message, failure );
            cleanUpOnError();
        }
    }

调用BatchCoordinator的runWithErrorHandler()方法

/**
 * Runs the three phases of a batch indexing job against a freshly created
 * batch backend: preparation, the indexing itself, and the final clean-up.
 * The monitor is always told that indexing completed, even on interruption.
 */
public void runWithErrorHandler() {
        final BatchBackend batchBackend = searchFactoryImplementor.makeBatchBackend( monitor );
        try {
            // purgeAll and pre-optimize activities
            beforeBatch( batchBackend );
            // index every root entity type
            doBatchWork( batchBackend );
            // clean-up work
            afterBatch( batchBackend );
        }
        catch (InterruptedException interruption) {
            log.interruptedBatchIndexing();
            // keep the interrupt visible to the caller (startAndWait checks the flag)
            Thread.currentThread().interrupt();
        }
        finally {
            monitor.indexingCompleted();
        }
    }
    /**
     * Submits one {@code BatchIndexingWorkspace} per root entity type to a
     * thread pool and waits until all of them have signalled completion.
     *
     * @param backend the batch backend receiving the index work
     * @throws InterruptedException if interrupted while waiting on {@code endAllSignal}
     */
    private void doBatchWork(BatchBackend backend) throws InterruptedException {
        // pool size is typesToIndexInParallel (1 by default, according to the article)
        final ExecutorService workspacePool = Executors.newFixedThreadPool( typesToIndexInParallel, "BatchIndexingWorkspace" );
        for ( Class<?> entityType : rootEntities ) {
            final BatchIndexingWorkspace workspace = new BatchIndexingWorkspace(
                    searchFactoryImplementor, sessionFactory, entityType,
                    documentBuilderThreads,
                    cacheMode, objectLoadingBatchSize, endAllSignal,
                    monitor, backend, objectsLimit, idFetchSize
            );
            workspacePool.execute( workspace );
        }
        // no further submissions; tasks already queued still run
        workspacePool.shutdown();
        // waits for the executor to finish: each workspace counts endAllSignal down when done
        endAllSignal.await();
    }

BatchIndexingWorkspace继承了ErrorHandledRunnable，实际业务代码在runWithErrorHandler()方法中

/**
 * Indexes one entity type: starts the consumer threads, then the producer
 * thread, and waits on {@code producerEndSignal} until the work has been
 * handed to the backend. {@code endAllSignal} is always counted down at the
 * end so the coordinator waiting on it is released.
 *
 * @throws SearchException if interrupted while waiting for the work to be produced
 */
public void runWithErrorHandler() {
        try {
            final ErrorHandler handler = searchFactoryImplementor.getErrorHandler();
            final BatchTransactionalContext context =
                    new BatchTransactionalContext( searchFactoryImplementor, sessionFactory, handler );
            // first start the consumers, then the producers (reverse order):
            // consumers load the records for the primary-key lists supplied by the producer and index them
            startTransformationToLuceneWork( context, handler );
            // the producer loads the primary keys from the database
            startProducingPrimaryKeys( context, handler );
            try {
                // await for all work being sent to the backend
                producerEndSignal.await();
                log.debugf( "All work for type %s has been produced", indexedType.getName() );
            }
            catch (InterruptedException interruption) {
                // restore interruption signal:
                Thread.currentThread().interrupt();
                throw new SearchException( "Interrupted on batch Indexing; index will be left in unknown state!", interruption );
            }
        }
        finally {
            endAllSignal.countDown();
        }
    }

    /**
     * Starts the producer thread, which runs an {@code IdentifierProducer}
     * wrapped in an {@code OptionallyWrapInJTATransaction}.
     *
     * @param transactionalContext the transactional context handed to the wrapper
     * @param errorHandler the error handler handed to the producer
     */
    private void startProducingPrimaryKeys(BatchTransactionalContext transactionalContext, ErrorHandler errorHandler) {
        final IdentifierProducer idProducer = new IdentifierProducer(
                primaryKeyStream, sessionFactory,
                objectLoadingBatchSize, indexedType, monitor,
                objectsLimit, errorHandler, idFetchSize
        );
        final Runnable producerTask = new OptionallyWrapInJTATransaction( transactionalContext, idProducer );
        // the pool has size 1 and is not configurable: ensures the list is consistent as produced by one transaction
        final ThreadPoolExecutor idLoaderPool = Executors.newFixedThreadPool( 1, "identifierloader" );
        try {
            idLoaderPool.execute( producerTask );
        }
        finally {
            // accept no further tasks; the submitted producer still runs
            idLoaderPool.shutdown();
        }
    }

    /**
     * Starts the consumer threads. The same wrapped
     * {@code IdentifierConsumerDocumentProducer} is submitted once per
     * document-builder thread.
     *
     * @param transactionalContext the transactional context handed to the wrapper
     * @param errorHandler the error handler handed to the consumer
     */
    private void startTransformationToLuceneWork(BatchTransactionalContext transactionalContext, ErrorHandler errorHandler) {
        final IdentifierConsumerDocumentProducer documentProducer = new IdentifierConsumerDocumentProducer(
                primaryKeyStream, monitor, sessionFactory, producerEndSignal,
                cacheMode, indexedType, searchFactoryImplementor,
                idNameOfIndexedType, backend, errorHandler
        );
        final Runnable consumerTask = new OptionallyWrapInJTATransaction( transactionalContext, documentProducer );
        // documentBuilderThreads is 6 by default, according to the article
        final ThreadPoolExecutor entityLoaderPool = Executors.newFixedThreadPool( documentBuilderThreads, "entityloader" );
        try {
            for ( int submitted = 0; submitted < documentBuilderThreads; submitted++ ) {
                entityLoaderPool.execute( consumerTask );
            }
        }
        finally {
            // accept no further tasks; the submitted consumers still run
            entityLoaderPool.shutdown();
        }
    }

生产者和消费者都是通过OptionallyWrapInJTATransaction封装

/**
 * Runs the wrapped task, optionally inside a transaction started on the
 * batch context's transaction manager.
 * <p>
 * Exactly one of {@code sessionAwareRunnable} / {@code statelessSessionAwareRunnable}
 * is used: the session-aware one when it is non-null, the stateless one otherwise.
 * When wrapping, a matching session is opened for the task and is now always
 * closed again, even if the task or the commit throws (previously it leaked
 * on failure).
 *
 * @throws Exception whatever the wrapped task or the transaction manager throws
 */
public void runWithErrorHandler() throws Exception {
        final boolean sessionAware = sessionAwareRunnable != null;

        if ( !wrapInTransaction ) {
            // No wrapping: the task receives null and is expected to open its own session.
            if ( sessionAware ) {
                sessionAwareRunnable.run( null );
            }
            else {
                statelessSessionAwareRunnable.run( null );
            }
            return;
        }

        final Session session = sessionAware ? batchContext.factory.openSession() : null;
        final StatelessSession statelessSession = sessionAware ? null : batchContext.factory.openStatelessSession();
        try {
            batchContext.transactionManager.begin();

            if ( sessionAware ) {
                // consumer side: IdentifierConsumerDocumentProducer.run takes a Session
                sessionAwareRunnable.run( session );
            }
            else {
                // producer side: IdentifierProducer.run takes a StatelessSession
                statelessSessionAwareRunnable.run( statelessSession );
            }

            batchContext.transactionManager.commit();
        }
        finally {
            // NOTE(review): on failure the transaction is neither committed nor rolled back here,
            // same as before; only the session leak is addressed.
            if ( sessionAware ) {
                session.close();
            }
            else {
                statelessSession.close();
            }
        }
    }

IdentifierProducer通过Hibernate从数据库查询主键，并放在List中，达到batchSize(默认为10，从MassIndexerImpl中的objectLoadingBatchSize属性层层传递过来)后放入到ProducerConsumerQueue中。

/**
 * Entry point of the identifier producer: loads the primary keys via
 * {@code inTransactionWrapper} and always notifies the destination queue
 * afterwards that this producer is stopping.
 *
 * @param upperSession the stateless session to use; passed on to {@code inTransactionWrapper},
 *        which opens its own session when this is null
 * @throws Exception if loading the identifiers fails
 */
public void run(StatelessSession upperSession) throws Exception {
        log.trace( "started" );
        try {
            inTransactionWrapper( upperSession );
        }
        finally {
            // Per the article: once loading is done an end marker is put on the queue to notify the
            // consumer threads. Note that if this producer thread blocks, its consumer threads
            // (6 by default) stay blocked as well.
            destination.producerStopping();
        }
        log.trace( "finished" );
    }

    /**
     * Loads all identifiers inside a transaction on the given session, opening
     * (and later closing) a stateless session of its own when none is supplied.
     * An interruption ends the loading quietly with the interrupt flag restored.
     *
     * @param upperSession the session supplied by the caller, or null to open one here
     * @throws Exception if the transaction or the identifier loading fails
     */
    private void inTransactionWrapper(StatelessSession upperSession) throws Exception {
        final boolean ownsSession = upperSession == null;
        final StatelessSession session = ownsSession ? sessionFactory.openStatelessSession() : upperSession;
        try {
            final Transaction tx = Helper.getTransactionAndMarkForJoin( session );
            tx.begin();
            // load every primary key of the indexed entity type from the database
            loadAllIdentifiers( session );
            tx.commit();
        }
        catch (InterruptedException interruption) {
            // just quit
            Thread.currentThread().interrupt();
        }
        finally {
            // only close a session that was opened here
            if ( ownsSession ) {
                session.close();
            }
        }
    }

    /**
     * Counts the rows of the indexed type, reports the total to the monitor,
     * then scrolls over all identifiers and enqueues them in lists of
     * {@code batchSize} elements. Stops after {@code objectsLimit} identifiers
     * when a non-zero limit lower than the row count is set.
     *
     * @param session the stateless session used for both queries
     * @throws InterruptedException if interrupted while enqueueing a list
     */
    private void loadAllIdentifiers(final StatelessSession session) throws InterruptedException {
        final Number rowCount = (Number) session
                .createCriteria( indexedType )
                .setProjection( Projections.rowCount() )
                .setCacheable( false )
                .uniqueResult();
        long totalCount = rowCount.longValue();
        final boolean limitApplies = objectsLimit != 0 && objectsLimit < totalCount;
        if ( limitApplies ) {
            totalCount = objectsLimit;
        }
        if ( log.isDebugEnabled() ) {
            log.debugf( "going to fetch %d primary keys", totalCount);
        }
        monitor.addToTotalCount( totalCount );

        // fetch size comes from MassIndexerImpl's idFetchSize (100 by default, according to the article)
        final Criteria idCriteria = session
                .createCriteria( indexedType )
                .setProjection( Projections.id() )
                .setCacheable( false )
                .setFetchSize( idFetchSize );

        final ScrollableResults cursor = idCriteria.scroll( ScrollMode.FORWARD_ONLY );
        List<Serializable> pending = new ArrayList<Serializable>( batchSize );
        long fetched = 0;
        try {
            while ( cursor.next() ) {
                pending.add( (Serializable) cursor.get( 0 ) );
                // hand over a full batch and start a fresh list
                if ( pending.size() == batchSize ) {
                    enqueueList( pending );
                    pending = new ArrayList<Serializable>( batchSize );
                }
                fetched++;
                if ( fetched == totalCount ) {
                    break;
                }
            }
        }
        finally {
            cursor.close();
        }
        // flush the last, possibly partial, batch (enqueueList ignores an empty list)
        enqueueList( pending );
    }

    /**
     * Puts a list of primary keys on the queue for the consumer threads to pick up.
     * Empty lists are not enqueued.
     *
     * @param idsList the identifiers to hand over
     * @throws InterruptedException if interrupted while putting the list on the queue
     */
    private void enqueueList(final List<Serializable> idsList) throws InterruptedException {
        if ( idsList.isEmpty() ) {
            return;
        }
        destination.put( idsList );
        log.tracef( "produced a list of ids %s", idsList );
    }

消费者IdentifierConsumerDocumentProducer从队列ProducerConsumerQueue中获取主键List，然后根据主键从数据库加载数据。

/**
 * Entry point of a consumer: configures the session as manual-flush and
 * read-only, then processes identifier lists from the queue inside a
 * transaction. {@code producerEndSignal} is always counted down at the end.
 *
 * @param upperSession the session supplied by the caller, or null to open (and close) one here
 * @throws Exception if the transaction or the loading fails
 */
public void run(Session upperSession) throws Exception {
        log.trace( "started" );
        final boolean ownsSession = upperSession == null;
        final Session session = ownsSession ? sessionFactory.openSession() : upperSession;
        session.setFlushMode( FlushMode.MANUAL );
        session.setCacheMode( cacheMode );
        session.setDefaultReadOnly( true );
        try {
            final Transaction tx = Helper.getTransactionAndMarkForJoin( session );
            tx.begin();
            // take primary-key lists from the queue until the producer is done
            loadAllFromQueue( session );
            tx.commit();
        }
        finally {
            producerEndSignal.countDown();
            // only close a session that was opened here
            if ( ownsSession ) {
                session.close();
            }
        }
        log.trace( "finished" );
    }

    /**
     * Takes identifier lists from the source queue and loads/indexes each of
     * them until the queue returns null. An interruption ends the loop quietly
     * with the interrupt flag restored.
     *
     * @param session the session used to load the entities
     */
    private void loadAllFromQueue(Session session) {
        final InstanceInitializer sessionInitializer = new HibernateSessionLoadingInitializer(
                (SessionImplementor) session
        );
        try {
            // take() presumably blocks until a list is available and yields null
            // once the producer has signalled the end
            Object taken = source.take();
            while ( taken != null ) {
                @SuppressWarnings("unchecked")
                List<Serializable> idList = (List<Serializable>) taken;
                log.tracef( "received list of ids %s", idList );
                // load the entities for these primary keys from the database
                loadList( idList, session, sessionInitializer );
                taken = source.take();
            }
        }
        catch (InterruptedException interruption) {
            // just quit
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Loads the entities of the indexed type whose identifiers are in the
     * given list, reports how many were loaded to the monitor, and passes
     * them on to {@code indexAllQueue}. The session is cleared afterwards.
     *
     * @param listIds the identifiers of the entities to load
     * @param session the session to be used
     * @param sessionInitializer the initializer passed through to the indexing step
     *
     * @throws InterruptedException declared for callers; see {@code indexAllQueue} for how interruption is handled
     */
    private void loadList(List<Serializable> listIds, Session session, InstanceInitializer sessionInitializer) throws InterruptedException {
        final Criteria byIds = session
                .createCriteria( type )
                .setCacheMode( cacheMode )
                .setLockMode( LockMode.NONE )
                .setCacheable( false )
                .setFlushMode( FlushMode.MANUAL )
                .setFetchSize( listIds.size() )
                .setResultTransformer( CriteriaSpecification.DISTINCT_ROOT_ENTITY )
                .add( Restrictions.in( idName, listIds ) );
        final List<?> loaded = byIds.list();
        monitor.entitiesLoaded( loaded.size() );
        indexAllQueue( session, loaded, sessionInitializer );
        // clear the session once this batch has been indexed
        session.clear();
    }

    /**
     * Indexes every entity in the given list. A failure on a single entity is
     * reported to the error handler and does not stop the remaining entities.
     * An interruption stops the loop with the interrupt flag restored.
     *
     * @param session the session the entities were loaded with
     * @param entities the entities to index; null or empty means there is nothing to do
     * @param sessionInitializer the initializer passed through to {@code index}
     */
    private void indexAllQueue(Session session, List<?> entities, InstanceInitializer sessionInitializer) {
        // The original guard used '&&', which threw a NullPointerException for a null list
        // and never returned early; '||' is the intended check.
        if ( entities == null || entities.isEmpty() ) {
            return;
        }
        try {
            ConversionContext contextualBridge = new ContextualExceptionBridgeHelper();
            log.tracef( "received a list of objects to index: %s", entities );
            // index each entity of the list in turn
            for ( Object object : entities ) {
                try {
                    index( object, session, sessionInitializer, contextualBridge );
                    monitor.documentsBuilt( 1 );
                }
                catch (RuntimeException e) {
                    // An InterruptedException is not caught here; it propagates to the outer handler.
                    String errorMsg = log.massIndexerUnableToIndexInstance(
                            object.getClass().getName(),
                            object.toString()
                    );
                    errorHandler.handleException( errorMsg, e );
                }
            }
        }
        catch (InterruptedException e) {
            // just quit
            Thread.currentThread().interrupt();
        }
    }

Hibernate Search索引重建源码解析

猜你喜欢