Lucene (full-text search) index database maintenance

Maintenance of Lucene Index Repository

    Utility class wrapping common Lucene operations
package com.xushuai.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Static helpers for opening a Lucene index and printing search results.
 *
 * <p>The code uses the {@code File}-based {@code FSDirectory.open} and the
 * {@code Version}-taking {@code IndexWriterConfig} constructor, i.e. the
 * Lucene 4.x style API.
 *
 * @author xushuai
 * @since 2018/5/7
 */
public class LuceneUtil {

    /**
     * Opens an {@link IndexWriter} on the index stored under {@code pathname}.
     *
     * <p>The caller owns the returned writer and is responsible for closing it.
     * If the writer cannot be created, the directory opened here is closed
     * before the exception is propagated, so a failed call does not leave it open.
     * NOTE(review): on success the directory is not closed by this class; it stays
     * reachable through the writer - confirm whether callers need to close it.
     *
     * @param pathname file-system path of the index directory
     * @param analyzer analyzer handed to the {@link IndexWriterConfig}
     * @return a writer for the index at {@code pathname}
     * @throws IOException if the directory or the writer cannot be opened
     */
    public static IndexWriter getIndexWriter(String pathname, Analyzer analyzer) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            // The analyzer is supplied by the caller; it is only wired into the config here.
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LATEST, analyzer);
            return new IndexWriter(directory, indexWriterConfig);
        } catch (IOException | RuntimeException e) {
            // Nobody else holds a reference to the directory yet, so release it here.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Opens an {@link IndexReader} on the index stored under {@code pathname}.
     *
     * <p>The caller owns the returned reader and is responsible for closing it.
     * If the reader cannot be opened, the directory opened here is closed before
     * the exception is propagated.
     *
     * @param pathname file-system path of the index directory
     * @return a reader for the index at {@code pathname}
     * @throws IOException if the directory or the reader cannot be opened
     */
    public static IndexReader getIndexReader(String pathname) throws IOException {
        // Location of the index on disk.
        Directory directory = FSDirectory.open(new File(pathname));
        try {
            return DirectoryReader.open(directory);
        } catch (IOException | RuntimeException e) {
            // Nobody else holds a reference to the directory yet, so release it here.
            closeAfterFailure(directory, e);
            throw e;
        }
    }

    /**
     * Runs {@code query} and prints the hits to standard output.
     *
     * <p>Prints the total hit count first, then for each returned hit the stored
     * values of the {@code filename}, {@code filesize} and {@code filepath} fields,
     * followed by a separator line.
     *
     * @param indexSearcher searcher used to execute the query and load documents
     * @param query         query to execute
     * @param count         maximum number of hits to fetch and print
     * @throws IOException if searching or loading a document fails
     */
    public static void printResult(IndexSearcher indexSearcher, Query query, int count) throws IOException {
        // Fetch at most `count` hits for the query.
        TopDocs topDocs = indexSearcher.search(query, count);
        // totalHits is the number of matches overall, not just the ones returned.
        System.out.println("Total number of query results: " + topDocs.totalHits);

        for (ScoreDoc hit : topDocs.scoreDocs) {
            // ScoreDoc.doc is the internal document id; use it to load the stored document.
            Document document = indexSearcher.doc(hit.doc);
            // File name
            System.out.println("文件名:  " + document.get("filename"));
            // File size
            System.out.println("文件大小:" + document.get("filesize"));
            // File path
            System.out.println("文件路径:" + document.get("filepath"));

            // Separator between hits
            System.out.println("------------------------------------------------------------------------------");
        }
    }

    /**
     * Closes {@code directory} while an earlier failure is being propagated.
     * A failure to close is attached to {@code failure} as a suppressed exception
     * so that the original error stays the one the caller sees.
     */
    private static void closeAfterFailure(Directory directory, Throwable failure) {
        try {
            directory.close();
        } catch (IOException | RuntimeException closeFailure) {
            failure.addSuppressed(closeFailure);
        }
    }
}
1. Modifying and deleting documents in the index
package com.xushuai.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.IOException;

/**
 * Index maintenance examples: updating and deleting documents.
 *
 * <p>Index maintenance covers create, read, update and delete. Creating the
 * index and querying it are handled separately; this class demonstrates the
 * update and delete operations only.
 *
 * @author xushuai
 * @since 2018/5/7
 */
public class LuceneManager {

    /** Location of the index directory these examples operate on. */
    private static final String INDEX_PATH = "D:\\lucene-solr\\lucene\\index";

    private IndexWriter indexWriter = null;

    /**
     * Opens the index writer before each test.
     *
     * @throws IOException if the index cannot be opened for writing
     */
    @Before
    public void setUp() throws IOException {
        // IKAnalyzer is a third-party analyzer; its jar has to be added to the classpath separately.
        indexWriter = LuceneUtil.getIndexWriter(INDEX_PATH, new IKAnalyzer());
    }

    /**
     * Closes the index writer after each test.
     *
     * <p>JUnit runs {@code @After} methods even when {@code @Before} threw, in which
     * case the writer was never assigned; the null check keeps that situation from
     * adding a NullPointerException on top of the real failure.
     *
     * @throws IOException if closing the writer fails
     */
    @After
    public void tearDown() throws IOException {
        if (indexWriter != null) {
            indexWriter.close();
        }
    }

    /**
     * Updates the index: documents matching a term are deleted and a new document
     * is added in their place.
     *
     * <p>NOTE(review): the replacement document only carries the fields
     * {@code fname} and {@code fcontent}, while the matching term and
     * {@code LuceneUtil.printResult} use {@code filename}, {@code filesize} and
     * {@code filepath}. Confirm the field names against the code that builds the index.
     *
     * @throws IOException if the update fails
     */
    @Test
    public void luceneUpdateRepository() throws IOException {
        // Build the replacement document.
        Document document = new Document();
        // Add its fields (both are stored).
        document.add(new TextField("fname", "modified file name", Field.Store.YES));
        document.add(new TextField("fcontent", "modified file content", Field.Store.YES));

        // The term selects the documents to replace: those with "java" in the "filename" field.
        indexWriter.updateDocument(new Term("filename", "java"), document);
    }

    /**
     * Demonstrates the two ways of deleting from the index: everything at once,
     * and by query.
     *
     * <p>Both calls are issued on the same writer, one after the other, so by the
     * time the query-based delete runs the preceding {@code deleteAll()} has already
     * removed every document. Run the calls separately to observe each on its own.
     *
     * @throws IOException if a delete operation fails
     */
    @Test
    public void luceneDeleteRepository() throws IOException {
        // Variant 1: remove every document from the index.
        indexWriter.deleteAll();

        // Variant 2: remove only the documents matching a query,
        // here those with "java" in the "filename" field.
        Query query = new TermQuery(new Term("filename", "java"));

        // deleteDocuments accepts one or more queries.
        indexWriter.deleteDocuments(query);
    }

}



2. Querying the index (Query subclasses)
package com.xushuai.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;

/**
 * Index query examples, one test per {@link Query} subclass.
 *
 * @author xushuai
 * @since 2018/5/7
 */
public class LuceneQuery {

    /** Location of the index directory these examples read from. */
    private static final String INDEX_PATH = "D:\\lucene-solr\\lucene\\index";

    /** Maximum number of hits printed by each example. */
    private static final int MAX_RESULTS = 10;

    private IndexSearcher indexSearcher = null;

    /**
     * Opens the index and creates the searcher before each test.
     *
     * @throws IOException if the index cannot be opened for reading
     */
    @Before
    public void setUp() throws IOException {
        IndexReader indexReader = LuceneUtil.getIndexReader(INDEX_PATH);
        indexSearcher = new IndexSearcher(indexReader);
    }

    /**
     * Closes the reader behind the searcher after each test.
     *
     * <p>JUnit runs {@code @After} methods even when {@code @Before} threw, in which
     * case the searcher was never assigned; the null check keeps that situation from
     * adding a NullPointerException on top of the real failure.
     *
     * @throws IOException if closing the reader fails
     */
    @After
    public void tearDown() throws IOException {
        if (indexSearcher != null) {
            indexSearcher.getIndexReader().close();
        }
    }

    /**
     * Matches every document in the index.
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneMatchAllDocsQuery() throws IOException {
        // MatchAllDocsQuery selects all documents.
        Query query = new MatchAllDocsQuery();

        // Print at most the first MAX_RESULTS hits.
        LuceneUtil.printResult(indexSearcher, query, MAX_RESULTS);
    }

    /**
     * Exact term query: matches documents whose {@code filename} field contains
     * the term {@code java}.
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneTermQuery() throws IOException {
        // TermQuery looks up a single term in a single field.
        Query query = new TermQuery(new Term("filename", "java"));

        // Print at most the first MAX_RESULTS hits.
        LuceneUtil.printResult(indexSearcher, query, MAX_RESULTS);
    }

    /**
     * Numeric range query: matches documents whose {@code filesize} value lies
     * between 50 and 200, both bounds included.
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneNumericRangeQuery() throws IOException {
        /*
         * newLongRange parameters:
         * 1. field name
         * 2. minimum value
         * 3. maximum value
         * 4. whether the minimum value is included
         * 5. whether the maximum value is included
         */
        Query query = NumericRangeQuery.newLongRange("filesize", 50L, 200L, true, true);

        // Print at most the first MAX_RESULTS hits.
        LuceneUtil.printResult(indexSearcher, query, MAX_RESULTS);
    }

    /**
     * Combined query: matches documents whose {@code filename} field contains
     * {@code java} or {@code apache}.
     *
     * @throws IOException if the search fails
     */
    @Test
    public void luceneBooleanQuery() throws IOException {
        // Container for the combined clauses.
        BooleanQuery booleanQuery = new BooleanQuery();

        // The individual conditions.
        Query query1 = new TermQuery(new Term("filename", "java"));
        Query query2 = new TermQuery(new Term("filename", "apache"));

        /*
         * How each clause takes part in the combination:
         * BooleanClause.Occur.MUST:     the condition has to hold
         * BooleanClause.Occur.MUST_NOT: the condition must not hold
         * BooleanClause.Occur.SHOULD:   the condition may or may not hold, similar to OR
         *
         * With two SHOULD clauses this reads: file name contains java or apache.
         */
        booleanQuery.add(query1, BooleanClause.Occur.SHOULD);
        booleanQuery.add(query2, BooleanClause.Occur.SHOULD);

        // Print at most the first MAX_RESULTS hits.
        LuceneUtil.printResult(indexSearcher, booleanQuery, MAX_RESULTS);
    }

}



Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325907138&siteId=291194637