Lucene入门 - HelloWorld

前言：

Lucene 是 Apache 软件基金会 Jakarta 项目组的一个子项目，是一个开放源代码的全文检索引擎工具包，但它不是一个完整的全文检索引擎，而是一个全文检索引擎的架构，提供了完整的查询引擎和索引引擎，以及部分文本分析引擎（英文与德文两种西方语言）。Lucene 的目的是为软件开发人员提供一个简单易用的工具包，以方便地在目标系统中实现全文检索的功能，或者是以此为基础建立起完整的全文检索引擎。Lucene 是一套用于全文检索和搜寻的开源程式库，由 Apache 软件基金会支持和提供。Lucene 提供了一个简单却强大的应用程式接口，能够做全文索引和搜寻。在 Java 开发环境里 Lucene 是一个成熟的免费开源工具。就其本身而言，Lucene 是当前以及最近几年最受欢迎的免费 Java 信息检索程序库。人们经常提到信息检索程序库，虽然与搜索引擎有关，但不应该将信息检索程序库与搜索引擎相混淆。

什么是全文检索？？

在全文中去检索数据,检索的是文本数据

全文检索的特点？？

相关度最高的排在最前面，官网中相关的网页排在最前面
对摘要进行了截取
关键词的高亮
只关注文本,不考虑语义

使用场景

①数据库中去替换模糊查询(使用)
数据库中的模糊查询（例如 LIKE '%关键字%'）通常不会使用索引，查询缓慢
全文检索,会为文本创建索引,根据索引进行查询
②全文索引是搜索引擎的基础
③垂直搜索
④其他... word pdf 拼音输入法

全文检索的架构

①创建索引
②根据索引进行搜索

Lucene入门

索引的创建
IndexWriter
索引的搜索
IndexSearcher

导入依赖包

        <!--
          Dependencies required for Lucene full-text search (all pinned to 5.5.0):
          lucene-core, lucene-analyzers-common, lucene-queryparser and
          lucene-analyzers-smartcn.
        -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>5.5.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>5.5.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>5.5.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>5.5.0</version>
        </dependency>

案例一：helloworld

public class HelloWorldTest {

    private String doc1 = "hello world";
    private String doc2 = "hello java world";
    private String doc3 = "hello lucene world";

    @Test  //索引的创建:IndexWriter
    public void testIndexWriter() throws  Exception{
        Path path = Paths.get("F:\\lucene\\index\\_01_helloworld");
        Directory d = FSDirectory.open(path);//索引存放的目录
        Analyzer analyzer = new SimpleAnalyzer();//分词器
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);//配置对象
        //1 创建核心对象IndexWriter
        IndexWriter indexWriter = new IndexWriter(d,conf);

        //2 使用IndexWriter为全文创建索引
        Document document1 = new Document();
        document1.add(new TextField("title","doc1", Field.Store.YES));
        document1.add(new TextField("content",doc1, Field.Store.YES));
        indexWriter.addDocument(document1);

        Document document2 = new Document();
        document2.add(new TextField("title","doc2", Field.Store.YES));
        document2.add(new TextField("content",doc2, Field.Store.YES));
        indexWriter.addDocument(document2);

        Document document3 = new Document();;
        document3.add(new TextField("title","doc3", Field.Store.YES));
        document3.add(new TextField("content",doc3, Field.Store.YES));
        indexWriter.addDocument(document3);

        indexWriter.commit();
        indexWriter.close();

    }

    @Test //索引的搜索:IndexSearcher
    public void testIndexSearcher() throws Exception{
        Path path = Paths.get("F:\\lucene\\index\\_01_helloworld");
        Directory d = FSDirectory.open(path);//索引存放的目录
        //索引的读的对象
        IndexReader indexReader = DirectoryReader.open(d);
        //1 创建索引查询对象
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        //2 执行查询操作
        String f = "content";//默认查询的字段
        Analyzer a = new SimpleAnalyzer();//分词器,注意创建索引的分词器和搜索索引的分词器要保持一致
        QueryParser queryParser = new QueryParser(f,a);
        String queryStr = "content:java world";//查询字符串
        Query query = queryParser.parse(queryStr);
        int numHits = 10;//最大命中的条数
        TopDocs topDocs = indexSearcher.search(query, numHits);//查询,返回查询结果

        //3 分析查询结果
        int totalHits = topDocs.totalHits;//命中的条目数,总条目数
        System.out.println("总共命中:"+totalHits);

        ScoreDoc[] scoreDocs = topDocs.scoreDocs;//返回命中的所有的数据
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;//返回命中文档的id
            //System.out.println(doc);
            Document document = indexSearcher.doc(doc);
            String title = document.get("title");
            String content = document.get("content");
            System.out.println("title:"+title+";content:"+content);
        }
        //4 释放资源
        indexReader.close();
    }

}

案例二：字段

public class FieldTest {

    private String doc1 = "hello world";
    private String doc2 = "hello java world";
    private String doc3 = "hello lucene world";

    @Test //索引的创建:IndexWriter
    public void testIndexWriter() throws  Exception{
        Path path = Paths.get("F:\\lucene\\index\\_02_field");
        Directory d = FSDirectory.open(path);//索引存放的目录
        Analyzer analyzer = new SimpleAnalyzer();//分词器
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);//配置对象
        //1 创建核心对象IndexWriter
        IndexWriter indexWriter = new IndexWriter(d,conf);
        //2 使用IndexWriter为全文创建索引
        Document document1 = new Document();
        FieldType type = new FieldType();
        type.setStored(true);//该字段的值是否存储
        type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);//索引的类型
        type.setTokenized(false);//是否分词
        document1.add(new Field("title","doc1",type));
        document1.add(new Field("content",doc1,type));
//        document1.add(new TextField("title","doc1", Field.Store.YES));
//        document1.add(new TextField("content",doc1, Field.Store.YES));
        indexWriter.addDocument(document1);

        indexWriter.commit();
        indexWriter.close();
    }

    @Test //索引的搜索:IndexSearcher
    public void testIndexSearcher() throws Exception{
        Path path = Paths.get("F:\\lucene\\index\\_02_field");
        Directory d = FSDirectory.open(path);//索引存放的目录
        //索引的读的对象
        IndexReader indexReader = DirectoryReader.open(d);
        //1 创建索引查询对象
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        //2 执行查询操作
        PhraseQuery.Builder builder = new PhraseQuery.Builder();
        builder.add(new Term("content","hello world"));
        Query query = builder.build();
        int numHits = 10;//最大命中的条数
        TopDocs topDocs = indexSearcher.search(query, numHits);//查询,返回查询结果
        //3 分析查询结果
        int totalHits = topDocs.totalHits;//命中的条目数,总条目数
        System.out.println("总共命中:"+totalHits);

        ScoreDoc[] scoreDocs = topDocs.scoreDocs;//返回命中的所有的数据
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;//返回命中文档的id
            //System.out.println(doc);
            Document document = indexSearcher.doc(doc);
            String title = document.get("title");
            String content = document.get("content");
            System.out.println("title:"+title+";content:"+content);
        }
        //4 释放资源
        indexReader.close();
    }

}

案例三：查询（TermQuery 与 BooleanQuery）

public class QueryTest {

    private final String pathStr = "F:\\lucene\\index\\_03_query";

    private String tlbb = "在一个月黑风高的夜晚";
    private String sdyxz = "在一个伸手不见五指的夜晚";
    private String jpm = "在一个阳光明媚的夜晚";

    @Test  // 索引的创建:IndexWriter
    public void testIndexWriter() throws  Exception{
        Path path = Paths.get(pathStr);
        Directory d = FSDirectory.open(path);//索引存放的目录
        Analyzer analyzer = new SmartChineseAnalyzer();//分词器
        IndexWriterConfig conf = new IndexWriterConfig(analyzer);//配置对象
        //1 创建核心对象IndexWriter
        IndexWriter indexWriter = new IndexWriter(d,conf);

        FieldType type = new FieldType();
        type.setTokenized(false);
        type.setStored(true);

        //2 使用IndexWriter为全文创建索引
        Document document1 = new Document();
        document1.add(new Field("title","天龙八部",type));
        document1.add(new TextField("content",tlbb, Field.Store.YES));
        indexWriter.addDocument(document1);

        Document document2 = new Document();
        document2.add(new Field("title","射雕英雄传",type));
        document2.add(new TextField("content",sdyxz, Field.Store.YES));
        indexWriter.addDocument(document2);

        Document document3 = new Document();;
        document3.add(new Field("title","键盘膜", type));
        document3.add(new TextField("content",jpm, Field.Store.YES));
        indexWriter.addDocument(document3);

        indexWriter.commit();
        indexWriter.close();
    }

    @Test //索引的搜索:IndexSearcher
    public void testIndexSearcher() throws Exception{
        Path path = Paths.get(pathStr);
        Directory d = FSDirectory.open(path);//索引存放的目录
        //索引的读的对象
        IndexReader indexReader = DirectoryReader.open(d);
        //1 创建索引查询对象
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        //2 执行查询操作
       /* String f = "content";//默认查询的字段
        Analyzer a = new SmartChineseAnalyzer();//分词器,注意创建索引的分词器和搜索索引的分词器要保持一致
        QueryParser queryParser = new QueryParser(f,a);
        String queryStr = "content:阳光明媚";//查询字符串
        Query query = queryParser.parse(queryStr);*/

       Query query1 = new TermQuery(new Term("title","天龙八部"));
       Query query2 = new TermQuery(new Term("content","夜晚"));

       BooleanQuery.Builder builder = new BooleanQuery.Builder();
       builder.add(query1, BooleanClause.Occur.MUST);
       //builder.add(query2, BooleanClause.Occur.MUST);

        int numHits = 10;//最大命中的条数
        TopDocs topDocs = indexSearcher.search(builder.build(), numHits);//查询,返回查询结果

        //3 分析查询结果
        int totalHits = topDocs.totalHits;//命中的条目数,总条目数
        System.out.println("总共命中:"+totalHits);

        ScoreDoc[] scoreDocs = topDocs.scoreDocs;//返回命中的所有的数据
        for (ScoreDoc scoreDoc : scoreDocs) {
            int doc = scoreDoc.doc;//返回命中文档的id
            //System.out.println(doc);
            Document document = indexSearcher.doc(doc);
            String title = document.get("title");
            String content = document.get("content");
            System.out.println("title:"+title+";content:"+content);
        }

        //4 释放资源
        indexReader.close();
    }

}

项目实战使用：

抽取Lucene工具类

public class LuceneUtil {

    private static final String INDEX_DIRCTORY = "F:/lucene/index";
    private static Directory directory;//存放索引的目录
    private static IndexWriter indexWriter;//索引写对象，线程安全
    private static IndexReader indexReader;//索引读对象，线程安全
    private static IndexSearcher indexSearcher;//索引查询对象，线程安全
    private static Analyzer analyzer;//分词器对象

    static{
        try {  //如果父目录不存在，先创建父目录
            File file = new File(INDEX_DIRCTORY);
            if(!file.getParentFile().exists()){
                file.getParentFile().mkdirs();
            }
            directory = FSDirectory.open(Paths.get(INDEX_DIRCTORY));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    //获取IndexWriter对象
    public static IndexWriter getIndexWriter(){
        try {
            if(indexWriter==null){
                Analyzer analyzer = getAnalyzer();
                IndexWriterConfig conf = new IndexWriterConfig(analyzer);
                indexWriter = new IndexWriter(directory,conf);
            }
            return indexWriter;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    //获取IndexReader
    public static IndexReader getIndexReader(){
        try {
            if(indexReader==null){
                indexReader = DirectoryReader.open(directory);
            }else {
                //如果不为空，就使用DirectoryReader打开一个索引变更过的IndexReader类
                DirectoryReader newIndexReader = DirectoryReader.openIfChanged((DirectoryReader) indexReader);
                if(newIndexReader!=null){
                    //把旧的索引读对象关掉
                    indexReader.close();
                    indexReader = newIndexReader;
                }
            }
            return indexReader;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    //获取IndexSearcher对象
    public static IndexSearcher getIndexSearcher(){
        if(indexSearcher==null){
            indexSearcher = new IndexSearcher(getIndexReader());
        }
        return indexSearcher;
    }

    //获取分词器对象
    public static Analyzer getAnalyzer() {
        if(analyzer!=null){
            return analyzer;
        }
        return new SmartChineseAnalyzer();
    }

    //创建QueryParser对象
    public static QueryParser createQueryParser(String field){
        return new QueryParser(field,getAnalyzer());
    }

    //创建Query对象
    public static Query createQuery(String field[],String queryStr){
        BooleanQuery booleanQuery = new BooleanQuery();
        for (String f : field) {
            booleanQuery.add(new TermQuery(new Term(f,queryStr)), BooleanClause.Occur.SHOULD);
        }
        return booleanQuery;
    }

    //分页查询索引
    public static List<Document> getHitDocuments(String[] field,String queryStr,int pageNum,int pageSize){
        List<Document> list = new ArrayList<>();
        try {
            IndexSearcher indexSearcher = getIndexSearcher();
            Query query = createQuery(field,queryStr);
            System.out.println(query);

            // 查询数据， 结束页面自前的数据都会查询到，但是只取本页的数据
            TopDocs topDocs = indexSearcher.search(query, pageNum * pageSize);
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;

            //总条目数
            int totalHits = topDocs.totalHits;

            int start = (pageNum-1)*pageSize;
            int end = (pageNum*pageSize)>totalHits?totalHits:(pageNum*pageSize);
            for(int i=start;i<end;i++){
                ScoreDoc scoreDoc = scoreDocs[i];
                Document document = indexSearcher.doc(scoreDoc.doc);
                list.add(document);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return list;
    }

    //总共命中的条目数
    public static long totalHits(String[] field,String queryStr){
        try {
            IndexSearcher indexSearcher = getIndexSearcher();
            Query query = createQuery(field,queryStr);
            TopDocs topDocs = indexSearcher.search(query, 10);
            return topDocs.totalHits;
        } catch (IOException e) {
            e.printStackTrace();
            return 0;
        }
    }

    //删除索引
    public static void deleteIndex(String[] field,String queryStr){
        try {
            Query query = createQuery(field,queryStr);
            getIndexWriter().deleteDocuments(query);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    //删除所有索引
    public static void deleteAllIndex()throws IOException{
        getIndexWriter().deleteAll();
    }

    //更新索引文档
    public static void updateIndex(Term term,Document document) {
        try {
            getIndexWriter().updateDocument(term, document);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    //更新索引文档
    public static void updateIndex(String field,String value,Document document) {
        updateIndex( new Term(field, value), document);
    }

    //添加索引文档
    public static void addIndex(Document document) {
        updateIndex(null, document);
    }

    //关闭资源
    public static void closeAll(){
        try {
            if (indexWriter!=null)
                indexWriter.close();
            if(indexReader!=null)
                indexReader.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

功能实现：

数据库表字段：

/**
 * Keeps the Lucene index in sync with the system-log table and serves paged
 * full-text queries over the {@code function} and {@code params} fields.
 */
@Component
public class SystemLogHelperImpl implements SystemLogHelper {

    /** Fields searched by {@link #querrySystemLog} and {@link #getTotal}. */
    private static final String[] SEARCH_FIELDS = {"function", "params"};

    @Autowired
    private SystemLogMapper systemLogMapper;

    /**
     * Rebuilds the Lucene index from all rows in the database.
     *
     * <p>Existing documents are removed before the rows are re-added, so
     * calling this method repeatedly does not create duplicates; the delete
     * and the adds become visible together at commit. The shared writer is
     * left open because it is a singleton reused by later calls.
     *
     * @throws IOException if the writer is unavailable or indexing fails
     */
    @Override
    public void updateAllIndex() throws IOException {
        IndexWriter indexWriter = LuceneUtil.getIndexWriter();
        if (indexWriter == null) {
            throw new IOException("Lucene IndexWriter is unavailable");
        }
        // Read the database first so a failing query leaves the index untouched.
        List<SystemLog> systemLogs = systemLogMapper.selectAll();

        indexWriter.deleteAll();
        for (SystemLog systemLog : systemLogs) {
            Document document = new Document();
            document.add(new TextField("id", String.valueOf(systemLog.getId()), Field.Store.YES));
            document.add(new TextField("opuserId", String.valueOf(systemLog.getOpuserId()), Field.Store.YES));
            // TextField rejects null values, so index missing text as an empty string.
            document.add(new TextField("function", nullToEmpty(systemLog.getFunction()), Field.Store.YES));
            document.add(new TextField("params", nullToEmpty(systemLog.getParams()), Field.Store.YES));
            indexWriter.addDocument(document);
        }
        indexWriter.commit();
    }

    /**
     * Runs a paged full-text search and maps the hits back to {@code SystemLog}.
     *
     * @param query carries the search text, the page number and the page size
     * @return the logs on the requested page, possibly empty
     */
    public List<SystemLog> querrySystemLog(SystemLogQuery query){
        List<Document> documents =
                LuceneUtil.getHitDocuments(SEARCH_FIELDS, query.getQ(), query.getPage(), query.getRows());
        List<SystemLog> logs = new ArrayList<>(documents.size());
        for (Document document : documents) {
            SystemLog log = new SystemLog();
            log.setId(Long.parseLong(document.get("id")));
            log.setOpuserId(Long.parseLong(document.get("opuserId")));
            log.setFunction(document.get("function"));
            log.setParams(document.get("params"));
            logs.add(log);
        }
        return logs;
    }

    /**
     * Returns the total number of hits for the query's search text.
     *
     * @param query carries the search text
     * @return the total hit count
     */
    public long getTotal(SystemLogQuery query){
        return LuceneUtil.totalHits(SEARCH_FIELDS, query.getQ());
    }

    /** Returns {@code value}, or an empty string when it is {@code null}. */
    private static String nullToEmpty(String value) {
        return value == null ? "" : value;
    }

}

Lucene入门 - HelloWorld

猜你喜欢