Spring Boot + the Lucene full-text search framework

1. The Lucene full-text search framework vs. MySQL LIKE '%...%'

With a full-text index, the data is queried first and an index is built over it; when a search comes in, the search terms are split into tokens and matches are looked up through the index. The trade-off is that the system has to maintain that index.

A MySQL LIKE '%...%' query, by contrast, has to scan the whole table, so it is comparatively slow.
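
For contrast, this is the kind of query the Lucene index replaces: with a leading wildcard MySQL cannot use a B-tree index on the column, so it falls back to scanning every row. The food table, the column names and the JdbcTemplate call below are illustrative assumptions, not taken from the post.

// a leading-wildcard LIKE forces MySQL into a full table scan
List<Map<String, Object>> rows = jdbcTemplate.queryForList(
        "SELECT foodid, foodname, price, imagepath FROM food WHERE foodname LIKE CONCAT('%', ?, '%')",
        keyword);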

2. GitHub (project)

https://github.com/dajitui/spring-boot-lucene-ik

3. The process in detail

My original intention was to tie this in with the database, so:

The data is fetched with JPA; because the data volume is usually large, the search itself must not be done with the database's LIKE!
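
A rough sketch of that fetch follows. The Food entity, its getters and the FoodRepository are assumptions for illustration, since the post only says the rows come from JPA; the result is the List<Map<String, Object>> that the indexing code further down iterates over.

// hypothetical service that loads the rows to be indexed
@Service
public class FoodIndexService {

    @Autowired
    private FoodRepository foodRepository; // assumed: extends JpaRepository<Food, Long>

    public List<Map<String, Object>> loadRows() {
        List<Map<String, Object>> queryFood = new ArrayList<Map<String, Object>>();
        for (Food f : foodRepository.findAll()) {
            Map<String, Object> row = new HashMap<String, Object>();
            row.put("foodid", f.getFoodid());
            row.put("foodname", f.getFoodname());
            row.put("price", f.getPrice());
            row.put("imagepath", f.getImagepath());
            queryFood.add(row);
        }
        return queryFood;
    }
}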

Once we have the data, we tie the Lucene version to an analyzer, create a directory on disk to hold the index, and then write into it.

Directory directory = null;
IndexWriterConfig config = null;
IndexWriter iwriter = null;
try {
    // directory on disk where the index files are stored
    directory = FSDirectory.open(new File(dir));
    // tie the Lucene version (4.7 here) to the analyzer
    config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    // create the writer from the directory and the config
    iwriter = new IndexWriter(directory, config);
    // write the document into the index
    iwriter.addDocument(doc);
    // commit the change
    iwriter.commit();
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // close the writer and the directory even if something went wrong
    try {
        if (iwriter != null) {
            iwriter.close();
        }
        if (directory != null) {
            directory.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
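
The snippet uses dir, analyzer and doc without showing where they come from; judging by the call indexDemo.write(doc) further down, it is the body of a write(Document) method on a small helper class. A minimal sketch of that shape, using StandardAnalyzer as a stand-in (the GitHub project wires in the IK analyzer), with the method body elided:

public class IndexDemo {

    // filesystem path of the index directory; the real path is whatever the project configures
    private final String dir = "/tmp/lucene-index";
    // analyzer used for both indexing and searching; swap in IKAnalyzer to match the project
    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);

    // called once per row of data
    public void write(Document doc) {
        // ... open the Directory, create the IndexWriter, addDocument, commit, close (as shown above) ...
    }
}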

Each row of data is written into a Document, and the Document is then written into the index directory created above.

// get one row of data
Map<String, Object> lineData = queryFood.get(i);
// create the Document
Document doc = new Document();
// one Field per column
Field foodid = new Field("foodid", lineData.get("foodid").toString(), TextField.TYPE_STORED);
Field foodname = new Field("foodname", lineData.get("foodname").toString(), TextField.TYPE_STORED);
Field price = new Field("price", lineData.get("price").toString(), TextField.TYPE_STORED);
Field imagepath = new Field("imagepath", lineData.get("imagepath").toString(), TextField.TYPE_STORED);
// add the fields to the Document
doc.add(foodid);
doc.add(foodname);
doc.add(price);
doc.add(imagepath);
// hand the Document to the index writer shown above
indexDemo.write(doc);

And when searching? We open the index directory and go through the index:

// directory on disk where the index is stored
Directory directory = FSDirectory.open(new File(dir));
// open a reader on the index directory
DirectoryReader ireader = DirectoryReader.open(directory);
// the searcher
IndexSearcher isearcher = new IndexSearcher(ireader);
// Lucene query parser: specifies the field to query and the analyzer to use
QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
// build the query
Query query = parser.parse(value);
// formatter for the prefix/suffix wrapped around matched terms (the default is bold <B></B>)
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
// highlighter that marks up the terms matched by the query
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));

// run the search; the third argument caps the number of documents returned
ScoreDoc[] hits = isearcher.search(query, null, 5).scoreDocs;
List<Map> list = new ArrayList<Map>();
// walk the hits and build the result list
for (int i = 0; i < hits.length; i++) {
    int id = hits[i].doc;
    Document hitDoc = isearcher.doc(hits[i].doc);
    Map map = new HashMap();
    map.put("foodid", hitDoc.get("foodid"));

    // stored value of foodname
    String foodname = hitDoc.get("foodname");
    // re-tokenize the field so the highlighter can locate the matched terms
    TokenStream tokenStream = TokenSources.getAnyTokenStream(isearcher.getIndexReader(), id, "foodname", analyzer);
    // the second argument is the text to highlight
    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, foodname, false, 10);
    // fall back to the raw value if nothing gets highlighted
    String foodValue = foodname;
    for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
            // the highlighted foodname value
            foodValue = frag[j].toString();
        }
    }
    map.put("foodname", foodValue);

    map.put("price", hitDoc.get("price"));
    map.put("imagepath", hitDoc.get("imagepath"));
    list.add(map);
}
ireader.close();
directory.close();
return list;
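
Since this is a Spring Boot project, the search code above would typically be exposed through a REST controller. A minimal sketch, where SearchDemo is a hypothetical wrapper exposing the code above as search(String field, String value), and the endpoint path and parameter name are assumptions:

@RestController
public class FoodSearchController {

    // hypothetical helper wrapping the Lucene search code above
    @Autowired
    private SearchDemo searchDemo;

    // e.g. GET /food/search?keyword=xxx returns the highlighted hits as JSON
    @GetMapping("/food/search")
    public List<Map> search(@RequestParam("keyword") String keyword) throws Exception {
        // "foodname" is the indexed field being queried, as in the snippet above
        return searchDemo.search("foodname", keyword);
    }
}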

Lucene index maintenance

The code above covers creating the index.

Adding to the index incrementally

/**
 * Add a document to the index
 *
 * @throws Exception
 */
public static void insert() throws Exception {
    String text5 = "hello,goodbye,man,woman";
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));

    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);

    Document doc1 = new Document();
    doc1.add(new TextField("filename", "text5", Store.YES));
    doc1.add(new TextField("content", text5, Store.YES));
    indexWriter.addDocument(doc1);

    indexWriter.commit();
    indexWriter.close();

    Date date2 = new Date();
    System.out.println("Adding to the index took " + (date2.getTime() - date1.getTime()) + "ms\n");
}

Deleting from the index

/**
 * Delete from the index
 *
 * @param str the value of the filename term to delete by
 * @throws Exception
 */
public static void delete(String str) throws Exception {
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));

    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);

    // delete every document whose "filename" field contains this term
    indexWriter.deleteDocuments(new Term("filename", str));

    indexWriter.close();

    Date date2 = new Date();
    System.out.println("Deleting from the index took " + (date2.getTime() - date1.getTime()) + "ms\n");
}

Updating the index

/**
 * Update the index
 *
 * @throws Exception
 */
public static void update() throws Exception {
    String text1 = "update,hello,man!";
    Date date1 = new Date();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));

    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);

    Document doc1 = new Document();
    doc1.add(new TextField("filename", "text1", Store.YES));
    doc1.add(new TextField("content", text1, Store.YES));

    // delete the documents matching the term, then add the new document
    indexWriter.updateDocument(new Term("filename", "text1"), doc1);

    indexWriter.close();

    Date date2 = new Date();
    System.out.println("Updating the index took " + (date2.getTime() - date1.getTime()) + "ms\n");
}

Querying through the index

/**
 * Query by keyword
 *
 * @param str the keyword to search for in the "content" field
 * @throws Exception
 */
public static void search(String str) throws Exception {
    directory = FSDirectory.open(new File(INDEX_DIR));
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    DirectoryReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);

    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
    Query query = parser.parse(str);

    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        System.out.println(hitDoc.get("filename"));
        System.out.println(hitDoc.get("content"));
    }
    ireader.close();
    directory.close();
}
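
The four maintenance methods above can be exercised with a small driver in the same class. The call order and the search terms here are only illustrative:

public static void main(String[] args) throws Exception {
    insert();            // adds the document whose filename is "text5"
    search("goodbye");   // the freshly added document should be returned
    update();            // replaces the document whose filename term is "text1"
    search("update");    // the updated content should now be found
    delete("text5");     // removes the document added by insert()
    search("goodbye");   // the deleted document should no longer appear
}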





Reposted from blog.csdn.net/weixin_38336658/article/details/80304326