Lucene基本使用(读取文件,未封装)-SpringBoot

准备几个文件:有的文件内容包含 java,有的不包含。
在这里插入图片描述
依赖

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
    <!-- Maven build descriptor for the Lucene demo: a Spring Boot web application that
         declares Lucene 4.10.3, the IK Chinese analyzer, commons-io, MyBatis and the MySQL driver. -->
    <modelVersion>4.0.0</modelVersion>
    <!-- Inherits dependency and plugin management from the Spring Boot 2.3.0.RELEASE parent. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.3.0.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <groupId>com.hr</groupId>
    <artifactId>lucene</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <name>lucene</name>
    <description>Demo project for Spring Boot</description>

    <properties>
        <!-- Java language level for the build. -->
        <java.version>1.8</java.version>
    </properties>

    <dependencies>
        <!-- Spring MVC / embedded web server, used by the REST controller. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>

        <!-- Development-time tooling; runtime-scoped and optional, so it is not passed on to dependants. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <scope>runtime</scope>
            <optional>true</optional>
        </dependency>
        <!-- Spring Boot test starter with the JUnit Vintage engine excluded. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
            <exclusions>
                <exclusion>
                    <groupId>org.junit.vintage</groupId>
                    <artifactId>junit-vintage-engine</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- NOTE(review): JUnit 4 is declared here while junit-vintage-engine is excluded above.
             JUnit 4 style tests are presumably not discovered by the JUnit Platform without the
             Vintage engine; confirm which JUnit generation the tests are written for. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
        <!-- Lucene query parser 4.10.3. Presumably also supplies lucene-core transitively, since
             no lucene-core dependency is declared directly; verify with the dependency tree. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-queryparser</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- Apache Commons IO; the DAO uses its FileUtils to read file size and content. -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
        <!-- Lucene Smart Chinese analyzer. NOTE(review): not referenced by the Java code shown
             alongside this POM, which uses IKAnalyzer instead. -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-smartcn</artifactId>
            <version>4.10.3</version>
        </dependency>
        <!-- IK Analyzer (Chinese word segmentation) with its own Lucene artifacts excluded,
             presumably so that the 4.10.3 versions declared in this POM are the ones used.
             NOTE(review): "lucene-common" does not look like a published Lucene artifact id
             (the analyzers module is "lucene-analyzers-common"); confirm this exclusion has any effect. -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-queryparser</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.lucene</groupId>
                    <artifactId>lucene-common</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <!-- MySQL JDBC driver, version pinned explicitly. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.47</version>
        </dependency>
        <!-- MyBatis Spring Boot integration, used by the annotated mapper interface. -->
        <dependency>
            <groupId>org.mybatis.spring.boot</groupId>
            <artifactId>mybatis-spring-boot-starter</artifactId>
            <version>2.1.2</version>
        </dependency>

    </dependencies>

    <build>
        <plugins>
            <!-- Spring Boot packaging/run plugin, configured to fork a separate JVM. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <fork>true</fork>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>

实体

package com.hr.lucene.entity;

/**
 * Plain data holder describing one indexed file: its name, path, size and text content.
 *
 * <p>All properties are mutable and start out with Java's default values
 * ({@code null} for the strings, {@code 0} for the size).
 *
 * @author 汤永红
 * @version V1.0
 */
public class FileInfo {

    /** File name. */
    private String name;

    /** File path as stored in the index. */
    private String path;

    /** File size. */
    private long size;

    /** Text content of the file. */
    private String content;

    // ---- name ----

    /** @return the file name, or {@code null} if it has not been set */
    public String getName() {
        return this.name;
    }

    /** @param name the file name to store */
    public void setName(String name) {
        this.name = name;
    }

    // ---- path ----

    /** @return the file path, or {@code null} if it has not been set */
    public String getPath() {
        return this.path;
    }

    /** @param path the file path to store */
    public void setPath(String path) {
        this.path = path;
    }

    // ---- size ----

    /** @return the file size, {@code 0} if it has not been set */
    public long getSize() {
        return this.size;
    }

    /** @param size the file size to store */
    public void setSize(long size) {
        this.size = size;
    }

    // ---- content ----

    /** @return the file content, or {@code null} if it has not been set */
    public String getContent() {
        return this.content;
    }

    /** @param content the file content to store */
    public void setContent(String content) {
        this.content = content;
    }
}

mapper
package com.hr.lucene.mapper;

import com.hr.lucene.entity.Products;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Select;

import java.util.List;

@Mapper
public interface ProductsMapper {
@Select(“select * from products”)
List findAll();
}

dao

package com.hr.lucene.dao;

import com.hr.lucene.entity.FileInfo;
import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.springframework.stereotype.Repository;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Data-access component that builds and queries a file-system based Lucene index.
 *
 * <p>Source files are read from {@code D:\lucene\files}; the index is kept in
 * {@code D:\lucene\indexdb}. Every Lucene resource opened by a method is closed
 * before that method returns, including when an exception is thrown.
 *
 * @author 汤永红
 * @version V1.0
 */
@Repository
public class LuceneDao {

    /** Directory whose files are fed into the index. */
    private static final File SOURCE_DIR = new File("D:\\lucene\\files");

    /** Directory that holds the Lucene index. */
    private static final File INDEX_DIR = new File("D:\\lucene\\indexdb");

    /**
     * Indexes every regular file found directly inside the source directory.
     *
     * <p>For each file one document is added with the fields {@code name}, {@code path},
     * {@code size} and {@code content}. Sub-directories are skipped because their content
     * cannot be read as text. If the source directory does not exist or is empty, the
     * index writer is still opened and closed, but no document is added.
     *
     * @throws Exception if the index cannot be opened or a file cannot be read or indexed
     */
    public void createIndex() throws Exception {
        File[] allFiles = SOURCE_DIR.listFiles();

        // IK analyzer performs Chinese word segmentation.
        Analyzer analyzer = new IKAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer);

        // try-with-resources closes the writer first and then the directory, even when
        // indexing fails or there is nothing to index; otherwise the writer (and the
        // index write lock it holds) would stay open.
        try (FSDirectory directory = FSDirectory.open(INDEX_DIR);
             IndexWriter iw = new IndexWriter(directory, iwc)) {
            if (allFiles == null) {
                return;
            }
            for (File file : allFiles) {
                if (!file.isFile()) {
                    continue;
                }
                iw.addDocument(toDocument(file));
            }
        }
    }

    /**
     * Searches the index for documents whose {@code field} contains the term {@code keyWords}.
     *
     * <p>The keyword is wrapped in a {@link TermQuery} as-is; it is not passed through an
     * analyzer by this method.
     *
     * @param field    name of the indexed field to search, e.g. {@code "content"}
     * @param keyWords term to look for
     * @param size     maximum number of hits to return
     * @return the matching files, or {@code null} when there is no hit
     * @throws Exception if the index cannot be opened or read
     */
    public List<FileInfo> searchIndex(String field, String keyWords, int size) throws Exception {
        // Reader and directory are closed on every exit path; all stored field values are
        // copied into FileInfo objects before that happens.
        try (FSDirectory directory = FSDirectory.open(INDEX_DIR);
             DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TermQuery termQuery = new TermQuery(new Term(field, keyWords));
            TopDocs docs = searcher.search(termQuery, size);

            ScoreDoc[] hits = docs.scoreDocs;
            if (hits == null || hits.length == 0) {
                // Kept for backward compatibility: callers receive null for "no hits".
                return null;
            }

            List<FileInfo> lists = new ArrayList<>(hits.length);
            for (ScoreDoc scoreDoc : hits) {
                // scoreDoc.doc is the internal document number inside the index.
                Document doc = searcher.doc(scoreDoc.doc);
                FileInfo info = new FileInfo();
                info.setName(doc.get("name"));
                info.setPath(doc.get("path"));
                info.setSize(Long.parseLong(doc.get("size")));
                info.setContent(doc.get("content"));
                lists.add(info);
            }
            return lists;
        }
    }

    /**
     * Builds the Lucene document for a single source file.
     *
     * @param file regular file to index; its content is read as UTF-8 text
     * @return a document with the fields {@code name}, {@code path}, {@code size} and {@code content}
     * @throws java.io.IOException if the file content cannot be read
     */
    private static Document toDocument(File file) throws java.io.IOException {
        // File size and content are obtained through the commons-io helper class.
        long size = FileUtils.sizeOf(file);
        String content = FileUtils.readFileToString(file, "utf-8");

        Document doc = new Document();
        doc.add(new TextField("name", file.getName(), Field.Store.YES));
        // The path is only stored for retrieval; StoredField does not make it searchable.
        doc.add(new StoredField("path", file.getPath()));
        doc.add(new LongField("size", size, Field.Store.YES));
        doc.add(new TextField("content", content, Field.Store.YES));
        return doc;
    }

}




web

package com.hr.lucene.web;

import com.hr.lucene.dao.LuceneDao;
import com.hr.lucene.entity.FileInfo;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import javax.annotation.Resource;
import java.util.List;

/**
 * REST endpoints under {@code /api/lucene} that trigger index creation and run searches
 * through {@link LuceneDao}.
 *
 * @author 汤永红
 * @version V1.0
 */
@RestController
@RequestMapping("/api/lucene")
public class LuceneController {

    @Resource
    private LuceneDao dao;

    /**
     * Builds the index by delegating to {@link LuceneDao#createIndex()}.
     *
     * @return {@code "成功"} when indexing completed, {@code "失败"} when it threw an exception
     */
    @RequestMapping("/createIndex")
    public String createIndex() {
        String outcome = "失败";
        try {
            dao.createIndex();
            outcome = "成功";
        } catch (Exception e) {
            e.printStackTrace();
        }
        return outcome;
    }

    /**
     * Searches the {@code content} field for the given key and returns at most five hits.
     * Mapped to {@code /api/lucene/useIndex/{key}}.
     *
     * @param key search term taken from the URL path
     * @return whatever the DAO returns for the search, or {@code null} when the search threw an exception
     */
    @RequestMapping("/useIndex/{key}")
    public List<FileInfo> useIndex(@PathVariable("key") String key) {
        List<FileInfo> hits = null;
        try {
            hits = dao.searchIndex("content", key, 5);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return hits;
    }
}

中文分词器的三个文件
IKAnalyzer.cfg.xml

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">  
<properties>  
	<comment>IK Analyzer 扩展配置</comment>
	<!-- Users can configure their own extension dictionaries here. --> 
	<entry key="ext_dict">mydict.dic;</entry> 
	 
	 <!-- Users can configure their own extension stop-word dictionaries here. -->
	<entry key="ext_stopwords">ext_stopword.dic</entry> 
</properties>

ext_stopword.dic 可以去网上找,也可以自己写

人民
末##末
啊
阿
哎
哎呀
哎哟

mydict.dic

你最优秀

测试:要看到控制台输出
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
查找java
在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/mtm001/article/details/106360908