Lucene搜索:数据源(自定义,文件夹)

一:将文件夹下面的子文件作为数据源

package lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

import java.io.*;
/**
 * Demo that uses the files directly under a folder as the data source:
 * each file is indexed into an in-memory Lucene index and a sample query is run.
 * Created by xhga on 2018/5/31.
 */
public class LuceneFile {
    public static void main(String[] args) throws IOException, InterruptedException {
        getMultiSearcher("D:\\wenjian"); // folder whose direct child files are indexed and searched
    }

    /**
     * Builds the index document for one file.
     *
     * @param title   value stored in the "fileName" field (the caller passes the file's path string)
     * @param content value stored in the "content" field (the text read from the file)
     * @return a document carrying both fields
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("fileName", title, TextField.TYPE_STORED));
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        // To index more information, add another field the same way, e.g. an "author" field.
        return doc;
    }

    /**
     * Reads a whole file as text, prefixing every line (including the first) with the
     * platform line separator, exactly as the indexed content has always been built.
     * The platform default charset is used for decoding.
     *
     * @param source file to read
     * @return the file's text
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContent(File source) throws IOException {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader =
                     new BufferedReader(new InputStreamReader(new FileInputStream(source)))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(System.lineSeparator()).append(line);
            }
        }
        return text.toString();
    }

    /**
     * Indexes every regular file directly under {@code parentPath} into an in-memory index,
     * runs the sample query against the "content" field, prints the hits and returns the
     * searcher that was used.
     *
     * @param parentPath directory whose direct child files are indexed; sub-directories are skipped
     * @return a searcher over the freshly built in-memory index (its reader is left open for the caller)
     * @throws IOException           if {@code parentPath} cannot be listed as a directory, or if
     *                               reading a file, indexing or searching fails
     * @throws IllegalStateException if the built-in sample query cannot be parsed
     * @throws InterruptedException  never thrown by this implementation; retained so the
     *                               signature stays compatible with existing callers
     */
    public static IndexSearcher getMultiSearcher(String parentPath) throws IOException, InterruptedException {
        // listFiles() returns null when the path is missing, is not a directory, or cannot be read.
        File[] files = new File(parentPath).listFiles();
        if (files == null) {
            throw new FileNotFoundException("Not a readable directory: " + parentPath);
        }

        Analyzer analyzer = new StandardAnalyzer();
        // Keep the index in memory.
        Directory idx = new RAMDirectory();

        // try-with-resources releases the writer even when reading or indexing a file fails.
        try (IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(analyzer))) {
            for (File child : files) {
                if (!child.isFile()) {
                    continue; // a sub-directory cannot be opened as a file stream
                }
                writer.addDocument(createDocument(child.toString(), readContent(child)));
            }
            writer.commit();
        }

        // Parse before opening the reader so that a bad query cannot leave a reader open.
        Query content;
        try {
            // "content:" searches the file text; use "fileName:" to search the file name instead.
            // NOTE(review): single quotes are not phrase delimiters in the classic query syntax —
            // confirm whether a double-quoted phrase was intended here.
            content = new QueryParser("content", analyzer).parse("content:'硬实力'");
        } catch (ParseException e) {
            // Fail with the cause attached instead of passing a null query on to the searcher.
            throw new IllegalStateException("Sample query could not be parsed", e);
        }

        DirectoryReader reader = DirectoryReader.open(idx);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topdoc = searcher.search(content, 10);
            System.out.println("匹配到的文件数量:"+topdoc.totalHits+"查询时间时间:"+System.currentTimeMillis());
            for (ScoreDoc scoreDoc : topdoc.scoreDocs) {
                Document hitDoc = searcher.doc(scoreDoc.doc);
                System.out.println("文件名:"+hitDoc.get("fileName")+","+hitDoc.get("content"));
            }
            return searcher;
        } catch (IOException | RuntimeException e) {
            // The searcher never reaches the caller on failure, so its reader must be released here.
            try {
                reader.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

}

二:设置指定内容(可以通过查询数据库,作为数据源)

package lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import java.nio.file.Paths;
import java.util.Random;

/**
 * Demo that indexes programmatically generated content (a stand-in for rows that could
 * come from a database query) and runs a sample query against it.
 */
public class LuceneData {
    /**
     * Builds the index document for one generated record.
     *
     * @param title   value stored in the "title" field
     * @param content value stored in the "content" field
     * @return a document with "content", "title" and a fixed "author" field
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        doc.add(new Field("content", content, TextField.TYPE_STORED));
        doc.add(new Field("title", title, TextField.TYPE_STORED));
        doc.add(new Field("author", "bobliu", TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Simple Lucene example using the standard analyzer: indexes 100000 random
     * three-character strings in memory, queries the "content" field and prints
     * the hit count followed by up to ten hits.
     *
     * @throws Exception if indexing, query parsing or searching fails
     */
    public static void testDemo() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        // Keep the index in memory. To store it on disk instead, open the directory with
        // FSDirectory.open(Paths.get("D:\\index")).
        Directory idx = new RAMDirectory();

        String[] strings = new String[]{"爱","王","张","李","周","马","习","花"};
        // One generator for the whole run instead of a new instance per document.
        Random random = new Random();

        // try-with-resources releases the writer even when adding a document fails.
        try (IndexWriter writer = new IndexWriter(idx, new IndexWriterConfig(analyzer))) {
            for (int i = 0; i < 100000; i++) {
                String s = strings[random.nextInt(strings.length)]
                        + strings[random.nextInt(strings.length)]
                        + strings[random.nextInt(strings.length)];
                // The loop index is the title; the random string is the content.
                writer.addDocument(createDocument(String.valueOf(i), s));
            }
            writer.commit();
        }

        // NOTE(review): single quotes are not phrase delimiters in the classic query syntax —
        // confirm whether a double-quoted phrase was intended here.
        Query content = new QueryParser("content", analyzer).parse("content:'王王王'");

        // The searcher does not leave this method, so its reader is closed once the hits are printed.
        try (DirectoryReader reader = DirectoryReader.open(idx)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs topdoc = searcher.search(content, 10);
            System.out.println("命中个数:"+topdoc.totalHits+"时间:"+System.currentTimeMillis());
            for (ScoreDoc scoreDoc : topdoc.scoreDocs) {
                Document hitDoc = searcher.doc(scoreDoc.doc);
                System.out.println(hitDoc.get("content")+","+hitDoc.get("title"));
            }
        }
    }

    /** Runs the demo and prints the stack trace of any failure. */
    public static void main(String[] args) {
        try {
            testDemo();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

猜你喜欢

转载自blog.csdn.net/qq_37751454/article/details/80703958