前言:
Lucene 是 Apache 软件基金会 Jakarta 项目组的一个子项目,是一个开放源代码的全文检索引擎工具包。它不是一个完整的全文检索引擎,而是一个全文检索引擎的架构,提供了完整的查询引擎和索引引擎,以及部分文本分析引擎(英文与德文两种西方语言)。Lucene 的目的是为软件开发人员提供一个简单易用的工具包,以方便地在目标系统中实现全文检索的功能,或者以此为基础建立起完整的全文检索引擎。Lucene 是一套用于全文检索和搜寻的开源程式库,由 Apache 软件基金会支持和提供。Lucene 提供了一个简单却强大的应用程式接口,能够做全文索引和搜寻。在 Java 开发环境里 Lucene 是一个成熟的免费开源工具。就其本身而言,Lucene 是当前以及最近几年最受欢迎的免费 Java 信息检索程序库。人们经常提到信息检索程序库,它虽然与搜索引擎有关,但不应该将信息检索程序库与搜索引擎相混淆。
什么是全文检索??
在全文中去检索数据,检索的是文本数据
全文检索的特点??
相关度最高的排在最前面,官网中相关的网页排在最前面
对摘要进行了截取
关键词的高亮
只关注文本,不考虑语义
使用场景
①数据库中去替换模糊查询(使用)
数据库中的模糊查询(例如 like '%关键字%')不会使用索引,查询缓慢
全文检索,会为文本创建索引,根据索引进行查询
②全文索引是搜索引擎的基础
③垂直搜索
④其他... word pdf 拼音输入法
全文检索的架构
①创建索引
②根据索引进行搜索
Lucene入门
索引的创建
IndexWriter
索引的搜索
IndexSearcher
导入依赖包
<!-- lucene全文检索所需 -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>5.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>5.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-smartcn</artifactId>
<version>5.5.0</version>
</dependency>
案例一:helloworld
/**
 * Minimal Lucene example: index three short English documents with
 * {@code IndexWriter}, then search them with {@code IndexSearcher}.
 *
 * <p>Run {@link #testIndexWriter()} before {@link #testIndexSearcher()}; the
 * search test reads the index that the write test stores on disk.
 */
public class HelloWorldTest {
    /** On-disk location of the index shared by both tests. */
    private static final String INDEX_PATH = "F:\\lucene\\index\\_01_helloworld";

    private String doc1 = "hello world";
    private String doc2 = "hello java world";
    private String doc3 = "hello lucene world";

    /**
     * Creates the index: one document per sample text, each with a stored
     * {@code title} and a stored {@code content} field.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    @Test
    public void testIndexWriter() throws Exception {
        // try-with-resources closes the writer first and then the directory,
        // even when adding a document or committing fails.
        try (Directory d = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(d, new IndexWriterConfig(new SimpleAnalyzer()))) {
            indexWriter.addDocument(newDocument("doc1", doc1));
            indexWriter.addDocument(newDocument("doc2", doc2));
            indexWriter.addDocument(newDocument("doc3", doc3));
            indexWriter.commit();
        }
    }

    /**
     * Searches the index created by {@link #testIndexWriter()} and prints the
     * total hit count followed by the title and content of each hit.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    @Test
    public void testIndexSearcher() throws Exception {
        try (Directory d = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexReader indexReader = DirectoryReader.open(d)) {
            // 1. Create the searcher on top of the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // 2. Parse and run the query. The analyzer used for searching must
            //    be the same kind as the one used for indexing.
            String defaultField = "content";
            QueryParser queryParser = new QueryParser(defaultField, new SimpleAnalyzer());
            Query query = queryParser.parse("content:java world");
            int numHits = 10; // maximum number of hits to return
            TopDocs topDocs = indexSearcher.search(query, numHits);

            // 3. Inspect the result.
            System.out.println("总共命中:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc); // load by internal doc id
                String title = document.get("title");
                String content = document.get("content");
                System.out.println("title:" + title + ";content:" + content);
            }
        }
    }

    /** Builds a document with a stored, analyzed {@code title} and {@code content} field. */
    private static Document newDocument(String title, String content) {
        Document document = new Document();
        document.add(new TextField("title", title, Field.Store.YES));
        document.add(new TextField("content", content, Field.Store.YES));
        return document;
    }
}
案例二:字段
/**
 * Demonstrates a hand-configured {@code FieldType}: the fields are stored,
 * indexed with positions and offsets, and NOT tokenized.
 *
 * <p>Run {@link #testIndexWriter()} before {@link #testIndexSearcher()}.
 */
public class FieldTest {
    /** On-disk location of the index shared by both tests. */
    private static final String INDEX_PATH = "F:\\lucene\\index\\_02_field";

    private String doc1 = "hello world";
    private String doc2 = "hello java world";
    private String doc3 = "hello lucene world";

    /**
     * Indexes a single document whose {@code title} and {@code content} fields
     * use the custom field type.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    @Test
    public void testIndexWriter() throws Exception {
        // try-with-resources releases the writer and the directory even on failure.
        try (Directory d = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexWriter indexWriter = new IndexWriter(d, new IndexWriterConfig(new SimpleAnalyzer()))) {
            FieldType type = new FieldType();
            type.setStored(true);     // keep the original value so it can be read back
            type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // what gets indexed
            type.setTokenized(false); // do not run the analyzer over the value

            Document document1 = new Document();
            document1.add(new Field("title", "doc1", type));
            document1.add(new Field("content", doc1, type));
            indexWriter.addDocument(document1);
            indexWriter.commit();
        }
    }

    /**
     * Searches the {@code content} field and prints the hit count and each hit.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testIndexSearcher() throws Exception {
        try (Directory d = FSDirectory.open(Paths.get(INDEX_PATH));
             IndexReader indexReader = DirectoryReader.open(d)) {
            // 1. Create the searcher on top of the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // 2. Build and run the query. The field was indexed with
            //    setTokenized(false), so the query term is the complete text
            //    "hello world" rather than the individual words.
            PhraseQuery.Builder builder = new PhraseQuery.Builder();
            builder.add(new Term("content", "hello world"));
            Query query = builder.build();
            int numHits = 10; // maximum number of hits to return
            TopDocs topDocs = indexSearcher.search(query, numHits);

            // 3. Inspect the result.
            System.out.println("总共命中:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc); // load by internal doc id
                String title = document.get("title");
                String content = document.get("content");
                System.out.println("title:" + title + ";content:" + content);
            }
        }
    }
}
案例三:查询(TermQuery 与 BooleanQuery)
/**
 * Demonstrates programmatic queries ({@code TermQuery} combined through
 * {@code BooleanQuery}) against Chinese text analyzed with
 * {@code SmartChineseAnalyzer}.
 *
 * <p>Run {@link #testIndexWriter()} before {@link #testIndexSearcher()}.
 */
public class QueryTest {
    private final String pathStr = "F:\\lucene\\index\\_03_query";
    private String tlbb = "在一个月黑风高的夜晚";
    private String sdyxz = "在一个伸手不见五指的夜晚";
    private String jpm = "在一个阳光明媚的夜晚";

    /**
     * Indexes three documents. {@code title} is kept as one exact, untokenized
     * term; {@code content} is analyzed text. Both are stored.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    @Test
    public void testIndexWriter() throws Exception {
        // try-with-resources releases the writer and the directory even on failure.
        try (Directory d = FSDirectory.open(Paths.get(pathStr));
             IndexWriter indexWriter = new IndexWriter(d, new IndexWriterConfig(new SmartChineseAnalyzer()))) {
            FieldType type = new FieldType();
            type.setTokenized(false);
            type.setStored(true);
            // A new FieldType is not indexed by default (IndexOptions.NONE).
            // Without this line the title is only stored and the TermQuery on
            // "title" in testIndexSearcher() can never match.
            type.setIndexOptions(IndexOptions.DOCS);

            indexWriter.addDocument(newDocument(type, "天龙八部", tlbb));
            indexWriter.addDocument(newDocument(type, "射雕英雄传", sdyxz));
            indexWriter.addDocument(newDocument(type, "键盘膜", jpm));
            indexWriter.commit();
        }
    }

    /**
     * Runs a boolean query that requires an exact title match and prints the
     * hit count and each hit.
     *
     * @throws Exception if the index cannot be read
     */
    @Test
    public void testIndexSearcher() throws Exception {
        try (Directory d = FSDirectory.open(Paths.get(pathStr));
             IndexReader indexReader = DirectoryReader.open(d)) {
            // 1. Create the searcher on top of the reader.
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);

            // 2. Build and run the query.
            //    Alternative using the query parser (analyzer must match indexing):
            //      new QueryParser("content", new SmartChineseAnalyzer()).parse("content:阳光明媚")
            Query titleQuery = new TermQuery(new Term("title", "天龙八部"));
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            builder.add(titleQuery, BooleanClause.Occur.MUST);
            //    To also require a content term, add a second clause:
            //      builder.add(new TermQuery(new Term("content", "夜晚")), BooleanClause.Occur.MUST);
            int numHits = 10; // maximum number of hits to return
            TopDocs topDocs = indexSearcher.search(builder.build(), numHits);

            // 3. Inspect the result.
            System.out.println("总共命中:" + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = indexSearcher.doc(scoreDoc.doc); // load by internal doc id
                String title = document.get("title");
                String content = document.get("content");
                System.out.println("title:" + title + ";content:" + content);
            }
        }
    }

    /** Builds a document with an exact-match {@code title} and an analyzed {@code content}. */
    private static Document newDocument(FieldType titleType, String title, String content) {
        Document document = new Document();
        document.add(new Field("title", title, titleType));
        document.add(new TextField("content", content, Field.Store.YES));
        return document;
    }
}
项目实战使用:
抽取Lucene工具类
public class LuceneUtil {
private static final String INDEX_DIRCTORY = "F:/lucene/index";
private static Directory directory;//存放索引的目录
private static IndexWriter indexWriter;//索引写对象,线程安全
private static IndexReader indexReader;//索引读对象,线程安全
private static IndexSearcher indexSearcher;//索引查询对象,线程安全
private static Analyzer analyzer;//分词器对象
static{
try { //如果父目录不存在,先创建父目录
File file = new File(INDEX_DIRCTORY);
if(!file.getParentFile().exists()){
file.getParentFile().mkdirs();
}
directory = FSDirectory.open(Paths.get(INDEX_DIRCTORY));
} catch (IOException e) {
e.printStackTrace();
}
}
//获取IndexWriter对象
public static IndexWriter getIndexWriter(){
try {
if(indexWriter==null){
Analyzer analyzer = getAnalyzer();
IndexWriterConfig conf = new IndexWriterConfig(analyzer);
indexWriter = new IndexWriter(directory,conf);
}
return indexWriter;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
//获取IndexReader
public static IndexReader getIndexReader(){
try {
if(indexReader==null){
indexReader = DirectoryReader.open(directory);
}else {
//如果不为空,就使用DirectoryReader打开一个索引变更过的IndexReader类
DirectoryReader newIndexReader = DirectoryReader.openIfChanged((DirectoryReader) indexReader);
if(newIndexReader!=null){
//把旧的索引读对象关掉
indexReader.close();
indexReader = newIndexReader;
}
}
return indexReader;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
//获取IndexSearcher对象
public static IndexSearcher getIndexSearcher(){
if(indexSearcher==null){
indexSearcher = new IndexSearcher(getIndexReader());
}
return indexSearcher;
}
//获取分词器对象
public static Analyzer getAnalyzer() {
if(analyzer!=null){
return analyzer;
}
return new SmartChineseAnalyzer();
}
//创建QueryParser对象
public static QueryParser createQueryParser(String field){
return new QueryParser(field,getAnalyzer());
}
//创建Query对象
public static Query createQuery(String field[],String queryStr){
BooleanQuery booleanQuery = new BooleanQuery();
for (String f : field) {
booleanQuery.add(new TermQuery(new Term(f,queryStr)), BooleanClause.Occur.SHOULD);
}
return booleanQuery;
}
//分页查询索引
public static List<Document> getHitDocuments(String[] field,String queryStr,int pageNum,int pageSize){
List<Document> list = new ArrayList<>();
try {
IndexSearcher indexSearcher = getIndexSearcher();
Query query = createQuery(field,queryStr);
System.out.println(query);
// 查询数据, 结束页面自前的数据都会查询到,但是只取本页的数据
TopDocs topDocs = indexSearcher.search(query, pageNum * pageSize);
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
//总条目数
int totalHits = topDocs.totalHits;
int start = (pageNum-1)*pageSize;
int end = (pageNum*pageSize)>totalHits?totalHits:(pageNum*pageSize);
for(int i=start;i<end;i++){
ScoreDoc scoreDoc = scoreDocs[i];
Document document = indexSearcher.doc(scoreDoc.doc);
list.add(document);
}
} catch (IOException e) {
e.printStackTrace();
}
return list;
}
//总共命中的条目数
public static long totalHits(String[] field,String queryStr){
try {
IndexSearcher indexSearcher = getIndexSearcher();
Query query = createQuery(field,queryStr);
TopDocs topDocs = indexSearcher.search(query, 10);
return topDocs.totalHits;
} catch (IOException e) {
e.printStackTrace();
return 0;
}
}
//删除索引
public static void deleteIndex(String[] field,String queryStr){
try {
Query query = createQuery(field,queryStr);
getIndexWriter().deleteDocuments(query);
} catch (IOException e) {
e.printStackTrace();
}
}
//删除所有索引
public static void deleteAllIndex()throws IOException{
getIndexWriter().deleteAll();
}
//更新索引文档
public static void updateIndex(Term term,Document document) {
try {
getIndexWriter().updateDocument(term, document);
} catch (IOException e) {
e.printStackTrace();
}
}
//更新索引文档
public static void updateIndex(String field,String value,Document document) {
updateIndex( new Term(field, value), document);
}
//添加索引文档
public static void addIndex(Document document) {
updateIndex(null, document);
}
//关闭资源
public static void closeAll(){
try {
if (indexWriter!=null)
indexWriter.close();
if(indexReader!=null)
indexReader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
功能实现:
数据库表字段:
/**
 * Keeps the Lucene index of system logs in sync with the database and serves
 * paged full-text searches over the {@code function} and {@code params} fields.
 */
@Component
public class SystemLogHelperImpl implements SystemLogHelper {
    /** Fields searched by {@link #querrySystemLog} and {@link #getTotal}. */
    private static final String[] SEARCH_FIELDS = {"function", "params"};

    @Autowired
    private SystemLogMapper systemLogMapper;

    /**
     * Rebuilds the index from the database: removes all indexed documents,
     * adds one document per {@code SystemLog} row, and commits.
     *
     * @throws IOException if the writer is unavailable or writing fails
     */
    @Override
    public void updateAllIndex() throws IOException {
        IndexWriter indexWriter = LuceneUtil.getIndexWriter();
        if (indexWriter == null) {
            throw new IOException("Lucene IndexWriter is not available");
        }
        // Load the rows first so a database failure leaves the index untouched.
        List<SystemLog> systemLogs = systemLogMapper.selectAll();
        // Clear the old documents; otherwise every call would index each row again.
        indexWriter.deleteAll();
        for (SystemLog systemLog : systemLogs) {
            indexWriter.addDocument(toDocument(systemLog));
        }
        indexWriter.commit();
        // The writer is deliberately left open: it is the instance cached by
        // LuceneUtil, and closing it here would hand a closed writer to the
        // next caller.
    }

    /**
     * Returns one page of logs whose {@code function} or {@code params} field
     * matches the query text.
     *
     * @param query carries the search text, page number and page size
     * @return the logs of the requested page
     */
    public List<SystemLog> querrySystemLog(SystemLogQuery query) {
        List<Document> documents =
                LuceneUtil.getHitDocuments(SEARCH_FIELDS, query.getQ(), query.getPage(), query.getRows());
        List<SystemLog> logs = new ArrayList<>();
        for (Document document : documents) {
            SystemLog log = new SystemLog();
            log.setId(Long.parseLong(document.get("id")));
            log.setOpuserId(Long.parseLong(document.get("opuserId")));
            log.setFunction(document.get("function"));
            log.setParams(document.get("params"));
            logs.add(log);
        }
        return logs;
    }

    /**
     * Returns the total number of logs matching the query text.
     *
     * @param query carries the search text
     * @return total hit count
     */
    public long getTotal(SystemLogQuery query) {
        return LuceneUtil.totalHits(SEARCH_FIELDS, query.getQ());
    }

    /** Converts a log row into a document with stored id, opuserId, function and params fields. */
    private static Document toDocument(SystemLog systemLog) {
        Document document = new Document();
        addText(document, "id", String.valueOf(systemLog.getId()));
        addText(document, "opuserId", String.valueOf(systemLog.getOpuserId()));
        addText(document, "function", systemLog.getFunction());
        addText(document, "params", systemLog.getParams());
        return document;
    }

    /** Adds a stored text field; a null value is skipped because TextField rejects null. */
    private static void addText(Document document, String name, String value) {
        if (value != null) {
            document.add(new TextField(name, value, Field.Store.YES));
        }
    }
}