版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/lanjian056/article/details/52711314
这两天学习了一下Lucene,然后对其进行了应用上的一些封装。主要仿照项目中经常使用的BaseDao的封装方式。对Lucene的一些简单操作进行了封装,在项目中使用起来比较方便。下面介绍一下封装代码。
1.基础类
该类主要提供以下方法:
- createIndex——创建索引
- update——更新索引信息
- delete——删除索引信息
- searchAll——查询所有
- pageSearch——支持分页查询
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import java.beans.PropertyDescriptor;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.lang.reflect.ParameterizedType;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * Base class for Lucene-backed full-text indexing and search of entities of type {@code T}.
 *
 * <p>An entity is turned into a Lucene document by reflection: each declared instance field
 * becomes a document field whose value is the {@code toString()} of the matching bean getter.
 * Fields named in a {@code keyWordFields} argument are analyzed and indexed; all other fields
 * are only stored. The {@code "id"} field is always indexed as one untokenized term, because
 * {@code update} and {@code delete} locate documents by that exact term.
 *
 * <p>Search results are rendered from the type argument of the direct generic superclass, so a
 * concrete subclass has to bind {@code T} itself, e.g. {@code class X extends BaseLucene<Person>}.
 *
 * @author: alex
 * @time: 14-4-1 2:24 PM
 * @version: 1.0
 */
public abstract class BaseLucene<T extends Object> {

    private static final Logger LOGGER = Logger.getLogger(BaseLucene.class.getName());

    /** Name of the entity property used as the document key by update and delete. */
    private static final String ID_FIELD = "id";

    /** Query text that matches every document in the index. */
    private static final String MATCH_ALL_QUERY = "(*:*)";

    /** Analyzer shared by indexing, query parsing and highlighting. */
    protected static Analyzer analyzer = new IKAnalyzer();

    /** Directory on disk that holds the index files. */
    protected static File indexDir = new File(LuceneContants.INDEX_FILE_PATH);

    /**
     * Opens the index directory.
     *
     * @return an open directory that the caller must close
     * @throws IOException if the directory cannot be opened
     */
    private Directory openDirectory() throws IOException {
        return FSDirectory.open(indexDir);
    }

    /**
     * Opens an index writer that creates the index when missing and appends otherwise.
     *
     * @param directory open index directory
     * @return an open writer that the caller must close
     * @throws IOException if the writer cannot be created
     */
    private IndexWriter openIndexWriter(Directory directory) throws IOException {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        return new IndexWriter(directory, config);
    }

    /**
     * Closes a resource, logging instead of propagating a failure.
     *
     * @param resource resource to close; {@code null} is ignored
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to close " + resource, e);
        }
    }

    /**
     * Adds one entity to the index as a new document.
     *
     * <p>I/O failures are logged and not propagated.
     *
     * @param entity        entity to index
     * @param keyWordFields names of the fields to analyze and index; may be {@code null}
     */
    public void createIndex(T entity, List<String> keyWordFields) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = openDirectory();
            indexWriter = openIndexWriter(directory);
            indexWriter.addDocument(getDoc(entity, keyWordFields));
            // close() performs the commit, so do it here where a failure is reported with context.
            indexWriter.close();
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to add a document to the index at " + indexDir, e);
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Builds the Lucene document for an entity.
     *
     * @param entity        entity to convert
     * @param keyWordFields names of the fields to analyze and index; may be {@code null}
     * @return document holding one field per non-null entity property
     */
    private Document getDoc(T entity, List<String> keyWordFields) {
        // Analyzed full-text field: indexed, stored and tokenized.
        FieldType analyzedType = new FieldType();
        analyzedType.setIndexed(true);
        analyzedType.setStored(true);
        analyzedType.setTokenized(true);
        analyzedType.freeze();

        // Key field: indexed as a single term so that new Term("id", value) matches it exactly.
        FieldType keyType = new FieldType();
        keyType.setIndexed(true);
        keyType.setStored(true);
        keyType.setTokenized(false);
        keyType.freeze();

        Document doc = new Document();
        Map<String, Integer> fieldsMap = getAllFields(entity.getClass(), keyWordFields);
        for (Map.Entry<String, Integer> entry : fieldsMap.entrySet()) {
            String name = entry.getKey();
            Object value = getterMethod(entity, name);
            if (value == null) {
                // Lucene fields reject null values; an absent property is simply not written.
                continue;
            }
            String text = value.toString();
            if (ID_FIELD.equals(name)) {
                doc.add(new Field(name, text, keyType));
            } else if (entry.getValue().intValue() == LuceneContants.IS_KEY_WORD) {
                doc.add(new Field(name, text, analyzedType));
            } else {
                doc.add(new StoredField(name, text));
            }
        }
        return doc;
    }

    /**
     * Lists the declared instance fields of an entity class and flags the keyword ones.
     *
     * @param entityClass   entity class to inspect
     * @param keyWordFields names of the keyword fields; may be {@code null}
     * @return map from field name to {@code IS_KEY_WORD} or {@code NO_KEY_WORD}
     */
    private Map<String, Integer> getAllFields(Class<?> entityClass, List<String> keyWordFields) {
        Map<String, Integer> fieldsMap = new HashMap<String, Integer>();
        for (java.lang.reflect.Field field : entityClass.getDeclaredFields()) {
            // Static and compiler-generated fields are not bean properties.
            if (field.isSynthetic() || Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            String fieldName = field.getName();
            boolean isKeyWord = keyWordFields != null && keyWordFields.contains(fieldName);
            fieldsMap.put(fieldName, Integer.valueOf(
                    isKeyWord ? LuceneContants.IS_KEY_WORD : LuceneContants.NO_KEY_WORD));
        }
        return fieldsMap;
    }

    /**
     * Reads a bean property through its getter.
     *
     * @param obj       bean to read from
     * @param filedName property name
     * @return the property value, or {@code null} when it is null or cannot be read
     */
    private Object getterMethod(Object obj, String filedName) {
        try {
            PropertyDescriptor descriptor = new PropertyDescriptor(filedName, obj.getClass());
            Method getter = descriptor.getReadMethod();
            if (getter != null) {
                return getter.invoke(obj);
            }
        } catch (Exception e) {
            LOGGER.log(Level.WARNING, "Cannot read property '" + filedName + "'", e);
        }
        return null;
    }

    /**
     * Returns every document matching the query, up to {@code QUERY_MAX_COUNT} hits.
     *
     * @param queryWhere        query text; {@code null} or blank matches all documents
     * @param defaultQueryField field searched by terms that name no field
     * @param keyWordFields     keyword fields to highlight in the result; may be {@code null}
     * @return JSON array string, or {@code null} when the search fails
     */
    public String searchAll(String queryWhere, String defaultQueryField, List<String> keyWordFields) {
        return search(queryWhere, defaultQueryField, keyWordFields, 0, LuceneContants.QUERY_MAX_COUNT);
    }

    /**
     * Returns one page of the documents matching the query.
     *
     * @param queryWhere        query text; {@code null} or blank matches all documents
     * @param defaultQueryField field searched by terms that name no field
     * @param keyWordFields     keyword fields to highlight in the result; may be {@code null}
     * @param currPage          1-based page number
     * @param pageSize          number of documents per page
     * @return JSON array string (empty array for a page outside the result window),
     *         or {@code null} when the search fails
     */
    public String pageSearch(String queryWhere, String defaultQueryField, List<String> keyWordFields,
                             int currPage, int pageSize) {
        if (currPage < 1 || pageSize < 1) {
            return new JSONArray().toString();
        }
        // long arithmetic: currPage * pageSize can overflow int.
        long start = (long) (currPage - 1) * pageSize;
        if (start >= LuceneContants.QUERY_MAX_COUNT) {
            return new JSONArray().toString();
        }
        long end = Math.min(start + pageSize, (long) LuceneContants.QUERY_MAX_COUNT);
        // Only the top "end" hits are needed to render this page.
        return search(queryWhere, defaultQueryField, keyWordFields, (int) start, (int) end);
    }

    /**
     * Runs a query and renders the hits in {@code [start, maxHits)} as JSON.
     *
     * @param queryWhere        query text; {@code null} or blank matches all documents
     * @param defaultQueryField field searched by terms that name no field
     * @param keyWordFields     keyword fields to highlight; may be {@code null}
     * @param start             index of the first hit to render
     * @param maxHits           number of top hits to fetch; must be positive
     * @return JSON array string, or {@code null} when the search fails
     */
    private String search(String queryWhere, String defaultQueryField, List<String> keyWordFields,
                          int start, int maxHits) {
        String queryText = queryWhere;
        List<String> highlightFields = keyWordFields;
        if (queryText == null || queryText.trim().length() == 0) {
            // A match-all query has no terms to highlight, so highlighting is skipped entirely.
            queryText = MATCH_ALL_QUERY;
            highlightFields = new ArrayList<String>();
        } else if (highlightFields == null) {
            highlightFields = new ArrayList<String>();
        }

        Directory directory = null;
        IndexReader reader = null;
        try {
            directory = openDirectory();
            reader = DirectoryReader.open(directory);
            IndexSearcher indexSearcher = new IndexSearcher(reader);
            Query query = new QueryParser(Version.LUCENE_46, defaultQueryField, analyzer).parse(queryText);
            ScoreDoc[] hits = indexSearcher.search(query, maxHits).scoreDocs;
            int end = Math.min(maxHits, hits.length);
            return resultToJson(indexSearcher, query, hits, highlightFields, start, end);
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Search failed for query: " + queryText, e);
            return null;
        } finally {
            closeQuietly(reader);
            closeQuietly(directory);
        }
    }

    /**
     * Resolves the entity class bound to {@code T} by the direct subclass.
     *
     * @return the entity class
     * @throws ClassCastException if the direct superclass is not parameterized with a class
     */
    @SuppressWarnings("unchecked")
    private Class<T> resolveEntityClass() {
        ParameterizedType superType = (ParameterizedType) getClass().getGenericSuperclass();
        return (Class<T>) superType.getActualTypeArguments()[0];
    }

    /**
     * Renders a range of hits as a JSON array with one object per document.
     *
     * <p>On failure the error is logged and the objects built so far are returned.
     *
     * @param indexSearcher searcher used to load the hit documents
     * @param query         executed query, used for highlighting
     * @param hits          hits returned by the search
     * @param keyWordFields keyword fields to highlight
     * @param start         index of the first hit to render (inclusive)
     * @param end           index after the last hit to render (exclusive)
     * @return JSON array string
     */
    private String resultToJson(IndexSearcher indexSearcher, Query query, ScoreDoc[] hits,
                                List<String> keyWordFields, int start, int end) {
        JSONArray jsonArray = new JSONArray();
        try {
            Map<String, Integer> fieldsMap = getAllFields(resolveEntityClass(), keyWordFields);
            for (int i = start; i < end; i++) {
                Document hitDoc = indexSearcher.doc(hits[i].doc);
                JSONObject jsonObject = new JSONObject();
                for (Map.Entry<String, Integer> entry : fieldsMap.entrySet()) {
                    String name = entry.getKey();
                    boolean highlight = entry.getValue().intValue() == LuceneContants.IS_KEY_WORD;
                    String value = highlight ? toHighlighter(query, hitDoc, name) : hitDoc.get(name);
                    jsonObject.put(name, value);
                }
                jsonArray.add(jsonObject);
            }
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to convert search hits to JSON", e);
        }
        return jsonArray.toString();
    }

    /**
     * Wraps the query terms found in a stored field value in blue {@code <font>} tags.
     *
     * <p>NOTE(review): the stored text is not HTML-escaped before the tags are inserted; escape
     * it where the result is rendered if the indexed text can come from untrusted input.
     *
     * @param query executed query
     * @param doc   hit document
     * @param field name of the field to highlight
     * @return the highlighted fragment; the plain stored value when nothing matches or
     *         highlighting fails; {@code null} when the document has no value for the field
     */
    private String toHighlighter(Query query, Document doc, String field) {
        String text = doc.get(field);
        if (text == null) {
            return null;
        }
        try {
            SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font color=\"blue\">", "</font>");
            Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
            TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
            String fragment = highlighter.getBestFragment(tokenStream, text);
            return fragment == null ? text : fragment;
        } catch (IOException e) {
            LOGGER.log(Level.WARNING, "Failed to highlight field '" + field + "'", e);
        } catch (InvalidTokenOffsetsException e) {
            LOGGER.log(Level.WARNING, "Failed to highlight field '" + field + "'", e);
        }
        return text;
    }

    /**
     * Replaces the indexed document whose {@code id} equals the entity's id, adding the document
     * when none exists.
     *
     * @param entity        entity carrying the new values; its {@code id} property must be readable
     * @param keyWordFields names of the fields to analyze and index; may be {@code null}
     * @return {@code true} when the change was committed, {@code false} otherwise
     */
    public boolean update(T entity, List<String> keyWordFields) {
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            Object id = getterMethod(entity, ID_FIELD);
            if (id == null) {
                LOGGER.log(Level.WARNING, "Cannot update the index: the entity has no readable id");
                return false;
            }
            directory = openDirectory();
            indexWriter = openIndexWriter(directory);
            Term term = new Term(ID_FIELD, String.valueOf(id));
            indexWriter.updateDocument(term, getDoc(entity, keyWordFields));
            // close() performs the commit; a failure here must not be reported as success.
            indexWriter.close();
            return true;
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to update the index at " + indexDir, e);
            return false;
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }

    /**
     * Deletes the indexed documents whose {@code id} equals the given id.
     *
     * @param id id of the documents to delete; must be non-null and non-negative
     * @return {@code true} when the change was committed, {@code false} otherwise
     * @throws IllegalArgumentException if {@code id} is {@code null} or negative
     */
    public boolean delete(Integer id) {
        if (id == null || id.intValue() < 0) {
            throw new IllegalArgumentException("参数不正确!");
        }
        Directory directory = null;
        IndexWriter indexWriter = null;
        try {
            directory = openDirectory();
            indexWriter = openIndexWriter(directory);
            indexWriter.deleteDocuments(new Term(ID_FIELD, String.valueOf(id)));
            // close() performs the commit; a failure here must not be reported as success.
            indexWriter.close();
            return true;
        } catch (Exception e) {
            LOGGER.log(Level.SEVERE, "Failed to delete id " + id + " from the index at " + indexDir, e);
            return false;
        } finally {
            closeQuietly(indexWriter);
            closeQuietly(directory);
        }
    }
}
常量类:
/**
 * Constants shared by the Lucene wrapper classes.
 *
 * @author: alex
 * @time: 14-4-1 2:24 PM
 * @version: 1.0
 */
public class LuceneContants {
    /** Name of the system property that overrides the default index directory. */
    public static final String INDEX_FILE_PATH_PROPERTY = "lucene.index.path";

    /**
     * Directory where the index files are stored. Read once, when this class is initialized,
     * from the {@code lucene.index.path} system property; falls back to {@code D://indexFile}
     * so that existing deployments keep their location.
     */
    public static final String INDEX_FILE_PATH =
            System.getProperty(INDEX_FILE_PATH_PROPERTY, "D://indexFile");

    /** Maximum number of hits a single search retrieves. */
    public static final int QUERY_MAX_COUNT = 100000;

    /** Flag value for a field that is a keyword (analyzed and indexed) field. */
    public static final int IS_KEY_WORD = 1;

    /** Flag value for a field that is not a keyword field. */
    public static final int NO_KEY_WORD = 0;
}
2.service层应用
import java.util.ArrayList;
import java.util.List;
/**
 * Service layer that exposes the generic index operations of {@code BaseLucene} for
 * {@code Person} entities, always passing the same list of keyword fields.
 *
 * @author: alex
 * @time: 14-4-2 1:35 PM
 * @version: 1.0
 */
public class PersonSearchService extends BaseLucene<Person> {

    /** Keyword fields handed to every index and search call: id, name and introduce. */
    private static final List<String> INDEXED_FIELDS = buildIndexedFields();

    /**
     * Builds the keyword field list used by this service.
     *
     * @return mutable list containing "id", "name" and "introduce", in that order
     */
    private static List<String> buildIndexedFields() {
        List<String> names = new ArrayList<String>();
        names.add("id");
        names.add("name");
        names.add("introduce");
        return names;
    }

    /**
     * Adds a person to the index.
     *
     * @param person person to index
     */
    public void savePerson(Person person) {
        createIndex(person, INDEXED_FIELDS);
    }

    /**
     * Updates the index entry of a person.
     *
     * @param person person carrying the new values
     * @return true on success, false on failure
     */
    public boolean updatePerson(Person person) {
        boolean updated = update(person, INDEXED_FIELDS);
        return updated;
    }

    /**
     * Removes a person from the index.
     *
     * @param id id of the person to remove
     * @return true on success, false on failure
     */
    public boolean deletePerson(Integer id) {
        boolean deleted = delete(id);
        return deleted;
    }

    /**
     * Searches for all persons matching a query.
     *
     * @param queryWhere        query text
     * @param defaultQueryField default field to search
     * @return result in JSON format
     */
    public String queryAll(String queryWhere, String defaultQueryField) {
        return searchAll(queryWhere, defaultQueryField, INDEXED_FIELDS);
    }

    /**
     * Searches for one page of the persons matching a query.
     *
     * @param queryWhere        query text
     * @param defaultQueryField default field to search
     * @param currPage          current page
     * @param pageSize          number of entries per page
     * @return result in JSON format
     */
    public String pageQuery(String queryWhere, String defaultQueryField, int currPage, int pageSize) {
        return pageSearch(queryWhere, defaultQueryField, INDEXED_FIELDS, currPage, pageSize);
    }
}
实体类:
/**
 * Person entity: a plain bean with an id, a name, an age and an introduction text.
 *
 * @author: alex
 * @time: 14-4-2 1:32 PM
 * @version: 1.0
 */
public class Person {

    /** Identifier of the person. */
    private int id;
    /** Name of the person. */
    private String name;
    /** Age of the person. */
    private int age;
    /** Free-text introduction of the person. */
    private String introduce;

    // ---- getters ----

    /** @return the identifier */
    public int getId() {
        return this.id;
    }

    /** @return the name */
    public String getName() {
        return this.name;
    }

    /** @return the age */
    public int getAge() {
        return this.age;
    }

    /** @return the introduction text */
    public String getIntroduce() {
        return this.introduce;
    }

    // ---- setters ----

    /** @param id the identifier to set */
    public void setId(int id) {
        this.id = id;
    }

    /** @param name the name to set */
    public void setName(String name) {
        this.name = name;
    }

    /** @param age the age to set */
    public void setAge(int age) {
        this.age = age;
    }

    /** @param introduce the introduction text to set */
    public void setIntroduce(String introduce) {
        this.introduce = introduce;
    }
}
3.测试类
import junit.framework.Assert;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * Tests for {@code PersonSearchService}.
 *
 * <p>These tests write to and read from the index that the service itself uses; documents
 * added by one test are visible to the others. The query tests fail when the search returns
 * {@code null}, so they need an index to exist before they run.
 *
 * @author: alex
 * @time: 14-4-1 10:54 AM
 * @version: 1.0
 */
public class PersonSearchServiceTest {

    static PersonSearchService personSearchService = null;

    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        personSearchService = new PersonSearchService();
    }

    @AfterClass
    public static void tearDownAfterClass() throws Exception {
        // Nothing to release.
    }

    /**
     * Creates a fully populated person.
     *
     * @param id        identifier
     * @param name      name
     * @param age       age
     * @param introduce introduction text
     * @return the new person
     */
    private static Person newPerson(int id, String name, int age, String introduce) {
        Person person = new Person();
        person.setId(id);
        person.setName(name);
        person.setAge(age);
        person.setIntroduce(introduce);
        return person;
    }

    /** Indexes five sample persons. */
    @Test
    public void testSavePerson() {
        personSearchService.savePerson(newPerson(1, "张三", 21, "张三是中国好演员!"));
        personSearchService.savePerson(newPerson(2, "张一三", 22, "张三是中国好替身!"));
        personSearchService.savePerson(newPerson(3, "张三疯", 23, "张三是中国好程序员!"));
        personSearchService.savePerson(newPerson(4, "啊张三", 24, "张三是中国好声音!"));
        personSearchService.savePerson(newPerson(5, "李三", 25, "啊啊啊啊啊啊啊啊啊啊啊啊啊!"));
    }

    /** Replaces the entry with id 4. */
    @Test
    public void testUpdatePerson() {
        Person person = newPerson(4, "张三啊", 24, "把啊张三改成了张三啊!");
        boolean result = personSearchService.updatePerson(person);
        Assert.assertTrue(result);
    }

    /** Deletes the entry with id 2. */
    @Test
    public void testDeletePerson() {
        boolean result = personSearchService.deletePerson(2);
        Assert.assertTrue(result);
    }

    /** Runs an unpaged query and checks that the search itself succeeded. */
    @Test
    public void testQuery() {
        String queryWhere = "name:张三 introduce:中国";
        String defaultQueryField = "name";
        String result = personSearchService.queryAll(queryWhere, defaultQueryField);
        System.out.println(result);
        // A null result means the search failed; without this check the test could never fail.
        Assert.assertNotNull("queryAll returned null", result);
    }

    /** Runs a paged query (page 1, two entries per page) and checks that the search succeeded. */
    @Test
    public void testPageQuery() {
        String queryWhere = "name:张三 introduce:中国";
        String defaultQueryField = "name";
        String result = personSearchService.pageQuery(queryWhere, defaultQueryField, 1, 2);
        System.out.println(result);
        // A null result means the search failed; without this check the test could never fail.
        Assert.assertNotNull("pageQuery returned null", result);
    }
}
4.说明
本示例没有对查询结果进行排序,另外还有一些地方有待完善。代码发出来,供大家参考。
示例中用到的jar包均通过maven管理,开发时使用的pom文件如下:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the Lucene wrapper example. It declares project coordinates and dependencies only. -->
<modelVersion>4.0.0</modelVersion>
<groupId>lucene</groupId>
<artifactId>lucene</artifactId>
<version>1.0</version>
<dependencies>
<!-- Lucene 4.6.0 modules. The example code passes Version.LUCENE_46 to the writer config and query parser. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>4.6.0</version>
</dependency>
<!-- Provides the org.apache.lucene.search.highlight classes imported by BaseLucene. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-highlighter</artifactId>
<version>4.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>4.6.0</version>
</dependency>
<!-- Provides org.apache.lucene.queryparser.classic.QueryParser imported by BaseLucene. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.6.0</version>
</dependency>
<!-- NOTE(review): no <scope> is declared, so this test-support artifact is on the compile classpath. -->
<!-- NOTE(review): JUnit, used by the example test class, is not declared directly here; presumably it arrives transitively. TODO confirm. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>4.6.0</version>
</dependency>
<!-- NOTE(review): version 3.6.2 differs from the 4.6.0 of the other Lucene modules, and no smartcn class is imported by the example code shown. Confirm this dependency is needed. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-smartcn</artifactId>
<version>3.6.2</version>
</dependency>
<!-- Provides org.wltea.analyzer.lucene.IKAnalyzer, the analyzer used by BaseLucene. -->
<!-- NOTE(review): presumably installed in a local or private repository under this coordinate. TODO confirm. -->
<dependency>
<groupId>IKAnalyzer</groupId>
<artifactId>IKAnalyzer</artifactId>
<version>2012FF_u1</version>
</dependency>
<!-- Provides net.sf.json.JSONArray and JSONObject, used to render search results. -->
<!-- NOTE(review): no <classifier> is declared; verify that this coordinate resolves from the repository in use. -->
<dependency>
<groupId>net.sf.json-lib</groupId>
<artifactId>json-lib</artifactId>
<version>2.4</version>
</dependency>
<!-- The remaining artifacts are not imported by the example code shown; presumably they are runtime dependencies of json-lib. TODO confirm. -->
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>net.sf.ezmorph</groupId>
<artifactId>ezmorph</artifactId>
<version>1.0.6</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1</version>
</dependency>
<dependency>
<groupId>commons-beanutils</groupId>
<artifactId>commons-beanutils</artifactId>
<version>1.8.3</version>
</dependency>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
</dependencies>
</project>
至于运行结果没有贴出来,有兴趣的同学可以自己运行一下。