lucene 之创建索引

最近学习lucene，发现不同版本语法变化比较大。在网上查找资料都比较老，下载最新的lucene发现jdk版本也必须更新，由于我的电脑是window64+jdk1.6，所以需要找相应的版本，我发现Lucene4.7.2之后的版本都要求是jdk1.7，所以选择lucene4.7.2做实例，我查看了官方的文档，做的这个实例

import Java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexFiles {
public static void createIndex() throws IOException {
String docsPath = "D:\\lucene-test\\testFile";
String indexPath = "D:\\lucene-test\\index";
File docDir = new File(docsPath);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
}
Directory dir = FSDirectory.open(new File(indexPath));

Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, docDir);
writer.close();
}

private static void indexDocs(IndexWriter writer, File file) {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
}
}
} else {

FileInputStream fis = null;
try {
fis = new FileInputStream(file);

Document doc = new Document();

Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
doc.add(pathField);
doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old
// document can be there):
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
// Existing index (an old copy of this document may have
// been indexed) so
// we use updateDocument instead to replace the old one
// matching the exact
// path, if present:
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.getPath()), doc);
}

} catch (FileNotFoundException fnfe) {
// checking if the file can be read doesn't help
return;
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
fis.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}

public static void main(String[] args) {
try {
createIndex();
} catch (IOException e) {
e.printStackTrace();
}
}
}

猜你喜欢