本文导读

本文承接《 Lucene 中文分词器 Ik-Analyzer 使用教程》、《 Lucene 实战之入门案例》，不再创建新项目。

删除索引

当实际的原始文档发生变化之后，Lucene 中的数据也应该进行相应的更新或删除。
索引删除/更新之后，索引对应的文档同样会删除/更新！

删除全部索引

如下所示：删除全部索引

package com.lct.wmx.utils;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Lucene index maintenance utilities.
 * Created by Administrator on 2018/8/29 0029.
 */
public class IndexManagerUtils {

    /**
     * Deletes every document (and its index data) from the index stored in the given directory.
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, does not exist, or is a regular file.
     * Calling it again on an already emptied index has no further effect.
     *
     * @param indexDir directory holding the Lucene index
     * @throws IOException if the index cannot be opened, modified or closed
     */
    public static void indexDelAll(File indexDir) throws IOException {
        if (indexDir == null || !indexDir.exists() || indexDir.isFile()) {
            return;
        }
        // Lucene 4.10.3 API: FSDirectory.open takes a File and IndexWriterConfig needs a Version
        // (Lucene 7.x takes a Path and drops the Version argument).
        // try-with-resources closes the writer, the directory and the analyzer (in that order)
        // even when deleteAll/commit throws, so the index write lock is not left behind.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = new IndexWriter(
                     directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer))) {
            indexWriter.deleteAll();
            // Commit explicitly rather than relying on close() to persist the deletion.
            indexWriter.commit();
        }
    }

    public static void main(String[] args) throws IOException {
        File file2 = new File("E:\\wmx\\luceneIndex");
        indexDelAll(file2);
    }
}

删除指定索引

如下所示，根据词元进行删除：deleteDocuments(Term... terms)。注意：Term 中的文本不会再经过分词器处理，必须与建立索引时产生的词元完全一致才能匹配。

 /**
     * Deletes the documents whose {@code fieldName} field contains the given term.
     *
     * <p>The term text is not analyzed: it has to equal a token exactly as it was produced when
     * the field was indexed. For example {@code fieldName = "fileName"} with {@code text = "solr"}
     * removes the documents whose file name was tokenized into the term "solr".
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, or when {@code text} is {@code null}.
     *
     * @param indexDir  directory holding the Lucene index
     * @param fieldName field name; must be the name used when the index was created
     * @param text      exact term text to delete by
     * @throws IOException    if the index cannot be opened, modified or closed
     * @throws ParseException never thrown here; kept in the signature so existing callers compile
     */
    public static void indexDelByTerm(File indexDir, String fieldName, String text) throws IOException, ParseException {
        if (indexDir == null || !indexDir.exists() || indexDir.isFile()) {
            return;
        }
        if (fieldName == null || "".equals(fieldName) || text == null) {
            return;
        }

        // Lucene 4.10.3 API: FSDirectory.open takes a File and IndexWriterConfig needs a Version
        // (Lucene 7.x takes a Path and drops the Version argument).
        // try-with-resources closes the writer, the directory and the analyzer (in that order)
        // even when the deletion throws, so the index write lock is not left behind.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = new IndexWriter(
                     directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer))) {
            // deleteDocuments(Term...) accepts several terms; a single one is used here.
            indexWriter.deleteDocuments(new Term(fieldName, text));
            // Commit explicitly rather than relying on close() to persist the deletion.
            indexWriter.commit();
        }
    }

    /**
     * Demo entry point: deletes the documents whose "fileName" field contains the term "web".
     */
    public static void main(String[] args) throws IOException, ParseException {
        File file2 = new File("E:\\wmx\\luceneIndex");
        // The method shown above is indexDelByTerm (there is no indexDel); it also declares
        // ParseException, which therefore has to be declared here as well.
        indexDelByTerm(file2, "fileName", "web");
    }

如下所示，根据查询结果进行删除：deleteDocuments(Query... queries)

  /**
     * Deletes the documents matched by a query parsed from {@code text} against {@code fieldName}.
     *
     * <p>Unlike a term based deletion, {@code text} goes through the query parser and the
     * analyzer, so query syntax is honoured. For example {@code fieldName = "fileName"} with
     * {@code text = "solr"} removes the documents whose file name matches the query "solr".
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, or when {@code text} is {@code null}.
     *
     * @param indexDir  directory holding the Lucene index
     * @param fieldName default field for the query; must be the name used when the index was created
     * @param text      query text selecting the documents to delete
     * @throws IOException    if the index cannot be opened, modified or closed
     * @throws ParseException if {@code text} is not a valid query
     */
    public static void indexDelByQuery(File indexDir, String fieldName, String text) throws IOException, ParseException {
        if (indexDir == null || !indexDir.exists() || indexDir.isFile()) {
            return;
        }
        if (fieldName == null || "".equals(fieldName) || text == null) {
            return;
        }

        try (Analyzer analyzer = new IKAnalyzer()) {
            // Parse first: an invalid query fails before the index is opened or locked.
            Query query = new QueryParser(fieldName, analyzer).parse(text);

            // Lucene 4.10.3 API: FSDirectory.open takes a File and IndexWriterConfig needs a
            // Version (Lucene 7.x takes a Path and drops the Version argument).
            // try-with-resources closes the writer and then the directory even when the
            // deletion throws, so the index write lock is not left behind.
            try (Directory directory = FSDirectory.open(indexDir);
                 IndexWriter indexWriter = new IndexWriter(
                         directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer))) {
                indexWriter.deleteDocuments(query);
                // Commit explicitly rather than relying on close() to persist the deletion.
                indexWriter.commit();
            }
        }
    }

    /**
     * Demo entry point: deletes the documents whose file name matches the query "write".
     */
    public static void main(String[] args) throws IOException, ParseException {
        File indexLocation = new File("E:\\wmx\\luceneIndex");
        String field = "fileName";
        String keyword = "write";
        indexDelByQuery(indexLocation, field, keyword);
    }

更新索引

与删除索引一样，都是使用 org.apache.lucene.index.IndexWriter 对象进行操作！
更新的实质：先根据词元（Term）查找，如果原来有对应的旧索引与文档，则先全部删除，然后再添加新文档；如果之前没有旧的，则直接新增。

 /**
     * Replaces the documents containing a term with a new document built from a file.
     *
     * <p>Every document whose {@code fieldName} field contains the term {@code text} is deleted
     * and one new document describing {@code newDocumentFile} is added. When nothing matches,
     * the new document is simply added. The term text is not analyzed, so it has to equal a
     * token exactly as it was produced when the field was indexed.
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, when {@code text} is {@code null}, or when
     * {@code newDocumentFile} is {@code null}, missing or a directory.
     *
     * @param indexDir        directory holding the Lucene index
     * @param fieldName       field name; must be the name used when the index was created
     * @param text            exact term text selecting the documents to replace
     * @param newDocumentFile file whose name, content and size become the new document
     * @throws IOException    if the file cannot be read or the index cannot be opened, modified or closed
     * @throws ParseException never thrown here; kept in the signature so existing callers compile
     */
    public static void indexUpdateByTerm(File indexDir, String fieldName, String text, File newDocumentFile) throws IOException, ParseException {
        if (indexDir == null || !indexDir.exists() || indexDir.isFile()) {
            return;
        }
        if (fieldName == null || "".equals(fieldName) || text == null) {
            return;
        }
        if (newDocumentFile == null || !newDocumentFile.exists() || newDocumentFile.isDirectory()) {
            return;
        }

        // Build the replacement first, so an unreadable file fails before the index is opened.
        // Field names and types mirror the ones used when the index is created: all three are
        // tokenized, stored text fields (fileSize is indexed as text there, not as a number).
        Document newDocument = new Document();
        newDocument.add(new TextField("fileName", newDocumentFile.getName(), Store.YES));
        newDocument.add(new TextField("fileContext", FileUtils.readFileToString(newDocumentFile), Store.YES));
        newDocument.add(new TextField("fileSize", String.valueOf(newDocumentFile.length()), Store.YES));

        // Lucene 4.10.3 API: FSDirectory.open takes a File and IndexWriterConfig needs a Version
        // (Lucene 7.x takes a Path and drops the Version argument).
        // try-with-resources closes the writer, the directory and the analyzer (in that order)
        // even when the update throws, so the index write lock is not left behind.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = new IndexWriter(
                     directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer))) {
            // updateDocument = delete everything containing the term, then add the new document.
            indexWriter.updateDocument(new Term(fieldName, text), newDocument);
            // Commit explicitly rather than relying on close() to persist the update.
            indexWriter.commit();
        }
    }

    /**
     * Demo entry point: replaces the documents whose "fileName" field contains the term
     * "mybatis" with a document built from the given file.
     */
    public static void main(String[] args) throws IOException, ParseException {
        File indexLocation = new File("E:\\wmx\\luceneIndex");
        File replacement = new File("E:\\wmx\\searchsource\\mybatis使用简介_汪茂雄著作.txt");
        indexUpdateByTerm(indexLocation, "fileName", "mybatis", replacement);
    }

完整类文件

package com.lct.wmx.utils;

import org.apache.commons.io.FileUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Lucene index maintenance utilities: create, delete and update documents of a file based index.
 * Written against the Lucene 4.10.3 API ({@code FSDirectory.open(File)} and a {@code Version}
 * argument for {@code IndexWriterConfig}; Lucene 7.x takes a {@code Path} and drops the version).
 * Created by Administrator on 2018/8/29 0029.
 */
public class IndexManagerUtils {

    /** Field holding the file name, e.g. "abc.txt". */
    private static final String FIELD_FILE_NAME = "fileName";
    /** Field holding the whole text content of the file. */
    private static final String FIELD_FILE_CONTEXT = "fileContext";
    /** Field holding the file size in bytes, indexed as text. */
    private static final String FIELD_FILE_SIZE = "fileSize";

    /**
     * Indexes every file below a directory, including the files of all nested sub-directories.
     *
     * <p>Each file becomes one document with the fields {@code fileName}, {@code fileContext}
     * and {@code fileSize}. A single writer is used for the whole tree and documents are handed
     * to it one by one instead of being collected in memory first.
     *
     * <p>Does nothing when either argument is {@code null} or {@code targetFileDir} is not an
     * existing directory.
     *
     * @param targetFileDir directory whose files are indexed
     * @param indexSaveDir  directory the index is written to
     * @throws IOException if a file cannot be read or the index cannot be opened, written or closed
     */
    public static void indexCreate(File targetFileDir, File indexSaveDir) throws IOException {
        // isDirectory() is false for a missing path, so no separate exists() check is needed.
        if (targetFileDir == null || indexSaveDir == null || !targetFileDir.isDirectory()) {
            return;
        }
        // IKAnalyzer is used instead of StandardAnalyzer because the latter splits Chinese text
        // into single characters.
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexSaveDir);
             IndexWriter indexWriter = newWriter(directory, analyzer)) {
            addDirectory(indexWriter, targetFileDir);
            indexWriter.commit();
        }
    }

    /**
     * Deletes every document (and its index data) from the index stored in the given directory.
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, does not exist, or is a regular file.
     * Calling it again on an already emptied index has no further effect.
     *
     * @param indexDir directory holding the Lucene index
     * @throws IOException if the index cannot be opened, modified or closed
     */
    public static void indexDelAll(File indexDir) throws IOException {
        if (!isIndexDir(indexDir)) {
            return;
        }
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = newWriter(directory, analyzer)) {
            indexWriter.deleteAll();
            // Commit explicitly rather than relying on close() to persist the deletion.
            indexWriter.commit();
        }
    }

    /**
     * Deletes the documents whose {@code fieldName} field contains the given term.
     *
     * <p>The term text is not analyzed: it has to equal a token exactly as it was produced when
     * the field was indexed. For example {@code fieldName = "fileName"} with {@code text = "solr"}
     * removes the documents whose file name was tokenized into the term "solr".
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, or when {@code text} is {@code null}.
     *
     * @param indexDir  directory holding the Lucene index
     * @param fieldName field name; must be the name used when the index was created
     * @param text      exact term text to delete by
     * @throws IOException    if the index cannot be opened, modified or closed
     * @throws ParseException never thrown here; kept in the signature so existing callers compile
     */
    public static void indexDelByTerm(File indexDir, String fieldName, String text) throws IOException, ParseException {
        if (!isIndexDir(indexDir) || !isSelector(fieldName, text)) {
            return;
        }
        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = newWriter(directory, analyzer)) {
            // deleteDocuments(Term...) accepts several terms; a single one is used here.
            indexWriter.deleteDocuments(new Term(fieldName, text));
            indexWriter.commit();
        }
    }

    /**
     * Deletes the documents matched by a query parsed from {@code text} against {@code fieldName}.
     *
     * <p>Unlike {@link #indexDelByTerm}, {@code text} goes through the query parser and the
     * analyzer, so query syntax is honoured.
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, or when {@code text} is {@code null}.
     *
     * @param indexDir  directory holding the Lucene index
     * @param fieldName default field for the query; must be the name used when the index was created
     * @param text      query text selecting the documents to delete
     * @throws IOException    if the index cannot be opened, modified or closed
     * @throws ParseException if {@code text} is not a valid query
     */
    public static void indexDelByQuery(File indexDir, String fieldName, String text) throws IOException, ParseException {
        if (!isIndexDir(indexDir) || !isSelector(fieldName, text)) {
            return;
        }
        try (Analyzer analyzer = new IKAnalyzer()) {
            // Parse first: an invalid query fails before the index is opened or locked.
            Query query = new QueryParser(fieldName, analyzer).parse(text);
            try (Directory directory = FSDirectory.open(indexDir);
                 IndexWriter indexWriter = newWriter(directory, analyzer)) {
                indexWriter.deleteDocuments(query);
                indexWriter.commit();
            }
        }
    }

    /**
     * Replaces the documents containing a term with a new document built from a file.
     *
     * <p>Every document whose {@code fieldName} field contains the term {@code text} is deleted
     * and one new document describing {@code newDocumentFile} is added. When nothing matches,
     * the new document is simply added. The term text is not analyzed, so it has to equal a
     * token exactly as it was produced when the field was indexed.
     *
     * <p>Does nothing when {@code indexDir} is {@code null}, missing or a regular file, when
     * {@code fieldName} is {@code null} or empty, when {@code text} is {@code null}, or when
     * {@code newDocumentFile} is {@code null}, missing or a directory.
     *
     * @param indexDir        directory holding the Lucene index
     * @param fieldName       field name; must be the name used when the index was created
     * @param text            exact term text selecting the documents to replace
     * @param newDocumentFile file whose name, content and size become the new document
     * @throws IOException    if the file cannot be read or the index cannot be opened, modified or closed
     * @throws ParseException never thrown here; kept in the signature so existing callers compile
     */
    public static void indexUpdateByTerm(File indexDir, String fieldName, String text, File newDocumentFile) throws IOException, ParseException {
        if (!isIndexDir(indexDir) || !isSelector(fieldName, text)) {
            return;
        }
        if (newDocumentFile == null || !newDocumentFile.exists() || newDocumentFile.isDirectory()) {
            return;
        }

        // Build the replacement first, so an unreadable file fails before the index is opened.
        Document newDocument = toDocument(newDocumentFile);

        try (Analyzer analyzer = new IKAnalyzer();
             Directory directory = FSDirectory.open(indexDir);
             IndexWriter indexWriter = newWriter(directory, analyzer)) {
            // updateDocument = delete everything containing the term, then add the new document.
            indexWriter.updateDocument(new Term(fieldName, text), newDocument);
            indexWriter.commit();
        }
    }

    public static void main(String[] args) throws IOException, ParseException {
        File file2 = new File("E:\\wmx\\luceneIndex");
        // Replace the documents whose "fileName" field contains the term "mybatis"
        // with a document built from the new file.
        indexUpdateByTerm(file2, "fileName", "mybatis", new File("E:\\wmx\\searchsource\\mybatis使用简介_汪茂雄著作.txt"));
    }

    /** Tells whether {@code indexDir} is an existing path that is not a regular file. */
    private static boolean isIndexDir(File indexDir) {
        return indexDir != null && indexDir.exists() && !indexDir.isFile();
    }

    /** Tells whether a field name / text pair is usable: non-empty field name and non-null text. */
    private static boolean isSelector(String fieldName, String text) {
        return fieldName != null && !"".equals(fieldName) && text != null;
    }

    /** Creates a writer on {@code directory} configured for Lucene 4.10.3 with the given analyzer. */
    private static IndexWriter newWriter(Directory directory, Analyzer analyzer) throws IOException {
        return new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_4_10_3, analyzer));
    }

    /** Adds the files of {@code dir} and of all its sub-directories to {@code indexWriter}. */
    private static void addDirectory(IndexWriter indexWriter, File dir) throws IOException {
        File[] entries = dir.listFiles();
        if (entries == null) {
            // listFiles() returns null when the directory cannot be listed.
            return;
        }
        // Sub-directories first, then the directory's own files: this keeps the document order
        // produced by the earlier implementation, which indexed nested directories before
        // writing the files of their parent.
        for (File entry : entries) {
            if (entry.isDirectory()) {
                addDirectory(indexWriter, entry);
            }
        }
        for (File entry : entries) {
            if (!entry.isDirectory()) {
                indexWriter.addDocument(toDocument(entry));
            }
        }
    }

    /**
     * Builds the document describing one file. All three fields are {@code TextField}s, i.e.
     * tokenized and indexed, and use {@code Store.YES} so their values can be read back from a
     * search hit.
     */
    private static Document toDocument(File file) throws IOException {
        Document document = new Document();
        document.add(new TextField(FIELD_FILE_NAME, file.getName(), Store.YES));
        document.add(new TextField(FIELD_FILE_CONTEXT, FileUtils.readFileToString(file), Store.YES));
        document.add(new TextField(FIELD_FILE_SIZE, String.valueOf(file.length()), Store.YES));
        return document;
    }
}

Lucene 索引维护之删除与更新文档

本文导读

删除索引

删除全部索引

删除指定索引

更新索引

完整类文件

猜你喜欢

Lucene 索引维护 之 删除 与 更新 文档

本文导读

删除索引

删除全部索引

删除指定索引

更新索引

完整类文件

猜你喜欢

Lucene 索引维护之删除与更新文档