Apache POI使用

使用 Apache POI 解析 Excel 文件：

package excellucene;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;

import com.sun.media.sound.InvalidFormatException;

/**
 * Scans a directory for Excel workbooks and dumps the cells of each workbook's
 * first sheet to the console and to a text file.
 *
 * <p>Written against the Apache POI 3.x API (integer {@code Cell.CELL_TYPE_*}
 * constants).
 */
public class ParseExcel {

    /** Directory scanned when no directory is given on the command line. */
    private static final String DEFAULT_DIR = "C:\\Users\\Desktop\\a01hos\\img";

    /** Text file that {@link #save(String)} appends to. */
    private static final String OUTPUT_FILE = "file/haha.txt";

    /**
     * Lists the Excel files in a directory and parses each of them with
     * {@link #parseXml(String)}.
     *
     * @param args optional; {@code args[0]} is the directory to scan, otherwise
     *             the built-in default directory is used
     * @throws IOException if a canonical path cannot be resolved
     */
    public static void main(String[] args) throws IOException {

        String path = args.length > 0 ? args[0] : DEFAULT_DIR;
        File f = new File(path);

        // listFiles() returns null (not an empty array) when the path is not a
        // directory or cannot be read.
        File[] files = f.listFiles();
        if (files == null) {
            System.err.println("Not a readable directory: " + f.getAbsolutePath());
            return;
        }
        System.out.println(files.length);

        File[] filesxls = f.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // Compare case-insensitively so "REPORT.XLS" is picked up too.
                String lower = name.toLowerCase(java.util.Locale.ROOT);
                return lower.endsWith(".xls") || lower.endsWith(".xlsx");
            }
        });
        if (filesxls == null) {
            System.err.println("Not a readable directory: " + f.getAbsolutePath());
            return;
        }
        System.out.println("Excel文件有： " + filesxls.length);

        ParseExcel parser = new ParseExcel();
        for (File f2 : filesxls) {
            String fileDirectPathName = f2.getCanonicalPath();
            System.out.println(fileDirectPathName);

            parser.parseXml(fileDirectPathName);
        }

        /*
         * Sketch of the Lucene indexing step (not compiled here; it needs the
         * Lucene core and analyzers jars):
         *
         * Directory dir = FSDirectory.open(Paths.get("f:/excelindex"));
         * IndexWriter writer = new IndexWriter(dir,
         *         new IndexWriterConfig(new StandardAnalyzer()));
         *
         * for (File f2 : filesxls) {
         *     Document doc = new Document();
         *     doc.add(new Field("contents",
         *             ExcelFileReader(f2.getCanonicalPath()), TextField.TYPE_NOT_STORED));
         *     doc.add(new Field("filename", f2.getName(), TextField.TYPE_STORED));
         *     doc.add(new StringField("fullpath", f2.getCanonicalPath(), Field.Store.YES));
         *     writer.addDocument(doc);
         * }
         * writer.close();
         */
    }

    /**
     * Extracts the text of an Excel workbook as a single string.
     *
     * <p>The file is read through {@code HSSFWorkbook}, i.e. the binary
     * {@code .xls} format. Formula cells contribute their formula text rather
     * than their computed result, and sheet names are left out.
     *
     * @param fileName path of the workbook to read
     * @return the extracted text
     * @throws IOException if the file cannot be opened or read
     */
    public static String ExcelFileReader(String fileName) throws IOException {
        // try-with-resources releases the file handle and the workbook even
        // when extraction fails.
        try (InputStream in = new FileInputStream(fileName);
                HSSFWorkbook wb = new HSSFWorkbook(in)) {
            ExcelExtractor extractor = new ExcelExtractor(wb);
            extractor.setFormulasNotResults(true);
            extractor.setIncludeSheetNames(false);
            return extractor.getText();
        }
    }

    /**
     * Prints every cell of the first sheet of an Excel workbook, each followed
     * by {@code "---"}, one console line per row, and appends the same cell
     * text to the output file via {@link #save(String)}.
     *
     * <p>Despite its name this method parses Excel workbooks, not XML; the
     * name is kept for existing callers. Failures are reported with a stack
     * trace and otherwise ignored so that one bad file does not stop a batch.
     *
     * @param filename path of the workbook to parse
     */
    public void parseXml(String filename) {
        // Open read-only: the workbook is only read, and closing a read-only
        // workbook cannot write anything back to the source file.
        try (Workbook wb = WorkbookFactory.create(new File(filename), null, true)) {
            Sheet sheet = wb.getSheetAt(0);

            for (Row row : sheet) {
                for (Cell cell : row) {
                    // Convert once and reuse for both console and file output.
                    String text = getCellValue(cell) + "---";
                    System.out.print(text);
                    save(text);
                }
                System.out.println();
            }
        } catch (EncryptedDocumentException | IOException
                | org.apache.poi.openxml4j.exceptions.InvalidFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a cell to its string representation.
     *
     * @param cell the cell to read
     * @return the cell content as a {@code String}: the boolean, error code or
     *         numeric value rendered with {@code String.valueOf}, the formula
     *         text for formula cells, the text for string cells, and
     *         {@code null} for blank cells or unknown cell types
     */
    public Object getCellValue(Cell cell) {
        int type = cell.getCellType();
        String show;
        switch (type) {
        case Cell.CELL_TYPE_BOOLEAN:
            show = String.valueOf(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_ERROR:
            show = String.valueOf(cell.getErrorCellValue());
            break;
        case Cell.CELL_TYPE_FORMULA:
            // The formula text itself, not its evaluated result.
            show = cell.getCellFormula();
            break;
        case Cell.CELL_TYPE_NUMERIC:
            show = String.valueOf(cell.getNumericCellValue());
            break;
        case Cell.CELL_TYPE_STRING:
            show = cell.getStringCellValue();
            break;
        case Cell.CELL_TYPE_BLANK:
        default:
            show = null;
        }
        return show;
    }

    /**
     * Appends a string as one line to the output text file, creating the file
     * and its parent directory when they do not exist yet.
     *
     * @param str the text to append
     * @return {@code true} if the line was written, {@code false} if the
     *         directory could not be created or the write failed
     */
    public boolean save(String str) {
        File f = new File(OUTPUT_FILE);

        // FileWriter creates a missing file but not a missing directory.
        File parent = f.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            System.err.println("Cannot create output directory: " + parent.getAbsolutePath());
            return false;
        }

        // Second FileWriter argument = append mode.
        try (PrintWriter out = new PrintWriter(new FileWriter(f, true))) {
            out.println(str);
            // PrintWriter swallows IOExceptions; checkError() flushes and
            // reports whether any write failed.
            return !out.checkError();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

}

使用 Lucene 搜索已建立的索引：

package excellucene;

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

/**
 * Command-line search over an existing Lucene index: parses a query against
 * the {@code filename} field and prints the stored {@code fullpath} of each
 * hit.
 */
public class SearchExcel {

    /** Index directory used when no arguments are given. */
    private static final String DEFAULT_INDEX_DIR = "F:\\excelindex";

    /** Query string used when no arguments are given. */
    private static final String DEFAULT_QUERY = "zhangxing";

    /**
     * Runs a search.
     *
     * @param args either empty (built-in index directory and query are used)
     *             or exactly two values: the index directory and the query
     * @throws IllegalArgumentException if any other number of arguments is given
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if the query string is not valid query syntax
     */
    public static void main(String[] args) throws IOException, ParseException {
        if (args.length != 0 && args.length != 2) {
            throw new IllegalArgumentException(
                    SearchExcel.class.getName() + " <index dir> <query>");
        }

        String indexDir = args.length == 2 ? args[0] : DEFAULT_INDEX_DIR;
        String q = args.length == 2 ? args[1] : DEFAULT_QUERY;

        search(indexDir, q);
    }

    /**
     * Searches the index for at most ten documents matching the query and
     * prints their stored {@code fullpath} field to standard output. A summary
     * line with the hit count and elapsed time goes to standard error.
     *
     * @param indexDir directory containing the Lucene index
     * @param q        query string, parsed with {@code filename} as the default field
     * @throws IOException if the index cannot be opened or read
     * @throws ParseException if {@code q} is not valid query syntax
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        // Directory, reader and analyzer all hold resources; try-with-resources
        // closes them in reverse order even when parsing or searching fails.
        try (FSDirectory dir = FSDirectory.open(Paths.get(indexDir));
                IndexReader reader = DirectoryReader.open(dir);
                Analyzer analyzer = new StandardAnalyzer()) {

            IndexSearcher is = new IndexSearcher(reader);

            // QueryParser ships in the separate lucene-queryparser jar
            // (lucene-queryparser-7.4.0.jar for this example).
            QueryParser parser = new QueryParser("filename", analyzer);
            Query query = parser.parse(q);

            long start = System.currentTimeMillis();
            TopDocs hits = is.search(query, 10);
            long end = System.currentTimeMillis();

            System.err.println("Found "+hits.totalHits+" document(s) (in "+ (end-start) +" milliseconds) that matched query'"+q+"':");

            for (ScoreDoc scoreDoc : hits.scoreDocs) {
                Document doc = is.doc(scoreDoc.doc);
                System.out.println(doc.get("fullpath"));
            }
        }
    }
}

使用到的 jar 包：

猜你喜欢