package com.caac.utils; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.util.PDFTextStripper; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; /** * 获取附件内容公共类 */ public class FjUtils { public FjUtils() { super(); } /** * 读取附件的内容,返回字符串 * @param path 附件路径 * @return */ public static String getFjToStr(String fjPath) { String fjStr = ""; String prefix = ""; if (!"".equals(fjPath)) { prefix = fjPath.substring(fjPath.lastIndexOf(".") + 1); //后缀名 try { if ("doc".equals(prefix) || "docx".equals(prefix)) { //word fjStr = getDocContent(fjPath, fjStr, prefix); } else if ("xls".equals(prefix) || "xlsx".equals(prefix)) { //excel fjStr = getExcelContent(fjPath); } else if ("txt".equals(prefix)) { //txt fjStr = getTxtContent(fjPath, fjStr); } else if ("pdf".equals(prefix)) { //pdf fjStr = getPdfContent(fjPath); } } catch (FileNotFoundException fnfe) { fnfe.printStackTrace(); } catch (IOException ioe) { ioe.printStackTrace(); } } return fjStr; } /** * 获取word内容 * @param fjPath * @param fjStr * @param lx * @return * @throws IOException * @throws FileNotFoundException */ private static String getDocContent(String fjPath, String fjStr, String lx) throws java.io.IOException, java.io.FileNotFoundException { if 
("doc".equals(lx)) { //word 2003 InputStream is = new FileInputStream(new File(fjPath)); WordExtractor ex = new WordExtractor(is); fjStr = ex.getText(); } else if ("docx".equals(lx)) { //word 2007 InputStream is2 = new FileInputStream(new File(fjPath)); XWPFDocument document = new XWPFDocument(is2); POIXMLTextExtractor extractor = new XWPFWordExtractor(document); fjStr = extractor.getText(); } return fjStr; } /** * 获取excel内容 * @param fjPath * @return */ private static String getExcelContent(String fjPath) { StringBuilder result = new StringBuilder(); try { // Excel获得文件 InputStream inp = new FileInputStream(new File(fjPath)); Workbook wb = WorkbookFactory.create(inp); // 获得第一个工作表对象 Sheet sheet = wb.getSheetAt(0); //读取Excel中第一个sheet的数据 int maxRowNum = sheet.getLastRowNum() + 1; //最大行数 int maxCellNum = sheet.getRow(0).getLastCellNum(); //最大列数 // 得到第一列第一行的单元格 for (int i = 0; i < maxRowNum; i++) { for (int j = 0; j < maxCellNum; j++) { if (isBlankRow(sheet.getRow(i), maxCellNum)) { //空行则跳过 continue; } result.append(getCellToStr(sheet.getRow(i).getCell(j)) + ","); } } } catch (Exception e) { e.printStackTrace(); } return result.toString(); } /** * 获取EXCEL单元格的值,一律转为String返回 * @param cell * @return */ private static String getCellToStr(Cell cell) { String value = ""; if (cell != null) { switch (cell.getCellType()) { case Cell.CELL_TYPE_NUMERIC: String v = "" + cell.getNumericCellValue() + ""; value += v; break; case Cell.CELL_TYPE_STRING: value += cell.getStringCellValue(); break; case Cell.CELL_TYPE_FORMULA: break; case Cell.CELL_TYPE_BOOLEAN: value += cell.getBooleanCellValue() + ""; break; default: break; } } return value; } /** * 功能:判断是否是空行 * */ private static boolean isBlankRow(org.apache.poi.ss.usermodel.Row columnRow, int excelLastcell) { String value = ""; for (int i = 0; i < excelLastcell; i++) { Cell cell = columnRow.getCell(i); if (cell != null) { switch (cell.getCellType()) { case Cell.CELL_TYPE_NUMERIC: value += cell.getNumericCellValue() + ""; break; case 
Cell.CELL_TYPE_STRING: value += cell.getStringCellValue(); break; case Cell.CELL_TYPE_FORMULA: break; case Cell.CELL_TYPE_BOOLEAN: value += cell.getBooleanCellValue() + ""; break; default: break; } } } if (value == null || "".equals(value)) { return true; } else { return false; } } /** * 获取pdf内容 * @param fjPath * @return */ private static String getPdfContent(String fjPath) { String str = ""; FileInputStream fis; try { fis = new FileInputStream(new File(fjPath)); PDFParser p = new PDFParser(fis); p.parse(); PDDocument pdd = p.getPDDocument(); PDFTextStripper ts = new PDFTextStripper(); str = ts.getText(pdd); pdd.close(); fis.close(); } catch (Exception e) { } return str; } /** * 获取txt内容 * @param fjPath * @param fjStr * @return * @throws IOException * @throws FileNotFoundException */ private static String getTxtContent(String fjPath, String fjStr) throws java.io.IOException, java.io.FileNotFoundException { StringBuilder result = new StringBuilder(); String bm = getCharset(fjPath); //编码 System.out.println("bm:" + bm); BufferedReader br = null; if ("UTF-8".equals(bm)) { br = new BufferedReader(new InputStreamReader(new FileInputStream(fjPath), "UTF-8")); } else { br = new BufferedReader(new FileReader(new File(fjPath))); //构造一个BufferedReader类来读取文件 } String s = null; while ((s = br.readLine()) != null) { //使用readLine方法,一次读一行 result.append(System.lineSeparator() + s); } br.close(); fjStr = result.toString(); if (!"UTF-8".equals(bm)) { writeFile(fjPath, fjStr); } return fjStr; } /** * 获取txt编码格式 * @param fileName * @return * @throws IOException */ private static String getCharset(String fileName) throws IOException { BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName)); int p = (bin.read() << 8) + bin.read(); String code = null; switch (p) { case 0xefbb: code = "UTF-8"; break; case 0xfffe: code = "Unicode"; break; case 0xfeff: code = "UTF-16BE"; break; default: code = "GBK"; } return code; } /** * 把utf-8编码的内容写回原文件 * @param filePathAndName 
含路径文件名 * @param fileContent 写入文件的字符串 */ public static void writeFile(String filePathAndName, String fileContent) { try { File f = new File(filePathAndName); if (!f.exists()) { f.createNewFile(); } //定义编码 OutputStreamWriter write = new OutputStreamWriter(new FileOutputStream(f), "UTF-8"); BufferedWriter writer = new BufferedWriter(write); writer.write(fileContent); writer.close(); } catch (Exception e) { System.out.println("写文件内容操作出错"); e.printStackTrace(); } } }
// Note: the pdfbox and fontbox library versions must match.
// See the test example testFjContent for reference.