最近几天忙于Excel的操作,用到一些东西,记录下来。
第一步直奔http://search.maven.org/下载一些POI的jar,其中包括poi-3.8、poi-ooxml-3.8、poi-ooxml-schemas-3.8。
先建立2个对象一个行对象,一个列对象
package com.zskx.pem.standalone.excel; /** * Excel列对象 * @author yuzhang */ public class XCell { private int rowIndex; private int columnIndex; private String value; public int getRowIndex() { return rowIndex; } public void setRowIndex(int rowIndex) { this.rowIndex = rowIndex; } public int getColumnIndex() { return columnIndex; } public void setColumnIndex(int columnIndex) { this.columnIndex = columnIndex; } public String getValue() { return value; } public void setValue(String value) { this.value = value; } }
package com.zskx.pem.standalone.excel; import java.util.ArrayList; import java.util.List; /** * Excel行对象 * @author yuzhang */ public class XRow { private int rowIndex; private List<XCell> cells=new ArrayList<XCell>(); public int getRowIndex() { return rowIndex; } public void setRowIndex(int rowIndex) { this.rowIndex = rowIndex; } public int getCellsSize() { return cells.size(); } public XRow addCell(XCell cell){ this.cells.add(cell); return this; } public XCell getCell(int cellIndex){ return cells.get(cellIndex); } }
然后建立处理行数据的接口
package com.zskx.pem.standalone.excel; import java.io.IOException; /** * 行数据处理接口 * @author yuzhang */ public interface ExcelRowProcessor{ public void process() throws Exception; public void stop() throws IOException; }
建立建立个2003处理器
package com.zskx.pem.standalone.excel; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.HSSFFormulaParser; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.BoundSheetRecord; import org.apache.poi.hssf.record.FormulaRecord; import org.apache.poi.hssf.record.LabelRecord; import org.apache.poi.hssf.record.LabelSSTRecord; import org.apache.poi.hssf.record.NoteRecord; import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.RKRecord; import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public abstract class Excel2003RowProcessor implements ExcelRowProcessor { private MyHSSFListener hssfListener; private File file; private InputStream is; private POIFSFileSystem fs; private int lastRowNumber; private int lastColumnNumber; /** Should we output the formula, or the value it has? */ private boolean outputFormulaValues = true; /** For parsing Formulas */ private SheetRecordCollectingListener workbookBuildingListener; private HSSFWorkbook stubWorkbook; // Records we pick up as we process private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; /** So we known which sheet we're on */ private int sheetIndex = -1; private int optSheetIndex = -1; private BoundSheetRecord[] orderedBSRs; private ArrayList<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>(); // For handling formulas with string results private int nextRow; private int nextColumn; private boolean outputNextStringRecord; private XRow row = new XRow(); /** * 构造Excel-2003行级解析器 * * @param filename * excel完整文件名 * @throws IOException */ public Excel2003RowProcessor(File file) throws Exception { if (file.getName().endsWith(".xlsx")) { throw new Exception("Excel板式与解析器不匹配,解析器仅支持Excel-2003及以下版本。"); } this.file = file; this.hssfListener = new MyHSSFListener(); } /** * 处理指定索引的sheet,-1则表示处理所有sheet * * @param optSheetIndex * @throws IOException */ private void process(int optSheetIndex) throws IOException { this.optSheetIndex = optSheetIndex; MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(hssfListener); formatListener = new FormatTrackingHSSFListener(listener); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { request.addListenerForAllRecords(formatListener); } else { workbookBuildingListener = new SheetRecordCollectingListener(formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request, fs); } /** * 处理所有sheet */ public void process() throws IOException { this.is = new FileInputStream(file); this.fs = new POIFSFileSystem(is); sheetIndex = -1; this.process(-1); } /** * 处理指定索引的sheet */ public void processByRow(int optSheetIndex) throws Exception { this.is = new FileInputStream(file); this.fs = new POIFSFileSystem(is); sheetIndex = -1; if (optSheetIndex < 1) { throw new Exception("目标sheet索引至少要从1开始。"); } this.process(optSheetIndex); } public void stop() throws IOException { if (is != null) { is.close(); } } /** * 处理行数据的策略 */ public abstract void processRow(XRow row); /** * 辅助实现类,HSSF监听器 * * @author zhangchaofeng * @version 1.0 * @date Sep 28, 2011 */ private class MyHSSFListener implements HSSFListener { public void processRecord(Record record) { int thisRow = -1; int thisColumn = -1; String thisStr = null; switch (record.getSid()) { case BoundSheetRecord.sid: boundSheetRecords.add((BoundSheetRecord) record); break; case BOFRecord.sid: BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { // Create sub workbook if required if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); } // Output the worksheet name // Works by ordering the BSRs by the location of // their BOFRecords, and then knowing that we // process BOFRecords in byte offset order sheetIndex++; // System.out.println("锁定一个sheet:"+sheetIndex+",要操作的sheet:"+optSheetIndex); /* * if(sheetIndex!=optSheetIndex){ * System.out.println("不是要操作的sheet。"); return; } */ if (orderedBSRs == null) { orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords); } } break; case SSTRecord.sid: sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; thisRow = brec.getRow(); thisColumn = brec.getColumn(); thisStr = ""; break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; thisRow = berec.getRow(); thisColumn = berec.getColumn(); thisStr = ""; break; case FormulaRecord.sid: FormulaRecord frec = (FormulaRecord) record; thisRow = frec.getRow(); thisColumn = frec.getColumn(); if (outputFormulaValues) { if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record outputNextStringRecord = true; nextRow = frec.getRow(); nextColumn = frec.getColumn(); } else { thisStr = formatListener.formatNumberDateCell(frec); } } else { thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()); } break; case StringRecord.sid: if (outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; thisStr = srec.getString(); thisRow = nextRow; thisColumn = nextColumn; outputNextStringRecord = false; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; thisRow = lrec.getRow(); thisColumn = lrec.getColumn(); thisStr = lrec.getValue(); break; case LabelSSTRecord.sid: LabelSSTRecord lsrec = (LabelSSTRecord) record; thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); if (sstRecord == null) { thisStr = '"' + "(No SST Record, can't identify string)" + '"'; } else { thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString(); } break; case NoteRecord.sid: NoteRecord nrec = (NoteRecord) record; thisRow = nrec.getRow(); thisColumn = nrec.getColumn(); thisStr = '"' + "(TODO)" + '"'; break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; thisRow = numrec.getRow(); thisColumn = numrec.getColumn(); // Format thisStr = formatListener.formatNumberDateCell(numrec); break; case RKRecord.sid: RKRecord rkrec = (RKRecord) record; thisRow = rkrec.getRow(); thisColumn = rkrec.getColumn(); thisStr = '"' + "(TODO)" + '"'; break; default: break; } // 如果不是要操作的sheet,跳过 if ((sheetIndex + 1) != optSheetIndex && optSheetIndex != -1) { // System.out.println("不是要操作的sheet。"); return; } // Handle new row if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } // Handle missing column if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; thisRow = mc.getRow(); thisColumn = mc.getColumn(); thisStr = ""; } // If we got something to print out, do so if (thisStr != null) { if (thisColumn > 0) { // output.print(','); } row.setRowIndex(thisRow); XCell cell = new XCell(); cell.setValue(thisStr); cell.setColumnIndex(thisColumn); row.addCell(cell); // output.print(thisStr); } // Update column and row count if (thisRow > -1) lastRowNumber = thisRow; if (thisColumn > -1) lastColumnNumber = thisColumn; // Handle end of row if (record instanceof LastCellOfRowDummyRecord) { // Print out any missing commas if needed if (lastColumnNumber == -1) { lastColumnNumber = 0; } // We're onto a new row lastColumnNumber = -1; // End the row if (!isBlankRow(row)) { processRow(row); } row = new XRow(); // output.println(); } } private boolean isBlankRow(XRow row) { boolean b = true; for (int i = 0; i < row.getCellsSize(); i++) { XCell cell = row.getCell(i); if (cell.getValue().equals("") || cell.getValue() == null) { b = false; } } return b; } } }
再建立个2007处理器
package com.zskx.pem.standalone.excel; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; public abstract class Excel2007RowProcessor extends DefaultHandler implements ExcelRowProcessor { private SharedStringsTable sst; private OPCPackage pkg; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private List<String> rowlist = new ArrayList<String>(); private int curRow = 0; private int curCol = 0; public Excel2007RowProcessor(OPCPackage pkg) { this.pkg = pkg; } // excel记录行操作方法,以行索引和行元素列表为参数,对一行元素进行操作,元素为String类型 // public abstract void optRows(int curRow, List<String> rowlist) throws // SQLException ; // excel记录行操作方法,以sheet索引,行索引和行元素列表为参数,对sheet的一行元素进行操作,元素为String类型 public abstract void optRows(XRow row) throws Exception; // 只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3 public void processOneSheet(int sheet) throws Exception { XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // 默认以rId+加sheet下标 InputStream sheet2 = r.getSheet("rId" + sheet); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } /** * 遍历 excel 文件 */ public void process() throws Exception { XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 单元格 if (name.equals("c")) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); } catch (Exception e) { } } // t 是数据标签 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("") ? " " : value; rowlist.add(curCol, value); curCol++; } else if (name.equals("t")) { String value = lastContents.trim(); value = value.equals("") ? " " : value; rowlist.add(curCol, value); curCol++; } else { // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row")) { try { XRow row = new XRow(); for (int i = 0; i < rowlist.size(); i++) { XCell cell = new XCell(); cell.setColumnIndex(i); cell.setRowIndex(curRow); cell.setValue((String) rowlist.get(i)); row.addCell(cell); } if (curRow > 0) optRows(row); } catch (Exception e) { e.printStackTrace(); } rowlist.clear(); curRow++; curCol = 0; } } } public void characters(char[] ch, int start, int length) throws SAXException { // 得到单元格内容的值 lastContents += new String(ch, start, length); } public void stop() throws IOException { if (pkg != null) { pkg.close(); } } }
解析来就是重点了,找到合适的处理器处理,然后建立一个读取行数据的抽象方法
package com.zskx.pem.standalone.excel; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import org.apache.poi.openxml4j.opc.OPCPackage; /** * excel处理器 * @author yuzhang */ public abstract class ExcelProcessor implements ExcelRowProcessor{ private ExcelRowProcessor processor; public ExcelProcessor(File file) throws Exception{ if(file.getName()==null||file.getName().equals("")){ throw new Exception("构造Excel导入器失败,未指定文件全名。"); } if(!file.exists()){ throw new Exception("构造Excel导入器失败,指定的文件不存在:"+file.getName()); } try { OPCPackage op=OPCPackage.open(new FileInputStream(file)); processor=new MyExcel2007RowProcessor(op); } catch (Exception e1) { processor=new MyExcel2003RowProcessor(file); e1.printStackTrace(); } stop(); } @Override public void process() throws Exception { processor.process(); } @Override public void stop() throws IOException { processor.stop(); } public abstract void processRow(XRow row); private class MyExcel2003RowProcessor extends Excel2003RowProcessor{ public MyExcel2003RowProcessor(File file) throws Exception { super(file); } @Override public void processRow(XRow row) { ExcelProcessor.this.processRow(row); } } private class MyExcel2007RowProcessor extends Excel2007RowProcessor{ public MyExcel2007RowProcessor(OPCPackage pkg) throws Exception { super(pkg); } @Override public void optRows(XRow row) { ExcelProcessor.this.processRow(row); } } }
最后就是建立读取行数据的类了
package com.zskx.pem.standalone.excel; import java.io.File; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class ReadExcel extends ExcelProcessor { public ReadExcel(File file) throws Exception { super(file); } /** * 在这里处理数据 */ @Override public void processRow(XRow row) { for (int i = 0; i < row.getCellsSize(); i++) { System.out.print("[" + row.getRowIndex() + "," + row.getCell(i).getColumnIndex() + "," + row.getCell(i).getValue() + "]"); setUserInfoByIndex(row.getCell(i).getColumnIndex(), row.getCell(i).getValue()); } System.out.println(); } public static void readExcel(File file) { try { ReadExcel reader = new ReadExcel(file); reader.process();// 处理所有的sheet } catch (Exception e) { e.printStackTrace(); } // reader.stop();//运行一半需要停止调用此方法,释放文件流,正常运行完毕会自动释放 } }
打工搞成。