大数据导入Excel

最近几天忙于Excel的操作，用到一些东西，记录下来。

第一步直奔http://search.maven.org/下载一些POI的jar，其中包括poi-3.8、poi-ooxml-3.8、poi-ooxml-schemas-3.8。

先建立2个对象一个行对象，一个列对象

package com.zskx.pem.standalone.excel;
/**
 * Excel列对象
 * @author yuzhang
 */
public class XCell {
	private int rowIndex;
	private int columnIndex;
	private String value;
	
	public int getRowIndex() {
		return rowIndex;
	}
	public void setRowIndex(int rowIndex) {
		this.rowIndex = rowIndex;
	}
	public int getColumnIndex() {
		return columnIndex;
	}
	public void setColumnIndex(int columnIndex) {
		this.columnIndex = columnIndex;
	}
	public String getValue() {
		return value;
	}
	public void setValue(String value) {
		this.value = value;
	}
}

package com.zskx.pem.standalone.excel;

import java.util.ArrayList;
import java.util.List;
/**
 * Excel行对象
 * @author yuzhang
 */
public class XRow {
	private int rowIndex;
	private List<XCell> cells=new ArrayList<XCell>();
	
	public int getRowIndex() {
		return rowIndex;
	}
	public void setRowIndex(int rowIndex) {
		this.rowIndex = rowIndex;
	}
	public int getCellsSize() {
		return cells.size();
	}
	public XRow addCell(XCell cell){
		this.cells.add(cell);
		return this;
	}
	public XCell getCell(int cellIndex){
		return cells.get(cellIndex);
	}
}

然后建立处理行数据的接口

package com.zskx.pem.standalone.excel;

import java.io.IOException;

/**
 * 行数据处理接口
 * @author yuzhang
 */
public interface ExcelRowProcessor{
	public void process() throws Exception;
	public void stop() throws IOException;
}

建立建立个2003处理器

package com.zskx.pem.standalone.excel;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BlankRecord;
import org.apache.poi.hssf.record.BoolErrRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.LabelRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.RKRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public abstract class Excel2003RowProcessor implements ExcelRowProcessor {

	private MyHSSFListener hssfListener;

	private File file;

	private InputStream is;

	private POIFSFileSystem fs;

	private int lastRowNumber;

	private int lastColumnNumber;

	/** Should we output the formula, or the value it has? */
	private boolean outputFormulaValues = true;

	/** For parsing Formulas */
	private SheetRecordCollectingListener workbookBuildingListener;

	private HSSFWorkbook stubWorkbook;

	// Records we pick up as we process
	private SSTRecord sstRecord;

	private FormatTrackingHSSFListener formatListener;

	/** So we known which sheet we're on */
	private int sheetIndex = -1;

	private int optSheetIndex = -1;

	private BoundSheetRecord[] orderedBSRs;

	private ArrayList<BoundSheetRecord> boundSheetRecords = new ArrayList<BoundSheetRecord>();

	// For handling formulas with string results
	private int nextRow;

	private int nextColumn;

	private boolean outputNextStringRecord;

	private XRow row = new XRow();

	/**
	 * 构造Excel-2003行级解析器
	 * 
	 * @param filename
	 *            excel完整文件名
	 * @throws IOException
	 */
	public Excel2003RowProcessor(File file) throws Exception {

		if (file.getName().endsWith(".xlsx")) {
			throw new Exception("Excel板式与解析器不匹配，解析器仅支持Excel-2003及以下版本。");
		}
		this.file = file;
		this.hssfListener = new MyHSSFListener();
	}

	/**
	 * 处理指定索引的sheet，-1则表示处理所有sheet
	 * 
	 * @param optSheetIndex
	 * @throws IOException
	 */
	private void process(int optSheetIndex) throws IOException {

		this.optSheetIndex = optSheetIndex;
		MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(hssfListener);
		formatListener = new FormatTrackingHSSFListener(listener);

		HSSFEventFactory factory = new HSSFEventFactory();
		HSSFRequest request = new HSSFRequest();

		if (outputFormulaValues) {
			request.addListenerForAllRecords(formatListener);
		} else {
			workbookBuildingListener = new SheetRecordCollectingListener(formatListener);
			request.addListenerForAllRecords(workbookBuildingListener);
		}

		factory.processWorkbookEvents(request, fs);
	}

	/**
	 * 处理所有sheet
	 */
	public void process() throws IOException {

		this.is = new FileInputStream(file);
		this.fs = new POIFSFileSystem(is);
		sheetIndex = -1;
		this.process(-1);
	}

	/**
	 * 处理指定索引的sheet
	 */
	public void processByRow(int optSheetIndex) throws Exception {

		this.is = new FileInputStream(file);
		this.fs = new POIFSFileSystem(is);
		sheetIndex = -1;
		if (optSheetIndex < 1) {
			throw new Exception("目标sheet索引至少要从1开始。");
		}
		this.process(optSheetIndex);
	}

	public void stop() throws IOException {

		if (is != null) {
			is.close();
		}
	}

	/**
	 * 处理行数据的策略
	 */
	public abstract void processRow(XRow row);

	/**
	 * 辅助实现类，HSSF监听器
	 * 
	 * @author zhangchaofeng
	 * @version 1.0
	 * @date Sep 28, 2011
	 */
	private class MyHSSFListener implements HSSFListener {

		public void processRecord(Record record) {

			int thisRow = -1;
			int thisColumn = -1;
			String thisStr = null;

			switch (record.getSid()) {
			case BoundSheetRecord.sid:
				boundSheetRecords.add((BoundSheetRecord) record);
				break;
			case BOFRecord.sid:
				BOFRecord br = (BOFRecord) record;
				if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
					// Create sub workbook if required
					if (workbookBuildingListener != null && stubWorkbook == null) {
						stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
					}

					// Output the worksheet name
					// Works by ordering the BSRs by the location of
					// their BOFRecords, and then knowing that we
					// process BOFRecords in byte offset order
					sheetIndex++;
					// System.out.println("锁定一个sheet："+sheetIndex+",要操作的sheet:"+optSheetIndex);
					/*
					 * if(sheetIndex!=optSheetIndex){
					 * System.out.println("不是要操作的sheet。"); return; }
					 */
					if (orderedBSRs == null) {
						orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
					}

				}
				break;

			case SSTRecord.sid:
				sstRecord = (SSTRecord) record;
				break;

			case BlankRecord.sid:
				BlankRecord brec = (BlankRecord) record;

				thisRow = brec.getRow();
				thisColumn = brec.getColumn();
				thisStr = "";
				break;
			case BoolErrRecord.sid:
				BoolErrRecord berec = (BoolErrRecord) record;

				thisRow = berec.getRow();
				thisColumn = berec.getColumn();
				thisStr = "";
				break;

			case FormulaRecord.sid:
				FormulaRecord frec = (FormulaRecord) record;

				thisRow = frec.getRow();
				thisColumn = frec.getColumn();

				if (outputFormulaValues) {
					if (Double.isNaN(frec.getValue())) {
						// Formula result is a string
						// This is stored in the next record
						outputNextStringRecord = true;
						nextRow = frec.getRow();
						nextColumn = frec.getColumn();
					} else {
						thisStr = formatListener.formatNumberDateCell(frec);
					}
				} else {
					thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
				}
				break;
			case StringRecord.sid:
				if (outputNextStringRecord) {
					// String for formula
					StringRecord srec = (StringRecord) record;
					thisStr = srec.getString();
					thisRow = nextRow;
					thisColumn = nextColumn;
					outputNextStringRecord = false;
				}
				break;

			case LabelRecord.sid:
				LabelRecord lrec = (LabelRecord) record;

				thisRow = lrec.getRow();
				thisColumn = lrec.getColumn();
				thisStr = lrec.getValue();
				break;
			case LabelSSTRecord.sid:
				LabelSSTRecord lsrec = (LabelSSTRecord) record;

				thisRow = lsrec.getRow();
				thisColumn = lsrec.getColumn();
				if (sstRecord == null) {
					thisStr = '"' + "(No SST Record, can't identify string)" + '"';
				} else {
					thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
				}
				break;
			case NoteRecord.sid:
				NoteRecord nrec = (NoteRecord) record;

				thisRow = nrec.getRow();
				thisColumn = nrec.getColumn();
				thisStr = '"' + "(TODO)" + '"';
				break;
			case NumberRecord.sid:
				NumberRecord numrec = (NumberRecord) record;

				thisRow = numrec.getRow();
				thisColumn = numrec.getColumn();

				// Format
				thisStr = formatListener.formatNumberDateCell(numrec);
				break;
			case RKRecord.sid:
				RKRecord rkrec = (RKRecord) record;

				thisRow = rkrec.getRow();
				thisColumn = rkrec.getColumn();
				thisStr = '"' + "(TODO)" + '"';
				break;
			default:
				break;
			}

			// 如果不是要操作的sheet，跳过
			if ((sheetIndex + 1) != optSheetIndex && optSheetIndex != -1) {
				// System.out.println("不是要操作的sheet。");
				return;
			}

			// Handle new row
			if (thisRow != -1 && thisRow != lastRowNumber) {
				lastColumnNumber = -1;
			}

			// Handle missing column
			if (record instanceof MissingCellDummyRecord) {
				MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
				thisRow = mc.getRow();
				thisColumn = mc.getColumn();
				thisStr = "";
			}

			// If we got something to print out, do so
			if (thisStr != null) {
				if (thisColumn > 0) {
					// output.print(',');
				}
				row.setRowIndex(thisRow);
				XCell cell = new XCell();
				cell.setValue(thisStr);
				cell.setColumnIndex(thisColumn);
				row.addCell(cell);
				// output.print(thisStr);
			}

			// Update column and row count
			if (thisRow > -1)
				lastRowNumber = thisRow;
			if (thisColumn > -1)
				lastColumnNumber = thisColumn;

			// Handle end of row
			if (record instanceof LastCellOfRowDummyRecord) {
				// Print out any missing commas if needed
				if (lastColumnNumber == -1) {
					lastColumnNumber = 0;
				}

				// We're onto a new row
				lastColumnNumber = -1;

				// End the row
				if (!isBlankRow(row)) {
					processRow(row);
				}
				row = new XRow();
				// output.println();
			}
		}

		private boolean isBlankRow(XRow row) {

			boolean b = true;
			for (int i = 0; i < row.getCellsSize(); i++) {
				XCell cell = row.getCell(i);
				if (cell.getValue().equals("") || cell.getValue() == null) {
					b = false;
				}
			}
			return b;
		}
	}
}

再建立个2007处理器

package com.zskx.pem.standalone.excel;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public abstract class Excel2007RowProcessor extends DefaultHandler implements ExcelRowProcessor {

	private SharedStringsTable sst;

	private OPCPackage pkg;

	private String lastContents;

	private boolean nextIsString;

	private int sheetIndex = -1;

	private List<String> rowlist = new ArrayList<String>();

	private int curRow = 0;

	private int curCol = 0;

	public Excel2007RowProcessor(OPCPackage pkg) {

		this.pkg = pkg;
	}

	// excel记录行操作方法，以行索引和行元素列表为参数，对一行元素进行操作，元素为String类型
	// public abstract void optRows(int curRow, List<String> rowlist) throws
	// SQLException ;

	// excel记录行操作方法，以sheet索引，行索引和行元素列表为参数，对sheet的一行元素进行操作，元素为String类型
	public abstract void optRows(XRow row) throws Exception;

	// 只遍历一个sheet，其中sheetId为要遍历的sheet索引，从1开始，1-3
	public void processOneSheet(int sheet) throws Exception {

		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);
		// 默认以rId+加sheet下标
		InputStream sheet2 = r.getSheet("rId" + sheet);
		sheetIndex++;
		InputSource sheetSource = new InputSource(sheet2);
		parser.parse(sheetSource);
		sheet2.close();
	}

	/**
	 * 遍历 excel 文件
	 */
	public void process() throws Exception {

		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		Iterator<InputStream> sheets = r.getSheetsData();
		while (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
	}

	public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {

		XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
		this.sst = sst;
		parser.setContentHandler(this);
		return parser;
	}

	public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

		// c => 单元格
		if (name.equals("c")) {
			// 如果下一个元素是 SST 的索引，则将nextIsString标记为true
			String cellType = attributes.getValue("t");
			if (cellType != null && cellType.equals("s")) {
				nextIsString = true;
			} else {
				nextIsString = false;
			}
		}
		// 置空
		lastContents = "";
	}

	public void endElement(String uri, String localName, String name) throws SAXException {

		// 根据SST的索引值的到单元格的真正要存储的字符串
		// 这时characters()方法可能会被调用多次
		if (nextIsString) {
			try {
				int idx = Integer.parseInt(lastContents);
				lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
			} catch (Exception e) {

			}
		}

		// t 是数据标签
		// 将单元格内容加入rowlist中，在这之前先去掉字符串前后的空白符
		if (name.equals("v")) {
			String value = lastContents.trim();
			value = value.equals("") ? " " : value;
			rowlist.add(curCol, value);
			curCol++;
		} else if (name.equals("t")) {
			String value = lastContents.trim();
			value = value.equals("") ? " " : value;
			rowlist.add(curCol, value);
			curCol++;
		} else {
			// 如果标签名称为 row ，这说明已到行尾，调用 optRows() 方法
			if (name.equals("row")) {
				try {
					XRow row = new XRow();
					for (int i = 0; i < rowlist.size(); i++) {
						XCell cell = new XCell();
						cell.setColumnIndex(i);
						cell.setRowIndex(curRow);
						cell.setValue((String) rowlist.get(i));
						row.addCell(cell);
					}
					if (curRow > 0)
						optRows(row);
				} catch (Exception e) {
					e.printStackTrace();
				}
				rowlist.clear();
				curRow++;
				curCol = 0;
			}
		}
	}

	public void characters(char[] ch, int start, int length) throws SAXException {

		// 得到单元格内容的值
		lastContents += new String(ch, start, length);
	}

	public void stop() throws IOException {

		if (pkg != null) {
			pkg.close();
		}
	}

}

解析来就是重点了，找到合适的处理器处理，然后建立一个读取行数据的抽象方法

package com.zskx.pem.standalone.excel;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.poi.openxml4j.opc.OPCPackage;
/**
 * excel处理器
 * @author yuzhang
 */
public abstract class ExcelProcessor implements ExcelRowProcessor{
	private ExcelRowProcessor processor;
	
	public ExcelProcessor(File file) throws Exception{
		if(file.getName()==null||file.getName().equals("")){
			throw new Exception("构造Excel导入器失败，未指定文件全名。");
		}
		if(!file.exists()){
			throw new Exception("构造Excel导入器失败，指定的文件不存在："+file.getName());
		}
		try {
			OPCPackage op=OPCPackage.open(new FileInputStream(file));
			processor=new MyExcel2007RowProcessor(op);
		} catch (Exception e1) {
			processor=new MyExcel2003RowProcessor(file);
			e1.printStackTrace();
		}
		stop();
	}
	@Override
	public void process() throws Exception {
		processor.process();
	}
	@Override
	public void stop() throws IOException {
		processor.stop();
	}

	public abstract void processRow(XRow row);
	
	private class MyExcel2003RowProcessor extends Excel2003RowProcessor{
		public MyExcel2003RowProcessor(File file) throws Exception {
			super(file);
		}
		@Override
		public void processRow(XRow row) {
			ExcelProcessor.this.processRow(row);
			
		}
	}
	
	private class MyExcel2007RowProcessor extends Excel2007RowProcessor{
		public MyExcel2007RowProcessor(OPCPackage pkg) throws Exception {
			super(pkg);
		}
		@Override
		public void optRows(XRow row) {
			ExcelProcessor.this.processRow(row);
		}
	}
}

最后就是建立读取行数据的类了

package com.zskx.pem.standalone.excel;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ReadExcel extends ExcelProcessor {

	public ReadExcel(File file) throws Exception {

		super(file);

	}

	/**
	 * 在这里处理数据
	 */
	@Override
	public void processRow(XRow row) {

		for (int i = 0; i < row.getCellsSize(); i++) {
			System.out.print("[" + row.getRowIndex() + "," + row.getCell(i).getColumnIndex() + ","
					+ row.getCell(i).getValue() + "]");
			setUserInfoByIndex(row.getCell(i).getColumnIndex(), row.getCell(i).getValue());
		}
		System.out.println();

	}

	public static void readExcel(File file) {

		try {
			ReadExcel reader = new ReadExcel(file);
			reader.process();// 处理所有的sheet
		} catch (Exception e) {
			e.printStackTrace();
		}
		// reader.stop();//运行一半需要停止调用此方法，释放文件流，正常运行完毕会自动释放
	}

}

打工搞成。

猜你喜欢