java excel importación y exportación de grandes volúmenes de datos

Gran cantidad de importación y exportación de datos, ¡la prueba profesional puede resolver la importación y exportación de millones de Excel!

base:

La longitud máxima de cada hoja del archivo de Excel de la versión 03 (xls) es 65535

07 versión (xlsx) archivo de Excel La longitud máxima de cada hoja es 1.04 millones

Después de la versión 07, el archivo subyacente es xml, puede cambiar el sufijo xlsx a rar y abrirlo con un software de descompresión para ver el archivo de origen

Importar:

Paquete jar requerido: 3

poi-ooxml-3.12-beta1-sources.jar
poi-3.12-beta1-sources.jar

<! - Jar necesario para analizar Excel con un gran volumen de datos ->

<groupId> org.eclipse.birt.runtime.3_7_1 </groupId>

<artifactId> org.apache.xerces </artifactId>

</dependencia>
xerces-2.7.1.jar

Código de herramienta:
BigDataParseExcel.java

 package com.winit.sms.common.excel;

 import java.io.InputStream;

 import java.sql.SQLException;

 import java.util.ArrayList;

 import java.util.Iterator;

 import java.util.List;

 import org.apache.poi.xssf.eventusermodel.XSSFReader;

 import org.apache.poi.xssf.model.SharedStringsTable;

 import org.apache.poi.xssf.usermodel.XSSFRichTextString;

 import org.apache.poi.openxml4j.opc.OPCPackage;

 import org.xml.sax.Attributes;

 import org.xml.sax.InputSource;

 import org.xml.sax.SAXException;

 import org.xml.sax.XMLReader;

 import org.xml.sax.helpers.DefaultHandler;

 import org.xml.sax.helpers.XMLReaderFactory;

 /**

 * XSSF and SAX (Event API)

 */

 public abstract class BigDataParseExcel extends DefaultHandler {

 private SharedStringsTable sst;

 private String lastContents;

 private boolean nextIsString;

 private int sheetIndex = -1;

 private List<String> rowlist = new ArrayList<String>();

 private int curRow = 0; //当前行

 private int curCol = 0; //当前列索引

 private int preCol = 0; //上一列列索引

 private int titleRow = 0; //标题行，一般情况下为0

 private int rowsize = 0; //列数

 //excel记录行操作方法，以sheet索引，行索引和行元素列表为参数，对sheet的一行元素进行操作，元素为String类型

 public abstract void optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException;

 //只遍历一个sheet，其中sheetId为要遍历的sheet索引，从1开始，1-3

 /**

 *

 * @param filename

 * @param sheetId sheetId为要遍历的sheet索引，从1开始，1-3

 * @throws Exception

 */

 public void processOneSheet(String filename,int sheetId) throws Exception {

 OPCPackage pkg = OPCPackage.open(filename);

 XSSFReader r = new XSSFReader(pkg);

 SharedStringsTable sst = r.getSharedStringsTable();

 XMLReader parser = fetchSheetParser(sst);

 // rId2 found by processing the Workbook

 // 根据 rId# 或 rSheet# 查找sheet

 InputStream sheet2 = r.getSheet("rId"+sheetId);

 sheetIndex++;

 InputSource sheetSource = new InputSource(sheet2);

 parser.parse(sheetSource);

 sheet2.close();

 }

 /**

 * 遍历 excel 文件

 */

 public void process(String filename) throws Exception {

 OPCPackage pkg = OPCPackage.open(filename);

 XSSFReader r = new XSSFReader(pkg);

 SharedStringsTable sst = r.getSharedStringsTable();

 XMLReader parser = fetchSheetParser(sst);

 Iterator<InputStream> sheets = r.getSheetsData();

 while (sheets.hasNext()) {

 curRow = 0;

 sheetIndex++;

 InputStream sheet = sheets.next();

 InputSource sheetSource = new InputSource(sheet);

 parser.parse(sheetSource);

 sheet.close();

 }

 }

 public XMLReader fetchSheetParser(SharedStringsTable sst)

 throws SAXException {

 XMLReader parser = XMLReaderFactory.createXMLReader();

 //.createXMLReader("org.apache.xerces.parsers.SAXParser");

 this.sst = sst;

 parser.setContentHandler(this);

 return parser;

 }

 public void startElement(String uri, String localName, String name,

 Attributes attributes) throws SAXException {

 // c => 单元格

 if (name.equals("c")) {

 // 如果下一个元素是 SST 的索引，则将nextIsString标记为true

 String cellType = attributes.getValue("t");

 String rowStr = attributes.getValue("r");

 curCol = this.getRowIndex(rowStr);

 if (cellType != null && cellType.equals("s")) {

 nextIsString = true;

 } else {

 nextIsString = false;

 }

 }

 // 置空

 lastContents = "";

 }

 public void endElement(String uri, String localName, String name)

 throws SAXException {

 // 根据SST的索引值的到单元格的真正要存储的字符串

 // 这时characters()方法可能会被调用多次

 if (nextIsString) {

 try {

 int idx = Integer.parseInt(lastContents);

 lastContents = new XSSFRichTextString(sst.getEntryAt(idx))

 .toString();

 } catch (Exception e) {

 }

 }

 // v => 单元格的值，如果单元格是字符串则v标签的值为该字符串在SST中的索引

 // 将单元格内容加入rowlist中，在这之前先去掉字符串前后的空白符

 if (name.equals("v")) {

 String value = lastContents.trim();

 value = value.equals("")?" ":value;

 int cols = curCol-preCol;

 if (cols>1){

 for (int i = 0;i < cols-1;i++){

 rowlist.add(preCol,"");

 }

 }

 preCol = curCol;

 rowlist.add(curCol-1, value);

 }else {

 //如果标签名称为 row ，这说明已到行尾，调用 optRows() 方法

 if (name.equals("row")) {

 int tmpCols = rowlist.size();

 if(curRow>this.titleRow && tmpCols<this.rowsize){

 for (int i = 0;i < this.rowsize-tmpCols;i++){

 rowlist.add(rowlist.size(), "");

 }

 }

 try {

 optRows(sheetIndex,curRow,rowlist);

 } catch (SQLException e) {

 e.printStackTrace();

 }

 if(curRow==this.titleRow){

 this.rowsize = rowlist.size();

 }

 rowlist.clear();

 curRow++;

 curCol = 0;

 preCol = 0;

 }

 }

 }

 public void characters(char[] ch, int start, int length)

 throws SAXException {

 //得到单元格内容的值

 lastContents += new String(ch, start, length);

 }

 //得到列索引，每一列c元素的r属性构成为字母加数字的形式，字母组合为列索引，数字组合为行索引，

 //如AB45,表示为第（A-A+1）*26+（B-A+1）*26列，45行

 public int getRowIndex(String rowStr){

 rowStr = rowStr.replaceAll("[^A-Z]", "");

 byte[] rowAbc = rowStr.getBytes();

 int len = rowAbc.length;

 float num = 0;

 for (int i=0;i<len;i++){

 num += (rowAbc[i]-'A'+1)*Math.pow(26,len-i-1 );

 }

 return (int) num;

 }

 public int getTitleRow() {

 return titleRow;

 }

 public void setTitleRow(int titleRow) {

 this.titleRow = titleRow;

 }

 //如何使用

 public static void main(String[] args) throws Exception {

 long start = System.currentTimeMillis();

 BigDataParseExcel xlx = new BigDataParseExcel(){


 @Override

 public void optRows(int sheetIndex, int curRow, List<String> rowlist) throws SQLException {

 System.out.println(rowlist);

 }

 };

 xlx.process("C:\\Users\\winitadmin\\导出123.xlsx");

 long end = System.currentTimeMillis();

 System.out.println((end-start)/1000);

 }

 }

Analice una fila de datos como una lista: entre ellos, preste especial atención a 2 puntos:

1. Debido a que se analiza línea por línea en función de xml, las filas o columnas en blanco que se han manipulado también se pueden analizar.

2. Preste especial atención al formato de la fecha. La fecha reconocible por Excel se analizará en números (puede contener decimales). El significado de este número representa el número de días desde 1900-01-01 hasta esta fecha. La parte decimal es la proporción de horas, minutos y segundos.

El método para procesar el formato de fecha por separado:

/**

  * 此方法处理excel 的日期格式， sax解析出来的日期为 1900-01-01 到此日期的天数

  * 故而要转换， 此方法暂时精确的天， 如若要更精确，请对datum这个数的小数部分做处理

  *

  * 据经验来看，日期格式有两种， 一种是excel可识别的日期格式（这种用上面的两行方法解决）；

  * 另一种是20170215，excel识别为字符串，用simpleDataFormate解析

  *

  * @param datum

  * @return

  * @throws IllegalAccessException

  * @throws IllegalArgumentException

  */

  private Date dealXmlExcelDate(String datum){

  if(StringUtils.isBlank(datum)){

  return new Date();

  };

  Double doubleDay = Double.parseDouble(datum);

  SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

  Date date = new Date();

  try {

  date = sdf.parse("1900-01-01");

  } catch (ParseException e) {

  e.printStackTrace();

  }

  Calendar ca = Calendar.getInstance();

  ca.setTime(date);

  //42934 7 -20 //39668 8 8

  ca.add(Calendar.DAY_OF_YEAR, doubleDay.intValue());

  ca.add(Calendar.DAY_OF_YEAR, -2);

  return ca.getTime();

  }

presta atención:

Debido al método principal de cargar Excel en la clase de herramienta OPCPackage pkg = OPCPackage.open (nombre de archivo);

Hay dos métodos de sobrecarga: OPCPackage.open (archivo); // archivo

OPCPackage.open (fileInputStream); // Flujo de archivos

Este último depende en gran medida de la memoria, por lo que en las aplicaciones reales, si solo puede obtener el flujo de archivos, se recomienda copiar el archivo en el archivo local a través del flujo primero y luego usar la clase de herramienta de análisis

Algunas sugerencias:

1. Debido a que se trata de una gran cantidad de análisis de datos, si los datos electrónicos analizados deben insertarse en la base de datos, no se recomienda analizar uno e insertar uno. Puede analizar varios e insertarlos en lotes. El tamaño recomendado es 500 ~ 1000! (Puede ser asincrónico o no)

2. Si desea garantizar la gestión de las cosas insertadas en este excel de big data, utilice el número de lote para la gestión, es decir, la tabla de diseño se agrega con un número de lote y cada Excel tiene un número de lote. Si el análisis falla o la inserción falla en el medio, elimine por número de lote.

3. Se necesitan unos 25 minutos para completar el análisis y la inserción de 800.000 datos (dependiendo del número real de columnas). Se recomienda que la función de análisis muestre el estado en la página de primer plano o en una barra de progreso para mejorar la experiencia del usuario.

Exportar:

La idea es la siguiente:

1. Primero averigüe el total,

2. De acuerdo con la consulta de paginación total, mientras se evita que el total sea mayor a 1.04 millones, también es necesario generar Excel por hoja

Al exportar big data, utilice la clase SXSSFWorkbook:

Un ejemplo es el siguiente:

 Thread export2 = new Thread() {
 
 

  public void run() throws SPIException {

  logger.info("导出报表开始....");

  long start_time = System.currentTimeMillis();

  // 2.新增成功，获取报表id

  final int statementId = saveStatement(command, statementCommand);

  OutputStream outStream = null;

  Workbook wb = null;

  try {

  CreatExcel2007.copyFile(templateFilePath, tmpFilePath);

  if (StringUtils.isNotBlank(entity.getReconciliationResult())) {

  entity.setResult(entity.getReconciliationResult().replace("(", "").replace(")", "").split(","));

  }

  int total =reconciliationDao.getTotal(entity);//得到导出总数

  logger.info("导出总数："+total);

  int batch = 500;//每次批处理的条数

  int end = 1;

  wb = new SXSSFWorkbook(10000);//通过限制内存中可访问的记录行数来实现其低内存利用，当达到限定值时，新一行数据的加入会引起老一行的数据刷新到硬盘

  Map<Long, CellStyle> styles = getXSSFCellStyles(wb);

  int headRowNum = 2; // 表格头行数

  int sh= 0;

  int max = 1000000;//设置一个sheet最多存储一百万

  // 6.1 生成明细sheet

  while(total>0){//分sheet处理，一个sheet最多存储104万，超过再生成sheet

  Sheet sheet = wb.createSheet();/sheet

  Sheet sheet2 = wb.createSheet();//明细sheet

  wb.setSheetName(sh, "sh"+sh);

  wb.setSheetName(sh+1, "sh明细"+sh);

  sh += 2;//为了控制sheet名称不一样

  createISPHead(sheet.createRow(0), sheet.createRow(1),sheet);//组装表头

  createOrderLineHead(sheet2.createRow(0));//组装明细sheet

  sheet.setForceFormulaRecalculation(true);// 在打开工作簿时是否要求Excel重新计算此工作表上的所有公式。

  sheet2.setForceFormulaRecalculation(true);

  for(int s=batch; s<=max; s+=batch){//分批查询，一次性查询太多，内存无法存下

  logger.info("正在处理"+s+"条");

  if(s>total&& s>batch&&s>(total+batch)){//第一次一定要遍历，除第一次之外的， 后面如果比total大了，就结束循环

  break;

  }

  entity.setStart(end);

  end = end +batch;

  entity.setEnd(end);

  List<Map<String, Object>> listMap = reconciliationDao.queryPageEntities(entity);

  if (listMap.size() > 0) {

  List<String> trackList = new ArrayList<String>();

  //组装Sheet

  buildISPrecSheet(listMap, sheet, s, batch, headRowNum, trackList, styles);

  //组装明细Sheet

  long begin = System.currentTimeMillis();

  buildOrderLineSheet(trackList, command, batch, sheet2, s, styles);

  long endOf = System.currentTimeMillis();

  logger.info("用时："+(begin-endOf)/1000);

  }

  }

  total -= max;

  }

  // 将数据写入到excel文件

  outStream = new FileOutputStream(tmpFilePath);

  logger.info("生成excel开始>>>>>");

  wb.write(outStream);

  ((SXSSFWorkbook) wb).dispose();

  long end_time = System.currentTimeMillis();

  logger.info("导出结束, 用时："+(end_time-start_time)/1000); logger.info("生成excel结束>>>>>");

  logger.info("生成excel结束>>>>>");
 
 

  updateStatement(statementId, Constants.SUCCESS);

  } catch (IOException e1) {

  logger.error("导出上传Excel到文件服务器失败:" , e1);

  updateStatement(statementId, Constants.FAIL);

  throw new Exception(ErrorCode.CREATE_EXCEL_IOEXCEPTION);

  } catch (Exception e) {

  logger.error("导出上传Excel到文件服务器失败:" , e);

  updateStatement(statementId, Constants.FAIL);

  throw new Exception(e.getErrorCode());

  } catch(Exception e){

  e.printStackTrace();

  } finally {

  try {

  if (null != wb) {

  wb.close();

  }

  if (null != outStream) {

  outStream.close();

  }

  } catch (IOException e) {

  updateStatement(statementId, Constants.FAIL);

  logger.error("导出上传Excel到文件服务器失败:" , e);

  }

  }

  }

  };
 
 

  export2.start();

java excel importación y exportación de grandes volúmenes de datos

Supongo que te gusta