Apache POI 是用于解析 Excel 表格的 Java 开源框架,上手比较容易,因此受到大部分开发者的喜爱。但是我在项目中使用它时遇到了内存溢出的问题,在网上找了很多解决方法都不尽如人意,最后自己想办法解决了该问题。现把之前会内存溢出的代码和优化后的代码都贴出来以供参考。注:我的需求是读取 Excel 表格,并从中取出我所需要的数据。
下面的 DateUtil 类使用 POI 的 usermodel 方式读取表格,只要 Excel 表格的列数和数据量过大,就会导致内存溢出:
package com.rskytech.am.util;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.*;
/**
 * Reads a bench-test Excel export with the POI user model, normalises the
 * signal timestamps and writes each signal's time and value to a CSV file.
 *
 * <p>Layout the methods below rely on (indices refer to the list returned by
 * {@link #readExcel(File)}, in which missing sheet rows are skipped):
 * row 1 / column 1 holds the start time, row 2 holds the signal names, and
 * samples start at row 7. Every signal occupies a group of 4 columns:
 * offset 0 is the time cell, offset 1 the microsecond cell and offset 2 the
 * value cell.
 *
 * <p>NOTE(review): the whole workbook is materialised by POI and then copied
 * into lists, so memory use grows with the sheet size.
 *
 * @author lizehua
 * @date 2018-07-5 17:31 pm
 * @package com.rskytech.am.util.DateUtil
 */
public class DateUtil {

    /** List index of the row whose second cell carries the start time. */
    private static final int START_TIME_ROW = 1;
    /** Column index of the start-time cell inside {@link #START_TIME_ROW}. */
    private static final int START_TIME_COLUMN = 1;
    /** List index of the row that carries the signal names. */
    private static final int SIGNAL_NAME_ROW = 2;
    /** List index of the first row treated as sample data. */
    private static final int FIRST_DATA_ROW = 7;
    /** Number of columns that make up one signal group. */
    private static final int COLUMNS_PER_SIGNAL = 4;

    /**
     * Reads the first sheet of an Excel file.
     * (1) Determines the workbook flavour from the file extension
     *     ({@code xls} or {@code xlsx}, compared case-insensitively).
     * (2) Opens the matching POI workbook and converts it to lists.
     *
     * @param file the Excel file to read
     * @return one list per non-missing sheet row; see {@link #readExcel(File, Workbook)}
     * @throws IOException if the extension is unsupported or the file cannot be read
     */
    public static List<List<Object>> readExcel(File file) throws IOException {
        String fileName = file.getName();
        int dot = fileName.lastIndexOf(".");
        String extension = dot == -1 ? "" : fileName.substring(dot + 1);
        boolean isXls = "xls".equalsIgnoreCase(extension);
        if (!isXls && !"xlsx".equalsIgnoreCase(extension)) {
            // Not a spreadsheet type this reader understands.
            throw new IOException("不支持的文件类型");
        }
        InputStream in = new FileInputStream(file);
        try {
            Workbook hwb;
            if (isXls) {
                hwb = new HSSFWorkbook(in);
            } else {
                hwb = new XSSFWorkbook(in);
            }
            return readExcel(file, hwb);
        } finally {
            // Always release the file handle, whether or not POI closed it already.
            in.close();
        }
    }

    /**
     * Converts the first sheet of a workbook into a list of rows.
     * (1) Sheet rows that do not exist are skipped, so list indices can be
     *     smaller than sheet row indices.
     * (2) Every row is read over the same column range, taken from the
     *     sheet's last row; missing cells become empty strings.
     *
     * <p>The column range is {@code 0..lastRow.getLastCellNum()} inclusive.
     * That is one more cell than the last row holds; this trailing padding
     * cell is kept on purpose so that the row width seen by existing callers
     * does not change.
     *
     * @param file the source file (unused, kept for signature compatibility)
     * @param hwb  the opened workbook
     * @return the rows of the first sheet, empty if the sheet has no rows
     * @throws IOException declared for compatibility with the original signature
     */
    private static List<List<Object>> readExcel(File file, Workbook hwb) throws IOException {
        // ArrayList: callers access rows and cells by index.
        List<List<Object>> list = new ArrayList<List<Object>>();
        Sheet sheet = hwb.getSheetAt(0);
        int lastRowNum = sheet.getLastRowNum();
        Row lastRow = sheet.getRow(lastRowNum);
        if (lastRow == null) {
            // Empty sheet: nothing to read (previously a NullPointerException).
            return list;
        }
        int lastColumn = lastRow.getLastCellNum();
        // Formatters are loop-invariant, so create them once.
        DecimalFormat integerFormat = new DecimalFormat("0");
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        for (int i = sheet.getFirstRowNum(); i <= lastRowNum; i++) {
            Row row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            List<Object> cells = new ArrayList<Object>(Math.max(lastColumn + 1, 0));
            for (int j = 0; j <= lastColumn; j++) {
                Cell cell = row.getCell(j);
                cells.add(cell == null ? "" : cellValue(cell, integerFormat, dateFormat));
            }
            list.add(cells);
        }
        return list;
    }

    /**
     * Converts a single cell to the value stored in the row list.
     * Strings and booleans are returned as-is; numeric cells are rendered
     * according to their data format string: {@code "@"} as an integer
     * string, {@code "General"} as a {@code Double}, anything else as a
     * {@code yyyy-MM-dd HH:mm:ss} date string; blank cells become {@code ""}.
     */
    private static Object cellValue(Cell cell, DecimalFormat integerFormat, SimpleDateFormat dateFormat) {
        switch (cell.getCellType()) {
            case XSSFCell.CELL_TYPE_STRING:
                return cell.getStringCellValue();
            case XSSFCell.CELL_TYPE_NUMERIC: {
                String format = cell.getCellStyle().getDataFormatString();
                if ("@".equals(format)) {
                    return integerFormat.format(cell.getNumericCellValue());
                }
                if ("General".equals(format)) {
                    return cell.getNumericCellValue();
                }
                return dateFormat.format(HSSFDateUtil.getJavaDate(cell.getNumericCellValue()));
            }
            case XSSFCell.CELL_TYPE_BOOLEAN:
                return cell.getBooleanCellValue();
            case XSSFCell.CELL_TYPE_BLANK:
                return "";
            default:
                return cell.toString();
        }
    }

    /**
     * Reads the start time from the Excel file and reformats it.
     * The source cell looks like {@code 30/03/2018 - 16h34:18.440} and the
     * result looks like {@code 2018-03-30 16:34:18.440}.
     *
     * @param execlFIlePath path of the Excel file
     * @return the start time as {@code yyyy-MM-dd HH:mm:ss.SSS}-style text
     * @throws IOException if the file cannot be read
     */
    public static String getStartTime(String execlFIlePath) throws IOException {
        return extractStartTime(readExcel(new File(execlFIlePath)));
    }

    /**
     * Extracts and reformats the start time from already-loaded sheet data.
     * (1) Splits the cell on "-" into the date part and the time part.
     * (2) Splits the date part on "/" and reorders it to year-month-day.
     * (3) Splits the time part on ":" into e.g. "16h34" and "18.440".
     * (4) Splits the first piece on "h" to obtain hour and minute.
     * (5) Splits the second piece on "." to obtain second and fraction.
     * (6) Joins date and time with exactly one space.
     */
    private static String extractStartTime(List<List<Object>> execlData) {
        String rowData = execlData.get(START_TIME_ROW).get(START_TIME_COLUMN).toString();
        String[] dateAndTime = rowData.split("-");
        String[] dayMonthYear = dateAndTime[0].split("/");
        String date = dayMonthYear[dayMonthYear.length - 1].trim()
                + "-" + dayMonthYear[1].trim()
                + "-" + dayMonthYear[0].trim();
        String[] timeParts = dateAndTime[1].split(":");
        String[] hourAndMinute = timeParts[0].split("h");
        String[] secondAndFraction = timeParts[1].split("\\.");
        // Trim the hour: the blank after "-" in the source cell must not
        // decide whether date and time end up separated.
        String time = hourAndMinute[0].trim() + ":" + hourAndMinute[1]
                + ":" + secondAndFraction[0] + "." + secondAndFraction[1];
        return date + " " + time;
    }

    /**
     * Rewrites a raw time cell such as {@code 16h34:18,440} as
     * {@code <date> 16:34:18<separator>440}.
     *
     * @param date              the date prefix (year-month-day)
     * @param rawTime           the raw cell text
     * @param fractionSeparator text placed between seconds and milliseconds
     */
    private static String formatSignalTime(String date, String rawTime, String fractionSeparator) {
        String[] hourMinuteAndRest = rawTime.split(":");
        String[] hourAndMinute = hourMinuteAndRest[0].split("h");
        String[] secondAndMillis = hourMinuteAndRest[1].split(",");
        return date + " " + hourAndMinute[0] + ":" + hourAndMinute[1]
                + ":" + secondAndMillis[0] + fractionSeparator + secondAndMillis[1];
    }

    /**
     * Formats the timestamp of every sample of every signal.
     * For each signal group of each data row the result contains
     * {@code <date> HH:mm:ss,<millis>,<micros>}, where {@code micros} is the
     * microsecond cell multiplied by 1000 and truncated to a whole number.
     *
     * @param execlFilePath path of the Excel file
     * @return the formatted timestamps, row by row and signal by signal
     * @throws IOException if the file cannot be read
     */
    public static List<String> formatDate(String execlFilePath) throws IOException {
        // Read the workbook once; the start time comes from the same data.
        List<List<Object>> execlData = readExcel(new File(execlFilePath));
        String date = extractStartTime(execlData).split(" ")[0];
        List<String> formatted = new ArrayList<String>();
        for (int r = FIRST_DATA_ROW; r < execlData.size(); r++) {
            List<Object> row = execlData.get(r);
            for (int c = 0; c < row.size(); c += COLUMNS_PER_SIGNAL) {
                String time = formatSignalTime(date, row.get(c).toString(), ",");
                double scaled = Double.parseDouble(row.get(c + 1).toString()) * 1000;
                // Truncate numerically; cutting the text at "." breaks for
                // values that print in scientific notation.
                formatted.add(time + "," + (long) scaled);
            }
        }
        return formatted;
    }

    /**
     * Reads the Excel file and writes each signal's time and value to a CSV file.
     * (1) Loads the sheet and derives the date from the start time.
     * (2) Writes a header line {@code <name>_time,<name>_value,...} built
     *     from the signal-name row.
     * (3) Writes one line per data row containing, for every signal, the
     *     timestamp {@code <date> HH:mm:ss.SSS} and the signal value. The
     *     milliseconds are joined with "." so they do not introduce an
     *     extra CSV column.
     *
     * @param execlFilePath path of the Excel file to read
     * @param csvFilePath   path of the CSV file to write (UTF-8)
     * @return the Excel file's base name with a {@code .csv} suffix; note
     *         that this is derived from {@code execlFilePath}, not from
     *         {@code csvFilePath}
     * @throws IOException if reading or writing fails
     */
    public static String writExeclToCsvFile(String execlFilePath, String csvFilePath) throws IOException {
        // Read the workbook once; the start time comes from the same data.
        List<List<Object>> rows = readExcel(new File(execlFilePath));
        String date = extractStartTime(rows).split(" ")[0];

        Writer writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(csvFilePath), "UTF-8"));
        try {
            List<Object> signNames = rows.get(SIGNAL_NAME_ROW);
            StringBuilder header = new StringBuilder();
            for (int c = 0; c < signNames.size(); c += COLUMNS_PER_SIGNAL) {
                if (header.length() > 0) {
                    header.append(',');
                }
                String name = signNames.get(c).toString();
                header.append(name).append("_time").append(',').append(name).append("_value");
            }
            writer.write(header.append('\n').toString());

            for (int r = FIRST_DATA_ROW; r < rows.size(); r++) {
                List<Object> row = rows.get(r);
                StringBuilder line = new StringBuilder();
                for (int c = 0; c < row.size(); c += COLUMNS_PER_SIGNAL) {
                    if (line.length() > 0) {
                        line.append(',');
                    }
                    line.append(formatSignalTime(date, row.get(c).toString(), "."))
                            .append(',')
                            .append(row.get(c + 2).toString());
                }
                writer.write(line.append('\n').toString());
            }
        } finally {
            // Flushes and releases the file even when a row fails to parse.
            writer.close();
        }

        String[] pathSegments = execlFilePath.split("/");
        String csvFileName = pathSegments[pathSegments.length - 1].split("\\.")[0] + ".csv";
        System.out.println("写入csv文件名是======>: " + csvFileName);
        return csvFileName;
    }

    /** Manual smoke run against a local sample file. */
    public static void main(String[] args) {
        String filePath = "/home/lizehua/poiTest/20180330-第一次试验-试验阶段.xls";
        String csvFilePath = "/home/lizehua/carCsv/2018-03-30_data.csv";
        try {
            writExeclToCsvFile(filePath, csvFilePath);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
下面贴出优化后的类 XlsToCsv,它基于 POI 的事件模型(eventusermodel)逐条处理记录,不会出现内存溢出:
package com.rskytech.am.util;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.PrintStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
public class XlsToCsv implements HSSFListener {
private int minColumns;
private POIFSFileSystem fs;
private PrintStream output;
private int lastRowNumber;
private int lastColumnNumber;
public long time_cha;
public long getTime_cha() {
return time_cha;
}
public void setTime_cha(long time_cha) {
this.time_cha = time_cha;
}
/**
* Should we output the formula, or the value it has?
*/
private boolean outputFormulaValues = true;
/**
* For parsing Formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/**
* So we known which sheet we're on
*/
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private String d = "";
private final String OUTPUT_CHARSET = "UTF-8";
// private final String OUTPUT_CHARSET = "GBK";
private int count = 0;
public static String deal_time = "";
/**
* Creates a new XLS -> CSV converter
*
* @param fs The POIFSFileSystem to process
* @param output The PrintStream to output the CSV to
* @param minColumns The minimum number of columns to output, or -1 for no minimum
*/
public XlsToCsv(POIFSFileSystem fs, PrintStream output, int minColumns) {
this.fs = fs;
this.output = output;
this.minColumns = minColumns;
}
public XlsToCsv(String inputFilePath, String outputFilePath) throws Exception {
fs = new POIFSFileSystem(new FileInputStream(inputFilePath));
output = new PrintStream(outputFilePath, OUTPUT_CHARSET);
minColumns = -1;
}
/**
* Creates a new XLS -> CSV converter
*
* @param filename The file to process
* @param minColumns The minimum number of columns to output, or -1 for no minimum
* @throws IOException
* @throws FileNotFoundException
*/
public XlsToCsv(String filename, int minColumns) throws IOException, FileNotFoundException {
this(new POIFSFileSystem(new FileInputStream(filename)), System.out, minColumns);
}
public XlsToCsv() {
}
public XlsToCsv(long time_cha) {
this.time_cha = time_cha;
}
/**
* Initiates the processing of the XLS file to CSV
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* Main HSSFListener method, processes events, and outputs the CSV as the
* file is processed.
*/
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String sign = "";
switch (record.getSid()) {
case BoundSheetRecord.sid:
//boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// Create sub workbook if required
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
// Output the worksheet name
// Works by ordering the BSRs by the location of
// their BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
// thisStr = "";
break;
case BoolErrRecord.sid:
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
// thisStr = "";
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
// thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
thisStr = HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression());
}
break;
case StringRecord.sid:
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
// thisStr = '"' + lrec.getValue() + '"';
thisStr = lrec.getValue();
break;
case LabelSSTRecord.sid:
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
thisStr = '"' + "(No SST Record, can't identify string)" + '"';
} else if (sstRecord.getString(lsrec.getSSTIndex()).toString().contains("30/03/2018 - 16h34:18.440")) {
// thisStr = '"' + sstRecord.getString(lsrec.getSSTIndex()).toString() + '"';
thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
//用-分割得到时间数据的年月日和小时,分钟秒的数据
String[] splitData = thisStr.split("-");
//格式化年月日的数据
String[] date = splitData[0].split("/");
//格式化小时分钟秒的数据
String[] time = splitData[1].split(":");
String[] hourAndMount = time[0].split("h");
String hours = hourAndMount[0] + ":" + hourAndMount[1] + ":" + time[1].split("\\.")[0] + "." + time[1].split("\\.")[1];
d = date[date.length - 1].trim() + "-" + date[1] + "-" + date[0];
deal_time = d + " " + hours;
thisStr = "";
} else {
//处理时间格式
thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString();
// System.out.println("------->" + thisStr);
if (thisStr.contains("h")) {
String deal_get_time_split[] = thisStr.split(":");
String deal_hour_minute[] = deal_get_time_split[0].split("h");
String deal_hour_minute_ = deal_hour_minute[0] + ":" + deal_hour_minute[1];
String deal_seconds_millis[] = deal_get_time_split[1].split(",");
String deal_seonds_millis_ = deal_seconds_millis[0] + "." + deal_seconds_millis[1];
String deal_date = d + " " + deal_hour_minute_ + ":" + deal_seonds_millis_;
// thisStr = d + " " + deal_hour_minute_ + ":" + deal_seonds_millis_;
try {
long t = SystemConstants.stdMSsdf.parse(deal_date).getTime() + this.getTime_cha();
// System.out.println("时间差粗例=========> " + t);
String s = SystemConstants.stdMSsdf.format(new Date(t));
System.out.println("s===" + s);
thisStr = s;
} catch (ParseException e) {
e.printStackTrace();
}
} else if (thisStr.contains("Sign")) {
//处理信号的信号名。包括信号产生时间和对应的值
sign += thisStr + "_time," + thisStr + "_value";
thisStr = sign;
} else {
thisStr = "";
}
}
break;
case NoteRecord.sid:
NoteRecord nrec = (NoteRecord) record;
thisRow = nrec.getRow();
thisColumn = nrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
case NumberRecord.sid:
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
// Format
thisStr = formatListener.formatNumberDateCell(numrec);
break;
case RKRecord.sid:
RKRecord rkrec = (RKRecord) record;
thisRow = rkrec.getRow();
thisColumn = rkrec.getColumn();
thisStr = '"' + "(TODO)" + '"';
break;
default:
break;
}
// Handle new row
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// Handle missing column
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
thisRow = mc.getRow();
thisColumn = mc.getColumn();
thisStr = "";
}
// If we got something to print out, do so
if (thisStr != null) {
if (thisStr == "") {
} else {
// System.out.println("thisColumn :" + thisColumn + " thisStr: " + thisStr);
if (thisColumn > 0) {
// System.out.println(thisStr);
// if (thisStr.contains("0.0") || thisStr.equalsIgnoreCase("0") || thisStr.equalsIgnoreCase("100")||thisStr.equalsIgnoreCase("2")){
//控制那一列不被写入到csv文件中,具体以thisStr来决定
if ((thisColumn -1) % 4 == 0){
// System.out.println("us=====>" + thisStr );
thisStr = "";
} else {
output.print(',');
}
}
output.print(thisStr);
}
}
// Update column and row count
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// Handle end of row
if (record instanceof LastCellOfRowDummyRecord) {
if (((LastCellOfRowDummyRecord) record).getRow() > 7) {
// Print out any missing commas if needed
if (minColumns > 0) {
// Columns are 0 based
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
for (int i = lastColumnNumber; i < (minColumns); i++) {
output.print(',');
}
}
// We're onto a new row
lastColumnNumber = -1;
// End the row
output.println();
}
}
}
public static void main(String[] args) throws Exception {
long star = System.currentTimeMillis();
System.out.println("开始读取的时间是======》:: " + star);
String inputPath2 = "/home/java/data/fault_database/20180330-第一次试验-试验阶段.xls";
String outputPath2 = "/home/java/data/fault_database/data.csv";
// XlsToCsv.time_cha=2000;
XlsToCsv xls2csv = new XlsToCsv(inputPath2, outputPath2);
xls2csv.setTime_cha(2000);
xls2csv.process();
System.out.println(XlsToCsv.deal_time);
long end = System.currentTimeMillis();
System.out.println("总过用时==========>: " + (end - star));
}
}