1 应用领域
在各类考试中,尤其是在艰苦边远地区,由于缺乏专门的考试报名系统,组织报名时常常采取向邮箱发送报名表的方式。此时需要将 DOC 报名表中的数据提取出来,再对数据进行筛选和统计。本文介绍如何用 Java 提取此类报名表中的表格数据。
2 Maven 依赖库
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.8</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.8</version>
</dependency>
3 Java 读取文件夹中的 DOC 文档
/**
 * Extracts the text of every table cell from the Word binary (.doc) files
 * found under a directory, using Apache POI's HWPF API.
 */
public class ReadDocFileFromJava {

    /** File-name suffix that marks a file as a candidate document. */
    private static final String DOC_SUFFIX = ".doc";

    /**
     * Reads every .doc file under a hard-coded directory and prints one line
     * per document, with the cell texts separated by tabs.
     *
     * @param args unused
     * @throws Exception declared for compatibility with existing callers
     */
    public static void main(String[] args) throws Exception {
        // Directory (not a single file) that is scanned recursively for .doc files.
        String dirPath = "D:\\test\\";
        List<List<String>> lists = readMyDocuments(dirPath);
        for (List<String> list : lists) {
            for (String s : list) {
                System.out.print(s + "\t");
            }
            System.out.println();
        }
    }

    /**
     * Reads the table cells of every .doc file under the given directory.
     *
     * <p>A file that cannot be opened or parsed is reported on standard error
     * and skipped, so one bad document does not discard the remaining ones.
     *
     * @param dirPath directory to scan recursively; {@code null} yields an empty result
     * @return one inner list per successfully read document, each holding that
     *         document's cell texts; never {@code null}
     */
    public static List<List<String>> readMyDocuments(String dirPath) {
        List<List<String>> lists = new ArrayList<List<String>>();
        for (String docPath : readDocFilePaths(dirPath)) {
            try {
                lists.add(readSingleDocument(docPath));
            } catch (Exception e) {
                // Keep going: a single corrupt file should not abort the whole batch.
                System.err.println("Failed to read " + docPath);
                e.printStackTrace();
            }
        }
        return lists;
    }

    /**
     * Opens one .doc file and returns the text of its table cells.
     *
     * @param docPath path of the document to open
     * @return the cell texts as produced by {@link #readTable(HWPFDocument)}
     * @throws Exception if the file cannot be opened or parsed
     */
    private static List<String> readSingleDocument(String docPath) throws Exception {
        FileInputStream in = new FileInputStream(docPath);
        try {
            POIFSFileSystem fs = new POIFSFileSystem(in);
            HWPFDocument doc = new HWPFDocument(fs);
            return readTable(doc);
        } finally {
            // Close explicitly rather than relying on the library to do it.
            in.close();
        }
    }

    /**
     * Collects the text of every cell of every table in the document.
     *
     * <p>Cells are visited table by table, row by row, left to right. The
     * paragraphs of one cell are trimmed individually and concatenated without
     * a separator, so each cell contributes exactly one entry.
     *
     * @param doc the opened document
     * @return the cell texts in visiting order; empty if the document has no tables
     * @throws Exception declared for compatibility with existing callers
     */
    public static List<String> readTable(HWPFDocument doc) throws Exception {
        Range range = doc.getRange();
        TableIterator it = new TableIterator(range);
        List<String> result = new ArrayList<String>();
        while (it.hasNext()) {
            Table tb = (Table) it.next();
            for (int i = 0; i < tb.numRows(); i++) {
                TableRow tr = tb.getRow(i);
                for (int j = 0; j < tr.numCells(); j++) {
                    TableCell td = tr.getCell(j);
                    StringBuilder text = new StringBuilder();
                    for (int k = 0; k < td.numParagraphs(); k++) {
                        Paragraph para = td.getParagraph(k);
                        text.append(para.text().trim());
                    }
                    result.add(text.toString());
                }
            }
        }
        return result;
    }

    /**
     * Recursively lists the paths of all .doc files under a directory.
     *
     * <p>The extension is matched case-insensitively, so {@code FORM.DOC} is
     * found as well as {@code form.doc}.
     *
     * @param dirPath directory to scan; may be {@code null}
     * @return the paths of the matching files; empty if {@code dirPath} is
     *         {@code null} or its entries cannot be listed
     */
    public static List<String> readDocFilePaths(String dirPath) {
        List<String> result = new ArrayList<String>();
        if (dirPath == null) {
            return result;
        }
        File[] files = new File(dirPath).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            return result;
        }
        for (File file : files) {
            if (file.isDirectory()) {
                // Sub-directory: descend into it.
                result.addAll(readDocFilePaths(file.getAbsolutePath()));
            } else if (isDocFile(file.getName())) {
                // Not a directory: keep it if it has the .doc extension.
                result.add(file.getPath());
            }
        }
        return result;
    }

    /** Returns whether the file name ends with ".doc", ignoring case. */
    private static boolean isDocFile(String name) {
        int suffixLength = DOC_SUFFIX.length();
        // regionMatches returns false for a negative offset, i.e. for names shorter than the suffix.
        return name.regionMatches(true, name.length() - suffixLength, DOC_SUFFIX, 0, suffixLength);
    }
}