使用 Apache POI 读取 Word 文档以及 Word 中的表格

      读取 Word 2003(.doc)文本:

/**
 * Extracts the text of a Word 2003 document with POI's {@code WordExtractor}.
 *
 * <p>The extracted text is also echoed to standard output. Per the original
 * author's note, images in the document are not read.
 *
 * @param docFile the Word 2003 file to read
 * @return the extracted text, or {@code null} if an {@link IOException}
 *         occurred while opening or reading the file (the stack trace is
 *         printed in that case)
 */
public String readDoc(File docFile) {
	String text2003 = null;
	// try-with-resources closes the stream even when extraction throws;
	// previously the stream leaked on any failure before is.close().
	try (InputStream is = new FileInputStream(docFile)) {
		WordExtractor ex = new WordExtractor(is);
		text2003 = ex.getText();
		System.out.println(text2003);
	} catch (IOException e) {
		// Best-effort: report the failure and fall through to return null.
		e.printStackTrace();
	}
	return text2003;
}

读取 Word 2003 文档中的表格内容:

	/**
	 * Reads the text of every table in a Word 2003 document.
	 *
	 * <p>Tables are visited in iterator order; within each table the rows,
	 * cells and cell paragraphs are visited by ascending index. Each paragraph's
	 * trimmed text is echoed to standard output and appended to the result,
	 * followed by a newline.
	 *
	 * @param file the Word 2003 file to read
	 * @return the concatenated, newline-terminated paragraph texts of all table
	 *         cells (an empty string when no table paragraphs are found), or
	 *         {@code null} if reading or parsing failed (the stack trace is
	 *         printed in that case)
	 */
	public String readExcelInWord(File file) {
		String text2003 = null;
		// try-with-resources closes the stream on every path; the original
		// never closed it.
		try (FileInputStream in = new FileInputStream(file)) { // load the document
			POIFSFileSystem pfs = new POIFSFileSystem(in);
			HWPFDocument hwpf = new HWPFDocument(pfs);
			Range range = hwpf.getRange(); // the readable range of the document
			TableIterator it = new TableIterator(range);
			StringBuilder tableText = new StringBuilder();
			// iterate over the tables in the document
			while (it.hasNext()) {
				Table tb = it.next();
				// iterate over rows, starting at index 0
				for (int i = 0; i < tb.numRows(); i++) {
					TableRow tr = tb.getRow(i);
					// iterate over cells, starting at index 0
					for (int j = 0; j < tr.numCells(); j++) {
						TableCell td = tr.getCell(j);
						// iterate over the paragraphs inside the cell
						for (int k = 0; k < td.numParagraphs(); k++) {
							Paragraph para = td.getParagraph(k);
							String s = para.text().trim();
							System.out.println(s);
							tableText.append(s).append('\n');
						}
					} // end for cells
				} // end for rows
			} // end while
			// Only publish the result once the whole document was read, so a
			// failure part-way through still yields null rather than partial text.
			text2003 = tableText.toString();
		} catch (Exception e) {
			// Best-effort: the broad catch is kept so malformed documents are
			// reported instead of propagating to the caller.
			e.printStackTrace();
		}
		return text2003;
	}

 Apache POI: http://poi.apache.org/download.html

猜你喜欢

转载自alog2012.iteye.com/blog/2004190