poi-3.8-20120326.jar
poi-examples-3.8-20120326.jar
poi-excelant-3.8-20120326.jar
poi-ooxml-3.8-20120326.jar
poi-ooxml-schemas-3.8-20120326.jar
poi-scratchpad-3.8-20120326.jar
import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.Range; public class WordDemo extends HttpServlet { private static final long serialVersionUID = 1L; public void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { this.doPost(request, response); } public void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { //从硬盘读取一个doc文档 InputStream in = new FileInputStream("F:\\test.doc"); //类从word文档中提取文本,非特殊情况下,都将使用getParagraphText()与getText() WordExtractor word = new WordExtractor(in); //获取段文本 String [] strArray = word.getParagraphText(); String str = word.getText(); for(int i=0 ; i<strArray.length ; i++){ System.out.println(strArray[i]+"\ti循环:"+i); } System.out.println(str +"\t --"); //这个构造函数从InputStream中加载Word文档。 HWPFDocument doc = new HWPFDocument((InputStream)new FileInputStream("F:\\test.doc")); //这个类为HWPF对象模型,对文档范围段操作 Range range = doc.getRange(); // //看看此文档有多少个段落 int num = range.numParagraphs(); System.out.println(num+"段"); //得到word数据流 byte [] dataStream = doc.getDataStream(); System.out.println("数据流长度:"+dataStream.length); //用于在一段范围内获得段落数 int numChar = range.numCharacterRuns(); System.out.println("CharacterRuns 数:"+numChar); //负责图像提取 和 确定一些文件某块是否包含嵌入的图像。 PicturesTable table = new PicturesTable(doc, dataStream, null); for(int j=0 ; j<numChar ; j++){ //这个类表示一个文本运行,有着共同的属性。 CharacterRun run = range.getCharacterRun(j); //是否存在图片 boolean bool = table.hasPicture(run); System.out.println("是否存在图片:"+bool); if(bool){ //返回图片对象绑定到指定的CharacterRun Picture pic = table.extractPicture(run, true); //图片的内容字节写入到指定的输出流。 pic.writeImageContent(new FileOutputStream("F:\\"+j+".bmp")); System.out.println("成功提取图片"+j+":"); } } request.getRequestDispatcher("ok.jsp").forward(request, response); } }
可正常运行及提取图片