首先引入 Maven 依赖,如下:
<!-- Apache POI artifacts and supporting libraries declared for Word document handling. -->
<!-- poi, poi-scratchpad and poi-ooxml are pinned to 3.13; the other artifacts carry their own versions. -->
<!-- NOTE(review): the explicitly declared openxml4j 1.0-beta, ooxml-schemas 1.1 and xmlbeans 2.3.0
     may differ from what poi-ooxml 3.13 resolves transitively; confirm they are still required. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>openxml4j</artifactId>
    <version>1.0-beta</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>
<dependency>
    <groupId>dom4j</groupId>
    <artifactId>dom4j</artifactId>
    <version>1.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.geronimo.specs</groupId>
    <artifactId>geronimo-stax-api_1.0_spec</artifactId>
    <version>1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.3.0</version>
</dependency>
下面编写 Java 工具类,代码如下:
import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.net.URL; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; public class WordReader { public synchronized static String read(String url){ if (url.endsWith("doc") || url.endsWith("DOC")) { return readWord2003(url); }else if (url.endsWith("docx") || url.endsWith("DOCX")) { return readWord2007(url); }else { return ""; } } private static String readWord2007(String url) { POIXMLTextExtractor ex = null; XWPFDocument xwpf = null; InputStream is = null; try { is = new URL(url).openStream(); xwpf = new XWPFDocument(is); ex = new XWPFWordExtractor(xwpf); return ex.getText(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally{ try { ex.close(); } catch (IOException e) { e.printStackTrace(); } try { xwpf.close(); } catch (IOException e) { e.printStackTrace(); } try { is.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } private static String readWord2003(String url) { WordExtractor wordExtractor = null; InputStream fis = null; try { fis = new URL(url).openStream(); wordExtractor = new WordExtractor(fis); String content = wordExtractor.getText(); return content; } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally{ try { wordExtractor.close(); } catch (IOException e) { e.printStackTrace(); } try { fis.close(); } catch (IOException e) { e.printStackTrace(); } } return null; } public static void main(String[] args) { System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F236.doc")); System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F260.docx")); } }