使用 Apache POI 读取 Word 2003(.doc)和 Word 2007(.docx)文档

首先在 pom.xml 中引入 Maven 依赖,如下:

		<!-- Apache POI dependencies used by the Word reader utility -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>3.13</version>
		</dependency>
		<!-- NOTE(review): presumably supplies the org.apache.poi.hwpf classes used for .doc files; confirm -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-scratchpad</artifactId>
			<version>3.13</version>
		</dependency>
		<!-- NOTE(review): not referenced directly by the Java code; its version (1.0-beta) does not follow the 3.13 line, verify it is still needed -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>openxml4j</artifactId>
			<version>1.0-beta</version>
		</dependency>
		<!-- NOTE(review): presumably supplies the org.apache.poi.xwpf classes used for .docx files; confirm -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>3.13</version>
		</dependency>
		<!-- NOTE(review): the artifacts below are not referenced directly by the Java code and may
		     already arrive transitively through poi-ooxml; check with "mvn dependency:tree" that these
		     explicit versions do not override the ones poi-ooxml 3.13 expects -->
		<dependency>
			<groupId>dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>1.6.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.geronimo.specs</groupId>
			<artifactId>geronimo-stax-api_1.0_spec</artifactId>
			<version>1.0</version>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>ooxml-schemas</artifactId>
			<version>1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.xmlbeans</groupId>
			<artifactId>xmlbeans</artifactId>
			<version>2.3.0</version>
		</dependency>

下面编写 Java 工具类,代码如下:

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Utility that downloads a Word document from a URL and extracts its plain text.
 *
 * <p>Documents whose URL ends in {@code doc} are read with {@link WordExtractor};
 * documents whose URL ends in {@code docx} are read with {@link XWPFWordExtractor}.
 * The suffix comparison ignores case.
 */
public class WordReader {

    /**
     * Extracts the text of the Word document located at {@code url}.
     *
     * <p>The method is {@code synchronized}, so concurrent calls are serialized.
     *
     * @param url textual URL of the document (anything {@link URL} can open); may be {@code null}
     * @return the extracted text; an empty string when {@code url} is {@code null} or does not
     *         end in {@code doc}/{@code docx}; {@code null} when opening or parsing the document
     *         fails with an {@link IOException} (the stack trace is printed)
     */
    public static synchronized String read(String url) {
        if (url == null) {
            return "";
        }
        // "docx" also ends in neither "doc" nor vice versa, so the order of checks is irrelevant;
        // the more specific suffix is tested first purely for readability.
        if (endsWithIgnoreCase(url, "docx")) {
            return readWord2007(url);
        }
        if (endsWithIgnoreCase(url, "doc")) {
            return readWord2003(url);
        }
        return "";
    }

    /**
     * Reads an OOXML ({@code .docx}) document from {@code url}.
     *
     * @param url textual URL of the document
     * @return the extracted text, or {@code null} if an {@link IOException} occurred
     */
    private static String readWord2007(String url) {
        // Held outside the try block so that a failure while closing the resources
        // does not discard text that was already extracted successfully.
        String text = null;
        // Resources are closed in reverse order (extractor, document, stream) and only
        // if they were actually opened, so a failed open can no longer cause an NPE.
        try (InputStream is = new URL(url).openStream();
             XWPFDocument xwpf = new XWPFDocument(is);
             POIXMLTextExtractor ex = new XWPFWordExtractor(xwpf)) {
            text = ex.getText();
        } catch (IOException e) {
            // Covers FileNotFoundException and MalformedURLException as well.
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Reads a binary ({@code .doc}) document from {@code url}.
     *
     * @param url textual URL of the document
     * @return the extracted text, or {@code null} if an {@link IOException} occurred
     */
    private static String readWord2003(String url) {
        // See readWord2007 for why the result lives outside the try block.
        String text = null;
        try (InputStream fis = new URL(url).openStream();
             WordExtractor wordExtractor = new WordExtractor(fis)) {
            text = wordExtractor.getText();
        } catch (IOException e) {
            // Covers FileNotFoundException and MalformedURLException as well.
            e.printStackTrace();
        }
        return text;
    }

    /**
     * Tells whether {@code value} ends with {@code suffix}, ignoring case.
     *
     * @param value  string to inspect; must not be {@code null}
     * @param suffix suffix to look for; must not be {@code null}
     * @return {@code true} if the tail of {@code value} equals {@code suffix} ignoring case
     */
    private static boolean endsWithIgnoreCase(String value, String suffix) {
        int offset = value.length() - suffix.length();
        return offset >= 0 && value.regionMatches(true, offset, suffix, 0, suffix.length());
    }

    /** Small manual demo: prints the text of one .doc and one .docx document. */
    public static void main(String[] args) {
        System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F236.doc"));
        System.out.println(read("http://file.neeq.com.cn/upload/A0/B0/C2/F260.docx"));
    }
}

猜你喜欢

转载自mcj8089.iteye.com/blog/2247789