// Import packages
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.junit.Test;
//read word
@Test
public void readWord(){
try {
//word 2003: the picture will not be read
InputStream is = new FileInputStream(new File("D:/data/ldj/poi reads word/Test/files/2003.doc "));
WordExtractor ex = new WordExtractor(is);
String text2003 = ex.getText();
System.out.println(text2003);
//word 2007 pictures will not be read, the data in the table will be placed in The last
OPCPackage of the string opcPackage = POIXMLDocument.openPackage("D:/Documents/ldj/poi read word/Test/files/2007.docx");
POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
String text2007 = extractor.getText( );
System.out.println(text2007);
} catch (Exception e) {
e.printStackTrace();
}
}
//read the table in word
@Test
public void readWordTable(){
try {
String[] s=new String[300];
FileInputStream in=new FileInputStream("D:/Data/ldj/poi read word/Test/files/757900130000-Family Planning Commission.doc ");
POIFSFileSystem pfs=new POIFSFileSystem(in);
HWPFDocument hwpf=new HWPFDocument(pfs);
Range range =hwpf.getRange();
TableIterator it=new TableIterator(range);
int index=0;
while(it.hasNext()){
Table tb=(Table)it.next();
for(int i=0;i
//System.out.println("Numrows :"+tb.numRows());
TableRow tr=tb.getRow(i);
for(int j=0;j
//System.out.println("numCells :"+tr.numCells()); //
//System.out.println("j :"+j);
TableCell td=tr.getCell(j);
for(int k=0;k
//System.out.println("numParagraphs :"+td.numParagraphs());
Paragraph para=td.getParagraph(k);
s[index]=para.text().trim();
System.out.print(s[index]);
index++;
}
System.out.print(" ");
}
System.out.println("");
}
}
// System.out.println(s.toString());
for(int i=0;i
//System.out.println(s[i]);
}
} catch (Exception e) {
e.printStackTrace ();
}
}