Reading Word documents and Word tables with Apache POI

Required imports

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;

import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;

import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.junit.Test;

 

// Read the text of Word documents (.doc and .docx)

@Test
 public void readWord(){
   try { 
    //word 2003: the picture will not be read 
    InputStream is = new FileInputStream(new File("D:/data/ldj/poi reads word/Test/files/2003.doc ")); 
    WordExtractor ex = new WordExtractor(is); 
   String text2003 = ex.getText(); 
   System.out.println(text2003); 
    
   //word 2007 pictures will not be read, the data in the table will be placed in The last 
   OPCPackage of the string opcPackage = POIXMLDocument.openPackage("D:/Documents/ldj/poi read word/Test/files/2007.docx"); 
   POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); 
   String text2007 = extractor.getText( ); 
   System.out.println(text2007);            
   } catch (Exception e) { 
            e.printStackTrace(); 
   } 

 }

 

// Read the tables in a Word (.doc) document

 @Test
 public void readWordTable(){   
  try {            
   String[] s=new String[300];          
   FileInputStream in=new FileInputStream("D:/Data/ldj/poi read word/Test/files/757900130000-Family Planning Commission.doc ");     

  POIFSFileSystem pfs=new POIFSFileSystem(in);         
   HWPFDocument hwpf=new HWPFDocument(pfs);      
   Range range =hwpf.getRange();        

     TableIterator it=new TableIterator(range);      
   int index=0;            
   while(it.hasNext()){              
    Table tb=(Table)it.next(); 

    for(int i=0;i
     //System.out.println("Numrows :"+tb.numRows());           
     TableRow tr=tb.getRow(i);                 
     for(int j=0;j

     //System.out.println("numCells :"+tr.numCells());  //                
      //System.out.println("j   :"+j);                 
      TableCell td=tr.getCell(j);

     for(int k=0;k
       //System.out.println("numParagraphs :"+td.numParagraphs());       
       Paragraph para=td.getParagraph(k); 

       s[index]=para.text().trim();
       System.out.print(s[index]);
       index++;

}
      System.out.print("      ");
      }     
     System.out.println("");
     }
    } 

 // System.out.println(s.toString());      
   for(int i=0;i
    //System.out.println(s[i]);         
    }        
   } catch (Exception e) {

e.printStackTrace ();    
    }
  } 

 

Guess you like

Origin http://10.200.1.11:23101/article/api/json?id=326986285&siteId=291194637