使用 Apache POI 读取 Word 文档及 Word 文档中的表格

导入包

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.Paragraph;

import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;

import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.openxml4j.opc.OPCPackage;

import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.junit.Test;

 

//读取word

@Test
 public void readWord(){
   try { 
    //word 2003:图片不会被读取 
    InputStream is = new FileInputStream(new File("D:/资料/ldj/poi读取word/Test/files/2003.doc")); 
    WordExtractor ex = new WordExtractor(is); 
   String text2003 = ex.getText(); 
   System.out.println(text2003); 
    
   //word 2007 图片不会被读取, 表格中的数据会被放在字符串的最后 
   OPCPackage opcPackage = POIXMLDocument.openPackage("D:/资料/ldj/poi读取word/Test/files/2007.docx"); 
   POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage); 
   String text2007 = extractor.getText(); 
   System.out.println(text2007);            
   } catch (Exception e) { 
            e.printStackTrace(); 
   } 

 }

 

//读取word中的表格

 @Test
 public void readWordTable(){   
  try {            
   String[] s=new String[300];          
   FileInputStream in=new FileInputStream("D:/资料/ldj/poi读取word/Test/files/757900130000-计生委.doc");     

  POIFSFileSystem pfs=new POIFSFileSystem(in);         
   HWPFDocument hwpf=new HWPFDocument(pfs);      
   Range range =hwpf.getRange();        

     TableIterator it=new TableIterator(range);      
   int index=0;            
   while(it.hasNext()){              
    Table tb=(Table)it.next(); 

    for(int i=0;i
     //System.out.println("Numrows :"+tb.numRows());           
     TableRow tr=tb.getRow(i);                 
     for(int j=0;j

     //System.out.println("numCells :"+tr.numCells());  //                
      //System.out.println("j   :"+j);                 
      TableCell td=tr.getCell(j);

     for(int k=0;k
       //System.out.println("numParagraphs :"+td.numParagraphs());       
       Paragraph para=td.getParagraph(k); 

       s[index]=para.text().trim();
       System.out.print(s[index]);
       index++;

}
      System.out.print("      ");
      }     
     System.out.println("");
     }
    } 

 // System.out.println(s.toString());      
   for(int i=0;i
    //System.out.println(s[i]);         
    }        
   } catch (Exception e) {

e.printStackTrace();    
    }
  } 

 

猜你喜欢

转载自zhitangrui2010.iteye.com/blog/2210503