// Converting Word files and Excel files to HTML files

package com.blx.assessment.utils;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.List;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFPictureData;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.junit.Test;
import org.w3c.dom.Document;

public class Poipreview {


    @Test  
    public static String WordToHtml(String path,String rootpath) {  
        try {

        InputStream input = new FileInputStream(path);
          HWPFDocument wordDocument = new HWPFDocument(input);
          WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
            DocumentBuilderFactory.newInstance().newDocumentBuilder()
              .newDocument());
          wordToHtmlConverter.setPicturesManager(new PicturesManager() {
           public String savePicture(byte[] content, PictureType pictureType,
             String suggestedName, float widthInches, float heightInches) {
            return suggestedName;
           }
          });
          wordToHtmlConverter.processDocument(wordDocument);
          List pics = wordDocument.getPicturesTable().getAllPictures();
          if (pics != null) {
           for (int i = 0; i < pics.size(); i++) {
            Picture pic = (Picture) pics.get(i);
            try {
             pic.writeImageContent(new FileOutputStream(rootpath
               + pic.suggestFullFileName()));
            } catch (FileNotFoundException e) {
             e.printStackTrace();
            }
           }
          }
          Document htmlDocument = wordToHtmlConverter.getDocument();
          ByteArrayOutputStream outStream = new ByteArrayOutputStream();
          DOMSource domSource = new DOMSource(htmlDocument);
          StreamResult streamResult = new StreamResult(outStream);
          TransformerFactory tf = TransformerFactory.newInstance();
          Transformer serializer = tf.newTransformer();
          serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
          serializer.setOutputProperty(OutputKeys.INDENT, "yes");
          serializer.setOutputProperty(OutputKeys.METHOD, "html");
          serializer.transform(domSource, streamResult);
          outStream.close();
          String content = new String(outStream.toByteArray());
          return content;
        } catch (Exception e) {
            // TODO: handle exception
            return "";
        }
    }  

    public static String ExcelToHtml(String path,String rootpath) {  
        try {
             InputStream input=new FileInputStream(path);
             HSSFWorkbook excelBook=new HSSFWorkbook(input);
             ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );
             excelToHtmlConverter.processWorkbook(excelBook);
             List pics = excelBook.getAllPictures();
             if (pics != null) {
                 for (int i = 0; i < pics.size(); i++) {
                     Picture pic = (Picture) pics.get (i);
                     try {
                         pic.writeImageContent (new FileOutputStream (rootpath + pic.suggestFullFileName() ) );
                     } catch (FileNotFoundException e) {
                         e.printStackTrace();
                     }
                 }
             }
             Document htmlDocument =excelToHtmlConverter.getDocument();
             ByteArrayOutputStream outStream = new ByteArrayOutputStream();
             DOMSource domSource = new DOMSource (htmlDocument);
             StreamResult streamResult = new StreamResult (outStream);
             TransformerFactory tf = TransformerFactory.newInstance();
             Transformer serializer = tf.newTransformer();
             serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");
             serializer.setOutputProperty (OutputKeys.INDENT, "yes");
             serializer.setOutputProperty (OutputKeys.METHOD, "html");
             serializer.transform (domSource, streamResult);
             outStream.close();

             String content = new String (outStream.toByteArray() );
             return content;
        } catch (Exception e) {
            // TODO: handle exception
            return "";
        }
    }  

}
// Converting Word files and Excel files to HTML files

// You may also like