POI将word转化为html

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_16773023/article/details/81288124

参考资料

1.POI包依赖:https://poi.apache.org/components/index.html
2.包版本问题:https://bbs.csdn.net/topics/392208805 并没有使用其中提到的3.9版本,而是使用了3.13版本
最开始使用的是3.17版本,但在转换为html时出现错误:java.lang.NoSuchMethodError;使用3.9版本也出现了类似的问题:
java.lang.NoSuchMethodError:org.apache.poi.POIXMLDocumentPart.getPackageRelationship()
3.整体的参考:http://www.cnblogs.com/always-online/p/4800131.html
4.发布到服务器上时,<img>标签src的图片路径问题:
https://blog.csdn.net/B888888888888/article/details/78113527?locationNum=7&fps=1

相关说明:3.17版本在excel设置样式时,API发生了变化
pom.xml—对应包

<!-- 测试poi需要的先决条件包 -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-math3</artifactId>
    <version>3.6.1</version>
</dependency>
<dependency>
    <groupId>org.apache.xmlbeans</groupId>
    <artifactId>xmlbeans</artifactId>
    <version>2.6.0</version>
</dependency>
<dependency>
    <groupId>commons-codec</groupId>
    <artifactId>commons-codec</artifactId>
    <version>1.10</version>
</dependency>
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.2</version>
</dependency>
<!-- <dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.2</version>
</dependency> -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-collections4</artifactId>
    <version>4.0</version>
</dependency>
<!-- 测试poi需要的先决条件包 -->
<dependency>
    <groupId>com.github.virtuald</groupId>
    <artifactId>curvesapi</artifactId>
    <version>1.04</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.13</version>
</dependency>

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.13</version>
</dependency>   

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-examples -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-examples</artifactId>
    <version>3.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-excelant -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-excelant</artifactId>
    <version>3.13</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.core</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
     <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>1.0.6</version>
</dependency>

<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
    <version>1.0.6</version>
</dependency>

代码如下:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.apache.poi.xwpf.converter.core.BasicURIResolver;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

/**
 * Static helpers that convert Word documents to HTML files on disk:
 * {@link #docToHtml} for binary {@code .doc} input (Apache POI HWPF) and
 * {@link #docxToHtml} for {@code .docx} input (xdocreport XHTML converter).
 *
 * <p>Neither method closes the {@link InputStream} it is given; the caller
 * remains responsible for it.
 */
public class WordToHtml {
    /**
     * Converts a {@code .doc} document to an HTML file.
     *
     * <p>Every embedded picture is written to
     * {@code imageAbsolutePath + suggestedName} and referenced in the HTML as
     * {@code webImagePath + suggestedName}. Both prefixes are concatenated
     * verbatim, so they should end with a path separator.
     *
     * @param ins stream holding the {@code .doc} content; not closed by this method
     * @param imageAbsolutePath local path prefix under which extracted pictures are stored
     * @param webImagePath prefix used for the {@code src} attribute of the generated
     *        {@code <img>} tags, e.g. {@code /csdn/} yields {@code <img src="/csdn/a.png"/>}
     * @param htmlPath local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     * @throws ParserConfigurationException if no DOM document builder can be created
     * @throws TransformerException if serializing the DOM to HTML fails
     */
    public static void docToHtml(InputStream ins,String imageAbsolutePath,String webImagePath, String htmlPath) throws IOException, ParserConfigurationException, TransformerException{
        HWPFDocumentCore wordDocument =  WordToHtmlUtils.loadDoc(ins);

        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        // Decide where each embedded picture is stored and how the HTML refers to it.
        wordToHtmlConverter.setPicturesManager(new PicturesManager() {

            @Override
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                File file = new File(imageAbsolutePath + suggestedName);
                // try-with-resources closes the stream even when write() fails.
                try (OutputStream pictureOut = new FileOutputStream(file)) {
                    pictureOut.write(content);
                } catch (IOException e) {
                    // Best effort, as before: a picture that cannot be saved is reported
                    // but does not abort the conversion of the rest of the document.
                    // FileNotFoundException is an IOException, so one catch covers both.
                    e.printStackTrace();
                }
                // Return the address the browser should load, not the local storage path
                // (imageAbsolutePath is only meaningful on the machine storing the file).
                return webImagePath + suggestedName;
            }
        });
        wordToHtmlConverter.processDocument(wordDocument);
        org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
        DOMSource domSource = new DOMSource(htmlDocument);

        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer serializer  = tf.newTransformer();

        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Open the target file only once everything is ready to be written, and
        // guarantee it is closed even if the transformation throws.
        try (OutputStream out = new FileOutputStream(new File(htmlPath))) {
            serializer.transform(domSource, new StreamResult(out));
        }
    }

    /**
     * Converts a {@code .docx} document to an HTML fragment file.
     *
     * @param ins stream holding the {@code .docx} content; not closed by this method
     * @param imageUrl local directory handed to the image extractor for storing pictures
     * @param webImagePath base used for the {@code src} attribute of the generated
     *        {@code <img>} tags; per the original author's note, the extractor creates a
     *        {@code /word/media} sub-folder under the configured folder
     * @param fileUrl local path of the HTML file to write
     * @throws IOException if the document cannot be read or the HTML file cannot be written
     */
    public static void docxToHtml(InputStream ins,String imageUrl,String webImagePath,String fileUrl) throws IOException{

        // 1: load the document into an XWPFDocument
        XWPFDocument document = new XWPFDocument(ins);
        // 2: extract pictures into the given folder
        File imgFile = new File(imageUrl);
        XHTMLOptions options = XHTMLOptions.create();
        options.setExtractor(new FileImageExtractor(imgFile));

        // BasicURIResolver is used for relative paths; the original author notes that
        // FileURIResolver can be used instead for absolute paths.
        options.URIResolver(new BasicURIResolver(webImagePath));
        options.setIgnoreStylesIfUnused(false);
        options.setFragment(true);
        // 3: convert the XWPFDocument to XHTML; the output stream is always closed,
        // which the original code never did.
        try (OutputStream out = new FileOutputStream(new File(fileUrl))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }

    }
}

猜你喜欢

转载自blog.csdn.net/qq_16773023/article/details/81288124