doc,docx完美转html，图片保存完整代码。

网上一大堆，基本都是doc可以，docx就不行，研究半天终于可以了。

下面的依赖中有些在本文用不上，它们是 ppt、pptx 转图片时用到的包。注意各个 poi 包的版本最好保持一致；这里使用低版本，是因为只有低版本的 poi 支持 ppt 文档。需要 ppt、pptx 预览功能的可以前往 https://www.cnblogs.com/-llf/p/12671432.html

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi</artifactId>
   <version>3.10-FINAL</version>
</dependency>
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-ooxml</artifactId>
   <version>3.10-FINAL</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-ooxml-schemas</artifactId>
   <version>3.10-FINAL</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
   <groupId>org.apache.poi</groupId>
   <artifactId>poi-scratchpad</artifactId>
   <version>3.10-FINAL</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.github.virtuald/curvesapi -->
<dependency>
   <groupId>com.github.virtuald</groupId>
   <artifactId>curvesapi</artifactId>
   <version>1.06</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
   <groupId>org.apache.xmlbeans</groupId>
   <artifactId>xmlbeans</artifactId>
   <version>3.1.0</version>
</dependency>
<dependency>
   <groupId>fr.opensagres.xdocreport</groupId>
   <artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
   <version>1.0.6</version>
</dependency>


代码：

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

/**
 * Converts Word documents (.doc and .docx) to HTML files, saving embedded images next to the output.
 *
 * @author llf
 * @date Created in 2020-04-13 14:24
 * @version v1.0
 */
public class WordToHtml {

 /**
  * Converts a legacy binary Word document (.doc) to an HTML file.
  *
  * <p>The output goes into a directory that sits next to the source file and is named after it
  * without its extension: {@code <dir>/<name>/<name>.html}. Embedded pictures are written to
  * {@code <dir>/<name>/image/} and referenced from the HTML as {@code image/<picture name>}.
  *
  * @param fileUrl path of the .doc file to convert
  * @return path of the generated HTML file
  * @throws IllegalArgumentException if the file name has no extension to strip
  * @throws Exception if the document cannot be read, converted or written
  */
 public static String docToHtml(String fileUrl) throws Exception {
     File sourceFile = new File(fileUrl);
     String fileName = sourceFile.getName();
     // Split on the LAST dot of the file name only, so dots in parent directories
     // (or extra dots in the name itself) do not truncate the output location.
     int dot = fileName.lastIndexOf('.');
     if (dot <= 0) {
         throw new IllegalArgumentException("File name has no extension: " + fileUrl);
     }
     String baseName = fileName.substring(0, dot);

     // Build paths with File so the platform's separator is used instead of a hard-coded "\\".
     File outputDir = new File(sourceFile.getParentFile(), baseName);
     File imageDir = new File(outputDir, "image");
     File targetFile = new File(outputDir, baseName + ".html");
     if (!imageDir.isDirectory() && !imageDir.mkdirs()) {
         throw new IOException("Could not create image directory: " + imageDir);
     }

     // Close the source stream as soon as the document has been constructed.
     HWPFDocument wordDocument;
     try (InputStream in = new FileInputStream(sourceFile)) {
         wordDocument = new HWPFDocument(in);
     }

     org.w3c.dom.Document document =
             DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
     WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
     wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
         // Best effort: a picture that cannot be saved is reported but does not abort the conversion.
         try (FileOutputStream out = new FileOutputStream(new File(imageDir, name))) {
             out.write(content);
         } catch (Exception e) {
             e.printStackTrace();
         }
         // Relative URL, resolved against the directory that holds the generated HTML file.
         return "image/" + name;
     });
     wordToHtmlConverter.processDocument(wordDocument);

     org.w3c.dom.Document htmlDocument = wordToHtmlConverter.getDocument();
     Transformer serializer = TransformerFactory.newInstance().newTransformer();
     serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
     serializer.setOutputProperty(OutputKeys.INDENT, "yes");
     serializer.setOutputProperty(OutputKeys.METHOD, "html");
     // Write through a stream opened and closed here rather than handing the transformer a File.
     try (OutputStream out = new FileOutputStream(targetFile)) {
         serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
     }
     return targetFile.getPath();
 }


 /**
  * Converts an OOXML Word document (.docx) to an HTML file.
  *
  * <p>The output goes into a directory that sits next to the source file and is named after it
  * without its extension: {@code <dir>/<name>/<name>.html}. The {@code <dir>/<name>/image}
  * directory is handed to the converter as both the image extraction target and the base for
  * image URIs.
  *
  * @param fileUrl path of the .docx file to convert
  * @throws IllegalArgumentException if the file name has no extension to strip
  * @throws Exception if the document cannot be read, converted or written
  */
 public static void docxToHtml(String fileUrl) throws Exception {
     File sourceFile = new File(fileUrl);
     String fileName = sourceFile.getName();
     // Split on the LAST dot of the file name only, so dots in parent directories
     // (or extra dots in the name itself) do not truncate the output location.
     int dot = fileName.lastIndexOf('.');
     if (dot <= 0) {
         throw new IllegalArgumentException("File name has no extension: " + fileUrl);
     }
     String baseName = fileName.substring(0, dot);

     File outputDir = new File(sourceFile.getParentFile(), baseName);
     File imageFolderFile = new File(outputDir, "image");
     if (!imageFolderFile.isDirectory() && !imageFolderFile.mkdirs()) {
         throw new IOException("Could not create image directory: " + imageFolderFile);
     }

     XHTMLOptions options = XHTMLOptions.create().URIResolver(
             new FileURIResolver(imageFolderFile));
     options.setExtractor(new FileImageExtractor(imageFolderFile));

     // try-with-resources closes both streams even when the conversion fails.
     try (InputStream in = new FileInputStream(sourceFile);
          OutputStream out = new FileOutputStream(new File(outputDir, baseName + ".html"))) {
         XWPFDocument document = new XWPFDocument(in);
         XHTMLConverter.getInstance().convert(document, out, options);
     }
 }

 /**
  * Command-line entry point.
  *
  * <p>Converts the file named by the first argument. A path ending in {@code .doc}
  * (case-insensitive) is converted with {@code docToHtml}; any other path is converted with
  * {@code docxToHtml}. When no argument is given, the sample path {@code E:/desktop/2.docx}
  * (written with Windows separators in the code) is converted.
  *
  * @param args optional; {@code args[0]} is the path of the Word document to convert
  * @throws Exception if the conversion fails
  */
 public static void main(String[] args) throws Exception {
     String input = args.length > 0 ? args[0] : "E:\\desktop\\2.docx";
     // regionMatches returns false when the path is shorter than the suffix, so no length check is needed.
     boolean legacyDoc = input.regionMatches(true, input.length() - 4, ".doc", 0, 4);
     if (legacyDoc) {
         WordToHtml.docToHtml(input);
     } else {
         WordToHtml.docxToHtml(input);
     }
 }

doc,docx完美转html，图片保存完整代码。

猜你喜欢