doc,docx完美转html,图片保存完整代码。

网上一大堆,基本都是doc可以,docx就不行,研究半天终于可以了。

下面的依赖中有些在本文用不到,它们是ppt和pptx转图片时需要的包。注意各个poi包的版本最好保持一致;之所以使用低版本,是因为只有低版本poi支持ppt文档。需要ppt和pptx预览的可以前往 https://www.cnblogs.com/-llf/p/12671432.html

<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.10-FINAL</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.10-FINAL</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml-schemas -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.10-FINAL</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>3.10-FINAL</version>
</dependency>

<!-- https://mvnrepository.com/artifact/com.github.virtuald/curvesapi -->
<dependency>
<groupId>com.github.virtuald</groupId>
<artifactId>curvesapi</artifactId>
<version>1.06</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.xmlbeans/xmlbeans -->
<dependency>
<groupId>org.apache.xmlbeans</groupId>
<artifactId>xmlbeans</artifactId>
<version>3.1.0</version>
</dependency>
<dependency>
<groupId>fr.opensagres.xdocreport</groupId>
<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>
<version>1.0.6</version>
</dependency>


代码:
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.xwpf.converter.core.FileImageExtractor;
import org.apache.poi.xwpf.converter.core.FileURIResolver;
import org.apache.poi.xwpf.converter.xhtml.XHTMLConverter;
import org.apache.poi.xwpf.converter.xhtml.XHTMLOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;

/**
* @author :llf
* @date :Created in 2020-04-13 14:24
* @description:Converts Word documents (.doc / .docx) to HTML files and saves their embedded images to disk.
* @version: v1.0
*/
public class WordToHtml {

/**
 * Converts a legacy binary Word document ({@code .doc}) to an HTML file.
 *
 * <p>For an input {@code <dir>/<name>.<ext>} the output is written to a sibling
 * directory {@code <dir>/<name>/}: the page goes to {@code <name>.html} and the
 * embedded pictures go to the {@code image} sub-directory. Picture references in
 * the generated HTML are relative ({@code image/<picture name>}).
 *
 * @param fileUrl path of the {@code .doc} file to convert
 * @return path of the generated HTML file
 * @throws IllegalArgumentException if the file name has no extension to strip
 * @throws IOException if the output directory cannot be created or a file cannot be read/written
 * @throws Exception for any failure reported by the POI converter or the XML transformer
 */
public static String docToHtml(String fileUrl) throws Exception {
    File source = new File(fileUrl);
    String fileName = source.getName();
    // Strip only the extension of the file name itself; searching the whole path
    // for the first '.' breaks as soon as a parent directory contains a dot.
    int dot = fileName.lastIndexOf('.');
    if (dot <= 0) {
        throw new IllegalArgumentException("File name has no extension: " + fileUrl);
    }
    String baseName = fileName.substring(0, dot);

    // Build paths with File so the platform separator is used instead of a hard-coded '\\'.
    File outputDir = new File(source.getParentFile(), baseName);
    File imageDir = new File(outputDir, "image");
    if (!imageDir.isDirectory() && !imageDir.mkdirs()) {
        throw new IOException("Unable to create image directory: " + imageDir);
    }
    File target = new File(outputDir, baseName + ".html");

    org.w3c.dom.Document htmlDocument;
    try (InputStream in = new FileInputStream(source)) {
        HWPFDocument wordDocument = new HWPFDocument(in);
        org.w3c.dom.Document document =
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(document);
        wordToHtmlConverter.setPicturesManager((content, pictureType, name, width, height) -> {
            // Best effort: a picture that cannot be written is reported but does not
            // abort the conversion of the rest of the document.
            try (FileOutputStream out = new FileOutputStream(new File(imageDir, name))) {
                out.write(content);
            } catch (IOException e) {
                e.printStackTrace();
            }
            return "image/" + name;
        });
        wordToHtmlConverter.processDocument(wordDocument);
        htmlDocument = wordToHtmlConverter.getDocument();
    }

    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    // Write through an explicit stream so it is closed deterministically and the
    // target path is never round-tripped through a URI-encoded system id.
    try (OutputStream htmlOut = new FileOutputStream(target)) {
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(htmlOut));
    }
    return target.getPath();
}


/**
 * Converts an OOXML Word document ({@code .docx}) to an XHTML file.
 *
 * <p>For an input {@code <dir>/<name>.<ext>} the output is written to a sibling
 * directory {@code <dir>/<name>/}: the page goes to {@code <name>.html}, and the
 * {@code image} sub-directory is handed to the converter as both the image
 * extraction target and the base for image URIs.
 *
 * @param fileUrl path of the {@code .docx} file to convert
 * @throws IllegalArgumentException if the file name has no extension to strip
 * @throws IOException if the output directory cannot be created or a file cannot be read/written
 * @throws Exception for any failure reported by the converter
 */
public static void docxToHtml(String fileUrl) throws Exception {
    File source = new File(fileUrl);
    String fileName = source.getName();
    // Strip only the extension of the file name itself; searching the whole path
    // for the first '.' breaks as soon as a parent directory contains a dot.
    int dot = fileName.lastIndexOf('.');
    if (dot <= 0) {
        throw new IllegalArgumentException("File name has no extension: " + fileUrl);
    }
    String baseName = fileName.substring(0, dot);
    File outputDir = new File(source.getParentFile(), baseName);
    File imageFolderFile = new File(outputDir, "image");

    // try-with-resources: the original never closed either stream.
    try (InputStream in = new FileInputStream(source)) {
        XWPFDocument document = new XWPFDocument(in);
        if (!imageFolderFile.isDirectory() && !imageFolderFile.mkdirs()) {
            throw new IOException("Unable to create image directory: " + imageFolderFile);
        }
        XHTMLOptions options = XHTMLOptions.create().URIResolver(
                new FileURIResolver(imageFolderFile));
        options.setExtractor(new FileImageExtractor(imageFolderFile));
        // The output file is opened only after the document parsed successfully,
        // so a corrupt input does not leave an empty HTML file behind.
        try (OutputStream out = new FileOutputStream(new File(outputDir, baseName + ".html"))) {
            XHTMLConverter.getInstance().convert(document, out, options);
        }
    }
}

/**
 * Command-line entry point: converts one {@code .docx} file to HTML.
 *
 * @param args optional; {@code args[0]} is the path of the {@code .docx} file to
 *             convert. When no argument is given, the sample path
 *             {@code E:\desktop\2.docx} is used.
 * @throws Exception if the conversion fails
 */
public static void main(String[] args) throws Exception {
    String input = (args != null && args.length > 0) ? args[0] : "E:\\desktop\\2.docx";
    WordToHtml.docxToHtml(input);
}
 

猜你喜欢

转载自www.cnblogs.com/-llf/p/12703999.html