踩坑日记 - word转pdf

说明

最近这几天研究了下word转pdf的功能实现,特别记录下,方便以后查看。
搜索了网上大部分的资料,总的来说,不外乎以下几种:
1. openoffice,支持windows和linux环境,亲测,word转成pdf后,windows环境下格式没问题,但是linux环境下格式有些出入,且不好跟踪定位;
2. word转html,再通过itext转成pdf,实现最方便,效果最差,跨平台,未测试;
3. libreoffice,支持跨平台,和openoffice的实现类似,亲测,实现效果比openoffice好;
4. jacob + msOfficeWord + SaveAsPDFandXPS,保持原doc格式,转换速度最慢,只能在windows环境下进行,未测试。

最终,我选择使用libreoffice来实现word转pdf的功能。

openoffice实现

1、安装openoffice服务
http://www.openoffice.org/download/index.html
2、功能实现

/**
 * Converts an Office document to PDF through an OpenOffice server listening on port 8100.
 *
 * <p>{@link #initParameters()} is called first; it reads the configuration and tries to
 * launch the OpenOffice server process, so this method is meant for occasional conversions
 * where the server is not kept running.
 *
 * @param sourceFile
 *      path of the source document, e.g. {@code D:/111/report.doc}
 * @param destFile
 *      path of the PDF to create, e.g. {@code D:/111/report.pdf}; a missing parent
 *      directory is created
 * @return {@code -1} if the source file does not exist; {@code 0} if the conversion call
 *      completed; {@code 1} if connecting to the OpenOffice server failed
 * @throws RuntimeExceptionOwn if the destination directory cannot be created, or if
 *      {@code initParameters()} rejects the configured OpenOffice home directory
 */
public int office2PDF(String sourceFile, String destFile) {
    initParameters();
    // connect to an OpenOffice.org instance running on port 8100
    OpenOfficeConnection connection = new SocketOpenOfficeConnection(8100);
    // Tracks whether connect() succeeded, so that disconnect() is only called on a
    // connection that was actually opened.
    boolean connected = false;
    try {
        File inputFile = new File(sourceFile);
        if (!inputFile.exists()) {
            return -1; // source file not found
        }

        // Create the destination directory when it does not exist yet. A bare file name
        // has no parent (getParentFile() returns null) and needs no directory.
        File outputFile = new File(destFile);
        File outputDir = outputFile.getParentFile();
        if (outputDir != null && !outputDir.exists()) {
            // mkdirs() also returns false when another thread created the directory in
            // the meantime, so re-check before reporting a failure.
            if (!outputDir.mkdirs() && !outputDir.isDirectory()) {
                throw new RuntimeExceptionOwn("目录" + outputDir.getPath() + "创建失败");
            }
        }

        connection.connect();
        connected = true;

        DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
        converter.convert(inputFile, outputFile);
        return 0;
    } catch (ConnectException e) {
        // Pass the exception itself so the stack trace ends up in the log.
        log.error("error:", e);
    } finally {
        if (connected) {
            connection.disconnect();
        }
    }
    return 1;
}


/**
 * Loads {@code gcloud/preview.properties} from the context class loader, stores the
 * {@code OpenOffice_HOME} and {@code os} entries in the corresponding fields and then
 * launches the OpenOffice server process.
 *
 * <p>A missing configuration file or a failed process launch is only logged.
 *
 * @throws RuntimeExceptionOwn if {@code OpenOffice_HOME} is missing or blank
 */
private void initParameters() {
    Properties properties = new Properties();
    InputStream localInputStream = null;
    ClassLoader classLoader = Thread.currentThread()
            .getContextClassLoader();
    try {
        localInputStream = classLoader
                .getResourceAsStream("gcloud/preview.properties");
        if (localInputStream == null) {
            log.error("没有找到preview.properties配置文件!");
        } else {
            properties.load(localInputStream);
        }
    } catch (IOException e) {
        log.error("ERROR:", e);
    } finally {
        try {
            if (localInputStream != null) {
                localInputStream.close();
            }
        } catch (IOException e) {
            log.error("ERROR:", e);
        }
    }

    // getProperty() returns null for a missing key; treat that like an empty value so the
    // validation below reports it instead of a NullPointerException from trim().
    String configuredHome = properties.getProperty("OpenOffice_HOME");
    OPENOFFICE_HOME = configuredHome == null ? "" : configuredHome.trim();

    String configuredOs = properties.getProperty("os");
    osType = configuredOs == null ? "" : configuredOs.trim();

    if ("".equals(OPENOFFICE_HOME)) {
        throw new RuntimeExceptionOwn(OPENOFFICE_HOME+" 请确定openoffice.org办公套件的安装目录路径是否设置正确!");
    }

    // Make sure the home directory ends with a path separator. A backslash is only a
    // separator on Windows; on Linux it would become part of the file name, so use '/'.
    char lastChar = OPENOFFICE_HOME.charAt(OPENOFFICE_HOME.length() - 1);
    if (lastChar != '\\' && lastChar != '/') {
        OPENOFFICE_HOME += "Linux".equals(osType) ? "/" : "\\";
    }

    // Build the command that starts the OpenOffice server.
    String command;
    if ("Linux".equals(osType)) {
        command = OPENOFFICE_HOME + LinuxSoffice;
    } else {
        command = WinSoffice;
    }

    try {
        Runtime.getRuntime().exec(command);
    } catch (IOException e) {
        // Log the cause together with the message, at error level.
        log.error("启动openoffice server失败,请确定openoffice.org办公套件的安装目录路径是否设置正确!", e);
    }

}

libreoffice实现

1、安装libreoffice服务
https://zh-cn.libreoffice.org/download/libreoffice-still/
2、功能实现

/**
 * Converts a Word document to PDF by running LibreOffice ({@code soffice}) in headless mode.
 *
 * <p>The path of the soffice executable is read from the {@code OpenOffice_HOME} entry of
 * {@code gcloud/preview.properties}. The call blocks until the soffice process has exited;
 * no timeout is applied. soffice names its output after the source file, so the result is
 * renamed afterwards when {@code destFile} uses a different file name.
 *
 * @param sourceFile path of the Word file to convert
 * @param destFile path of the PDF to produce; a missing parent directory is created
 * @return {@code true} if soffice exited with code 0 and the PDF is available at
 *         {@code destFile}; {@code false} if the source file is missing, the configuration
 *         is incomplete, the process fails, or any other error occurs
 */
public boolean wordConverterToPdf(String sourceFile, String destFile){
    try {
        File inputFile = new File(sourceFile).getAbsoluteFile();
        if (!inputFile.exists()) {
            return false; // source file not found
        }

        // Create the destination directory when it does not exist yet.
        File outputFile = new File(destFile).getAbsoluteFile();
        File outputDir = outputFile.getParentFile();
        if (outputDir == null) {
            // Only a file system root has no parent; it cannot be a PDF target.
            log.error("无效的目标文件路径: " + destFile);
            return false;
        }
        if (!outputDir.exists() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
            log.error("目录" + outputDir.getPath() + "创建失败");
            return false;
        }

        Properties properties = new Properties();
        InputStream localInputStream = null;
        ClassLoader classLoader = Thread.currentThread()
                .getContextClassLoader();
        try {
            localInputStream = classLoader
                    .getResourceAsStream("gcloud/preview.properties");
            if (localInputStream == null) {
                log.error("没有找到preview.properties配置文件!");
            } else {
                properties.load(localInputStream);
            }
        } catch (IOException e) {
            log.error("ERROR:", e);
        } finally {
            try {
                if (localInputStream != null) {
                    localInputStream.close();
                }
            } catch (IOException e) {
                log.error("ERROR:", e);
            }
        }

        // getProperty() returns null for a missing key; report that explicitly.
        String configuredSoffice = properties.getProperty("OpenOffice_HOME");
        if (configuredSoffice == null || "".equals(configuredSoffice.trim())) {
            log.error("preview.properties中未配置OpenOffice_HOME, 无法找到soffice可执行文件");
            return false;
        }
        OPENOFFICE_HOME = configuredSoffice.trim();
        String configuredOs = properties.getProperty("os");
        if (configuredOs != null) {
            osType = configuredOs.trim();
        }

        // Every argument is its own array element, so paths containing spaces need no
        // manual quoting; literal quote characters would become part of the file name on
        // non-Windows systems. The same command line works on every platform.
        ProcessBuilder builder = new ProcessBuilder(new String[]{
                OPENOFFICE_HOME,
                "--headless",
                "--invisible",
                "--convert-to",
                "pdf",
                inputFile.getPath(),
                "--outdir",
                outputDir.getPath()
        });
        // Merge stderr into stdout so a single loop can drain both; an undrained pipe can
        // block the child process once its buffer fills up.
        builder.redirectErrorStream(true);
        Process proc = builder.start();

        StringBuilder processOutput = new StringBuilder();
        InputStream procStream = proc.getInputStream();
        try {
            byte[] buffer = new byte[4096];
            int read;
            while ((read = procStream.read(buffer)) != -1) {
                // Kept for diagnostics only, decoded with the platform default charset.
                processOutput.append(new String(buffer, 0, read));
            }
        } finally {
            procStream.close();
        }

        int exitCode = proc.waitFor();
        if (exitCode != 0) {
            log.error("libreoffice转换失败, exitCode=" + exitCode + ", output=" + processOutput);
            return false;
        }

        // soffice writes "<source name without extension>.pdf" into the --outdir folder.
        String sourceName = inputFile.getName();
        int dot = sourceName.lastIndexOf('.');
        String baseName = dot > 0 ? sourceName.substring(0, dot) : sourceName;
        File converted = new File(outputDir, baseName + ".pdf");
        if (!converted.exists()) {
            log.error("libreoffice未生成pdf文件: " + converted.getPath() + ", output=" + processOutput);
            return false;
        }

        // Move the result to the requested name when it differs from the generated one.
        if (!converted.equals(outputFile)) {
            if (outputFile.exists() && !outputFile.delete()) {
                log.error("无法覆盖已存在的目标文件: " + outputFile.getPath());
                return false;
            }
            if (!converted.renameTo(outputFile)) {
                log.error("无法将" + converted.getPath() + "重命名为" + outputFile.getPath());
                return false;
            }
        }
        return true;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        log.error("word转pdf被中断: " + sourceFile, e);
        return false;
    } catch (Exception e) {
        log.error("word转pdf失败: " + sourceFile, e);
        return false;
    }
}

常见问题解决方案

1、使用Runtime.getRuntime().exec()调用转换命令一直无效或失败

方案1(我这边没效果)
// Run the command and drain both output pipes so the child process cannot block on a
// full pipe buffer.
Process proc = Runtime.getRuntime().exec(cmdString);

// Nothing is written to the child's stdin; close it first so a child that reads stdin
// sees end-of-file instead of waiting.
proc.getOutputStream().close();

// stdout is drained (and discarded) on a helper thread ...
final InputStream is1 = proc.getInputStream();
Thread stdoutDrainer = new Thread(new Runnable() {
    public void run() {
        BufferedReader br = new BufferedReader(new InputStreamReader(is1));
        try {
            while (br.readLine() != null) {
                // discard
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
});
// The thread must be started, otherwise stdout is never read.
stdoutDrainer.start();

// ... while stderr is collected on the current thread for later inspection.
InputStream is2 = proc.getErrorStream();
BufferedReader br2 = new BufferedReader(new InputStreamReader(is2));
StringBuilder buf = new StringBuilder();
String line = null;
while ((line = br2.readLine()) != null) {
    buf.append(line);
}

// Wait for the process and the drain thread before closing the streams; closing them
// earlier would cut off the reader that is still running.
proc.waitFor();
stdoutDrainer.join();

proc.getInputStream().close();
proc.getErrorStream().close();
方案2(我这边没效果)
// Run the command and let two StreamGobbler threads consume stderr and stdout.
Process proc = Runtime.getRuntime().exec(cmdString);
StreamGobbler errorGobbler = new StreamGobbler(proc.getErrorStream(), "Error");
StreamGobbler outputGobbler = new StreamGobbler(proc.getInputStream(), "Output");
errorGobbler.start();
outputGobbler.start();
proc.waitFor();
// The process can exit while the gobblers still hold unread output; wait for both
// threads so everything has been consumed before continuing.
errorGobbler.join();
outputGobbler.join();
package com.inspur.dtdcommon.print.office.poi;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
 * Thread that reads an input stream line by line until end of stream and echoes every
 * line to standard output with a prefix chosen by the stream type.
 *
 * <p>The stream is decoded with the platform default charset (none is specified) and is
 * closed once it has been read completely.
 */
public class StreamGobbler extends Thread {

    /** Stream to read until end of stream. */
    InputStream is;
    /** Label of the stream; {@code "Error"} selects the error prefix, anything else the debug prefix. */
    String type;

    /**
     * @param is stream to consume
     * @param type label of the stream, {@code "Error"} for an error stream
     */
    public StreamGobbler(InputStream is, String type) {
        this.is = is;
        this.type = type;
    }

    /** Reads the stream to its end, printing each line, then closes it. */
    @Override
    public void run() {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(is));
            String line;
            while ((line = br.readLine()) != null) {
                // Constant-first comparison: a null type must not kill the thread and
                // leave the stream unread.
                if ("Error".equals(type)) {
                    System.out.println("Error   :" + line);
                } else {
                    System.out.println("Debug:" + line);
                }
            }
        } catch (IOException ioe) {
            ioe.printStackTrace();
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a fully read stream fails.
                }
            }
        }
    }
}
方案3(可行)(将对应的参数存到数组中,解决空格的问题)
// Pass the command as an array: each element reaches the process as one argument, so
// paths containing spaces are not split.
String sofficeExecutable;
if (osType.contains("Windows")) {
    sofficeExecutable = "\"D:\\Program Files\\LibreOffice 5\\program\\soffice.exe\"";
} else {
    // An empty command array makes Runtime.exec throw IndexOutOfBoundsException.
    // NOTE(review): assumes soffice is on the PATH on non-Windows systems; replace with
    // the full path of the local installation if it is not.
    sofficeExecutable = "soffice";
}
String[] cmdString = new String[]{
        sofficeExecutable,
        "--headless",
        "--invisible",
        "--convert-to",
        "pdf",
        sourceFile,
        "--outdir",
        inputFile.getParent()
};

Runtime.getRuntime().exec(cmdString);

2、报错

org.apache.poi.POIXMLException: org.apache.poi.openxml4j.exceptions.InvalidFormatException: Package should contain a content type part [M1.13]

原因:POI处理docx(OOXML)格式的接口只能读取docx文件,用它打开旧版二进制doc文件就会抛出该异常。
解决办法:换模板类型,将模板另存为docx格式(仅修改文件扩展名无效)。

相关资料

word转pdf
https://www.cnblogs.com/wjqboke/articles/7646606.html
查看软件是否安装
https://www.cnblogs.com/yuanqiangfei/p/8033000.html

linux下安装swftools
https://jingyan.baidu.com/article/93f9803f0f7099e0e46f55a1.html
https://blog.csdn.net/zhizaibide1987/article/details/28902229

linux下安装openoffice
https://blog.csdn.net/u013132051/article/details/53304562
https://www.cnblogs.com/manong--/p/8012324.html
https://blog.csdn.net/zouqingfang/article/details/44460823
https://blog.csdn.net/Jsh_sh/article/details/52992731

猜你喜欢

转载自blog.csdn.net/zyp112/article/details/80176135