Java实现Word转PDF方案选择

Java实现Word转PDF方案选择

      很多应用场景中都会涉及到Word转PDF,但Word转PDF的方案在网上一搜一大把,让人眼花缭乱,笔者踩过无数的坑后,最终总结出以下三种方案

  • OpenOffice实现Word转PDF
  • docx2pdf实现Word转PDF
  • itext+POI实现Word转PDF
方案
OpenOffice实现Word转PDF
这种方案在Windows中可行,且非常简便,但它完全依赖于OpenOffice,想在Linux中实现,显然不是一个好的方案,笔者尝试过Linux中装OpenOffice,但令人发指的是居然还需要装GUI!

Java代码


import java.io.File;
import java.io.IOException;
import java.net.ConnectException;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

	/**
	 * maven包
	 
		<dependency>
			<groupId>com.artofsolving</groupId>
			<artifactId>jodconverter-maven-plugin</artifactId>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
		</dependency>
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
		</dependency>
		<dependency>
		    <groupId>org.apache.poi</groupId>
		    <artifactId>poi-scratchpad</artifactId>
		</dependency>
	 * 
	 * 
	 */
/**
 * Converts an office document to PDF by driving a locally installed OpenOffice
 * through JODConverter.
 *
 * <p>A headless {@code soffice} process is started for each conversion and is
 * always destroyed afterwards, whether or not the conversion succeeded.
 */
public class OfficetoPdfUtil {
	/**
	 * OpenOffice installation directory, including the trailing separator,
	 * e.g. "D:/Program Files/OpenOffice.org 3/" or
	 * "C:\\Program Files (x86)\\OpenOffice 4\\".
	 */
	private static final String OPEN_OFFICE_HOME = "D:\\openoffice\\newgay\\";

	/** Host and port the headless OpenOffice instance listens on. */
	private static final String OFFICE_HOST = "127.0.0.1";
	private static final int OFFICE_PORT = 8300;

	/** How often, and how long apart, to retry while soffice is still starting. */
	private static final int CONNECT_ATTEMPTS = 20;
	private static final long CONNECT_RETRY_DELAY_MS = 500L;

	/**
	 * Converts {@code sourceFile} to {@code destFile}; the target format is
	 * chosen by the converter from the destination file name.
	 *
	 * <p>Failures to start OpenOffice or to connect to it are reported on
	 * standard error and the method returns without producing output.
	 *
	 * @param sourceFile path of the document to convert
	 * @param destFile   path of the file to create; missing parent directories
	 *                   are created
	 */
	public static void createPDF(String sourceFile, String destFile) {
		Process pro = null;
		OpenOfficeConnection connection = null;
		try {
			// Pass the arguments as a list: Runtime.exec(String) splits on
			// whitespace and breaks on install paths that contain spaces.
			pro = new ProcessBuilder(
					OPEN_OFFICE_HOME + "program\\soffice.exe",
					"-headless",
					"-accept=socket,host=" + OFFICE_HOST + ",port=" + OFFICE_PORT
							+ ";urp;StarOffice.ServiceManager",
					"-nofirststartwizard").start();

			File inputFile = new File(sourceFile);
			File outputFile = new File(destFile);
			// Create the target directory if it does not exist yet. A bare
			// file name has no parent, in which case there is nothing to create.
			File outputDir = outputFile.getParentFile();
			if (outputDir != null && !outputDir.exists()) {
				outputDir.mkdirs();
			}

			connection = connectWithRetry();

			DocumentConverter converter = new OpenOfficeDocumentConverter(connection);
			converter.convert(inputFile, outputFile);
		} catch (IOException e) {
			// Also covers ConnectException, which is an IOException.
			e.printStackTrace();
		} finally {
			// Release the connection and the office process on every path,
			// including runtime failures thrown by the converter.
			if (connection != null) {
				connection.disconnect();
			}
			if (pro != null) {
				pro.destroy();
			}
		}
	}

	/**
	 * Opens a connection to the OpenOffice instance, retrying while the freshly
	 * started process is not yet accepting connections.
	 *
	 * @return a connected {@link OpenOfficeConnection}
	 * @throws ConnectException if no attempt succeeded, or the thread was
	 *                          interrupted while waiting
	 */
	private static OpenOfficeConnection connectWithRetry() throws ConnectException {
		ConnectException lastFailure = null;
		for (int attempt = 1; attempt <= CONNECT_ATTEMPTS; attempt++) {
			OpenOfficeConnection candidate = new SocketOpenOfficeConnection(OFFICE_HOST, OFFICE_PORT);
			try {
				candidate.connect();
				return candidate;
			} catch (ConnectException e) {
				lastFailure = e;
			}
			if (attempt < CONNECT_ATTEMPTS) {
				try {
					Thread.sleep(CONNECT_RETRY_DELAY_MS);
				} catch (InterruptedException e) {
					// Preserve the interrupt for the caller and stop waiting.
					Thread.currentThread().interrupt();
					break;
				}
			}
		}
		throw lastFailure;
	}

	public static void main(String[] args) {
		createPDF("C:\\Users\\Administrator\\Desktop\\test.doc", "C:\\Users\\Administrator\\Desktop\\test.pdf");
	}
}

docx2pdf实现Word转PDF
这种方案在Windows和Linux中都可用,但有一点需要注意,它只支持Word 2007及以上版本(.docx)的转换,很不幸,笔者的Word模板是2003版(.doc)的,只能用第三种方案了。事实上,这种方案也依赖了iText的包

Java代码


import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.poi.xwpf.converter.pdf.PdfConverter;
import org.apache.poi.xwpf.converter.pdf.PdfOptions;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

	/**
	 *      2007.docx 可用      
			maven包
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi-ooxml</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>
				<groupId>org.apache.poi</groupId>
				<artifactId>poi-scratchpad</artifactId>
				<version>3.15</version>
			</dependency>
			<dependency>  
				<groupId>fr.opensagres.xdocreport</groupId>  
				<artifactId>org.apache.poi.xwpf.converter.xhtml</artifactId>  
				<version>1.0.5</version>  
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>org.apache.poi.xwpf.converter.core</artifactId>
				<version>1.0.6</version>
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>org.apache.poi.xwpf.converter.pdf</artifactId>
				<version>1.0.6</version>
			</dependency>


			<dependency>
				<groupId>org.xhtmlrenderer</groupId>
				<artifactId>flying-saucer-pdf</artifactId>
				<version>9.1.16</version>
			</dependency>
			<dependency>
				<groupId>org.jsoup</groupId>
				<artifactId>jsoup</artifactId>
				<version>1.11.3</version>
			</dependency>
			<dependency>
				<groupId>com.itextpdf.tool</groupId>
				<artifactId>xmlworker</artifactId>
				<version>5.5.13</version>
			</dependency>
			<dependency>
				<groupId>fr.opensagres.xdocreport</groupId>
				<artifactId>fr.opensagres.xdocreport.document</artifactId>
				<version>1.0.5</version>
			</dependency>
	 * 
	 */
/**
 * Converts a .docx document to PDF using the xdocreport POI-to-PDF converter.
 */
public class WordToPDF {
	/**
	 * Converts a Word document to PDF without explicit converter options.
	 *
	 * @param source
	 *            the Word document; must be in .docx format
	 * @param target
	 *            where the PDF is written
	 * @param params
	 *            variables intended for substitution; currently not used by
	 *            the conversion
	 * @throws Exception
	 *             if the document cannot be read or converted
	 */
	public static void wordConverterToPdf(InputStream source,
			OutputStream target, Map<String, String> params) throws Exception {
		wordConverterToPdf(source, target, null, params);
	}

	/**
	 * Converts a Word document to PDF.
	 *
	 * <p>Neither stream is closed by this method; the caller owns them.
	 *
	 * @param source
	 *            the Word document; must be in .docx format
	 * @param target
	 *            where the PDF is written
	 * @param options
	 *            e.g. PdfOptions.create().fontEncoding( "windows-1250" );
	 *            passed to the converter as given, including {@code null}
	 * @param params
	 *            variables intended for substitution; currently not used by
	 *            the conversion
	 * @throws Exception
	 *             if the document cannot be read or converted
	 */
	public static void wordConverterToPdf(InputStream source, OutputStream target, PdfOptions options,
			Map<String, String> params) throws Exception {
		XWPFDocument docx = new XWPFDocument(source);
		PdfConverter.getInstance().convert(docx, target, options);
	}

	public static void main(String[] args) {
		String filepath = "C:\\Users\\Administrator\\Desktop\\test.docx";
		String outpath = "C:\\Users\\Administrator\\Desktop\\test.pdf";

		InputStream source = null;
		OutputStream target = null;
		try {
			source = new FileInputStream(filepath);
			target = new FileOutputStream(outpath);
			Map<String, String> params = new HashMap<String, String>();

			PdfOptions options = PdfOptions.create();

			wordConverterToPdf(source, target, options, params);
		} catch (Exception e) {
			// Covers FileNotFoundException as well as conversion failures.
			e.printStackTrace();
		} finally {
			// Always release the file handles, even when conversion fails.
			closeQuietly(target);
			closeQuietly(source);
		}
	}

	/** Closes {@code resource} if it is non-null, reporting but not propagating failures. */
	private static void closeQuietly(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (java.io.IOException e) {
			e.printStackTrace();
		}
	}

}

itext+POI实现Word转PDF
这种方案也就是先解析Word,然后将内容转换到PDF中,是三种方案中最繁琐的选择,但如果你的Word模板不是07以上的,而线上环境是Linux,这是最后的选择了

word解析


import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

/**
 * Demonstrates reading a binary (.doc) Word document with Apache POI HWPF:
 * bookmarks, paragraphs, sections, tables and lists.
 */
public class WordUtil {
	public static void main(String[] args) {
		try {
			new WordUtil().testReadByDoc();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Opens the sample document and prints its bookmark information.
	 *
	 * @throws Exception if the file cannot be opened or parsed
	 */
	public void testReadByDoc() throws Exception {
		InputStream is = new FileInputStream("C:\\Users\\Administrator\\Desktop\\100007zldlwts.doc");
		try {
			HWPFDocument doc = new HWPFDocument(is);
			// Print the bookmark information.
			this.printInfo(doc.getBookmarks(), doc);

			// Further operations, left disabled:
			// System.out.println(doc.getDocumentText());   // print the text
			// Range range = doc.getRange();
			// this.insertInfo(range);
			// this.printInfo(range);
			// this.readTable(range);                       // read tables
			// this.readList(range);                        // read lists
			// Range r1 = new Range(0, 1048, doc);          // delete a range
			// r1.delete(); // deleted in memory only; write the document back to persist it
			// Write the current HWPFDocument to an output stream:
			// doc.write(new FileOutputStream("C:\\Users\\Administrator\\Desktop\\100006bzsupdated.doc"));
		} finally {
			// Close the file even when parsing or printing fails.
			this.closeStream(is);
		}
	}

	/**
	 * Closes the input stream, reporting but not propagating failures.
	 *
	 * @param is the stream to close; may be null
	 */
	private void closeStream(InputStream is) {
		if (is == null) {
			return;
		}
		try {
			is.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints the name, position and text of every bookmark.
	 *
	 * @param bookmarks the bookmarks to print
	 * @param doc       the document the bookmarks belong to
	 */
	private void printInfo(Bookmarks bookmarks, HWPFDocument doc) {
		int count = bookmarks.getBookmarksCount();
		System.out.println("书签数量:" + count);
		for (int i = 0; i < count; i++) {
			Bookmark bookmark = bookmarks.getBookmark(i);
			System.out.println("书签" + (i+1) + "的名称是:" + bookmark.getName());
			System.out.println("开始位置:" + bookmark.getStart());
			System.out.println("结束位置:" + bookmark.getEnd());
			String text = new Range(bookmark.getStart(), bookmark.getEnd(), doc).text();
			// Drop the form-field code, then the field control characters
			// 0x13/0x14/0x15 that would otherwise be printed with the text.
			// NOTE(review): the published code called replaceAll("", "") here,
			// a no-op; presumably a control character was lost in publishing.
			// Confirm that stripping all three marks is the intended output.
			System.out.println(text.replace(" FORMTEXT ", "").replaceAll("[\\u0013\\u0014\\u0015]", ""));
		}
	}

	/**
	 * Prints the trimmed text of every cell of every table in the range.
	 * Each carriage return denotes a paragraph, so every cell contains at
	 * least one paragraph and every row ends with one.
	 *
	 * @param range the range to scan for tables
	 */
	private void readTable(Range range) {
		TableIterator tableIter = new TableIterator(range);
		while (tableIter.hasNext()) {
			Table table = tableIter.next();
			int rowNum = table.numRows();
			for (int j = 0; j < rowNum; j++) {
				TableRow row = table.getRow(j);
				int cellNum = row.numCells();
				for (int k = 0; k < cellNum; k++) {
					TableCell cell = row.getCell(k);
					System.out.println(cell.text().trim());
				}
			}
		}
	}

	/**
	 * Prints the text of every paragraph in the range that is part of a list.
	 *
	 * @param range the range to scan
	 */
	private void readList(Range range) {
		int num = range.numParagraphs();
		for (int i = 0; i < num; i++) {
			Paragraph para = range.getParagraph(i);
			if (para.isInList()) {
				System.out.println("list: " + para.text());
			}
		}
	}

	/**
	 * Prints the paragraphs and the section layout of the range.
	 *
	 * <p>Note: as a demonstration this also inserts text after the last
	 * paragraph, so the in-memory document is modified.
	 *
	 * @param range the range to print
	 */
	private void printInfo(Range range) {
		int paraNum = range.numParagraphs();
		System.out.println(paraNum);
		for (int i = 0; i < paraNum; i++) {
			System.out.println("段落" + (i+1) + ":" + range.getParagraph(i).text());
			if (i == (paraNum-1)) {
				this.insertInfo(range.getParagraph(i));
			}
		}
		int secNum = range.numSections();
		System.out.println(secNum);
		for (int i = 0; i < secNum; i++) {
			Section section = range.getSection(i);
			System.out.println(section.getMarginLeft());
			System.out.println(section.getMarginRight());
			System.out.println(section.getMarginTop());
			System.out.println(section.getMarginBottom());
			System.out.println(section.getPageHeight());
			System.out.println(section.text());
		}
	}

	/**
	 * Inserts text after the range; the change is made in memory only.
	 *
	 * @param range the range to insert after
	 */
	private void insertInfo(Range range) {
		range.insertAfter("Hello");
	}

}

pdf生成


import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chunk;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.Image;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.ColumnText;
import com.itextpdf.text.pdf.PdfPageEventHelper;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Demonstrates generating a PDF with iText 5: a paragraph of Chinese text,
 * a scaled image and a page header drawn by {@link HeaderFoot}.
 */
public class PDFUtil {
	/**
	 * Writes the sample PDF to the desktop.
	 *
	 * @param args unused
	 * @throws Exception if the font, image or output file cannot be used
	 */
	public static void main(String[] args) throws Exception {
		OutputStream out = new FileOutputStream(new File("C:\\Users\\Administrator\\Desktop\\test.pdf"));
		try {
			BaseFont bfChinese = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
			Font textFont = new Font(bfChinese, 12, Font.NORMAL, BaseColor.BLACK);

			Document document = new Document(PageSize.A4);
			PdfWriter writer = PdfWriter.getInstance(document, out);
			// Register the header before opening the document so that every
			// page gets it, not only the page that ends after registration.
			writer.setPageEvent(new HeaderFoot("Title"));
			document.open();

			Paragraph paragraphRemark = new Paragraph();
			paragraphRemark.setFirstLineIndent(2f);
			paragraphRemark.add(new Chunk("特别提醒:", textFont));
			paragraphRemark.add(Chunk.NEWLINE);
			document.add(paragraphRemark);

			Image img = Image.getInstance("C:\\Users\\Administrator\\Desktop\\图片1.png");
			img.scaleToFit(400f, 200f);
			document.add(img);

			// Closing the document finishes the PDF and closes the writer.
			document.close();
		} finally {
			// Release the file handle even when generation fails part-way.
			out.close();
		}
	}
}

/**
 * Page event that draws a horizontally centred header at the top margin of
 * each page it is notified about.
 */
class HeaderFoot extends PdfPageEventHelper {
    private final String header;

    /** Created on first use and then reused for every page. */
    private Font headerFont;

    /**
     * @param header the text to draw as the page header
     */
    public HeaderFoot(String header) {
        this.header = header;
    }

    /**
     * Draws the header centred between the left and right margins, at the
     * top of the content area.
     *
     * @param writer   the writer producing the page
     * @param document the document being written
     */
    @Override
    public void onEndPage(PdfWriter writer, Document document) {
        // Use the real midpoint rather than a fixed offset from the left
        // margin, so the header stays centred for any page size or margins.
        float centerX = (document.left() + document.right()) / 2f;
        ColumnText.showTextAligned(writer.getDirectContent(),
                Element.ALIGN_CENTER, new Phrase(header, headerFont()),
                centerX, document.top(), 0);
    }

    /**
     * Returns the header font, creating it on first use. If the base font
     * cannot be created the failure is reported and the font is built
     * without a base font, as the page must still be written.
     */
    private Font headerFont() {
        if (headerFont == null) {
            BaseFont baseFont = null;
            try {
                baseFont = BaseFont.createFont("STSongStd-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            } catch (DocumentException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            headerFont = new Font(baseFont, 16, Font.BOLD, BaseColor.BLACK);
        }
        return headerFont;
    }
}

猜你喜欢

转载自blog.csdn.net/weixin_37481769/article/details/85296357