Java HTML直接导出PDF

Java HTML直接导出PDF

对于java中如何从html中直接导出pdf，有很多的开源代码，这里个人用itext转。

首先需要的包有：core-renderer-1.0.jar

core-renderer-R8pre1.jar

core-renderer.jar

iText-2.0.8.jar

jtidy-4aug2000r7-dev.jar

Tidy.jar

iTextAsian.jar

java代码的话就比较简单了。具体是先用Tidy将html转换为xhtml，将xhtml转换为其它各种格式的。虽然在转化到pdf时也是用的iText。代码如下：

	//struts1.x中

else if("Html2Pdf".equalsIgnoreCase(action)){
	exportPdfFile("http://localhost:8080/jsp/test.jsp");
	return null;
}

// 导出pdf add by huangt 2012.6.1
	public File exportPdfFile(String urlStr) throws BaseException {
		// String outputFile = this.fileRoot + "/" +
		// ServiceConstants.DIR_PUBINFO_EXPORT + "/" + getFileName() + ".pdf";
		String outputFile = "d:/test3.pdf";
		OutputStream os;
		try {
			os = new FileOutputStream(outputFile);

			ITextRenderer renderer = new ITextRenderer();

			String str = getHtmlFile(urlStr);
			renderer.setDocumentFromString(str);
			ITextFontResolver fontResolver = renderer.getFontResolver();
			
			fontResolver.addFont("C:/WINDOWS/Fonts/SimSun.ttc",BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);// 宋体字
			fontResolver.addFont("C:/WINDOWS/Fonts/Arial.ttf",BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);// 宋体字
			renderer.layout();

			renderer.createPDF(os);

			System.out.println("转换成功！");
			os.flush();
			os.close();
			return new File(outputFile);
		} catch (FileNotFoundException e) {
			// logger.error("不存在文件！" + e.getMessage());
			throw new BaseException(e);
		} catch (DocumentException e) {
			// logger.error("生成pdf时出错了！" + e.getMessage());
			throw new BaseException(e);
		} catch (IOException e) {
			// logger.error("pdf出错了！" + e.getMessage());
			throw new BaseException(e);
		}

	}

	// 读取页面内容 add by huangt 2012.6.1
	public String getHtmlFile(String urlStr) throws BaseException {
		URL url;
		try {
			if (urlStr.indexOf("?") != -1) {
				urlStr = urlStr + "&locale="
						+ LocaleContextHolder.getLocale().toString();
			} else {
				urlStr = urlStr + "?locale="
						+ LocaleContextHolder.getLocale().toString();
			}
			url = new URL(urlStr);

			URLConnection uc = url.openConnection();
			InputStream is = uc.getInputStream();
			
			Tidy tidy = new Tidy();

			OutputStream os2 = new ByteArrayOutputStream();
			tidy.setXHTML(true); // 设定输出为xhtml(还可以输出为xml)
			tidy.setCharEncoding(Configuration.UTF8); // 设定编码以正常转换中文
			tidy.setTidyMark(false); // 不设置它会在输出的文件中给加条meta信息
			tidy.setXmlPi(true); // 让它加上<?xml version="1.0"?>
			tidy.setIndentContent(true); // 缩进，可以省略，只是让格式看起来漂亮一些
			tidy.parse(is, os2);

			is.close();

			// 解决乱码 --将转换后的输出流重新读取改变编码
			String temp;
			StringBuffer sb = new StringBuffer();
			BufferedReader in = new BufferedReader(new InputStreamReader(
					new ByteArrayInputStream(
							((ByteArrayOutputStream) os2).toByteArray()),
					"utf-8"));
			while ((temp = in.readLine()) != null) {
				sb.append(temp);
			}

			return sb.toString();
		} catch (IOException e) {
			// logger.error("读取客户端网页文本信息时出错了" + e.getMessage());
			throw new BaseException(e);
		}

	}

为了解决包的问题，加上Maven

			<dependency>
				<groupId>com.lowagie</groupId>
				<artifactId>itext</artifactId>
				<version>2.1.7</version>
			</dependency>
			<dependency>
				<groupId>org.xhtmlrenderer.flyingsaucer</groupId>
				<artifactId>pdf-renderer</artifactId>
				<version>1.0</version>
			</dependency>
			<dependency>
				<groupId>jtidy</groupId>
				<artifactId>jtidy</artifactId>
				<version>4aug2000r7-dev</version>
				<type>jar</type>
				<scope>compile</scope>
			</dependency>
			<dependency>
				<groupId>net.sf.barcode4j</groupId>
				<artifactId>barcode4j-light</artifactId>
				<version>2.0</version>
			</dependency>
			<dependency>
				<groupId>avalon-framework</groupId>
				<artifactId>avalon-framework-impl</artifactId>
				<version>4.2.0</version>
			</dependency>
			<!-- pdf -->

另外附上稍微复杂的PDFUtils.java文件，由于没时间就不做整理解释了！见下载附件！

Java HTML直接导出PDF

猜你喜欢