java pdf转word 高效不失真

将 Java 工程导出为 jar 包,再通过 bat 批处理文件执行该 jar 包。

---------------------------------------------------------------------------------------------------------------

eclipse 工程 

  1. Pdf2Word.java 解析pdf主类。
  2. laq_.txt             操作说明。
  3. license.xml       aspose license。
  4. aspose.pdf-11.5.0.jar 转 Word 所需的 jar 包,可在 Aspose 官网下载(参考 https://docs.aspose.com/display/pdfnet/Printing+PDF+Document )。

---------------------------------------------------------------------------------------------------------------

附源码

Pdf2Word.java

package com.bxj;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;

import com.aspose.pdf.Document;
import com.aspose.pdf.License;
import com.aspose.pdf.SaveFormat;

/**
 * Command-line entry point: applies the Aspose license and then starts two
 * threads, one running {@link AnalyzePdf} over the directory given as the
 * first argument and one running {@link AnalyzeTxt}.
 */
public class Pdf2Word {

    /**
     * Loads {@code license.xml} from the system class path and applies it.
     *
     * @return {@code true} when the license resource was found and
     *         {@code setLicense} completed without throwing; {@code false}
     *         otherwise
     */
    public static boolean getLicense() {
        // try-with-resources guarantees the resource stream is closed on every path.
        try (InputStream licenseStream = ClassLoader.getSystemResourceAsStream("license.xml")) {
            if (licenseStream == null) {
                // getSystemResourceAsStream returns null when the resource is absent.
                System.out.println("license not found!");
                return false;
            }
            License asposeLicense = new License();
            asposeLicense.setLicense(licenseStream);
            return true;
        } catch (Exception e) {
            // Keep the historical message, but also expose the real cause.
            System.out.println("license not found!");
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Starts the conversion.
     *
     * @param args {@code args[0]} is the directory that is scanned for PDF files
     */
    public static void main(String[] args) {
        try {
            if (!getLicense()) {
                return;
            }
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            if (args == null || args.length == 0 || args[0].trim().isEmpty()) {
                System.err.println("Usage: java -jar <jar-file> <pdf-directory>");
                return;
            }
            // Normalise Windows separators so the path is handled uniformly.
            String pdfDir = args[0].replace("\\", "/");
            Thread pdfThread = new Thread(new AnalyzePdf(pdfDir));
            Thread txtThread = new Thread(new AnalyzeTxt());
            pdfThread.start();
            txtThread.start();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}


/**
 * Runnable that walks a directory tree and converts every PDF file it finds,
 * writing the result next to the source file with a {@code .doc} extension.
 */
class AnalyzePdf implements Runnable {

    /** Extension that marks an input file (matched case-insensitively). */
    private static final String PDF_EXTENSION = ".pdf";

    /**
     * Extension given to the output file.
     * NOTE(review): the document is saved with SaveFormat.DocX, yet the name
     * keeps the historical ".doc" suffix so existing output paths do not
     * change — consider switching to ".docx".
     */
    private static final String WORD_EXTENSION = ".doc";

    String fileDir;

    /**
     * @param fileDir root directory to scan for PDF files
     */
    public AnalyzePdf(String fileDir) {
        this.fileDir = fileDir;
    }

    /** Converts every PDF found under {@link #getFileDir()}. */
    @Override
    public void run() {
        recursion(fileDir);
    }

    /**
     * Converts the PDFs directly inside {@code fileDir} and descends into its
     * sub-directories.
     *
     * @param fileDir directory to scan; silently ignored when it cannot be listed
     */
    private static void recursion(String fileDir) {
        // All files and folders in the directory; null when it is not a listable directory.
        File[] entries = new File(fileDir).listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (entry.isFile() && hasPdfExtension(entry.getName())) {
                pdf2word(entry);
            } else if (entry.isDirectory()) {
                // Recurse into the sub-directory.
                recursion(entry.getAbsolutePath());
            }
        }
    }

    /** Returns whether {@code name} ends with ".pdf", ignoring case. */
    private static boolean hasPdfExtension(String name) {
        int extLength = PDF_EXTENSION.length();
        // regionMatches returns false for a negative offset, so short names are safe.
        return name.regionMatches(true, name.length() - extLength, PDF_EXTENSION, 0, extLength);
    }

    /**
     * Swaps only the trailing ".pdf" for the Word extension. The previous
     * {@code replace("pdf", "doc")} rewrote every "pdf" in the path, including
     * directory names, and so could point at a directory that does not exist.
     */
    private static String toWordPath(String pdfPath) {
        return pdfPath.substring(0, pdfPath.length() - PDF_EXTENSION.length()) + WORD_EXTENSION;
    }

    /**
     * Converts one PDF file and writes the result beside it. Failures are
     * reported on stderr and do not stop the directory walk.
     *
     * @param pdfFile the PDF file to convert
     */
    private static void pdf2word(File pdfFile) {
        String outputPath = toWordPath(pdfFile.getAbsolutePath());
        try (InputStream pdfInput = new FileInputStream(pdfFile)) {
            Document pdfDocument = new Document(pdfInput);
            // Open the output only after the PDF has been loaded, so an unreadable
            // PDF does not leave an empty output file behind. FileOutputStream
            // creates the file itself; both streams are closed on every path.
            try (OutputStream wordOutput = new FileOutputStream(outputPath)) {
                pdfDocument.save(wordOutput, SaveFormat.DocX);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** @return the root directory that is scanned */
    public String getFileDir() {
        return fileDir;
    }

    /** @param fileDir the root directory to scan */
    public void setFileDir(String fileDir) {
        this.fileDir = fileDir;
    }
}

/**
 * Runnable that prints the {@code laq_.txt} class-path resource to standard
 * output one character at a time, pausing briefly after each non-space
 * character.
 */
class AnalyzeTxt implements Runnable {

    /** Code point of the space character, which is printed without a pause. */
    private static final int SPACE = 32;

    /**
     * Streams the resource to {@code System.out}. A missing resource is
     * reported on stderr instead of surfacing as a NullPointerException.
     */
    @Override
    public void run() {
        InputStream resource = ClassLoader.getSystemResourceAsStream("laq_.txt");
        if (resource == null) {
            // getSystemResourceAsStream returns null when the resource is absent.
            System.err.println("laq_.txt not found on the class path; nothing to display.");
            return;
        }
        // try-with-resources closes the reader (and the wrapped stream) on every path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(resource))) {
            int c;
            while ((c = reader.read()) != -1) {
                if (c != SPACE) {
                    Thread.sleep(1);
                }
                System.out.print((char) c);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the thread's owner can observe it.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

laq_.txt 

  • 将需转换的pdf文件置于 out 目录
  • 输出文件在 out 目录查看

license.xml

<License>
<Data>
<Products>
<Product>Aspose.Total for Java</Product>
<Product>Aspose.Pdf for Java</Product>
</Products>
<EditionType>Enterprise</EditionType>
<SubscriptionExpiry>20991231</SubscriptionExpiry>
<LicenseExpiry>20991231</LicenseExpiry>
<SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>
</Data>
<Signature>sNLLKGMUdF0r8O1kKilWAGdgfs2BvJb/2Xp8p5iuDVfZXmhppo+d0Ran1P9TKdjV4ABwAgKXxJ3jcQTqE/2IRfqwnPf8itN8aFZlV3TJPYeD3yWE7IT55Gz6EijUpC7aKeoohTb4w2fpox58wWoF3SNp6sK6jDfiAUGEHYJ9pjU=</Signature>
</License>

MANIFEST.MF

Manifest-Version: 1.0
Class-Path: lib/aspose.pdf-11.5.0.jar
Main-Class: com.bxj.Pdf2Word

 ---------------------------------------------------------------------------------------------------------------

导为 jar 包

导出目录

lib 存放 aspose.pdf-11.5.0.jar 

out 存放待转换的 pdf 文件(转换完成的 word 文件也输出到该目录)

 运行.bat 批处理文件

@echo off
rem Run the converter on the "out" folder under the current working directory.
rem The path is quoted so that a directory containing spaces is passed as one argument.
set "current_path=%cd%\out"
rem JVM options go before -jar; the program argument follows the jar file.
java -Xms4000m -Xmx4000m -Xmn2000m -jar Pdf2Word_LAQ.jar "%current_path%"
@pause

欢迎关注  有疑问可交流  

猜你喜欢

转载自www.cnblogs.com/0o00o0/p/9412119.html