使用 iText 和 jxl 实现 PDF 转 TXT、TXT 转 Excel

使用 iText 和 jxl 实现 PDF 转 TXT、TXT 转 Excel

pom.xml配置

<!-- Dependency versions, kept in one place and referenced below as ${...} properties -->
<properties>
    <com.itextpdf.version>5.3.2</com.itextpdf.version>
    <org.bouncycastle.version>1.52</org.bouncycastle.version>
    <jxl.version>1.0</jxl.version>
</properties>
   
<!-- Dependencies -->
<dependencies>
   <!-- iText: provides the com.itextpdf classes imported by ReadPdfByiText to read the PDF -->
   <dependency>
       <groupId>com.itextpdf</groupId>
       <artifactId>itextpdf</artifactId>
       <version>${com.itextpdf.version}</version>
       <scope>compile</scope>
   </dependency>
   <!-- BouncyCastle, originally labelled "read PDF".
        NOTE(review): no org.bouncycastle class is referenced by the Java code in this article;
        presumably it is meant as a runtime companion for iText. Confirm that bcpg is the
        artifact actually required. -->
   <dependency>
       <groupId>org.bouncycastle</groupId>
       <artifactId>bcpg-jdk15on</artifactId>
       <version>${org.bouncycastle.version}</version>
   </dependency>
   <!-- jxl: provides the jxl.* classes imported by TextToExcel to write the .xls workbook -->
   <dependency>
       <groupId>jxl</groupId>
       <artifactId>jxl</artifactId>
       <version>${jxl.version}</version>
   </dependency>
</dependencies>

itext 读取pdf->txt

package itext;


import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

/**
 * Extracts the text of a PDF with iText and writes it to a text file.
 *
 * <p>Both public extraction methods take ownership of the supplied writer and
 * close it before returning, whether or not extraction succeeded.
 */
public class ReadPdfByiText {

    /**
     * Reads the hard-coded PDF and writes all of its text to the hard-coded
     * output file.
     *
     * @param args unused
     * @throws IOException if the output file cannot be created
     */
    public static void main(String[] args) throws IOException {
        String outputPath = "D:\\developcodespace\\PdfContent_1.txt";
        String fileName = "D:\\developcodespace\\20190323175137823782.pdf";

        // Encode explicitly as UTF-8 instead of the platform default: the
        // follow-up TextToExcel step decodes this file as UTF-8, so relying on
        // the default charset corrupts non-ASCII text on many systems.
        PrintWriter writer = new PrintWriter(outputPath, "UTF-8");

        readPdf(writer, fileName); // extract the text of every page

        // readPdf_filter(writer, fileName); // extract only one region of every page
    }

    /**
     * Writes the text of every page of the PDF to {@code writer}, then closes
     * the writer. Failures are reported with a stack trace and nothing is
     * written in that case.
     *
     * @param writer   destination for the extracted text; always closed on return
     * @param fileName path of the PDF to read
     */
    public static void readPdf(PrintWriter writer,String fileName){
        extractTo(writer, fileName, null);
    }

    /**
     * Same as {@link #readPdf(PrintWriter, String)} but keeps only the text
     * that falls inside a fixed rectangle on each page.
     *
     * @param writer   destination for the extracted text; always closed on return
     * @param fileName path of the PDF to read
     */
    public static void readPdf_filter(PrintWriter writer,String fileName){
        // iText Rectangle(llx, lly, urx, ury): lower-left (90, 0), upper-right (450, 40).
        Rectangle rect = new Rectangle(90, 0, 450, 40);
        RenderFilter filter = new RegionTextRenderFilter(rect);
        extractTo(writer, fileName, filter);
    }

    /**
     * Shared extraction loop: concatenates the text of all pages (optionally
     * restricted by {@code filter}), writes it in one go, and releases both
     * the PDF reader and the writer.
     *
     * @param filter region filter to apply, or {@code null} for the whole page
     */
    private static void extractTo(PrintWriter writer, String fileName, RenderFilter filter) {
        // StringBuilder avoids the quadratic cost of String += across many pages.
        StringBuilder pageContent = new StringBuilder();
        PdfReader reader = null;
        try {
            reader = new PdfReader(fileName);
            int pageNum = reader.getNumberOfPages();
            for (int i = 1; i <= pageNum; i++) { // iText page numbers are 1-based
                if (filter == null) {
                    pageContent.append(PdfTextExtractor.getTextFromPage(reader, i));
                } else {
                    // A fresh strategy per page so text from earlier pages is not carried over.
                    TextExtractionStrategy strategy =
                            new FilteredTextRenderListener(new LocationTextExtractionStrategy(), filter);
                    pageContent.append(PdfTextExtractor.getTextFromPage(reader, i, strategy));
                }
            }
            writer.write(pageContent.toString());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close(); // release the underlying PDF file handle
                }
            } finally {
                writer.close();
            }
        }
    }
}

jxl读取txt->excel

package itext;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
import jxl.write.WriteException;
import jxl.write.biff.RowsExceededException;
/**
 * Converts the text file produced from the PDF into an Excel (.xls) workbook
 * using jxl: one sheet, a fixed header row, then one row per input line that
 * starts with "014".
 */
public class TextToExcel {

    /**
     * Reads the hard-coded text file and writes the hard-coded workbook.
     * Errors are reported with a stack trace; the process always exits with
     * status 0.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        File file = new File("D:\\developcodespace\\PdfContent_1.txt");// text file to read
        File file2 = new File("D:\\developcodespace\\work.xls");// Excel workbook to create

        if (file.exists() && file.isFile()) {

            BufferedReader input = null;
            WritableWorkbook wbook = null;

            try {
                input = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));

                wbook = Workbook.createWorkbook(file2);// creates the workbook at the target path
                WritableSheet sheet = wbook.createSheet("first", 0);// first tab

                writeHeader(sheet);
                writeRows(input, sheet);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (WriteException e) {
                e.printStackTrace();
            } finally {
                // Each resource is released independently and only if it was
                // actually opened, so an early failure cannot turn into a
                // NullPointerException that hides the original error.
                finishWorkbook(wbook);
                closeReader(input);
            }
            System.out.println("over!");
            System.exit(0);
        } else {
            System.out.println("file is not exists or not a file");
            System.exit(0);
        }
    }

    /**
     * Writes the column titles into row 0. A failure is reported and the
     * remaining titles are skipped, but processing continues.
     */
    private static void writeHeader(WritableSheet sheet) {
        String[] titles = {"公司名称", "岗位", "薪资", "状态"};
        try {
            for (int col = 0; col < titles.length; col++) {
                sheet.addCell(new Label(col, 0, titles[col]));
            }
        } catch (WriteException e) { // also covers RowsExceededException
            e.printStackTrace();
        }
    }

    /**
     * Copies every line starting with "014" into the sheet, one line per row
     * beginning at row 1, one non-blank space/tab-separated token per column.
     */
    private static void writeRows(BufferedReader input, WritableSheet sheet)
            throws IOException, WriteException {
        int row = 1;// row 0 holds the header
        String line;
        while ((line = input.readLine()) != null) {
            if (!line.startsWith("014")) {
                continue;
            }
            String[] words = line.split("[ \t]");// split on a single space or tab

            int col = 0;
            for (int i = 0; i < words.length; i++) {
                // Consecutive separators yield empty tokens; skip those.
                if (!words[i].matches("\\s*")) {
                    sheet.addCell(new Label(col, row, words[i].trim()));
                    col++;
                }
            }
            row++;
        }
    }

    /**
     * Flushes whatever was added to the workbook and closes it; the close is
     * attempted even when the flush fails. No-op when the workbook was never
     * created.
     */
    private static void finishWorkbook(WritableWorkbook wbook) {
        if (wbook == null) {
            return;
        }
        try {
            try {
                wbook.write();
            } finally {
                wbook.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (WriteException e) {
            e.printStackTrace();
        }
    }

    /** Closes the reader (and the streams it wraps) if it was opened. */
    private static void closeReader(BufferedReader input) {
        if (input == null) {
            return;
        }
        try {
            input.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

发布了22 篇原创文章 · 获赞 3 · 访问量 6756

猜你喜欢

转载自blog.csdn.net/weixin_39910081/article/details/88778032