PDF解析器

目录

1GUI

2效果图

2.1GUI

 2.2提取文字

 2.3转为Word

2.4转为Excel

2.5提取图片


运行该程序前需要先部署Java运行环境;Windows下安装JDK的方法请自行上网搜索。

1GUI

利用Java实现PDF文件的提取文字、转为Word、转为Excel和提取图片等功能,GUI代码如下:

扫描二维码关注公众号,回复: 17180975 查看本文章
package com.example.yrz;

/**
 * @Author yrz
 * @create 2023/6/20 17:45
 * @Description TODO
 */


import com.example.yrz.authorization.AuthorizationCheck;
import com.example.yrz.encoder.PDFEncoder;

import javax.swing.*;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;

/**
 * Main window of the PDF parser.
 *
 * <p>The window shows a row of radio buttons that select the operation
 * (extract text, convert to Word, convert to Excel, extract images), a button
 * that opens a file chooser, and a read-only log area. The selected file is
 * handed to the matching {@code PDFEncoder} method on a background thread so
 * the window stays responsive while the file is processed.
 */
public class FileChooser extends JFrame implements ActionListener, ItemListener {
    JButton openButton;
    JPanel radioPanel;
    ButtonGroup radioGroup;
    JTextArea log;
    JFileChooser fc;
    JRadioButton extractTextButton;
    JRadioButton convertToWordButton;
    JRadioButton convertToExcelButton;
    JRadioButton extractImagesButton;
    DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    // Shared selection: 0 = extract text, 1 = to Word, 2 = to Excel, 3 = extract images.
    public static Integer PARSE_TYPE = 0;

    /**
     * Builds the window, wires the listeners and shows the frame.
     * Must be called on the Event Dispatch Thread.
     */
    public FileChooser() {
        super("PDF解析器");
        openButton = new JButton("点击此处选择文件");
        openButton.addActionListener(this);
        log = new JTextArea(11, 20);
        log.setEditable(false);
        JScrollPane logScrollPane = new JScrollPane(log);
        fc = new JFileChooser();
        // Offer a PDF filter and make it the initially selected one; the
        // accept-all filter stays available in the drop-down.
        FileNameExtensionFilter filter = new FileNameExtensionFilter("PDF Documents", "pdf");
        fc.addChoosableFileFilter(filter);
        fc.setFileFilter(filter);
        // Radio button group for the operation to perform.
        radioPanel = new JPanel(new GridLayout(1, 0));
        radioGroup = new ButtonGroup();
        extractTextButton = new JRadioButton("提取文字");
        convertToWordButton = new JRadioButton("转Word");
        convertToExcelButton = new JRadioButton("转Excel");
        extractImagesButton = new JRadioButton("提取图片");
        radioGroup.add(extractTextButton);
        radioGroup.add(convertToWordButton);
        radioGroup.add(convertToExcelButton);
        radioGroup.add(extractImagesButton);
        radioPanel.add(extractTextButton);
        radioPanel.add(convertToWordButton);
        radioPanel.add(convertToExcelButton);
        radioPanel.add(extractImagesButton);
        // Default selection is set before the listeners are attached, so it
        // does not fire itemStateChanged; PARSE_TYPE keeps its current value.
        extractTextButton.setSelected(true);
        extractTextButton.addItemListener(this);
        convertToWordButton.addItemListener(this);
        convertToExcelButton.addItemListener(this);
        extractImagesButton.addItemListener(this);
        // Lay out the components.
        add(radioPanel, BorderLayout.PAGE_START);
        add(openButton, BorderLayout.CENTER);
        add(logScrollPane, BorderLayout.PAGE_END);
        setSize(400, 300);
        // Configure the close behaviour before the window becomes visible so
        // that closing it always terminates the process.
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        setVisible(true);
    }

    /**
     * Handles a click on the open button: asks the user for a file and, if one
     * was chosen, processes it in the background according to the operation
     * selected at that moment.
     *
     * @param e the action event; events from other sources are ignored
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        if (e.getSource() != openButton) {
            return;
        }
        int returnVal = fc.showOpenDialog(FileChooser.this);
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            appendLog("用户取消选择文件!");
            return;
        }
        final File file = fc.getSelectedFile();
        // Snapshot the selection now: PARSE_TYPE is public and mutable, may be
        // changed while the worker runs, and could even be set to null.
        Integer selected = PARSE_TYPE;
        final int parseType = selected == null ? 0 : selected;
        appendLog("正在解析: " + file.getName() + "!");
        // Prevent overlapping runs while a file is being processed.
        openButton.setEnabled(false);
        new SwingWorker<Void, Void>() {
            @Override
            protected Void doInBackground() {
                // Runs off the EDT so the "parsing" log line can be painted
                // and the window keeps responding.
                parse(file, parseType);
                return null;
            }

            @Override
            protected void done() {
                // Runs on the EDT once doInBackground has finished.
                openButton.setEnabled(true);
                try {
                    get(); // rethrows anything doInBackground threw
                    appendLog("解析完成: " + file.getName() + "!");
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    appendLog("解析被中断: " + file.getName() + "!");
                } catch (java.util.concurrent.ExecutionException ex) {
                    Throwable cause = ex.getCause() != null ? ex.getCause() : ex;
                    appendLog("解析失败: " + file.getName() + " (" + cause + ")!");
                }
            }
        }.execute();
    }

    /**
     * Records the newly selected operation in {@link #PARSE_TYPE}.
     *
     * @param e the item event; only {@code SELECTED} transitions are handled
     */
    @Override
    public void itemStateChanged(ItemEvent e) {
        if (e.getStateChange() == ItemEvent.SELECTED) {
            if (e.getSource() == extractTextButton) {
                PARSE_TYPE = 0;
            } else if (e.getSource() == convertToWordButton) {
                PARSE_TYPE = 1;
            } else if (e.getSource() == convertToExcelButton) {
                PARSE_TYPE = 2;
            } else if (e.getSource() == extractImagesButton) {
                PARSE_TYPE = 3;
            }
        }
    }

    /** Dispatches the file to the PDFEncoder operation matching {@code parseType}. */
    private static void parse(File file, int parseType) {
        switch (parseType) {
            case 1:
                PDFEncoder.turnIntoWord(file);
                break;
            case 2:
                PDFEncoder.turnIntoExcel(file);
                break;
            case 3:
                PDFEncoder.extractImg(file);
                break;
            default:
                // 0 and any unexpected value fall back to text extraction.
                PDFEncoder.textExtraction(file);
                break;
        }
    }

    /** Appends a timestamped line to the log area and scrolls to the end. */
    private void appendLog(String message) {
        log.append(LocalDateTime.now().format(dateTimeFormatter) + message + "\n");
        log.setCaretPosition(log.getDocument().getLength());
    }

    /**
     * Program entry point: runs the authorization check, then creates the
     * window on the Event Dispatch Thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        AuthorizationCheck.check();
        // Swing components must be created and shown on the EDT.
        SwingUtilities.invokeLater(FileChooser::new);
    }
}

2效果图

2.1GUI

 

 2.2提取文字

pdf文档:

 txt文档:

 2.3转为Word

 转为Word后没法保留原始的布局与格式。

2.4转为Excel

pdf文档:

 Excel文档:

 纯文本的PDF可以转化为正确的Excel文档。

2.5提取图片

 想要PDF解析器程序的请私聊我(* ̄︶ ̄)

猜你喜欢

转载自blog.csdn.net/qq_27890899/article/details/131332958
今日推荐