目录
运行该程序首先要部署Java运行环境,Windows下部署JDK的方式请自行上网搜索。
1GUI
利用Java实现PDF文件提取文字、转为Word、转为Excel和提取图片功能,GUI代码如下:
扫描二维码关注公众号,回复:
17180975 查看本文章
package com.example.yrz;
/**
* @Author yrz
* @create 2023/6/20 17:45
* @Description Swing GUI for the PDF parser: choose a PDF, then extract text, convert to Word/Excel, or extract images
*/
import com.example.yrz.authorization.AuthorizationCheck;
import com.example.yrz.encoder.PDFEncoder;
import javax.swing.*;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
/**
 * Main window of the PDF parser.
 *
 * <p>The window shows a row of radio buttons that select the parse mode, a button that opens a
 * file chooser, and a read-only log area. After a file is chosen it is handed to the matching
 * {@code PDFEncoder} method. The conversion runs on a background thread so the window stays
 * responsive and the "parsing" log line is painted before the work starts.
 */
public class FileChooser extends JFrame implements ActionListener, ItemListener {
    JButton openButton;
    JPanel radioPanel;
    ButtonGroup radioGroup;
    JTextArea log;
    JFileChooser fc;
    JRadioButton extractTextButton;
    JRadioButton convertToWordButton;
    JRadioButton convertToExcelButton;
    JRadioButton extractImagesButton;
    DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /**
     * Currently selected parse mode: 0 = extract text, 1 = convert to Word, 2 = convert to
     * Excel, 3 = extract images. Updated by the radio buttons; kept public, static and boxed
     * so that existing code reading or writing it keeps working.
     */
    public static Integer PARSE_TYPE = 0;

    /** Builds the window, wires up the listeners and shows the frame. */
    public FileChooser() {
        super("PDF解析器");
        // Stop the process when the window is closed; set before the frame becomes visible.
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        openButton = new JButton("点击此处选择文件");
        openButton.addActionListener(this);

        log = new JTextArea(11, 20);
        log.setEditable(false);
        JScrollPane logScrollPane = new JScrollPane(log);

        fc = new JFileChooser();
        // Offer a PDF filter and pre-select it; the "all files" filter remains available.
        FileNameExtensionFilter filter = new FileNameExtensionFilter("PDF Documents", "pdf");
        fc.addChoosableFileFilter(filter);
        fc.setFileFilter(filter);

        // Radio buttons: one per parse mode, mutually exclusive through the button group.
        radioPanel = new JPanel(new GridLayout(1, 0));
        radioGroup = new ButtonGroup();
        extractTextButton = new JRadioButton("提取文字");
        convertToWordButton = new JRadioButton("转Word");
        convertToExcelButton = new JRadioButton("转Excel");
        extractImagesButton = new JRadioButton("提取图片");
        JRadioButton[] modeButtons = {
            extractTextButton, convertToWordButton, convertToExcelButton, extractImagesButton
        };
        for (JRadioButton button : modeButtons) {
            radioGroup.add(button);
            radioPanel.add(button);
        }
        // Select the default before attaching listeners, as the original did.
        extractTextButton.setSelected(true);
        for (JRadioButton button : modeButtons) {
            button.addItemListener(this);
        }

        add(radioPanel, BorderLayout.PAGE_START);
        add(openButton, BorderLayout.CENTER);
        add(logScrollPane, BorderLayout.PAGE_END);
        setSize(400, 300);
        setVisible(true);
    }

    /**
     * Handles a click on the open button: asks for a file and, if one is approved, parses it
     * in the background according to the parse mode selected at the moment of the click.
     *
     * @param e the action event; events from any source other than the open button are ignored
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        if (e.getSource() != openButton) {
            return;
        }
        int returnVal = fc.showOpenDialog(FileChooser.this);
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            appendLog("用户取消选择文件!");
            return;
        }

        final File file = fc.getSelectedFile();
        // Snapshot the mode now so a radio-button change during parsing cannot affect this run;
        // a null value falls back to text extraction, matching the switch default.
        Integer selected = PARSE_TYPE;
        final int parseType = selected == null ? 0 : selected;

        appendLog("正在解析: " + file.getName() + "!");
        // Prevent a second parse from being started while this one is running.
        openButton.setEnabled(false);

        new SwingWorker<Void, Void>() {
            @Override
            protected Void doInBackground() {
                // Runs off the Event Dispatch Thread so the UI keeps repainting.
                runParser(file, parseType);
                return null;
            }

            @Override
            protected void done() {
                // Runs on the Event Dispatch Thread, so touching Swing components is safe here.
                try {
                    get();
                    appendLog("解析完成: " + file.getName() + "!");
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    appendLog("解析被中断: " + file.getName() + "!");
                } catch (java.util.concurrent.ExecutionException ex) {
                    Throwable cause = ex.getCause() != null ? ex.getCause() : ex;
                    appendLog("解析失败: " + file.getName() + " (" + cause + ")");
                } finally {
                    openButton.setEnabled(true);
                }
            }
        }.execute();
    }

    /**
     * Dispatches the file to the {@code PDFEncoder} method for the given mode. Any value other
     * than 1, 2 or 3 results in text extraction.
     */
    private static void runParser(File file, int parseType) {
        switch (parseType) {
            case 1:
                // Convert to Word
                PDFEncoder.turnIntoWord(file);
                break;
            case 2:
                // Convert to Excel
                PDFEncoder.turnIntoExcel(file);
                break;
            case 3:
                // Extract images
                PDFEncoder.extractImg(file);
                break;
            default:
                // Extract the text of the PDF
                PDFEncoder.textExtraction(file);
                break;
        }
    }

    /** Appends a timestamped line to the log area and scrolls to the end. */
    private void appendLog(String message) {
        log.append(LocalDateTime.now().format(dateTimeFormatter) + message + "\n");
        log.setCaretPosition(log.getDocument().getLength());
    }

    /**
     * Records the parse mode that belongs to the radio button that has just been selected.
     *
     * @param e the item event; deselection events are ignored
     */
    @Override
    public void itemStateChanged(ItemEvent e) {
        if (e.getStateChange() != ItemEvent.SELECTED) {
            return;
        }
        Object source = e.getSource();
        if (source == extractTextButton) {
            PARSE_TYPE = 0;
        } else if (source == convertToWordButton) {
            PARSE_TYPE = 1;
        } else if (source == convertToExcelButton) {
            PARSE_TYPE = 2;
        } else if (source == extractImagesButton) {
            PARSE_TYPE = 3;
        }
    }

    /**
     * Program entry point: runs the authorization check, then creates the window on the Event
     * Dispatch Thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Authorization-code check; NOTE(review): what happens on failure is defined in
        // AuthorizationCheck, which is not visible here.
        AuthorizationCheck.check();
        SwingUtilities.invokeLater(FileChooser::new);
    }
}
2效果图
2.1GUI
2.2提取文字
pdf文档:
txt文档:
2.3转为Word
转为Word后无法保留原始的布局与格式。
2.4转为Excel
pdf文档:
Excel文档:
纯文本的PDF可以转换为正确的Excel文档。
2.5提取图片
想要PDF解析器程序的请私聊我(* ̄︶ ̄)