Table of contents
To run this program, you must first install a Java runtime environment (JDK). Search online for instructions on installing the JDK on Windows.
1 GUI
This Java program extracts text from PDF files, converts them to Word, converts them to Excel, and extracts their pictures. The GUI code is as follows:
package com.example.yrz;
/**
* @Author yrz
* @create 2023/6/20 17:45
* @Description TODO
*/
import com.example.yrz.authorization.AuthorizationCheck;
import com.example.yrz.encoder.PDFEncoder;
import javax.swing.*;
import javax.swing.filechooser.FileNameExtensionFilter;
import java.awt.*;
import java.awt.event.*;
import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
/**
 * Main window of the PDF parser.
 *
 * <p>The window shows a row of radio buttons selecting the parse mode, a button that
 * opens a file chooser, and a read-only log area. After a file is chosen, it is handed
 * to the matching {@code PDFEncoder} method on a background thread so that the UI stays
 * responsive and the progress lines in the log are painted while parsing is running.
 *
 * <p>All Swing components are created and touched on the Event Dispatch Thread (EDT).
 */
public class FileChooser extends JFrame implements ActionListener, ItemListener {
    JButton openButton;
    JPanel radioPanel;
    ButtonGroup radioGroup;
    JTextArea log;
    JFileChooser fc;
    JRadioButton extractTextButton;
    JRadioButton convertToWordButton;
    JRadioButton convertToExcelButton;
    JRadioButton extractImagesButton;
    DateTimeFormatter dateTimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Shared parse mode: 0 = extract text, 1 = convert to Word, 2 = convert to Excel, 3 = extract images. */
    public static Integer PARSE_TYPE = 0;

    /**
     * Builds the window, wires the listeners and shows the frame.
     * Must be called on the Event Dispatch Thread.
     */
    public FileChooser() {
        super("PDF解析器");
        // Configure the close behaviour before the frame becomes visible:
        // closing the window terminates the process.
        setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);

        openButton = new JButton("点击此处选择文件");
        openButton.addActionListener(this);

        log = new JTextArea(11, 20);
        log.setEditable(false);
        JScrollPane logScrollPane = new JScrollPane(log);

        fc = new JFileChooser();
        // Offer a PDF filter and make it the active one; adding it alone would leave
        // "All Files" selected when the dialog opens.
        FileNameExtensionFilter filter = new FileNameExtensionFilter("PDF Documents", "pdf");
        fc.addChoosableFileFilter(filter);
        fc.setFileFilter(filter);

        // Create the radio button group.
        radioPanel = new JPanel(new GridLayout(1, 0));
        radioGroup = new ButtonGroup();
        extractTextButton = new JRadioButton("提取文字");
        convertToWordButton = new JRadioButton("转Word");
        convertToExcelButton = new JRadioButton("转Excel");
        extractImagesButton = new JRadioButton("提取图片");
        radioGroup.add(extractTextButton);
        radioGroup.add(convertToWordButton);
        radioGroup.add(convertToExcelButton);
        radioGroup.add(extractImagesButton);
        radioPanel.add(extractTextButton);
        radioPanel.add(convertToWordButton);
        radioPanel.add(convertToExcelButton);
        radioPanel.add(extractImagesButton);

        // Default selection; done before the listeners are attached so no event fires.
        extractTextButton.setSelected(true);

        // Track selection changes to update PARSE_TYPE.
        extractTextButton.addItemListener(this);
        convertToWordButton.addItemListener(this);
        convertToExcelButton.addItemListener(this);
        extractImagesButton.addItemListener(this);

        // Lay out the components.
        add(radioPanel, BorderLayout.PAGE_START);
        add(openButton, BorderLayout.CENTER);
        add(logScrollPane, BorderLayout.PAGE_END);
        setSize(400, 300);
        setVisible(true);
    }

    /**
     * Handles a click on the open button: asks the user for a file and, if one is chosen,
     * parses it in the background according to the current {@link #PARSE_TYPE}.
     *
     * @param e the action event; events from sources other than the open button are ignored
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        if (e.getSource() != openButton) {
            return;
        }
        int returnVal = fc.showOpenDialog(FileChooser.this);
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            appendLog("用户取消选择文件!");
            return;
        }

        final File file = fc.getSelectedFile();
        // Snapshot the mode on the EDT so a later radio-button change cannot affect this job;
        // a null value (PARSE_TYPE is publicly writable) falls back to text extraction.
        Integer selectedType = PARSE_TYPE;
        final int parseType = selectedType == null ? 0 : selectedType;

        appendLog("正在解析: " + file.getName() + "!");
        // Allow only one parse job at a time.
        openButton.setEnabled(false);

        // NOTE(review): assumes the PDFEncoder methods do not touch Swing components,
        // since they now run off the EDT — confirm against PDFEncoder.
        new SwingWorker<Void, Void>() {
            @Override
            protected Void doInBackground() {
                parse(file, parseType);
                return null;
            }

            @Override
            protected void done() {
                // done() runs on the EDT, so updating the log and button here is safe.
                try {
                    get();
                    appendLog("解析完成: " + file.getName() + "!");
                } catch (InterruptedException ex) {
                    Thread.currentThread().interrupt();
                    appendLog("解析失败: " + file.getName() + "! " + ex);
                } catch (java.util.concurrent.ExecutionException ex) {
                    Throwable cause = ex.getCause() != null ? ex.getCause() : ex;
                    appendLog("解析失败: " + file.getName() + "! " + cause);
                } finally {
                    openButton.setEnabled(true);
                }
            }
        }.execute();
    }

    /**
     * Dispatches the file to the {@code PDFEncoder} method for the given mode.
     *
     * @param file      the file chosen by the user
     * @param parseType 1 = Word, 2 = Excel, 3 = images; any other value extracts text
     */
    private static void parse(File file, int parseType) {
        switch (parseType) {
            case 1:
                // Convert to Word
                PDFEncoder.turnIntoWord(file);
                break;
            case 2:
                // Convert to Excel
                PDFEncoder.turnIntoExcel(file);
                break;
            case 3:
                // Extract images
                PDFEncoder.extractImg(file);
                break;
            default:
                // Extract the text of the PDF
                PDFEncoder.textExtraction(file);
                break;
        }
    }

    /**
     * Appends a timestamped line to the log area and scrolls to the end.
     * Must be called on the Event Dispatch Thread.
     *
     * @param message the text to append after the timestamp
     */
    private void appendLog(String message) {
        log.append(LocalDateTime.now().format(dateTimeFormatter) + message + "\n");
        log.setCaretPosition(log.getDocument().getLength());
    }

    /**
     * Updates {@link #PARSE_TYPE} when one of the radio buttons becomes selected.
     *
     * @param e the item event; deselection events are ignored
     */
    @Override
    public void itemStateChanged(ItemEvent e) {
        if (e.getStateChange() != ItemEvent.SELECTED) {
            return;
        }
        Object source = e.getSource();
        if (source == extractTextButton) {
            PARSE_TYPE = 0;
        } else if (source == convertToWordButton) {
            PARSE_TYPE = 1;
        } else if (source == convertToExcelButton) {
            PARSE_TYPE = 2;
        } else if (source == extractImagesButton) {
            PARSE_TYPE = 3;
        }
    }

    /**
     * Program entry point: runs the authorization check, then creates the window on the
     * Event Dispatch Thread.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Verify the authorization code before any UI is shown.
        AuthorizationCheck.check();
        SwingUtilities.invokeLater(() -> new FileChooser());
    }
}
2 Renderings
2.1 GUI
2.2 Extract text
pdf document:
txt document:
2.3 Convert to Word
After converting to Word, the original layout and format cannot be retained.
2.4 Convert to Excel
pdf document:
Excel document:
A plain-text PDF can be converted into a correct Excel document.
2.5 Extract pictures
If you want a PDF parser program, please message me privately (* ̄︶ ̄)