package com.dhcc.zhfc.elesign.util;
import org.apache.commons.lang.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
import org.hibernate.annotations.common.util.StringHelper;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
* @ClassName PdfBoxKeyWordPosition
* @Description TODO
* @Author 86173
* @Date 2020/5/11 15:14
* @Version 1.0
*/
public class PdfBoxKeyWordPosition extends PDFTextStripper { // Keyword character array private char[] key; // PDF file path private String pdfPath; private byte[] fileBytes; // Coordinate information collection private List<float[]> list = new ArrayList<float[]>(); // Current page information collection private List<float[ ]> pagelist = new ArrayList<float[]>(); // Constructor with parameters public PdfBoxKeyWordPosition(String keyWords, String pdfPath,byte[] bin) throws IOException { super();
super.setSortByPosition(true);
this.pdfPath = pdfPath;
this.fileBytes= bin;
char[] key = new char[keyWords.length()];
for (int i = 0; i < keyWords.length(); i++) {
key[i] = keyWords.charAt(i);
}
this.key = key;
}
public char[] getKey() {
return key;
}
public void setKey(char[] key) {
this.key = key;
}
public String getPdfPath() {
return pdfPath;
}
public void setPdfPath(String pdfPath) {
this.pdfPath = pdfPath;
}
// 获取坐标信息
public List<float[]> getCoordinate() throws IOException {
try {
if(!StringHelper.isEmpty(pdfPath)){
document = PDDocument.load(new File(pdfPath));
}
if(document==null&&fileBytes!=null){
document = PDDocument.load(fileBytes);
}
int pages = document.getNumberOfPages();
for (int i = 1; i <= pages; i++) {
pagelist.clear();
super.setSortByPosition(true);
super.setStartPage(i);
super.setEndPage(i);
Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
super.writeText(document, dummy);
for (float[] li : pagelist) {
li[2] = i;
}
list.addAll(pagelist);
}
return list;
} catch (Exception e) {
e.printStackTrace();
} finally {
if (document != null) {
document.close();
}
}
return list;
}
// 获取坐标信息
@Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
for (int i = 0; i < textPositions.size(); i++) {
String fonts = textPositions.get(i).getFont().getName();
String str = textPositions.get(i).getUnicode();
if (str.equals(key[0] + "")) {
int count = 0;
for (int j = 0; j < key.length-1; j++) {
String s = "";
try {
s = textPositions.get(i + j).getUnicode();
} catch (Exception e) {
s = "";
}
if (s.equals(key[j] + "")) { count++; } } if (count == key.length-1) { float[] idx = new float[3]; // Some adjustments are needed Make the seal cover the font // The length of the font is added to the X coordinate here, or you can directly idx[0] = textPositions.get(i).getX() idx[0] = textPositions.get(i).getX ()+textPositions.get(i).getFontSize(); //The length of the font subtracted from the Y coordinate here can also be directly idx[1] = textPositions.get(i).getPageHeight()-textPositions.get( i).getY()
idx[1] = textPositions.get(i).getHeight()-textPositions.get(i).getY()-4*textPositions.get(i).getFontSize();
System.out.println("x=" + idx[0] + ",y=" + idx[1]);
pagelist.add(idx);
return;
}
}
}
}
public static void main(String[] args) throws IOException {
String pdfPath = "C:\\Users\\pangq\\Desktop\\555.pdf";
File file = new File(pdfPath);
//PDDocument doc = PDDocument.load(file);
String keyWords = "纪海祥";
//PDImageXObject pdImage = PDImageXObject.createFromFile("C:/Programs/test/sign.png", doc);
byte[] bytes = File2byte(file);
PdfBoxKeyWordPosition pdf = new PdfBoxKeyWordPosition(keyWords, "",bytes);
PDPageContentStream contentStream = null;
List<float[]> list = pdf.getCoordinate();
List<Integer> convertResult = convert(list);
String a = convert2String(list);
// 多页pdf的处理*/
for (float[] fs : list) {
float x = fs[0];
float y = fs[1];
}
//doc.close();
}
public static byte[] File2byte(File tradeFile){
byte[] buffer = null;
FileInputStream fis =null;
ByteArrayOutputStream bos =null;
try
{
fis = new FileInputStream(tradeFile);
bos = new ByteArrayOutputStream();
byte[] b = new byte[1024];
int n;
while ((n = fis.read(b)) != -1)
{
bos.write(b, 0, n);
}
fis.close();
bos.close();
buffer = bos.toByteArray();
}catch (FileNotFoundException e){
e.printStackTrace();
}catch (IOException e){
e.printStackTrace();
}finally {
if(fis !=null){
try {
fis.close();
}catch (IOException io){
io.printStackTrace();
}
}
if(bos !=null){
try {
bos.close();
}catch (IOException io){
io.printStackTrace();
}
}
}
return buffer;
}
public static List<Integer> convert(List<float[]> list){
List<Integer> res = new ArrayList<Integer>();
if(list!=null&&list.size()>0) {
for (float[] fs : list) {
int page = (int) fs[2];
if(!res.contains(page)){
res.add(page);
}
}
}
return res;
}
public static String convert2String(List<float[]> list){
List<Integer> res = convert(list);
String str = StringUtils.join(res.iterator(),",");
return str;
}
/**
* Get the page number in pdf
* @param bystes
* @return
*/
public static int getPdfNubers(byte[] bystes){
int pages = 0 ;
ByteArrayInputStream in = new ByteArrayInputStream ( bystes ) ;
PDDocument pdfReader = null;
try { pdfReader = PDDocument.load(in); pages= pdfReader.getNumberOfPages(); } catch ( IOException e ) { return pages ; } }
return pages;
}
}