public class pdfAnalysis { /** * @throws IOException * @param从网络上下载PDF,截取PDF字符串, */ public static void main(String[] args) throws IOException { // 下载的连接 下载下来的名字 下载下来的路径 // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/"); // 读取文件 pdfAnalysis pdf = new pdfAnalysis(); // 读取文件 String pdfName = "F:\\CC.pdf"; // 解析PDF里的值 存入变量pdf_Body String pdf_Body = pdf.readFileOfPDF(pdfName); //System.out.println(pdf_Body); String str = (pdf_Body.substring(pdf_Body.indexOf("乘客详情"), pdf_Body.indexOf("餐食详情"))); System.out.println("我是str的值:" + str); int a = 0; if (str.contains("先生") && str.contains("女士")) { a = appearNumber(str, "先生"); System.out.println("先生出现的次数" + a); a = appearNumber(str, "女士"); System.out.println("女士出现的次数" + a); a += a; System.out.println("一共出现了" + a + "次"); } else if (str.contains("先生")) { a = appearNumber(str, "先生"); System.out.println("先生出现的次数" + a); } else if (str.contains("女士")) { a = appearNumber(str, "女士"); System.out.println("女士出现的次数" + a); } String m2=null,wm1=null; for(int i = 0;i<a ;i++){ if(str.contains("先生")||str.contains("女士")){ String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); }else if(str.contains("女士")){ String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); }else if(str.contains("先生")){ String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); } } System.out.println(m2+":"+wm1); /* List<String> list = new ArrayList<>(); if (str.contains("先生") && str.contains("女士")) { String m2 = null; String wm1 = null; for (int i = 0; i < a; i++) { String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); } list.add(m2); list.add(wm1); } else if (str.contains("先生")) { for (int i = 0; i < a; i++) { String m = str.substring(str.indexOf("先生") + 2); String m2 = (m.substring(0, m.indexOf("-"))).trim(); list.add(m2); } } else if (str.contains("女士")) { for (int i = 0; i < a; i++) { String wm = str.substring(str.indexOf("女士") + 2); String wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); list.add(wm1); } } for (String s : list) { System.out.println("人名" + s); }*/ /* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details")); String str1 = str.substring(str.indexOf("H (")); String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" }; for(String sto:pp){ if(str1.contains(sto)){ String result = str1.substring(str1.indexOf(sto)); //System.out.println(result); //System.out.println(result.length()); String result2 = result.substring(0,result.indexOf(",")); String result3 = result2.trim(); System.out.println("大家好,我是闫老五要的时间:"+result3+"我是"+pdfName+"文件"); } }*/ /*if(str1.contains("Monday")||str1.contains("Tuesday")|| str1.contains("Wednesday")||str1.contains("Thursday")|| str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){ }*/ // System.out.println(str1); /* // 取出Depart里的值(离开地) String depart_Temp = pdf_Body.substring(pdf_Body.indexOf("Depart"), pdf_Body.indexOf("Arrive")); String depart_Temp2 = depart_Temp.substring(depart_Temp.indexOf("Depart")); // System.out.println("取出来的Depart总值:" + depart_Temp2); String depart_Temp3 = depart_Temp2.substring(depart_Temp2.indexOf("("), depart_Temp2.indexOf(")")); // 把括号替换成空字符串,并去掉空字符串 String depart = depart_Temp3.replace("(", "").trim(); System.out.println("Depart:" + depart); // 取出Arrive的值(到达地) String arrive_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive:"), pdf_Body.indexOf("passenger details")); String arrive_Temp1 = arrive_Temp.substring(arrive_Temp.indexOf("("), arrive_Temp.indexOf(")")); // 把括号替换成空字符串,并去掉空字符串 String arrive = arrive_Temp1.replace("(", "").trim(); System.out.println("Arrive:" + arrive); // 取出金钱值 String money = pdf_Body.substring(pdf_Body.indexOf("AUD ") + 4, pdf_Body.indexOf("GST")); System.out.println("总金额:" + money); // 取出人名值 String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details")); // System.out.println(str); String name_Temp1 = null; String result_name = null; List<String> list_Name = new ArrayList<>(); for (int i = 1; i < name_Temp.length(); i++) { if (name_Temp.contains(i + ".")) { name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + ".")); result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3, name_Temp1.indexOf("Seat Number Services")); list_Name.add(result_name); } // System.out.println(add); // System.out.println(str2); if (name_Temp1.equals("null")) { continue; } } for (String i : list_Name) { System.out.println("所有的人名:" + i); }*/ if (pdfAnalysis.infile != null) { pdfAnalysis.infile.close(); System.out.println("我要准备关闭PDF文档了"); } } public static int appearNumber(String srcText, String temp) { int count = 0; Pattern p = Pattern.compile(temp); Matcher m = p.matcher(srcText); while (m.find()) { count++; } return count; } public static FileInputStream infile = null; public String readFileOfPDF(String pdfName) throws IOException { String context = null; File file = new File(pdfName);// 创建一个文件对象 try { infile = new FileInputStream(pdfName);// 创建一个文件输入流 // 新建一个PDF解析器对象 PDFParser parser = new PDFParser(infile); // 对PDF文件进行解析 parser.parse(); // 获取解析后得到的PDF文档对象 PDDocument pdfdocument = parser.getPDDocument(); // 新建一个PDF文本剥离器 PDFTextStripper stripper = new PDFTextStripper(); // 从PDF文档对象中剥离文本 context = stripper.getText(pdfdocument); System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:"); // System.out.println(context); } catch (Exception e) { System.out.println("读取PDF文件" + file.getAbsolutePath() + "失败!" + e.getMessage()); } finally { if (infile != null) { try { infile.close(); } catch (IOException e1) { } } } return context; }
【Java解析PDF】
猜你喜欢
转载自www.cnblogs.com/iitxt/p/8984131.html
今日推荐
周排行