public class pdfAnalysis { /** * @throws IOException * @param download PDF from the Internet, intercept PDF string, */ public static void main(String[] args) throws IOException { // The downloaded link is the downloaded name and the downloaded path // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/"); // read the file pdfAnalysis pdf = new pdfAnalysis(); // read the file String pdfName = "F:\\CC.pdf"; // Parse the value in the PDF and store it in the variable pdf_Body String pdf_Body = pdf.readFileOfPDF(pdfName); //System.out.println(pdf_Body); String str = (pdf_Body.substring(pdf_Body.indexOf("Passenger Details"), pdf_Body.indexOf("Meal Details"))); System.out.println("I am the value of str: " + str); int a = 0; if (str.contains("先生") && str.contains("女士")) { a = appearNumber(str, "先生"); System.out.println("Number of occurrences of Mr." + a); a = appearNumber(str, "女士"); System.out.println("Number of times the lady appeared" + a); a += a; System.out.println("A total of " + a + " times"); } else if (str.contains("先生")) { a = appearNumber(str, "先生"); System.out.println("Number of occurrences of Mr." + a); } else if (str.contains("女士")) { a = appearNumber(str, "女士"); System.out.println("Number of times the lady appeared" + a); } String m2=null,wm1=null; for(int i = 0;i<a ;i++){ if(str.contains("先生")||str.contains("女士")){ String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); }else if(str.contains("女士")){ String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); }else if(str.contains("先生")){ String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); } } System.out.println(m2+":"+wm1); /* List<String> list = new ArrayList<>(); if (str.contains("先生") && str.contains("女士")) { String m2 = null; String wm1 = null; for (int i = 0; i < a; i++) { String m = str.substring(str.indexOf("先生")); m2 = m.substring(m.indexOf("先生") + 2, m.indexOf("-")).trim(); String wm = str.substring(str.indexOf("女士") + 2); wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); } list.add(m2); list.add(wm1); } else if (str.contains("先生")) { for (int i = 0; i < a; i++) { String m = str.substring(str.indexOf("先生") + 2); String m2 = (m.substring(0, m.indexOf("-"))).trim(); list.add(m2); } } else if (str.contains("女士")) { for (int i = 0; i < a; i++) { String wm = str.substring(str.indexOf("女士") + 2); String wm1 = (wm.substring(0, wm.indexOf("-"))).trim(); list.add(wm1); } } for (String s : list) { System.out.println("人名" + s); }*/ /* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details")); String str1 = str.substring(str.indexOf("H (")); String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" }; for(String sto:pp){ if(str1.contains(sto)){ String result = str1.substring(str1.indexOf(sto)); //System.out.println(result); //System.out.println(result.length()); String result2 = result.substring(0,result.indexOf(",")); String result3 = result2.trim(); System.out.println("Hello everyone, I am Yan Laowu. The time I want: "+result3+"I am "+pdfName+"file"); } }*/ /*if(str1.contains("Monday")||str1.contains("Tuesday")|| str1.contains("Wednesday")||str1.contains("Thursday")|| str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){ }*/ // System.out.println(str1); /* // Get the value in the Department (leave the ground) String depart_Temp = pdf_Body.substring(pdf_Body.indexOf("Depart"), pdf_Body.indexOf("Arrive")); String depart_Temp2 = depart_Temp.substring(depart_Temp.indexOf("Depart")); // System.out.println("Total value of Department taken out:" + depart_Temp2); String depart_Temp3 = depart_Temp2.substring(depart_Temp2.indexOf("("), depart_Temp2.indexOf(")")); // replace parentheses with empty strings and remove empty strings String depart = depart_Temp3.replace("(", "").trim(); System.out.println("Depart:" + depart); // Get the value of Arrive (arrival) String arrive_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive:"), pdf_Body.indexOf("passenger details")); String arrive_Temp1 = arrive_Temp.substring(arrive_Temp.indexOf("("), arrive_Temp.indexOf(")")); // replace parentheses with empty strings and remove empty strings String arrive = arrive_Temp1.replace("(", "").trim(); System.out.println("Arrive:" + arrive); // get the money value String money = pdf_Body.substring(pdf_Body.indexOf("AUD ") + 4, pdf_Body.indexOf("GST")); System.out.println("Total amount:" + money); // get the name value String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details")); // System.out.println(str); String name_Temp1 = null; String result_name = null; List<String> list_Name = new ArrayList<>(); for (int i = 1; i < name_Temp.length(); i++) { if (name_Temp.contains(i + ".")) { name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + ".")); result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3, name_Temp1.indexOf("Seat Number Services")); list_Name.add(result_name); } // System.out.println(add); // System.out.println(str2); if (name_Temp1.equals("null")) { continue; } } for (String i : list_Name) { System.out.println("All names: " + i); }*/ if (pdfAnalysis.infile != null) { pdfAnalysis.infile.close(); System.out.println("I'm about to close the PDF document"); } } public static int appearNumber(String srcText, String temp) { int count = 0; Pattern p = Pattern.compile(temp); Matches m = p.matches (srcText); while (m.find()) { count++; } return count; } public static FileInputStream infile = null; public String readFileOfPDF(String pdfName) throws IOException { String context = null; File file = new File(pdfName);//Create a file object try { infile = new FileInputStream(pdfName);// Create a file input stream // Create a new PDF parser object PDFParser parser = new PDFParser(infile); // Parse the PDF file parser.parse(); // Get the PDF document object obtained after parsing PDDocument pdfdocument = parser.getPDDocument(); // Create a new PDF text stripper PDFTextStripper stripper = new PDFTextStripper(); // Strip text from the PDF document object context = stripper.getText(pdfdocument); System.out.println("PDF file" + file.getAbsolutePath() + "The text content is as follows: "); // System.out.println(context); } catch (Exception e) { System.out.println("Read PDF file" + file.getAbsolutePath() + "Failed!" + e.getMessage()); } finally { if (infile != null) { try { infile.close(); } catch (IOException e1) { } } } return context; }