[Java parsing PDF]

public class pdfAnalysis {
    /**
     * Entry point: reads a hard-coded PDF, isolates the text between the
     * "Passenger Details" and "Meal Details" headings, counts how many times
     * the titles 先生 (Mr.) and 女士 (Ms.) occur in it, and prints the first
     * name that follows each title.
     *
     * @param args command-line arguments; not used
     * @throws IOException if closing the shared input stream fails
     */
    public static void main(String[] args) throws IOException {
        pdfAnalysis pdf = new pdfAnalysis();
        String pdfName = "F:\\CC.pdf";

        try {
            // readFileOfPDF returns null when the file could not be read or parsed.
            String pdfBody = pdf.readFileOfPDF(pdfName);
            if (pdfBody == null) {
                System.out.println("No text could be extracted from " + pdfName);
                return;
            }

            // Both headings must be present, in order, before the section can be cut out.
            int sectionStart = pdfBody.indexOf("Passenger Details");
            int sectionEnd = pdfBody.indexOf("Meal Details");
            if (sectionStart < 0 || sectionEnd < sectionStart) {
                System.out.println("Passenger section not found in " + pdfName);
                return;
            }
            String str = pdfBody.substring(sectionStart, sectionEnd);
            System.out.println("I am the value of str: " + str);

            int mrCount = appearNumber(str, "先生");
            int msCount = appearNumber(str, "女士");
            if (mrCount > 0) {
                System.out.println("Number of occurrences of Mr." + mrCount);
            }
            if (msCount > 0) {
                System.out.println("Number of times the lady appeared" + msCount);
            }
            if (mrCount > 0 && msCount > 0) {
                // The total is the sum of both counts (previously the second count was doubled).
                System.out.println("A total of " + (mrCount + msCount) + " times");
            }

            // Each lookup is independent, so a missing title yields null instead of
            // an exception or text taken from the wrong position.
            String m2 = firstNameAfter(str, "先生");
            String wm1 = firstNameAfter(str, "女士");
            System.out.println(m2 + ":" + wm1);
        } finally {
            // readFileOfPDF stores the stream it opened in the static field; closing an
            // already-closed stream is harmless, so this stays as a last safety net.
            if (pdfAnalysis.infile != null) {
                pdfAnalysis.infile.close();
                System.out.println("I'm about to close the PDF document");
            }
        }
    }

    /**
     * Returns the trimmed text between the first occurrence of {@code title}
     * and the next "-" after it.
     *
     * @param text  text to search
     * @param title marker that precedes the name
     * @return the extracted name, or {@code null} if the title or the
     *         terminating "-" is missing
     */
    private static String firstNameAfter(String text, String title) {
        int titleAt = text.indexOf(title);
        if (titleAt < 0) {
            return null;
        }
        int nameStart = titleAt + title.length();
        int dashAt = text.indexOf("-", nameStart);
        if (dashAt < 0) {
            return null;
        }
        return text.substring(nameStart, dashAt).trim();
    }

    /**
     * Counts the non-overlapping matches of a regular expression in a text.
     *
     * @param srcText text to search; {@code null} yields 0
     * @param temp    regular expression to look for (a plain word without
     *                regex metacharacters matches literally); {@code null} yields 0
     * @return number of matches found
     */
    public static int appearNumber(String srcText, String temp) {
        if (srcText == null || temp == null) {
            return 0;
        }
        // Fully qualified so the method does not depend on the file's import list.
        java.util.regex.Matcher m = java.util.regex.Pattern.compile(temp).matcher(srcText);
        int count = 0;
        while (m.find()) {
            count++;
        }
        return count;
    }
    // Input stream most recently opened by readFileOfPDF; main() checks it and closes it again before exiting.
    public static FileInputStream infile = null;

    /**
     * Extracts the text content of a PDF file.
     *
     * <p>The stream opened for the file is stored in the static {@code infile}
     * field and is closed, together with the parsed document, before this
     * method returns.
     *
     * @param pdfName path of the PDF file to read
     * @return the extracted text, or {@code null} if the file could not be
     *         read or parsed (the failure is reported on standard output)
     * @throws IOException declared for callers; read and parse failures are
     *         caught inside the method and signalled by a {@code null} return
     */
    public String readFileOfPDF(String pdfName) throws IOException {
        String context = null;
        File file = new File(pdfName); // used only to report the absolute path
        PDDocument pdfdocument = null;

        try {
            infile = new FileInputStream(pdfName);
            // Parse the raw stream into a document object.
            PDFParser parser = new PDFParser(infile);
            parser.parse();
            pdfdocument = parser.getPDDocument();
            // Strip the plain text out of the parsed document.
            PDFTextStripper stripper = new PDFTextStripper();
            context = stripper.getText(pdfdocument);
            System.out.println("PDF file" + file.getAbsolutePath() + "The text content is as follows: ");
        } catch (Exception e) {
            System.out.println("Read PDF file" + file.getAbsolutePath() + "Failed!" + e.getMessage());
        } finally {
            // The parsed document holds its own resources and must be closed as well as the stream.
            if (pdfdocument != null) {
                try {
                    pdfdocument.close();
                } catch (IOException ignored) {
                    // Best-effort cleanup: the text has already been extracted (or the failure reported).
                }
            }
            if (infile != null) {
                try {
                    infile.close();
                } catch (IOException ignored) {
                    // Best-effort cleanup: nothing useful can be done if closing fails.
                }
            }
        }
        return context;
    }

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325226732&siteId=291194637