【Java解析PDF】

public class pdfAnalysis {
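    /**
     * Reads a PDF (optionally downloaded from the network beforehand) and extracts
     * substrings of interest from its text.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if an I/O error propagates while reading or closing the PDF
     */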

    public static void main(String[] args) throws IOException {
        // Reads the text of a hard-coded PDF, isolates the passenger section between the
        // "乘客详情" and "餐食详情" headings, reports how often the titles "先生" / "女士"
        // occur in that section, and prints the first name that follows each title.

        // Download step (disabled). Original note: link, saved file name, target directory.
        // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/");
        pdfAnalysis pdf = new pdfAnalysis();
        String pdfName = "F:\\CC.pdf";

        try {
            // readFileOfPDF returns null when the file cannot be read or parsed and has
            // already printed the reason, so there is nothing left to analyse.
            String pdfBody = pdf.readFileOfPDF(pdfName);
            if (pdfBody == null) {
                return;
            }

            // Both headings must exist, in this order; otherwise substring() would throw.
            int sectionStart = pdfBody.indexOf("乘客详情");
            int sectionEnd = sectionStart < 0 ? -1 : pdfBody.indexOf("餐食详情", sectionStart);
            if (sectionEnd < 0) {
                System.out.println("未找到\"乘客详情\"到\"餐食详情\"之间的内容:" + pdfName);
                return;
            }

            String str = pdfBody.substring(sectionStart, sectionEnd);
            System.out.println("我是str的值:" + str);

            // Count each title separately so the total is the sum of both counts
            // (the previous code doubled the second count instead).
            int mrCount = appearNumber(str, "先生");
            int msCount = appearNumber(str, "女士");
            if (mrCount > 0) {
                System.out.println("先生出现的次数" + mrCount);
            }
            if (msCount > 0) {
                System.out.println("女士出现的次数" + msCount);
            }
            if (mrCount > 0 && msCount > 0) {
                System.out.println("一共出现了" + (mrCount + msCount) + "次");
            }

            // The former loop recomputed the same two values on every pass, so a single
            // extraction per title yields the same result. A title that is absent gives
            // null, which prints as "null" exactly as before.
            String m2 = firstNameAfter(str, "先生");
            String wm1 = firstNameAfter(str, "女士");
            System.out.println(m2 + ":" + wm1);
        } finally {
            // Always release the stream, even when parsing stops early.
            if (pdfAnalysis.infile != null) {
                pdfAnalysis.infile.close();
                System.out.println("我要准备关闭PDF文档了");
            }
        }
    }

    /**
     * Returns the trimmed text between the first occurrence of {@code title} and the
     * next {@code "-"} that follows it.
     *
     * @param text  text to search
     * @param title marker that precedes the wanted value
     * @return the trimmed value, or {@code null} if {@code title} does not occur or no
     *         {@code "-"} follows it
     */
    private static String firstNameAfter(String text, String title) {
        int titleAt = text.indexOf(title);
        if (titleAt < 0) {
            return null;
        }
        int valueStart = titleAt + title.length();
        int dashAt = text.indexOf("-", valueStart);
        if (dashAt < 0) {
            return null;
        }
        return text.substring(valueStart, dashAt).trim();
    }

    /**
     * Counts the non-overlapping occurrences of a literal string inside another string.
     *
     * <p>The search text is matched literally, so characters that are special in regular
     * expressions (for example {@code .} or {@code (}) are counted as ordinary characters.
     *
     * @param srcText text to search; {@code null} is treated as containing no occurrences
     * @param temp    literal text to look for; {@code null} or empty yields {@code 0}
     * @return number of non-overlapping occurrences of {@code temp} in {@code srcText}
     */
    public static int appearNumber(String srcText, String temp) {
        if (srcText == null || temp == null || temp.isEmpty()) {
            return 0;
        }
        int count = 0;
        int from = srcText.indexOf(temp);
        while (from >= 0) {
            count++;
            // Resume after the whole match so overlapping hits are not counted twice.
            from = srcText.indexOf(temp, from + temp.length());
        }
        return count;
    }
    // Input stream most recently opened by readFileOfPDF, which assigns it and closes it
    // in its finally block; main also closes it (when non-null) before exiting.
    public static FileInputStream infile = null;

    /**
     * Extracts the plain text of a PDF file.
     *
     * <p>The opened stream is stored in the static {@code infile} field. Any exception
     * raised while opening, parsing or stripping the document is caught and reported on
     * standard output, in which case {@code null} is returned. Both the parsed document
     * and the input stream are closed before the method returns.
     *
     * @param pdfName path of the PDF file to read
     * @return the extracted text, or {@code null} if the file could not be read or parsed
     * @throws IOException declared in the signature; failures inside the method body are
     *                     caught and reported instead of being propagated
     */
    public String readFileOfPDF(String pdfName) throws IOException {
        String context = null;
        File file = new File(pdfName); // used only to print the absolute path
        PDDocument pdfdocument = null;

        try {
            infile = new FileInputStream(pdfName);
            // Parse the stream into a PDF document object.
            PDFParser parser = new PDFParser(infile);
            parser.parse();
            pdfdocument = parser.getPDDocument();
            // Strip the text out of the parsed document.
            PDFTextStripper stripper = new PDFTextStripper();
            context = stripper.getText(pdfdocument);
            System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:");
        } catch (Exception e) {
            // Best effort: report the failure and let the caller see null.
            System.out.println("读取PDF文件" + file.getAbsolutePath() + "失败!" + e.getMessage());
        } finally {
            // Close the document first; it was previously left open.
            if (pdfdocument != null) {
                try {
                    pdfdocument.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the text is already extracted.
                }
            }
            if (infile != null) {
                try {
                    infile.close();
                } catch (IOException ignored) {
                    // Closing is best effort, matching the original behaviour.
                }
            }
        }
        return context;
    }

猜你喜欢

转载自www.cnblogs.com/iitxt/p/8984131.html