使用 PDFBox-0.7.3 和 iTextSharp 两种方式读取 PDF

```csharp
using org.pdfbox.pdmodel;
using org.pdfbox.util;
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;

using iTextSharp;
using iTextSharp.text;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Demo form that extracts the text of a PDF in two ways: with PDFBox 0.7.3
    /// (<see cref="pdf2txt"/>) and with iTextSharp (<see cref="OnCreated"/>).
    /// Both extractors write their result to d:\test.txt, so the second run
    /// overwrites the output of the first.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Runs both extractors against d:\A.pdf.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            string path = "d:\\A.pdf";
            FileInfo file = new FileInfo(path);
            FileInfo txtfile = new FileInfo("d:\\test.txt");

            // pdf2txt reports a refusal through its return value; surface it
            // instead of silently dropping it.
            string message = pdf2txt(file, txtfile);
            if (!string.IsNullOrEmpty(message))
            {
                MessageBox.Show(message);
            }

            OnCreated(path);
        }

        /// <summary>
        /// Extracts the text of <paramref name="file"/> with PDFBox 0.7.3 and
        /// writes it to <paramref name="txtfile"/> as UTF-8.
        /// </summary>
        /// <param name="file">PDF file to read.</param>
        /// <param name="txtfile">Text file to create or overwrite.</param>
        /// <returns>
        /// An empty string on success, or a user-facing message when the file
        /// is refused because its header declares PDF version 1.4.
        /// </returns>
        public string pdf2txt(FileInfo file, FileInfo txtfile)
        {
            // Inspect the header of the file that is actually being converted
            // (not a hard-coded path) and do it before loading the document so
            // nothing is left open on the early return.
            byte[] header = new byte[8];
            using (FileStream fs = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
            {
                // A file shorter than 8 bytes leaves header[7] at 0, which
                // simply fails the version test below.
                fs.Read(header, 0, header.Length);
            }

            // The header looks like "%PDF-1.x"; byte 7 is the minor version
            // digit and 52 is ASCII '4'. Per the original author, 1.4 files
            // could not be read by this PDFBox build while 1.7 files could.
            if (header[7] == 52)
            {
                return "PDF版本太低,无法读出.";
            }

            string text;
            PDDocument doc = PDDocument.load(file.FullName);
            try
            {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                text = pdfStripper.getText(doc);
            }
            finally
            {
                // PDDocument holds the underlying file open until closed.
                doc.close();
            }

            using (StreamWriter writer = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("utf-8")))
            {
                writer.Write(text);
            }

            return "";
        }

        /// <summary>
        /// Extracts the text of every page of <paramref name="filepath"/> with
        /// iTextSharp and writes it to d:\test.txt encoded as gb2312.
        /// Failures are reported to the user instead of being thrown.
        /// </summary>
        /// <param name="filepath">Path of the PDF file to read.</param>
        private void OnCreated(string filepath)
        {
            try
            {
                StringBuilder text = new StringBuilder();

                PdfReader pdfReader = new PdfReader(filepath);
                try
                {
                    int numberOfPages = pdfReader.NumberOfPages;

                    // iTextSharp page numbers are 1-based.
                    for (int i = 1; i <= numberOfPages; ++i)
                    {
                        // Extract page i; passing numberOfPages here would
                        // repeat the last page once per iteration.
                        text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, i));
                    }
                }
                finally
                {
                    pdfReader.Close();
                }

                FileInfo txtfile = new FileInfo("d:\\test.txt");
                using (StreamWriter writer = new StreamWriter(txtfile.FullName, false, Encoding.GetEncoding("gb2312")))
                {
                    writer.Write(text.ToString());
                }
            }
            catch (Exception ex)
            {
                // Still best-effort (never crashes the click handler), but the
                // user is told that extraction failed rather than getting a
                // silently missing or stale output file.
                MessageBox.Show(ex.Message);
            }
        }
    }
}
引用
```
这里写图片描述

猜你喜欢

转载自blog.csdn.net/liangyaomu/article/details/74670912