"""Print the text of every PDF file found in a directory using pdfminer."""
import os

from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import (
    LAParams,
    LTCurve,
    LTFigure,
    LTLine,
    LTRect,
    LTTextBoxHorizontal,
    LTTextLineHorizontal,
)
from pdfminer.pdfinterp import (
    PDFPageInterpreter,
    PDFResourceManager,
    PDFTextExtractionNotAllowed,
)
# NOTE(review): importing PDFDocument from pdfminer.pdfparser matches the old
# pdfminer / pdfminer3k layout; confirm which pdfminer distribution is installed.
from pdfminer.pdfparser import PDFDocument, PDFParser


class PdfForString(object):
    """Extract and print the text of the PDF files in one directory.

    Args:
        pdf_dir: Directory scanned for PDF files. The same directory is used
            both to list the files and to build their full paths, so the two
            can never disagree. Defaults to the original hard-coded location.

    Raises:
        OSError: If ``pdf_dir`` cannot be listed (raised by ``os.listdir``).
    """

    def __init__(self, pdf_dir=r'E:\StockExchange\PDF'):
        self.pdf_dir = pdf_dir
        # os.listdir returns every entry in the folder; keep only names with
        # a .pdf extension so other files are not handed to the PDF parser.
        self.pdf_list = [
            name for name in os.listdir(pdf_dir)
            if name.lower().endswith('.pdf')
        ]
        # Resource manager shared by the device and the interpreter.
        self.src = PDFResourceManager()
        # Device that aggregates each processed page into a layout object.
        self.device = PDFPageAggregator(self.src, laparams=LAParams())
        # Interpreter that renders pages onto the device.
        self.inter = PDFPageInterpreter(self.src, self.device)

    def for_string(self):
        """Yield the full path of each file name in ``self.pdf_list``."""
        for pdf in self.pdf_list:
            yield os.path.join(self.pdf_dir, pdf)

    def pdf_analysis(self):
        """Yield, for each PDF path, the result of ``document.get_pages()``.

        Each file stays open while the caller works through the yielded
        pages and is closed when the generator is resumed (or closed), so
        callers should finish with one document's pages before asking for
        the next.
        """
        for path in self.for_string():
            with open(path, 'rb') as pd_file:
                parser = PDFParser(pd_file)
                document = PDFDocument()
                # Parser and document must be linked in both directions.
                parser.set_document(document)
                document.set_parser(parser)
                # NOTE(review): initialize() with the default (empty) password
                # follows the library's documented set-up sequence; confirm
                # against the installed pdfminer version.
                document.initialize()
                yield document.get_pages()

    def get_string(self):
        """Print the text of every horizontal text box in every PDF page."""
        for pages in self.pdf_analysis():
            for page in pages:
                self.inter.process_page(page)
                layout = self.device.get_result()
                for element in layout:
                    if isinstance(element, LTTextBoxHorizontal):
                        print(element.get_text())


if __name__ == '__main__':
    PdfForString().get_string()