[python3] pypdf2 处理书签

pypdf4 能添加书签,但是没有跳转功能 (PyPDF2好像还不能读取书签数据)

https://pythonhosted.org/PyPDF2/index.html

#!/mingw64/bin/python3
# -*- coding: utf-8 -*-

import os

import PyPDF2
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger, generic

class GetPdfBookmark:
	"""Dump the outline (bookmarks) of a PDF into a tab-indented text file.

	Every bookmark becomes one line made of four TAB-separated fields:
	title, zero-based page index, left and top. The line is prefixed with
	one TAB per nesting level of the bookmark.
	"""

	# Class-level defaults; the per-instance values are assigned in __init__.
	deep_count    = 0     # nesting depth of the outline level being written
	input         = None  # PdfFileReader for the source document
	bm            = None  # outline tree as returned by the reader
	inputname     = None  # path of the source PDF
	reallypagenum = None  # str(page object id) -> str(zero-based page index)
	tabs          = ""    # kept for backward compatibility; no longer used

	def __init__(self, inputname):
		"""Open *inputname*, read its outline and index its pages.

		Raises:
			SystemExit: if the file cannot be opened or parsed (the original
				behaviour of terminating is kept, but now with a message).
		"""
		self.inputname = inputname
		self.deep_count = 0
		# A fresh dict per instance: a class-level dict would be shared by
		# every reader and mix page ids of different documents.
		self.reallypagenum = {}

		stream = None
		try:
			stream = open(inputname, "rb")
			self.input = PdfFileReader(stream)
		except Exception as err:
			if stream is not None:
				stream.close()
			raise SystemExit("cannot read PDF %r: %s" % (inputname, err)) from err
		# The reader parses lazily, so the stream has to outlive __init__.
		self._stream = stream

		self.bm = self.input.outlines

		self.__FixBookmarksPageNumber()

	def __FixBookmarksPageNumber(self):
		"""Fill ``reallypagenum`` from the reader's flattened page list.

		Walking the flattened list (instead of the /Kids array of the first
		page's parent) also covers documents whose page tree is nested.
		"""
		for index in range(self.input.getNumPages()):
			ref = self.input.getPage(index).indirectRef
			self.reallypagenum[str(ref.idnum)] = str(index)

	def __getpage(self, page):
		"""Return the zero-based page index (as a string) for a page reference.

		Uses the reference's ``idnum`` directly rather than slicing its
		string form, which breaks for multi-digit generation numbers.

		Raises:
			KeyError: if the referenced object is not a known page.
		"""
		return self.reallypagenum[str(page.idnum)]

	def __writeData(self, out, item):
		"""Write one bookmark line, indented by the current nesting depth."""
		indent = "\t" * self.deep_count
		out.write("%s%s\t%s\t%s\t%s\n" % (
			indent,
			str(item.title),
			self.__getpage(item.page),
			str(item.left),
			str(item.top),
		))

	def __writeBookmarks(self, out, bm):
		"""Recursively write the outline level *bm* to *out*.

		Entries exposing a ``typ`` attribute are treated as bookmarks;
		anything else is treated as a nested list of children.
		"""
		for item in bm:
			if hasattr(item, "typ"):
				self.__writeData(out, item)
			else:
				self.deep_count += 1
				self.__writeBookmarks(out, item)
				self.deep_count -= 1

	def write(self, outputname=None):
		"""Write the bookmark listing as UTF-8 text.

		Args:
			outputname: destination path. Defaults to the input path with
				its extension replaced by ``_bookmarks.txt``.
		"""
		if outputname is None:
			outputname = os.path.splitext(self.inputname)[0] + "_bookmarks.txt"

		# The context manager closes the file even if a bookmark fails to
		# resolve halfway through.
		with open(outputname, "w", encoding="utf-8") as out:
			self.__writeBookmarks(out, self.bm)

class SetPdfBookmark:
	"""Copy the pages of a PDF and add bookmarks to the copy.

	The result is written next to the input as ``<name>_bookmarks.pdf``.
	"""

	def __init__(self, filename):
		"""Load every page of *filename* into a new PdfFileWriter."""
		self.filename = filename
		# NOTE(review): the source stream is deliberately left open; the
		# writer presumably still reads page data from it when the output
		# is produced - confirm before closing it earlier.
		self._source = open(filename, "rb")
		try:
			reader = PdfFileReader(self._source)
			self.file = PdfFileWriter()
			self.file.appendPagesFromReader(reader)
		except Exception:
			self._source.close()
			raise

	def _save(self):
		"""Write the document to ``<input name>_bookmarks.pdf`` and close it."""
		target = os.path.splitext(self.filename)[0] + "_bookmarks.pdf"
		with open(target, "wb") as out:
			self.file.write(out)

	@staticmethod
	def _parse_coord(text):
		"""Convert a coordinate field to an int, or None when it is absent.

		Accepts integers, floats (truncated) and the literal ``None`` that a
		listing contains when a bookmark has no such coordinate.
		"""
		text = text.strip()
		if not text or text == "None":
			return None
		try:
			return int(text)
		except ValueError:
			return int(float(text))

	def testset(self, data=None):
		"""Add two nested sample bookmarks and save; *data* is unused."""
		parent = self.file.addBookmark("test", 10, None, None, False, False, "/XYZ", 0, 728, 0)
		self.file.addBookmark("test", 100, parent, None, False, False, "/XYZ", 0, 728, 0)

		self._save()

	def getTabCount(self, str):
		"""Return the number of leading TAB characters in *str*."""
		return len(str) - len(str.lstrip("\t"))

	def setfromfile(self, file):
		"""Add the bookmarks listed in the text file *file* and save the PDF.

		Each useful line holds four TAB-separated fields - title, zero-based
		page index, left, top - preceded by one TAB per nesting level.
		Lines with a different number of fields are skipped.

		Raises:
			ValueError: if a page index or coordinate is not numeric.
		"""
		with open(file, "r", encoding="utf-8") as listing:
			lines = listing.readlines()

		# Stack of (indent, bookmark) for the chain of currently open
		# ancestors. Popping everything that is not shallower than the
		# current line handles dedents of any size and irregular indent
		# jumps, not just a change of exactly one level.
		open_levels = []

		for line in lines:
			indent = self.getTabCount(line)
			fields = line[indent:].split("\t")

			if len(fields) != 4:
				continue

			while open_levels and open_levels[-1][0] >= indent:
				open_levels.pop()
			parent = open_levels[-1][1] if open_levels else None

			title, page, left, top = fields
			bookmark = self.file.addBookmark(
				title, int(page), parent, None, False, False,
				"/XYZ", self._parse_coord(left), self._parse_coord(top), 0)
			open_levels.append((indent, bookmark))

		self._save()

		print("ok")

# Dump the outline of test.pdf to test_bookmarks.txt.
bm = GetPdfBookmark(inputname="test.pdf")
bm.write(outputname=None)

# Reverse direction: apply a bookmark listing to a PDF that has none.
# bm = SetPdfBookmark("nobookmarks.pdf")
# bm.setfromfile("bookmarks.txt")

读取出来的书签数据类似于

标题\t position\t left\t top
    标题\t position\t left\t top
        标题\t position\t left\t top
    标题\t position\t left\t top

如果是想调整书签的层次

请利用好正则(前提是书签标题能够正确匹配)

^\t+(\d+\.\d+)(?!\.) 匹配二级书签

^\t+(\d+\.\d+\.\d+) 匹配三级书签

发布了36 篇原创文章 · 获赞 1 · 访问量 8994

猜你喜欢

转载自blog.csdn.net/u011091701/article/details/95583316