Creating PDF files from TXT with Python (ReportLab)

Creating PDF files with ReportLab
E-books are usually distributed in txt format, but some e-readers, such as the DPT-RP1, cannot read txt documents. This article therefore uses Python to convert txt to pdf. The script can build a table of contents whose entries are clickable links (provided the position of each chapter in the txt file can be recognised), and it supports Chinese text.

For how to use ReportLab, see its official documentation. The complete txt-to-pdf code is as follows:

# coding: utf-8

# Make utf-8 the interpreter-wide default codec for implicit str <-> unicode
# conversions. This is a Python 2 hack: sys.setdefaultencoding() is not
# reachable on the sys module after startup, hence the reload(). Python 3 has
# neither the builtin reload() nor setdefaultencoding() and already defaults
# to utf-8, so the hack is skipped there instead of raising NameError.
import sys
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('utf-8')

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import BaseDocTemplate, Frame, PageTemplate, Paragraph
from reportlab.platypus.tableofcontents import TableOfContents
from reportlab.platypus import PageBreak
from reportlab.lib.pagesizes import A4

# Register the two TrueType fonts used for Chinese text. Both .ttf files are
# addressed relative to the current working directory.
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF'))
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf'))

# Start from ReportLab's sample stylesheet and add one paragraph style per
# registered font; wordWrap='CJK' selects CJK line breaking.
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(
    name='STSONG',
    fontName='STSONG',
    fontSize=12,
    leading=20,
    firstLineIndent=22,
    wordWrap='CJK',
))
styles.add(ParagraphStyle(
    name='simhei',
    fontName='simhei',
    fontSize=14,
    leading=25,
    wordWrap='CJK',
))

class MyDocTemplate(BaseDocTemplate):
    """Document template that reports heading paragraphs to the table of contents."""

    def __init__(self, filename, **kw):
        """Create the template for ``filename``.

        All keyword arguments are forwarded unchanged to
        ``BaseDocTemplate.__init__``.
        """
        self.allowSplitting = 0
        # NOTE(review): allowSplitting is assigned *before* the base
        # initializer runs; if BaseDocTemplate.__init__ applies its own
        # default for this attribute, the value set here will not survive.
        # Confirm against the ReportLab version in use.
        # Direct call replaces the builtin apply(), which no longer exists in
        # Python 3; the call is equivalent on Python 2.
        BaseDocTemplate.__init__(self, filename, **kw)

    # Entries to the table of contents can be done either manually by
    # calling the addEntry method on the TableOfContents object or automatically
    # by sending a 'TOCEntry' notification in the afterFlowable method of
    # the DocTemplate you are using. The data to be passed to notify is a list
    # of three or four items containing a level number, the entry text, the page
    # number and an optional destination key which the entry should point to.
    # This list will usually be created in a document template's method like
    # afterFlowable(), making notification calls using the notify() method
    # with appropriate data.

    def afterFlowable(self, flowable):
        """Register a TOC entry for ``flowable`` if it is a heading paragraph.

        Only flowables whose class is named 'Paragraph' are considered.
        Style 'Heading1' maps to TOC level 0 and style 'simhei' to level 1;
        any other style is ignored.
        """
        if flowable.__class__.__name__ != 'Paragraph':
            return

        text = flowable.getPlainText()
        style = flowable.style.name
        if style == 'Heading1':
            level = 0
        elif style == 'simhei':
            level = 1
        else:
            return

        entry = [level, text, self.page]
        # If the flowable carries a bookmark name, append it so the TOC entry
        # can link to that destination.
        bn = getattr(flowable, '_bookmarkName', None)
        if bn is not None:
            entry.append(bn)
        self.notify('TOCEntry', tuple(entry))


# this function makes our headings
def doHeading(data, text, sty):
    """Append a heading paragraph with a bookmark anchor to ``data``.

    Args:
        data: list of flowables; the new heading is appended in place.
        text: heading text (paragraph markup).
        sty: paragraph style used for the heading.

    The bookmark name is the SHA-1 hex digest of ``text``, so two headings
    with identical text share the same bookmark name.
    """
    from hashlib import sha1

    # sha1() needs bytes. Encode text explicitly instead of depending on the
    # interpreter's implicit default encoding (Python 2) or failing with a
    # TypeError (Python 3). Byte strings are hashed as they are.
    raw = text if isinstance(text, bytes) else text.encode('utf-8')
    bn = sha1(raw).hexdigest()
    # Embed an anchor named bn in the paragraph text.
    h = Paragraph(text + '<a name="%s"/>' % bn, sty)
    # Store the bookmark name on the flowable so afterFlowable can read it.
    h._bookmarkName = bn
    data.append(h)

# Page Number
def footer(canvas, doc):
    """Draw the current page number on ``canvas``.

    The number is rendered as a 'Normal'-style paragraph, wrapped within the
    document width and bottom margin, and drawn at x = left margin plus half
    of the wrapped width, y = the wrapped height. The canvas state is saved
    before drawing and restored afterwards.
    """
    number_text = "%d" % canvas.getPageNumber()
    canvas.saveState()
    label = Paragraph(number_text, styles['Normal'])
    wrapped_w, wrapped_h = label.wrap(doc.width, doc.bottomMargin)
    x_pos = doc.leftMargin + wrapped_w/2
    label.drawOn(canvas, x_pos, wrapped_h)
    canvas.restoreState()

# load txt file
def loadTxt(txt_path):
    """Return the lines of the file at ``txt_path`` as undecoded byte strings.

    The file is opened in binary mode so that no implicit, locale-dependent
    decoding happens on Python 3; the caller is expected to decode each line
    itself. Binary mode performs no newline translation, so every line keeps
    whatever terminator the file actually uses.

    Args:
        txt_path: path of the text file to read.

    Returns:
        A list with one byte string per line, line terminators included.
    """
    with open(txt_path, 'rb') as f:
        return f.readlines()

def toPDF(txt_datas, pdf_path):
    """Build a PDF with a linked table of contents from lines of text.

    Args:
        txt_datas: iterable of text lines. Byte strings are decoded as GB2312
            with a GBK fallback; lines that are already text are used as is.
        pdf_path: path of the PDF file to write.

    Raises:
        UnicodeDecodeError: if a byte-string line is valid in neither GB2312
            nor GBK.

    A line is treated as a chapter heading when its first character is
    u"第" and it contains u"章"; every other non-empty line becomes a body
    paragraph. Progress is printed after each non-empty line.
    """
    PDF = MyDocTemplate(pdf_path, pagesize=A4)
    frame = Frame(PDF.leftMargin, PDF.bottomMargin, PDF.width, PDF.height,
                  id='normal')
    template = PageTemplate(frames=frame, onPage=footer)
    PDF.addPageTemplates([template])

    data = []

    # Table of contents, with the font and size for each TOC level.
    toc = TableOfContents()
    toc.levelStyles = [
        ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1',
                       leftIndent=20, firstLineIndent=-20, spaceBefore=10,
                       leading=16),
        ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2',
                       leftIndent=40, firstLineIndent=-20, spaceBefore=5,
                       leading=12),
    ]
    data.append(toc)
    data.append(PageBreak())  # body text starts on the page after the TOC

    NUM = 0
    for txt_data in txt_datas:
        txt_data = txt_data.lstrip()  # drop leading whitespace
        if len(txt_data) == 0:  # nothing left on this line
            continue

        # Only byte strings need decoding. Catch just the decode failure so
        # unrelated errors (e.g. KeyboardInterrupt) are not swallowed and
        # misread as "retry with GBK".
        if isinstance(txt_data, bytes):
            try:
                txt_data = txt_data.decode("gb2312")
            except UnicodeDecodeError:
                txt_data = txt_data.decode("gbk")

        if txt_data[0] == u"第" and (u"章" in txt_data):
            doHeading(data, txt_data, styles['simhei'])
        else:
            data.append(Paragraph(txt_data, styles['STSONG']))
        NUM = NUM + 1
        print('{} line'.format(NUM))

    print('Build pdf!')
    PDF.multiBuild(data)

if __name__ == "__main__":
    # Unicode literals replace "...".decode("utf8"): str has no decode() on
    # Python 3, and on Python 2 the literal yields the same unicode value as
    # long as the file keeps its utf-8 coding declaration.
    txt_path = u"财运天降.txt"
    pdf_path = u"财运天降.pdf"
    txt_datas = loadTxt(txt_path)
    toPDF(txt_datas, pdf_path)
The code above was tested on Windows with Python 2. The main points to note are:

Default encoding setting:
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
Chinese font support:
pdfmetrics.registerFont(TTFont('STSONG', './STSONG.TTF')) #register Font
pdfmetrics.registerFont(TTFont('simhei', './simhei.ttf')) #register Font
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(fontName='STSONG', name='STSONG', leading=20, fontSize=12, firstLineIndent=22, wordWrap='CJK'))
styles.add(ParagraphStyle(fontName='simhei', name='simhei', leading=25, fontSize=14, wordWrap='CJK')) # content Font
Chinese fonts for the table of contents:
toc.levelStyles = [
    ParagraphStyle(fontName='simhei', fontSize=20, name='TOCHeading1', leftIndent=20, firstLineIndent=-20, spaceBefore=10,
                   leading=16),
    ParagraphStyle(fontName='simhei', fontSize=18, name='TOCHeading2', leftIndent=40, firstLineIndent=-20, spaceBefore=5, leading=12),
]
Locating the table-of-contents entries; this condition must be adapted to how chapters are marked in your actual txt file:
if txt_data[0] == u"第" and (u"章" in txt_data):
Decoding Chinese text: gb2312 cannot decode Traditional Chinese characters, so a try/except is used to fall back to gbk:
try:
    txt_data = txt_data.decode("gb2312")
except:
    txt_data = txt_data.decode("gbk")
The result is as follows.
A txt article picked at random from the internet:

The generated PDF table of contents:

The generated PDF content:
--------------------- 

Guess you like

Origin www.cnblogs.com/ly570/p/10995942.html