Python 脚本：将文件夹下的 doc/docx 文件转换为 PDF 并合并到一个文件中

import os
import subprocess
from PyPDF2 import PdfMerger, PdfReader

# Convert every .doc/.docx file in the source folder to PDF with unoconv.
source_dir = '/tmp/test'
for filename in os.listdir(source_dir):
    if not filename.endswith(('.doc', '.docx')):
        continue
    doc_path = os.path.join(source_dir, filename)
    try:
        # Pass the command as an argument list (no shell) so file names
        # containing quotes, spaces or shell metacharacters are handled
        # safely instead of being interpreted by the shell.
        proc = subprocess.run(
            ['unoconv', '-f', 'pdf', doc_path],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except OSError as exc:
        # The converter could not be started at all (e.g. not installed);
        # every remaining file would fail the same way, so stop converting.
        print(f'Could not run unoconv: {exc}')
        break
    out, err = proc.stdout, proc.stderr
    if proc.returncode != 0:
        # Report the failure rather than silently dropping the document,
        # then carry on with the remaining files.
        details = err.decode(errors='replace').strip()
        print(f'unoconv failed for {doc_path} '
              f'(exit code {proc.returncode}): {details}')

# Read every PDF file in the folder and add it to the merger object.
pdf_dir = '/tmp/test'
pdf_merger = PdfMerger(strict=False)  # strict=False so recoverable PDF errors are ignored
# os.listdir() returns names in arbitrary order; sort them so the page
# order of the merged document is reproducible between runs.
for filename in sorted(os.listdir(pdf_dir)):
    # Skip the merged result of a previous run (written to this same
    # folder as output.pdf), otherwise it would be merged into itself.
    if not filename.endswith('.pdf') or filename == 'output.pdf':
        continue
    pdf_path = os.path.join(pdf_dir, filename)
    print(pdf_path)  # progress output: which file is being merged
    with open(pdf_path, 'rb') as f:
        pdf_merger.append(PdfReader(f), import_outline=False)

# Merge all collected PDFs and save the result as a new file.
try:
    with open('/tmp/test/output.pdf', 'wb') as f:
        pdf_merger.write(f)
finally:
    # Release the input streams held by the merger, even if writing failed.
    pdf_merger.close()


猜你喜欢

转载自blog.csdn.net/zyh821351004/article/details/130587293