Python converts a pdf double-page document into a png image. The png image is cut into two equal parts on the left and right, and then merged into a new pdf single-page document.

1. Introduction of problems

The existing pdf double-page document is as follows:
Insert image description here
Now cut and splice the pdf double-page document according to the following page number sequence. There are two points that need special attention. One is that the cover page only cuts the middle part, and the other is that the document is in order from right to left. Typesetting
Insert image description here

2. python program

import os
import office
from PIL import Image
import img2pdf


# 新建两个文件夹分别存储裁剪前和裁剪后的图片
def newfolders():
    for folder in ['images before cropping', 'images after cropping']:
        if os.path.isdir(folder) == False:
            os.mkdir(folder)


# pdf双页文档转为多张png图片
def pdf_to_images(pdf_path):
    office.pdf.pdf2imgs(pdf_path=pdf_path, out_dir='images before cropping')


# png图片批量裁剪为左右两等分
def images_cropping(pdf_name):
    images = os.listdir('images before cropping')
    images.sort(key=lambda x: int(x.replace(' [' + pdf_name.split('.')[0] + ']-', '').split('.')[0]))
    index = 0
    for image in images:
        image = Image.open('images before cropping/' + image)
        width, height = image.size
        item_width = int(width / 2)
        item_height = int(height / 2)
        box_list = []
        if index == 0:
            box = (int(item_width / 2), 0, int(item_width * 3 / 2), height)
            box_list.append(box)
        else:
            for i in range(2):
                # box = (i * item_width, 0, (i + 1) * item_width, height)  # 从左往右
                box = (abs(i - 1) * item_width, 0, (abs(i - 1) + 1) * item_width, height)  # 从右往左
                box_list.append(box)
        image_list = [image.crop(box) for box in box_list]
        for j in range(len(image_list)):
            image_list[j].save('images after cropping/' + str(2 * index + 1 + j) + '.png', 'PNG')
        index += 1


# 裁剪后的png图片合并为新的pdf单页文档
def images_to_pdf(pdf_name):
    images = os.listdir('images after cropping')
    images.sort(key=lambda x: int(x.split('.')[0]))
    images = ['images after cropping/' + i for i in images]
    with open('pdf/' + pdf_name, 'wb') as f:
        f.write(img2pdf.convert(images))


if __name__ == '__main__':
    pdf_name = '名探偵コナン日本語版第100巻.pdf'
    new_pdf_name = '名侦探柯南日文版第100卷.pdf'
    pdf_path = 'pdf/' + pdf_name
    newfolders()  # 新建两个文件夹分别存储裁剪前和裁剪后的图片
    pdf_to_images(pdf_path)  # pdf双页文档转为多张png图片
    images_cropping(pdf_name)  # png图片批量裁剪为左右两等分
    images_to_pdf(new_pdf_name)  # 裁剪后的png图片合并为新的pdf单页文档

    # 文档树:
    # │  main.py
    # │
    # ├─images after cropping
    # │      1.png
    # │      10.png
    # │      100.png
    # │      101.png
    # │      102.png
    # │      103.png
    #        ……
    # │      95.png
    # │      96.png
    # │      97.png
    # │      98.png
    # │      99.png
    # │
    # ├─images before cropping
    # │      [名探偵コナン日本語版第100巻]-0.jpg
    # │      [名探偵コナン日本語版第100巻]-1.jpg
    # │      [名探偵コナン日本語版第100巻]-10.jpg
    # │      [名探偵コナン日本語版第100巻]-100.jpg
    # │      [名探偵コナン日本語版第100巻]-11.jpg
    # │      [名探偵コナン日本語版第100巻]-12.jpg
    # │      [名探偵コナン日本語版第100巻]-13.jpg
    #        ……
    # │      [名探偵コナン日本語版第100巻]-95.jpg
    # │      [名探偵コナン日本語版第100巻]-96.jpg
    # │      [名探偵コナン日本語版第100巻]-97.jpg
    # │      [名探偵コナン日本語版第100巻]-98.jpg
    # │      [名探偵コナン日本語版第100巻]-99.jpg
    # │
    # └─pdf
    #        名侦探柯南日文版第100卷.pdf
    #        名探偵コナン日本語版第100巻.pdf

3. Effect display

Insert image description here

Guess you like

Origin blog.csdn.net/m0_67790374/article/details/132621108