[Network security: practice web crawlers with 100 exercises] Exercise 20: Data processing - and writing to a specified location in a file

Table of contents

1. Goal 1: Decoding + removing tags

2. Goal 2: Extract the content in the tag

3. Goal 3: Insert the processed data into the original location

4. Goal 4: Insert the specified content into the specified location

5. Goal 5: Format contextual fonts

6. Goal 6: Insert different strings into multiple different positions

7. Goal 7: Insert different strings into multiple different positions

8. Goal 8: Write images and text after the specified string


1. Goal 1: Decoding + removing tags

Functions used: html.unescape() for decoding + str.replace() for substitution

import html

# The \u003c / \u003e escapes are resolved by the string literal itself,
# so `data` already starts with a literal "<p>".
data = '\u003cp\u003e(此处忽略一万个字)'

# Decode HTML entities first, then apply the tag substitutions in order:
# an empty paragraph becomes a newline, and any remaining <p> / </p>
# markers are dropped.
decoded_data = html.unescape(data)
for old, new in (('<p><br></p>', '\n'), ('<p>', ''), ('</p>', '')):
    decoded_data = decoded_data.replace(old, new)

# Show the result
print(decoded_data)



2. Goal 2: Extract the content in the tag

Idea: this is really just regular-expression matching

Each img tag is stripped and replaced by its URL followed by a line break, leaving only the URL

code:

import re

# Matches an <img> tag whose only attribute is src and captures the URL.
IMG_TAG_RE = re.compile(r'<img\s+src="([^"]+)"\s*>')


def replace_img_tags(text):
    """Replace every ``<img src="...">`` tag in *text* with its URL and a newline.

    A function is used as the replacement so the URL is inserted verbatim.
    Passing the URL itself as the replacement string would make ``re.sub``
    interpret any backslash in it as a template escape, which either corrupts
    the URL or raises ``re.error``.

    Args:
        text: The markup to process.

    Returns:
        The text with each matching tag replaced by ``URL + '\\n'``.
    """
    return IMG_TAG_RE.sub(lambda match: match.group(1) + '\n', text)


text = '<img src="URL">…………(此处省略一万字)'

# Extract the URLs (kept available as a list for later use)
urls = IMG_TAG_RE.findall(text)

# Replace each <img> tag with its URL plus a newline, in a single pass
text = replace_img_tags(text)

print(text)



3. Goal 3: Insert the processed data into the original location

The following code downloads each image URL and inserts the image into the document at its original position

import requests
from docx import Document
from docx.shared import Inches

# Create a new Word document
doc = Document()

text = '''
图片:
https://xxxxx.png
'''

# Split the text on newlines so every line is handled on its own
lines = text.split('\n')

for line in lines:
    if line.startswith('https://'):
        # Download the image. The timeout keeps a stalled server from hanging
        # the script forever, and raise_for_status() stops on an HTTP error
        # instead of saving the error page as if it were image data.
        response = requests.get(line, timeout=30)
        response.raise_for_status()

        # The last segment of the URL is used as the local file name
        image_path = line.split('/')[-1]
        with open(image_path, 'wb') as f:
            f.write(response.content)

        # Insert the picture into the document (adjust the width as needed)
        doc.add_picture(image_path, width=Inches(4))
    else:
        # Any other line goes into the document as a text paragraph
        doc.add_paragraph(line)

# Save the Word document
doc.save("output.docx")

4. Goal 4: Insert the specified content into the specified location

Use Python to open a Word document and write content after the specified string

from docx import Document

# Open the Word document
doc = Document('example.docx')

# Text of every paragraph, in document order
paragraphs = [p.text for p in doc.paragraphs]

# Paragraph text that marks the insertion point
target_string = '指定字符串'
if target_string not in paragraphs:
    # Fail with a message that names the missing anchor
    raise ValueError(
        'no paragraph with text {!r} found in example.docx'.format(target_string)
    )
insert_index = paragraphs.index(target_string) + 1  # +1: insert after the target

# Content to insert
new_content = '要插入的内容'

# python-docx can only insert *before* a paragraph, so the new paragraph is
# placed before the one that follows the target.
if insert_index < len(doc.paragraphs):
    doc.paragraphs[insert_index].insert_paragraph_before(new_content)
else:
    # The target is the last paragraph, so there is nothing to insert before;
    # append at the end of the document instead.
    doc.add_paragraph(new_content)

# Save the modified Word document
doc.save('example_modified.docx')



5. Goal 5: Format contextual fonts

The font size of the written text is the same as the previous line

from docx import Document
from docx.shared import Pt

# Open the Word document
doc = Document('example.docx')

# Font size of the last run of the last paragraph. A document without
# paragraphs, or one ending in an empty paragraph (no runs), has no run to
# read from; in that case the size stays None, i.e. no explicit size is set.
existing_paragraphs = doc.paragraphs
previous_paragraph = existing_paragraphs[-1] if existing_paragraphs else None
if previous_paragraph is not None and previous_paragraph.runs:
    previous_run = previous_paragraph.runs[-1]
    previous_font_size = previous_run.font.size
else:
    previous_run = None
    previous_font_size = None

# Text to write
new_text = '新的文本'

# Write the text into a new paragraph at the end of the document
new_paragraph = doc.add_paragraph()
new_run = new_paragraph.add_run(new_text)

# Give the new text the same font size as the previous line
new_font = new_run.font
new_font.size = previous_font_size

# Save the modified Word document
doc.save('example_modified.docx')

Insert text with the same font size as the previous line

from docx import Document
from docx.shared import Pt

def word_info_w():
    """Insert '测试title' after the '附件:' paragraph of test.docx.

    The new paragraph gets the font size of the last run of the target
    paragraph, and the document is saved back to test.docx.

    Raises:
        ValueError: If no paragraph's text is exactly '附件:'.
    """
    # Open the Word document
    doc = Document('test.docx')

    target_string = '附件:'
    new_content = '测试title'

    # Take one snapshot of the paragraphs and search their text
    paragraphs = doc.paragraphs
    texts = [p.text for p in paragraphs]
    if target_string not in texts:
        raise ValueError(
            'no paragraph with text {!r} found in test.docx'.format(target_string)
        )
    target_index = texts.index(target_string)
    target_paragraph = paragraphs[target_index]

    # Font size of the line the new text follows; stays None when the target
    # paragraph has no runs to read it from.
    previous_font_size = None
    if target_paragraph.runs:
        previous_font_size = target_paragraph.runs[-1].font.size

    # python-docx inserts *before* a paragraph, so insert before the one that
    # follows the target; when the target is the last paragraph, append.
    if target_index + 1 < len(paragraphs):
        new_paragraph = paragraphs[target_index + 1].insert_paragraph_before(new_content)
    else:
        new_paragraph = doc.add_paragraph(new_content)

    # Match the font size of the previous line
    new_paragraph.runs[0].font.size = previous_font_size

    # Save the modified Word document
    doc.save('test.docx')


if __name__ == '__main__':
    word_info_w()



6. Goal 6: Insert different strings into multiple different positions

Insert different strings into multiple different positions

(may be inserted in the same position)

from docx import Document

def insert_content(doc, insert_dict):
    """Insert text after several anchor paragraphs and save to test.docx.

    For every ``target: content`` pair, a new paragraph holding ``content`` is
    placed right after the first paragraph whose text equals ``target``. It
    gets the font size of that paragraph's last run. Targets that do not occur
    in the document are skipped.

    Args:
        doc: An opened python-docx document.
        insert_dict: Mapping of anchor paragraph text to the text to insert.
    """
    # Work with paragraph *objects* from a single snapshot. Each insertion
    # shifts the position of every later paragraph, so an index computed
    # before an insertion would point at the wrong paragraph afterwards.
    paragraphs = list(doc.paragraphs)
    texts = [p.text for p in paragraphs]

    for target_string, new_content in insert_dict.items():
        if target_string not in texts:
            continue
        target_index = texts.index(target_string)
        target_paragraph = paragraphs[target_index]

        # Font size of the line the new text follows; stays None when the
        # target paragraph has no runs to read it from.
        previous_font_size = None
        if target_paragraph.runs:
            previous_font_size = target_paragraph.runs[-1].font.size

        # python-docx inserts *before* a paragraph, so insert before the one
        # that followed the target; when the target is last, append.
        if target_index + 1 < len(paragraphs):
            new_paragraph = paragraphs[target_index + 1].insert_paragraph_before(new_content)
        else:
            new_paragraph = doc.add_paragraph(new_content)

        # An empty new_content yields a paragraph without runs; nothing to size
        if new_paragraph.runs:
            new_paragraph.runs[0].font.size = previous_font_size

    # Save the modified Word document
    doc.save('test.docx')

if __name__ == '__main__':
    # Anchor paragraph text -> text to place right after it
    insert_dict = {
        '附件:': '测试title1',
        '目录:': '测试title2',
    }

    # Load the document and apply all insertions in one call
    doc = Document('test.docx')
    insert_content(doc, insert_dict)



7. Goal 7: Insert different strings into multiple different positions

from docx import Document

def insert_content(doc, target_string, new_content):
    """Insert *new_content* after the paragraph equal to *target_string*.

    The new paragraph is placed before the paragraph that follows the first
    match. Nothing is inserted when there is no match or when the match is the
    last paragraph. The document is saved to test.docx in every case.

    Args:
        doc: An opened python-docx document.
        target_string: Exact text of the anchor paragraph.
        new_content: Text of the paragraph to insert.
    """
    texts = [para.text for para in doc.paragraphs]

    # Position of the paragraph right after the first match, if there is one
    try:
        follower_pos = texts.index(target_string) + 1
    except ValueError:
        follower_pos = None

    if follower_pos is not None:
        current = doc.paragraphs
        # Only insert when a following paragraph exists to insert before
        if follower_pos < len(current):
            current[follower_pos].insert_paragraph_before(new_content)

    # Save the modified Word document
    doc.save('test.docx')

if __name__ == '__main__':
    # Anchor paragraph text -> text to place right after it
    insert_dict = {
        '指定字符1位置': '插入内容1',
        '指定字符2位置': '插入内容2',
        '指定字符3位置': '插入内容3',
    }

    # Open the Word document and apply the insertions one at a time
    doc = Document('test.docx')
    for anchor, addition in insert_dict.items():
        insert_content(doc, anchor, addition)

Specify the font size

from docx import Document
from docx.shared import Pt

def insert_content(doc, target_string, new_content, font_size=None):
    """Insert *new_content* after the target paragraph with a set font size.

    The new paragraph is placed before the paragraph that follows the first
    paragraph whose text equals *target_string*. Nothing is inserted when
    there is no match or when the match is the last paragraph. The document is
    saved to test.docx in every case.

    Args:
        doc: An opened python-docx document.
        target_string: Exact text of the anchor paragraph.
        new_content: Text of the paragraph to insert.
        font_size: Font size for the inserted text, e.g. ``Pt(16)``.
            Defaults to 12 pt when omitted.
    """
    if font_size is None:
        # NOTE(review): the original comment called this "3号", but in the
        # usual Chinese size names 12 pt is 小四 and 三号 is 16 pt; confirm
        # which size was intended.
        font_size = Pt(12)

    # Text of every paragraph, in document order
    paragraphs = [p.text for p in doc.paragraphs]

    if target_string in paragraphs:
        # +1: the new paragraph goes after the target
        insert_index = paragraphs.index(target_string) + 1

        if insert_index < len(doc.paragraphs):
            paragraph = doc.paragraphs[insert_index]
            new_paragraph = paragraph.insert_paragraph_before(new_content)
            # An empty new_content produces a paragraph without any run, so
            # only set the size when there is a run to carry it.
            if new_paragraph.runs:
                new_paragraph.runs[0].font.size = font_size

    # Save the modified Word document
    doc.save('test.docx')

if __name__ == '__main__':
    # Anchor paragraph text -> text to place right after it
    insert_dict = {
        '指定字符1位置': '插入内容1',
        '指定字符2位置': '插入内容2',
        '指定字符3位置': '插入内容3',
    }

    # Open the Word document and apply the insertions one at a time
    doc = Document('test.docx')
    for anchor, addition in insert_dict.items():
        insert_content(doc, anchor, addition)



8. Goal 8: Write images and text after the specified string

from docx import Document
from docx.shared import Pt
from docx.shared import Inches
import requests

def _paragraph_at(doc, index):
    """Return the paragraph at *index*, appending a new one when past the end."""
    existing = doc.paragraphs
    if index < len(existing):
        return existing[index]
    return doc.add_paragraph()


def word_img_text_w(word, target_string):
    """Write the lines of *word* into test.docx after the target paragraph.

    Each line of *word* goes into one paragraph, starting with the paragraph
    that follows the first paragraph whose text equals *target_string*. Lines
    starting with ``https://`` are downloaded and added as pictures; all other
    lines are added as 16 pt text. When the content has more lines than there
    are paragraphs after the target, new paragraphs are appended to the end of
    the document. The document is saved to test.docx in every case.

    Args:
        word: Newline-separated text lines and image URLs.
        target_string: Exact text of the anchor paragraph.
    """
    # Open the Word document
    doc = Document('test.docx')

    # Text of every paragraph, in document order
    paragraphs = [p.text for p in doc.paragraphs]

    if target_string in paragraphs:
        # +1: writing starts in the paragraph after the target
        insert_index = paragraphs.index(target_string) + 1

        # NOTE(review): content is appended to the paragraphs that already
        # follow the target rather than inserted as new paragraphs; confirm
        # this is intended (e.g. the template has blank placeholder lines).
        for line in word.split('\n'):
            if line.startswith('https://'):
                # Download the image. The timeout keeps a stalled server from
                # hanging the call, and raise_for_status() stops on an HTTP
                # error instead of saving the error page as image data.
                response = requests.get(line, timeout=30)
                response.raise_for_status()

                # The last segment of the URL is used as the local file name
                image_path = line.split('/')[-1]
                with open(image_path, 'wb') as f:
                    f.write(response.content)

                # Add the picture (adjust the width as needed)
                paragraph = _paragraph_at(doc, insert_index)
                paragraph.add_run().add_picture(image_path, width=Inches(4))
            else:
                # Add the text at 16 pt
                run = _paragraph_at(doc, insert_index).add_run(line)
                run.font.size = Pt(16)
            insert_index += 1

    # Save the Word document
    doc.save("test.docx")

if __name__ == '__main__':
    # Paragraph text after which the content is written
    target_string = '指定目标字符1'

    # Content to write: one text line followed by two image URLs
    content = '''测试
https://xx.png
https://xxxx.png'''

    # Write the content into the Word document
    word_img_text_w(word=content, target_string=target_string)

Guess you like

Origin blog.csdn.net/qq_53079406/article/details/132140855