Read Word documents, extract data, and write it out (based on Python 3.6)

#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @File : delete_file
# @Author : moucong
# @Date : 2018/4/1 16:33
# @Software: PyCharm
"""Read text from a .docx file and write the digits it contains to a new one.

The script opens a Word document, prints how many paragraphs it has and the
text of each one, then collects the digits found in every space-separated
token and saves them as a single paragraph in a second document.

NOTE(review): the pasted source had lost its indentation. This version
assumes the output paragraph is added and the file saved once, after every
input paragraph has been processed -- confirm against the original script.
"""
import re

# Default input and output locations.
SOURCE_PATH = "E:\\python_word\\word.docx"
OUTPUT_PATH = "E:\\python_word\\number.docx"

# Matches a single digit; the digits of a token are found one by one and
# re-joined, which drops every non-digit character in between.
DIGIT_PATTERN = re.compile(r"\d")


def extract_digit_tokens(text: str) -> list:
    """Return the digits of each space-separated token in *text*.

    The text is split on single spaces and empty pieces (produced by runs of
    spaces) are discarded. For every remaining token the digits are kept, in
    order, and a trailing space is appended as a separator.

    A token without any digit still yields a lone ``" "``; this is kept so
    the saved text is the same as what the original loop accumulated.

    >>> extract_digit_tokens("abc 12 d3e4")
    [' ', '12 ', '34 ']
    """
    return [
        "".join(DIGIT_PATTERN.findall(token)) + " "
        for token in text.split(" ")
        if token  # skip the empty strings left by consecutive spaces
    ]


def main(source_path: str = SOURCE_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Print the paragraphs of *source_path* and save their digits.

    Args:
        source_path: Word document to read.
        output_path: Where the new document holding the extracted digits
            is saved.
    """
    # Imported here so extract_digit_tokens stays importable (and testable)
    # on machines that do not have python-docx installed.
    import docx

    source = docx.Document(source_path)
    paragraphs = source.paragraphs

    # Output the number of paragraphs.
    print(f"Number of paragraphs:{len(paragraphs)}")

    # Output the content of each paragraph.
    for para in paragraphs:
        print(para.text)

    # Collect the digit tokens of every paragraph, in document order.
    digit_tokens = []
    for para in paragraphs:
        digit_tokens.extend(extract_digit_tokens(para.text))

    result = docx.Document()
    # add_paragraph takes the paragraph text as a string, so join the pieces
    # instead of handing it the list itself.
    result.add_paragraph("".join(digit_tokens))
    result.save(output_path)


if __name__ == "__main__":
    main()

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324413211&siteId=291194637