用 Python 3 提取 XML 文件中的内容

import  xml.dom.minidom

def find_child(Par_nodes, mystr):
    """Append the text found under *Par_nodes* to *mystr* and return it.

    The nodes are visited depth-first in document order. A node that has
    children is descended into; a childless node whose ``nodeValue`` is not
    None (e.g. a text node) contributes its ``data`` with every newline
    character removed.

    Args:
        Par_nodes: Iterable of DOM nodes, e.g. ``element.childNodes``.
        mystr: String the extracted text is appended to.

    Returns:
        *mystr* followed by the extracted text, or *mystr* unchanged when
        no node contributed any text.
    """
    pieces = []
    # An explicit stack replaces recursion so deeply nested documents cannot
    # exhaust the interpreter's recursion limit. Nodes are pushed in reverse
    # so that pop() hands them back in document order.
    pending = list(Par_nodes)
    pending.reverse()
    while pending:
        node = pending.pop()
        children = node.childNodes
        if len(children) > 0:
            pending.extend(reversed(children))
        elif node.nodeValue is not None:
            pieces.append(node.data.replace('\n', ''))
    if not pieces:
        return mystr
    # Join once at the end instead of growing the string piece by piece.
    return mystr + ''.join(pieces)

def _first_text(node):
    """Return the ``data`` of *node*'s first child, or '' if unavailable.

    An empty string is returned when the node has no children or when its
    first child has no ``data`` attribute (for example an element node),
    so callers do not hit AttributeError on such nodes.
    """
    return getattr(node.firstChild, 'data', '')


def _section_text(paragraphs, start_heading, end_heading):
    """Collect the text of the paragraphs between two heading paragraphs.

    A paragraph counts as a heading when the whitespace-stripped text of its
    first child equals *start_heading* or *end_heading*. Collection starts
    after a start heading and stops at the next end heading, or at the end
    of *paragraphs* when no end heading follows. The heading paragraphs
    themselves are not included.
    """
    collected = ''
    inside = False
    for paragraph in paragraphs:
        heading = _first_text(paragraph).strip()
        if not inside:
            inside = heading == start_heading
            continue
        if heading == end_heading:
            inside = False
            continue
        collected = find_child(paragraph.childNodes, collected)
    return collected


def main():
    """Parse '2.XML' and print selected patent fields found in it."""
    dom1 = xml.dom.minidom.parse('2.XML')  # open and parse the XML file
    root = dom1.documentElement  # the document's root element

    # getElementsByTagName looks elements up by tag name and returns them
    # as a list of nodes.
    app_nums = root.getElementsByTagName('base:DocNumber')
    # The third 'base:DocNumber' element is the one printed as the
    # application number.
    print('专利申请号:' + _first_text(app_nums[2]))

    titles = root.getElementsByTagName('business:InventionTitle')
    print('专利名称:' + _first_text(titles[0]))

    paragraphs = root.getElementsByTagName('base:Paragraphs')
    # The first 'base:Paragraphs' element is printed as the abstract.
    print('专利摘要:' + _first_text(paragraphs[0]))

    company_names = root.getElementsByTagName('base:Name')
    print('公司名称:' + _first_text(company_names[0]))

    # The invention description is everything between the '发明内容' heading
    # paragraph and the '附图说明' heading paragraph.
    mystr = _section_text(paragraphs, '发明内容', '附图说明')
    print('发明内容:' + mystr)


if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/xyx_HFUT/article/details/82891453