Python代码:
# -*- coding: UTF-8 -*-
"""Collect the distinct ``name`` attributes of ``<book>`` elements.

Every regular file in a directory is parsed as XML.  In each file only the
first ``<bookList>`` element (in document order) is inspected; the ``name``
attribute of every ``<book>`` below it is collected once.  The collected
names are printed and written, sorted, to a text file.

Created on 2016-03-20

@author: Administrator
"""
import os
from xml.dom import minidom

# Default accumulator used when a caller does not pass its own list.
# It lives at module level (not only under the ``__main__`` guard) so that
# readXML()/compareXML() also work when this file is imported.
attributeValueList = []


def listFiles(path):
    """Return the names of the entries in directory *path*."""
    return os.listdir(path)


def readXML(xmlPath, names=None):
    """Add the book names found in one XML file to *names*.

    Args:
        xmlPath: Path of the XML file to parse.
        names: List that receives names not already present in it.
            Defaults to the module-level ``attributeValueList``.

    Returns:
        The list the names were added to.

    Raises:
        xml.parsers.expat.ExpatError: If the file is not well-formed XML.
    """
    if names is None:
        names = attributeValueList

    # Load and parse the XML file, then take the document's root element.
    root = minidom.parse(xmlPath).documentElement

    # A file without any <bookList> simply contributes no names
    # (indexing [0] unconditionally would raise IndexError).
    book_lists = root.getElementsByTagName("bookList")
    if not book_lists:
        return names

    # Only the first <bookList> in document order is examined.
    for node in book_lists[0].getElementsByTagName("book"):
        # Read the value of the node's "name" attribute.
        book_name = node.getAttribute("name")
        if book_name not in names:
            names.append(book_name)
    return names


def compareXML(xmlPath, names=None):
    """Collect the book names of every file in directory *xmlPath*.

    Args:
        xmlPath: Directory containing the XML files; a trailing path
            separator is optional.
        names: List that receives the names.  Defaults to the module-level
            ``attributeValueList``.

    Returns:
        The list the names were added to.
    """
    if names is None:
        names = attributeValueList

    # Sorted so the order in which names are discovered does not depend on
    # the platform-specific ordering of os.listdir().
    for tfile in sorted(listFiles(xmlPath)):
        # os.path.join inserts the separator only when it is missing, so
        # both "D:\\test" and "D:\\test\\" work.
        full_path = os.path.join(xmlPath, tfile)
        # Sub-directories cannot be parsed as XML; skip them.
        if not os.path.isfile(full_path):
            continue
        readXML(full_path, names)
    return names


def printList(tlist):
    """Print every element of *tlist* on its own line."""
    for tl in tlist:
        # Function-call form: valid on Python 3, and for a single argument
        # it prints the same text on Python 2.
        print(tl)


def writeTxt(txtPath, argList):
    """Write the items of *argList*, sorted, one per line to *txtPath*.

    An existing file at *txtPath* is overwritten.
    """
    # The context manager closes the file even if a write fails.
    with open(txtPath, "w") as f:
        for arg in sorted(argList):
            f.write(arg + "\n")


if __name__ == '__main__':
    xmlPath = "D:\\test\\"
    txtPath = "D:\\out\\out_file.txt"
    compareXML(xmlPath)
    printList(attributeValueList)
    writeTxt(txtPath, attributeValueList)
test.xml:
<?xml version="1.0" encoding="UTF-8"?> <mybook id="mb001"> <bookList> <book name="bookA" id="A">AAA</book> <book name="bookB" id="B">BBB</book> <book name="bookD" id="D">DDD</book> <book name="bookE" id="E">EEE</book> </bookList> <bc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bc> </mybook>
test2.xml:
<?xml version="1.0" encoding="UTF-8"?> <mybook id="mb001"> <bookList> <book name="bookA" id="A">AAA</book> <book name="bookB" id="B">BBB</book> <book name="bookC" id="C">CCC</book> </bookList> <bc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bc> <bcc> <bookList> <book name="bcbookA" id="A">AAA</book> <book name="bcbookB" id="B">BBB</book> </bookList> </bcc> </mybook>