There are two common ways to parse XML: SAX and DOM. SAX has an inherent advantage when an XML document only needs to be read. The following example shows how to parse XML with SAX:
import xml.sax
from xml.sax import make_parser
from xml.sax.handler import ContentHandler


class ParseHandler(ContentHandler):
    """SAX content handler that prints every parsing event it receives.

    Each callback writes one line to standard output: document start/end,
    element start/end, and the character data found between tags.
    """

    # Characters deleted from character data before it is printed.
    _REMOVED_CHARS = str.maketrans("", "", " \n\t\r")

    def __init__(self):
        """Initialise the base handler and the per-movie placeholder fields."""
        super().__init__()
        self.CurrentData = ""
        self.type = ""
        self.format = ""
        self.year = ""
        # NOTE(review): presumably meant "rating" (the XML element name);
        # the attribute name is kept so existing users are not broken.
        self.ration = ""
        self.stars = ""
        self.description = ""

    def characters(self, content):
        """Print *content* with spaces, tabs and line breaks removed.

        Whitespace-only chunks (for example the indentation between tags)
        are printed as empty lines.
        """
        print(content.strip().translate(self._REMOVED_CHARS).strip())

    def startDocument(self):
        """Announce that parsing has begun."""
        print("Parsing started")

    def endDocument(self):
        """Announce that parsing has finished."""
        print("End of parsing")

    def startElement(self, name, attrs):
        """Print the opening tag *name*; for ``movie`` also print its title.

        A ``movie`` element without a ``title`` attribute prints an empty
        title instead of raising ``KeyError``.
        """
        if name == 'movie':
            print("startElement", name, "attrs", attrs.get("title", ""))
        else:
            print("startElement", name, "attrs")

    def endElement(self, name):
        """Print the closing tag *name*."""
        print("endElement", name)


if __name__ == '__main__':
    parser = make_parser()
    # Namespace processing is switched off, so startElement/endElement
    # (not the *NS variants) are the callbacks that get invoked.
    parser.setFeature(xml.sax.handler.feature_namespaces, 0)
    handler = ParseHandler()
    # Register the instance created above; the original passed an
    # undefined name here and failed with NameError.
    parser.setContentHandler(handler)
    parser.parse("movies.xml")
The contents of the file movies.xml are as follows:
<?xml version="1.0" encoding="UTF-8"?> <collection shelf="New Arrivals"> <movie title="Enemy Behind"> <type>War, Thriller</type> <format>DVD</format> <year>2003</year> <rating>PG</rating> <stars>10</stars> <description>Talk about a US-Japan war</description> </movie> <movie title="Transformers"> <type>Anime, Science Fiction</type> <format>DVD</format> <year>1989</year> <rating>R</rating> <stars>8</stars> <description>A scientific fiction</description> </movie> <movie title="Trigun"> <type>Anime, Action</type> <format>DVD</format> <episodes>4</episodes> <rating>PG</rating> <stars>10</stars> <description>Vash the Stampede!</description> </movie> <movie title="Ishtar"> <type>Comedy</type> <format>VHS</format> <rating>PG</rating> <stars>2</stars> <description>Viewable boredom</description> </movie> </collection>