3.1 CSV data
https://github.com/jackiekazil/data-wrangling
Download book information
This is chapter three
ModuleNotFoundError: No module named 'CSV' --- the module name must not be capitalized; write `import csv`.
import csv

# Open in text mode ('r', not 'rb'): on Python 3 csv.reader expects str
# lines. newline='' lets the csv module do its own newline handling, and the
# with-block closes the file once every row has been printed.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Each row comes back as a list of that line's column values.
    for row in reader:
        print(row)
Opening the file in binary mode ('rb') raises: _csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)
csvfile = open('data-text.csv','r')# Use 'r' (text mode) rather than 'rb'; the 'rb' in the book is a small mistake.
2. With csv.DictReader, the keys of each dictionary come from the first line (the header row) of the CSV file, and every subsequent line supplies the values of one dictionary.
import csv

# DictReader uses the first line of the file as the field names and yields
# each following line as a mapping from those names to that line's values.
# newline='' lets the csv module do its own newline handling; the with-block
# closes the file when the loop finishes.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)
3.2 JSON data
`print item` is wrong in Python 3: SyntaxError: Missing parentheses in call to 'print'. Did you mean print(item)?
import json

# Read the whole file inside a with-block so the handle is closed right
# away instead of being left open for the rest of the script.
with open('data-text.json') as json_file:
    json_data = json_file.read()

data = json.loads(json_data)

# Print each top-level item of the decoded document.
for item in data:
    print(item)
3.3 XML data
The structure of this data is dizzying at first glance!
from xml.etree import ElementTree as ET

# Parse the XML document and fetch its root (outermost) element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# The label reads "the outermost tag of the xml is". The closing parenthesis
# was missing here, which made the whole snippet a SyntaxError.
print('xml的最外层标签为', root)
print(root) shows the outermost tag of the XML document: <Element 'GHO' at 0x033D0B70>
import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()

# Collect what dir() reports for the root element, then print the element
# itself followed by that list of names.
root_members = dir(root)
print('xml的最外层标签为', root)
print('root的所有方法和属性', root_members)
dir(root) lists all the methods and attributes of root.
import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()

# list() over an element yields its direct child elements; show them once
# under the Chinese label and once under the English one.
children = list(root)
print('查看根元素的子元素', children)
print('View the child elements of the root element', children)
Narrow in on the observations.
Get the child elements of the Data element that sits directly under the root.
import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()

# Look up the 'Data' child of the root and list that element's own children.
data = root.find('Data')
data_children = list(data)
print('获取root下的Data元素的子元素', data_children)
The output is very long and ends with `]`, so it is a list.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Visit every child of Data, then print each element nested inside it.
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素', item)
This yields Dim and Value element objects.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the text content of every element nested inside each child of Data.
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的文本内容', item.text)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the list of child elements of every element nested inside each
# child of Data.
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的列表内容', list(item))
If a node has no text between its opening and closing tags, its data is usually stored as attributes inside the tag.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print the attribute dictionary of every element nested inside each child
# of Data.
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的节点属性', item.attrib)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Print only the attribute names (the keys of the attribute dictionary) of
# every element nested inside each child of Data.
for observation in data:
    for item in observation:
        print('获取root.Data.observation列表的子元素的节点字典的键', item.attrib.keys())
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        # Kept on purpose as a demonstration of the failure: on Python 3,
        # keys() returns a dict_keys view, so indexing it raises TypeError.
        lookup_key = (item.attrib.keys())[0]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键', lookup_key)
This produces the error: TypeError: 'dict_keys' object does not support indexing
This is because Python 3 changed dict.keys(): it now returns a dict_keys view object, which is iterable but cannot be indexed. We can explicitly convert it into a list:
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        # Convert the dict_keys view to a list so it can be indexed, then
        # take the first attribute name.
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键', lookup_key)
With the name of the key, the next step is to find the value corresponding to the key.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        # First attribute name of the element ...
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        # ... and the value stored under that attribute.
        rec_key = item.attrib[lookup_key]
        print('获取root.Data.observation列表的子元素的节点字典的键 的键 对应的值', rec_key)
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Elements whose first attribute is 'Numeric' get a fixed key
            # and use that attribute's value.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Placeholder for every other element at this step.
            rec_key = None
        # Only rec_key is printed: rec_value is not assigned on the else path.
        print('rec_key,rec_value', rec_key)
Next, replace None with the value of the element's first attribute, and use the element's Code attribute as the record value.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

for observation in data:
    record = {}
    for item in observation:
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Elements whose first attribute is 'Numeric' get a fixed key
            # and use that attribute's value.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Otherwise the first attribute's value becomes the key and the
            # 'Code' attribute supplies the value.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        print('rec_key,rec_value', rec_key, rec_value)
Create a dictionary for each observation and collect all of them in a list.
from xml.etree import ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# One dictionary per child of Data, collected in document order.
all_data = []

for observation in data:
    record = {}
    for item in observation:
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            # Elements whose first attribute is 'Numeric' get a fixed key
            # and use that attribute's value.
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            # Otherwise the first attribute's value becomes the key and the
            # 'Code' attribute supplies the value.
            rec_key = item.attrib[lookup_key]
            rec_value = item.attrib['Code']
        record[rec_key] = rec_value
    # The record is complete once every element of the observation is seen.
    all_data.append(record)

print(all_data)
I'm not sure whether this is right.