Python Data Processing - New Book - Chapter 3 - Reading Data

3.1 csv data

https://github.com/jackiekazil/data-wrangling

Download book information

 

This is chapter three

ModuleNotFoundError: No module named 'CSV' --- module names are case-sensitive, so the module must be imported in lowercase as csv.

import csv

# Read the CSV file row by row; each row is printed as a list of strings.
# The file is opened in text mode ('r', not 'rb') because csv.reader on
# Python 3 expects str rows; newline='' lets the csv module handle line
# endings itself. The with-block closes the file when the loop finishes.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        print(row)

_csv.Error: iterator should return strings, not bytes (did you open the file in text mode?)

The fix is to open the file in text mode: csvfile = open('data-text.csv','r') # use 'r' instead of 'rb'; the 'rb' shown in the book is a small mistake when running under Python 3.

2. With csv.DictReader, the keys of each dictionary come from the first line of the csv file, and every subsequent line supplies the values of one dictionary.

import csv

# Read the CSV file as dictionaries: DictReader takes the field names from
# the first line and maps every following line onto them.
# newline='' lets the csv module handle line endings itself, and the
# with-block closes the file when the loop finishes.
with open('data-text.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        print(row)

3.2 JSON data

Writing `print item` is wrong in Python 3 and raises: SyntaxError: Missing parentheses in call to 'print'. Did you mean print(item)?

import json

# Load the whole JSON document into memory and decode it.
# The with-block closes the file as soon as its text has been read.
with open('data-text.json') as json_file:
    json_data = json_file.read()
data = json.loads(json_data)

# Print each top-level entry of the decoded document.
for item in data:
    print(item)

3.3 XML data

The data form is dazzling! ! !

from xml.etree import ElementTree as ET

# Parse the XML file and fetch its outermost (root) element.
tree = ET.parse('data-text.xml')
root = tree.getroot()
# The label reads "the outermost tag of the xml is". The closing parenthesis
# of this call is required; without it the snippet is a SyntaxError.
print('xml的最外层标签为', root)

print(root) prints the outermost tag of xml as <Element 'GHO' at 0x033D0B70>

import xml.etree.ElementTree as ET

# Parse the sample document and grab its top-level element.
tree = ET.parse('data-text.xml')
root = tree.getroot()

# Print the root element first, then the names of everything it exposes.
for label, value in (
    ('xml的最外层标签为', root),
    ('root的所有方法和属性', dir(root)),
):
    print(label, value)

dir(root) == all methods and properties of root

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()

# Iterating an Element yields its direct children; gather them into a list
# so they can be printed in one go.
root_children = [child for child in root]
print('查看根元素的子元素', root_children)

print('View the child elements of the root element', list(root))

fix it for observation

Next, find the Data element under the root and list its child elements.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()

# Locate the <Data> child of the root and unpack its direct children
# into a list for display.
data = root.find('Data')
data_children = [*data]
print('获取root下的Data元素的子元素', data_children)

The output is very long and is enclosed in [ ... ], which confirms that it is a list.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

label = '获取root.Data.observation列表的子元素'
# Walk every child of every observation under <Data>, in document order.
items = (item for observation in data for item in observation)
for item in items:
    print(label, item)

Get Dim and Value objects.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

label = '获取root.Data.observation列表的子元素的文本内容'
for observation in data:
    # Collect the text of each child of this observation, then print each.
    texts = [item.text for item in observation]
    for text in texts:
        print(label, text)

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

label = '获取root.Data.observation列表的子元素的列表内容'
for observation in data:
    for item in observation:
        # List the children of each observation child.
        grandchildren = list(item)
        print(label, grandchildren)

If a node has no text between its opening and closing tags, its data is usually stored as attributes inside the tag.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

label = '获取root.Data.observation列表的子元素的节点属性'
# Print the attribute dictionary of every child of every observation.
items = (item for observation in data for item in observation)
for item in items:
    attributes = item.attrib
    print(label, attributes)

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

label = '获取root.Data.observation列表的子元素的节点字典的键'
for observation in data:
    for item in observation:
        # Show only the attribute names (a dict_keys view) of this node.
        attribute_names = item.attrib.keys()
        print(label, attribute_names)

# Demonstration of a Python 2 idiom that fails on Python 3: taking the first
# attribute name of each node by indexing the result of keys().
from xml.etree import ElementTree as ET
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
	record = {}
	for item in observation:
		# NOTE: on Python 3, keys() returns a dict_keys view that cannot be
		# indexed, so this line raises TypeError.
		lookup_key = (item.attrib.keys())[0]
		print('获取root.Data.observation列表的子元素的节点字典的键 的键',lookup_key)

produces the error: TypeError: 'dict_keys' object does not support indexing

This is because Python 3 changed dict.keys(): it now returns a dict_keys view object, which can be iterated over but not indexed. We can explicitly convert it into a list:
 

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

label = '获取root.Data.observation列表的子元素的节点字典的键 的键'
for observation in data:
    record = {}
    for item in observation:
        # Materialise the attribute names as a list so the first one can be
        # taken by index.
        lookup_key_list = list(item.attrib)
        lookup_key = lookup_key_list[0]
        print(label, lookup_key)

With the name of the key, the next step is to find the value corresponding to the key.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

label = '获取root.Data.observation列表的子元素的节点字典的键 的键 对应的值'
for observation in data:
    record = {}
    for item in observation:
        attributes = item.attrib
        # First attribute name of the node, then the value stored under it.
        lookup_key = list(attributes)[0]
        rec_key = attributes[lookup_key]
        print(label, rec_key)

from xml.etree import ElementTree as ET

# Intermediate step: recognise the 'Numeric' attribute and leave every other
# node as a (None, None) placeholder for now.
tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []
for observation in data:
    record = {}
    for item in observation:
        lookup_key_list = list(item.attrib.keys())
        lookup_key = lookup_key_list[0]
        if lookup_key == 'Numeric':
            rec_key = 'NUMERIC'
            rec_value = item.attrib['Numeric']
        else:
            rec_key = None
            # Reset the value too, so it is never unbound or carried over
            # from an earlier node.
            rec_value = None
        # Print both halves of the pair, as the label announces.
        print('rec_key,rec_value', rec_key, rec_value)

Next, replace the None placeholder: use the value of the node's first attribute as the key, and the value of its Code attribute as the value.

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')
all_data = []

label = 'rec_key,rec_value'
for observation in data:
    record = {}
    for item in observation:
        attributes = item.attrib
        lookup_key = list(attributes)[0]
        is_numeric = lookup_key == 'Numeric'
        # Numeric nodes get a fixed key and their own value; every other node
        # is keyed by its first attribute's value and valued by its Code.
        rec_key = 'NUMERIC' if is_numeric else attributes[lookup_key]
        rec_value = attributes['Numeric' if is_numeric else 'Code']
        print(label, rec_key, rec_value)

create dictionary

import xml.etree.ElementTree as ET

tree = ET.parse('data-text.xml')
root = tree.getroot()
data = root.find('Data')

# Build one dictionary per observation and collect them all in a list.
all_data = []
for observation in data:
    record = {}
    for item in observation:
        attributes = item.attrib
        lookup_key = list(attributes)[0]
        is_numeric = lookup_key == 'Numeric'
        # Numeric nodes are stored under the fixed key 'NUMERIC'; every other
        # node is keyed by its first attribute's value and holds its Code.
        rec_key = 'NUMERIC' if is_numeric else attributes[lookup_key]
        rec_value = attributes['Numeric' if is_numeric else 'Code']
        record[rec_key] = rec_value
    all_data.append(record)
print(all_data)

I am not sure whether this result is correct.

 

 

 

 

 

 

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324113338&siteId=291194637