数据加载、存储与文件格式
源码下载链接
读写文本格式的数据
本文对Python中的数据加载、存储与文件格式做了一个简要的说明,实际应用中的情况更加复杂,每个小节的内容都很有限。如果用到相关内容,各位读者还需上网查找补充。
import pandas as pd
df=pd.read_csv('ch06/ex1.csv')
df
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
pd.read_table('ch06/ex1.csv',sep=',')
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
pd.read_csv('ch06/ex2.csv',header=None)
|
0 |
1 |
2 |
3 |
4 |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
pd.read_csv('ch06/ex2.csv',names=['a','b','c','d','message'])
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
names=['a','b','c','d','message']
pd.read_csv('ch06/ex2.csv',names=names,index_col='message')
|
a |
b |
c |
d |
message |
|
|
|
|
hello |
1 |
2 |
3 |
4 |
world |
5 |
6 |
7 |
8 |
foo |
9 |
10 |
11 |
12 |
parsed=pd.read_csv('ch06/csv_mindex.csv',index_col=['key1','key2'])
parsed
|
|
value1 |
value2 |
key1 |
key2 |
|
|
one |
a |
1 |
2 |
b |
3 |
4 |
c |
5 |
6 |
d |
7 |
8 |
two |
a |
9 |
10 |
b |
11 |
12 |
c |
13 |
14 |
d |
15 |
16 |
list(open('ch06/ex3.txt'))
[’ A B C\n’, ‘aaa -0.264438 -1.026059 -0.619500\n’, ‘bbb 0.927272 0.302904 -0.032399\n’, ‘ccc -0.264273 -0.386314 -0.217601\n’, ‘ddd -0.871858 -0.348382 1.100491\n’]
result=pd.read_table('ch06/ex3.txt',sep='\s+')
result
|
A |
B |
C |
aaa |
-0.264438 |
-1.026059 |
-0.619500 |
bbb |
0.927272 |
0.302904 |
-0.032399 |
ccc |
-0.264273 |
-0.386314 |
-0.217601 |
ddd |
-0.871858 |
-0.348382 |
1.100491 |
pd.read_csv('ch06/ex4.csv',skiprows=[0,2,3])
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
result=pd.read_csv('ch06/ex5.csv')
result
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
two |
5 |
6 |
NaN |
8 |
world |
2 |
three |
9 |
10 |
11.0 |
12 |
foo |
pd.isnull(result)
|
something |
a |
b |
c |
d |
message |
0 |
False |
False |
False |
False |
False |
True |
1 |
False |
False |
False |
True |
False |
False |
2 |
False |
False |
False |
False |
False |
False |
result=pd.read_csv('ch06/ex5.csv',na_values=['NULL'])
result
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
two |
5 |
6 |
NaN |
8 |
world |
2 |
three |
9 |
10 |
11.0 |
12 |
foo |
sentinels={'message':['foo','NA'],'something':['two']}
pd.read_csv('ch06/ex5.csv',na_values=sentinels)
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
NaN |
5 |
6 |
NaN |
8 |
world |
2 |
three |
9 |
10 |
11.0 |
12 |
NaN |
逐块读取文本文件
result=pd.read_csv('ch06/ex6.csv')
result
|
one |
two |
three |
four |
key |
0 |
0.467976 |
-0.038649 |
-0.295344 |
-1.824726 |
L |
1 |
-0.358893 |
1.404453 |
0.704965 |
-0.200638 |
B |
2 |
-0.501840 |
0.659254 |
-0.421691 |
-0.057688 |
G |
3 |
0.204886 |
1.074134 |
1.388361 |
-0.982404 |
R |
4 |
0.354628 |
-0.133116 |
0.283763 |
-0.837063 |
Q |
5 |
1.817480 |
0.742273 |
0.419395 |
-2.251035 |
Q |
6 |
-0.776764 |
0.935518 |
-0.332872 |
-1.875641 |
U |
7 |
-0.913135 |
1.530624 |
-0.572657 |
0.477252 |
K |
8 |
0.358480 |
-0.497572 |
-0.367016 |
0.507702 |
S |
9 |
-1.740877 |
-1.160417 |
-1.637830 |
2.172201 |
G |
10 |
0.240564 |
-0.328249 |
1.252155 |
1.072796 |
8 |
11 |
0.764018 |
1.165476 |
-0.639544 |
1.495258 |
R |
12 |
0.571035 |
-0.310537 |
0.582437 |
-0.298765 |
1 |
13 |
2.317658 |
0.430710 |
-1.334216 |
0.199679 |
P |
14 |
1.547771 |
-1.119753 |
-2.277634 |
0.329586 |
J |
15 |
-1.310608 |
0.401719 |
-1.000987 |
1.156708 |
E |
16 |
-0.088496 |
0.634712 |
0.153324 |
0.415335 |
B |
17 |
-0.018663 |
-0.247487 |
-1.446522 |
0.750938 |
A |
18 |
-0.070127 |
-1.579097 |
0.120892 |
0.671432 |
F |
19 |
-0.194678 |
-0.492039 |
2.359605 |
0.319810 |
H |
20 |
-0.248618 |
0.868707 |
-0.492226 |
-0.717959 |
W |
21 |
-1.091549 |
-0.867110 |
-0.647760 |
-0.832562 |
C |
22 |
0.641404 |
-0.138822 |
-0.621963 |
-0.284839 |
C |
23 |
1.216408 |
0.992687 |
0.165162 |
-0.069619 |
V |
24 |
-0.564474 |
0.792832 |
0.747053 |
0.571675 |
I |
25 |
1.759879 |
-0.515666 |
-0.230481 |
1.362317 |
S |
26 |
0.126266 |
0.309281 |
0.382820 |
-0.239199 |
L |
27 |
1.334360 |
-0.100152 |
-0.840731 |
-0.643967 |
6 |
28 |
-0.737620 |
0.278087 |
-0.053235 |
-0.950972 |
J |
29 |
-1.148486 |
-0.986292 |
-0.144963 |
0.124362 |
Y |
… |
… |
… |
… |
… |
… |
9970 |
0.633495 |
-0.186524 |
0.927627 |
0.143164 |
4 |
9971 |
0.308636 |
-0.112857 |
0.762842 |
-1.072977 |
1 |
9972 |
-1.627051 |
-0.978151 |
0.154745 |
-1.229037 |
Z |
9973 |
0.314847 |
0.097989 |
0.199608 |
0.955193 |
P |
9974 |
1.666907 |
0.992005 |
0.496128 |
-0.686391 |
S |
9975 |
0.010603 |
0.708540 |
-1.258711 |
0.226541 |
K |
9976 |
0.118693 |
-0.714455 |
-0.501342 |
-0.254764 |
K |
9977 |
0.302616 |
-2.011527 |
-0.628085 |
0.768827 |
H |
9978 |
-0.098572 |
1.769086 |
-0.215027 |
-0.053076 |
A |
9979 |
-0.019058 |
1.964994 |
0.738538 |
-0.883776 |
F |
9980 |
-0.595349 |
0.001781 |
-1.423355 |
-1.458477 |
M |
9981 |
1.392170 |
-1.396560 |
-1.425306 |
-0.847535 |
H |
9982 |
-0.896029 |
-0.152287 |
1.924483 |
0.365184 |
6 |
9983 |
-2.274642 |
-0.901874 |
1.500352 |
0.996541 |
N |
9984 |
-0.301898 |
1.019906 |
1.102160 |
2.624526 |
I |
9985 |
-2.548389 |
-0.585374 |
1.496201 |
-0.718815 |
D |
9986 |
-0.064588 |
0.759292 |
-1.568415 |
-0.420933 |
E |
9987 |
-0.143365 |
-1.111760 |
-1.815581 |
0.435274 |
2 |
9988 |
-0.070412 |
-1.055921 |
0.338017 |
-0.440763 |
X |
9989 |
0.649148 |
0.994273 |
-1.384227 |
0.485120 |
Q |
9990 |
-0.370769 |
0.404356 |
-1.051628 |
-1.050899 |
8 |
9991 |
-0.409980 |
0.155627 |
-0.818990 |
1.277350 |
W |
9992 |
0.301214 |
-1.111203 |
0.668258 |
0.671922 |
A |
9993 |
1.821117 |
0.416445 |
0.173874 |
0.505118 |
X |
9994 |
0.068804 |
1.322759 |
0.802346 |
0.223618 |
H |
9995 |
2.311896 |
-0.417070 |
-1.409599 |
-0.515821 |
L |
9996 |
-0.479893 |
-0.650419 |
0.745152 |
-0.646038 |
E |
9997 |
0.523331 |
0.787112 |
0.486066 |
1.093156 |
K |
9998 |
-0.362559 |
0.598894 |
-1.843201 |
0.887292 |
G |
9999 |
-0.096376 |
-1.012999 |
-0.657431 |
-0.573315 |
0 |
10000 rows × 5 columns
pd.read_csv('ch06/ex6.csv',nrows=5)
|
one |
two |
three |
four |
key |
0 |
0.467976 |
-0.038649 |
-0.295344 |
-1.824726 |
L |
1 |
-0.358893 |
1.404453 |
0.704965 |
-0.200638 |
B |
2 |
-0.501840 |
0.659254 |
-0.421691 |
-0.057688 |
G |
3 |
0.204886 |
1.074134 |
1.388361 |
-0.982404 |
R |
4 |
0.354628 |
-0.133116 |
0.283763 |
-0.837063 |
Q |
chunker=pd.read_csv('ch06/ex6.csv',chunksize=1000)
chunker
from pandas import Series,DataFrame
tot=Series([])
for piece in chunker:
tot=tot.add(piece['key'].value_counts(),fill_value=0)
tot=tot.sort_values(ascending=False)
tot[:10]
E 368.0 X 364.0 L 346.0 O 343.0 Q 340.0 M 338.0 J 337.0 F 335.0 K 334.0 H 330.0 dtype: float64
将数据写入到文本格式
data=pd.read_csv('ch06/ex5.csv')
data
|
something |
a |
b |
c |
d |
message |
0 |
one |
1 |
2 |
3.0 |
4 |
NaN |
1 |
two |
5 |
6 |
NaN |
8 |
world |
2 |
three |
9 |
10 |
11.0 |
12 |
foo |
data.to_csv('ch06/myout.csv')
data.to_csv('ch06/myout1.csv',sep='|')
data.to_csv('ch06/myout2.csv',na_rep='MULL')
data.to_csv('ch06/myout3.csv',index=False,header=False)
data.to_csv('ch06/myout4.csv',index=False,columns=['a','b','c'])
import numpy as np
dates=pd.date_range('1/1/2000',periods=7)
ts=Series(np.arange(7),index=dates)
ts.to_csv('ch06/tseries.csv')
Series.from_csv('ch06/tseries.csv',parse_dates=True)
2000-01-01 0 2000-01-02 1 2000-01-03 2 2000-01-04 3 2000-01-05 4 2000-01-06 5 2000-01-07 6 dtype: int64 ##手工处理分隔符格式
import csv
f=open('ch06/ex7.csv')
reader=csv.reader(f)
for line in reader:
print(line)
[‘a’, ‘b’, ‘c’] [‘1’, ‘2’, ‘3’] [‘1’, ‘2’, ‘3’, ‘4’]
lines=list(csv.reader(open('ch06/ex7.csv')))
header,values=lines[0],lines[1:]
data_dict={h:v for h,v in zip(header,zip(*values))}
data_dict
{‘a’: (‘1’, ‘1’), ‘b’: (‘2’, ‘2’), ‘c’: (‘3’, ‘3’)}
with open('ch06/mydata.csv','w') as f:
writer=csv.writer(f)
writer.writerow(('one','two','three'))
writer.writerow(('1','2','3'))
writer.writerow(('4','5','6'))
writer.writerow(('7','8','9'))
JSON数据
obj="""
{"name":"Wes","places_lived":["United States","Spain","Germany"],
"pet":null,
"siblings":[{"name":"Scott","age":25,"pet":"Zuko"},
{"name":"Katie","age":33,"pet":"Cisco"}]}
"""
import json
result=json.loads(obj)
result
{‘name’: ‘Wes’, ‘pet’: None, ‘places_lived’: [‘United States’, ‘Spain’, ‘Germany’], ‘siblings’: [{‘age’: 25, ‘name’: ‘Scott’, ‘pet’: ‘Zuko’}, {‘age’: 33, ‘name’: ‘Katie’, ‘pet’: ‘Cisco’}]}
asjson=json.dumps(result)
siblings=DataFrame(result['siblings'],columns=['name','age'])
siblings
|
name |
age |
0 |
Scott |
25 |
1 |
Katie |
33 |
siblings.to_json('ch06/json.csv')
XML和HTML:Web信息收集
利用lxml.objectify解析XML
from lxml import objectify
path='ch06/mta_perf/Performance_MNR.xml'
parsed=objectify.parse(open(path))
root=parsed.getroot()
data=[]
skip_fields=['PARENT_SEQ','INDICATOR_SEQ','DESIRED_CHANGE','DECIMAL_PLACES']
for elt in root.INDICATOR:
el_data={}
for child in elt.getchildren():
if child.tag in skip_fields:
continue
el_data[child.tag]=child.pyval
data.append(el_data)
perf=DataFrame(data)
perf
|
AGENCY_NAME |
CATEGORY |
DESCRIPTION |
FREQUENCY |
INDICATOR_NAME |
INDICATOR_UNIT |
MONTHLY_ACTUAL |
MONTHLY_TARGET |
PERIOD_MONTH |
PERIOD_YEAR |
YTD_ACTUAL |
YTD_TARGET |
0 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.9 |
95 |
1 |
2008 |
96.9 |
95 |
1 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95 |
95 |
2 |
2008 |
96 |
95 |
2 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.9 |
95 |
3 |
2008 |
96.3 |
95 |
3 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
98.3 |
95 |
4 |
2008 |
96.8 |
95 |
4 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95.8 |
95 |
5 |
2008 |
96.6 |
95 |
5 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
94.4 |
95 |
6 |
2008 |
96.2 |
95 |
6 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96 |
95 |
7 |
2008 |
96.2 |
95 |
7 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.4 |
95 |
8 |
2008 |
96.2 |
95 |
8 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
93.7 |
95 |
9 |
2008 |
95.9 |
95 |
9 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.4 |
95 |
10 |
2008 |
96 |
95 |
10 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.9 |
95 |
11 |
2008 |
96.1 |
95 |
11 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95.1 |
95 |
12 |
2008 |
96 |
95 |
12 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
92.6 |
96.2 |
1 |
2009 |
92.6 |
96.2 |
13 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.8 |
96.2 |
2 |
2009 |
94.6 |
96.2 |
14 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.9 |
96.2 |
3 |
2009 |
95.4 |
96.2 |
15 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
97.1 |
96.2 |
4 |
2009 |
95.9 |
96.2 |
16 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
97.8 |
96.2 |
5 |
2009 |
96.2 |
96.2 |
17 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
97.3 |
96.2 |
6 |
2009 |
96.4 |
96.2 |
18 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.7 |
96.2 |
7 |
2009 |
96.5 |
96.2 |
19 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95.7 |
96.2 |
8 |
2009 |
96.4 |
96.2 |
20 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.1 |
96.2 |
9 |
2009 |
96.3 |
96.2 |
21 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
94.8 |
96.2 |
10 |
2009 |
96.2 |
96.2 |
22 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95.7 |
96.2 |
11 |
2009 |
96.1 |
96.2 |
23 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
95 |
96.2 |
12 |
2009 |
96 |
96.2 |
24 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
98 |
96.3 |
1 |
2010 |
98 |
96.3 |
25 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
93 |
96.3 |
2 |
2010 |
95.6 |
96.3 |
26 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
96.9 |
96.3 |
3 |
2010 |
96.1 |
96.3 |
27 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
98.1 |
96.3 |
4 |
2010 |
96.6 |
96.3 |
28 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
97.6 |
96.3 |
5 |
2010 |
96.8 |
96.3 |
29 |
Metro-North Railroad |
Service Indicators |
Percent of commuter trains that arrive at thei… |
M |
On-Time Performance (West of Hudson) |
% |
97.4 |
96.3 |
6 |
2010 |
96.9 |
96.3 |
… |
… |
… |
… |
… |
… |
… |
… |
… |
… |
… |
… |
… |
618 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
94 |
|
7 |
2009 |
95.14 |
|
619 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
97 |
|
8 |
2009 |
95.38 |
|
620 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
98.3 |
|
9 |
2009 |
95.7 |
|
621 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
98.7 |
|
10 |
2009 |
96 |
|
622 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
98.1 |
|
11 |
2009 |
96.21 |
|
623 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
|
12 |
2009 |
96.5 |
|
624 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
97.95 |
97 |
1 |
2010 |
97.95 |
97 |
625 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
2 |
2010 |
98.92 |
97 |
626 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
3 |
2010 |
99.29 |
97 |
627 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
4 |
2010 |
99.47 |
97 |
628 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
5 |
2010 |
99.58 |
97 |
629 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
91.21 |
97 |
6 |
2010 |
98.19 |
97 |
630 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
7 |
2010 |
98.46 |
97 |
631 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
8 |
2010 |
98.69 |
97 |
632 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
95.2 |
97 |
9 |
2010 |
98.3 |
97 |
633 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
90.91 |
97 |
10 |
2010 |
97.55 |
97 |
634 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
96.67 |
97 |
11 |
2010 |
97.47 |
97 |
635 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
90.03 |
97 |
12 |
2010 |
96.84 |
97 |
636 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
1 |
2011 |
100 |
97 |
637 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
100 |
97 |
2 |
2011 |
100 |
97 |
638 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
97.07 |
97 |
3 |
2011 |
98.86 |
97 |
639 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
98.18 |
97 |
4 |
2011 |
98.76 |
97 |
640 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
79.18 |
97 |
5 |
2011 |
90.91 |
97 |
641 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
6 |
2011 |
|
97 |
642 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
7 |
2011 |
|
97 |
643 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
8 |
2011 |
|
97 |
644 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
9 |
2011 |
|
97 |
645 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
10 |
2011 |
|
97 |
646 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
11 |
2011 |
|
97 |
647 |
Metro-North Railroad |
Service Indicators |
Percent of the time that escalators are operat… |
M |
Escalator Availability |
% |
|
97 |
12 |
2011 |
|
97 |
648 rows × 12 columns
from io import StringIO
tag='<a href="http://www.google.com">Google</a>'
root=objectify.parse(StringIO(tag)).getroot()
root
root.get('href')
‘http://www.google.com’
root.text
‘Google’
二进制数据格式
frame=pd.read_csv('ch06/ex1.csv')
frame
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
frame.to_pickle('ch06/frame_pickle')
pd.read_pickle('ch06/frame_pickle')
|
a |
b |
c |
d |
message |
0 |
1 |
2 |
3 |
4 |
hello |
1 |
5 |
6 |
7 |
8 |
world |
2 |
9 |
10 |
11 |
12 |
foo |
使用HDF5格式
store=pd.HDFStore('ch06/mydata.h5')
store['obj1']=frame
store['obj1_col']=frame['a']
store
store['obj1']
读取Microsoft Excel文件
xls_file=pd.ExcelFile('ch06/book.xlsx')
table=xls_file.parse('Sheet1')
table
|
书名 |
索引号 |
作者或出版社 |
0 |
推荐系统 |
ISBN:9787115310699 |
[[奥地利] Gerhard Friedrich 等 著;蒋凡 译 |
1 |
推荐系统实践 |
国际标准书号ISBN:9787115281589 |
项亮 |
2 |
SuperMap iClient for Flex从入门到精通 |
国际标准书号ISBN:9787302335931 |
SuperMap图书编委会 |
3 |
HTML5与WebGL编程 |
ISBN:9787115421333 |
[美] Tony Parisi |
4 |
HTML5实战 |
ISBN:9787115378835 |
[英]罗伯·克洛泽(Rob Crowther) |
5 |
Python核心编程(第3版) |
ISBN:9787115414779 |
美] Wesley Chun 著;孙波翔,李斌,李晗 译 |
6 |
利用Python进行数据分析 |
ISBN:9787111436737 |
Wes McKinney 著;唐学韬 等 译 |
7 |
Python网络数据采集 |
ISBN:9787115416292 |
[美] 米切尔(Ryan Mitchell) 著;陶俊杰,陈小莉 |
8 |
Java编程思想(第4版)/计算机科学丛书 |
ISBN:9787111213826 |
[美] 埃克尔,译者:陈昊鹏 编 |
9 |
unix网络编程(卷一) |
国际标准书号ISBN:9787115367198 |
作者:[美]史蒂文斯 注意,匿名 译 |
10 |
unix网络编程(卷二) |
国际标准书号ISBN:9787115367204 |
作者:[美]史蒂文斯 注意,匿名 译 |
11 |
Python金融大数据分析 |
国际标准书号ISBN:9787115404459 |
出版社:人民邮电出版社 |
12 |
ZigBee无线传感器网络设计与实现 |
国际标准书号ISBN:9787122137463 |
作者:王小强,欧阳骏,黄宁淋 编著 |
13 |
和秋叶一起学Word |
ISBN:9787115400239 |
出版社: 人民邮电出版社 |
14 |
和秋叶一起学PPT 又快又好打造说服力幻灯片(第2版) |
ISBN:9787115349446 |
出版社: 人民邮电出版社 |
15 |
谁说菜鸟不会数据分析(5周年特别套装共3册) |
ISBN:11920136 |
出版社: 电子工业出版社 |
table['书名']
0 推荐系统 1 推荐系统实践 2 SuperMap iClient for Flex从入门到精通 3 HTML5与WebGL编程 4 HTML5实战 5 Python核心编程(第3版) 6 利用Python进行数据分析 7 Python网络数据采集 8 Java编程思想(第4版)/计算机科学丛书 9 unix网络编程(卷一) 10 unix网络编程(卷二) 11 Python金融大数据分析 12 ZigBee无线传感器网络设计与实现 13 和秋叶一起学Word 14 和秋叶一起学PPT 又快又好打造说服力幻灯片(第2版) 15 谁说菜鸟不会数据分析(5周年特别套装共3册) Name: 书名, dtype: object
使用HTML和WebAPI
import requests
url='http://api.map.baidu.com/telematics/v3/weather?location=海口&output=json&ak=5slgyqGDENN7Sy7pw29IUvrZ'
resp=requests.get(url)
resp
import json
data=json.loads(resp.text)
data.keys()
dict_keys([‘status’, ‘message’])
data['message']
‘APP被用户自己禁用,请在控制台解禁’
使用数据库
import pymysql.cursors
config = {
'host':'127.0.0.1',
'port':3306,
'user':'root',
'password':'713zjl',
'db':'wuxing',
'charset':'utf8mb4',
'cursorclass':pymysql.cursors.DictCursor,
}
connection = pymysql.connect(**config)
try:
with connection.cursor() as cursor:
sql = 'SELECT * from service'
cursor.execute(sql)
result = cursor.fetchall()
data=DataFrame(result)
print(result)
connection.commit()
finally:
connection.close()
data
[{‘cost’: 1.0, ‘alility’: 1.0, ‘id’: 1, ‘result’: 1.714, ‘response_time’: 286.0}, {‘cost’: 0.5, ‘alility’: 0.8, ‘id’: 2, ‘result’: 2.053, ‘response_time’: 247.0}, {‘cost’: 0.5, ‘alility’: 0.9, ‘id’: 3, ‘result’: 2.146, ‘response_time’: 254.0}, {‘cost’: 0.5, ‘alility’: 0.8, ‘id’: 4, ‘result’: 2.039, ‘response_time’: 261.0}, {‘cost’: 0.7, ‘alility’: 0.9, ‘id’: 5, ‘result’: 1.869, ‘response_time’: 331.0}, {‘cost’: 0.6, ‘alility’: 0.6, ‘id’: 6, ‘result’: 1.797, ‘response_time’: 203.0}, {‘cost’: 0.7, ‘alility’: 0.9, ‘id’: 7, ‘result’: 1.998, ‘response_time’: 202.0}, {‘cost’: 0.5, ‘alility’: 0.9, ‘id’: 8, ‘result’: 2.2, ‘response_time’: 200.0}, {‘cost’: 0.8, ‘alility’: 0.6, ‘id’: 9, ‘result’: 1.621, ‘response_time’: 179.0}, {‘cost’: 0.6, ‘alility’: 0.8, ‘id’: 10, ‘result’: 2.029, ‘response_time’: 171.0}, {‘cost’: 0.6, ‘alility’: 0.6, ‘id’: 11, ‘result’: 1.683, ‘response_time’: 317.0}, {‘cost’: 0.7, ‘alility’: 0.7, ‘id’: 12, ‘result’: 1.772, ‘response_time’: 228.0}, {‘cost’: 0.6, ‘alility’: 0.7, ‘id’: 13, ‘result’: 1.857, ‘response_time’: 243.0}, {‘cost’: 0.9, ‘alility’: 0.6, ‘id’: 14, ‘result’: 1.402, ‘response_time’: 298.0}, {‘cost’: 0.7, ‘alility’: 1.0, ‘id’: 15, ‘result’: 1.97, ‘response_time’: 330.0}, {‘cost’: 0.6, ‘alility’: 0.5, ‘id’: 16, ‘result’: 1.68, ‘response_time’: 220.0}, {‘cost’: 0.8, ‘alility’: 1.0, ‘id’: 17, ‘result’: 1.937, ‘response_time’: 263.0}, {‘cost’: 0.7, ‘alility’: 1.0, ‘id’: 18, ‘result’: 2.081, ‘response_time’: 219.0}, {‘cost’: 0.6, ‘alility’: 0.7, ‘id’: 19, ‘result’: 1.899, ‘response_time’: 201.0}, {‘cost’: 0.7, ‘alility’: 0.6, ‘id’: 20, ‘result’: 1.693, ‘response_time’: 207.0}, {‘cost’: 0.9, ‘alility’: 1.0, ‘id’: 21, ‘result’: 1.798, ‘response_time’: 302.0}, {‘cost’: 0.6, ‘alility’: 0.9, ‘id’: 22, ‘result’: 2.042, ‘response_time’: 258.0}, {‘cost’: 0.5, ‘alility’: 0.7, ‘id’: 23, ‘result’: 1.92, ‘response_time’: 280.0}, {‘cost’: 0.6, ‘alility’: 0.5, ‘id’: 24, ‘result’: 1.594, ‘response_time’: 306.0}, {‘cost’: 1.0, ‘alility’: 1.0, ‘id’: 25, ‘result’: 1.647, ‘response_time’: 353.0}, {‘cost’: 1.0, ‘alility’: 0.7, ‘id’: 26, ‘result’: 1.489, ‘response_time’: 211.0}, {‘cost’: 1.0, ‘alility’: 0.8, ‘id’: 27, ‘result’: 1.562, ‘response_time’: 238.0}, {‘cost’: 0.6, ‘alility’: 0.5, ‘id’: 28, ‘result’: 1.693, ‘response_time’: 207.0}, {‘cost’: 0.9, ‘alility’: 1.0, ‘id’: 29, ‘result’: 1.904, ‘response_time’: 196.0}, {‘cost’: 0.9, ‘alility’: 0.7, ‘id’: 30, ‘result’: 1.603, ‘response_time’: 197.0}, {‘cost’: 0.6, ‘alility’: 0.9, ‘id’: 31, ‘result’: 1.876, ‘response_time’: 424.0}, {‘cost’: 0.6, ‘alility’: 0.8, ‘id’: 32, ‘result’: 1.711, ‘response_time’: 489.0}, {‘cost’: 0.9, ‘alility’: 0.9, ‘id’: 33, ‘result’: 1.601, ‘response_time’: 399.0}, {‘cost’: 0.5, ‘alility’: 0.9, ‘id’: 34, ‘result’: 2.032, ‘response_time’: 368.0}, {‘cost’: 1.0, ‘alility’: 0.8, ‘id’: 35, ‘result’: 1.497, ‘response_time’: 303.0}, {‘cost’: 0.8, ‘alility’: 0.9, ‘id’: 36, ‘result’: 1.791, ‘response_time’: 309.0}, {‘cost’: 0.5, ‘alility’: 0.5, ‘id’: 37, ‘result’: 1.57, ‘response_time’: 430.0}, {‘cost’: 0.5, ‘alility’: 1.0, ‘id’: 38, ‘result’: 2.188, ‘response_time’: 312.0}, {‘cost’: 0.8, ‘alility’: 0.6, ‘id’: 39, ‘result’: 1.513, ‘response_time’: 287.0}, {‘cost’: 0.6, ‘alility’: 0.9, ‘id’: 40, ‘result’: 2.077, ‘response_time’: 223.0}, {‘cost’: 0.9, ‘alility’: 0.6, ‘id’: 41, ‘result’: 1.423, ‘response_time’: 277.0}, {‘cost’: 0.5, ‘alility’: 0.9, ‘id’: 42, ‘result’: 2.153, ‘response_time’: 247.0}, {‘cost’: 1.0, ‘alility’: 1.0, ‘id’: 43, ‘result’: 1.647, ‘response_time’: 353.0}, {‘cost’: 1.0, ‘alility’: 0.8, ‘id’: 44, ‘result’: 1.571, ‘response_time’: 229.0}, {‘cost’: 1.0, ‘alility’: 0.9, ‘id’: 45, ‘result’: 1.653, ‘response_time’: 247.0}, {‘cost’: 1.0, ‘alility’: 1.0, ‘id’: 46, ‘result’: 1.798, ‘response_time’: 202.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 47, ‘result’: 1.707, ‘response_time’: 193.0}, {‘cost’: 1.0, ‘alility’: 0.8, ‘id’: 48, ‘result’: 1.607, ‘response_time’: 193.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 49, ‘result’: 1.573, ‘response_time’: 327.0}, {‘cost’: 0.9, ‘alility’: 1.0, ‘id’: 50, ‘result’: 1.876, ‘response_time’: 224.0}, {‘cost’: 0.8, ‘alility’: 0.5, ‘id’: 51, ‘result’: 1.483, ‘response_time’: 217.0}, {‘cost’: 0.8, ‘alility’: 0.9, ‘id’: 52, ‘result’: 1.901, ‘response_time’: 199.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 53, ‘result’: 1.668, ‘response_time’: 232.0}, {‘cost’: 0.9, ‘alility’: 0.6, ‘id’: 54, ‘result’: 1.498, ‘response_time’: 202.0}, {‘cost’: 0.5, ‘alility’: 1.0, ‘id’: 55, ‘result’: 2.31, ‘response_time’: 190.0}, {‘cost’: 0.7, ‘alility’: 0.6, ‘id’: 56, ‘result’: 1.726, ‘response_time’: 174.0}, {‘cost’: 0.5, ‘alility’: 0.5, ‘id’: 57, ‘result’: 1.778, ‘response_time’: 222.0}, {‘cost’: 0.7, ‘alility’: 0.6, ‘id’: 58, ‘result’: 1.717, ‘response_time’: 183.0}, {‘cost’: 0.8, ‘alility’: 0.5, ‘id’: 59, ‘result’: 1.352, ‘response_time’: 348.0}, {‘cost’: 0.6, ‘alility’: 0.9, ‘id’: 60, ‘result’: 2.073, ‘response_time’: 227.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 61, ‘result’: 1.673, ‘response_time’: 227.0}, {‘cost’: 0.9, ‘alility’: 1.0, ‘id’: 62, ‘result’: 1.881, ‘response_time’: 219.0}, {‘cost’: 0.7, ‘alility’: 1.0, ‘id’: 63, ‘result’: 2.004, ‘response_time’: 296.0}, {‘cost’: 0.9, ‘alility’: 0.9, ‘id’: 64, ‘result’: 1.801, ‘response_time’: 199.0}, {‘cost’: 0.8, ‘alility’: 0.6, ‘id’: 65, ‘result’: 1.603, ‘response_time’: 197.0}, {‘cost’: 0.7, ‘alility’: 0.5, ‘id’: 66, ‘result’: 1.617, ‘response_time’: 183.0}, {‘cost’: 0.9, ‘alility’: 1.0, ‘id’: 67, ‘result’: 1.907, ‘response_time’: 193.0}, {‘cost’: 0.5, ‘alility’: 0.5, ‘id’: 68, ‘result’: 1.802, ‘response_time’: 198.0}, {‘cost’: 0.6, ‘alility’: 0.7, ‘id’: 69, ‘result’: 1.772, ‘response_time’: 328.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 70, ‘result’: 1.673, ‘response_time’: 227.0}, {‘cost’: 0.9, ‘alility’: 0.7, ‘id’: 71, ‘result’: 1.572, ‘response_time’: 228.0}, {‘cost’: 0.7, ‘alility’: 1.0, ‘id’: 72, ‘result’: 2.117, ‘response_time’: 183.0}, {‘cost’: 0.8, ‘alility’: 0.9, ‘id’: 73, ‘result’: 1.869, ‘response_time’: 231.0}, {‘cost’: 0.6, ‘alility’: 0.6, ‘id’: 74, ‘result’: 1.835, ‘response_time’: 165.0}, {‘cost’: 0.8, ‘alility’: 0.7, ‘id’: 75, ‘result’: 1.714, ‘response_time’: 186.0}, {‘cost’: 0.6, ‘alility’: 0.5, ‘id’: 76, ‘result’: 1.709, ‘response_time’: 191.0}, {‘cost’: 0.9, ‘alility’: 0.7, ‘id’: 77, ‘result’: 1.583, ‘response_time’: 217.0}, {‘cost’: 1.0, ‘alility’: 0.9, ‘id’: 78, ‘result’: 1.717, ‘response_time’: 183.0}]
|
alility |
cost |
id |
response_time |
result |
0 |
1.0 |
1.0 |
1 |
286.0 |
1.714 |
1 |
0.8 |
0.5 |
2 |
247.0 |
2.053 |
2 |
0.9 |
0.5 |
3 |
254.0 |
2.146 |
3 |
0.8 |
0.5 |
4 |
261.0 |
2.039 |
4 |
0.9 |
0.7 |
5 |
331.0 |
1.869 |
5 |
0.6 |
0.6 |
6 |
203.0 |
1.797 |
6 |
0.9 |
0.7 |
7 |
202.0 |
1.998 |
7 |
0.9 |
0.5 |
8 |
200.0 |
2.200 |
8 |
0.6 |
0.8 |
9 |
179.0 |
1.621 |
9 |
0.8 |
0.6 |
10 |
171.0 |
2.029 |
10 |
0.6 |
0.6 |
11 |
317.0 |
1.683 |
11 |
0.7 |
0.7 |
12 |
228.0 |
1.772 |
12 |
0.7 |
0.6 |
13 |
243.0 |
1.857 |
13 |
0.6 |
0.9 |
14 |
298.0 |
1.402 |
14 |
1.0 |
0.7 |
15 |
330.0 |
1.970 |
15 |
0.5 |
0.6 |
16 |
220.0 |
1.680 |
16 |
1.0 |
0.8 |
17 |
263.0 |
1.937 |
17 |
1.0 |
0.7 |
18 |
219.0 |
2.081 |
18 |
0.7 |
0.6 |
19 |
201.0 |
1.899 |
19 |
0.6 |
0.7 |
20 |
207.0 |
1.693 |
20 |
1.0 |
0.9 |
21 |
302.0 |
1.798 |
21 |
0.9 |
0.6 |
22 |
258.0 |
2.042 |
22 |
0.7 |
0.5 |
23 |
280.0 |
1.920 |
23 |
0.5 |
0.6 |
24 |
306.0 |
1.594 |
24 |
1.0 |
1.0 |
25 |
353.0 |
1.647 |
25 |
0.7 |
1.0 |
26 |
211.0 |
1.489 |
26 |
0.8 |
1.0 |
27 |
238.0 |
1.562 |
27 |
0.5 |
0.6 |
28 |
207.0 |
1.693 |
28 |
1.0 |
0.9 |
29 |
196.0 |
1.904 |
29 |
0.7 |
0.9 |
30 |
197.0 |
1.603 |
… |
… |
… |
… |
… |
… |
48 |
0.7 |
0.8 |
49 |
327.0 |
1.573 |
49 |
1.0 |
0.9 |
50 |
224.0 |
1.876 |
50 |
0.5 |
0.8 |
51 |
217.0 |
1.483 |
51 |
0.9 |
0.8 |
52 |
199.0 |
1.901 |
52 |
0.7 |
0.8 |
53 |
232.0 |
1.668 |
53 |
0.6 |
0.9 |
54 |
202.0 |
1.498 |
54 |
1.0 |
0.5 |
55 |
190.0 |
2.310 |
55 |
0.6 |
0.7 |
56 |
174.0 |
1.726 |
56 |
0.5 |
0.5 |
57 |
222.0 |
1.778 |
57 |
0.6 |
0.7 |
58 |
183.0 |
1.717 |
58 |
0.5 |
0.8 |
59 |
348.0 |
1.352 |
59 |
0.9 |
0.6 |
60 |
227.0 |
2.073 |
60 |
0.7 |
0.8 |
61 |
227.0 |
1.673 |
61 |
1.0 |
0.9 |
62 |
219.0 |
1.881 |
62 |
1.0 |
0.7 |
63 |
296.0 |
2.004 |
63 |
0.9 |
0.9 |
64 |
199.0 |
1.801 |
64 |
0.6 |
0.8 |
65 |
197.0 |
1.603 |
65 |
0.5 |
0.7 |
66 |
183.0 |
1.617 |
66 |
1.0 |
0.9 |
67 |
193.0 |
1.907 |
67 |
0.5 |
0.5 |
68 |
198.0 |
1.802 |
68 |
0.7 |
0.6 |
69 |
328.0 |
1.772 |
69 |
0.7 |
0.8 |
70 |
227.0 |
1.673 |
70 |
0.7 |
0.9 |
71 |
228.0 |
1.572 |
71 |
1.0 |
0.7 |
72 |
183.0 |
2.117 |
72 |
0.9 |
0.8 |
73 |
231.0 |
1.869 |
73 |
0.6 |
0.6 |
74 |
165.0 |
1.835 |
74 |
0.7 |
0.8 |
75 |
186.0 |
1.714 |
75 |
0.5 |
0.6 |
76 |
191.0 |
1.709 |
76 |
0.7 |
0.9 |
77 |
217.0 |
1.583 |
77 |
0.9 |
1.0 |
78 |
183.0 |
1.717 |
78 rows × 5 columns