《Python机器学习与实践指南》一书中代码示例。
1.2数据的获取和查看
import os
import pandas as pd
import requests
# Download the iris data set and store a local copy under PATH.
# print(os.getcwd())  # uncomment to show the current working directory
PATH = r'C:/Users/Administrator/Desktop/iris/'  # raw string: no escape processing
# PATH = os.getcwd()  # alternative; NOTE: the file name is appended to PATH
#                     # directly, so PATH must end with a path separator

# Make sure the target directory exists before writing into it.
os.makedirs(PATH, exist_ok=True)

# Fetch the raw data; the timeout keeps the script from hanging forever.
r = requests.get(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    timeout=30,
)
# Fail loudly on an HTTP error instead of saving an error page as data.
r.raise_for_status()
# print(r.text)

# Save the downloaded text to disk.
with open(PATH + 'iris.data', 'w', encoding='utf-8') as f:
    f.write(r.text)

# os.chdir() returns None, so this switches the working directory and prints "None".
print(os.chdir(PATH))
# Load the saved copy; column names are supplied explicitly via `names`.
df = pd.read_csv(
    PATH + 'iris.data',
    names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'],
)
print(df.head())
# print(df['sepal length'])

# `DataFrame.ix` was removed in pandas 1.0; use .iloc (positional) / .loc (label).
# First four rows and first two columns (positions start at 0).
print(df.iloc[:4, :2])
# Rows labelled 0..3 (label slices include the end) and every column whose
# name contains "width".
print(df.loc[:3, [x for x in df.columns if 'width' in x]])
# print(df['class'].unique())

# Keep the Iris-virginica rows in their own data frame and reuse it below.
virginica = df[df['class'] == 'Iris-virginica']
print(virginica)  # rows whose class is Iris-virginica
print(df.count())  # number of non-null values per column, whole data set
print(virginica.count())  # the same counts for Iris-virginica only

# Narrow the selection further: Iris-virginica with petal width above 2.2.
print(df[(df['class'] == 'Iris-virginica') & (df['petal width'] > 2.2)])

print(df.describe())  # summary statistics
print(df.describe(percentiles=[.20, .40, .80, .90, .95]))  # custom percentiles

# Pairwise correlation between the features. The string-valued `class` column
# is excluded first, because recent pandas versions raise on non-numeric
# columns in corr(). Left as a bare expression so an interactive session
# displays it as the cell result; wrap it in print() when running as a script.
df.select_dtypes(include='number').corr()
输出:
None
sepal length sepal width petal length petal width class
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
sepal length sepal width
0 5.1 3.5
1 4.9 3.0
2 4.7 3.2
3 4.6 3.1
sepal width petal width
0 3.5 0.2
1 3.0 0.2
2 3.2 0.2
3 3.1 0.2
sepal length sepal width petal length petal width class
100 6.3 3.3 6.0 2.5 Iris-virginica
101 5.8 2.7 5.1 1.9 Iris-virginica
102 7.1 3.0 5.9 2.1 Iris-virginica
103 6.3 2.9 5.6 1.8 Iris-virginica
104 6.5 3.0 5.8 2.2 Iris-virginica
105 7.6 3.0 6.6 2.1 Iris-virginica
106 4.9 2.5 4.5 1.7 Iris-virginica
107 7.3 2.9 6.3 1.8 Iris-virginica
108 6.7 2.5 5.8 1.8 Iris-virginica
109 7.2 3.6 6.1 2.5 Iris-virginica
110 6.5 3.2 5.1 2.0 Iris-virginica
111 6.4 2.7 5.3 1.9 Iris-virginica
112 6.8 3.0 5.5 2.1 Iris-virginica
113 5.7 2.5 5.0 2.0 Iris-virginica
114 5.8 2.8 5.1 2.4 Iris-virginica
115 6.4 3.2 5.3 2.3 Iris-virginica
116 6.5 3.0 5.5 1.8 Iris-virginica
117 7.7 3.8 6.7 2.2 Iris-virginica
118 7.7 2.6 6.9 2.3 Iris-virginica
119 6.0 2.2 5.0 1.5 Iris-virginica
120 6.9 3.2 5.7 2.3 Iris-virginica
121 5.6 2.8 4.9 2.0 Iris-virginica
122 7.7 2.8 6.7 2.0 Iris-virginica
123 6.3 2.7 4.9 1.8 Iris-virginica
124 6.7 3.3 5.7 2.1 Iris-virginica
125 7.2 3.2 6.0 1.8 Iris-virginica
126 6.2 2.8 4.8 1.8 Iris-virginica
127 6.1 3.0 4.9 1.8 Iris-virginica
128 6.4 2.8 5.6 2.1 Iris-virginica
129 7.2 3.0 5.8 1.6 Iris-virginica
130 7.4 2.8 6.1 1.9 Iris-virginica
131 7.9 3.8 6.4 2.0 Iris-virginica
132 6.4 2.8 5.6 2.2 Iris-virginica
133 6.3 2.8 5.1 1.5 Iris-virginica
134 6.1 2.6 5.6 1.4 Iris-virginica
135 7.7 3.0 6.1 2.3 Iris-virginica
136 6.3 3.4 5.6 2.4 Iris-virginica
137 6.4 3.1 5.5 1.8 Iris-virginica
138 6.0 3.0 4.8 1.8 Iris-virginica
139 6.9 3.1 5.4 2.1 Iris-virginica
140 6.7 3.1 5.6 2.4 Iris-virginica
141 6.9 3.1 5.1 2.3 Iris-virginica
142 5.8 2.7 5.1 1.9 Iris-virginica
143 6.8 3.2 5.9 2.3 Iris-virginica
144 6.7 3.3 5.7 2.5 Iris-virginica
145 6.7 3.0 5.2 2.3 Iris-virginica
146 6.3 2.5 5.0 1.9 Iris-virginica
147 6.5 3.0 5.2 2.0 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
149 5.9 3.0 5.1 1.8 Iris-virginica
sepal length 150
sepal width 150
petal length 150
petal width 150
class 150
dtype: int64
sepal length 50
sepal width 50
petal length 50
petal width 50
class 50
dtype: int64
sepal length sepal width petal length petal width class
100 6.3 3.3 6.0 2.5 Iris-virginica
109 7.2 3.6 6.1 2.5 Iris-virginica
114 5.8 2.8 5.1 2.4 Iris-virginica
115 6.4 3.2 5.3 2.3 Iris-virginica
118 7.7 2.6 6.9 2.3 Iris-virginica
120 6.9 3.2 5.7 2.3 Iris-virginica
135 7.7 3.0 6.1 2.3 Iris-virginica
136 6.3 3.4 5.6 2.4 Iris-virginica
140 6.7 3.1 5.6 2.4 Iris-virginica
141 6.9 3.1 5.1 2.3 Iris-virginica
143 6.8 3.2 5.9 2.3 Iris-virginica
144 6.7 3.3 5.7 2.5 Iris-virginica
145 6.7 3.0 5.2 2.3 Iris-virginica
148 6.2 3.4 5.4 2.3 Iris-virginica
sepal length sepal width petal length petal width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
25% 5.100000 2.800000 1.600000 0.300000
50% 5.800000 3.000000 4.350000 1.300000
75% 6.400000 3.300000 5.100000 1.800000
max 7.900000 4.400000 6.900000 2.500000
sepal length sepal width petal length petal width
count 150.000000 150.000000 150.000000 150.000000
mean 5.843333 3.054000 3.758667 1.198667
std 0.828066 0.433594 1.764420 0.763161
min 4.300000 2.000000 1.000000 0.100000
20% 5.000000 2.700000 1.500000 0.200000
40% 5.600000 3.000000 3.900000 1.160000
50% 5.800000 3.000000 4.350000 1.300000
80% 6.520000 3.400000 5.320000 1.900000
90% 6.900000 3.610000 5.800000 2.200000
95% 7.255000 3.800000 6.100000 2.300000
max 7.900000 4.400000 6.900000 2.500000
Out[1]:
              sepal length  sepal width  petal length  petal width
sepal length      1.000000    -0.109369      0.871754     0.817954
sepal width      -0.109369     1.000000     -0.420516    -0.356544
petal length      0.871754    -0.420516      1.000000     0.962757
petal width       0.817954    -0.356544      0.962757     1.000000