UCI 数据集 Iris(鸢尾花)数据的简单可视化

数据集从 UCI 官网下载(文件名为 iris.data)。

以下代码在 Jupyter Notebook 中实现。

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



# Location of the raw iris.data file (plain text, comma-separated fields).
fname = 'E:\\pythonwork\\project\\Deeplearning\\Task\\data\\iris.data'
# Plain read mode is sufficient: the file is only read, and 'r+' would fail
# on a read-only file for no benefit.
with open(fname, 'r', encoding='utf-8') as f:
    # strip() removes the line terminator without cutting off a real
    # character when the final line has no trailing newline. Blank lines are
    # skipped so they do not turn into empty rows downstream.
    s = [line.strip().split(',') for line in f if line.strip()]

# Build a DataFrame from the parsed rows (the data is expected to contain
# 50 samples for each of the three species).
names = ['slength', 'swidth', 'plength', 'pwidth', 'name']
iris = pd.DataFrame(data=s, columns=names)
# Drop incomplete rows, e.g. the one produced by a stray blank line in the file.
iris.dropna(axis=0, how='any', inplace=True)
# Convert the four measurement columns from strings to float BEFORE taking the
# per-species slices, so the slices hold numbers too. Assigning by column
# labels replaces the columns, so their dtype really becomes float
# (an `.iloc[:, :4] = ...` assignment may leave them as object dtype).
iris[names[:4]] = iris[names[:4]].astype('float')
# The rows are grouped by species, 50 rows each:
seto = iris.iloc[0:50, :]
vers = iris.iloc[50:100, :]
virg = iris.iloc[100:150, :]
# Bare expressions: only displayed when run as the last statement of a
# notebook cell.
seto.shape
vers.shape
# Count how many samples each species has.
iris['name'].value_counts()
# Scatter plot of slength against swidth for the whole data set.
plt.scatter(x=iris['slength'], y=iris['swidth'])
plt.show()
 
  
 
#-------------------
# Scatter plot of slength against swidth, one colour/marker per species.
# Each species is plotted against ITS OWN swidth column (plotting vers/virg
# against seto's widths pairs up unrelated samples). The astype(float) calls
# make sure the values are numeric even if the slices still hold the raw
# strings read from the file; on numeric data they change nothing.
plt.scatter(x=seto['slength'].astype(float), y=seto['swidth'].astype(float),
            color='red')
plt.scatter(x=vers['slength'].astype(float), y=vers['swidth'].astype(float),
            color='blue', marker="+")
plt.scatter(x=virg['slength'].astype(float), y=virg['swidth'].astype(float),
            color='green', marker='*')
plt.xlabel('s length')
plt.ylabel('s width')
plt.show()
 

猜你喜欢

转载自www.cnblogs.com/flowerIron/p/12037449.html