Reading a CSV file with NumPy and computing summary statistics for each attribute

import numpy as np

# Location of the iris data set. A raw string keeps the Windows backslashes
# literal instead of relying on unrecognised escape sequences such as "\C".
url = r'F:\Coding\Anaconda\DataAnalogy\Data\iris.csv'

# Alternative kept for reference: read every field as text (header included):
#     with open(url, encoding='utf-8') as fp:
#         data = np.loadtxt(fp, str, delimiter=',', usecols=(0, 1, 2, 3, 4))
#         print(data)
# Passing skiprows=n there skips the first n lines.

# Skip the header line and load the first five columns as floats.
# np.loadtxt already returns an ndarray, so no extra conversion is needed;
# ndmin=2 keeps the result two-dimensional even for a single data row so that
# column indexing (data[:, i]) always works.
data = np.loadtxt(url, dtype=float, delimiter=',', skiprows=1,
                  usecols=(0, 1, 2, 3, 4), ndmin=2)

# Placeholder statistics for every attribute; -1 means "not computed yet".
# Suffix legend:
#   Sum  - sum of the attribute values
#   Len  - number of rows (expected to equal data.shape[0])
#   Ave  - mean
#   More - mode
#   Max  - maximum
#   Min  - minimum
#   Vp   - range (max - min)
#   Mid  - median
(Petal_width_Sum, Petal_width_Len, Petal_width_Ave, Petal_width_More,
 Petal_width_Max, Petal_width_Min, Petal_width_Vp, Petal_width_Mid) = (-1,) * 8

(Petal_length_Sum, Petal_length_Len, Petal_length_Ave, Petal_length_More,
 Petal_length_Max, Petal_length_Min, Petal_length_Vp, Petal_length_Mid) = (-1,) * 8

(Sepal_width_Sum, Sepal_width_Len, Sepal_width_Ave, Sepal_width_More,
 Sepal_width_Max, Sepal_width_Min, Sepal_width_Vp, Sepal_width_Mid) = (-1,) * 8

(Sepal_length_Sum, Sepal_length_Len, Sepal_length_Ave, Sepal_length_More,
 Sepal_length_Max, Sepal_length_Min, Sepal_length_Vp, Sepal_length_Mid) = (-1,) * 8

# Maps a key (1..4) to that attribute's mutable list of statistics.
# Slot order inside each list: [Sum, Len, Ave, Max, Min, Mid, Vp, More].
# NOTE: the lists only copy the -1 values; writing into a list later does not
# update the individual module-level variables above.
# NOTE(review): which CSV column each attribute name really corresponds to
# depends on the file's column order - confirm against the data set.
Attributes = dict(enumerate(
    (
        [Petal_width_Sum, Petal_width_Len, Petal_width_Ave, Petal_width_Max,
         Petal_width_Min, Petal_width_Mid, Petal_width_Vp, Petal_width_More],
        [Petal_length_Sum, Petal_length_Len, Petal_length_Ave, Petal_length_Max,
         Petal_length_Min, Petal_length_Mid, Petal_length_Vp, Petal_length_More],
        [Sepal_width_Sum, Sepal_width_Len, Sepal_width_Ave, Sepal_width_Max,
         Sepal_width_Min, Sepal_width_Mid, Sepal_width_Vp, Sepal_width_More],
        [Sepal_length_Sum, Sepal_length_Len, Sepal_length_Ave, Sepal_length_Max,
         Sepal_length_Min, Sepal_length_Mid, Sepal_length_Vp, Sepal_length_More],
    ),
    start=1,
))

# Compute the summary statistics of every attribute column and report them.
# Each Attributes[i] list is laid out as
# [sum, count, mean, max, min, median, range, mode].
for i in Attributes:
    column = data[:, i]  # every row of column i (the last row is included)
    stats = Attributes[i]

    stats[0] = np.sum(column)
    stats[1] = len(column)
    stats[2] = np.mean(column)
    print("属性", i, "的平均值为:", stats[2])

    stats[3] = np.max(column)
    stats[4] = np.min(column)
    # Range is max - min; both are already known at this point.
    stats[6] = stats[3] - stats[4]
    print("属性", i, "的极差值为:", stats[6])

    stats[5] = np.median(column)
    print("属性", i, "的中位数为:", stats[5])

    # np.bincount only accepts non-negative integers, so the mode of the
    # float measurements is taken from the value/count pairs of np.unique.
    # On ties the smallest of the most frequent values is reported.
    values, counts = np.unique(column, return_counts=True)
    stats[7] = values[np.argmax(counts)]
    print("属性", i, "的众数为:", stats[7])

 

Published 43 original articles · won praise 44 · views 6600

Guess you like

Origin blog.csdn.net/qq_41582910/article/details/104912429