# PythonML_ready

# Examples of pandas map, apply, applymap, and groupby
import os
import pandas as pd
import numpy as np
import seaborn as sns
import requests
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

# Download the iris data file, store a local copy under PATH, and load it
# into the DataFrame `df`.
PATH = r'C:/Users/Administrator/Desktop/iris/'
IRIS_URL = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Create the target directory first; open() below raises if it is missing.
if not os.path.isdir(PATH):
    os.makedirs(PATH)

# The timeout keeps a stalled connection from hanging the script, and
# raise_for_status() prevents an HTTP error page from being saved as data.
r = requests.get(IRIS_URL, timeout=30)
r.raise_for_status()
# print(r.text)  # uncomment to inspect the raw download
with open(PATH + 'iris.data', 'w') as f:
    f.write(r.text)

os.chdir(PATH)

# Column labels are supplied explicitly through `names`.
df = pd.read_csv(
    PATH + 'iris.data',
    names=['sepal length', 'sepal width', 'petal length', 'petal width', 'class'],
)


# Series.map: replace each class name with a short code (column transform).
class_codes = {
    'Iris-setosa': 'SET',
    'Iris-virginica': 'VIR',
    'Iris-versicolor': 'VER',
}
df['class'] = df['class'].map(class_codes)
df


# Series.apply: run a function on every value of a single column.
def _wide_flag(width):
    """Return 1 when the petal width is at least 1.3, otherwise 0."""
    if width >= 1.3:
        return 1
    return 0


df['wide petal'] = df['petal width'].apply(_wide_flag)
df


# DataFrame.apply with axis=1: the function receives one row at a time
# (axis=0 would hand it one column at a time instead).
def _petal_area(row):
    """Return petal width multiplied by petal length for one row."""
    return row['petal width'] * row['petal length']


df['petal area'] = df.apply(_petal_area, axis=1)
df


# DataFrame.applymap: run a function on every individual cell.
def _log_if_float(cell):
    """Return the natural log of float cells; pass other cells through."""
    if isinstance(cell, float):
        return np.log(cell)
    return cell


df.applymap(_log_if_float)

# groupby: split the rows by class and compute the mean of every column.
df.groupby('class').mean()
df

# Summary statistics (describe) computed separately for each class.
df.groupby('class').describe()
df

# Group by petal width and list the unique classes seen at each width.
df.groupby('petal width')['class'].unique().to_frame()


# Per-class petal-width aggregates: the max, the min, and their difference
# ("delta"). Named aggregation is used because passing a renaming dict to
# SeriesGroupBy.agg was removed from pandas and raises SpecificationError
# ("nested renamer is not supported").
df.groupby('class')['petal width'].agg(
    delta=lambda x: x.max() - x.min(),
    max='max',
    min='min',
)
# 猜你喜欢 ("You may also like": leftover navigation text from the source web page)