Python数据分析与机器学习实战-09.pandas自定义函数

1.pandas排序和重新定义索引

# Sort the passengers by the "Age" column, oldest first.
new_titanic_survival = titanic_survival.sort_values(by="Age", ascending=False)
print(new_titanic_survival[0:10])
# Rebuild the index after sorting; drop=True discards the old index instead of
# keeping it as an extra column.
titanic_reindexed = new_titanic_survival.reset_index(drop=True)
print(titanic_reindexed.iloc[:10])

2.apply()函数:可以用于行元素,也可以用于列元素,也可以用于单个元素

def f(x):
    """Return ``x`` decreased by one (works element-wise on pandas objects)."""
    decremented = x - 1
    return decremented
# Build the demo frame before using it: the original snippet read `data`
# before any visible assignment, which raises NameError when the file is run
# top to bottom.
# NOTE(review): assumed to be the same 4x4 frame (columns '0'..'3') that the
# later snippets in this file construct - confirm.
data = pd.DataFrame(np.arange(16).reshape(4, 4), columns=['0', '1', '2', '3'])
print(data)
print(type(data))
# Label-based alternative: .loc selects every row of the columns named '0' and '1'.
# result = data.loc[:, ['0', '1']].apply(f)
# .iloc selects by integer position, so [[0, 1]] picks the first two ROWS
# (all columns) - note the difference from the .loc variant above.
result = data.iloc[[0, 1]].apply(f)
print(result)
# 4x4 demo frame holding 0..15 row by row, with string column labels '0'..'3'.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    columns=list("0123"),
)
def f(x):
    """Return ``x`` decreased by one (works element-wise on pandas objects)."""
    decremented = x - 1
    return decremented
print(data)
# axis=0 is the default for DataFrame.apply: f is called once per column and
# receives that column as a Series.
print(data.apply(f, axis=0))
# 4x4 demo frame holding 0..15 row by row, with string column labels '0'..'3'.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    columns=list("0123"),
)
def f(x):
    """Return ``x`` decreased by one (works element-wise on pandas objects)."""
    decremented = x - 1
    return decremented
print(data)
# axis="columns" (the same as axis=1): f is called once per row and receives
# that row as a Series indexed by the column labels.
print(data.apply(f, axis="columns"))
def which_class(row):
    """Map a passenger row's 'Pclass' value to a readable class label.

    Returns "Unknown" when the value is missing, the matching label for
    1, 2 or 3, and None for any other value.
    """
    ticket_class = row['Pclass']
    if pd.isnull(ticket_class):
        return "Unknown"
    labels = (
        (1, "First Class"),
        (2, "Second Class"),
        (3, "Third Class"),
    )
    for code, label in labels:
        if ticket_class == code:
            return label
    # Any other class value has no label.
    return None

# With axis=1, which_class is called once per passenger row (each row arrives
# as a Series indexed by column name); the results form a new Series.
classes = titanic_survival.apply(func=which_class, axis=1)
print(classes)
def is_minor(row):
    """Return True when the row's "Age" is below 18, otherwise False.

    A NaN age compares as not-less-than 18 and therefore yields False.
    """
    return bool(row["Age"] < 18)

# Evaluate is_minor once per passenger row (axis=1) to get a boolean Series.
minors = titanic_survival.apply(func=is_minor, axis=1)
print(minors)
def generate_age_label(row):
    """Classify a passenger row by its "Age" value.

    Returns "unknown" for a missing age, "minor" for ages below 18 and
    "adult" otherwise.
    """
    passenger_age = row["Age"]  # the row is indexed by column name
    if pd.isnull(passenger_age):
        return "unknown"
    return "minor" if passenger_age < 18 else "adult"

# axis=1: generate_age_label is called once per passenger row.
age_labels = titanic_survival.apply(func=generate_age_label, axis=1)
print(age_labels)

# Store the labels as a new column, then aggregate "Survived" per label
# (pivot_table uses its default aggregation, the mean).
titanic_survival['age_labels'] = age_labels
age_group_survival = titanic_survival.pivot_table(values="Survived", index="age_labels")
print(age_group_survival)
发布了49 篇原创文章 · 获赞 9 · 访问量 3481

猜你喜欢

转载自blog.csdn.net/qq_39817865/article/details/100706880
今日推荐