首先,需要导入必要的模块:
from pandas import DataFrame,Series
import pandas as pd
import numpy as np
pandas的运算
1.算术运算
# Two Series that share the labels 'a', 'b' and 'c'; 'd' exists only in obj_1
# and 'e' only in obj_2.
obj_1 = Series(data=[1, 2, 3.4, 5.6], index=list('acbd'))
obj_2 = Series(data=[1, 2, 3.4, 5.6], index=list('bcea'))

# Show both operands and then their sum, separated by dashed rules.
# The addition pairs values by index label, not by position.
print(obj_1, '-' * 30, obj_2, '-' * 30, obj_1 + obj_2, sep='\n')
# Two 4x4 frames whose row and column labels only partially overlap.
# NOTE(review): df_2 repeats the column label '-d'; the last label may have
# been meant to be '-e' -- confirm with the author before changing it.
df_1 = DataFrame(np.arange(16).reshape(4, 4),
                 columns=['-a', '-b', '-c', '-d'],
                 index=['a', 'b', 'c', 'd'])
df_2 = DataFrame(np.arange(16, 32).reshape(4, 4),
                 columns=['-b', '-c', '-d', '-d'],
                 index=['b', 'c', 'd', 'e'])
print(df_1)
print('-' * 30)
print(df_2)
print('-' * 30)
# Frame + frame pairs values by both row and column label.
print(df_1 + df_2)
print('-' * 30)
# Take the row at position 1 as a Series. The public positional indexer
# .iloc is used instead of the private DataFrame._ixs helper, which is an
# internal method with no stability guarantee.
s = df_1.iloc[1]
print(s)
print('*' * 30)
# Frame - Series: the Series' index is matched against df_1's columns and the
# subtraction is applied to every row.
print(df_1 - s)
2.函数的应用和映射
函数名 | 使用 |
---|---|
map | 将函数套用在Series的每个元素中 |
apply | 将函数套用到DataFrame的行和列上 |
applymap | 将函数套用到DataFrame的每个元素上 |
# Sample book table; every price is a string carrying the currency suffix '元'.
data = {
    'books_name': ['《三国演义》', '《中华上下五千年》', '《史记》', '《资治通鉴》', '《旧唐书》'],
    'price': ['34元', '56元', '24元', '78元', '12元'],
}
df_3 = DataFrame(data=data)
print(df_3)
print('*' * 30)


def f(x):
    """Return the part of ``x`` that precedes the first '元'.

    When ``x`` contains no '元' it is returned unchanged. The previous
    ``x[:x.find('元')]`` form sliced with -1 in that case and silently
    dropped the last character.
    """
    return x.partition('元')[0]


# Series.map applies f to each element of the price column. Item assignment is
# used rather than attribute assignment so the column is addressed explicitly.
df_3['price'] = df_3['price'].map(f)
print(df_3)
print('*' * 30)
df_4 = DataFrame(np.arange(12).reshape(4, 3),
                 index=['a', 'b', 'c', 'd'],
                 columns=['e', 'f', 'g'])
print(df_4)


def f(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    return x.max() - x.min()


print('*' * 30)
# DataFrame.apply hands f one column at a time (default axis), so this prints
# the max-minus-min of every column.
print(df_4.apply(f))
# Element-wise application. DataFrame.applymap was renamed to DataFrame.map in
# pandas 2.1 and the old name deprecated, so use whichever this pandas has.
elementwise = DataFrame.map if hasattr(DataFrame, 'map') else DataFrame.applymap
print(elementwise(df_4, lambda x: x + 2.5))
3.排序
obj_3 = Series(data=[-1, 2, -13, 4, 5], index=list('cafwz'))
print(
    obj_3,
    '*' * 30,
    obj_3.sort_index(),                 # ordered by label, ascending
    obj_3.sort_index(ascending=False),  # ordered by label, descending
    '*' * 30,
    obj_3.sort_values(),                # ordered by value
    '*' * 30,
    sep='\n',
)

# A 4x4 frame of uniform random numbers, then the same frame with its rows
# ordered by the values in column '-b'.
df_5 = DataFrame(np.random.rand(4, 4),
                 index=list('abcd'),
                 columns=['-a', '-b', '-c', '-d'])
print(df_5, '*' * 30, df_5.sort_values(by='-b'), sep='\n')
4.汇总与统计
在DataFrame数据当中,通过sum函数可以对每列进行求和汇总
# 3x3 frame of standard-normal samples with columns a, b, c.
df_6 = DataFrame(np.random.randn(3, 3), columns=list('abc'))
print(df_6)
print('*' * 30)
# Default axis: the result has one total per column (indexed by a, b, c).
print(df_6.sum(axis=0))
print('*' * 30)
# axis=1: the result has one total per row (indexed by the row labels).
print(df_6.sum(axis=1))
print('*' * 30)
# Summary statistics for every column.
print(df_6.describe())
5.层次化索引
# Series with a two-level (hierarchical) index: outer level one/two,
# inner level a/b.
obj_4 = Series(np.random.randn(4),
               index=[['one', 'one', 'two', 'two'], list('abab')])
print(
    obj_4,
    '*' * 30,
    obj_4.index,
    '*' * 30,
    obj_4['one'],     # every entry under the outer label 'one'
    '*' * 30,
    obj_4[:, 'a'],    # every entry whose inner label is 'a'
    sep='\n',
)
# DataFrame with hierarchical labels on both the rows and the columns.
df_6 = DataFrame(np.arange(16).reshape(4, 4),
                 index=[['one', 'one', 'two', 'two'], ['a', 'b', 'a', 'b']],
                 columns=[['a', 'a', 'b', 'b'], ['-a', '-b', '-a', '-b']])
print(df_6)
print('*' * 30)
print(df_6['a'])
print(df_6.swaplevel())  # swap the two levels of the row index
print('*' * 30)
# The level= argument of DataFrame.sum was deprecated in pandas 1.3 and
# removed in 2.0; groupby(level=...).sum() is the supported equivalent.
# sort=False keeps the groups in order of first appearance.
outer_totals = df_6.groupby(level=0, sort=False).sum()
print(outer_totals)
print('*' * 30)
inner_totals = df_6.groupby(level=1, sort=False).sum()
print(inner_totals)
print('*' * 30)
# Totals over the inner *column* level. groupby(axis=1) is itself deprecated,
# so group the transposed frame by its row level and transpose back.
column_totals = df_6.T.groupby(level=1, sort=False).sum().T
print(column_totals)