Series和DataFrame的reindex函数

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Reindexing a Series: labels absent from the original index come back as NaN.
s1 = Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
print(s1.reindex(index=['A', 'B', 'C', 'D', 'E']))
# fill_value supplies the value used for labels that did not exist before ('E').
print(s1.reindex(index=['A', 'B', 'C', 'D', 'E'], fill_value=10))
s2 = Series(['A', 'B', 'C'], index=[1, 5, 10])
print(s2)
# Forward fill: each new label takes the value of the closest preceding label.
s3 = s2.reindex(index=range(15), method='ffill')
print(s3)
# Reindexing a DataFrame.
# The column labels must be unique: reindexing along an axis that carries
# duplicate labels raises ValueError, so the third column is 'c3' rather than
# a second 'c2'.
df1 = DataFrame(np.random.rand(25).reshape(5, 5), index=['A', 'B', 'C', 'D', 'E'],
                columns=['c1', 'c2', 'c3', 'c4', 'c5'])
print(df1)
# Adding a row label ('F') yields a row of NaN.
df2 = df1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F'])
print(df2)
# Adding a column label ('c6') yields a column of NaN.
df3 = df1.reindex(columns=['c1', 'c2', 'c3', 'c4', 'c5', 'c6'])
print(df3)
# Slicing a Series / DataFrame by reindexing to a subset of the labels.
s3 = s1.reindex(index=['A', 'B'])
print(s3)
df4 = df1.reindex(index=['A', 'B'])
print(df4)
print(s1.drop('A'))
# For a DataFrame, axis selects whether drop removes a row (0) or a column (1).
print(df1.drop('A', axis=0))

NaN

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# NaN means "not a number".
n = np.nan
print(type(n))
m = 1
# Arithmetic between NaN and any number yields NaN.
print(m + n)
# NaN in a Series
s1 = Series([1, 2, np.nan, 3, 4], index=['A', 'B', 'C', 'D', 'E'])
print(s1)
print(s1.isnull())
print(s1.dropna())
# NaN in a DataFrame
dframe = DataFrame([[1, 2, 3], [np.nan, 5, 6], [7, np.nan, 9],
                    [np.nan, np.nan, np.nan]])
print(dframe)
print(dframe.isnull())
print(dframe.notnull())
# axis=0 drops rows that contain NaN.
df1 = dframe.dropna(axis=0)
print(df1)
# axis=1 with how='any' drops every column that contains at least one NaN.
dfe2 = dframe.dropna(axis=1, how='any')
print(dfe2)
# thresh: the minimum number of non-NaN values a row needs in order to be kept.
# Leaving thresh unset gives the default behaviour (drop rows with any NaN);
# passing thresh=None explicitly is not portable across pandas versions.
def2 = dframe.dropna()
print(def2)
def3 = dframe.dropna(thresh=2)
print(def3)
print(dframe.fillna(value=1))
# A dict gives a separate fill value per column label.
print(dframe.fillna(value={0: 1, 1: 1, 2: 2, 3: 3}))

多级index

import numpy as np
import pandas as pd
from pandas import DataFrame,Series
# Multi-level (hierarchical) index
# The inner level uses the same labels 'a', 'b', 'c' under both outer labels
# so that unstack() produces a dense 2x3 frame.
s1 = Series(np.random.randn(6), index=[['1', '1', '1', '2', '2', '2'],
                                       ['a', 'b', 'c', 'a', 'b', 'c']])
print(s1)
# Selecting an outer label returns the inner Series.
print(type(s1['1']))
print(s1['1']['a'])
# Select inner label 'a' across every outer label.
print(s1[:, 'a'])
# unstack() pivots the inner index level into columns.
df1 = s1.unstack()
print(df1)
df2 = DataFrame([s1['1'], s1['2']])
# Convert a DataFrame back into a multi-level-indexed Series.
s2 = df1.unstack()
print(s2)
s2 = df1.T.unstack()
df = DataFrame(np.arange(16).reshape(4, 4))
print(df)
# Multi-level row index.
df = DataFrame(np.arange(16).reshape(4, 4), index=[['a', 'a', 'b', 'b'],
                                                   [1, 2, 1, 2]])
print(df)
# Multi-level row index and multi-level column index.
df = DataFrame(np.arange(16).reshape(4, 4), index=[['a', 'a', 'b', 'b'],
                                                   [1, 2, 1, 2]],
               columns=[['BJ', 'BJ', 'SH', 'GZ'], [8, 8, 9, 9]])
print(df)
print(df['BJ'])
print(df['BJ'][8])

Mapping和Replace

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Create a DataFrame.
df1 = DataFrame({"city": ['Beijing', 'Shanghai', 'GuangZhou'],
                 "population": [1000, 2000, 1500]})
print(df1)
# Approach 1: add the column from a Series, aligned on the default integer index.
df1['GDP'] = Series([1000, 2000, 1500])
print(df1)
# Approach 2: add the column by mapping each city name to its value.
df1 = DataFrame({"city": ['Beijing', 'Shanghai', 'GuangZhou'],
                 "population": [1000, 2000, 1500]})
# The keys must match the "city" values exactly (case-sensitive); a key that
# does not match makes map() return NaN for that row.
gdp_map = {"Beijing": 1000, "Shanghai": 2000, "GuangZhou": 1500}
df1['GDP'] = df1["city"].map(gdp_map)
print(df1)
# replace in a Series
s1 = Series(np.arange(10))
print(s1)
# Replace a single value.
s2 = s1.replace(1, np.nan)
print(s2)
s1 = Series(np.arange(10))
# Replace several values at once: 1 -> 10, 2 -> 20, 3 -> 30.
s3 = s1.replace([1, 2, 3], [10, 20, 30])
print(s3)

简单的数学运算

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Simple arithmetic on Series: operands are aligned on their index labels,
# and labels present in only one operand produce NaN.
s1 = Series([1, 2, 3], index=['A', 'B', 'C'])
print(s1)
s2 = Series([4, 5, 6, 7], index=['B', 'C', 'D', 'E'])
print(s2)
series_total = s1 + s2
print(series_total)

# Arithmetic on DataFrames: aligned on both row and column labels.
df1 = DataFrame(
    np.arange(4).reshape(2, 2),
    index=['A', 'B'],
    columns=['Beijing', 'Shanghai'],
)
print(df1)
df2 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['A', 'B', 'C'],
    columns=['Beijing', 'Shanghai', 'Guangzhou'],
)
frame_total = df1 + df2
print(frame_total)

# Aggregations over a frame that contains one NaN.
df3 = DataFrame(
    [[1, 2, 3], [4, 5, np.nan], [7, 8, 9]],
    index=['A', 'B', 'C'],
    columns=['c1', 'c2', 'c3'],
)
print(df3)
# Column sums, row sums, column minima, column maxima, summary statistics.
for summary in (
    df3.sum(),
    df3.sum(axis=1),
    df3.min(),
    df3.max(),
    df3.describe(),
):
    print(summary)

Series的排序

import numpy as np
import pandas as pd
from pandas import DataFrame,Series
# Sorting a Series of ten random values.
s1 = Series(np.random.randn(10))
print(s1.values)
print(s1.index)
# sort_values() returns a new Series ordered by value.
s2 = s1.sort_values()
print(s2)
print(s2.sort_values())

# Sorting a DataFrame.
df1 = DataFrame(
    np.random.randn(40).reshape(8, 5),
    columns=['A', 'B', 'C', 'D', 'E'],
)
print(df1)
# Sort one column on its own ...
column_a_sorted = df1['A'].sort_values()
print(column_a_sorted)
# ... or reorder all rows by that column.
rows_by_a = df1.sort_values('A')
print(rows_by_a)

猜你喜欢