pandas学习笔记

from pandas import Series,DataFrame
import pandas as pd
'''
series 是类似一维数组的对象
数组和索引组成
可以看成是一个定长的有序字典
索引不会变,进行计算时会匹配索引
'''
obj=Series([4,7,-5,3])
obj
0    4
1    7
2   -5
3    3
dtype: int64
obj.values
array([ 4, 7, -5, 3], dtype=int64)
obj.index
RangeIndex(start=0, stop=4, step=1)
obj=Series([4,7,-5,3],index=('a','s','d','f'))
obj
a    4
s    7
d   -5
f    3
dtype: int64
obj['d']
-5
obj[obj>0]
a    4
s    7
f    3
dtype: int64
'''
DataFrame是一个表格型的数据结构
包括行索引和列索引
++++可以指定列序列
'''
data={'a':obj,'b':obj,'c':obj}
objframe=DataFrame(data)
type(data)
dict
objframe
   a  b  c
a  4  4  4
s  7  7  7
d -5 -5 -5
f  3  3  3
objframe['a']   #返回一个Series结构对象,有name属性
a    4
s    7
d   -5
f    3
Name: a, dtype: int64
import numpy as np
objframe['d']=np.random.randint(0,2,size=4)  #对列进行赋值
'''  Series会对索引配对'''
colval=Series([1,2,3,4],index=['a','d','f','g'])
objframe['e']=colval
objframe
   a  b  c  d    e
a  4  4  4  0  1.0
s  7  7  7  1  NaN
d -5 -5 -5  0  2.0
f  3  3  3  0  3.0
del objframe['a']
objframe
   b  c  d    e
a  4  4  0  1.0
s  7  7  1  NaN
d -5 -5  0  2.0
f  3  3  0  3.0
np.where(objframe>3,6,-1)    #np.where类似于:if a 则 b,否则 c。
array([[ 6,  6, -1, -1],
       [ 6,  6, -1, -1],
       [-1, -1, -1, -1],
       [-1, -1, -1, -1]])
objcopy=objframe>8  # 返回布尔值,也可以利用布尔值进行判断,0为false
objcopy
       b      c      d      e
a  False  False  False  False
s  False  False  False  False
d  False  False  False  False
f  False  False  False  False
frame= DataFrame(np.arange(9).reshape(3,3),index=['a','b','c'],columns=['ohin','leis','cal'])
frame.T
      a  b  c
ohin  0  3  6
leis  1  4  7
cal   2  5  8
frame['leis'][:2]   #访问
a    1
b    4
Name: leis, dtype: int32
'''
+++++++pd的基本功能++++++++
1,reindex重新索引
2,drop 丢弃指定值
3,索引,选取,过滤  
    Series索引类似数组,除了标签切片不同。
    DataFrame索引 obj.ix[]是行,obj[]是列(注意:.ix 自 pandas 0.20 起已弃用,并在 1.0 中移除,应改用 .loc[](按标签)或 .iloc[](按位置))
4,算术运算和数据对齐(add,sub,div,mul)
    在算术运算中加入填充值,fill_value=0等,并进行传播
    DataFrame和Series之间进行运算,Series会自动广播
5,函数应用与映射
'''

frame2=frame.reindex(['a','b','c','d'],fill_value=0)  #++++++++重新索引,没有值的索引用fill_value填充;也可以用method='ffill'进行前向填充
frame3=frame.reindex(list('abcde'),fill_value=0)
print(frame)
frame3
   ohin  leis  cal
a     0     1    2
b     3     4    5
c     6     7    8
   ohin  leis  cal
a     0     1    2
b     3     4    5
c     6     7    8
d     0     0    0
e     0     0    0
sates =['ohin','leis','ca']
frame2.reindex(columns=sates)  #reindex返回一个新对象,没有对frame2做修改
   ohin  leis   ca
a     0     1  NaN
b     3     4  NaN
c     6     7  NaN
d     0     0  NaN
frame3 =frame2.drop('a')
frame3
   ohin  leis  cal
b     3     4    5
c     6     7    8
d     0     0    0
frame2.drop('leis',axis=1)  #axis=1表示按列标签删除,这里删除'leis'这一列
   ohin  cal
a     0    2
b     3    5
c     6    8
d     0    0
frame2.mean(axis=0)  #axis=0:沿行方向聚合,得到每一列的均值
ohin    2.25
leis    3.00
cal     3.75
dtype: float64
frame2.mean(axis=1)  #axis=1:沿列方向聚合,得到每一行的均值
a    1.0
b    4.0
c    7.0
d    0.0
dtype: float64
frame2.drop('d',axis=0)
   ohin  leis  cal
a     0     1    2
b     3     4    5
c     6     7    8
frame2.drop('cal',axis=1)
   ohin  leis
a     0     1
b     3     4
c     6     7
d     0     0
frame2.ix['a']
ohin    0
leis    1
cal     2
Name: a, dtype: int32
frame2[:1]
   ohin  leis  cal
a     0     1    2
frame2[frame2['cal']>2]
   ohin  leis  cal
b     3     4    5
c     6     7    8
print(frame2.ix[:1,:1])
   ohin
a     0
frame3=DataFrame(np.arange(15).reshape((3,5)),index=list('abc'),columns=list('hello'))
frame3
    h   e   l   l   o
a   0   1   2   3   4
b   5   6   7   8   9
c  10  11  12  13  14
serie=Series(frame3.ix['a'])
serie
h    0
e    1
l    2
l    3
o    4
Name: a, dtype: int32
frame3 + serie              #Series的索引与DataFrame的列标签匹配,然后沿行向下广播
    h   e   l   l   o
a   0   2   4   6   8
b   5   7   9  11  13
c  10  12  14  16  18
serie2=Series(frame3['h'])
serie2
a     0
b     5
c    10
Name: h, dtype: int32
frame3.add(serie2 ,axis=0)   #axis=0:Series的索引与行标签匹配,沿着列方向进行广播
    h   e   l   l   o
a   0   1   2   3   4
b  10  11  12  13  14
c  20  21  22  23  24

猜你喜欢

转载自blog.csdn.net/u012881559/article/details/79992012