pandas note2

1、定义DataFrame对象

"""
#DataFrame可以理解为一个由Series组成的字典,其中每一列的名称为字典的键key,形成DataFrame的列的Series作为字典的值value。
In [6]: import pandas as pd
In [7]: data = {'color': ['blue', 'green', 'yellow', 'red', 'white'], 
   ...: 'object': ['ball', 'pen', 'pencil', 'paper', 'mug'], 
   ...: 'price': [1.2, 1.0, 0.6, 0.9, 1.7]}
In [8]: frame = pd.DataFrame(data)
In [9]: frame
Out[9]: 
    color  object  price
0    blue    ball    1.2
1   green     pen    1.0
2  yellow  pencil    0.6
3     red   paper    0.9
4   white     mug    1.7
#单独索引'object', 'price'两列
In [11]: frame2 = pd.DataFrame(data, columns = ['object', 'price'])
In [12]: frame2 
Out[12]: 
   object  price
0    ball    1.2
1     pen    1.0
2  pencil    0.6
3   paper    0.9
4     mug    1.7
#更换索引号index
In [13]: frame3 = pd.DataFrame(data, index = ['one', 'two', 'three', 'four', 'five'])
In [14]: frame3
Out[14]: 
        color  object  price
one      blue    ball    1.2
two     green     pen    1.0
three  yellow  pencil    0.6
four      red   paper    0.9
five    white     mug    1.7
#建立一个4x4的矩阵,自定义行索引(index)和列名(columns);np.arange需要先导入numpy
In [19]: import numpy as np
In [20]:  frame4 = pd.DataFrame(np.arange(16).reshape((4, 4)), index = ['red', 'blue', 'yellow', 'white'], columns = ['ball', 'pen', 'pencil','paper'])
In [21]: frame4
Out[21]: 
        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15
"""

2、选择DataFrame中的元素

"""
#获取列名称
In [22]: frame.columns
Out[22]: Index(['color', 'object', 'price'], dtype='object')
#获取索引对象
In [23]: frame.index
Out[23]: RangeIndex(start=0, stop=5, step=1)
#获取存储在数据结构中的对象
In [24]: frame.values
Out[24]: 
array([['blue', 'ball', 1.2],
       ['green', 'pen', 1.0],
       ['yellow', 'pencil', 0.6],
       ['red', 'paper', 0.9],
       ['white', 'mug', 1.7]], dtype=object)
#获取一列的数据:frame['price'] 或 frame.price
In [25]: frame['price']
Out[25]: 
0    1.2
1    1.0
2    0.6
3    0.9
4    1.7
Name: price, dtype: float64
#frame.iloc[:, 列号]
In [31]: frame
Out[31]: 
    color  object  price
0    blue    ball    1.2
1   green     pen    1.0
2  yellow  pencil    0.6
3     red   paper    0.9
4   white     mug    1.7
In [32]: frame.iloc[:,1]
Out[32]: 
0      ball
1       pen
2    pencil
3     paper
4       mug
Name: object, dtype: object
# 获取行(注意:.ix 已在 pandas 0.20 中弃用、1.0 中移除,新版本请改用 frame.loc[[2, 4]] 或 frame.iloc[[2, 4]])
In [34]: frame.ix[[2, 4]]
Out[34]: 
    color  object  price
2  yellow  pencil    0.6
4   white     mug    1.7
In [35]: frame[0:1]
Out[35]: 
  color object  price
0  blue   ball    1.2
In [36]: frame[0:5:2]
Out[36]: 
    color  object  price
0    blue    ball    1.2
2  yellow  pencil    0.6
4   white     mug    1.7
#默认前五条
In [37]: frame.head()
Out[37]: 
    color  object  price
0    blue    ball    1.2
1   green     pen    1.0
2  yellow  pencil    0.6
3     red   paper    0.9
4   white     mug    1.7
#loc按行标签(索引名称)查找,返回该行各列的值
In [41]: frame.loc[1] # 名称
Out[41]: 
color     green
object      pen
price         1
Name: 1, dtype: object
#iloc按整数位置索引;frame.iloc[2]与frame.iloc[2,:]等价,都返回第3行(逐元素比较结果全为True)
In [43]: frame.iloc[2] == frame.iloc[2,:]
Out[43]: 
color     True
object    True
price     True
Name: 2, dtype: bool
#使用键值查找
In [44]: frame['object'][3]
Out[44]: 'paper'
#frame.iat[行号, 列号]
#frame.iloc[行号, 列号]
#frame.loc[行名称,列名称]
In [46]: frame.iat[1, 2]
Out[46]: 1.0
In [47]: frame.iloc[1, 2]
Out[47]: 1.0
In [48]: frame.loc[1,'color']
Out[48]: 'green'
"""

3、赋值操作

"""
In [50]: frame
Out[50]: 
    color  object  price
0    blue    ball    1.2
1   green     pen    1.0
2  yellow  pencil    0.6
3     red   paper    0.9
4   white     mug    1.7
#列表的值必须与行数对应,不然会报错
In [54]: frame['color']=[12,2,4,58.0,4]
In [55]: frame
Out[55]: 
   color  object  price
0   12.0    ball    1.2
1    2.0     pen    1.0
2    4.0  pencil    0.6
3   58.0   paper    0.9
4    4.0     mug    1.7
#frame['price'][2] = 3.3 # 改变某个值(此处已执行,下面输出中第2行的price变为3.3;推荐写法为 frame.loc[2, 'price'] = 3.3,避免链式赋值)
#frame.index.name = 'id'; frame.columns.name = 'item'
In [59]: frame.index.name = 'id'
In [60]: frame.columns.name = 'item'
In [61]: frame
Out[61]: 
item  color  object  price
id                        
0      12.0    ball    1.2
1       2.0     pen    1.0
2       4.0  pencil    3.3
3      58.0   paper    0.9
4       4.0     mug    1.7
#元素的所属关系及判断
In [62]: frame.isin([1.0, 'price'])
Out[62]: 
item  color  object  price
id                        
0     False   False  False
1     False   False   True
2     False   False  False
3     False   False  False
4     False   False  False
In [64]: frame[frame.isin([1.0,'price'])] # 筛选
Out[64]: 
item  color object  price
id                       
0       NaN    NaN    NaN
1       NaN    NaN    1.0
2       NaN    NaN    NaN
3       NaN    NaN    NaN
4       NaN    NaN    NaN
"""
#删除一列
"""
In [66]: del frame['price']  #原来的数据会变化
In [67]: frame
Out[67]: 
item  color  object
id                 
0      12.0    ball
1       2.0     pen
2       4.0  pencil
3      58.0   paper
4       4.0     mug
#frame.drop([列名1, 列名2, ...], axis = 1),原来的数据不会被改变,就是对副本的删除操作
In [69]: frame.drop(['color', 'object'], axis = 1)
Out[69]: 
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]
In [70]: frame
Out[70]: 
item  color  object
id                 
0      12.0    ball
1       2.0     pen
2       4.0  pencil
3      58.0   paper
4       4.0     mug
"""

4、嵌套字典及转置运算

"""
In [73]: nestdict = {'red':{2012:22, 2013:33}, 'white':{2011:13,2012:22, 2013:16}, 
    ...: 'blue':{2011:17, 2012:27, 2013:18}}

In [74]: frame2 = pd.DataFrame(nestdict)
In [75]: frame2
Out[75]: 
      blue   red  white
2011    17   NaN     13
2012    27  22.0     22
2013    18  33.0     16
#DataFrame转置:frame2.T 与 frame2.transpose() 等价
In [77]: frame2.T
Out[77]: 
       2011  2012  2013
blue   17.0  27.0  18.0
red     NaN  22.0  33.0
white  13.0  22.0  16.0
In [78]: frame2.transpose()
Out[78]: 
       2011  2012  2013
blue   17.0  27.0  18.0
red     NaN  22.0  33.0
white  13.0  22.0  16.0
"""

猜你喜欢

转载自blog.csdn.net/zztingfeng/article/details/80101387