1、定义DataFrame对象
"""
#DataFrame可以理解为一个由Series组成的字典,其中每一列的名称为字典的键key,形成DataFrame的列的Series作为字典的值value。
In [6]: import pandas as pd
In [7]: data = {'color': ['blue', 'green', 'yellow', 'red', 'white'],
...: 'object': ['ball', 'pen', 'pencil', 'paper', 'mug'],
...: 'price': [1.2, 1.0, 0.6, 0.9, 1.7]}
In [8]: frame = pd.DataFrame(data)
In [9]: frame
Out[9]:
color object price
0 blue ball 1.2
1 green pen 1.0
2 yellow pencil 0.6
3 red paper 0.9
4 white mug 1.7
#单独索引'object', 'price'两列
In [11]: frame2 = pd.DataFrame(data, columns = ['object', 'price'])
In [12]: frame2
Out[12]:
object price
0 ball 1.2
1 pen 1.0
2 pencil 0.6
3 paper 0.9
4 mug 1.7
#更换索引号index
In [13]: frame3 = pd.DataFrame(data, index = ['one', 'two', 'three', 'four', 'five'])
In [14]: frame3
Out[14]:
color object price
one blue ball 1.2
two green pen 1.0
three yellow pencil 0.6
four red paper 0.9
five white mug 1.7
#建立一个4x4的矩阵,自定义行索引、列名及元素内容(下例用到np,需先执行 import numpy as np)
"""
In [20]: frame4 = pd.DataFrame(np.arange(16).reshape((4, 4)), index = ['red', 'blue', 'yellow', 'white'], columns = ['ball', 'pen', 'pencil','paper'])
In [21]: frame4
Out[21]:
ball pen pencil paper
red 0 1 2 3
blue 4 5 6 7
yellow 8 9 10 11
white 12 13 14 15
"""
2、选择DataFrame中的元素
"""
#获取列名称
In [22]: frame.columns
Out[22]: Index(['color', 'object', 'price'], dtype='object')
"""
#获取索引对象
In [23]: frame.index
Out[23]: RangeIndex(start=0, stop=5, step=1)
"""
#获取存储在数据结构中的对象
In [24]: frame.values
Out[24]:
array([['blue', 'ball', 1.2],
['green', 'pen', 1.0],
['yellow', 'pencil', 0.6],
['red', 'paper', 0.9],
['white', 'mug', 1.7]], dtype=object)
#获取一列的数据:frame['price'] 或 frame.price
In [25]: frame['price']
Out[25]:
0 1.2
1 1.0
2 0.6
3 0.9
4 1.7
Name: price, dtype: float64
#frame.iloc[:, 列号]
In [31]: frame
Out[31]:
color object price
0 blue ball 1.2
1 green pen 1.0
2 yellow pencil 0.6
3 red paper 0.9
4 white mug 1.7
In [32]: frame.iloc[:,1]
Out[32]:
0 ball
1 pen
2 pencil
3 paper
4 mug
Name: object, dtype: object
# 获取行(注意:.ix 已在 pandas 0.20 中弃用,并在 1.0 中移除;新版本请改用 frame.loc[[2, 4]] 或 frame.iloc[[2, 4]])
In [34]: frame.ix[[2, 4]]
Out[34]:
color object price
2 yellow pencil 0.6
4 white mug 1.7
In [35]: frame[0:1]
Out[35]:
color object price
0 blue ball 1.2
In [36]: frame[0:5:2]
Out[36]:
color object price
0 blue ball 1.2
2 yellow pencil 0.6
4 white mug 1.7
#默认前五条
In [37]: frame.head()
Out[37]:
color object price
0 blue ball 1.2
1 green pen 1.0
2 yellow pencil 0.6
3 red paper 0.9
4 white mug 1.7
#loc按行标签(索引名称)查找,返回该行各列的元素
In [41]: frame.loc[1] # 名称
Out[41]:
color green
object pen
price 1
Name: 1, dtype: object
#iloc按整数位置索引(先行后列);frame.iloc[2]与frame.iloc[2,:]等价,都取位置为2的那一行
In [43]: frame.iloc[2] == frame.iloc[2,:]
Out[43]:
color True
object True
price True
Name: 2, dtype: bool
#使用键值查找
In [44]: frame['object'][3]
Out[44]: 'paper'
#frame.iat[行号, 列号]
#frame.iloc[行号, 列号]
#frame.loc[行名称,列名称]
In [46]: frame.iat[1, 2]
Out[46]: 1.0
In [47]: frame.iloc[1, 2]
Out[47]: 1.0
In [48]: frame.loc[1,'color']
Out[48]: 'green'
"""
3、赋值操作
"""
In [50]: frame
Out[50]:
color object price
0 blue ball 1.2
1 green pen 1.0
2 yellow pencil 0.6
3 red paper 0.9
4 white mug 1.7
#列表的长度必须与行数一致,不然会报错
In [54]: frame['color']=[12,2,4,58.0,4]
In [55]: frame
Out[55]:
color object price
0 12.0 ball 1.2
1 2.0 pen 1.0
2 4.0 pencil 0.6
3 58.0 paper 0.9
4 4.0 mug 1.7
#frame['price'][2] = 3.3 # 改变某个值(下方Out[61]中price列id为2的值已变为3.3;推荐写成 frame.loc[2, 'price'] = 3.3,避免链式赋值)
#frame.index.name = 'id'; frame.columns.name = 'item'
In [59]: frame.index.name = 'id'
In [60]: frame.columns.name = 'item'
In [61]: frame
Out[61]:
item color object price
id
0 12.0 ball 1.2
1 2.0 pen 1.0
2 4.0 pencil 3.3
3 58.0 paper 0.9
4 4.0 mug 1.7
#元素的所属关系及判断
In [62]: frame.isin([1.0, 'price'])
Out[62]:
item color object price
id
0 False False False
1 False False True
2 False False False
3 False False False
4 False False False
In [64]: frame[frame.isin([1.0,'price'])] # 筛选
Out[64]:
item color object price
id
0 NaN NaN NaN
1 NaN NaN 1.0
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
"""
#删除一列
"""
In [66]: del frame['price'] #原来的数据会变化
In [67]: frame
Out[67]:
item color object
id
0 12.0 ball
1 2.0 pen
2 4.0 pencil
3 58.0 paper
4 4.0 mug
#frame.drop([列名1, 列名2, ...], axis = 1),原来的数据不会被改变,就是对副本的删除操作
In [69]: frame.drop(['color', 'object'], axis = 1)
Out[69]:
Empty DataFrame
Columns: []
Index: [0, 1, 2, 3, 4]
In [70]: frame
Out[70]:
item color object
id
0 12.0 ball
1 2.0 pen
2 4.0 pencil
3 58.0 paper
4 4.0 mug
"""
4、嵌套字典及转置运算
"""
In [73]: nestdict = {'red':{2012:22, 2013:33}, 'white':{2011:13,2012:22, 2013:16},
...: 'blue':{2011:17, 2012:27, 2013:18}}
In [74]: frame2 = pd.DataFrame(nestdict)
In [75]: frame2
Out[75]:
blue red white
2011 17 NaN 13
2012 27 22.0 22
2013 18 33.0 16
5、DataFrame转置
In [77]: frame2.T
Out[77]:
2011 2012 2013
blue 17.0 27.0 18.0
red NaN 22.0 33.0
white 13.0 22.0 16.0
In [78]: frame2. transpose()
Out[78]:
2011 2012 2013
blue 17.0 27.0 18.0
red NaN 22.0 33.0
white 13.0 22.0 16.0
"""