数据增加和删除
- 通过赋值直接添加新列,例如 df['新列'] = 值(新列追加在最后)
- 使用df.insert(loc, column, value)方法在指定位置插入一列
- drop(labels, axis, inplace=True)方法(删除)
- labels表示要删除的行或列的标签,axis表示作用轴,inplace表示是否直接修改原数据(默认False,返回新的DataFrame;inplace=True则就地修改原数据)
- axis=0按行操作,axis=1按列操作
- 使用del语句直接删除其中一列,例如 del df['列名']
import pandas as pd
import os
import numpy as np
# Show the current working directory before switching to the data folder.
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据'
# Move into the course data folder so the CSV can be opened by its bare name.
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')

# Load the trade history, reading user_id as text instead of a number.
df = pd.read_csv(
    'baby_trade_history.csv',
    encoding='utf-8',
    dtype={'user_id': str},
)

# Add a column by plain assignment: '高' where buy_mount is greater than 3,
# otherwise '低'.
df['购买量'] = np.where(df['buy_mount'].gt(3), '高', '低')

# Preview the first five rows, including the new column.
df.head(5)
|   | user_id | auction_id | cat_id | cat1 | property | buy_mount | day | 购买量 |
|---|---|---|---|---|---|---|---|---|
| 0 | 786295544 | 41098319944 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
| 1 | 532110457 | 17916191097 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
| 2 | 249013725 | 21896936223 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
| 3 | 917056007 | 12515996043 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
| 4 | 444069173 | 20487688075 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Keep a reference to the auction_id column so it can be re-inserted later.
auction_id = df['auction_id']
# The del statement removes the column from df itself.
del df['auction_id']
# Preview the first five rows to confirm the column is gone.
df.head(5)
|   | user_id | cat_id | cat1 | property | buy_mount | day | 购买量 |
|---|---|---|---|---|---|---|---|
| 0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
| 1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
| 2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
| 3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
| 4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# Insert the saved column at position 0 (leftmost) under the new name
# 'auction_id_new'; insert modifies df in place.
df.insert(0, 'auction_id_new', auction_id)
# Preview the first five rows to see the new leading column.
df.head(5)
|   | auction_id_new | user_id | cat_id | cat1 | property | buy_mount | day | 购买量 |
|---|---|---|---|---|---|---|---|---|
| 0 | 41098319944 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
| 1 | 17916191097 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
| 2 | 21896936223 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
| 3 | 12515996043 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
| 4 | 20487688075 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# axis=1 drops columns. Without inplace=True, drop returns a new DataFrame
# lacking the two columns; df itself is left untouched.
df.drop(labels=['auction_id_new', '购买量'], axis=1).head(5)
|   | user_id | cat_id | cat1 | property | buy_mount | day |
|---|---|---|---|---|---|---|
| 0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
| 1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
| 2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
| 3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 |
| 4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 |
# df still contains both columns because the preceding drop was not in place.
df.head(5)
|   | auction_id_new | user_id | cat_id | cat1 | property | buy_mount | day | 购买量 |
|---|---|---|---|---|---|---|---|---|
| 0 | 41098319944 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 | 低 |
| 1 | 17916191097 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 | 低 |
| 2 | 21896936223 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 | 低 |
| 3 | 12515996043 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 | 低 |
| 4 | 20487688075 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 | 低 |
# inplace=True removes the two columns from df itself instead of returning a copy.
# NOTE: this statement is not idempotent. Once the columns are gone, running it
# again raises KeyError ("... not found in axis"), which is what the traceback
# recorded below shows -- presumably the cell was executed a second time.
# Passing errors='ignore' to drop would skip labels that are already missing.
df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-30-acf2a75acaf3> in <module>
----> 1 df.drop(labels=['auction_id_new', '购买量'], axis=1, inplace=True)
D:\Anaconda3\lib\site-packages\pandas\core\frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3938 index=index, columns=columns,
3939 level=level, inplace=inplace,
-> 3940 errors=errors)
3941
3942 @rewrite_axis_style_signature('mapper', [('copy', True),
D:\Anaconda3\lib\site-packages\pandas\core\generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
3778 for axis, labels in axes.items():
3779 if labels is not None:
-> 3780 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
3781
3782 if inplace:
D:\Anaconda3\lib\site-packages\pandas\core\generic.py in _drop_axis(self, labels, axis, level, errors)
3810 new_axis = axis.drop(labels, level=level, errors=errors)
3811 else:
-> 3812 new_axis = axis.drop(labels, errors=errors)
3813 result = self.reindex(**{axis_name: new_axis})
3814
D:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in drop(self, labels, errors)
4963 if errors != 'ignore':
4964 raise KeyError(
-> 4965 '{} not found in axis'.format(labels[mask]))
4966 indexer = indexer[~mask]
4967 return self.delete(indexer)
KeyError: "['auction_id_new' '购买量'] not found in axis"
# Confirm that auction_id_new and 购买量 are no longer part of df.
df.head(5)
|   | user_id | cat_id | cat1 | property | buy_mount | day |
|---|---|---|---|---|---|---|
| 0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
| 1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
| 2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
| 3 | 917056007 | 50018831 | 50014815 | 21458:15841995;21956:3494076;27000458:59723383... | 2 | 20141023 |
| 4 | 444069173 | 50013636 | 50008168 | 21458:30992;13658074:3323064;1628665:3233941;1... | 1 | 20141103 |
# axis=0 drops rows: remove the rows whose index labels are 3 and 4, in place.
df.drop(labels=[3,4], axis=0, inplace=True)
# The remaining index labels are not renumbered, so the preview shows 0, 1, 2, 5, 6.
df.head(5)
|   | user_id | cat_id | cat1 | property | buy_mount | day |
|---|---|---|---|---|---|---|
| 0 | 786295544 | 50014866 | 50022520 | 21458:86755362;13023209:3593274;10984217:21985... | 2 | 20140919 |
| 1 | 532110457 | 50011993 | 28 | 21458:11399317;1628862:3251296;21475:137325;16... | 1 | 20131011 |
| 2 | 249013725 | 50012461 | 50014815 | 21458:30992;1628665:92012;1628665:3233938;1628... | 1 | 20131011 |
| 5 | 152298847 | 121394024 | 50008168 | 21458:3408353;13023209:727117752;22009:2741771... | 1 | 20141103 |
| 6 | 513441334 | 50010557 | 50008168 | 25935:21991;1628665:29784;22019:34731;22019:20... | 1 | 20121212 |
# A range is accepted as labels too: drop the rows labelled 0, 1 and 2, in place.
df.drop(labels=range(0,3), axis=0, inplace=True)
# The preview now starts at index label 5.
df.head(5)
|   | user_id | cat_id | cat1 | property | buy_mount | day |
|---|---|---|---|---|---|---|
| 5 | 152298847 | 121394024 | 50008168 | 21458:3408353;13023209:727117752;22009:2741771... | 1 | 20141103 |
| 6 | 513441334 | 50010557 | 50008168 | 25935:21991;1628665:29784;22019:34731;22019:20... | 1 | 20121212 |
| 7 | 297411659 | 50010542 | 50008168 | 21458:60020529;25935:31381;1633959:27247291;16... | 1 | 20121212 |
| 8 | 82830661 | 50013874 | 28 | 21458:11580;21475:137325 | 1 | 20121101 |
| 9 | 475046636 | 203527 | 28 | 22724:40168;22729:40278;21458:21817;2770200:24... | 1 | 20121101 |