pandas基本操作(创建,删除)

import pandas as pd
data={"states":['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
      "year":[2000, 2001, 2002, 2001, 2002, 2003],
     "pop":[1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
a=pd.DataFrame(data)
print(a)
   pop  states  year
0  1.5    Ohio  2000
1  1.7    Ohio  2001
2  3.6    Ohio  2002
3  2.4  Nevada  2001
4  2.9  Nevada  2002
5  3.2  Nevada  2003
a.head()
pop states year
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002
pd.DataFrame(data,columns=["year","states","pop"])#调换column的顺序;列名必须与data的键一致,写错(如"state")时该列会全部为NaN
year states pop
0 2000 Ohio 1.5
1 2001 Ohio 1.7
2 2002 Ohio 3.6
3 2001 Nevada 2.4
4 2002 Nevada 2.9
5 2003 Nevada 3.2
a["year"]
0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64
a["class"]=1
print(a)
   pop  states  year  class
0  1.5    Ohio  2000      1
1  1.7    Ohio  2001      1
2  3.6    Ohio  2002      1
3  2.4  Nevada  2001      1
4  2.9  Nevada  2002      1
5  3.2  Nevada  2003      1
a["class"]=a['states']=="Ohio"
print(a)
   pop  states  year  class
0  1.5    Ohio  2000   True
1  1.7    Ohio  2001   True
2  3.6    Ohio  2002   True
3  2.4  Nevada  2001  False
4  2.9  Nevada  2002  False
5  3.2  Nevada  2003  False
del a["class"]
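a.columns
# 注意:下面的 KeyError 是重复执行本单元格造成的——"class" 列在第一次执行 del 时已被删除,再次删除不存在的列就会报错。
# 首次执行时 a.columns 的输出应为:Index(['pop', 'states', 'year'], dtype='object')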
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'class'


During handling of the above exception, another exception occurred:


KeyError                                  Traceback (most recent call last)

<ipython-input-17-9eda942a25cb> in <module>()
----> 1 del a["class"]
      2 a.columns


D:\ano\lib\site-packages\pandas\core\generic.py in __delitem__(self, key)
   2094             # there was no match, this call should raise the appropriate
   2095             # exception:
-> 2096             self._data.delete(key)
   2097 
   2098         # delete from the caches


D:\ano\lib\site-packages\pandas\core\internals.py in delete(self, item)
   3900         Delete selected item (items if non-unique) in-place.
   3901         """
-> 3902         indexer = self.items.get_loc(item)
   3903 
   3904         is_deleted = np.zeros(self.shape[0], dtype=np.bool_)


D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2525                 return self._engine.get_loc(key)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 
   2529         indexer = self.get_indexer([key], method=method, tolerance=tolerance)


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'class'
pop={"Navada":{2001:204,2002:209},"Ohio":{2001:1.5,2002:1.7}}
c=pd.DataFrame(pop)
Navada Ohio
2001 204 1.5
2002 209 1.7
import numpy as np
import pandas as pd
pop={"Navada":{2001:204,2002:209},"Ohio":{2001:1.5,2002:1.7}}
c=pd.DataFrame(pop)
print(c)
c.T
      Navada  Ohio
2001     204   1.5
2002     209   1.7
2001 2002
Navada 204.0 209.0
Ohio 1.5 1.7
pdata={"Navada":c["Navada"][:1],"Ohio":c["Ohio"][:1]}
pd.DataFrame(pdata)
Navada Ohio
2001 204 1.5
c.index.name="year"
c.columns.name="state"
c
state Navada Ohio
year
2001 204 1.5
2002 209 1.7
c.values
array([[204. ,   1.5],
       [209. ,   1.7]])
c.index[1:]
Int64Index([2002], dtype='int64')
import numpy as np
import pandas as pd
labels=pd.Index(np.arange(4))  #创建index
pd.Series([1.5,3,7,6],index=labels)
0    1.5
1    3.0
2    7.0
3    6.0
dtype: float64
import numpy as np
import pandas as pd
obj=pd.Series([4.5,7.2,-5.3,3.6],index=["d","b","a","c"])
obj
d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64
obj2=obj.reindex(['a','b', 'c', 'd', 'e'])#reindex是方法,需要调用;原索引中不存在的标签('e')对应的值为NaN
print(obj2)
a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
print(obj3)
0      blue
2    purple
4    yellow
dtype: object
obj3.reindex(range(6),method="ffill")#method="ffill"实现向前填充
0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object
frame=pd.DataFrame(np.arange(9).reshape(3,3),index=['a', 'c', 'd'],
                    columns=['Ohio', 'Texas', 'California'])
print(frame)
   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8
frame2=frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
   Ohio  Texas  California
a   0.0    1.0         2.0
b   NaN    NaN         NaN
c   3.0    4.0         5.0
d   6.0    7.0         8.0
#删除的两种方法
del frame2["Ohio"]#按照columns 索引删除
print(frame2)
   Texas  California
a    1.0         2.0
b    NaN         NaN
c    4.0         5.0
d    7.0         8.0
#.drop()方法删除
new_frame2=frame2.drop("c")#按行索引(index)删除,返回新对象,原frame2不变;删除多行可用 frame2.drop(["c","d"])
print(new_frame2)
   Texas  California
a    1.0         2.0
b    NaN         NaN
d    7.0         8.0
new_frame2=frame2.drop("Texas",axis=1)
print(new_frame2)
   California
a         2.0
b         NaN
c         5.0
d         8.0

猜你喜欢

转载自blog.csdn.net/weixin_43196158/article/details/89260630