pandas基本操作（创建，删除）

import pandas as pd
# Column-oriented sample data: each key is a column name and each list holds
# that column's values (all lists have the same length).
data = {
    "states": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
# Build a DataFrame from the dict of lists and print it.
a = pd.DataFrame(data)
print(a)

   pop  states  year
0  1.5    Ohio  2000
1  1.7    Ohio  2001
2  3.6    Ohio  2002
3  2.4  Nevada  2001
4  2.9  Nevada  2002
5  3.2  Nevada  2003

# Preview the first five rows (five is head()'s default row count).
a.head(n=5)

	pop	states	year
0	1.5	Ohio	2000
1	1.7	Ohio	2001
2	3.6	Ohio	2002
3	2.4	Nevada	2001
4	2.9	Nevada	2002

a=pd.DataFrame(data,columns=["year","state","pop"])#调换column的顺序

	year	state	pop
0	2000	NaN	1.5
1	2001	NaN	1.7
2	2002	NaN	3.6
3	2001	NaN	2.4
4	2002	NaN	2.9
5	2003	NaN	3.2

# Select the "year" column as a Series (same result as a["year"]).
a.loc[:, "year"]

0    2000
1    2001
2    2002
3    2001
4    2002
5    2003
Name: year, dtype: int64

# Assigning a scalar to a new column label creates the column and
# broadcasts the value to every row.
a["class"] = 1
print(a)

   pop  states  year  class
0  1.5    Ohio  2000      1
1  1.7    Ohio  2001      1
2  3.6    Ohio  2002      1
3  2.4  Nevada  2001      1
4  2.9  Nevada  2002      1
5  3.2  Nevada  2003      1

# Overwrite "class" with a boolean column: True where the state is "Ohio".
a["class"] = a["states"].eq("Ohio")
print(a)

   pop  states  year  class
0  1.5    Ohio  2000   True
1  1.7    Ohio  2001   True
2  3.6    Ohio  2002   True
3  2.4  Nevada  2001  False
4  2.9  Nevada  2002  False
5  3.2  Nevada  2003  False

# Remove the "class" column in place. del raises KeyError when the column is
# absent (for example if this cell is executed a second time).
del a["class"]
# Display the remaining column labels.
a.columns

（注意：下面的 KeyError 并不是 del 本身的问题，而是因为该单元格被重复执行——"class" 列在第一次执行时已经被删除，再次执行 del a["class"] 就会报错。）

---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2524             try:
-> 2525                 return self._engine.get_loc(key)
   2526             except KeyError:


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'class'


During handling of the above exception, another exception occurred:


KeyError                                  Traceback (most recent call last)

<ipython-input-17-9eda942a25cb> in <module>()
----> 1 del a["class"]
      2 a.columns


D:\ano\lib\site-packages\pandas\core\generic.py in __delitem__(self, key)
   2094             # there was no match, this call should raise the appropriate
   2095             # exception:
-> 2096             self._data.delete(key)
   2097 
   2098         # delete from the caches


D:\ano\lib\site-packages\pandas\core\internals.py in delete(self, item)
   3900         Delete selected item (items if non-unique) in-place.
   3901         """
-> 3902         indexer = self.items.get_loc(item)
   3903 
   3904         is_deleted = np.zeros(self.shape[0], dtype=np.bool_)


D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
   2525                 return self._engine.get_loc(key)
   2526             except KeyError:
-> 2527                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2528 
   2529         indexer = self.get_indexer([key], method=method, tolerance=tolerance)


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()


KeyError: 'class'

# Nested dict: the outer keys become the columns and the inner keys become
# the row index.
pop = {
    "Navada": {2001: 204, 2002: 209},
    "Ohio": {2001: 1.5, 2002: 1.7},
}
c = pd.DataFrame(pop)

	Navada	Ohio
2001	204	1.5
2002	209	1.7

import numpy  as np
import pandas
# Rebuild the frame from the nested dict (outer keys -> columns,
# inner keys -> row index) and print it.
pop = {
    "Navada": {2001: 204, 2002: 209},
    "Ohio": {2001: 1.5, 2002: 1.7},
}
c = pd.DataFrame(pop)
print(c)
# Transpose: rows and columns swap places.
c.transpose()

      Navada  Ohio
2001     204   1.5
2002     209   1.7

	2001	2002
Navada	204.0	209.0
Ohio	1.5	1.7

# Build a frame from a dict of Series, keeping only the first row of each
# column of c.
pdata = {state: c[state][:1] for state in ("Navada", "Ohio")}
pd.DataFrame(pdata)

	Navada	Ohio
2001	204	1.5

# Name both axes; the names appear in the header when the frame is displayed.
c.columns.name = "state"
c.index.name = "year"
c

state	Navada	Ohio
year
2001	204	1.5
2002	209	1.7

# The frame's data as a two-dimensional NumPy array (row/column labels are not included).
c.values

array([[204. ,   1.5],
       [209. ,   1.7]])

# Slicing an Index returns a new Index with the selected labels (here, everything after the first).
c.index[1:]

Int64Index([2002], dtype='int64')

import numpy as np
import pandas as pd
# Create an Index object explicitly, then use it as the row labels of a Series.
lables = pd.Index(np.arange(4))
pd.Series([1.5, 3, 7, 6], index=lables)

0    1.5
1    3.0
2    7.0
3    6.0
dtype: float64

import numpy as np
import pandas as pd
# A float Series with string labels given in a deliberately unsorted order.
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=list("dbac"))
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

 obj2=obj.reindex=['a','b', 'c', 'd', 'e']
print(obj2)

['a', 'b', 'c', 'd', 'e']

# A Series of colour names whose integer labels leave gaps (1 and 3 are absent).
obj3 = pd.Series({0: "blue", 2: "purple", 4: "yellow"})
print(obj3)

0      blue
2    purple
4    yellow
dtype: object

# method="ffill" forward-fills: each label missing from obj3 takes the value
# of the nearest preceding label that exists.
obj3.reindex(index=range(6), method="ffill")

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

# A 3x3 frame holding 0..8 with explicit row and column labels.
frame = pd.DataFrame(
    np.arange(9).reshape((3, 3)),
    index=list("acd"),
    columns=["Ohio", "Texas", "California"],
)
print(frame)

   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8

# Reindex the rows: "b" does not exist in frame, so its row is filled with NaN.
frame2 = frame.reindex(index=["a", "b", "c", "d"])
print(frame2)

   Ohio  Texas  California
a   0.0    1.0         2.0
b   NaN    NaN         NaN
c   3.0    4.0         5.0
d   6.0    7.0         8.0

# Two ways to delete
del frame2["Ohio"]# delete a column in place by its column label
print(frame2)

   Texas  California
a    1.0         2.0
b    NaN         NaN
c    4.0         5.0
d    7.0         8.0

# Deleting with the .drop() method
new_frame2=frame2.drop("c")# drops by ROW label (default axis=0) and returns a new frame; pass a list for several rows: frame2.drop(["c","d"])
print(new_frame2)

   Texas  California
a    1.0         2.0
b    NaN         NaN
d    7.0         8.0

# Drop a column instead of a row by targeting the column axis; frame2 itself
# is left unchanged because drop returns a new frame.
new_frame2 = frame2.drop(columns="Texas")
print(new_frame2)

   California
a         2.0
b         NaN
c         5.0
d         8.0

pandas基本操作（创建，删除）

猜你喜欢