import pandas as pd
# Column-oriented sample records: each key becomes a DataFrame column and
# the three lists are aligned row by row.
data = {
    "states": ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

# Build the frame from the dict of equal-length lists and show it.
a = pd.DataFrame(data=data)
print(a)
pop states year
0 1.5 Ohio 2000
1 1.7 Ohio 2001
2 3.6 Ohio 2002
3 2.4 Nevada 2001
4 2.9 Nevada 2002
5 3.2 Nevada 2003
# Preview the top of the frame; with no argument head() shows the first five rows.
a.head()
|
pop |
states |
year |
0 |
1.5 |
Ohio |
2000 |
1 |
1.7 |
Ohio |
2001 |
2 |
3.6 |
Ohio |
2002 |
3 |
2.4 |
Nevada |
2001 |
4 |
2.9 |
Nevada |
2002 |
# Rebuild the frame with an explicit column order. Every name passed in
# `columns` must match a key of `data` exactly: the earlier spelling "state"
# matched nothing, which produced an all-NaN column and left the frame
# without the "states" column that later cells index.
# NOTE: the table captured after this cell predates the fix (it still shows
# the NaN column); re-run the cell to refresh it.
a = pd.DataFrame(data, columns=["year", "states", "pop"])
|
year |
state |
pop |
0 |
2000 |
NaN |
1.5 |
1 |
2001 |
NaN |
1.7 |
2 |
2002 |
NaN |
3.6 |
3 |
2001 |
NaN |
2.4 |
4 |
2002 |
NaN |
2.9 |
5 |
2003 |
NaN |
3.2 |
# Select a single column by label; the result is a Series named "year".
a["year"]
0 2000
1 2001
2 2002
3 2001
4 2002
5 2003
Name: year, dtype: int64
# Add a new "class" column; assigning a scalar fills every row with that value.
a["class"]=1
print(a)
pop states year class
0 1.5 Ohio 2000 1
1 1.7 Ohio 2001 1
2 3.6 Ohio 2002 1
3 2.4 Nevada 2001 1
4 2.9 Nevada 2002 1
5 3.2 Nevada 2003 1
# Overwrite "class" with a boolean flag: True on rows whose "states" value
# is "Ohio", False elsewhere.
a["class"] = a["states"].eq("Ohio")
print(a)
pop states year class
0 1.5 Ohio 2000 True
1 1.7 Ohio 2001 True
2 3.6 Ohio 2002 True
3 2.4 Nevada 2001 False
4 2.9 Nevada 2002 False
5 3.2 Nevada 2003 False
# Remove the helper column in place, but only when it is still present, so
# re-running this cell after the column is already gone does not raise
# KeyError: 'class'.
# NOTE: the traceback captured after this cell came from such a repeated run
# of the old unguarded `del`; it no longer reflects this code.
if "class" in a.columns:
    del a["class"]
a.columns
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2524 try:
-> 2525 return self._engine.get_loc(key)
2526 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'class'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-17-9eda942a25cb> in <module>()
----> 1 del a["class"]
2 a.columns
D:\ano\lib\site-packages\pandas\core\generic.py in __delitem__(self, key)
2094 # there was no match, this call should raise the appropriate
2095 # exception:
-> 2096 self._data.delete(key)
2097
2098 # delete from the caches
D:\ano\lib\site-packages\pandas\core\internals.py in delete(self, item)
3900 Delete selected item (items if non-unique) in-place.
3901 """
-> 3902 indexer = self.items.get_loc(item)
3903
3904 is_deleted = np.zeros(self.shape[0], dtype=np.bool_)
D:\ano\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2525 return self._engine.get_loc(key)
2526 except KeyError:
-> 2527 return self._engine.get_loc(self._maybe_cast_indexer(key))
2528
2529 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 'class'
# Nested dict: outer keys become column labels, inner keys become row labels.
pop = {
    "Navada": {2001: 204, 2002: 209},
    "Ohio": {2001: 1.5, 2002: 1.7},
}
c = pd.DataFrame(data=pop)
|
Navada |
Ohio |
2001 |
204 |
1.5 |
2002 |
209 |
1.7 |
import numpy as np
import pandas
# Same nested-dict construction as before: outer keys -> columns,
# inner keys -> row index.
pop = {
    "Navada": {2001: 204, 2002: 209},
    "Ohio": {2001: 1.5, 2002: 1.7},
}
c = pd.DataFrame(data=pop)
print(c)

# Transposed view: the column labels become the rows and the years the columns.
c.T
Navada Ohio
2001 204 1.5
2002 209 1.7
|
2001 |
2002 |
Navada |
204.0 |
209.0 |
Ohio |
1.5 |
1.7 |
# Build a frame from a dict of Series: take the first entry of each column
# of `c` and let pandas align them on their shared index.
pdata = {label: c[label][:1] for label in ("Navada", "Ohio")}
pd.DataFrame(pdata)

# Name both axes of `c`; the names show up as headers when it is displayed.
c.columns.name = "state"
c.index.name = "year"
c
state |
Navada |
Ohio |
year |
|
|
2001 |
204 |
1.5 |
2002 |
209 |
1.7 |
# The frame's data as a plain two-dimensional NumPy array (labels dropped).
c.values
array([[204. , 1.5],
[209. , 1.7]])
# Index objects support slicing; this skips the first row label.
c.index[1:]
Int64Index([2002], dtype='int64')
import numpy as np
import pandas as pd
# Create an Index object up front, then hand it to a Series as its row labels.
lables = pd.Index(np.arange(4))
pd.Series(data=[1.5, 3, 7, 6], index=lables)
0 1.5
1 3.0
2 7.0
3 6.0
dtype: float64
import numpy as np
import pandas as pd
# A float Series with string labels deliberately given out of alphabetical order.
obj = pd.Series(
    data=[4.5, 7.2, -5.3, 3.6],
    index=["d", "b", "a", "c"],
)
obj
d 4.5
b 7.2
a -5.3
c 3.6
dtype: float64
# `reindex` is a method and has to be called. The earlier chained assignment
# (`obj2=obj.reindex=[...]`) bound the label list to both names, so obj2 was
# just that list and `obj.reindex` stopped being callable on this Series.
# The call returns a new Series ordered by the given labels; a label that
# `obj` does not have ('e') comes back as NaN.
# NOTE: the output captured after this cell (the bare list) predates the fix.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
print(obj2)
['a', 'b', 'c', 'd', 'e']
# Sparse integer labels (0, 2, 4) leave gaps that a later reindex can fill.
obj3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
print(obj3)
0 blue
2 purple
4 yellow
dtype: object
# Reindex onto 0..5; method="ffill" fills each newly introduced label with
# the value of the closest existing label before it.
obj3.reindex(range(6),method="ffill")
0 blue
1 blue
2 purple
3 purple
4 yellow
5 yellow
dtype: object
# 3x3 grid holding 0..8, with row labels a/c/d ("b" is intentionally absent)
# and one column per state.
frame = pd.DataFrame(
    data=np.arange(9).reshape((3, 3)),
    index=['a', 'c', 'd'],
    columns=['Ohio', 'Texas', 'California'],
)
print(frame)
Ohio Texas California
a 0 1 2
c 3 4 5
d 6 7 8
# Reindex the rows onto a, b, c, d. Label "b" does not exist in `frame`, so
# its row is filled with NaN (which also turns the columns into floats).
frame2=frame.reindex(['a', 'b', 'c', 'd'])
print(frame2)
Ohio Texas California
a 0.0 1.0 2.0
b NaN NaN NaN
c 3.0 4.0 5.0
d 6.0 7.0 8.0
# Remove the "Ohio" column from frame2 in place.
del frame2["Ohio"]
print(frame2)
Texas California
a 1.0 2.0
b NaN NaN
c 4.0 5.0
d 7.0 8.0
# drop() returns a new frame without row "c"; frame2 itself keeps that row.
new_frame2=frame2.drop("c")
print(new_frame2)
Texas California
a 1.0 2.0
b NaN NaN
d 7.0 8.0
# With axis=1 drop() removes a column instead of a row; again a new frame is
# returned and frame2 is left as it was.
new_frame2=frame2.drop("Texas",axis=1)
print(new_frame2)
California
a 2.0
b NaN
c 5.0
d 8.0