# NaN stands for "Not a Number"; pandas uses it to mark missing values.
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# np.nan is a plain Python float used as the missing-value marker.
n = np.nan
print(type(n))
# <class 'float'>

# Arithmetic that involves NaN yields NaN.
m = 1
print(n + m)
# nan
# Series with one missing value at label 'C'; the values are stored as
# float64 (see the dtype in the output below).
s1 = Series([1, 2, np.nan, 3, 4], index=['A', 'B', 'C', 'D', 'E'])
print(s1)
# A    1.0
# B    2.0
# C    NaN
# D    3.0
# E    4.0
# dtype: float64

# isnull() flags the missing entries with True.
print(s1.isnull())
# A    False
# B    False
# C     True
# D    False
# E    False
# dtype: bool

# notnull() is the element-wise opposite of isnull().
print(s1.notnull())
# A     True
# B     True
# C    False
# D     True
# E     True
# dtype: bool

# dropna() returns a new Series without the NaN entries; s1 is unchanged.
print(s1.dropna())
# A    1.0
# B    2.0
# D    3.0
# E    4.0
# dtype: float64
# 4x3 frame: rows 1 and 2 each have one NaN, row 3 is entirely NaN.
dframe = DataFrame([
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, 8, np.nan],
    [np.nan, np.nan, np.nan],
])
print(dframe)
#      0    1    2
# 0  1.0  2.0  3.0
# 1  NaN  5.0  6.0
# 2  7.0  8.0  NaN
# 3  NaN  NaN  NaN

# Element-wise missing-value mask.
print(dframe.isnull())
#        0      1      2
# 0  False  False  False
# 1   True  False  False
# 2  False  False   True
# 3   True   True   True

# Element-wise opposite of isnull().
print(dframe.notnull())
#        0      1      2
# 0   True   True   True
# 1  False   True   True
# 2   True   True  False
# 3  False  False  False

# axis=0 drops rows: every row containing a NaN is removed, so only row 0
# remains.
df1 = dframe.dropna(axis=0)
print(df1)
#      0    1    2
# 0  1.0  2.0  3.0

# axis=1 drops columns: each column has a NaN (row 3), so none remain.
df1 = dframe.dropna(axis=1)
print(df1)
# Empty DataFrame
# Columns: []
# Index: [0, 1, 2, 3]

# how='any' drops a row if any value is NaN; the result matches the
# dropna(axis=0) call above.
df1 = dframe.dropna(axis=0, how='any')
print(df1)
#      0    1    2
# 0  1.0  2.0  3.0

# how='all' drops a row only when every value is NaN, so only row 3 goes.
df1 = dframe.dropna(axis=0, how='all')
print(df1)
#      0    1    2
# 0  1.0  2.0  3.0
# 1  NaN  5.0  6.0
# 2  7.0  8.0  NaN
# Rebuild the frame so that row 2 holds just one non-NaN value.
dframe = DataFrame([
    [1, 2, 3],
    [np.nan, 5, 6],
    [7, np.nan, np.nan],
    [np.nan, np.nan, np.nan],
])

# thresh=2 keeps only rows with at least two non-NaN values: row 1 (two
# values) stays, row 2 (one value) and row 3 (none) are dropped.
df2 = dframe.dropna(axis=0, thresh=2)
print(df2)
#      0    1    2
# 0  1.0  2.0  3.0
# 1  NaN  5.0  6.0

# A scalar fills every NaN with the same value.
print(dframe.fillna(value=1))
#      0    1    2
# 0  1.0  2.0  3.0
# 1  1.0  5.0  6.0
# 2  7.0  1.0  1.0
# 3  1.0  1.0  1.0

# A dict maps column label -> fill value for that column. Key 3 matches no
# column of this frame, so it has no effect on the result.
print(dframe.fillna(value={0: 0, 1: 1, 2: 2, 3: 3}))
#      0    1    2
# 0  1.0  2.0  3.0
# 1  0.0  5.0  6.0
# 2  7.0  1.0  2.0
# 3  0.0  1.0  2.0