# Report, column by column, how many entries of the DataFrame are not NaN
print()
print('Number of non-NaN values in the columns of our DataFrame:')
non_nan_per_column = store_items.count()
print(non_nan_per_column)
Number of non-NaN values in the columns of our DataFrame:
bikes 3
glasses 2
pants 3
shirts 2
shoes 3
suits 2
watches 3
dtype: int64
处理缺失值
dropna(axis)：删除含有 NaN 的行或列
axis=0 删除含有 NaN 的行
axis=1 删除含有 NaN 的列
# Discard every row that contains at least one NaN value
# (returns a new DataFrame; store_items itself is not modified)
store_items.dropna(axis='index')
import pandas as pd
import numpy as np
# Display floats with one decimal place. The fully-qualified option name is
# used because the bare 'precision' pattern is ambiguous on recent pandas
# (it also matches 'styler.format.precision') and raises OptionError.
pd.set_option('display.precision', 1)

# Titles and authors of the five books being rated (aligned by position).
books = pd.Series(data=[
    'Great Expectations', 'Of Mice and Men', 'Romeo and Juliet',
    'The Time Machine', 'Alice in Wonderland'
])
authors = pd.Series(data=[
    'Charles Dickens', 'John Steinbeck', 'William Shakespeare', ' H. G. Wells',
    'Lewis Carroll'
])

# Ratings given by each user. The Series have different lengths and some
# contain explicit NaN; when combined into a DataFrame the shorter ones are
# padded with NaN so every column has one entry per book.
user_1 = pd.Series(data=[3.2, np.nan, 2.5])
user_2 = pd.Series(data=[5., 1.3, 4.0, 3.8])
user_3 = pd.Series(data=[2.0, 2.3, np.nan, 4])
user_4 = pd.Series(data=[4, 3.5, 4, 5, 4.2])

# Column label -> column data for the ratings table.
dat = {
    'Book Title': books,
    'Author': authors,
    'User 1': user_1,
    'User 2': user_2,
    'User 3': user_3,
    'User 4': user_4
}
book_ratings = pd.DataFrame(dat)

# Replace each missing rating with the mean of that user's own ratings.
# numeric_only=True restricts the mean to the rating columns; without it
# pandas 2.x raises TypeError on the text columns ('Book Title', 'Author').
book_ratings.fillna(book_ratings.mean(numeric_only=True), inplace=True)

# Bare expression so a notebook cell displays the resulting table.
book_ratings
Book Title Author User 1 User 2 User 3 User 4
0 Great Expectations Charles Dickens 3.2 5.0 2.0 4.0
1 Of Mice and Men John Steinbeck 2.9 1.3 2.3 3.5
2 Romeo and Juliet William Shakespeare 2.5 4.0 2.8 4.0
3 The Time Machine H. G. Wells 2.9 3.8 4.0 5.0
4 Alice in Wonderland Lewis Carroll 2.9 3.5 2.8 4.2