【pandas学习笔记】Series

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

创建Series以及对Series的相关操作

# 自动添加索引
# np.nan：缺失值（missing value）；Series中含有NaN时，整数会被自动转换为float64
>>>s1 = pd.Series([1,2,3,4,np.nan,5,6,7]) 
0    1.0
1    2.0
2    3.0
3    4.0
4    NaN
5    5.0
6    6.0
7    7.0
dtype: float64

>>>s1.values
array([ 1.,  2.,  3.,  4., nan,  5.,  6.,  7.])

>>>s1.index
RangeIndex(start=0, stop=8, step=1)

# 主动添加索引
>>>s2 = pd.Series([21,23,42,21,23],index=['Jack','Lucy','Helen','Milky','Jasper'])
Jack      21
Lucy      23
Helen     42
Milky     21
Jasper    23
dtype: int64

>>>s2['Jack']
21

>>>s2.loc['Jack'] # 根据名字索引
21

>>>s2.iloc[0] # 根据位置索引
21

>>>print(s2.shape,s2.size)
(5,) 5

>>>s2.head(2) #选择头两行,默认前五行
Jack    21
Lucy    23
dtype: int64

>>>s2.describe()
count     5.0
mean     26.0
std       9.0
min      21.0
25%      21.0
50%      23.0
75%      23.0
max      42.0
dtype: float64

>>>s2.sort_values() # 对values排序
Jack      21
Milky     21
Lucy      23
Jasper    23
Helen     42
dtype: int64

>>>s2[s2>22]# Check the people who are older than 22
Lucy      23
Helen     42
Jasper    23
dtype: int64

>>>'Lucy' in s2
True

>>>s2_dict = s2.to_dict() # Series转换为字典
{'Helen': 42, 'Jack': 21, 'Jasper': 23, 'Lucy': 23, 'Milky': 21}

>>>s2_series = pd.Series(s2_dict) # 字典转换为Series
Helen     42
Jack      21
Jasper    23
Lucy      23
Milky     21
dtype: int64

# 指定index时，dict中没有的索引会被自动添加，对应的值为NaN
>>>name = ['Jack','Lucy','Helen','Milky','Tom','Jasper','Helen']
>>>s2_new = pd.Series(s2_dict,index = name)
Jack      21.0
Lucy      23.0
Helen     42.0
Milky     21.0
Tom        NaN
Jasper    23.0
Helen     42.0
dtype: float64

>>>s2_new.drop_duplicates() # drop duplicate values, keeping the first occurrence 去掉values中的重复值，保留第一个
Jack     21.0
Lucy     23.0
Helen    42.0
Tom       NaN
dtype: float64

>>>pd.isnull(s2_new) # 判断是否为空值，等价于 s2_new.isnull()
Jack      False
Lucy      False
Helen     False
Milky     False
Tom        True
Jasper    False
Helen     False
dtype: bool

【pandas学习笔记】Series

猜你喜欢