pandas: the categorical data type

Insert picture description here
Insert picture description here

#!/usr/bin/env python
# coding: utf-8

# # Lesson 3: Categorical data analysis with pandas
# ## Section 2: The categorical data type

# In[1]
import pandas as pd
# * Constructing categorical data
# Results are printed explicitly: a bare expression only displays its value
# inside a notebook cell and is a silent no-op when this file runs as a script.
# In[2]:
# Option 1: pass dtype='category' when building the Series.
s = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
print(s)
# In[3]:
# Option 2: convert an existing Series with astype('category').
s1 = pd.Series(['a', 'b', 'c', 'a'])
s2 = s1.astype('category')
# In[4]:
print(s1)
# In[5]:
print(s2)
# In[6]:
# Option 3: pd.Categorical() without explicit categories;
# the categories are inferred from the data itself.
raw = pd.Categorical(['a', 'b', 'c', 'a'])
s3 = pd.Series(raw)
print(s3)
# In[7]:
# pd.Categorical() with explicit categories. 'a' is not among the listed
# categories, so both 'a' values become NaN.
s4 = pd.Series(
    pd.Categorical(['a', 'b', 'c', 'a'], categories=['c', 'b', 'd'])
)
print(s4)
# In[8]:
# pd.Categorical() with explicit categories and an explicit order
# (b < c < d, following the order of the `categories` list).
s5 = pd.Series(
    pd.Categorical(['a', 'b', 'c', 'a'], categories=['b', 'c', 'd'], ordered=True)
)
print(s5)
# In[9]:
# Contrast the effect of the `ordered` parameter:
# an ordered categorical supports max()/min() ...
print(s5.max())
# In[10]:
# ... while an unordered one raises TypeError. Catch it so the demonstration
# shows the message instead of aborting the rest of the script.
try:
    print(s4.max())
except TypeError as exc:
    print("s4.max() failed:", exc)
# * describe()
# In[11]:
import numpy as np

# The same raw values are stored twice: once as an ordered categorical
# (b < a < c) and once as a plain column, so the two can be compared.
# NOTE: the name `l` is kept unchanged so existing references keep working.
l = ['a', 'c', 'c', np.nan]
cat = pd.Categorical(l, categories=['b', 'a', 'c'], ordered=True)
df = pd.DataFrame({
    'cat': cat,
    's': l,
})
print(df)
# In[12]:
# With no numeric columns present, describe() summarises every column
# (count / unique / top / freq).
print(df.describe())
# In[13]:
# The ordered categorical skips the missing value and uses the category order.
print(df['cat'].max())
# In[14]:
# The raw column mixes str with a float NaN; depending on the pandas version
# and column dtype this comparison can raise TypeError, so guard it rather
# than letting the script abort.
try:
    print(df['s'].max())
except TypeError as exc:
    print("df['s'].max() failed:", exc)

# In[ ]:





Guess you like

Origin blog.csdn.net/lildn/article/details/115015518