《使用pandas进行数据分析》网课笔记（13到18）

十三. How do I change the data type of a Pandas Series


# coding: utf-8

# In[3]:


#13
import pandas as pd


# In[5]:


# Load the drinks-by-country dataset.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')


# In[6]:


drinks.head()


# In[7]:


# Show the dtype of every column.
drinks.dtypes


# In[8]:


# Option 1: convert after loading. astype returns a new Series, so the
# result has to be assigned back to the column.
drinks['beer_servings'] = drinks.beer_servings.astype(float)


# In[9]:


# Confirm that beer_servings is now a float column.
drinks.dtypes


# In[10]:


# Option 2: request the dtype while the file is being read.
drinks = pd.read_csv('http://bit.ly/drinksbycountry', dtype={'beer_servings': float})


# In[11]:


#-----------------------#


# In[12]:


# Second example dataset (read_table splits on tabs by default).
orders = pd.read_table('http://bit.ly/chiporders')


# In[13]:


orders.head()


# In[15]:


orders.dtypes


# In[19]:


# Strip the dollar sign, convert the remaining text to float, then average.
# '$' is a regex metacharacter (end-of-string anchor); regex=False states
# explicitly that it is meant literally, instead of relying on the implicit
# single-character special case that pandas 1.x warns about.
orders.item_price.str.replace('$', '', regex=False).astype(float).mean()


# In[17]:


# The expression above is not assigned back, so orders itself is unchanged.
orders.head()


# In[21]:


# str.contains yields booleans; astype(int) turns them into 1/0.
orders.item_name.str.contains('Chicken').astype(int).head()

十四. When should I use a groupby in pandas?


# coding: utf-8

# In[1]:


#14
import pandas as pd


# In[2]:


# Load the drinks-by-country dataset.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')


# In[3]:


drinks.head()


# In[4]:


# Mean beer servings across all rows.
drinks.beer_servings.mean()


# In[8]:


# Mean beer servings computed separately for each continent.
drinks.groupby('continent').beer_servings.mean()


# In[12]:


# The same figure for a single continent, obtained by filtering instead
# of grouping.
drinks[drinks.continent == 'Africa'].beer_servings.mean()


# In[13]:


# Any aggregation can follow the groupby, e.g. the per-continent maximum.
drinks.groupby('continent').beer_servings.max()


# In[14]:


# agg computes several aggregations at once, one result column per function.
drinks.groupby('continent').beer_servings.agg(['count', 'min', 'max', 'mean'])


# In[15]:


# Without selecting a column, the mean is computed for every column.
# numeric_only=True skips the string 'country' column; pandas >= 2.0 no
# longer drops non-numeric columns automatically and raises TypeError.
drinks.groupby('continent').mean(numeric_only=True)


# In[16]:


# Render matplotlib figures inline (only available inside IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[17]:


# Bar chart of the per-continent means (numeric columns only, see above).
drinks.groupby('continent').mean(numeric_only=True).plot(kind='bar')

十五. How do I explore a Pandas Series


# coding: utf-8

# In[1]:


#15
import pandas as pd


# In[2]:


# Load the IMDb ratings dataset.
movies = pd.read_csv('http://bit.ly/imdbratings')


# In[3]:


movies.head()


# In[4]:


movies.dtypes


# In[5]:


# Summary of the genre column.
movies['genre'].describe()


# In[7]:


# How many rows fall into each genre.
movies['genre'].value_counts()


# In[9]:


# The same counts expressed as fractions of the total.
movies['genre'].value_counts(normalize=True)


# In[10]:


# Distinct genre values ...
movies['genre'].unique()


# In[11]:


# ... and how many distinct values there are.
movies['genre'].nunique()


# In[12]:


# Cross-tabulate genre against content rating.
pd.crosstab(movies['genre'], movies['content_rating'])


# In[13]:


# Summary of the duration column.
movies['duration'].describe()


# In[14]:


movies['duration'].mean()


# In[15]:


movies['duration'].value_counts()


# In[16]:


# Render matplotlib figures inline (only available inside IPython/Jupyter).
get_ipython().run_line_magic('matplotlib', 'inline')


# In[17]:


# Histogram of the durations.
movies['duration'].plot(kind='hist')


# In[18]:


# Bar chart of the per-genre counts.
movies['genre'].value_counts().plot(kind='bar')

十六. How do I handle missing values in Pandas


# coding: utf-8

# In[1]:


#16
import pandas as pd


# In[2]:


# Load the UFO reports dataset.
ufo = pd.read_csv('http://bit.ly/uforeports')


# In[3]:


ufo.tail()


# In[5]:


# Boolean frame: True wherever a value is missing.
ufo.isnull().tail()


# In[6]:


# The inverse: True wherever a value is present.
ufo.notnull().tail()


# In[7]:


# Number of missing values per column (see the next cell for why sum works).
ufo.isnull().sum()


# In[8]:


# Summing booleans counts the True values, because True is treated as 1.
pd.Series([True, False, True]).sum()


# In[9]:


# Rows whose City is missing.
ufo[ufo.City.isnull()]


# In[11]:


ufo.shape


# In[12]:


# Shape after dropping rows that have at least one missing value.
ufo.dropna(how='any').shape


# In[13]:


# dropna returned a new frame above, so ufo itself still has every row.
ufo.shape


# In[14]:


# Shape after dropping only rows in which every value is missing.
ufo.dropna(how='all').shape


# In[15]:


# Only look at the listed columns: drop a row when both of them are missing.
ufo.dropna(subset=['City', 'Shape Reported'], how='all').shape


# In[22]:


# Replace missing shapes with a placeholder. The result is assigned back to
# the column instead of calling fillna(..., inplace=True) on the selection:
# the in-place form on a column selection warns on pandas 2.x and does not
# update ufo once Copy-on-Write is enabled.
ufo['Shape Reported'] = ufo['Shape Reported'].fillna(value='VARIOUS')


# In[23]:


# dropna=False would list missing values as their own entry.
ufo['Shape Reported'].value_counts(dropna=False)

十七. What do I need to know about the pandas index? (Part 1)


# coding: utf-8

# In[1]:


#17
import pandas as pd


# In[2]:


# Load the drinks-by-country dataset.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')


# In[3]:


drinks.head()


# In[4]:


# Row labels.
drinks.index


# In[5]:


# Column labels.
drinks.columns


# In[6]:


drinks.shape


# In[7]:


# header=None: the file has no header row, so the columns get default labels.
pd.read_csv('http://bit.ly/movieusers', sep='|', header=None).head()


# In[8]:


# Filtering keeps the index labels of the selected rows.
drinks[drinks['continent'] == 'South America']


# In[9]:


# Select one value by row label and column label.
drinks.loc[3, 'beer_servings']


# In[10]:


# Use the country column as the index.
drinks = drinks.set_index('country')


# In[12]:


drinks.head()


# In[13]:


drinks.index


# In[14]:


drinks.columns


# In[15]:


drinks.shape


# In[16]:


# Rows can now be selected by country name.
drinks.loc['Andorra', 'wine_servings']


# In[18]:


drinks.head()


# In[19]:


# Clear the index name.
drinks.index.name = None
drinks.head()


# In[20]:


# Restore the index name, then move the index back into a regular column.
drinks.index.name = 'country'
drinks = drinks.reset_index()
drinks.head()


# In[21]:


# describe() returns a DataFrame of its own ...
drinks.describe()


# In[22]:


# ... with its own index ...
drinks.describe().index


# In[23]:


# ... and its own columns ...
drinks.describe().columns


# In[25]:


# ... so a single statistic can be picked out with loc.
drinks.describe().loc['25%', 'spirit_servings']

十八. What do I need to know about the pandas index? (Part 2)


# coding: utf-8

# In[2]:


#18
import pandas as pd


# In[3]:


# Load the drinks-by-country dataset.
drinks = pd.read_csv('http://bit.ly/drinksbycountry')


# In[4]:


drinks.head()


# In[5]:


# A Series selected from the frame carries the frame's index.
drinks['continent'].head()


# In[6]:


# Use the country column as the index.
drinks = drinks.set_index('country')
drinks.head()


# In[7]:


# The same column again, now labelled by country.
drinks['continent'].head()


# In[8]:


# value_counts returns a Series indexed by the counted values.
drinks['continent'].value_counts()


# In[9]:


# The counts alone, without the index.
drinks['continent'].value_counts().values


# In[10]:


# Look up one count by its index label.
drinks['continent'].value_counts()['Africa']


# In[13]:


# Order the result by index label.
drinks['continent'].value_counts().sort_index()


# In[14]:


# A small Series whose index labels are country names.
people = pd.Series([3000000, 85000], name='population', index=['Albania', 'Andorra'])
people


# In[15]:


# Note: the multiplication automatically finds the matching rows
# (the two Series are aligned on their index labels).
drinks['beer_servings'] * people


# In[17]:


# Add a new column alongside the existing ones.
pd.concat([drinks, people], axis=1).head()

《使用pandas进行数据分析》网课笔记（13到18）

猜你喜欢