Python matplotlib 练习题

matplotlib —— 课后练✋

%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

练习1:航班乘客变化分析

  • 分析年度乘客总量变化情况(折线图)
  • 分析乘客在一年中各月份的分布(柱状图)
# Load seaborn's "flights" example dataset and preview its first rows.
data = sns.load_dataset("flights")
data.head()
# Columns: year, month, number of passengers
year month passengers
0 1949 January 112
1 1949 February 118
2 1949 March 132
3 1949 April 129
4 1949 May 121

年度变化

# your code
# Total passengers per year. Only the numeric 'passengers' column is summed:
# 'month' holds month names, so adding it up is meaningless and raises on
# recent pandas versions when that column is categorical.
year_group = data.groupby('year')[['passengers']].sum()

# Line chart of the yearly totals.
fig, ax = plt.subplots()
ax.plot(year_group.index, year_group['passengers'])
ax.set_xlabel('year')
ax.set_ylabel('passengers')
ax.set_title('Annual Variation Trend of Passengers')
<matplotlib.text.Text at 0x7f89cacfaf50>

这里写图片描述

各月份之间的差异

# Kept from the original cell; the plot below does not use it.
data_1949 = data[data['year'] == 1949]
# Passengers per calendar month, summed over every year in the dataset.
month_group = data.groupby('month').sum()
# Numeric bar positions, one per month row (not a hard-coded 12).
month_group['month_num'] = range(len(month_group))

fig1, ax1 = plt.subplots()
ax1.bar(month_group['month_num'], month_group['passengers'], align='center')
ax1.set_xlabel('month')
ax1.set_ylabel('passengers')
ax1.set_xticks(range(len(month_group)))
# Three-letter month abbreviations as tick labels. The loop variable is
# deliberately not called `str`, which would shadow the builtin.
month_names = [name[:3] for name in month_group.index]
ax1.set_xticklabels(month_names)
ax1.set_title('Monthly Distribution of Passengers')
<matplotlib.text.Text at 0x7f89cabdad10>

这里写图片描述

练习2:鸢尾花花型尺寸分析

  • 萼片(sepal)和花瓣(petal)的大小关系(散点图)
  • 不同种类(species)鸢尾花萼片和花瓣的大小关系(分类散点子图)
  • 不同种类鸢尾花萼片和花瓣大小的分布情况(柱状图或者箱式图)
# Load seaborn's "iris" example dataset and preview its first rows.
data = sns.load_dataset("iris")
data.head()
# Columns: sepal length, sepal width, petal length, petal width, species
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
2 4.7 3.2 1.3 0.2 setosa
3 4.6 3.1 1.5 0.2 setosa
4 5.0 3.6 1.4 0.2 setosa
# Exercise solution.
# "Size" of a flower part is taken as its length multiplied by its width.
data['sepal_size'] = data['sepal_length'].mul(data['sepal_width'])
data['petal_size'] = data['petal_length'].mul(data['petal_width'])

花瓣与萼片的关系

# Petal size against sepal size for every flower in the dataset.
fig, ax2_1 = plt.subplots()
ax2_1.scatter(x=data['sepal_size'], y=data['petal_size'])

# Axis captions and title.
ax2_1.set_xlabel('size of sepal')
ax2_1.set_title('Size of Sepal vs Size of Petal')
ax2_1.set_ylabel('size of petal')
<matplotlib.text.Text at 0x7f89caa496d0>

这里写图片描述

# Distinct species labels present in the data.
species = data.species.unique()
species
array(['setosa', 'versicolor', 'virginica'], dtype=object)
# One sub-frame per species, in the order given by `species`.
data1, data2, data3 = [data[data['species'] == species[k]] for k in range(3)]

不同种类之间萼片与花瓣的关系

fig, ax2_2 = plt.subplots()

# One scatter layer per species (red, green, blue), each labelled for the legend.
layers = zip((data1, data2, data3), ('#ff0000', '#00ff00', '#0000ff'))
for k, (subset, hex_color) in enumerate(layers):
    ax2_2.scatter(subset['sepal_size'], subset['petal_size'],
                  color=hex_color, label=species[k])
ax2_2.legend(loc='best')

# Title and axis captions.
ax2_2.set_title('Size of Sepal vs Size of Petal')
ax2_2.set_xlabel('size of sepal')
ax2_2.set_ylabel('size of petal')
<matplotlib.text.Text at 0x7f89ca98b990>

这里写图片描述

不同种类的花瓣与萼片大小

def boxplot(x_data, y_data, base_color, median_color, x_label, y_label, title):
    """Draw one filled box plot per group on a new figure.

    Parameters
    ----------
    x_data : sequence
        Tick labels, one per box, in the same order as ``y_data``.
    y_data : sequence of array-like
        One collection of values per box.
    base_color : color
        Color of the box (edge and fill), the whiskers and the caps.
    median_color : color
        Color of the median line.
    x_label, y_label, title : str
        Axis captions and the axes title.
    """
    _, ax = plt.subplots()
    ax.boxplot(
        y_data,
        # Boxes are drawn as filled patches so that 'facecolor' takes effect.
        patch_artist=True,
        # The median line is the only element drawn in median_color, so it
        # stands out against the box filled with base_color.
        medianprops={'color': median_color},
        # 'color' is the box edge, 'facecolor' the fill.
        boxprops={'color': base_color, 'facecolor': base_color},
        whiskerprops={'color': base_color},
        capprops={'color': base_color},
    )
    # Label the boxes in the same order as the groups in y_data.
    ax.set_xticklabels(x_data)
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)


# Sepal sizes grouped by species, in the same order as `species`.
bp_data = [subset['sepal_size'] for subset in (data1, data2, data3)]

# Draw one box per species.
boxplot(
    x_data=species,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Species',
    y_label='Size of Sepal',
    title='Size Distribution of Sepal By Species',
)

这里写图片描述

# Petal sizes grouped by species, in the same order as `species`.
bp_data = [subset['petal_size'] for subset in (data1, data2, data3)]

# Draw one box per species.
boxplot(
    x_data=species,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Species',
    y_label='Size of Petal',
    title='Size Distribution of Petal By Species',
)

这里写图片描述

练习3:餐厅小费情况分析

  • 小费和总消费之间的关系(散点图)
  • 男性顾客和女性顾客,谁更慷慨(分类箱式图)
  • 抽烟与否是否会对小费金额产生影响(分类箱式图)
  • 工作日和周末,什么时候顾客给的小费更慷慨(分类箱式图)
  • 午饭和晚饭,哪一顿顾客更愿意给小费(分类箱式图)
  • 就餐人数是否会对慷慨度产生影响(分类箱式图)
  • 性别+抽烟的组合因素对慷慨度的影响(分组柱状图)
# Load seaborn's "tips" example dataset and preview its first rows.
data = sns.load_dataset("tips")
data.head()
# Columns: total bill, tip, sex, smoker or not, day of week, meal time, party size
total_bill tip sex smoker day time size
0 16.99 1.01 Female No Sun Dinner 2
1 10.34 1.66 Male No Sun Dinner 3
2 21.01 3.50 Male No Sun Dinner 3
3 23.68 3.31 Male No Sun Dinner 2
4 24.59 3.61 Female No Sun Dinner 4

小费与总消费的关系

# your code
# Tip amount (y) against total bill (x). The data order matches the axis
# labels below; the original passed the two columns the other way round.
_, ax3_1 = plt.subplots()
ax3_1.scatter(data['total_bill'], data['tip'])
ax3_1.set_title('Tip vs Total bill')
ax3_1.set_xlabel('total bill')
ax3_1.set_ylabel('tip')
<matplotlib.text.Text at 0x7f89ca689150>

这里写图片描述

男性与女性

# Tip amounts split by the customer's sex, in the order the values occur.
sex = data['sex'].unique()
bp_data = [data.loc[data['sex'] == sex[k], 'tip'] for k in (0, 1)]

# Draw one box per group.
boxplot(
    x_data=sex,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Sex',
    y_label='Tip',
    title='Distribution of Tip By Sex',
)

这里写图片描述

抽烟与否

# Tip amounts split by smoker status, in the order the values occur.
smoker = data['smoker'].unique()
bp_data = [data.loc[data['smoker'] == smoker[k], 'tip'] for k in (0, 1)]

# Draw one box per group.
boxplot(
    x_data=smoker,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Smoke or Not',
    y_label='Tip',
    title='Distribution of Tip By Smoker',
)

这里写图片描述

工作日与周末

day = data['day'].unique()
# Split the rows into weekend and weekday by explicit day names instead of by
# the position of each day in `unique()`, which depends on row order.
# NOTE(review): assumes the dataset abbreviates the weekend as 'Sat'/'Sun' —
# confirm against the values in `day`.
is_weekend = data['day'].isin(['Sat', 'Sun'])
bp_data = [data.loc[is_weekend, 'tip'], data.loc[~is_weekend, 'tip']]

# Draw one box per group; the labels follow the order of bp_data.
boxplot(x_data=['weekend', 'weekday'],
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='Day',
        y_label='Tip',
        title='Distribution of Tip By Day')

这里写图片描述

午餐与晚餐

# Tip amounts split by meal time, in the order the values occur.
time = data['time'].unique()
bp_data = [data.loc[data['time'] == time[k], 'tip'] for k in (0, 1)]

# Draw one box per group.
boxplot(
    x_data=time,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='Time',
    y_label='Tip',
    title='Distribution of Tip By Time',
)

这里写图片描述

就餐人数

# Distinct party sizes in ascending order, so the boxes read left to right
# from the smallest to the largest party instead of in first-appearance order.
size = np.sort(data['size'].unique())
# One series of tips per party size, aligned with `size`.
bp_data = [data.loc[data['size'] == party, 'tip'] for party in size]

# Draw one box per party size.
boxplot(x_data=size,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='Size',
        y_label='Tip',
        title='Distribution of Tip By Size')

这里写图片描述

性别+抽烟

# Mean tip for every (sex, smoker) pair. 'tip' is selected before averaging:
# averaging the whole frame also touches the non-numeric columns (day, time),
# which raises on recent pandas versions.
tip_by_sex_smoke = data.groupby(['sex', 'smoker'])['tip'].mean()
# Pivot smoker status into columns: rows = sex, columns = smoker.
tip_by_sex_smoke = tip_by_sex_smoke.unstack()
tip_by_sex_smoke
smoker Yes No
sex
Male 3.051167 3.113402
Female 2.931515 2.773519
# 绘制分组柱状图的函数
def groupedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label,title):
    """Draw a grouped (side-by-side) bar chart on a new figure.

    Parameters
    ----------
    x_data : sequence of numbers
        Center position of each group on the x axis.
    y_data_list : sequence of array-like
        One series of bar heights per group member; each series has one
        value per entry of ``x_data``.
    y_data_names : sequence
        Legend label for each series.
    colors : sequence of colors
        Bar color for each series.
    x_label, y_label, title : str
        Axis captions and the axes title.
    """
    _, ax = plt.subplots()
    n_series = len(y_data_list)
    if n_series:
        # Every group of bars occupies this much of the x axis in total.
        total_width = 0.8
        # Width of a single bar inside a group.
        ind_width = total_width / n_series
        # Offset of each bar's center from the group center. Built from
        # integer steps so exactly n_series offsets always come out; a float
        # step in np.arange may yield one element more or fewer.
        offsets = (np.arange(n_series) - (n_series - 1) / 2.0) * ind_width
        # Accept range/list/array positions without relying on implicit
        # coercion when the offset is added.
        centers = np.asarray(x_data, dtype=float)

        for i in range(n_series):
            # Offsets are center offsets, so center alignment is requested
            # explicitly rather than left to the library default.
            ax.bar(centers + offsets[i], y_data_list[i], color=colors[i],
                   label=y_data_names[i], width=ind_width, align='center')
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    if n_series:
        ax.legend(loc='upper right')



# Grouped bars: one group per sex, one bar per smoker status.
groupedbarplot(
    x_data=range(2),
    y_data_list=[tip_by_sex_smoke['Yes'], tip_by_sex_smoke['No']],
    y_data_names=['Yes', 'No'],
    colors=['#539caf', '#7663b0'],
    x_label='sex',
    y_label='tip',
    title='Tip By Smoker and Sex',
)
# Label each group with the corresponding row of the table.
ax = plt.gca()
ax.set_xticks(range(2))
ax.set_xticklabels(tip_by_sex_smoke.index.values)
[<matplotlib.text.Text at 0x7f89ca39cdd0>,
 <matplotlib.text.Text at 0x7f89ca3a7e90>]

这里写图片描述

练习4:泰坦尼克号海难幸存状况分析

  • 不同仓位等级中幸存和遇难的乘客比例(堆积柱状图)
  • 不同性别的幸存比例(堆积柱状图)
  • 幸存和遇难乘客的票价分布(分类箱式图)
  • 幸存和遇难乘客的年龄分布(分类箱式图)
  • 不同上船港口的乘客仓位等级分布(分组柱状图)
  • 幸存和遇难乘客兄弟姐妹/配偶(sibsp)的数量分布(分类箱式图)
  • 幸存和遇难乘客父母子女的数量分布(分类箱式图)
  • 单独乘船与否和幸存之间有没有联系(堆积柱状图或者分组柱状图)
# Load seaborn's "titanic" example dataset and preview its first rows.
data = sns.load_dataset("titanic")
data.head()
# Columns: survived flag, passenger class, sex, age, sibling count (sibsp —
# presumably siblings/spouses aboard; confirm against the dataset docs),
# parent/child count, fare, embarkation port code, class name, person
# category, adult-male flag, deck, embarkation town, alive (yes/no), alone
survived pclass sex age sibsp parch fare embarked class who adult_male deck embark_town alive alone
0 0 3 male 22.0 1 0 7.2500 S Third man True NaN Southampton no False
1 1 1 female 38.0 1 0 71.2833 C First woman False C Cherbourg yes False
2 1 3 female 26.0 0 0 7.9250 S Third woman False NaN Southampton yes True
3 1 1 female 35.0 1 0 53.1000 S First woman False C Southampton yes False
4 0 3 male 35.0 0 0 8.0500 S Third man True NaN Southampton no True

不同仓位等级幸存比例

# your code
# 绘制堆积柱状图
def stackedbarplot(x_data, y_data_list, y_data_names, colors, x_label, y_label, title):
    """Draw a stacked bar chart on a new figure.

    Parameters
    ----------
    x_data : sequence of numbers
        Position of each bar on the x axis.
    y_data_list : sequence of array-like
        One series of segment heights per stack layer; each series has one
        value per entry of ``x_data``. Pass normalized values if the stacks
        should add up to 1.
    y_data_names : sequence
        Legend label for each layer.
    colors : sequence of colors
        Color of each layer.
    x_label, y_label, title : str
        Axis captions and the axes title.
    """
    _, ax = plt.subplots()
    # Running top of the stack. Each layer starts where ALL previous layers
    # end; using only the previous layer's heights would overlap the layers
    # as soon as there are three or more.
    stack_top = 0.0
    for i, layer in enumerate(y_data_list):
        heights = np.asarray(layer, dtype=float)
        ax.bar(x_data, heights, color=colors[i], bottom=stack_top,
               align='center', label=y_data_names[i])
        stack_top = stack_top + heights
    ax.set_ylabel(y_label)
    ax.set_xlabel(x_label)
    ax.set_title(title)
    ax.legend(loc='upper right')  # legend position
# Passenger counts: rows = passenger class, columns = survived flag (0/1).
pclass_survived = (
    data.groupby(['pclass', 'survived'])
    .size()
    .unstack()
)
pclass_survived
survived 0 1
pclass
1 80 136
2 97 87
3 372 119
# Row totals, then the surviving and non-surviving share within each class.
pclass_survived['sum'] = pclass_survived[0].add(pclass_survived[1])
pclass_survived['yes_prop'] = pclass_survived[1].div(pclass_survived['sum'])
pclass_survived['no_prop'] = pclass_survived[0].div(pclass_survived['sum'])
pclass_survived
survived 0 1 sum yes_prop no_prop
pclass
1 80 136 216 0.629630 0.370370
2 97 87 184 0.472826 0.527174
3 372 119 491 0.242363 0.757637
# Stacked bars of the surviving / non-surviving share per passenger class.
# The plotted values are proportions, so the y label and title say
# "Proportion" rather than "Number".
stackedbarplot(x_data=pclass_survived.index.values,
               y_data_list=[pclass_survived['yes_prop'], pclass_survived['no_prop']],
               y_data_names=['Survived', 'Not survived'],
               colors=['#539caf', '#7663b0'],
               x_label='Pclass',
               y_label='Proportion of People',
               title='Proportion of People By Survived Or Not and Pclass')

# Put the ticks exactly where the bars were drawn (the class numbers).
ax = plt.gca()
ax.set_xticks(pclass_survived.index.values)
ax.set_xticklabels(pclass_survived.index.values)
[<matplotlib.text.Text at 0x7f89ca2f1890>,
 <matplotlib.text.Text at 0x7f89ca27a410>,
 <matplotlib.text.Text at 0x7f89ca26d6d0>]

这里写图片描述

不同性别幸存比例

# Passenger counts: rows = sex, columns = survived flag (0/1).
sex_survived = (
    data.groupby(['sex', 'survived'])
    .size()
    .unstack()
)
sex_survived
survived 0 1
sex
female 81 233
male 468 109
# Row totals, then the surviving and non-surviving share within each sex.
sex_survived['sum'] = sex_survived[0].add(sex_survived[1])
sex_survived['yes_prop'] = sex_survived[1].div(sex_survived['sum'])
sex_survived['no_prop'] = sex_survived[0].div(sex_survived['sum'])
sex_survived
survived 0 1 sum yes_prop no_prop
sex
female 81 233 314 0.742038 0.257962
male 468 109 577 0.188908 0.811092
# Stacked bars of the surviving / non-surviving share per sex.
# The plotted values are proportions, so the y label and title say
# "Proportion" rather than "Number".
stackedbarplot(x_data=[0, 1],
               y_data_list=[sex_survived['yes_prop'], sex_survived['no_prop']],
               y_data_names=['Survived', 'Not survived'],
               colors=['#539caf', '#7663b0'],
               x_label='Sex',
               y_label='Proportion of People',
               title='Proportion of People By Survived Or Not and Sex')
# Label each bar with the corresponding row of the table.
ax = plt.gca()
ax.set_xticks(range(2))
ax.set_xticklabels(sex_survived.index.values)
[<matplotlib.text.Text at 0x7f89ca1c3a10>,
 <matplotlib.text.Text at 0x7f89ca1ce0d0>]

这里写图片描述

幸存or遇难の票价分布

# Fares split by the survived flag, in the order the flag values occur.
survived = data['survived'].unique()
bp_data = [data.loc[data['survived'] == survived[k], 'fare'] for k in (0, 1)]

# Draw one box per group.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='fare',
    title='Distribution of Fare By Survived',
)

这里写图片描述

幸存or遇难の年龄分布

# Rows with an unknown age are left out of this plot. Replacing them with 0
# would add fake newborn passengers and drag both distributions down, and
# doing so in place would also alter `data` for every later cell.
known_age = data.dropna(subset=['age'])
survived = data['survived'].unique()
bp_data = [known_age.loc[known_age['survived'] == survived[k], 'age'] for k in (0, 1)]
# Draw one box per group.
boxplot(x_data=survived,
        y_data=bp_data,
        base_color='b',
        median_color='r',
        x_label='survived',
        y_label='age',
        title='Distribution of Age By Survived')

这里写图片描述

不同上船港口の仓位等级

# Passenger counts: rows = embarkation town, columns = passenger class.
# Missing combinations are not filled with 0 here.
embark_pclass = (
    data.groupby(['embark_town', 'pclass'])
    .size()
    .unstack()
)
embark_pclass
pclass 1 2 3
embark_town
Cherbourg 85 17 66
Queenstown 2 3 72
Southampton 127 164 353
# One series per passenger class (the first three columns of the table).
pclass_list = [embark_pclass.iloc[:, col] for col in range(3)]
pclass_list
[embark_town Cherbourg 85 Queenstown 2 Southampton 127 Name: 1, dtype: int64, embark_town Cherbourg 17 Queenstown 3 Southampton 164 Name: 2, dtype: int64, embark_town Cherbourg 66 Queenstown 72 Southampton 353 Name: 3, dtype: int64]

# Grouped bars: one group per embarkation town, one bar per passenger class.
groupedbarplot(
    x_data=range(3),
    y_data_list=pclass_list,
    y_data_names=embark_pclass.columns,
    colors=['#539caf', '#7663b0', '#00ff00'],
    x_label='embark_town',
    y_label='counts of pclass',
    title='Counts of Pclass vs Embark Town',
)

# Label each group with the corresponding row of the table.
ax = plt.gca()
ax.set_xticks(range(3))
ax.set_xticklabels(embark_pclass.index.values)
[<matplotlib.text.Text at 0x7f89c9f488d0>,
 <matplotlib.text.Text at 0x7f89ca045b10>,
 <matplotlib.text.Text at 0x7f89c9eec150>]

这里写图片描述

幸存or遇难の兄弟姐妹/配偶(sibsp)数量分布

# 'sibsp' values split by the survived flag, in the order the flag values occur.
survived = data['survived'].unique()
bp_data = [data.loc[data['survived'] == survived[k], 'sibsp'] for k in (0, 1)]
# Draw one box per group.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='sibsp',
    title='Distribution of Sibsp By Survived',
)

这里写图片描述

幸存or遇难の父母子女数量分布

# 'parch' values split by the survived flag, in the order the flag values occur.
survived = data['survived'].unique()
bp_data = [data.loc[data['survived'] == survived[k], 'parch'] for k in (0, 1)]
# Draw one box per group.
boxplot(
    x_data=survived,
    y_data=bp_data,
    base_color='b',
    median_color='r',
    x_label='survived',
    y_label='parch',
    title='Distribution of Parch By Survived',
)

这里写图片描述

单独乘船 vs 幸存

# Passenger counts: rows = travelling alone (False/True), columns = survived flag.
alone_survived = (
    data.groupby(['alone', 'survived'])
    .size()
    .unstack()
)
alone_survived
survived 0 1
alone
False 175 179
True 374 163
# Side-by-side bars of non-survivors and survivors for each `alone` value.
_, ax = plt.subplots()
width = 0.4
index = alone_survived.index.values
# The index holds booleans, so explicit numeric group centers are used rather
# than doing float arithmetic on True/False.
positions = np.arange(len(index))
# The two bars sit symmetrically around each group center, with the alignment
# stated explicitly so the layout does not depend on the library default.
ax.bar(positions - width / 2, alone_survived[0], color='#ff0000',
       label='Not survived', width=width, align='center')
ax.bar(positions + width / 2, alone_survived[1], color='#00ff00',
       label='Survived', width=width, align='center')

ax.set_ylabel('numbers of People')
ax.set_xlabel('alone')
ax.set_title('People Survived vs Alone')
ax.legend(loc='upper right')
# Ticks go at the group centers, labelled with the original index values.
plt.xticks(positions, index)
([<matplotlib.axis.XTick at 0x7f89ca2d73d0>,
  <matplotlib.axis.XTick at 0x7f89ca85f4d0>],
 <a list of 2 Text xticklabel objects>)

这里写图片描述

猜你喜欢

转载自blog.csdn.net/czl389/article/details/76944504