import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
一、分类散点图
# Load seaborn's "tips" example dataset and preview its first rows.
tips = sns.load_dataset("tips")
tips.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# Total bill per day; no `kind` is passed, so catplot's default plot type is used.
sns.catplot(data=tips, x="day", y="total_bill");
jitter参数控制抖动的大小或完全禁用它
# Same plot as before, but with jitter disabled.
sns.catplot(data=tips, x="day", y="total_bill", jitter=False);
通过设置kind="swarm"来启用蜂群图
# Total bill per day drawn with kind="swarm".
sns.catplot(data=tips, x="day", y="total_bill", kind="swarm");
使用order参数可针对单个图表控制类别的排列顺序
# Tip by smoker status, with the category order given explicitly as "No" then "Yes".
sns.catplot(data=tips, x="smoker", y="tip", order=["No", "Yes"]);
交换变量在坐标轴上的赋值(分类变量放在y轴,数值变量放在x轴),即可得到水平方向的图
# Axes swapped: the numeric column goes on x and the categorical "day" on y,
# with points coloured by "time".
sns.catplot(data=tips, x="total_bill", y="day", hue="time", kind="swarm");
二、类别内观察的分布
1、箱线图
# Box plot of total bill for each day.
sns.catplot(data=tips, x="day", y="total_bill", kind="box");
添加hue语义时,语义变量的每个级别的框都沿着分类轴移动,不会重叠
# Box plot of total bill per day, split by smoker status through `hue`.
sns.catplot(data=tips, x="day", y="total_bill", hue="smoker", kind="box");
# Preview the first two rows of the dataset.
tips.head(2)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
total_bill | tip | sex | smoker | day | time | size | |
---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
# Add a boolean "weekend" column: True where "day" is "Sat" or "Sun".
tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
# Preview the first two rows to confirm the new column is present.
tips.head(2)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
total_bill | tip | sex | smoker | day | time | size | weekend | |
---|---|---|---|---|---|---|---|---|
0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 | True |
1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 | True |
# Box plot per day coloured by the boolean "weekend" column (which must already
# exist on `tips`). dodge=False is passed so the hue levels are not offset
# along the categorical axis.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="weekend",
    kind="box",
    dodge=False,
);
函数boxenplot()(kind="boxen")绘制的图表类似于箱线图,但已经过优化,可显示有关分布形状的更多信息。它最适合更大的数据集:
# Load seaborn's "diamonds" example dataset and preview its first rows.
diamonds = sns.load_dataset("diamonds")
diamonds.head()
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
carat | cut | color | clarity | depth | table | price | x | y | z | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.23 | Ideal | E | SI2 | 61.5 | 55.0 | 326 | 3.95 | 3.98 | 2.43 |
1 | 0.21 | Premium | E | SI1 | 59.8 | 61.0 | 326 | 3.89 | 3.84 | 2.31 |
2 | 0.23 | Good | E | VS1 | 56.9 | 65.0 | 327 | 4.05 | 4.07 | 2.31 |
3 | 0.29 | Premium | I | VS2 | 62.4 | 58.0 | 334 | 4.20 | 4.23 | 2.63 |
4 | 0.31 | Good | J | SI2 | 63.3 | 58.0 | 335 | 4.34 | 4.35 | 2.75 |
# Boxen plot of price per colour grade; the frame is sorted by "color"
# before being handed to catplot.
sns.catplot(
    data=diamonds.sort_values("color"),
    x="color",
    y="price",
    kind="boxen",
);
三、类别内的统计估计
条形图
# Load seaborn's "titanic" example dataset and preview its first two rows.
titanic = sns.load_dataset("titanic")
titanic.head(2)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
# Bar plot of the "survived" column by sex, with one bar per passenger class.
sns.catplot(data=titanic, x="sex", y="survived", hue="class", kind="bar");
点图:
pointplot()函数提供了可视化相同信息的另一种方式(用连线斜率的变化来代替条形)
# Same variables as the bar plot, drawn as a point plot instead.
sns.catplot(data=titanic, x="sex", y="survived", hue="class", kind="point");
加入线条样式与色调一起表示,使得观察更清晰
# Point plot of "survived" by class, one line per sex. Each sex gets its own
# colour (explicit palette mapping); distinct markers and line styles are also
# supplied so the two hue levels differ by more than colour.
sns.catplot(
    data=titanic,
    x="class",
    y="survived",
    hue="sex",
    kind="point",
    palette={"male": "g", "female": "m"},
    markers=["^", "o"],
    linestyles=["-", "--"],
);
四、绘制“宽格式”数据
# Load seaborn's "iris" example dataset and preview its first three rows.
iris = sns.load_dataset("iris")
iris.head(3)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
# Wide-form usage: the whole frame is passed with no x/y mapping, and the
# boxes are requested in horizontal orientation.
sns.catplot(kind="box", orient="h", data=iris);
五、使用分面(facet)显示多种关系
# Swarm plot of total bill per day coloured by smoker status, with one facet
# column for each value of "time".
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="smoker",
    col="time",
    kind="swarm",
    aspect=.6,
);
要进一步自定义绘图,可以使用catplot返回的FacetGrid对象上的方法
# Preview the first three rows of the titanic dataset.
titanic.head(3)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
survived | pclass | sex | age | sibsp | parch | fare | embarked | class | who | adult_male | deck | embark_town | alive | alone | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S | Third | man | True | NaN | Southampton | no | False |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C | First | woman | False | C | Cherbourg | yes | False |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S | Third | woman | False | NaN | Southampton | yes | True |
# Horizontal box plots of fare grouped by "survived", one facet row per class.
# Rows with fare <= 0 are dropped first, presumably because the x axis is
# switched to a log scale below.
g = sns.catplot(
    data=titanic.query("fare > 0"),
    x="fare",
    y="survived",
    row="class",
    kind="box",
    orient="h",
    height=1.5,
    aspect=4,
)
# Adjust the returned grid object: put the x axis on a logarithmic scale.
g.set(xscale="log");