Python seaborn 画图

https://www.jianshu.com/p/388abcc1bc3d

本文涉及到的图形如下:
箱线图boxplot
小提琴图violinplot
散点图stripplot
带分布的散点图swarmplot
柱状图barplot
计数的柱状图countplot
两变量关系图factorplot

以下对seaborn画图做简单的介绍,建议多多练习。

import seaborn as sns
import numpy as np


# Global theme plus the example "tips" dataset used throughout this script.
sns.set_style("whitegrid")
tips = sns.load_dataset("tips")
tips  # bare expression; presumably kept for notebook-style display

# Box plot of a single variable.
ax = sns.boxplot(x=tips["total_bill"])

# Vertical box plot: pass the variable as y instead of x.
ax = sns.boxplot(y=tips["total_bill"])

# Grouped box plot: "day" is the grouping factor along the x axis.
ax = sns.boxplot(x="day", y="total_bill", data=tips)

# Nested grouping: within each day, group again by "smoker" and
# distinguish the sub-groups by color (hue).
ax = sns.boxplot(
    x="day",
    y="total_bill",
    hue="smoker",
    data=tips,
    palette="Set3",
)

# Change the line width with the linewidth parameter.
ax = sns.boxplot(
    x="day",
    y="total_bill",
    hue="time",
    data=tips,
    linewidth=2.5,
)

# Change the order of the categories on the x axis with the order parameter.
ax = sns.boxplot(x="time", y="tip", data=tips, order=["Dinner", "Lunch"])

# One horizontal box per variable of a DataFrame.
iris = sns.load_dataset("iris")
ax = sns.boxplot(data=iris, orient="h", palette="Set2")

# Box plot combined with a swarm plot: issuing the two plotting calls
# one after the other overlays the second on the first.
ax = sns.boxplot(x="day", y="total_bill", data=tips)
ax = sns.swarmplot(x="day", y="total_bill", data=tips, color=".25")

# Violin plot of a single variable.
ax = sns.violinplot(x=tips["total_bill"])

# Grouped violin plot: grouped along the x axis, as with the box plots.
ax = sns.violinplot(x="day", y="total_bill", data=tips)

# Nested grouping through hue (a group within each group).
ax = sns.violinplot(
    x="day",
    y="total_bill",
    hue="smoker",
    data=tips,
    palette="muted",
)

# Split violins: with split=True each hue level contributes one half
# of a single violin.
ax = sns.violinplot(
    x="day",
    y="total_bill",
    hue="smoker",
    data=tips,
    palette="muted",
    split=True,
)

# Change the order of the x-axis categories, again with the order parameter.
ax = sns.violinplot(x="time", y="tip", data=tips, order=["Dinner", "Lunch"])


#----------------------------stripplot------------------------------------#
# Plain strip (categorical scatter) plot of a single variable.
ax1 = sns.stripplot(x=tips["total_bill"])

# Swarm plot: a scatter plot whose points are spread out to show density.
ax2 = sns.swarmplot(x=tips["total_bill"])

# Grouped strip plot.
ax = sns.stripplot(x="day", y="total_bill", data=tips)

# Strip plot with jitter. jitter may also be a small float such as
# 0.1, 0.2, ... giving the amount of jitter.
ax = sns.stripplot(x="day", y="total_bill", data=tips, jitter=True)

# Swap x and y to draw the strips horizontally.
ax = sns.stripplot(x="total_bill", y="day", data=tips, jitter=True)

# Nested grouping, optionally drawn side by side (like a grouped bar chart),
# controlled by the hue and dodge parameters.
# 1. Group by hue only.
ax = sns.stripplot(x="sex", y="total_bill", hue="day", data=tips, jitter=True)

# 2. Draw the hue levels separately.
# `dodge` is the current name of the parameter formerly called `split`
# on stripplot; the old name is deprecated/removed in newer seaborn.
ax = sns.stripplot(
    x="day",
    y="total_bill",
    hue="smoker",
    data=tips,
    jitter=True,
    palette="Set2",
    dodge=True,
)

# Strip plot on top of a violin plot: issue the two calls one after the other.
# inner=None removes the violin's interior marks so only the points show.
ax = sns.violinplot(x="day", y="total_bill", data=tips, inner=None, color=".8")
ax = sns.stripplot(x="day", y="total_bill", data=tips, jitter=True)

#-----------------------------------swarmplot: scatter plot showing density--------------------------#
ax = sns.swarmplot(x=tips["total_bill"])

# Grouped swarm plot.
ax = sns.swarmplot(x="day", y="total_bill", data=tips)

# Box plot + swarm plot.
# whis controls whether the box plot marks outliers; whis=np.inf means
# none are shown (the swarm already draws every observation).
ax = sns.boxplot(x="tip", y="day", data=tips, whis=np.inf)
ax = sns.swarmplot(x="tip", y="day", data=tips)

# Violin plot + swarm plot.
# inner=None suppresses the violin's interior box so it does not clash
# with the overlaid points (the keyword was previously misspelled "innner").
ax = sns.violinplot(x="day", y="total_bill", data=tips, inner=None)
ax = sns.swarmplot(
    x="day",
    y="total_bill",
    data=tips,
    color="white",
    edgecolor="gray",
)
# Bar plot: the default statistic (estimator) is the mean.
ax = sns.barplot(x="day", y="total_bill", data=tips, ci=0)
# The same per-day means computed directly, for comparison with the bars
# (bare expression; the result is not stored).
tips.groupby(by="day")["total_bill"].mean()

# Grouped bar plot.
ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips, ci=0)

# Bars showing the median instead: estimator takes a custom statistic function.
ax = sns.barplot(x="day", y="tip", data=tips, estimator=np.median, ci=0)

# Change the color palette.
# x is passed by keyword like every other call in this script; newer seaborn
# no longer accepts it positionally.
ax = sns.barplot(x="size", y="total_bill", data=tips, palette="Blues_d")

#---------------countplot: count statistics plot----------------------------#
# Very useful for categorical (factor) variables.
titanic = sns.load_dataset("titanic")
ax = sns.countplot(x="class", data=titanic)

# Grouped counts.
ax = sns.countplot(x="class", hue="who", data=titanic)

# Swap x and y to draw horizontal bars.
ax = sns.countplot(y="class", hue="who", data=titanic)

#--------------------------factorplot: combined plots------------------------------#
# NOTE(review): factorplot and its `size` argument appear to be renamed
# (catplot / height) in newer seaborn releases — confirm the installed version.

# Violin plots showing how "pulse" is distributed over "time", per "kind".
exercise = sns.load_dataset("exercise")
g = sns.factorplot(
    x="time",
    y="pulse",
    hue="kind",
    data=exercise,
    kind="violin",
)

# One subplot per level of a factor: col selects the faceting variable and
# col_wrap=4 puts four subplots on each row.
# Only passengers with a known deck are kept.
titanic_with_deck = titanic[titanic.deck.notnull()]
g = sns.factorplot(
    x="alive",
    col="deck",
    col_wrap=4,
    data=titanic_with_deck,
    kind="count",
    size=2.5,
    aspect=.8,
)



#-----------------------regression plot: lmplot-------------------------------#
# Linear regression plot.
g = sns.lmplot(x="total_bill", y="tip", data=tips)

# Grouped linear regression, controlled by the hue parameter.
g = sns.lmplot(x="total_bill", y="tip", hue="smoker", data=tips)

# Grouped plot with a different marker shape per group.
g = sns.lmplot(
    x="total_bill",
    y="tip",
    hue="smoker",
    data=tips,
    markers=["o", "x"],
)

# Besides grouping, draw each group in its own subplot via the col parameter.
g = sns.lmplot(x="total_bill", y="tip", col="smoker", data=tips)

# col + hue together: group and facet at once; x_jitter controls how much
# the scatter points are jittered.
g = sns.lmplot(
    x="size",
    y="total_bill",
    hue="day",
    col="day",
    data=tips,
    aspect=.4,
    x_jitter=.1,
)

# Faceted plot with the facet size set through size.
g = sns.lmplot(
    x="total_bill",
    y="tip",
    col="day",
    hue="day",
    data=tips,
    col_wrap=2,
    size=3,
)

# Just as col facets into columns, row facets into rows.
g = sns.lmplot(
    x="total_bill",
    y="tip",
    row="sex",
    col="time",
    data=tips,
    size=4,
)

#-------------------------------regression plot: regplot------------------------------#
ax = sns.regplot(x="total_bill", y="tip", data=tips)

# Control the marker shape and color of the scatter points.
np.random.seed(8)
mean = [4, 6]
cov = [(1.5, .7), (.7, 1)]
samples = np.random.multivariate_normal(mean, cov, 80)
x, y = samples.T
ax = sns.regplot(x=x, y=y, color="g", marker="+")

# Control the confidence level of the regression band.
ax = sns.regplot(x=x, y=y, ci=68)

# The fits above are first-order (order=1); order=2 fits a second-order curve.
ans = sns.load_dataset("anscombe")
dataset_ii = ans.loc[ans.dataset == "II"]
ax = sns.regplot(
    x="x",
    y="y",
    data=dataset_ii,
    scatter_kws={"s": 80},
    order=2,
    ci=None,
    truncate=True,
)


#-------------------------plots of numeric distributions------------------------#
# Density plot of a numeric variable; by default both the density curve
# and the histogram are drawn.
sns.set(rc={"figure.figsize": (8, 4)})
np.random.seed(0)
x = np.random.randn(100)
ax = sns.distplot(x)

# Density curve only (plus rug marks), without the histogram.
ax = sns.distplot(x, rug=True, hist=False)

# Rotated layout via vertical=True.
ax = sns.distplot(x, vertical=True)
#-----------------------------kernel density plot: kdeplot-----------------------------#
# Draw a kernel density estimate.
np.random.seed(10)
mean = [0, 2]
cov = [(1, .5), (.5, 1)]
x, y = np.random.multivariate_normal(mean, cov, size=50).T
ax = sns.kdeplot(x)

# shade decides whether the area under the curve is filled.
ax = sns.kdeplot(x, shade=True, color="r")

# Bivariate density, comparable to a contour plot.
# With shade=True the density is shown by color intensity instead.
ax = sns.kdeplot(x, y)
ax = sns.kdeplot(x, y, shade=True)

# Bivariate densities for two groups, i.e. two KDE plots on the same axes.
# The plot shows each group's density center, similar to plotting
# cluster centers in data mining.
setosa = iris.loc[iris.species == "setosa"]        # group 1
virginica = iris.loc[iris.species == "virginica"]  # group 2
ax = sns.kdeplot(
    setosa.sepal_width,
    setosa.sepal_length,
    cmap="Reds",
    shade=True,
    shade_lowest=False,
)
ax = sns.kdeplot(
    virginica.sepal_width,
    virginica.sepal_length,
    cmap="Blues",
    shade=True,
    shade_lowest=False,
)

#-----------------------------------------bivariate relationship plot: jointplot----------------------------------#
# Default joint plot: a scatter of the two variables with a histogram of
# each variable on the margins.
np.random.seed(0)
sns.set(style="white", color_codes=True)
g = sns.jointplot(x="total_bill", y="tip", data=tips)

# x and y are passed by keyword in every call below, matching the call
# above; newer seaborn no longer accepts them positionally.

# kind="reg": besides the scatter, also draw the fitted line and densities.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="reg")

# kind="hex": hexagonal bins instead of individual points.
g = sns.jointplot(x="total_bill", y="tip", data=tips, kind="hex")

# kind="kde": kernel density plot.
g = sns.jointplot(
    x="sepal_width",
    y="petal_length",
    data=iris,
    kind="kde",
    space=0,
    color="g",
)

# Control the figure size and color.
# NOTE(review): `size` appears to be renamed `height` in newer seaborn —
# confirm against the installed version.
g = sns.jointplot(
    x="total_bill",
    y="tip",
    data=tips,
    size=5,
    ratio=3,
    color="g",
)

#--------------------------------grid of variable relationships: pairplot----------------------------------------#
# x-y scatter plot without the regression line (fit_reg=False).
g = sns.lmplot(
    x="total_bill",
    y="tip",
    data=tips,
    fit_reg=False,
    hue="smoker",
    scatter=True,
)

# Regression lines only, without the scatter points (scatter=False).
g = sns.lmplot(
    x="total_bill",
    y="tip",
    data=tips,
    fit_reg=True,
    hue="smoker",
    scatter=False,
)

# Pairwise relationship plots, first ungrouped and then grouped by species.
g = sns.pairplot(iris)
g = sns.pairplot(iris, hue="species")

# A different marker shape for each group.
g = sns.pairplot(iris, hue="species", markers=["o", "s", "D"])

# Plot only a subset of the DataFrame's variables.
g = sns.pairplot(iris, vars=["sepal_width", "sepal_length"])

# The diagonal shows histograms by default; it can show KDEs instead.
g = sns.pairplot(iris, diag_kind="kde")
# Heat map of a 10 x 12 array of uniform random numbers.
uniform_data = np.random.rand(10, 12)
ax = sns.heatmap(uniform_data)

# Change the range of values covered by the color map.
ax = sns.heatmap(uniform_data, vmin=0, vmax=1)

# Heat map with each cell's value written on it.
flights = sns.load_dataset("flights")
# Reshape to one row per month and one column per year. The arguments are
# passed by keyword because newer pandas rejects positional arguments to pivot.
flights = flights.pivot(index="month", columns="year", values="passengers")
ax = sns.heatmap(flights, annot=True, fmt="d")

#----------------------tsplot: time series plot--------------------------#
# NOTE(review): tsplot appears to be deprecated/removed in newer seaborn
# releases — confirm against the installed version.
np.random.seed(22)
sns.set(color_codes=True)
x = np.linspace(0, 15, 31)
# Ten noisy copies of a sine wave; the draws are made in the same order
# as a single combined expression would make them.
noise = np.random.rand(10, 31)
offsets = np.random.randn(10, 1)
data = np.sin(x) + noise + offsets
ax = sns.tsplot(data=data)

# tsplot from a long-form DataFrame.
gammas = sns.load_dataset("gammas")
ax = sns.tsplot(
    time="timepoint",
    value="BOLD signal",
    unit="subject",
    condition="ROI",
    data=gammas,
)

# Draw bands for several confidence levels.
ax = sns.tsplot(data=data, ci=[68, 95], color="m")

# Use a different statistic: the default is the mean, here the sum is used.
ax = sns.tsplot(data=data, estimator=np.sum)

#--------------------------two y axes--------------------------------------#
import pandas as pd
import matplotlib.pyplot as plt

# Two random integer series: sales in [0, 100) and temperature in [0, 10).
raw_sale = np.random.random(10) * 100
sale = pd.Series(raw_sale).map(int)
raw_tmperature = np.random.random(10) * 10
tmperature = pd.Series(raw_tmperature).map(int)

# Left-hand axis: the sales series in blue.
ax = plt.subplot(111)
sale.plot(ax=ax, color='b')
ax.set_xlabel('time')
ax.set_ylabel('sale')

# The key step: twinx (or twiny) creates a second axes sharing the x axis.
ax2 = ax.twinx()
tmperature.plot(ax=ax2, color='r')
ax2.set_ylabel('tmperature')
plt.title('double series figure')

#----------------------------saving figures in bulk----------------------------------#
from matplotlib.pyplot import savefig
import time

# Label used to fill both the title template and the output file name.
# For a real batch, wrap the lines below in a loop over such labels.
label = 'total_bill'

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# The '%s' placeholder is filled in here; previously it was shown literally.
tips['total_bill'].plot(ax=ax, style='o-', title='%s 月销量趋势图' % label)
# savefig needs a non-empty path with a recognized extension; the previous
# empty string made the call fail.
file = '%s.png' % label
savefig(file)
time.sleep(0.5)  # original author's note: pause briefly here, otherwise an error occurs
plt.close()  # remember to close the figure handle

#------------------------displaying Chinese text---------------------------------#
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt


mpl.rcParams.update({
    # Default sans-serif font: SimHei.
    'font.sans-serif': ['SimHei'],
    # Fixes the minus sign '-' being shown as a box in saved figures.
    'axes.unicode_minus': False,
})








猜你喜欢

转载自blog.csdn.net/weixin_42983055/article/details/82593324
今日推荐