案例
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# %matplotlib inline
import numpy as np
import pandas as pd
from scipy import stats, integrate
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme and remap matplotlib's one-letter colour
# codes ("b", "g", "r", ...) to the seaborn palette.
sns.set(color_codes=True)
# np.random.seed(sum(map(ord, "distributions")))  # uncomment for reproducible samples
# 100 draws from the standard normal distribution.
x = np.random.normal(size=100)
# distplot draws on the current Axes, so each bin setting gets a fresh
# figure; otherwise the three histograms are painted on top of each other.
# bins=None lets distplot choose the bin count (same as omitting the argument).
# NOTE(review): distplot is deprecated in recent seaborn releases in favour of
# histplot/displot -- confirm the target seaborn version before migrating.
for bins in (None, 20, 40):
    plt.figure()
    sns.distplot(x, bins=bins, kde=False)
数据分布情况——在直方图上加上拟合曲线(用 gamma 分布拟合)
# 200 draws from a gamma distribution with shape parameter 6
# (the scale parameter is left at its default).
x = np.random.gamma(6, size=200)
# Start a new figure: distplot draws on the current Axes and would otherwise
# be layered over whatever histogram was plotted last.
plt.figure()
# kde=False disables the kernel density estimate; fit=stats.gamma overlays
# the density of a gamma distribution fitted to the sample instead.
sns.distplot(x, kde=False, fit=stats.gamma)
根据均值和协方差生成数据
# Parameters of a 2-D normal distribution: mean vector and covariance matrix.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
# Draw 200 samples (one row per sample) and label the two columns.
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
print(df)
# The string below is an example of the printed output (values vary per run).
'''
x y
0 -0.966779 1.224554
1 1.326123 0.467515
2 -1.233853 0.459449
3 -0.877749 0.512031
......
'''
观测两个变量之间的分布关系最好用散点图
# Fresh 200-point sample from the same 2-D normal distribution.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
# Joint plot of column "x" against column "y" using the default plot kind.
sns.jointplot(data=df, x="x", y="y")
# Resample 200 points; `mean` and `cov` keep their names because the
# statements that follow reuse them.
mean = [0, 1]
cov = [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, size=200)
df = pd.DataFrame(data, columns=["x", "y"])
# Same joint plot, but with hexagonal binning instead of individual points.
sns.jointplot(data=df, x="x", y="y", kind="hex")
# Draw 1000 samples using the `mean` and `cov` defined earlier in the file;
# transposing the (1000, 2) result unpacks it into one array per coordinate.
x, y = np.random.multivariate_normal(mean, cov, 1000).T
# Apply the "white" axes style to this plot only. The jointplot call must be
# indented to sit inside the `with` block (it was not, which is a syntax error).
with sns.axes_style("white"):
    sns.jointplot(x=x, y=y, kind="hex", color="k")
案例:鸢尾花特征散点分布
# Load seaborn's bundled "iris" example dataset.
iris = sns.load_dataset(name="iris")
# Draw pairwise relationships between the dataset's columns in a grid.
sns.pairplot(data=iris)