# List the dataset directories currently mounted under the input path
!ls /home/kesci/input/
In [ ]:
# List the files in the personal persistent workspace
!ls /home/kesci/work/
In [ ]:
# 查看当前kernerl下的package
!pip list --format=columns
In [ ]:
# Load the klab-autotime extension; per the original note it displays each cell's run time
%load_ext klab-autotime
In [7]:
# Plotting: matplotlib provides the base figures, seaborn the statistical plots
import matplotlib.pyplot as plt
# Render figures inline in the notebook output
%matplotlib inline
import seaborn as sns
# Activate seaborn's default theme for every figure drawn after this point
sns.set()
# Numerical and tabular data handling
import numpy as np
import pandas as pd
In [8]:
# Draw 2000 samples from a correlated 2-D normal distribution and label the columns.
# A dedicated, seeded generator makes the samples (and every plot derived from them)
# identical after "Restart Kernel -> Run All", without touching NumPy's global RNG state.
rng = np.random.RandomState(42)
samples = rng.multivariate_normal(mean=[0, 0], cov=[[5, 2], [2, 2]], size=2000)
# Keep the raw array under its own name so `data` is only ever the DataFrame.
data = pd.DataFrame(samples, columns=['x', 'y'])
In [9]:
# Preview the first rows of the sampled x/y DataFrame
data.head()
Out[9]:
In [10]:
# Frequency histograms of both columns, overlaid on the same axes.
# `density=True` normalises each histogram to unit area; it replaces the `normed`
# keyword, which was removed from `plt.hist` in matplotlib 3.1.
for col in ('x', 'y'):
    plt.hist(data[col], density=True, alpha=0.5)
In [11]:
# Kernel density estimate of each column, drawn as filled curves on shared axes.
# `fill=True` is the current spelling of the deprecated `shade=True`
# (requires seaborn >= 0.11).
for col in ('x', 'y'):
    sns.kdeplot(data[col], fill=True)
In [14]:
# Histogram combined with a KDE curve for each column, overlaid on shared axes.
# `sns.distplot` is deprecated; `histplot(kde=True, stat='density')` is its documented
# replacement (requires seaborn >= 0.11). `cut=3` lets the KDE curve extend past the
# data range the way distplot's curve did, and the lower alpha keeps both layers visible.
for col in ('x', 'y'):
    sns.histplot(data[col], kde=True, stat='density', kde_kws={'cut': 3}, alpha=0.4)
In [15]:
# Two-dimensional (bivariate) KDE of x against y.
# The variables are named explicitly: on seaborn >= 0.11 a bare wide-form DataFrame
# is drawn as one univariate curve per column instead of a joint density.
sns.kdeplot(data=data, x='x', y='y');
In [18]:
# Joint distribution of x and y as a KDE, with marginal distributions, on a white style.
# Keyword arguments are used because recent seaborn releases take `data` as the first
# parameter and make the remaining ones keyword-only; this form works on old and new versions.
with sns.axes_style('white'):
    sns.jointplot(x='x', y='y', data=data, kind='kde')
In [19]:
# Joint distribution of x and y as hexagonal bins, with marginal histograms.
# Keyword arguments are used because recent seaborn releases take `data` as the first
# parameter and make the remaining ones keyword-only; this form works on old and new versions.
with sns.axes_style('white'):
    sns.jointplot(x='x', y='y', data=data, kind='hex')
In [20]:
# Load seaborn's example 'iris' dataset
# NOTE(review): load_dataset may need network access on first use -- confirm for this environment
iris=sns.load_dataset('iris')
In [21]:
# Preview the first rows of the iris data
iris.head()
Out[21]:
In [24]:
# Pair plot (scatter-plot matrix) of the iris measurements, coloured by species.
# `height` sets the size of each facet; it replaces the `size` keyword, which was
# renamed in seaborn 0.9 and is deprecated.
sns.pairplot(iris, hue='species', height=2.5)
Out[24]:
In [26]:
# Load seaborn's example 'tips' dataset and preview its first rows
tips=sns.load_dataset('tips')
tips.head()
Out[26]:
In [32]:
# Faceted frequency histograms: tip percentage split by sex (rows) and meal time (columns).

# Tip expressed as a percentage of the total bill, stored as a new column on `tips`.
tips['tip_pct'] = 100 * tips['tip'] / tips['total_bill']

# One facet per (sex, time) combination, with the facet titles placed in the margins.
grid = sns.FacetGrid(
    data=tips,
    row='sex',
    col='time',
    margin_titles=True,
)

# Draw the same histogram in every facet using 15 evenly spaced bin edges over 0-40 %.
grid.map(
    plt.hist,
    'tip_pct',
    bins=np.linspace(0, 40, 15),
);
In [35]:
# Box plots comparing the total-bill distribution per day, split by sex.
# `catplot` is the current name of `factorplot` (renamed in seaborn 0.9 and later
# removed); the variables are passed by keyword because current releases no longer
# accept them positionally.
with sns.axes_style(style='ticks'):
    g = sns.catplot(x='day', y='total_bill', hue='sex', data=tips, kind='box')
    g.set_axis_labels('Day', 'Total Bill')
In [37]:
# Joint distribution of total bill and tip as hexagonal bins, on a white style.
# Keyword arguments are used because recent seaborn releases take `data` as the first
# parameter and make the remaining ones keyword-only; this form works on old and new versions.
with sns.axes_style('white'):
    sns.jointplot(x='total_bill', y='tip', data=tips, kind='hex')
In [38]:
# Joint distribution of total bill and tip with a regression fit.
# Keyword arguments are used because recent seaborn releases take `data` as the first
# parameter and make the remaining ones keyword-only; this form works on old and new versions.
sns.jointplot(x='total_bill', y='tip', data=tips, kind='reg')
Out[38]:
In [40]:
# Load seaborn's example 'planets' dataset and preview its first rows
planets=sns.load_dataset('planets')# use the planets data
planets.head()
Out[40]:
In [41]:
# Count of planet records per year.
# `catplot` is the current name of `factorplot` (renamed in seaborn 0.9 and later
# removed); `x` is passed by keyword because current releases no longer accept it
# positionally.
with sns.axes_style('white'):
    g = sns.catplot(x='year', data=planets, aspect=2, kind='count', color='steelblue')
    # Label only every 5th tick so the year labels stay readable.
    g.set_xticklabels(step=5)
In [42]:
# Number of planets discovered per year (2001-2014), broken down by discovery method.
# `catplot` is the current name of `factorplot` (renamed in seaborn 0.9 and later
# removed); `x` is passed by keyword because current releases no longer accept it
# positionally.
with sns.axes_style('white'):
    g = sns.catplot(
        x='year',
        data=planets,
        aspect=4.0,
        kind='count',
        hue='method',
        order=list(range(2001, 2015)),  # restrict and order the x axis to 2001..2014
    )
    g.set_ylabels('number of planets discovered')