❝本文分享最常用「6个偏差(Deviation)关系图」。
❞
本文目录
11、发散型柱形图 (Diverging Bars)
12、发散型文本图(Diverging Texts)-水平方向
13、发散型文本图(Diverging Texts)-垂直方向
14、发散型点图(Diverging Dot Plot)
15、带Marker的发散型棒棒糖图 (Diverging Lollipop Chart with Markers)
16、面积图(Area Chart)
二、偏差 (Deviation)关系图
11、发散型柱形图 (Diverging Bars)
展示各项目在单个指标上的差异,并直观呈现差异的顺序和大小。
# Diverging bars: one horizontal bar per car showing how far its mileage
# (z-score of `mpg`) lies below or above the fleet average.
# NOTE(review): relies on `pd` (pandas) and `plt` (matplotlib.pyplot) being
# imported earlier in the file -- not visible in this section.

# Prepare Data
df = pd.read_csv("./datasets/mtcars.csv")
mpg = df['mpg']
# Standardize so the bars diverge around the mean (z = 0).
df['mpg_z'] = (mpg - mpg.mean()) / mpg.std()
# Below-average cars are drawn red, all others green.
df['colors'] = ['red' if z < 0 else 'green' for z in df['mpg_z']]
# Sort by deviation; the fresh 0..n-1 index then serves as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(10, 6), dpi=80)
plt.hlines(y=df.index,
           xmin=0,
           xmax=df.mpg_z,
           color=df.colors,
           alpha=0.8,
           linewidth=5)

# Decorations
# Both axis labels were unterminated string literals; the closing `$'` is
# restored so they are valid mathtext labels.
plt.gca().set(ylabel='$Model$', xlabel='$Mileage$')
plt.yticks(df.index, df.cars, fontsize=12)
plt.xticks(fontsize=12)
plt.title('Diverging Bars of Car Mileage')
plt.grid(linestyle='--', alpha=0.5)
plt.show()
12、发散型文本图(Diverging Texts)-水平方向
和上一个图的区别是该图在柱子上添加了数值文本。
# Diverging text bars (horizontal): same bars as the diverging-bars chart,
# with each bar's z-score printed just beyond its tip.
# NOTE(review): relies on `pd` (pandas) and `plt` (matplotlib.pyplot) being
# imported earlier in the file -- not visible in this section.

# Prepare Data
df = pd.read_csv("./datasets/mtcars.csv")
mpg = df['mpg']
# Standardize so the bars diverge around the mean (z = 0).
df['mpg_z'] = (mpg - mpg.mean()) / mpg.std()
df['colors'] = ['red' if z < 0 else 'green' for z in df['mpg_z']]
# Sort by deviation; the fresh 0..n-1 index then serves as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(10, 8), dpi=80)
plt.hlines(y=df.index, xmin=0, xmax=df.mpg_z, color=df.colors, alpha=0.8, linewidth=5)
for z, row in zip(df.mpg_z, df.index):
    # Anchor the label on the outer side of the bar tip so it never
    # overlaps the bar: right-aligned for negative bars, left-aligned otherwise.
    plt.text(z, row, round(z, 2),
             horizontalalignment='right' if z < 0 else 'left',
             verticalalignment='center',
             fontdict={'color': 'black', 'size': 10})

# Decorations
plt.yticks(df.index, df.cars, fontsize=12)
plt.xticks(fontsize=10)
plt.title('Diverging Text Bars of Car Mileage', fontdict={'size': 15})
plt.grid(linestyle='--', alpha=0.5)
plt.xlim(-2.5, 2.5)
plt.show()
13、发散型文本图(Diverging Texts)-垂直方向
# Diverging text bars (vertical): one vertical bar per car for the z-score
# of its mileage, with the value printed beyond the bar tip.
# NOTE(review): relies on `pd` (pandas) and `plt` (matplotlib.pyplot) being
# imported earlier in the file -- not visible in this section.

# Prepare Data
df = pd.read_csv("./datasets/mtcars.csv")
mpg = df['mpg']
# Standardize so the bars diverge around the mean (z = 0).
df['mpg_z'] = (mpg - mpg.mean()) / mpg.std()
df['colors'] = ['red' if z < 0 else 'green' for z in df['mpg_z']]
# Sort by deviation; the fresh 0..n-1 index then serves as the x position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(10, 6), dpi=80)
plt.vlines(x=df.index,
           ymin=0,
           ymax=df.mpg_z,
           color=df.colors,
           alpha=0.8,
           linewidth=5)
for pos, z in zip(df.index, df.mpg_z):
    # Put the label on the outer side of the bar tip: above positive bars
    # (unchanged from before) and below negative ones. Previously every label
    # was drawn at z + 0.2, which places negative-bar labels on the bar itself.
    below = z < 0
    plt.text(pos,
             z - 0.2 if below else z + 0.2,
             round(z, 1),
             horizontalalignment='center',
             verticalalignment='top' if below else 'baseline',
             fontdict={'color': 'black', 'size': 8})

# Decorations
plt.xticks(df.index, df.cars, fontsize=12, rotation=90)
plt.yticks(fontsize=12)
plt.title('Diverging Text Bars of Car Mileage', fontdict={'size': 12})
plt.grid(linestyle='--', alpha=0.5)
plt.show()
14、发散型点图(Diverging Dot Plot)
与发散型文本图的区别是去掉了柱子,从而减弱了组之间的对比和差异感。
# Diverging dot plot: each car is a dot positioned at the z-score of its
# mileage, with the rounded value printed inside the dot.
# NOTE(review): relies on `pd` (pandas) and `plt` (matplotlib.pyplot) being
# imported earlier in the file -- not visible in this section.

# Prepare Data
df = pd.read_csv("./datasets/mtcars.csv")
mpg = df['mpg']
# Standardize so the dots diverge around the mean (z = 0).
df['mpg_z'] = (mpg - mpg.mean()) / mpg.std()
df['colors'] = ['red' if z < 0 else 'darkgreen' for z in df['mpg_z']]
# Sort by deviation; the fresh 0..n-1 index then serves as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(12, 10), dpi=80)
plt.scatter(df.mpg_z, df.index, s=250, alpha=.6, color=df.colors)
for z, row in zip(df.mpg_z, df.index):
    # Center the value on the dot itself.
    plt.text(z,
             row,
             round(z, 1),
             horizontalalignment='center',
             verticalalignment='center',
             fontdict={'color': 'black', 'size': 10})

# Decorations
# Lighten borders
for side in ("top", "bottom", "right", "left"):
    plt.gca().spines[side].set_alpha(.3)

plt.yticks(df.index, df.cars, fontsize=10)
plt.xticks(fontsize=10)
plt.title('Diverging Dotplot of Car Mileage', fontdict={'size': 15})
# The label string was unterminated; the closing `$'` is restored.
plt.xlabel('$Mileage$', fontsize=10)
plt.grid(linestyle='--', alpha=0.5)
plt.xlim(-2.5, 2.5)
plt.show()
15、带Marker的发散型棒棒糖图 (Diverging Lollipop Chart with Markers)
使用不同形状,强调重点关注的数据区域。
# Diverging lollipop chart with markers: thin stems plus dots for the z-score
# of each car's mileage; one car is highlighted by color and size, and
# annotations/patches call out regions of interest.
# NOTE(review): relies on `pd` (pandas) and `plt` (matplotlib.pyplot) being
# imported earlier in the file -- not visible in this section.
import matplotlib.patches as patches

# Prepare Data
df = pd.read_csv("./datasets/mtcars.csv")
mpg = df['mpg']
# Standardize so the stems diverge around the mean (z = 0).
df['mpg_z'] = (mpg - mpg.mean()) / mpg.std()
df['colors'] = 'black'
# color fiat differently
df.loc[df.cars == 'Fiat X1-9', 'colors'] = 'darkorange'
# Sort by deviation; the fresh 0..n-1 index then serves as the y position.
df.sort_values('mpg_z', inplace=True)
df.reset_index(inplace=True)

# Draw plot
plt.figure(figsize=(10, 12), dpi=80)
# Stems
plt.hlines(y=df.index,
           xmin=0,
           xmax=df.mpg_z,
           color=df.colors,
           alpha=0.4,
           linewidth=1)
# Heads; the highlighted car gets a marker twice as large.
plt.scatter(df.mpg_z,
            df.index,
            color=df.colors,
            s=[600 if car == 'Fiat X1-9' else 300 for car in df.cars],
            alpha=0.6)

# Annotate
# The bracket arrow ('-[') spans a group of rows around y = 11; the
# coordinates are hard-coded for this dataset's sort order.
plt.annotate('Mercedes Models',
             xy=(0.0, 11.0),
             xytext=(1.0, 11),
             xycoords='data',
             fontsize=15,
             ha='center',
             va='center',
             bbox=dict(boxstyle='square', fc='firebrick'),
             arrowprops=dict(arrowstyle='-[, widthB=2.0, lengthB=1.5',
                             lw=2.0,
                             color='steelblue'),
             color='white')

# Add Patches
# Translucent rectangles shading the lowest and highest ends of the chart.
p1 = patches.Rectangle((-2.0, -1),
                       width=.3,
                       height=3,
                       alpha=.2,
                       facecolor='red')
p2 = patches.Rectangle((1.5, 27),
                       width=.8,
                       height=5,
                       alpha=.2,
                       facecolor='green')
plt.gca().add_patch(p1)
plt.gca().add_patch(p2)

# Decorate
# Tick styling is set once; earlier duplicate calls were overridden anyway.
plt.yticks(df.index, df.cars, fontsize=10)
plt.xticks(fontsize=10)
# The title previously read "Diverging Bars", copied from the bar chart.
plt.title('Diverging Lollipop Chart of Car Mileage', fontdict={'size': 15})
plt.grid(linestyle='--', alpha=0.5)
plt.show()
16、面积图(Area Chart)
将曲线与坐标轴之间的区域上色即得到面积图。面积图能够很好地展示整体与局部数据的关系,直观展示整体走势以及不同元素的涨跌状况。
# Area chart: month-over-month percentage change of `psavert` for the first
# 100 rows, filled green where the change is non-negative and red where it is
# non-positive.
# NOTE(review): relies on `pd`, `np` and `plt` being imported earlier in the
# file -- not visible in this section.

# Prepare Data
df = pd.read_csv("./datasets/economics.csv", parse_dates=['date']).head(100)
x = np.arange(df.shape[0])
savings = df.psavert
# Change relative to the previous row, in percent; the first row (which has
# no predecessor) becomes 0.
y_returns = (savings.diff().fillna(0) / savings.shift(1)).fillna(0) * 100

# Plot with plt.fill_between
plt.figure(figsize=(10, 8), dpi=80)
xs = x[1:]
ys = y_returns[1:]
# Green region first, then red -- same drawing order as two explicit calls.
for mask, shade in ((ys >= 0, 'green'), (ys <= 0, 'red')):
    plt.fill_between(xs,
                     ys,
                     0,
                     where=mask,
                     facecolor=shade,
                     interpolate=True,
                     alpha=0.7)

# Annotate
plt.annotate('Peak \n1975',
             xy=(94.0, 21.0),
             xytext=(88.0, 28),
             bbox=dict(boxstyle='square', fc='firebrick'),
             arrowprops=dict(facecolor='steelblue', shrink=0.05),
             fontsize=12,
             color='white')

# Decorations
# Tick labels look like "JAN-1968": upper-cased 3-letter month plus year.
xtickvals = [
    f"{str(month)[:3].upper()}-{year}"
    for year, month in zip(df.date.dt.year, df.date.dt.month_name())
]
ax = plt.gca()
# Label every 6th row to keep the axis readable.
ax.set_xticks(x[::6])
tick_font = {
    'horizontalalignment': 'center',
    'verticalalignment': 'center_baseline',
    'size': 12
}
ax.set_xticklabels(xtickvals[::6], rotation=90, fontdict=tick_font)
plt.ylim(-20, 32)
plt.xlim(1, 100)
plt.yticks(fontsize=12)
plt.title("Month Economics Return %", fontsize=12)
plt.ylabel('Monthly returns %', fontsize=12)
plt.grid(alpha=0.5)
plt.show()