❝本文分享「5个最常用的排序(Ranking)关系图」。
❞
目录
三、排序 (Ranking)关系图
17、排序柱形图(Ordered Bar Chart)
# Prepare data: mean city mileage (`cty`) per manufacturer, sorted ascending.
df_raw = pd.read_csv("./datasets/mpg_ggplot2.csv")
# Select the numeric column *before* aggregating. Calling DataFrame.mean() on
# a group that still contains the string `manufacturer` column raises
# TypeError on pandas >= 2.0.
df = df_raw.groupby('manufacturer')['cty'].mean().sort_values().reset_index()

# Draw plot: thick vertical lines act as the bars, one per manufacturer.
import matplotlib.patches as patches
fig, ax = plt.subplots(figsize=(10, 8), facecolor='white', dpi=80)
ax.vlines(x=df.index,
          ymin=0,
          ymax=df.cty,
          color='#dc2624',
          alpha=0.7,
          linewidth=20)

# Annotate each bar with its value, placed slightly above the bar top.
for i, cty in enumerate(df.cty):
    ax.text(i, cty + 0.5, round(cty, 1), horizontalalignment='center')

# Title, Label, Ticks and Ylim
# The plotted column is `cty`, so the title names city (not highway) mileage.
ax.set_title('Bar Chart for City Mileage', fontdict={'size': 12})
plt.xticks(df.index,
           df.manufacturer.str.upper(),
           rotation=60,
           horizontalalignment='right',
           fontsize=10)
plt.yticks(fontsize=12)
plt.ylabel('Miles Per Gallon', fontsize=12)
# Set the limits through the Axes. Assigning to `plt.ylim` would replace the
# pyplot function with a tuple and leave the axis limits untouched.
ax.set_ylim(0, 30)

# Add shaded background bands (coordinates are in figure fractions).
p1 = patches.Rectangle((.57, -0.005),
                       width=.33,
                       height=.13,
                       alpha=.1,
                       facecolor='green',
                       transform=fig.transFigure)
p2 = patches.Rectangle((.124, -0.005),
                       width=.446,
                       height=.13,
                       alpha=.1,
                       facecolor='red',
                       transform=fig.transFigure)
fig.add_artist(p1)
fig.add_artist(p2)
plt.show()
更多条形图介绍:
-
「Python可视化|matplotlib12-垂直|水平|堆积条形图详解」
扫描二维码关注公众号,回复: 12941664 查看本文章
18、棒棒糖图(Lollipop Chart)
将上面的柱子换成棒棒糖即可,展示效果相同。
# Lollipop Chart
# Prepare data: mean city mileage (`cty`) per manufacturer, sorted ascending.
df_raw = pd.read_csv("./datasets/mpg_ggplot2.csv")
# Select the numeric column *before* aggregating. Calling DataFrame.mean() on
# a group that still contains the string `manufacturer` column raises
# TypeError on pandas >= 2.0.
df = df_raw.groupby('manufacturer')['cty'].mean().sort_values().reset_index()

# Draw plot: a thin stem per manufacturer with a dot on top.
fig, ax = plt.subplots(figsize=(10, 8), dpi=80)
ax.vlines(x=df.index,
          ymin=0,
          ymax=df.cty,
          color='#dc2624',
          alpha=0.7,
          linewidth=4)
ax.scatter(x=df.index, y=df.cty, s=85, color='#dc2624', alpha=0.7)

# Title, Label, Ticks and Ylim
# The plotted column is `cty`, so the title names city (not highway) mileage.
ax.set_title('Lollipop Chart for City Mileage', fontdict={'size': 12})
plt.ylabel('Miles Per Gallon', fontsize=12)
ax.set_xticks(df.index)
ax.set_xticklabels(df.manufacturer.str.upper(),
                   rotation=60,
                   fontdict={
                       'horizontalalignment': 'right',
                       'size': 11
                   })
ax.set_ylim(0, 30)
plt.yticks(fontsize=12)

# Annotate each lollipop with its value, placed slightly above the dot.
for row in df.itertuples():
    ax.text(row.Index,
            row.cty + .5,
            s=round(row.cty, 2),
            horizontalalignment='center',
            verticalalignment='bottom',
            fontsize=12)
plt.show()
19、点图 (Dot Plot)
将上面的棒棒去掉并水平放置即可,用于在水平方向展示各个指标的排名情况。
# Prepare data: mean city mileage (`cty`) per manufacturer, sorted ascending.
df_raw = pd.read_csv("./datasets/mpg_ggplot2.csv")
# Select the numeric column *before* aggregating. Calling DataFrame.mean() on
# a group that still contains the string `manufacturer` column raises
# TypeError on pandas >= 2.0.
df = df_raw.groupby('manufacturer')['cty'].mean().sort_values().reset_index()

# Draw plot: one horizontal guide line per manufacturer plus a dot at its value.
fig, ax = plt.subplots(figsize=(10, 8), dpi=80)
ax.hlines(y=df.index,
          xmin=11,
          xmax=26,
          color='gray',
          alpha=0.7,
          linewidth=1,
          linestyles='dashdot')
ax.scatter(y=df.index, x=df.cty, s=75, color='#dc2624', alpha=0.7)

# Title, Label, Ticks and Xlim
# The plotted column is `cty`, so the title names city (not highway) mileage.
ax.set_title('Dot Plot for City Mileage', fontdict={'size': 12})
plt.xlabel('Miles Per Gallon', fontsize=12)
ax.set_yticks(df.index)
ax.set_yticklabels(df.manufacturer.str.title(),
                   fontdict={
                       'horizontalalignment': 'right',
                       'fontsize': 12
                   })
plt.xticks(fontsize=12)
ax.set_xlim(10, 27)
plt.show()
20、坡图(Slope Chart)
可以很好地比较多个项目在两个不同时期的情况。
# Compare the 'Before' and 'After' positions of a given person/item.
import matplotlib.lines as mlines

# Load the per-continent GDP table.
df = pd.read_csv("./datasets/gdppercap.csv")

# "<continent>, <rounded value>" captions for the 1952 and 1957 columns.
left_label = [
    f"{continent}, {round(value)}"
    for continent, value in zip(df.continent, df['1952'])
]
right_label = [
    f"{continent}, {round(value)}"
    for continent, value in zip(df.continent, df['1957'])
]

# Per-row colour: 'red' when (1952 value - 1957 value) is negative,
# otherwise 'green'.
klass = [
    'red' if (before - after) < 0 else 'green'
    for before, after in zip(df['1952'], df['1957'])
]
# Draw a line segment between two points on the current axes.
# https://stackoverflow.com/questions/36470343/how-to-draw-a-line-with-matplotlib/36479941
def newline(p1, p2, color='black'):
    """Add a segment from ``p1`` to ``p2`` to the current axes and return it.

    ``p1`` and ``p2`` are indexable as ``[x, y]``. The segment is drawn with
    round markers of size 6 at both ends. It is coloured 'red' when
    ``p1[1] - p2[1] > 0`` and 'green' otherwise.

    NOTE: ``color`` is accepted but not used; the colour always comes from
    the comparison above.
    """
    ax = plt.gca()
    xs = [p1[0], p2[0]]
    ys = [p1[1], p2[1]]
    if p1[1] - p2[1] > 0:
        slope_color = 'red'
    else:
        slope_color = 'green'
    segment = mlines.Line2D(xs, ys,
                            color=slope_color,
                            marker='o',
                            markersize=6)
    ax.add_line(segment)
    return segment
fig, ax = plt.subplots(1, 1, figsize=(10, 8), dpi=80)

# Dotted vertical guides at x=1 (1952) and x=3 (1957).
for x_pos in (1, 3):
    ax.vlines(x=x_pos,
              ymin=500,
              ymax=13000,
              color='black',
              alpha=0.7,
              linewidth=1,
              linestyles='dotted')

# Points: one small black dot per row on each guide.
for x_pos, year in ((1, '1952'), (3, '1957')):
    ax.scatter(y=df[year],
               x=np.repeat(x_pos, df.shape[0]),
               s=10,
               color='black',
               alpha=0.7)

# Line segments and annotation: connect each row's two values and caption
# both ends with "<continent>, <rounded value>".
for before, after, continent in zip(df['1952'], df['1957'], df['continent']):
    newline([1, before], [3, after])
    ax.text(1 - 0.05,
            before,
            continent + ', ' + str(round(before)),
            horizontalalignment='right',
            verticalalignment='center',
            fontdict={'size': 14})
    ax.text(3 + 0.05,
            after,
            continent + ', ' + str(round(after)),
            horizontalalignment='left',
            verticalalignment='center',
            fontdict={'size': 14})

# 'Before' and 'After' headings at the top of each guide.
heading_font = {'size': 15, 'weight': 700}
ax.text(1 - 0.05,
        13000,
        'BEFORE',
        horizontalalignment='right',
        verticalalignment='center',
        fontdict=heading_font)
ax.text(3 + 0.05,
        13000,
        'AFTER',
        horizontalalignment='left',
        verticalalignment='center',
        fontdict=heading_font)

# Decoration
ax.set_title("Slopechart: Comparing GDP Per Capita between 1952 vs 1957",
             fontdict={'size': 18})
ax.set_xlim(0, 4)
ax.set_ylim(0, 14000)
ax.set_ylabel('Mean GDP Per Capita', fontsize=15)
ax.set_xticks([1, 3])
ax.set_xticklabels(["1952", "1957"], fontdict={'size': 15, 'weight': 700})
plt.yticks(np.arange(500, 13000, 2000), fontsize=12)

# Make all four borders fully transparent.
for side in ("top", "bottom", "right", "left"):
    plt.gca().spines[side].set_alpha(.0)
plt.show()
21、哑铃图(Dumbbell Plot)
可以很好地比较多个项目在两个不同时期的情况,更重要的是还能展示不同项目的排序信息。
# Show the ordering together with the before/after value range.
import matplotlib.lines as mlines

# Load the data, order the rows by the 2014 value, then renumber them from 0.
# reset_index() keeps the pre-sort row labels in a new 'index' column.
df = pd.read_csv("./datasets/health.csv").sort_values('pct_2014').reset_index()
# Helper that draws one line segment on the current axes.
def newline(p1, p2, color='black'):
    """Add a '#d5695d' segment from ``p1`` to ``p2`` to the current axes.

    ``p1`` and ``p2`` are indexable as ``[x, y]``. Returns the created
    ``Line2D``.

    NOTE: ``color`` is accepted but not used; the segment colour is fixed.
    """
    ax = plt.gca()
    xs = [p1[0], p2[0]]
    ys = [p1[1], p2[1]]
    segment = mlines.Line2D(xs, ys, color='#d5695d')
    ax.add_line(segment)
    return segment
# Figure and Axes
fig, ax = plt.subplots(1, 1, figsize=(10, 8), facecolor='#f8f2e4', dpi=80)

# Dotted vertical guides, one per labelled x tick.
x_ticks = [.05, .10, .15, .20]
ax.vlines(x=x_ticks,
          ymin=0,
          ymax=26,
          color='black',
          alpha=1,
          linewidth=1,
          linestyles='dotted')

# Place each row at its rank in the sorted frame (the positional index).
# The 'index' column created by reset_index() holds the pre-sort row labels,
# so plotting against it would discard the ordering by pct_2014.
rank = df.index

# Points: 2013 value and 2014 value for every row.
ax.scatter(y=rank, x=df['pct_2013'], s=50, color='#dc2624')
ax.scatter(y=rank, x=df['pct_2014'], s=50, color='#e87a59')

# Line segments joining each row's 2013 and 2014 points.
for i, p1, p2 in zip(rank, df['pct_2013'], df['pct_2014']):
    newline([p1, i], [p2, i])

# Decoration
ax.set_facecolor('#f8f2e4')
ax.set_title("Dumbell Chart: Pct Change - 2013 vs 2014", fontdict={'size': 18})
# NOTE(review): this y-label looks copied from the GDP slope chart; the y
# axis here is the row rank. Confirm the intended label.
ax.set(xlim=(0, .25), ylim=(-1, 27), ylabel='Mean GDP Per Capita')
plt.ylabel('Mean GDP Per Capita', fontsize=15)
plt.yticks(fontsize=15)
ax.set_xticks(x_ticks)
# Labels must match the tick positions 0.05, 0.10, 0.15 and 0.20.
ax.set_xticklabels(['5%', '10%', '15%', '20%'], fontdict={'size': 15})
plt.show()