Data Visualization Second Edition - Part 03 - Chapter 11 - Related

Data Visualization Second Edition - Part 03 - Chapter 11 - Related

Summarize

This series of posts is a teaching resource based on the book "Data Visualization, Second Edition". This article covers Chapter 11 and presents example cases of correlation visualization.

Visual Perspective - Correlation

insert image description here

insert image description here

Code

install dependencies

# Install the third-party packages for this chapter, each pulled from the
# Tsinghua PyPI mirror passed via -i.
pip install scikit-learn -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install seaborn -i https://pypi.tuna.tsinghua.edu.cn/simple
# The next three packages are pinned to exact versions.
# NOTE(review): tushare and mplfinance are not imported by any snippet visible
# in this chapter - presumably needed by other chapters; confirm.
pip install tushare==1.2.89 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install mplfinance==0.12.9b7 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install pyheatmap==0.1.12  -i https://pypi.tuna.tsinghua.edu.cn/simple

Scatterplot

Scatterplot 1 - Basic Scatterplot

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
import seaborn as sns

# Basic scatter plot: two series that share the same ascending x values,
# one with ascending y values and one with descending y values.
a = np.random.randint(30, size=100)  # 100 random integers in [0, 30)
b = np.random.randint(20, size=100)  # 100 random integers in [0, 20)
x_ascending = np.sort(a)
y_ascending = np.sort(b)
y_descending = y_ascending[::-1]  # same values, largest first
for y_values in (y_ascending, y_descending):
    plt.scatter(x_ascending, y_values)
plt.show()

The output is:
insert image description here

Scatter chart 2 - Scatter line chart

# Scatter plot combined with a line chart: the same points are drawn once as
# markers and once as a connecting line.
points = [
    (2, 12), (4, 17), (6, 10), (9, 32), (12, 15),
    (15, 24), (18, 5), (21, 20), (24, 16),
]
x, y = (list(axis_values) for axis_values in zip(*points))
plt.scatter(x, y)  # markers
plt.plot(x, y)  # connecting line
plt.show()

The output is:
insert image description here

Scatterplot 3 - Regression Scatterplot

# Scatter plot of synthetic regression data.
# make_regression returns the feature matrix and the target vector. With
# n_samples=80 and n_features=1 the features have shape (80, 1) and the
# targets have shape (80,); noise=10 perturbs the targets.
data = datasets.make_regression(
    n_samples=80, n_features=1, n_targets=1, noise=10, random_state=144,
)
x, y = data
plt.scatter(x[:, 0], y, s=8)  # s=8 sets the marker size
plt.show()

The output is:
insert image description here

Scatterplot 4 - Scatterplot with Regression Line

# Scatter plot with a fitted regression line.
# make_regression returns the feature matrix and the target vector. With
# n_samples=30 and n_features=1 the features have shape (30, 1) and the
# targets have shape (30,); noise=20 perturbs the targets.
data = datasets.make_regression(
    n_samples=30, n_features=1, n_targets=1, noise=20, random_state=144,
)
x, y = data
feature = x[:, 0]
sns.regplot(x=feature, y=y)  # scatter the points and add the regression line
plt.show()

The output is:
insert image description here

Scatterplot 5 - Scatterplot of Categorical Data

# Scatter plots of synthetic classification data.

# Figure 1: 300 samples with two informative features, drawn in one colour
# (the class labels y1 are not used for colouring).
X1, y1 = datasets.make_classification(
    n_samples=300,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    random_state=1,
)
first_feature, second_feature = X1[:, 0], X1[:, 1]
plt.scatter(first_feature, second_feature)
plt.show()

# Figure 2: two interleaving half-moons, coloured by class label.
moons = datasets.make_moons(n_samples=150, noise=.09, random_state=10)
x, y = moons
plt.scatter(x[:, 0], x[:, 1], c=y)
plt.show()

The output is:
insert image description here

insert image description here

bubble chart

Bubble Chart 1-

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# Bubble chart 1: each point gets its own marker area and a random colour value.
a = [5, 6, 8, 20, 27, 14, 19, 24, 13, 28,
     30, 23, 7, 8, 33, 6, 9, 26, 27, 4]
b = [5, 7, 8, 16, 23, 28, 23, 20, 29, 34,
     35, 30, 32, 27, 14, 19, 24, 23, 28, 30]
size = [212, 225, 375, 420, 225, 356, 287, 300, 382, 375,
        425, 543, 654, 609, 543, 435, 320, 525, 656, 287]
colors = np.random.rand(len(a))  # one random value in [0, 1) per bubble
bubble_style = {"s": size, "c": colors, "alpha": 0.6}
plt.scatter(a, b, **bubble_style)
plt.show()

The output is:
insert image description here

Bubble Chart 2-

# Bubble chart 2: six green bubbles on a plain white seaborn theme.
sns.set(style="white")
a = [0.1, 0.2, 0.35, 0.5, 0.8, 0.9]
b = [0.4, 0.1, 0.5, 0.9, 0.4, 1]
size = [2500, 2560, 250, 2400, 2500, 750]
plt.scatter(a, b, s=size, c='g', alpha=0.6)
# Fix the axis limits explicitly so they extend beyond the data range.
plt.xlim(-0.1, 1.1)
plt.ylim(-0.2, 1.2)
plt.show()

The output is:
insert image description here

correlation diagram

Related Figure 1-

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets

# Correlation plot 1: scatter-plot matrix of the iris features, with a
# histogram of each feature on the diagonal.
x = datasets.load_iris().data
n_features = x.shape[1]
plt.figure(figsize=(8, 8))
sns.set(style="darkgrid")

# Matplotlib has trouble displaying Chinese text with its default font;
# these two lines select a font that can render the labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']
for i in range(n_features):  # grid row: feature shown on the y axis
    for j in range(n_features):  # grid column: feature shown on the x axis
        plt.subplot(n_features, n_features, i * n_features + j + 1)
        if i != j:
            # Column feature on x and row feature on y, so the plotted data
            # agree with the outer axis labels set below.
            plt.scatter(x[:, j], x[:, i], s=3)
        else:
            plt.hist(x[:, i], edgecolor='black')  # histogram with black bar edges
        plt.grid(True)
        if j == 0:
            plt.ylabel(name[i])  # y labels only in the first column
        if i == n_features - 1:
            plt.xlabel(name[j])  # x labels only in the last row

plt.show()

insert image description here

Related Figure 2-

# Correlation plot 2: scatter-plot matrix of the iris features with a
# regression line in every off-diagonal panel and histograms on the diagonal.

x = datasets.load_iris().data
n_features = x.shape[1]
plt.figure(figsize=(8, 8))
sns.set(style="darkgrid")
# Matplotlib has trouble displaying Chinese text with its default font;
# these two lines select a font that can render the labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']
for i in range(n_features):  # grid row: feature shown on the y axis
    for j in range(n_features):  # grid column: feature shown on the x axis
        plt.subplot(n_features, n_features, i * n_features + j + 1)
        if i != j:
            # Column feature on x and row feature on y, so the plotted data
            # (and the fitted line) agree with the outer axis labels.
            sns.regplot(x=x[:, j], y=x[:, i], marker='.')
        else:
            plt.hist(x[:, i], edgecolor='black')  # histogram with black bar edges
        if j == 0:
            plt.ylabel(name[i])  # y labels only in the first column
        if i == n_features - 1:
            plt.xlabel(name[j])  # x labels only in the last row
plt.show()

insert image description here

Related Figure 3-

# Correlation plot 3: seaborn's built-in pair plot of the iris features.

sns.set(style="ticks", color_codes=True)
# Pass the font list along with the style so the Chinese column names are
# not rendered as garbled characters.
sns.set_style('whitegrid', {'font.sans-serif': ['simhei', 'FangSong']})
name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']

# Load the dataset once and reuse it instead of loading it a second time.
data = datasets.load_iris()
x = pd.DataFrame(data.data, columns=name)
sns.pairplot(x)
plt.show()

insert image description here

Related Figure 4-

# Correlation plot 4: scatter-plot matrix of the iris features split by class,
# with per-class kernel density estimates on the diagonal.

iris = datasets.load_iris()  # load once; both features and targets are used
x = iris.data
y = iris.target
n_features = x.shape[1]
plt.figure(figsize=(8, 8))
sns.set(style="darkgrid")
# Matplotlib has trouble displaying Chinese text with its default font;
# these two lines select a font that can render the labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']
labels = ['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾']  # legend text for classes 0, 1, 2
for i in range(n_features):  # grid row: feature shown on the y axis
    for j in range(n_features):  # grid column: feature shown on the x axis
        plt.subplot(n_features, n_features, i * n_features + j + 1)
        if i != j:
            # One scatter call per class so each class gets its own colour.
            # Column feature on x and row feature on y, matching the labels.
            for species in range(len(labels)):
                members = y == species
                plt.scatter(x[members, j], x[members, i], s=3)
            plt.grid(True)
        else:
            # `fill` replaces the deprecated `shade` argument of kdeplot.
            for species in range(len(labels)):
                sns.kdeplot(data=x[y == species, i], fill=True)
        if j == 0:
            plt.ylabel(name[i])  # y labels only in the first column
        if i == n_features - 1:
            plt.xlabel(name[j])  # x labels only in the last row
# The legend is attached to the last subplot drawn; the anchor is given
# relative to that subplot (presumably tuned to sit beside the top row).
plt.legend(labels, bbox_to_anchor=(1.05, 4.6), loc=2)
plt.show()

insert image description here

Related Figure 5-

# Correlation plot 5: scatter-plot matrix of the iris features split by class,
# with scatter plots in every panel (including the diagonal).

data = datasets.load_iris()  # load once; both features and targets are used
x = data.data
y = data.target
n_features = x.shape[1]
plt.figure(figsize=(8, 8))
sns.set(style="darkgrid")
# Matplotlib has trouble displaying Chinese text with its default font;
# these two lines select a font that can render the labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']
species_labels = ['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾']  # legend text for classes 0, 1, 2
for i in range(n_features):  # grid row: feature shown on the y axis
    for j in range(n_features):  # grid column: feature shown on the x axis
        plt.subplot(n_features, n_features, i * n_features + j + 1)
        # One scatter call per class so each class gets its own colour.
        # Column feature on x and row feature on y, matching the labels.
        for species, species_label in enumerate(species_labels):
            members = y == species
            plt.scatter(x[members, j], x[members, i], s=3, label=species_label)
        plt.grid(True)
        if j == 0:
            plt.ylabel(name[i])  # y labels only in the first column
        if i == n_features - 1:
            plt.xlabel(name[j])  # x labels only in the last row
# The legend is attached to the last subplot drawn; the anchor is given
# relative to that subplot (presumably tuned to sit beside the top row).
plt.legend(bbox_to_anchor=(1.05, 4.6), loc=2)
plt.show()

insert image description here

Related Figure 6-

# Correlation plot 6: like correlation plot 5, but the grid columns run
# through the features in reverse order.

data = datasets.load_iris()  # load once; both features and targets are used
x = data.data
y = data.target
n_features = x.shape[1]
last = n_features - 1
plt.figure(figsize=(8, 8))
sns.set(style="darkgrid")
# Matplotlib has trouble displaying Chinese text with its default font;
# these two lines select a font that can render the labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
name = ['花萼长度', '花萼宽度', '花瓣长度', '花瓣宽度']
species_labels = ['山鸢尾', '变色鸢尾', '维吉尼亚鸢尾']  # legend text for classes 0, 1, 2
for i in range(n_features):  # grid row: feature shown on the y axis
    for j in range(n_features):  # grid column, mapped to a reversed feature index
        column_feature = last - j
        plt.subplot(n_features, n_features, i * n_features + j + 1)
        # One scatter call per class so each class gets its own colour.
        # Column feature on x and row feature on y, matching the labels.
        for species, species_label in enumerate(species_labels):
            members = y == species
            plt.scatter(x[members, column_feature], x[members, i], s=3,
                        label=species_label)
        plt.grid(True)
        if j == 0:
            plt.ylabel(name[i])  # y labels only in the first column
        if i == last:
            plt.xlabel(name[column_feature])  # x labels only in the last row
# The legend is attached to the last subplot drawn; the anchor is given
# relative to that subplot (presumably tuned to sit beside the top row).
plt.legend(bbox_to_anchor=(1.05, 4.6), loc=2)
plt.show()

insert image description here

heat map

Heat map 1-

from pyheatmap.heatmap import HeatMap
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Heat map 1: annotated heat map of a random 4x4 matrix, with the colour
# scale fixed to the range [-1, 1].
x = np.random.randn(4, 4)
plt.figure(figsize=(6, 5))
heatmap_options = {"annot": True, "vmin": -1, "vmax": 1}
sns.heatmap(x, **heatmap_options)
plt.show()

insert image description here

Heat map 2-

from pyheatmap.heatmap import HeatMap
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Heat map 2: click map and heat map built with pyheatmap from a fixed set of
# 16 (x, y) coordinates, each repeated 10 times.
X = [1, 23, 4, 34, 3, 47, 38, 7, 11, 9, 8, 44, 13, 5, 45, 24] * 10
Y = [26, 5, 35, 24, 18, 7, 28, 49, 6, 23, 54, 28, 8, 21, 52, 42] * 10
# Each record is [x, y, 1]; the trailing 1 is presumably the point's weight
# or hit count - confirm against the pyheatmap documentation.
data = [[int(px), int(py), 1] for px, py in zip(X, Y)]
heat = HeatMap(data)
heat.clickmap().show()  # click map
heat.heatmap().show()  # heat map

insert image description here

insert image description here

2D density map

2D Density Map 1-

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
import warnings

# Silence every warning for the remainder of the script.
warnings.filterwarnings("ignore")

# 2D density map 1: filled bivariate KDE of iris feature columns 0 and 3.

x = datasets.load_iris().data

# `fill` replaces the deprecated `shade` argument. The deprecated
# `shade_lowest=False` corresponds to leaving `thresh` at its default of 0.05,
# so the lowest density level is not filled.
sns.kdeplot(x=x[:, 0], y=x[:, 3], cmap='Purples',
            fill=True, thresh=0.05)
plt.show()

insert image description here

2D Density Map 2-

# 2D density map 2: filled bivariate KDE of 100 samples drawn with
# multivariate_normal, with a colour bar.
# NOTE(review): `cov` is not symmetric (cov[0][1] is 1 but cov[1][0] is 0.5),
# so it is not a valid covariance matrix. NumPy only warns about this by
# default, and that warning is invisible when warnings are globally ignored.
# The values are left unchanged because the intended matrix is unknown.
cov = [[0.2, 1], [0.5, 0.1]]
mean = [0.4, 0.7]
np.random.seed(123)  # fixed seed so the figure is reproducible
y = np.random.multivariate_normal(mean, cov, 100)
# `fill` replaces the deprecated `shade` argument of kdeplot.
sns.kdeplot(x=y[:, 0], y=y[:, 1], fill=True, cmap="Blues", cbar=True)
plt.show()

insert image description here

2D density plot 3-

# 2D density plot 3: joint KDE plot of a standard-normal sample against a
# binomial(50, 0.5) sample.
sns.set(style="white", font_scale=1.5)
np.random.seed(123)  # fixed seed so the figure is reproducible
x = np.random.randn(100)
y = np.random.binomial(50, 0.5, size=100)
joint_grid = sns.jointplot(x=x, y=y, kind='kde', cmap="Blues")
plt.show()

insert image description here

2D Density Map 4-

# 2D density map 4: filled bivariate KDE with 60 contour levels and a
# reversed cubehelix colour map, drawn on a 6x6 inch figure.
# No random seed is set here, so the samples depend on the current state of
# NumPy's global random generator.
mean, cov = [0, 1], [(1, .5), (.5, 1)]
data = np.random.multivariate_normal(mean, cov, 200)
df = pd.DataFrame(data, columns=["x", "y"])
f, ax = plt.subplots(figsize=(6, 6))
cmap = sns.cubehelix_palette(as_cmap=True, dark=0, light=1, reverse=True)
# `levels` and `fill` replace the deprecated `n_levels` and `shade` arguments.
sns.kdeplot(x=df.x, y=df.y, cmap=cmap, levels=60, fill=True)
plt.show()

insert image description here

2D Density Figure 5-

# 2D density plot 5: hexagonal-bin joint plot of a standard-normal sample
# against a binomial(50, 0.5) sample.
sns.set(style="white", font_scale=1.5)
np.random.seed(123)  # fixed seed so the figure is reproducible
x = np.random.randn(100)
y = np.random.binomial(50, 0.5, size=100)
joint_grid = sns.jointplot(x=x, y=y, kind='hex', cmap="Blues")
plt.show()

insert image description here

Textbook screenshot

insert image description here

Guess you like

Origin blog.csdn.net/m0_38139250/article/details/130378117