Drawing scatter plots colored by kernel density estimate in MATLAB and Python
Table of contents
Effect of plotting a scatterplot with kernel density estimate coloring
Plotting a scatterplot and coloring it with the kernel density estimate can help us observe the distribution and density variation of the data.
- Visualize data distribution : Scatter plots can visually display the location and distribution of data points. By plotting data points on a scatter plot, we can quickly understand the overall distribution shape and density of the data.
- Highlight dense regions : Kernel density estimation is a nonparametric probability density estimation method that estimates the density around data points. By coloring the kernel density estimates, we can clearly see dense and sparse regions on the scatterplot, highlighting the concentration of the data.
- Discover cluster structure : If there is cluster structure in the data, the image of kernel density estimation can help us discover these clusters. By observing the change in color and the distribution of high-density regions, we can speculate on possible clustering patterns or groups in the data.
- Aid analysis and decision-making : A scatter plot colored by its kernel density estimate carries more information than a plain scatter plot. During data analysis and decision-making, it serves as supplementary evidence that helps us understand the characteristics of the data and make better-informed decisions.
Drawing a scatterplot and coloring the image with kernel density estimates can provide a visual representation of the distribution and density of the data, helping to discover the characteristics and patterns of the data, and assisting the data analysis and decision-making process.
Matlab code implementation
% Draw three side-by-side scatter plots (AI, EC and CLDAS against STA), each
% coloured by a bivariate kernel density estimate, annotate every panel with
% the correlation coefficient and mean absolute error, and export the figure
% as a PNG.
%
% Requires: ksdensity and corr (Statistics and Machine Learning Toolbox),
% export_fig (third-party), and .\mycolorbar\nclcolormap.mat.
clear; clc

% Load the colormap collection; fullfile keeps the path portable across
% operating systems (the file defines the struct "nclcolormap").
load(fullfile('.', 'mycolorbar', 'nclcolormap.mat'));
map = nclcolormap.MPL_rainbow;

% --- Panel layout in normalized figure units
x_0   = 0.03;      % left edge of the first panel
y_0   = 0.20;      % bottom edge of the first row
len   = 0.25;      % panel width
width = 0.50;      % panel height
d_x   = 0.33;      % horizontal offset between panel columns
d_y   = -0.32;     % vertical offset between panel rows
py    = [0 0 0];   % row index of each panel
px    = [0 1 2];   % column index of each panel

% --- Styling
siz    = 30;       % base font size; the actual sizes are offsets from it
lind   = 1.5;      % line width for axes and the 1:1 line
clims  = [0 7];    % colour limits shared by all panels
cticks = 0:7;      % colorbar tick positions

% --- Relative position (fraction of the axis range) of the text annotations
k1 = [0.05 1.05];  % panel title, placed just above the axes
k2 = [0.05 0.90];  % statistics text, placed inside the axes

for tt = 6
    %--- Generate the demo data
    sta   = randn(1000,1);
    ai    = randn(1000,1);
    ec    = randn(1000,1);
    cldas = randn(1000,1);

    panel_data = {ai, ec, cldas};
    panel_name = {'AI', 'EC', 'CLDAS'};
    panel_tag  = {'a', 'b', 'c'};

    %--- Figure window: white background, full screen
    set(gcf,'color','w','outerposition',get(0,'screensize'));

    for ip = 1:numel(panel_data)
        yv = panel_data{ip};

        % Bivariate kernel density evaluated at the sample points themselves.
        dens = ksdensity([sta,yv], [sta,yv]);

        axes('position',[x_0+d_x*px(ip), y_0+d_y*py(ip), len, width]);
        scatter(sta,yv,20,dens*100,'filled'); box on
        caxis(clims); colormap(map);

        hcb = colorbar;
        set(hcb,'position',[x_0+d_x*px(ip)+len+0.01, y_0+d_y*py(ip), 0.015, width]);
        set(hcb,'Limits',clims,'Ticks',cticks, ...
            'TickLabels',cellstr(num2str(cticks','%.0f')), ...
            'fontsize',siz-18,'fontweight','normal');

        set(gca,'TickDir','out','fontsize',siz-18,'linewidth',lind); grid on
        xlabel('STA','fontsize',siz-15,'fontweight','bold');
        ylabel(panel_name{ip},'fontsize',siz-15,'fontweight','bold');
        hold on

        % Use one common range for both axes so the 1:1 line is a true
        % diagonal. Taking the union of the automatic X and Y limits avoids
        % clipping points whose Y value lies outside the X range.
        auto_x = get(gca,'XLim');
        auto_y = get(gca,'YLim');
        lims = [min([auto_x, auto_y]), max([auto_x, auto_y])];
        xlim(lims); ylim(lims);
        set(gca,'ytick',get(gca,'xtick'))
        plot(lims,lims,'k--','linewidth',lind);

        %--- Add the statistics at fixed relative positions of the axes
        x_1 = lims(1) + k1(1)*(lims(2)-lims(1));
        y_1 = lims(1) + k1(2)*(lims(2)-lims(1));
        x_2 = lims(1) + k2(1)*(lims(2)-lims(1));
        y_2 = lims(1) + k2(2)*(lims(2)-lims(1));

        mae = mean(abs(yv-sta));
        % Report the actual p-value instead of a hard-coded "(p<0.001)".
        [r, p] = corr(sta,yv);
        if p < 0.001
            p_txt = ' (p<0.001)';
        else
            p_txt = [' (p = ', num2str(p,'%.3f'), ')'];
        end

        text(double(x_1),double(y_1), ...
            [panel_tag{ip}, ') ', panel_name{ip}, ' ', num2str(tt,'%02d'), 'UTC'], ...
            'color','k','fontweight','bold','fontsize',siz-15);
        text(double(x_2),double(y_2), ...
            {['r = ', num2str(r,'%.2f'), p_txt]; ['MAE = ', num2str(mae,'%.2f')]}, ...
            'color','k','fontweight','bold','fontsize',siz-15);
        set(gca,'layer','top')
    end

    %--- Save the figure
    save_path = './';
    % Restrict exist() to folders so a file or variable with the same name
    % is not mistaken for the output directory.
    if ~exist(save_path,'dir'), mkdir(save_path); end
    export_fig([save_path, 'STA-',num2str(tt,'%02d'),'UTC.png'],'-r300','-dpng');
    close all
end %for-tt
Matlab result presentation
Python code implementation
Reposted from the "Meteorological and Hydrological Research Cat" WeChat public account ( WeChat public platform (qq.com) )
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from sklearn.metrics import mean_squared_error
from matplotlib.pyplot import MultipleLocator
from statistics import mean
from matplotlib import rcParams
# Global Matplotlib text settings: Times New Roman at 16 pt, with the STIX
# font set for math text.
config = {}
config["font.family"] = 'Times New Roman'
config["font.size"] = 16
config["mathtext.fontset"] = 'stix'
rcParams.update(config)

# Demo data (stands in for reading real data): p1 is an evenly spaced ramp
# from 1 to 120, p2 is the same ramp plus Gaussian noise (mean 0, std 10).
p1 = np.linspace(1, 120, 1000)
p2 = np.linspace(1, 120, 1000) + np.random.normal(loc=0, scale=10, size=1000)
def get_regression_line(real, pred, data_range=(0, 1)):
    """Fit a least-squares line to (real, pred) and sample it at integer x values.

    Parameters
    ----------
    real : array-like
        Independent variable (x values).
    pred : array-like
        Dependent variable (y values), same length as ``real``.
    data_range : tuple of (start, stop)
        Half-open integer interval; the fitted line is evaluated at
        ``start, start + 1, ..., stop - 1``.

    Returns
    -------
    list
        Values ``k * a + b`` of the fitted line for every integer ``a`` in
        ``range(start, stop)``.
    """
    # Convert once to float arrays so the element-wise products below work
    # for plain lists and integer input as well.
    xs = np.asarray(real, dtype=float)
    ys = np.asarray(pred, dtype=float)

    # Closed-form simple linear regression:
    #   k = (mean(x)*mean(y) - mean(x*y)) / (mean(x)^2 - mean(x^2))
    x_mean = xs.mean()
    y_mean = ys.mean()
    k = (x_mean * y_mean - (xs * ys).mean()) / (x_mean * x_mean - (xs * xs).mean())
    b = y_mean - k * x_mean

    # Honor the requested range instead of a hard-coded 0..119.
    start, stop = data_range
    return [(k * a) + b for a in range(int(start), int(stop))]
# --- Plotting: scatter of p1 vs p2 coloured by point density, with the fitted
# regression line, the 1:1 line and summary statistics; saved to ./demo.png.
fig, ax = plt.subplots(figsize=(12, 9), dpi=600)

# Calculate the point density: Gaussian KDE evaluated at every sample point.
xy = np.vstack([p1, p2])
z = gaussian_kde(xy)(xy)

scatter = ax.scatter(p1, p2, marker='o', c=z*100, edgecolors=None, s=5,
                     label='LST', cmap='gist_rainbow')
cbar = plt.colorbar(scatter, shrink=1, orientation='vertical', extend='both',
                    pad=0.015, aspect=30)

# Regression line. Its values are sampled at consecutive integer x positions
# starting from 0, so plot them against explicit x values.
regression_line = get_regression_line(p1, p2, data_range=(0, 120))
ax.plot(np.arange(len(regression_line)), regression_line, 'r-', lw=1.5)

# Summary statistics shown in the upper-left corner.
x, y = p1, p2
BIAS = np.mean(x - y)
MSE = mean_squared_error(x, y)
RMSE = np.power(MSE, 0.5)
R = np.corrcoef(x, y)[0, 1]
ax.text(1, 116, '$N=%.f$' % len(y), family='Times New Roman')
ax.text(15, 116, '$R=%.2f$' % R, family='Times New Roman')
ax.text(1, 112, '$BIAS=%.2f$' % BIAS, family='Times New Roman')
ax.text(1, 108, '$RMSE=%.2f$' % RMSE, family='Times New Roman')

# 1:1 reference line.
ax.plot([0, 120], [0, 120], 'k--', lw=1.5)

# Frame thickness: same line width on all four spines.
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_linewidth(2.5)

# Major tick marks: thickness and length.
ax.tick_params(which='major', width=2.5, length=5)

# Title and axis labels (SimHei is needed to render the Chinese text).
font3 = {'family': 'SimHei', 'size': 16, 'color': 'k'}
plt.title('示例', fontdict=font3)
plt.xlabel('真实值', fontdict=font3)
plt.ylabel('预测值', fontdict=font3)

# Axis limits and ticks. The original np.arange(0, 1, 20) evaluates to [0],
# which left one tick per axis; a major tick every 20 units gives
# 0, 20, ..., 120 over the plotted range.
ax.set_xlim(0, 120)
ax.set_ylim(0, 120)
ax.xaxis.set_major_locator(MultipleLocator(20))
ax.yaxis.set_major_locator(MultipleLocator(20))

plt.savefig('./demo.png', dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()