Implementing normalization and standardization in Python

Data preprocessing - normalization and standardization with scikit-learn, plus a plot of the results


The dataset is: https://archive.ics.uci.edu/ml/datasets/wine (only the first three columns are selected here)

full code

import pandas as pd
import numpy as np

from sklearn import preprocessing

# Read the wine data: the file has no header row (header=None), and only the
# first three columns are loaded.
df = pd.read_csv('wine_data.csv', header=None, usecols=[0, 1, 2])

# Assign the column names.
df.columns = ['Class label', 'Alcohol', 'Malic acid']

# The two feature columns that both scalers are fitted on and applied to.
features = df[['Alcohol', 'Malic acid']]

# Standardization: fit the scaler and transform the same data in one step.
std_scale = preprocessing.StandardScaler()
df_std = std_scale.fit_transform(features)

# Normalization (min-max scaling), fitted and applied the same way.
minmax_scale = preprocessing.MinMaxScaler()
df_minmax = minmax_scale.fit_transform(features)
# %matplotlib inline
import matplotlib.pyplot as plt

def plot():
    """Scatter-plot the raw, standardized and min-max-scaled samples together.

    Reads the module-level ``df`` (columns ``'Alcohol'`` and ``'Malic acid'``),
    ``df_std`` and ``df_minmax`` (indexed as 2-D arrays, column 0 on the
    x-axis and column 1 on the y-axis), draws all three on one figure and
    displays it with ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))

    # Draw the three versions of the data on the same axes.
    plt.scatter(df['Alcohol'], df['Malic acid'],
                color='green', label='input scale', alpha=0.6)
    # Math symbols are written as $\...$; the label is a raw string so that
    # the backslashes in \mu and \sigma are not treated as (invalid) Python
    # escape sequences.
    plt.scatter(df_std[:, 0], df_std[:, 1],
                color='red',
                label=r'Z-Score scaled[ $N ( \mu=0, \sigma=1 ) $ ]',
                alpha=0.3)
    plt.scatter(df_minmax[:, 0], df_minmax[:, 1],
                color='b', label='Min-Max scaled [min=0, max=1]', alpha=0.3)

    # plt.title('Alcohol and Malic Acid content of the wine dataset')
    plt.xlabel('Alcohol', fontsize=13, fontweight='bold')
    plt.ylabel('Malic Acid', fontsize=13, fontweight='bold')
    # Set the tick-label font, size and bold weight on both axes.
    plt.yticks(fontproperties='Times New Roman', size=13, weight='bold')
    plt.xticks(fontproperties='Times New Roman', size=13, weight='bold')
    plt.legend(loc='best')
    # plt.grid()

    # Automatically adjust the layout parameters so the plot fills the
    # whole figure area.
    plt.tight_layout()
    plt.show()
# plot() already finishes with plt.show(), so no second call is needed here.
plot()

run

The effect is shown in the figure: insert image description here
Reference link:
[1]: https://blog.csdn.net/sanjianjixiang/article/details/100807176

Guess you like

Origin blog.csdn.net/qq_41968196/article/details/127645090