归一化

start

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

# Build a 50-row demo frame: random heights, weights and a binary flag.
# The float samples are clipped to the uint8 range and converted with
# astype() instead of passing dtype=np.uint8 to the constructor: pandas >= 2.0
# raises ValueError when asked to coerce non-round floats to an integer dtype
# at construction time, and a negative sample is not representable in uint8.
df = DataFrame({
    'height': np.random.normal(170, scale=15, size=50),
    'weight': np.random.normal(60, scale=20, size=50),
    'set': np.random.randint(0, 2, size=50),
}).clip(lower=0, upper=255).astype(np.uint8)

# Print the index range, per-column dtype and memory usage.
df.info()

out: <class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
height 50 non-null uint8
weight 50 non-null uint8
set 50 non-null uint8
dtypes: uint8(3)
memory usage: 230.0 bytes

# Per-column summary statistics (count, mean, std, min, quartiles, max).
desc = df.describe()
# Bare expression: shows the summary table when run as a notebook/REPL cell.
desc

out: height weight set
count 50.000000 50.000000 50.000000
mean 167.000000 58.320000 0.400000
std 15.286382 18.551022 0.494872
min 129.000000 14.000000 0.000000
25% 158.500000 49.000000 0.000000
50% 167.000000 59.000000 0.000000
75% 177.750000 70.000000 1.000000
max 203.000000 93.000000 1.000000

# Normalization 1: min-max scaling,
# (value - column min) / (column max - column min).
df1 = df.sub(df.min()).div(df.max().sub(df.min()))

# Normalization 2: z-score, (value - column mean) / column std, so each
# column of the result has mean 0 and standard deviation 1.
df2 = df.sub(df.mean()).div(df.std())

pandas 的绘图

import matplotlib.pyplot as plt

# Ten rows of random integers drawn from [1, 10) for columns 'A' and 'B'.
df = DataFrame(
    data=np.random.randint(low=1, high=10, size=(10, 2)),
    columns=['A', 'B'],
)

df.plot.line()   # line plot (the default kind)

'''- 'line' : line plot (default)
    - 'bar' : vertical bar plot
    - 'barh' : horizontal bar plot
    - 'hist' : histogram
    - 'box' : boxplot
    - 'kde' : Kernel Density Estimation plot
    - 'density' : same as 'kde'
    - 'area' : area plot
    - 'pie' : pie plot
    - 'scatter' : scatter plot
    - 'hexbin' : hexbin plot'''
df.plot.bar()   # vertical bar plot
df.plot.box()   # box plot

# 60 random samples of height and age stored as uint8.
# The floats are clipped to [0, 255] and converted with astype(): passing
# dtype=np.uint8 to the constructor raises ValueError on pandas >= 2.0 when
# the float data contains non-round values.
df = DataFrame({
    'height': np.random.normal(170, size=60, scale=15),
    'age': np.random.normal(20, size=60, scale=2),
}).clip(lower=0, upper=255).astype(np.uint8)
# Histogram of heights. density=True puts a density on the y axis so it is
# comparable with the density curve; density=False would show raw counts
# ("Frequency") instead.
df['height'].plot.hist(density=True)
# Kernel density estimate of the same column, drawn in red.
df['height'].plot.density(color='red')

频率条形图与密度曲线图

age weight height

# 1000 random samples of height and age stored as uint8.
# The floats are clipped to [0, 255] and converted with astype(): passing
# dtype=np.uint8 to the constructor raises ValueError on pandas >= 2.0 when
# the float data contains non-round values.
df = DataFrame({
    'height': np.random.normal(170, size=1000, scale=15),
    'age': np.random.normal(20, size=1000, scale=2),
}).clip(lower=0, upper=255).astype(np.uint8)

def change_self(x):
    """Nudge an outlying height value back toward the middle of the range.

    Values below 145 are increased by a random integer in [0, 50); values
    above 200 are decreased by a random integer in [0, 50). Anything else
    is returned unchanged.
    """
    too_low = x < 145
    if too_low:
        x += np.random.randint(low=0, high=50)
    too_high = x > 200
    if too_high:
        x -= np.random.randint(low=0, high=50)
    return x
    
# Apply the height adjustment to every value in the column.
df['height'] = df['height'].map(change_self)

# Scatter plot of height against age.
df.plot.scatter(x='age', y='height')

def change_self(x):
    """Derive a noisy weight value from a height value.

    Starts from ((x - 100) * 2 - 30) / 2, adds a random integer in [0, 50)
    and subtracts a random integer in [0, 30). The result is then pushed
    upward in random steps while it is below 35 and downward in random
    steps while it is above 125, so the returned value lies in [35, 125].
    """
    baseline = ((x - 100) * 2 - 30) / 2
    gain = np.random.randint(0, 50)
    loss = np.random.randint(0, 30)
    y = baseline + gain - loss
    # Raise values that are too small; each step adds an integer in [0, 50).
    while y < 35:
        y += np.random.randint(0, 50)
    # Lower values that are too large; each step removes an integer in [0, 40).
    while y > 125:
        y -= np.random.randint(0, 40)
    return y
    
# Generate a weight for every row from its height.
df['weight'] = df['height'].map(change_self)

# Scatter plot of weight against height.
df.plot.scatter(x='height', y='weight')

python_数据_pandas_4

归一化

start

pandas 的绘图

age weight height

猜你喜欢