python_数据_pandas_4

归一化

start

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Build a 50-row demo frame: normally distributed heights and weights plus
# a random 0/1 flag column, all stored as unsigned 8-bit integers.
# The cast is done explicitly with ``astype`` rather than via the
# constructor's ``dtype=`` argument: how the constructor treats a lossy
# float -> integer cast has changed between pandas versions (newer releases
# keep the float dtype), whereas ``astype`` always truncates.
# Clipping to [0, 255] first keeps a rare negative or oversized draw from
# wrapping around in the uint8 conversion.
df = (
    DataFrame({
        'height': np.random.normal(170, scale=15, size=50),
        'weight': np.random.normal(60, scale=20, size=50),
        'set': np.random.randint(0, 2, size=50),
    })
    .clip(lower=0, upper=255)
    .astype(np.uint8)
)
df.info()
  • out:<class 'pandas.core.frame.DataFrame'>
    RangeIndex: 50 entries, 0 to 49
    Data columns (total 3 columns):
    height 50 non-null uint8
    weight 50 non-null uint8
    set 50 non-null uint8
    dtypes: uint8(3)
    memory usage: 230.0 bytes
# Per-column summary statistics (count, mean, std, min, quartiles, max).
desc = df.describe()
# Bare expression: in a notebook cell this displays the table as the output.
desc
  • out: height weight set
    count 50.000000 50.000000 50.000000
    mean 167.000000 58.320000 0.400000
    std 15.286382 18.551022 0.494872
    min 129.000000 14.000000 0.000000
    25% 158.500000 49.000000 0.000000
    50% 167.000000 59.000000 0.000000
    75% 177.750000 70.000000 1.000000
    max 203.000000 93.000000 1.000000
# Normalization 1: min-max scaling -- subtract each column's minimum and
# divide by that column's range (max - min).
df1 = df.sub(df.min()).div(df.max() - df.min())
# Normalization 2: z-score -- subtract each column's mean and divide by its
# standard deviation, so every column ends up with mean 0 and std 1.
df2 = df.sub(df.mean()).div(df.std())

pandas 的绘图

import matplotlib.pyplot as plt
# Demo frame: 10 rows of random integers drawn from [1, 10) in columns A, B.
df = DataFrame(
    data=np.random.randint(low=1, high=10, size=(10, 2)),
    columns=['A', 'B'],
)

# Values accepted by the ``kind`` argument of ``df.plot``:
#   - 'line'    : line plot (default)
#   - 'bar'     : vertical bar plot
#   - 'barh'    : horizontal bar plot
#   - 'hist'    : histogram
#   - 'box'     : boxplot
#   - 'kde'     : Kernel Density Estimation plot
#   - 'density' : same as 'kde'
#   - 'area'    : area plot
#   - 'pie'     : pie plot
#   - 'scatter' : scatter plot
#   - 'hexbin'  : hexbin plot
df.plot.line()  # line chart (the default kind)
df.plot.bar()   # bar chart
df.plot.box()   # box plot
# 60-row demo frame of normally distributed heights and ages, stored as
# unsigned 8-bit integers.
# The cast is explicit (``astype``) instead of relying on the constructor's
# ``dtype=`` argument, whose handling of lossy float -> integer casts differs
# between pandas versions. Clipping to [0, 255] first prevents wrap-around
# for any out-of-range draw.
df = (
    DataFrame({
        'height': np.random.normal(170, size=60, scale=15),
        'age': np.random.normal(20, size=60, scale=2),
    })
    .clip(lower=0, upper=255)
    .astype(np.uint8)
)
# Histogram of heights. density=True puts the y axis on a density scale so it
# lines up with the density curve below; with False it shows raw counts
# (Frequency).
df['height'].plot.hist(density=True)
# Density curve of the same column, drawn in red.
df['height'].plot.density(color='red')
  • 频率条形图与密度曲线图

age weight height

# 1000-row demo frame of normally distributed heights and ages, stored as
# unsigned 8-bit integers.
# The cast is explicit (``astype``) instead of relying on the constructor's
# ``dtype=`` argument, whose handling of lossy float -> integer casts differs
# between pandas versions. Clipping to [0, 255] first prevents wrap-around
# for any out-of-range draw.
df = (
    DataFrame({
        'height': np.random.normal(170, size=1000, scale=15),
        'age': np.random.normal(20, size=1000, scale=2),
    })
    .clip(lower=0, upper=255)
    .astype(np.uint8)
)

def change_self(x):
    """Nudge an outlying height value back toward the middle of the range.

    A value below 145 is raised by a random integer drawn from [0, 50);
    a value above 200 is lowered by a random integer drawn from [0, 50).
    Any other value is returned unchanged. A random number is only drawn
    when the corresponding adjustment is applied.
    """
    lower_cutoff, upper_cutoff = 145, 200
    if x < lower_cutoff:
        x += np.random.randint(0, 50)
    if x > upper_cutoff:
        x -= np.random.randint(0, 50)
    return x
    
# Run every height through change_self, then write the result back.
adjusted_height = df['height'].map(change_self)
df['height'] = adjusted_height

# Scatter plot of height against age.
df.plot.scatter(x='age', y='height')
def change_self(x):
    """Derive a randomized weight from a height value.

    Starts from ``((x - 100) * 2 - 30) / 2``, adds a random integer from
    [0, 50) and subtracts a random integer from [0, 30). The result is then
    pushed up by random steps from [0, 50) while it is below 35, and pulled
    down by random steps from [0, 40) while it is above 125, before being
    returned.
    """
    baseline = ((x - 100) * 2 - 30) / 2
    upward_noise = np.random.randint(0, 50)
    downward_noise = np.random.randint(0, 30)
    y = baseline + upward_noise - downward_noise

    # Keep stepping until the value is no longer below the lower bound.
    while y < 35:
        y += np.random.randint(0, 50)
    # Then keep stepping until it is no longer above the upper bound.
    while y > 125:
        y -= np.random.randint(0, 40)
    return y
    
# Derive a weight for every row from its height via change_self.
derived_weight = df['height'].map(change_self)
df['weight'] = derived_weight

# Scatter plot of weight against height.
df.plot.scatter(x='height', y='weight')

猜你喜欢

转载自blog.csdn.net/sinat_39045958/article/details/86527525
今日推荐