python实现(0,1)的归一化

数据需要进行归一化,整理了以下四种版本(前三种按列归一化,第四种对整个数组做全局归一化)

(1)按列进行归一化(sklearn 的 MinMaxScaler,先 fit 再 transform,并保存结果)

#(1)
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
# Read the raw weight matrix from disk.
weight_unbn = np.loadtxt('D:\\weight_unbn.txt')

# fit() returns the scaler itself, so construction and fitting can be chained.
scaler = MinMaxScaler().fit(weight_unbn)
# Bare attribute access: only shows a value when run interactively
# (e.g. in a notebook cell); it has no effect in a plain script.
scaler.data_max_

# Rescale the data with the statistics learned by fit().
weight_bn = scaler.transform(weight_unbn)

# Persist the scaled matrix twice: as plain text and as CSV.
np.savetxt('D:\\weight_bn.txt', weight_bn)
weight_bn_pd = pd.DataFrame(weight_bn)
weight_bn_pd.to_csv('D:\\weight_bn.csv')

(2)按列进行归一化(使用 NumPy 手动实现)

import numpy as np
def noramlization(data):
    """Min-max scale ``data`` column by column into the range [0, 1].

    Statistics are taken along axis 0, so each column of a 2-D input is
    scaled independently. A 1-D input is scaled as one series.

    Args:
        data: Array-like of numbers (anything ``np.asarray`` accepts).

    Returns:
        A tuple ``(normData, ranges, minVals)``:
        ``normData`` has the same shape as ``data``; ``ranges`` is the
        per-column ``max - min``; ``minVals`` is the per-column minimum.
    """
    data = np.asarray(data)
    minVals = data.min(0)
    maxVals = data.max(0)
    ranges = maxVals - minVals
    # A constant column has a zero range; dividing by it would produce NaN.
    # Divide by 1 there instead so the column maps to 0. The returned
    # ``ranges`` still reports the true (zero) range.
    safe_ranges = np.where(ranges == 0, 1, ranges)
    # Broadcasting lines the per-column statistics up with every row, so no
    # explicit tiling is needed; this also keeps 1-D input 1-D.
    normData = (data - minVals) / safe_ranges
    return normData, ranges, minVals

# Load the raw matrix (np.array makes a fresh copy of what loadtxt returns)
# and scale every column; the returned ranges and minimums are not needed.
weight_unbn = np.array(np.loadtxt('D:\\weight_unbn.txt'))
weight_bn, _, _ = noramlization(weight_unbn)

(3)按列进行归一化(sklearn 的 MinMaxScaler,直接调用 fit_transform)

from sklearn import preprocessing  
import numpy as np 
 
# Read the raw weight matrix from disk.
weight_unbn = np.loadtxt('D:\\Data\\biclustering_data\\weight_unbn.txt')

# Fit the scaler on the data, then rescale that same data with it.
min_max_scaler = preprocessing.MinMaxScaler()
weight_bn = min_max_scaler.fit(weight_unbn).transform(weight_unbn)

(4)全局进行归一化

import numpy as np
def noramlization(data):
    """Min-max scale ``data`` into [0, 1] using its global min and max.

    Unlike a per-column scaling, one minimum and one maximum are taken over
    the whole array, so the relative ordering of all entries is preserved.

    Args:
        data: Array-like of numbers of any dimensionality (anything
            ``np.asarray`` accepts).

    Returns:
        A tuple ``(normData, ranges, minVals, maxVals)``:
        ``normData`` has the same shape as ``data``; ``ranges`` is the scalar
        ``max - min``; ``minVals`` and ``maxVals`` are the scalar extremes.
    """
    data = np.asarray(data)
    minVals = data.min()
    maxVals = data.max()
    ranges = maxVals - minVals
    # If every entry is equal the range is zero; divide by 1 instead so the
    # result is all zeros rather than NaN. The returned ``ranges`` is still
    # the true (zero) range.
    scale = ranges if ranges != 0 else 1
    # The statistics are scalars, so plain broadcasting handles any input
    # shape without tiling.
    normData = (data - minVals) / scale
    return normData, ranges, minVals, maxVals

# Small hand-written matrix used to demonstrate global (whole-array) scaling.
weight_unbn = np.array(
    [
        [0, 1, 5, 3, 4],
        [5, 5.5, 6, 8, 9],
        [10, 11, 12, 13, 14],
    ]
)

# Scale with the global min/max and show the result alongside the statistics.
weight_bn, ranges, minVals, maxVals = noramlization(weight_unbn)
print(weight_bn, ranges, minVals, maxVals)
from sklearn import preprocessing  
import numpy as np 
 
# Global min-max scaling with sklearn: MinMaxScaler works per column, so the
# table is flattened into a single column, scaled, and then reshaped back.
min_max_scaler = preprocessing.MinMaxScaler()

# Load the feature table (first CSV column is the index) and convert it to a
# plain array.
genome_derived_features_con = np.array(
    pd.read_csv('D:/Data/features/genome_derived_features_con.csv', index_col=0)
)

# One long column holding every value, so a single min and max apply to all.
genome_derived_features_con_reshape = genome_derived_features_con.reshape(-1, 1)
genome_derived_features_con_row_norm_reshape = min_max_scaler.fit_transform(
    genome_derived_features_con_reshape
)

# Restore the original 2-D layout and write it out.
# NOTE: the DataFrame is rebuilt from a bare array, so the CSV's original
# row/column labels are not carried over to the output file.
genome_derived_features_con_row_norm = pd.DataFrame(
    genome_derived_features_con_row_norm_reshape.reshape(
        genome_derived_features_con.shape
    )
)
genome_derived_features_con_row_norm.to_csv(
    'D:/Data/features/genome_derived_features_con_row_norm_1.csv'
)

猜你喜欢

转载自blog.csdn.net/ziqingnian/article/details/110844959
今日推荐