Memory optimization for machine learning

Training data sets are often large enough that they do not fit comfortably in memory. One remedy is to change the data type of each numeric feature column to the smallest type that can still hold its values, which compresses the DataFrame's memory footprint.

def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of *df* to narrower dtypes to save memory.

    Every column whose dtype is one of int16/int32/int64/float16/float32/
    float64 is inspected. Integer columns are cast to the first of
    int8/int16/int32/int64 whose range contains the column's min and max.
    Float columns are cast to the first of float16/float32 whose range
    contains the column's min and max, and to float64 otherwise. All other
    columns are left untouched.

    NOTE: the float check looks only at the value *range*, not at precision,
    so downcasting to float16/float32 may round the stored values.

    Args:
        df: pandas DataFrame to compress. It is modified in place.
        verbose: when true, print the final memory usage and the percentage
            saved.

    Returns:
        The same DataFrame object that was passed in.

    Requires ``numpy`` to be available under the name ``np``.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Candidate targets, ordered from narrowest to widest.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a value equal to the type's own limit
                # is still representable in that type.
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # If nothing matched (e.g. min/max are NaN for an empty column)
            # the column keeps its current dtype.
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, infinite, or NaN min/max.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a zero-byte starting size reports 0% instead
        # of dividing by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

 

Guess you like

Origin www.cnblogs.com/gambler/p/11988859.html