1. Round NumPy data to a fixed number of decimal places
import numpy as np
# Sample values with differing numbers of decimal digits.
array = np.array([1.123456789, 2.23456, 4.5643])
# np.around returns a new array rounded to the requested number of decimals;
# the input array itself is left unchanged.
rounded = np.around(array, decimals=3)
print(rounded)
2. Convert np data into pd.DataFrame data
import numpy as np
import pandas as pd
# Start from a plain nested list: one inner list per row.
dataset = [
    [1.458, 1.254],
    [2.365, 5.154],
    [2.365, 7.356],
]
# Turn the rows into a 2-D NumPy array, then wrap it in a DataFrame whose
# two columns carry explicit labels.
data = np.asarray(dataset)
df = pd.DataFrame(data, columns=['pregnants', 'Plasma_glucose_concentration'])
3. Read the column names of a DataFrame
df.columns returns an Index object, which can be converted to a list with tolist() or with list(df.columns)
# df.columns is an Index; tolist() materializes its labels as a plain list.
column_index = df.columns
data_demo = column_index.tolist()
print(data_demo)
4. Combining DataFrames with pandas
Splicing in the row direction (side by side: merge joins the columns of two DataFrames on a shared key):
import pandas as pd
import numpy as np
# Build each frame straight from the nested row lists instead of going through
# np.array: a NumPy array holds a single dtype, so rows that mix 'a' with 1 and
# 2 would be coerced to all-string values and the num* columns would stop
# being numeric.
df1 = pd.DataFrame(
    [
        ['a', 1, 2],
        ['b', 3, 4],
        ['c', 5, 6],
    ],
    columns=['name', 'num11', 'num12'],
)
df2 = pd.DataFrame(
    [
        ['a', 7, 8],
        ['b', 9, 10],
        ['c', 11, 12],
    ],
    columns=['name', 'num21', 'num22'],
)
# Join the two frames on the shared 'name' column; each output row carries the
# columns of both inputs for one matching name.
df_result = pd.merge(df1, df2, on='name')
print(df_result)
Splicing in the column direction (stacking: the rows of one DataFrame are appended below the other):
You can add rows or columns to a DataFrame with concat. That is, concat merges multiple pandas objects (DataFrame/Series) into one along a chosen axis (axis=0 or axis=1).
concat syntax: pandas.concat(objs, axis=0, join='outer', ignore_index=False).
objs: a list, the content can be DataFrame or Series, and can be mixed.
axis: The default value is 0, which means merging by rows, and if equal to 1, it means merging by columns.
join: How the indexes on the other axis are aligned when merging. The default is an outer join (keep the union of labels, filling gaps with NaN); join='inner' keeps only the labels shared by all objects.
ignore_index: Whether to ignore the original data index.
import pandas as pd
import numpy as np
# First table: columns 'no', '001', '002'.
dataset0 = [
    [1, 1.458, 1.254],
    [2, 2.365, 5.154],
    [3, 2.365, 7.356],
]
data0 = np.asarray(dataset0)
df0 = pd.DataFrame(data0, columns=['no', '001', '002'])

# Second table: shares 'no' but has its own value columns '003', '004'.
dataset1 = [
    [1, 2.365, 1.254],
    [2, 2.395, 5.154],
    [3, 2.365, 7.356],
]
data1 = np.asarray(dataset1)
df1 = pd.DataFrame(data1, columns=['no', '003', '004'])

# axis=0 stacks df1 below df0; the outer join keeps every column from both
# frames (cells with no source value become NaN), and ignore_index=True
# renumbers the rows 0..n-1.
frames = [df0, df1]
df_result = pd.concat(frames, axis=0, join='outer', ignore_index=True)
print(df_result)
5. Save a DataFrame to a CSV file
# Write df to the target CSV file; the index is written too, since to_csv is
# called without index=False.
csv_path = '../SeaDataset/03/03-17-19.csv'
df.to_csv(csv_path)
6. Delete a file
os.remove(path) deletes the file at the given path.
import os
# Directory and file name of the file to delete.
path = "./data"
txt_name0 = "删除文件0.txt"
# Join directory and name into one path, then delete that file.
target_file = os.path.join(path, txt_name0)
os.remove(target_file)
7. Processing code for a certain data set
import numpy as np
import pandas as pd
import os  # required by the os.remove call at the end of this snippet

# Load the two input files, using the first CSV column as the row index.
df1 = pd.read_csv('../SeaDataset/05/05-17.csv', header=0, index_col=0)
df2 = pd.read_csv('../SeaDataset/05/05-18-19.csv', header=0, index_col=0)

# Stack them row-wise, keeping the original index labels, and save the result.
df = pd.concat([df1, df2], axis=0, join='outer', ignore_index=False)
df.to_csv('../SeaDataset/05/05-17-19.csv')

# Reload the combined file and round every value to 3 decimal places.
# Rebuilding the DataFrame from the raw array drops the original index
# (the new frame gets a default 0..n-1 index), so it is re-attached below.
df = pd.read_csv('../SeaDataset/05/05-17-19.csv', header=0, index_col=0)
df_col = df.columns.tolist()
df_data = df.iloc[:, :].values
df_data = np.around(df_data, 3)
df_data = pd.DataFrame(data=df_data, columns=df_col)

# Read the same file again WITHOUT index_col so the saved index shows up as an
# ordinary first column, and take that column.
df = pd.read_csv('../SeaDataset/05/05-17-19.csv', header=0)
df_index = df.iloc[:, 0]

# Put the index column back in front of the rounded data (both share the
# default positional index) and write the result without an extra index column.
df = pd.concat([df_index, df_data], axis=1, join='outer', ignore_index=False)
df.to_csv('../SeaDataset/05/data_05-17-19.csv', index=False)

# Reload the final file and inspect the first cell, its type, and the table.
df = pd.read_csv('../SeaDataset/05/data_05-17-19.csv', header=0, index_col=0)
print(df.iloc[0, 0])
print(type(df.iloc[0, 0]))
print(df)

# NOTE(review): this path is under SeaDataset_Init/03, not the SeaDataset/05
# directory used above -- confirm this is the file that should be deleted.
os.remove('../SeaDataset_Init/03/03-17-19.csv')
This can be improved further, for example by introducing a for loop to process multiple files in a batch.