Python Data Analysis and Mining Practice Chapter 10

While studying "Python Data Analysis and Mining Practice", I found that some of the code in the book could not be run as printed, and I took a lot of detours as a result. I am sharing versions of the code that can be run directly, hoping to save others who need them the same detours.

Section 10-1

import pandas as pd

inputfile = '../10.2/water_heater.xls'
outfile = '../10.2/dividsequence.xls'

# Header names of the spreadsheet columns this script reads.
# NOTE(review): the machine-translated headers (u'occurrence time',
# u'water flow') were replaced by the Chinese headers the workbook is presumed
# to use -- confirm against the header row of water_heater.xls.
TIME_COL = u'发生时间'  # record timestamp
FLOW_COL = u'水流量'    # water flow

# Gap between consecutive retained records above which a new event starts.
T = pd.Timedelta(minutes=4)

data = pd.read_excel(inputfile)
# Timestamps are stored in YYYYMMDDHHMMSS form; convert them to datetimes.
data[TIME_COL] = pd.to_datetime(data[TIME_COL], format='%Y%m%d%H%M%S')

# Keep only records whose water flow is positive.  The explicit copy makes the
# column assignment below write to an independent frame rather than to a slice
# of the original one (avoids pandas' SettingWithCopyWarning).
data = data[data[FLOW_COL] > 0].copy()

# True where the gap to the previous retained record exceeds the threshold.
# The first row has no predecessor (NaT gap), which compares as False.
d = data[TIME_COL].diff() > T

# The running count of gaps, shifted by one, numbers the events from 1.
data[u'time number'] = d.cumsum() + 1
data.to_excel(outfile)

Section 10-2

# Threshold optimization: scan candidate gap thresholds and pick the one at
# which the number of detected events changes most slowly.
import numpy as np
import pandas as pd

inputfile = '../10.2/water_heater.xls'

# Header names of the spreadsheet columns this script reads.
# NOTE(review): the original indexed the same frame with both
# u'occurrence time' and u'发生时间'; the Chinese headers are presumed to be the
# ones present in the workbook -- confirm against water_heater.xls.
TIME_COL = u'发生时间'  # record timestamp
FLOW_COL = u'水流量'    # water flow

# Expert threshold: upper bound accepted for the optimized threshold.
T = pd.Timedelta(minutes=5)

# Spacing, in minutes, between candidate thresholds; also the divisor that
# turns the difference between neighbouring event counts into a slope.
STEP = 0.25

data = pd.read_excel(inputfile)
# Timestamps are stored in YYYYMMDDHHMMSS form; convert them to datetimes.
data[TIME_COL] = pd.to_datetime(data[TIME_COL], format='%Y%m%d%H%M%S')
# Keep only records whose water flow is positive.
data = data[data[FLOW_COL] > 0]


def event_num(ts):
    """Return the number of events obtained with gap threshold *ts*.

    Reads the module-level ``data`` frame.  Every gap between consecutive
    records that is longer than *ts* starts a new event, so the result is
    the number of such gaps plus one.

    Args:
        ts: ``pd.Timedelta`` used as the splitting threshold.
    """
    d = data[TIME_COL].diff() > ts
    return d.sum() + 1


# Candidate thresholds: 1 to 8.75 minutes in STEP-minute increments.
dt = [pd.Timedelta(minutes=i) for i in np.arange(1, 9, STEP)]
h = pd.DataFrame(dt, columns=[u'阈值'])

# Event count for every candidate threshold.
h[u'number of events'] = h[u'阈值'].apply(event_num)

# Slope between two adjacent candidates (events per minute of threshold).
h[u'slope '] = h[u'number of events'].diff() / STEP

# Mean absolute slope over the current row and the three rows before it.
h[u'slope indicator'] = (
    h[u'slope '].abs().rolling(window=4, center=False).mean()
)

# idxmin() returns the row label of the smallest indicator.  That window
# averages the slopes of rows k-3..k, and each slope compares a row with the
# one before it, so the window spans candidates k-4..k; stepping back 4 rows
# selects the threshold at the start of the flattest stretch.
ts = h[u'阈值'][h[u'slope indicator'].idxmin() - 4]

# Fall back to 4 minutes when the optimized value exceeds the expert bound.
if ts > T:
    ts = pd.Timedelta(minutes=4)
print(ts)

Section 10-3

# Train a neural network to recognize bathing events.
import pandas as pd
# Imported from keras.layers rather than the legacy keras.layers.core path.
# Dropout is kept from the original import list but is not used below.
from keras.layers import Activation, Dense, Dropout
from keras.models import Sequential

inputfile1 = '../10.2/train_neural_network_data.xls'
inputfile2 = '../10.2/test_neural_network_data.xls'
testoutputfile = '../10.2/test_output_data.xls'

data_train = pd.read_excel(inputfile1)
data_test = pd.read_excel(inputfile2)

# Columns 5..15 are the 11 sample features; column 4 is the label column.
# DataFrame.as_matrix() was removed from pandas, so to_numpy() is used.
x_train = data_train.iloc[:, 5:16].to_numpy()
y_train = data_train.iloc[:, 4].to_numpy()

x_test = data_test.iloc[:, 5:16].to_numpy()
y_test = data_test.iloc[:, 4].to_numpy()  # test labels; not used below

# Build the model: 11 inputs -> 17 -> 10 -> 1 output.
model = Sequential()
model.add(Dense(input_dim=11, units=17))
model.add(Activation('relu'))

# Only the first layer declares its input size; later layers take theirs
# from the preceding layer's output.
model.add(Dense(units=10))
model.add(Activation('relu'))

model.add(Dense(units=1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam')

model.fit(x_train, y_train, epochs=100, batch_size=1)
model.save_weights('../10.2/net.model')

# Sequential.predict_classes() was removed from Keras.  For a single sigmoid
# output, thresholding the predicted probability at 0.5 gives the class.
predicted = (model.predict(x_test) > 0.5).astype('int32')
r = pd.DataFrame(predicted, columns=[u'预测结果'])

# Write the first five test columns next to the predicted class.
pd.concat([data_test.iloc[:, :5], r], axis=1).to_excel(testoutputfile)

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325283631&siteId=291194637