在学习中发现《Python数据分析与挖掘实战》中的部分代码无法直接运行,自己学习的时候走了很多弯路,特此分享可直接运行的代码,希望能让有需要的朋友少走弯路。
# Script covering sections 13-2 to 13-5: summary statistics, Pearson
# correlation, Lasso variable selection, GM11-based extrapolation of the
# selected features, and a small Keras regression network.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Public import path; `keras.layers.core` is a legacy location that is not
# importable in recent Keras releases.
from keras.layers import Activation, Dense
from keras.models import Sequential
from sklearn.linear_model import Lasso

from GM11 import GM11

# Summary statistics
inputfile = '../13.2/data1.csv'
data = pd.read_csv(inputfile)
r = [data.min(), data.max(), data.mean(), data.std()]
r = pd.DataFrame(r, index=['Min', 'Max', 'Mean', 'Std']).T
r = np.round(r, 2)

# 13-2 Pearson correlation coefficients
pear = np.round(data.corr(method='pearson'), 2)

# 13-3 Lasso variable selection
model = Lasso(alpha=0.1)
model.fit(data.iloc[:, 0:13], data['y'])
print(model.coef_)  # feature coefficients

# 13-4 Data preparation with grey prediction
outfile = '../13.2/data1_GM11.xls'
train_years = np.arange(1994, 2014, 1)
data.index = train_years
# Append the two years to be forecast. NaN (rather than None) is used so the
# columns keep a numeric dtype.
data.loc[2014] = np.nan
data.loc[2015] = np.nan
l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']
for i in l:
    # Element 0 of GM11's return value is called below with a position
    # argument -- presumably the fitted forecasting function (confirm against
    # the GM11 module). `.to_numpy()` replaces `.as_matrix()`, which was
    # removed in pandas 1.0.
    f = GM11(data.loc[train_years, i].to_numpy())[0]
    # Use a single `.loc` write; chained `data[i][2014] = ...` may assign to a
    # temporary copy and leave `data` unchanged.
    data.loc[2014, i] = f(len(data) - 1)
    data.loc[2015, i] = f(len(data))
    data[i] = data[i].round(2)
data[l + ['y']].to_excel(outfile)

# 13-5 Neural network prediction model
modelfile = '../13.2/net.model'
outfile2 = '../13.2/revenue.xls'
# Copy so that adding the `y_pred` column later does not write into a slice
# of `data` (avoids SettingWithCopyWarning).
data2 = data[l + ['y']].copy()
data_train = data2.loc[train_years].copy()
data_mean = data_train.mean()
data_std = data_train.std()
data_train = (data_train - data_mean) / data_std  # standardize the data
x_train = data_train[l].to_numpy()  # feature data
y_train = data_train['y'].to_numpy()  # label data

model2 = Sequential()  # build the model
model2.add(Dense(input_dim=6, units=12))
model2.add(Activation('relu'))
model2.add(Dense(input_dim=12, units=1))
model2.compile(loss='mean_squared_error', optimizer='adam')
model2.fit(x_train, y_train, epochs=10000, batch_size=16)
model2.save_weights(modelfile)

# Standardize all rows (including the 2014/2015 rows) with the training
# statistics, then undo the scaling on the predicted target.
x = ((data[l] - data_mean[l]) / data_std[l]).to_numpy()
data2['y_pred'] = model2.predict(x) * data_std['y'] + data_mean['y']
data2.to_excel(outfile2)

p = data2[['y', 'y_pred']].plot(subplots=True, style=['b-o', 'r-*'])
plt.show()
# NOTE: the original post says the remaining code is the same and omits it.