《Python数据分析与挖掘实战》第13章

在学习过程中发现,《Python数据分析与挖掘实战》中的部分代码无法直接运行,自己因此走了很多弯路。特此分享可以直接运行的代码,希望能让有需要的朋友少走弯路。

# Summary statistics
import pandas as pd
import numpy as np
inputfile = '../13.2/data1.csv'
data = pd.read_csv(inputfile)

# Build one row per statistic, then transpose so that every variable of the
# input file becomes a row and the four statistics become the columns.
# Values are rounded to two decimals.
stat_rows = [data.min(), data.max(), data.mean(), data.std()]
stat_labels = ['Min', 'Max', 'Mean', 'Std']
r = pd.DataFrame(stat_rows, index=stat_labels).T.round(2)

# 13-2 Pearson correlation coefficients between all columns of `data`,
# rounded to two decimals.
corr_matrix = data.corr(method='pearson')
pear = corr_matrix.round(2)

# 13-3 Variable selection with Lasso
from sklearn.linear_model import Lasso

# The first 13 columns (by position) are used as predictors and the column
# named 'y' as the response.
lasso_inputs = data.iloc[:, :13]
lasso_target = data['y']

model = Lasso(alpha=0.1)
model.fit(lasso_inputs, lasso_target)
print(model.coef_)      # fitted coefficient of each predictor

# 13-4 Data preparation for grey forecasting
#
# Re-indexes `data` by year, appends empty rows for 2014 and 2015, fills the
# selected feature columns of those rows with GM11 forecasts and writes the
# selected features plus 'y' to `outfile`.
from GM11 import GM11   # project-local module providing the grey model
outfile = '../13.2/data1_GM11.xls'
# NOTE(review): writing '.xls' depends on the Excel writer engine available to
# the installed pandas; newer pandas releases may no longer provide one for
# this extension -- confirm, and switch to '.xlsx' if needed.

train_years = np.arange(1994, 2014, 1)   # 1994 .. 2013 inclusive
data.index = train_years
# Placeholder rows for the two forecast years. NaN (rather than None) keeps
# the columns numeric.
data.loc[2014] = np.nan
data.loc[2015] = np.nan

l = ['x1', 'x2', 'x3', 'x4', 'x5', 'x7']

for i in l:
    # GM11 returns a sequence whose first item is a callable; it is used here
    # as the prediction function. `.values` replaces `.as_matrix()`, which was
    # removed in pandas 1.0.
    # presumably f(k) yields the value of the k-th observation (1-based) --
    # confirm against the GM11 module.
    f = GM11(data.loc[train_years, i].values)[0]
    # Write with a single .loc call; chained `data[i][year] = ...` may assign
    # to a temporary copy and leave `data` unchanged.
    data.loc[2014, i] = f(len(data) - 1)
    data.loc[2015, i] = f(len(data))
    data[i] = data[i].round(2)
data[l + ['y']].to_excel(outfile)

# 13-5 Neural-network prediction model
#
# Trains a small fully connected network on the standardized 1994-2013 rows,
# predicts 'y' for every row of `data` (including the forecast years), writes
# the result to `outfile2` and plots actual vs. predicted values.
from keras.models import Sequential
# `keras.layers` is the public import path; `keras.layers.core` is not
# available in current Keras releases.
from keras.layers import Dense, Activation
import matplotlib.pyplot as plt

modelfile = '../13.2/net.model'
# NOTE(review): recent Keras releases may require a specific file extension
# for save_weights -- confirm against the installed version.
outfile2 = '../13.2/revenue.xls'

# Work on an explicit copy so that adding 'y_pred' below neither touches
# `data` nor triggers a SettingWithCopy warning.
data2 = data[l + ['y']].copy()
data_train = data2.loc[np.arange(1994, 2014, 1)].copy()

data_mean = data_train.mean()
data_std = data_train.std()

data_train = (data_train - data_mean) / data_std      # standardize (z-score)

# `.values` replaces `.as_matrix()`, which was removed in pandas 1.0.
x_train = data_train[l].values       # feature matrix
y_train = data_train['y'].values     # target vector

model2 = Sequential()      # build the model
model2.add(Dense(input_dim=len(l), units=12))   # one input per feature column
model2.add(Activation('relu'))

# The input size of a non-first layer is inferred from the previous layer,
# so no input_dim is passed here.
model2.add(Dense(units=1))
model2.compile(loss='mean_squared_error', optimizer='adam')
model2.fit(x_train, y_train, epochs=10000, batch_size=16)

model2.save_weights(modelfile)

# Standardize all rows with the training mean/std before predicting.
x = ((data[l] - data_mean[l]) / data_std[l]).values

# Undo the standardization of the target; ravel() flattens the (n, 1)
# prediction array into one value per row.
data2['y_pred'] = (model2.predict(x) * data_std['y'] + data_mean['y']).ravel()

data2.to_excel(outfile2)
p = data2[['y', 'y_pred']].plot(subplots=True, style=['b-o', 'r-*'])
plt.show()
后面的代码与此类似,就不再贴出了。

猜你喜欢

转载自blog.csdn.net/lonely2018/article/details/80184450