Model prediction and evaluation
1. Data processing
1. Load the data and view its details.
2. Check whether any data items are empty (NaN).
3. Fill the empty items.
4. Split the data into training and test sets.
5. Data normalization
2. Model prediction
MLP - regression model
Ensemble regression
Linear regression
svm regression
knn regression
tree regression
tree regression
Random Forest Regression
AdaBoost regression
GBRT regression
Bagging regression
3. Model Integration
Use k-fold cross-validation
Use several of the better-performing models
Use the data to obtain the results
import warnings
warnings.filterwarnings("ignore")
from sklearn import preprocessing
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import ensemble
import pandas as pd
import math
from sklearn.model_selection import KFold
# Load the spreadsheet; the first column of the sheet becomes the row index.
# NOTE: read_excel has no `encoding` parameter (Excel files carry their own
# encoding), and pandas >= 1.0 raises TypeError for unknown keywords.
df = pd.read_excel("xxx.xlsx", index_col=0)

# Forward-fill missing cells from the previous row. DataFrame.ffill() is the
# supported spelling; fillna(method='ffill') is deprecated in recent pandas.
df = df.ffill()

data = df.values.astype('float')

# Column 0 is used as the regression target, the remaining columns as features.
x = data[:, 1:]
y = data[:, 0]

# Train on the natural log of the target. math.log is kept on purpose: it
# raises ValueError on a non-positive target instead of silently yielding
# NaN/-inf. The slice assignment updates `y` in place, as the original loop did.
y[:] = [math.log(value) for value in y]
# 5-fold cross-validation. Each fold fits its own scalers and models on the
# training split and prints every model's score (R^2 for sklearn regressors)
# on the held-out split.
# NOTE(review): shuffle=True without random_state makes the folds differ from
# run to run; pass random_state=... if reproducible scores are wanted.
kf = KFold(n_splits=5, shuffle=True)
for train_index, test_index in kf.split(x):
    train_x = x[train_index]
    test_x = x[test_index]
    train_y = y[train_index]
    test_y = y[test_index]

    # Scalers are fitted on the training split only and then applied to the
    # held-out split, so no test statistics leak into training.
    ss_x = preprocessing.StandardScaler()
    train_x = ss_x.fit_transform(train_x)
    test_x = ss_x.transform(test_x)

    # StandardScaler requires 2-D input, but the regressors expect a 1-D
    # target. Flatten once here so every model receives the same shape
    # (previously only MLP/GBR were given the raveled target).
    ss_y = preprocessing.StandardScaler()
    train_y = ss_y.fit_transform(train_y.reshape(-1, 1)).ravel()
    test_y = ss_y.transform(test_y.reshape(-1, 1)).ravel()

    # Multi-layer perceptron regressor.
    model_mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=(20, 20, 20), random_state=1)
    model_mlp.fit(train_x, train_y)
    mlp_score = model_mlp.score(test_x, test_y)
    print("sklearn多层感知器-回归模型得分", mlp_score)

    # Gradient boosting regressor.
    model_gbr = GradientBoostingRegressor(learning_rate=0.1)
    model_gbr.fit(train_x, train_y)
    gbr_score = model_gbr.score(test_x, test_y)
    print("sklearn集成-回归模型得分", gbr_score)

    # Bagging regressor. The original script repeated this fit/score/print a
    # second time after the random forest; the verbatim duplicate was dropped.
    model_br = ensemble.BaggingRegressor()
    model_br.fit(train_x, train_y)
    model_brscore = model_br.score(test_x, test_y)
    print("sklearn bagging 回归模型得分", model_brscore)

    # Random forest regressor.
    model_rfr = ensemble.RandomForestRegressor(n_estimators=20)
    model_rfr.fit(train_x, train_y)
    model_rfrscore = model_rfr.score(test_x, test_y)
    print("sklearn 随机森林回归模型得分", model_rfrscore)
Using the processed data useful life score
Reference: https://www.jianshu.com/p/f92d9ac14692