第一步：收集数据

系统目标为预测混凝土抗压强度。数据集地址：UCI 机器学习库的 Concrete Compressive Strength 数据集（下载后的文件名为 Concrete_Data.xls）。

第二步：加载数据

import pandas as pd
from sklearn import preprocessing,linear_model #导入模型
from sklearn.metrics import r2_score #性能评价得分接近1模型性能越好
from sklearn.model_selection import train_test_split
import numpy as np
import warnings
# Suppress every warning so library notices do not clutter the output below.
warnings.filterwarnings(action='ignore')
# Read the concrete data set from the Excel workbook in the working directory.
dataset = pd.read_excel(io='Concrete_Data.xls')
# Preview the first rows as a quick check of what was loaded.
dataset.head()

数据样式

# Give the columns short names, then split the prediction target off the
# feature table.
dataset.columns = [
    'cement', 'blash', 'fly', 'water', 'superplastic',
    'coarse', 'fine', 'age', 'concrete',
]
# pop() removes the 'concrete' column from dataset and returns it.
target = dataset.pop('concrete')

第三步：选择模型

使用自己编写的交叉验证器

def CV(X, y, n):
    """Score a linear regression with contiguous, unshuffled n-fold CV.

    The rows are cut, in their existing order, into ``n`` consecutive folds
    of ``len(X) // n`` rows each; the last fold additionally takes whatever
    rows remain. For every fold a ``StandardScaler`` is fitted on the
    training rows only, applied to both parts, and a ``LinearRegression`` is
    fitted and scored with ``r2_score`` on the held-out rows.

    Args:
        X: 2-D array-like of features, one row per sample. ndarray,
            ``np.matrix`` and DataFrame inputs are all converted to a plain
            ndarray first.
        y: Targets holding ``len(X)`` rows in total; reshaped to
            ``(len(X), -1)``.
        n: Number of folds, ``2 <= n <= len(X)``.

    Returns:
        list: ``n`` R^2 scores, one per fold, in fold order.

    Raises:
        ValueError: If ``n`` is outside ``2..len(X)``; such values would
            leave a fold with no training or no test rows.
    """
    # Plain ndarrays: np.matrix is discouraged by NumPy and is rejected by
    # recent scikit-learn releases.
    X = np.asarray(X)
    length = len(X)
    if not 2 <= n <= length:
        raise ValueError(
            'n must be between 2 and len(X) (%d), got %r' % (length, n)
        )
    y = np.reshape(np.asarray(y), (length, -1))
    fold_size = length // n

    regr = linear_model.LinearRegression()
    scoreList = []

    for i in range(n):
        start = i * fold_size
        # The last fold runs to the end so the remainder rows are not lost.
        stop = length if i == n - 1 else start + fold_size
        held_out = np.zeros(length, dtype=bool)
        held_out[start:stop] = True

        train_X, test_X = X[~held_out], X[held_out]
        train_y, test_y = y[~held_out], y[held_out]

        # Fit the scaler on the training rows only so no statistics leak
        # from the held-out fold.
        scaler = preprocessing.StandardScaler().fit(train_X)
        regr.fit(scaler.transform(train_X), train_y)
        predicts = regr.predict(scaler.transform(test_X))

        scoreList.append(r2_score(test_y, predicts))

    return scoreList

# Mean of the 10 per-fold scores returned by CV. Plain ndarrays are passed
# instead of np.matrix, which NumPy discourages and recent scikit-learn
# releases reject as estimator input.
np.mean(CV(dataset.values, target.values, 10))

第四步：保存模型

# Reload the data from scratch so this step does not depend on earlier cells.
dataset = pd.read_excel('Concrete_Data.xls')
dataset.columns =['cement','blash','fly','water','superplastic','coarse','fine','age','concrete']
target = dataset.pop('concrete')

# Hold out 10% of the rows; the fixed random_state makes the split repeatable.
X_train,X_test,y_train,y_test = train_test_split(dataset,target,test_size = 0.1,random_state = 88)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train_standard = scaler.transform(X_train)
X_test_standard = scaler.transform(X_test)

regr = linear_model.LinearRegression()
regr.fit(X_train_standard,y_train)

# Persist the model together with its scaler: predictions must be scaled
# with the same statistics the model was trained on.
# sklearn.externals.joblib was removed from scikit-learn, so prefer the
# standalone joblib package and fall back only on very old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
joblib.dump(regr,'linear_regression_concrete.pkl')
joblib.dump(scaler,'linear_regression_concrete_scaler.pkl')

第五步：加载模型

# Restore the fitted regressor and the scaler that belongs to it.
# NOTE: joblib files are pickles; only load files from a trusted source.
predictor = joblib.load(filename='linear_regression_concrete.pkl')
scaler = joblib.load(filename='linear_regression_concrete_scaler.pkl')

第六步：构建预测系统

def concrete_System():
    """Prompt for one concrete mix on stdin and print the predicted strength.

    Eight numbers are read with ``input()``, in this order: cement,
    blast-furnace slag, fly ash, water, superplasticizer, coarse aggregate,
    fine aggregate, age. They are scaled with the module-level ``scaler``,
    passed to the module-level ``predictor``, and the prediction is printed
    as a 60-character banner padded with ``=``.

    Raises:
        ValueError: If an entry cannot be parsed by ``float()``.
    """
    # One prompt per feature, in the order the values are fed to the scaler.
    prompts = (
        '水泥，单位 千克每立方米：',
        '高炉炉渣，单位 千克每立方米：',
        '飞灰，单位 千克每立方米：',
        '水，单位 千克每立方米：',
        '减水剂，单位 千克每立方米：',
        '粗骨料，单位 千克每立方米：',
        '细骨料，单位 千克每立方米：',
        '年龄，单位 天数：',
    )
    values = [float(input(prompt)) for prompt in prompts]

    # A single sample must still be 2-D: one row, eight columns.
    inputArray = np.array(values).reshape((1, -1))
    standard_input = scaler.transform(inputArray)
    result = predictor.predict(standard_input)

    # predict() returns an array even for one row. Take the single element
    # explicitly instead of relying on implicit array-to-float conversion
    # inside '%', which NumPy has deprecated.
    strength = float(np.ravel(result)[0])

    # Format first, then centre, so the banner is 60 characters wide
    # regardless of how many digits the prediction has.
    print(('预测混凝土抗压强度是%.2f' % strength).center(60, '='))

系统运行如下所示：

使用线性回归构建混凝土抗压预测系统

第一步：收集数据

第二步：加载数据

第三步：选择模型

使用自己编写的交叉验证器

第四步：保存模型

第五步：加载模型

第六步：构建预测系统

猜你喜欢