机器学习第四周打卡:线性回归算法

简单线性回归
y=ax+b
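其中 a、b 由最小二乘法求得,计算公式依次为:
$$a=\frac{\sum_{i=1}^{n}(x_i-\bar{x})(y_i-\bar{y})}{\sum_{i=1}^{n}(x_i-\bar{x})^2},\qquad b=\bar{y}-a\bar{x}$$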
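如何评价回归方程的拟合度呢?常用的指标是决定系数 $R^2$(越接近 1 拟合越好)和均方根误差 RMSE(越小越好,下面实战案例中计算的就是它):
$$R^2=1-\frac{\sum_{i=1}^{n}(y_i-\hat{y}_i)^2}{\sum_{i=1}^{n}(y_i-\bar{y})^2},\qquad \mathrm{RMSE}=\sqrt{\frac{1}{n}\sum_{i=1}^{n}(y_i-\hat{y}_i)^2}$$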
实战案例:

import pymysql # 导入模块
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #这里是引用了交叉验证
from sklearn.linear_model import LinearRegression  #线性回归



def get_df_from_db(sql,columnNames=None):
    """Run a SQL query against MySQL and return the rows as a DataFrame.

    The connection settings (host, user, password, database) are empty
    placeholders and must be filled in before the function can connect.

    Args:
        sql: The SQL statement to execute. It is passed to the cursor
            as-is, without bound parameters.
        columnNames: Column labels for the resulting DataFrame. When
            omitted (None), the names reported by the cursor for the
            result set are used instead.

    Returns:
        A pandas DataFrame with one row per fetched record.
    """
    conn = pymysql.connect(
        host='',  # database host
        port=3306,  # port number
        user='',  # user name
        password='',  # password
        database='',  # schema to connect to
        charset='utf8',  # connection encoding
    )
    try:
        # The cursor is closed on leaving the `with` block, and the
        # connection is closed in `finally` even if the query raises.
        with conn.cursor() as cursor:
            cursor.execute(sql)
            data = cursor.fetchall()
            if columnNames is None:
                # DB-API: the first item of each description entry is the
                # column name; description is None when there is no result set.
                columnNames = [col[0] for col in (cursor.description or ())]
    finally:
        conn.close()
    return pd.DataFrame([list(row) for row in data], columns=columnNames)

def pridect_cost(x,y,x1_ture,x2_ture):
    """Fit a two-feature linear regression and predict one new sample.

    The data is split 80/20 into train and test sets with a fixed seed,
    a LinearRegression model is fitted on the training part, the fitted
    parameters and the test-set RMSE are printed, and a predicted-vs-actual
    line chart for the test set is shown.

    Args:
        x: Feature table; the final prediction uses exactly its first two
            columns' coefficients.
        y: Target values aligned with the rows of ``x``.
        x1_ture: Value of the first feature for the sample to predict.
        x2_ture: Value of the second feature for the sample to predict.

    Returns:
        The prediction ``a * x1_ture + b * x2_ture + c`` computed from the
        fitted coefficients ``a``, ``b`` and intercept ``c``.
    """
    # Hold out 20% of the rows as the test set; the seed keeps the split stable.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)

    linreg = LinearRegression()
    # Train the model.
    model = linreg.fit(X_train, y_train)
    print('模型参数:')
    print(model)
    # Intercept of the fitted model.
    print('模型截距:')
    print(linreg.intercept_)
    # Fitted weight of each feature.
    print('参数权重:')
    print(linreg.coef_)

    y_pred = linreg.predict(X_test)
    # Convert the targets to a float array once, so the arithmetic below does
    # not depend on y being a pandas object or on its element type.
    y_true = np.asarray(y_test, dtype=float)
    # Root-mean-square error over the test samples.
    rmse = np.sqrt(np.mean((y_pred - y_true) ** 2, axis=0))
    print("RMSE by hand:", rmse)

    # Line chart comparing predictions with the actual test values.
    sample_index = range(len(y_pred))
    plt.figure()
    plt.plot(sample_index, y_pred, 'b', label="predict")
    plt.plot(sample_index, y_true, 'r', label="test")
    plt.legend(loc="upper right")  # show the series labels
    plt.xlabel("the number of sales")
    plt.ylabel('value of sales')
    plt.show()

    # Two-feature linear model: y = a*x1 + b*x2 + c.
    c = linreg.intercept_
    a = float(linreg.coef_[0])
    b = float(linreg.coef_[1])
    y_predict = x1_ture * a + x2_ture * b + c

    return y_predict

def main():
    """Load the consumption data, fit the model and print one prediction.

    The query returns day, hour and summed 'total_cost' per (day, hour)
    group; day and hour are the features and the sum is the target. The
    prediction is requested for the feature values 22 and 24.
    """
    columnNames = ['天', '小时', '消耗']
    sql = "SELECT DATE_FORMAT(create_time,'%d'),DATE_FORMAT(create_time,'%k'),SUM(CASE WHEN str_tag='total_cost' THEN num_tag ELSE NULL END ) FROM common.current_consumption_list WHERE apartment='102087' AND DATE_FORMAT(create_time,'%Y-%m')='2020-03' AND DATE_FORMAT(create_time,'%Y-%m-%d')>=DATE_SUB(CURDATE(),INTERVAL 20 day) GROUP BY 1,2;"
    frame = get_df_from_db(sql, columnNames)
    # For a single-feature regression the column would need .values.reshape(-1,1).
    features = frame[['天', '小时']]
    target = frame['消耗']
    print(pridect_cost(features, target, 22, 24))
    

# Run the example only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()


多元线性回归

发布了23 篇原创文章 · 获赞 0 · 访问量 622

猜你喜欢

转载自blog.csdn.net/macmurphy/article/details/105030967