Linear regression + fitting of the relationship between house price and house size

Linear regression + linear fitting of the relationship between house price and house size
Insert picture description here
Insert picture description here
(The data for this section is stored on the local computer at "F:\pythondata\test4", or in the Tencent Weiyun file "python data\test4".)
Insert picture description here

import  matplotlib.pyplot as plt
from sklearn import  linear_model #进行线性回归。
import numpy as np

# Build datasets_X and datasets_Y to hold the house sizes and the house
# sale prices read from the data file.
datasets_X = []
datasets_Y = []

# Each non-empty line is parsed as "size,price" with integer fields.
# The with-statement closes the file even if a line fails to parse.
with open('F:/python数据/test4.txt', 'r') as fr:
    for line in fr:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. an empty last line) instead of
            # crashing on int('').
            continue
        items = line.split(',')
        datasets_X.append(int(items[0]))
        datasets_Y.append(int(items[1]))

# At this point datasets_X is a plain list such as [1000, 792, 1260, ...]
# and datasets_Y is a plain list such as [168, 184, 197, ...].

length = len(datasets_X)  # total number of samples
# Convert datasets_X to an array and reshape it to 2-D (length rows, 1 column),
# the layout handed to the linear-regression fit.
datasets_X = np.array(datasets_X).reshape([length, 1])
# Convert datasets_Y to a 1-D array.
datasets_Y = np.array(datasets_Y)

# datasets_X now looks like
# [[1000]
#  [ 792]
#  [1260]
#  ...
# ]
#
# and datasets_Y looks like [168 184 197 220 ...]
# (arrays print without commas).

# Fit a linear regression of price (datasets_Y) on area (datasets_X).
linear = linear_model.LinearRegression()
linear.fit(datasets_X, datasets_Y)
# Show the coefficient(s) of the fitted regression equation.
print('Cofficients:', linear.coef_)
# Show the intercept of the fitted regression equation.
print('intercept', linear.intercept_)

# Use the array's own min()/max() so the bounds are scalars; the built-in
# min()/max() on a 2-D array return 1-element arrays, which np.arange would
# have to convert to scalars implicitly.
minX = datasets_X.min()
maxX = datasets_X.max()
# Evenly spaced integer grid covering the observed area range, used to draw
# the fitted line. np.arange excludes its stop value, so stop at maxX + 1 to
# include the largest observed area (the original note describes the range
# as 792 ... 4399 inclusive).
# reshape([-1, 1]) turns the grid into a single column, the same layout as
# datasets_X; reshape([1, -1]) would produce a single row instead.
X = np.arange(minX, maxX + 1).reshape([-1, 1])

# Red points: observed data. Blue line: model predictions over the grid.
plt.scatter(datasets_X, datasets_Y, color='red')
plt.plot(X, linear.predict(X), color='blue')
plt.xlabel('Area')
plt.ylabel('Price')
plt.show()

Insert picture description here

Insert picture description here
2.3. Polynomial regression + nonlinear fitting of the relationship between house price and house size

Insert picture description here
Insert picture description here
(The data for this section is stored on the local computer at "F:\pythondata\test4", or in the Tencent Weiyun file "python data\test4".)
Insert picture description here

import matplotlib.pyplot as plt
from sklearn import linear_model
import numpy as np
from sklearn.preprocessing import PolynomialFeatures


# Raw lists of house sizes (x) and sale prices (y) read from the data file.
datasets_x = []
datasets_y = []
# Each non-empty line is parsed as "size,price" with integer fields.
# The with-statement closes the file even if a line fails to parse.
with open('F:/python数据/test4.txt', 'r') as fr:
    for line in fr:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. an empty last line) instead of
            # crashing on int('').
            continue
        items = line.split(',')
        datasets_x.append(int(items[0]))
        datasets_y.append(int(items[1]))

length = len(datasets_x)  # total number of samples
# Convert datasets_x to an array and reshape it to 2-D (length rows, 1 column),
# the layout handed to the regression fit.
datasets_X = np.array(datasets_x).reshape([length, 1])
# Convert datasets_y to a 1-D array.
datasets_Y = np.array(datasets_y)


# degree=2: build second-degree polynomial features x_poly from datasets_X.
poly_reg = PolynomialFeatures(degree=2)
x_poly = poly_reg.fit_transform(datasets_X)
# Use a linear model to learn the mapping between x_poly and datasets_Y.
lin_reg_2 = linear_model.LinearRegression()
lin_reg_2.fit(x_poly, datasets_Y)


# Use the array's own min()/max() so the bounds are scalars; the built-in
# min()/max() on a 2-D array return 1-element arrays, which np.arange would
# have to convert to scalars implicitly.
minX = datasets_X.min()
maxX = datasets_X.max()
# np.arange excludes its stop value, so stop at maxX + 1 to include the
# largest observed area in the plotting grid.
X = np.arange(minX, maxX + 1).reshape([-1, 1])

# scatter draws the observed data points.
plt.scatter(datasets_X, datasets_Y, color='red')
# plot draws the fitted regression curve over the grid. The grid is expanded
# with transform(), not fit_transform(): the transformer was already fitted
# on the training data and should not be re-fitted on the plotting grid.
plt.plot(X, lin_reg_2.predict(poly_reg.transform(X)), color='blue')
plt.xlabel("Area")
plt.ylabel("Price")
plt.show()

Insert picture description here

Guess you like

Origin blog.csdn.net/weixin_45014721/article/details/114653343