python sklearn学习 波士顿房屋价格预测(线性回归)


单个特征的回归问题:

from sklearn import linear_model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets,linear_model

#从csv文件读取数据的函数 ,这里直接用了数据,没有读取
def get_data(file_name):
    """Load house sizes and prices from a CSV file.

    The file is read with ``pd.read_csv`` and must contain ``size`` and
    ``price`` columns.

    Returns a tuple ``(X_parameter, Y_parameter)``:
    ``X_parameter`` is a list of one-element ``[size]`` lists (one row per
    sample) and ``Y_parameter`` is a flat list of prices. Every value is
    converted with ``float``.
    """
    frame = pd.read_csv(file_name)
    # Pair the two columns row by row before splitting them again so that
    # both output lists always have the same length.
    rows = list(zip(frame['size'], frame['price']))
    X_parameter = [[float(size)] for size, _ in rows]
    Y_parameter = [float(price) for _, price in rows]
    return X_parameter, Y_parameter



'''
    构建一个以size,price为列名的csv数据
    数据转化为csv格式的方法
    在dict中指出列名字
'''
#df = pd.DataFrame({'size':train_X,'price':train_y})
#df.to_csv('C:/Users/zhangwei/Desktop/Machinelearning/house_price1.csv')


'''
    由于train_X是一个一维的list(会被视为一个样本),需要转化为n_samples形式的二维形式
    如果是在csv文件中,读取过程直接转化为此形式
    也可以用np.array(train_X)把train转化为 此形式(此方法返回副本,重新赋值给train_X即可)
'''
#原始的X,y
# train_y = [6450.0,7450.0,8450.0,9450.0,11450.0,15450.0,18450.0]
# train_X = [150.00, 200.0, 250.0, 300.0, 350.0, 400.0, 600.0]
# train_X = np.array(train_X).reshape(-1,1)
# print(train_X)

# Load the training data from the CSV file.
# NOTE: the path is hard-coded to an absolute location on the author's machine.
train_X,train_y = get_data('C:/Users/zhangwei/Desktop/Machinelearning/house_price1.csv')
# print(train_X)
# print(train_y)
'''
    intercept:截距
    coef:系数
    predict_value:预测结果
'''
def get_predict(train_X,train_y,test_X):
    """Fit a linear regression on the training data and predict for ``test_X``.

    Args:
        train_X: training samples, passed unchanged to ``LinearRegression.fit``.
        train_y: training targets, passed unchanged to ``LinearRegression.fit``.
        test_X: value(s) to predict for. A 2-D input is used as is; a scalar
            or 1-D input is reshaped to ``(n_samples, n_features)`` using the
            feature count of the fitted model.

    Returns:
        dict with the keys ``'intercept'`` (``regr.intercept_``), ``'coef'``
        (``regr.coef_``) and ``'predict_value'`` (the result of
        ``regr.predict``).
    """
    regr = linear_model.LinearRegression()
    regr.fit(train_X, train_y)

    # predict() expects a 2-D array of samples; a bare scalar such as 700 is
    # rejected by current scikit-learn, so lower-dimensional input is reshaped.
    # The last axis of coef_ holds one coefficient per feature.
    n_features = np.shape(regr.coef_)[-1]
    test_samples = np.asarray(test_X)
    if test_samples.ndim < 2:
        test_samples = test_samples.reshape(-1, n_features)

    predict_result = regr.predict(test_samples)
    return {
        'intercept': regr.intercept_,
        'coef': regr.coef_,
        'predict_value': predict_result,
    }

# Predict the price of a house with size == 700.
# predict() expects a 2-D (n_samples, n_features) input, so the single sample
# is written as one row with one feature instead of a bare scalar.
test_X = [[700]]
predict = get_predict(train_X,train_y,test_X)
print("Intercept value :",predict['intercept'])
print("coefficent :",predict['coef'])
print('Predicted value:',predict['predict_value'])
'''
    Intercept value : 1771.80851064
    coefficent : [ 28.77659574]
    Predicted value: [ 21915.42553191]
'''

'''
    绘制预测结果图
'''
def show_linear_result(train_X,train_y):
    """Plot the training points together with the fitted regression line.

    A ``LinearRegression`` model is fitted on ``train_X`` / ``train_y``; the
    samples are drawn as a blue scatter plot and the model's predictions for
    ``train_X`` as a red line. The figure is displayed with ``plt.show()``.
    """
    model = linear_model.LinearRegression()
    model.fit(train_X, train_y)
    fitted_prices = model.predict(train_X)

    plt.scatter(train_X, train_y, color="blue")
    plt.plot(train_X, fitted_prices, color='red', linewidth=4)
    plt.xlabel('house size')
    plt.ylabel('house price')
    plt.show()
# Draw the training data and the fitted regression line.
show_linear_result(train_X,train_y)

预测结果:

参考:点击打开链接

两部电视剧预测观众喜好:

import csv
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
# train_flash_X = [1,2,3,4,5,6,7,8,9]
# train_flahs_y = [4.83,4.27,3.59,3.53,3.46,3.73,3.47,4.34,4.66]
# train_arrow_X = [1,2,3,4,5,6,7,8,9]
# train_arrow_y = [2.84,2.32,2.55,2.49,2.73,2.6,2.64,3.92,3.06]
# df = pd.DataFrame({'flash_episod':train_flash_X,'flash_us_viewers':train_flahs_y,'arrow_episod':train_arrow_X,'arrow_us_viewers':train_arrow_y})
# df.to_csv('C:/Users/zhangwei/Desktop/Machinelearning/TVshow.csv')
# print(train_flash_X)
# print(train_flahs_y)
# print(train_arrow_X)
# print(train_arrow_y)
#
def get_data(file_name):
    """Load the episode / viewer columns for both TV shows from a CSV file.

    The file is read with ``pd.read_csv`` and must contain the columns
    ``flash_episod``, ``flash_us_viewers``, ``arrow_episod`` and
    ``arrow_us_viewers``.

    Returns a 4-tuple ``(train_flash_X, train_flash_y, train_arrow_X,
    train_arrow_y)``. Each element is a list with one single-element
    ``[float]`` list per CSV row.
    """
    table = pd.read_csv(file_name)
    column_names = ('flash_episod', 'flash_us_viewers',
                    'arrow_episod', 'arrow_us_viewers')
    # Walk the four columns in lockstep so every output has one entry per row.
    records = list(zip(*(table[name] for name in column_names)))

    def as_samples(position):
        # Wrap each value in its own list: one row per sample.
        return [[float(record[position])] for record in records]

    return as_samples(0), as_samples(1), as_samples(2), as_samples(3)
#data = get_data('C:/Users/zhangwei/Desktop/Machinelearning/TVshow.csv')
# Load episode numbers and viewer figures for both shows.
# NOTE: the path is hard-coded to an absolute location on the author's machine.
x1,y1,x2,y2 = get_data('C:/Users/zhangwei/Desktop/Machinelearning/TVshow.csv')
def more_viewers(x1,y1,x2,y2,next_episode=10):
    """Predict which of two shows gets more viewers for an upcoming episode.

    One ``LinearRegression`` model is fitted per show, both predictions are
    printed, and a message naming the show with the larger prediction is
    printed. On a tie the Arrow message is printed.

    Args:
        x1, y1: episode numbers and viewer figures of the first show (Flash).
        x2, y2: episode numbers and viewer figures of the second show (Arrow).
        next_episode: episode number to predict for. Defaults to 10, the
            value that used to be hard-coded.
    """
    # predict() expects a 2-D (n_samples, n_features) input; a bare scalar is
    # rejected by current scikit-learn, so wrap it as one sample, one feature.
    query = [[next_episode]]

    regr1 = linear_model.LinearRegression()
    regr1.fit(x1, y1)
    predict_value1 = regr1.predict(query)

    regr2 = linear_model.LinearRegression()
    regr2.fit(x2, y2)
    predict_value2 = regr2.predict(query)

    print(predict_value1)
    print(predict_value2)

    # Each prediction is a one-element array; compare the plain scalar values.
    if predict_value1.item() > predict_value2.item():
        print('The Flash tv show will have more viewers for the next week~')
    else:
        print('The Tv show arrow will have more viewers for the next week~')
# Compare the predicted viewer numbers of the two shows.
more_viewers(x1,y1,x2,y2)

数据:点击打开链接

猜你喜欢

转载自blog.csdn.net/tianweidadada/article/details/80118471