SVR实现波士顿房价预测

基于Sklearn的SVR实现波士顿房价预测

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectPercentile, f_regression
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import matplotlib as plt
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
# Load the housing table from a local Excel sheet. This replaces the earlier
# `load_boston()` approach (boston.data / boston.target) that used to live here.
data_path1 = 'housing_true.xls'
hd1 = pd.read_excel(data_path1)
# hd1.fillna(0, inplace=True)  # optional: replace missing values with 0

# Features: every column except the target 'MEDV' and the excluded 'LSTAT'.
X = hd1.drop(['MEDV','LSTAT'],axis=1)

# Target as a flat 1-D array of length len(y1).
# The previous call, np.reshape(y1, len(y1), 1), passed `1` as the positional
# `order` argument of np.reshape, which only accepts 'C'/'F'/'A' and raises a
# TypeError on current NumPy. reshape(-1) states the intended 1-D shape.
y1 = hd1['MEDV']
y = np.asarray(y1).reshape(-1)

# Column labels for the feature frame built below; 'LSTAT' and the target
# 'MEDV' are not part of the list.
names = [
    'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM',
    'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B',
]

# Split into train/test partitions; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

# Training features as a DataFrame restricted to the columns in `names`.
# Handy for exploratory plots such as pd.plotting.scatter_matrix coloured by
# y_train; nothing below depends on it.
boston_dataframe = pd.DataFrame(data=X_train, columns=names)
# Standardize the features. The scaler is fitted on the training split only
# and then re-used, unchanged, on the test split. Re-fitting it on the test
# data (the previous fit_transform call) scaled the two splits with different
# means/variances, so the model was evaluated on inconsistently scaled inputs.
standard = StandardScaler()
X_train_standard = standard.fit_transform(X_train)
X_test_standard = standard.transform(X_test)

# Keep the top 50% of features ranked by the univariate f_regression score.
# `percentile` is passed by keyword because it is keyword-only in current
# scikit-learn releases. The selector is fitted on the same standardized array
# it later transforms, rather than on the raw frame, so fit and transform
# inputs are consistent.
select = SelectPercentile(f_regression, percentile=50)
select.fit(X_train_standard, y_train)

# Apply the learned feature mask to both splits.
X_train_select = select.transform(X_train_standard)
X_test_select = select.transform(X_test_standard)
# RBF-kernel support vector regressor; C and gamma are tuned by an exhaustive
# 5-fold cross-validated search over a 6 x 6 logarithmic grid.
svr = SVR(kernel="rbf")
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}
grid = GridSearchCV(svr, param_grid=param_grid, cv=5)
grid.fit(X_train_select, y_train)
# Diagnostics available after fitting: grid.best_score_, grid.best_params_,
# and grid.score(X_test_select, y_test).

# Predict with the already-fitted search object. The previous code called
# grid.fit(...) a second time here, repeating the entire grid search only to
# reach .predict().
y_svr = grid.predict(X_test_select)
# Plot the true targets against the SVR predictions, one point per test sample.
y_data1 = y_test
y_data2 = y_svr
x_data = range(len(X_test_select))
print(len(y_data1))

plt.figure(figsize=(20, 8), dpi=80)
for _series, _caption in ((y_data1, '实际值'), (y_data2, '预测值')):
    plt.plot(x_data, _series, label=_caption)
plt.grid(alpha=0.3)  # faint grid lines; alpha sets the transparency
plt.ylabel("房价")
plt.legend()
plt.show()

猜你喜欢

转载自blog.csdn.net/ziqingnian/article/details/108350803