1.线性算法
1.线性回归算法
# Linear regression evaluated with 10-fold cross-validation on the
# Boston housing dataset (13 predictors, target = MEDV).
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state is only honored (and, in modern scikit-learn,
# only accepted) when shuffle=True; without it KFold raises ValueError.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = LinearRegression()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('Linear_Regression:%.3f' % result.mean())
运行结果:
Linear_Regression:-34.942
2.岭回归
# Ridge regression (L2-regularized linear model) evaluated with
# 10-fold cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = Ridge()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('Ridge_Regression:%.3f' % result.mean())
运行结果:
Ridge_Regression:-34.325
3.套索回归算法
# Lasso regression (L1-regularized linear model) evaluated with
# 10-fold cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = Lasso()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('Lasso_Regression:%.3f' % result.mean())
运行结果:
Lasso_Regression:-34.614
4.弹性网络回归算法
# Elastic-net regression (combined L1/L2 regularization) evaluated
# with 10-fold cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import ElasticNet

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = ElasticNet()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('ElasticNet_Regression:%.3f' % result.mean())
运行结果:
ElasticNet_Regression:-31.292
2.非线性算法
1.K近邻算法
# K-nearest-neighbors regression evaluated with 10-fold
# cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsRegressor

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = KNeighborsRegressor()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('KNeighbors_Regression:%.3f' % result.mean())
运行结果:
KNeighbors_Regression:-107.193
2.分类与回归树
# Classification and regression tree (CART) regressor evaluated with
# 10-fold cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeRegressor

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
# NOTE: DecisionTreeRegressor itself is also randomized; its score will
# still vary run-to-run unless model random_state is fixed as well.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = DecisionTreeRegressor()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('DecisionTree_Regression:%.3f' % result.mean())
运行结果:
DecisionTree_Regression:-32.817
3.支持向量机
# Support vector regression (RBF kernel by default) evaluated with
# 10-fold cross-validation on the Boston housing dataset.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR

filename = '/home/duan/regression-datasets-housing.csv'
# Column names for the headerless CSV (typo fixed: PTRATIO, not PRTATIO).
names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
         'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
data = read_csv(filename, names=names)
array = data.values
X = array[:, 0:13]   # feature matrix: all columns except the target
Y = array[:, 13]     # target: MEDV (median home value)
num_folds = 10
seed = 7
# BUG FIX: random_state requires shuffle=True; without it modern
# scikit-learn raises ValueError and the seed would be meaningless.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
model = SVR()
scoring = 'neg_mean_squared_error'  # higher (closer to 0) is better
result = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
print('SVR_Regression:%.3f' % result.mean())
运行结果:
SVR_Regression:-91.046