大数据第9节课(课后习题)

实训3:构建基于wine数据集的SVM模型

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Step 1: load the wine dataset (the CSV file is read with GBK encoding).
wine = pd.read_csv(
    r"G:\大数据实验数据库\3.大数据实验数据\wine.csv",
    encoding="GBK",
)

# Step 2: separate the "Class" label column from the remaining feature columns.
wine_target = wine["Class"]
wine_data = wine.drop(columns="Class")

# Step 3: hold out 20% of the rows as a test set; the seed is fixed so the
# split is the same on every run.
(
    wine_data_train,
    wine_data_test,
    wine_target_train,
    wine_target_test,
) = train_test_split(wine_data, wine_target, test_size=0.2, random_state=123)

# Step 4: min-max scaling. The scaler is fitted on the training features only
# and then applied to both splits, so the test set does not influence the
# scaling parameters.
scale = MinMaxScaler()
scale.fit(wine_data_train)
wine_scale_train = scale.transform(wine_data_train)
wine_scale_test = scale.transform(wine_data_test)

# Step 5: train an SVM classifier with default hyper-parameters.
wine_SVM = SVC()
wine_SVM.fit(wine_scale_train, wine_target_train)

# Step 6: predict on the scaled test features and print the
# classification report.
wine_target_pred = wine_SVM.predict(wine_scale_test)
print(classification_report(wine_target_test, wine_target_pred))

# Step 7: accuracy computed by hand -- count the predictions that equal the
# true label, then divide by the number of test samples.
matches = wine_target_pred == wine_target_test
true = np.sum(matches)
print('预测对的结果数目为:', true)
print('预测结果准确率为:', true / len(wine_target_test))

实训4:构建基于wine_quality数据集的回归模型

import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

from sklearn.metrics import (
    explained_variance_score,
    mean_absolute_error,
    mean_squared_error,
    median_absolute_error,
    r2_score,
)

# Load the wine-quality data; the file uses ';' as the field separator.
data = pd.read_csv(
    r"G:\大数据实验数据库\3.大数据实验数据\winequality.csv",
    sep=";",
)

# The first 11 columns are used as features and the column at index 11 as
# the regression target (presumably the quality score -- confirm against
# the CSV header).
x = data.iloc[:, :11]
y = data.iloc[:, 11]

# 80/20 train/test split with a fixed seed.
x1, x2, y1, y2 = train_test_split(x, y, test_size=0.2, random_state=42)

# Fit a gradient boosting regressor with default hyper-parameters and
# predict on the held-out features.
GBR_quality = GradientBoostingRegressor()
GBR_quality.fit(x1, y1)
y_pred = GBR_quality.predict(x2)

# Print each regression metric for the test-set predictions, one per line.
for label, metric in (
    ("平均绝对误差为", mean_absolute_error),
    ("均方误差为", mean_squared_error),
    ("中值绝对误差", median_absolute_error),
    ("可解释方差值", explained_variance_score),
    ("R的平方值", r2_score),
):
    print(label, metric(y2, y_pred))


#本次构建的梯度提升回归树模型的平均绝对误差和均方误差相对合理,但是可解释方差值和R的平方值离1比较远,故本次构建的模型不太理想
发布了122 篇原创文章 · 获赞 221 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/jjsjsjjdj/article/details/103401613
今日推荐