大数据技术(第8节实验课-----机器学习和分类预测)

第1题

P196 实训1
注意:test_size参数为0.2,random_state参数为123
第1行提交数据: 第3问标准化后,wine最后一行的 Alcohol列(酒精浓度)的数据,保留6位有效数字(MinMax)
第2行提交数据: 第4问wine数据集的PCA降维后,最后一行Alcohol列(酒精浓度)的数据是多少?保留6位有效数字

# Exercise 1: min-max scaling followed by PCA dimensionality reduction
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

# 1. Load the wine data set (the CSV is decoded as GBK).
wine = pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\wine.csv", encoding="GBK")

# 2. Split the frame into the "Class" label column and the remaining features.
wine_target = wine["Class"]
wine_data = wine.drop(columns="Class")

# 3. Min-max scale the features; the scaler is fitted on the full feature
#    frame and then applied to that same frame.
scale = MinMaxScaler()
scale.fit(wine_data)
wine_scale = scale.transform(wine_data)

# Last row, first feature column of the scaled matrix
# (presumably the Alcohol column -- confirm against the CSV header).
print(f"{wine_scale[-1, 0]:.6f}")

# 4. Project the scaled features onto two principal components.
pca_model = PCA(n_components=2)
pca_model.fit(wine_scale)
wine_trainPca = pca_model.transform(wine_scale)

# Last row, first principal component of the projected data.
print(f"{wine_trainPca[-1, 0]:.6f}")

第2题 p197实训2

第3行提交数据:最高的FMI分数,(保留6位有效数字) #用标准化后的数据进行kmeans
第4行提交数据:最高的calinski-harabasz指数是多少(保留6位有效数字) #评分用原始的data

# Exercise 2: evaluating K-Means clusterings of the wine data

# Part 1: Fowlkes-Mallows index (FMI)
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import fowlkes_mallows_score
from sklearn.preprocessing import MinMaxScaler

# 1. Load the data and separate the "Class" labels from the features.
wine = pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\wine.csv", encoding="GBK")
wine_target = wine["Class"]
wine_data = wine.drop(columns="Class")

# 2. Min-max scale the features before clustering.
scale = MinMaxScaler()
scale.fit(wine_data)
wine_scale = scale.transform(wine_data)

# 3. For each cluster count from 2 to 6, cluster the scaled features and
#    compare the cluster labels with the true classes using FMI.
for n_clusters in range(2, 7):
    kmeans = KMeans(n_clusters=n_clusters, random_state=123)
    kmeans.fit(wine_scale)
    score = fowlkes_mallows_score(wine_target, kmeans.labels_)
    print(f"{n_clusters:d}  {score:.6f}")

# Separator between the FMI output and whatever is printed next.
print("------------------------------\n")
    
    
# Part 2: Calinski-Harabasz score (CHS)
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans

# The metric was originally exported under the misspelled name
# ``calinski_harabaz_score``; that alias has been removed from current
# scikit-learn releases, so prefer the correctly spelled function and fall
# back to the old name only on releases that do not provide it.
try:
    from sklearn.metrics import calinski_harabasz_score
except ImportError:
    from sklearn.metrics import calinski_harabaz_score as calinski_harabasz_score

# 1. Load the data and separate the "Class" labels from the features.
wine=pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\wine.csv",encoding="GBK")
wine_data=wine.drop("Class",axis=1)
wine_target=wine["Class"]

# 2. Min-max scale the features; the scaled matrix is what gets scored below.
scale=MinMaxScaler().fit(wine_data)
wine_test=scale.transform(wine_data)

# 3. For each cluster count from 2 to 6, fit K-Means and print the
#    Calinski-Harabasz score of the resulting labels.
# NOTE(review): the model is fitted on the raw features (wine_data) while the
# score is computed on the min-max scaled features (wine_test), so clustering
# and scoring happen in different feature spaces. This is kept as in the
# original so the printed values do not change -- confirm which combination
# the exercise actually intends.
for i in range(2,7):
    kmeans=KMeans(n_clusters=i,random_state=123).fit(wine_data)
    score=calinski_harabasz_score(wine_test,kmeans.labels_)
    print("%d  %.4f"%(i,score))

第3题 P197 实训3

提交数据:wine预测的准确度,正确预测数/总预测数

# Exercise 3: SVM classification accuracy on the wine data
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

# 1. Load the data set.
wine = pd.read_csv(r"G:\大数据实验数据库\3.大数据实验数据\wine.csv", encoding="GBK")

# 2. Separate the "Class" labels from the feature columns.
wine_target = wine["Class"]
wine_data = wine.drop(columns="Class")

# 3. Hold out 20% of the rows for testing, with a fixed seed.
(
    wine_data_train,
    wine_data_test,
    wine_target_train,
    wine_target_test,
) = train_test_split(wine_data, wine_target, test_size=0.2, random_state=123)

# 4. Min-max scaling: the scaler is fitted on the training split only and
#    then applied to both splits.
scale = MinMaxScaler()
scale.fit(wine_data_train)
wine_scale_train = scale.transform(wine_data_train)
wine_scale_test = scale.transform(wine_data_test)

# 5. Train an SVM classifier with default hyper-parameters.
wine_SVM = SVC()
wine_SVM.fit(wine_scale_train, wine_target_train)

# 6. Predict the held-out split and print the per-class report.
wine_target_pred = wine_SVM.predict(wine_scale_test)
print(classification_report(wine_target_test, wine_target_pred))

# 7. Accuracy = number of predictions equal to the true label / test size.
matches = wine_target_pred == wine_target_test
true = np.sum(matches)
print('预测对的结果数目为:', true)
print('预测结果准确率为:', true / len(wine_target_test))

发布了122 篇原创文章 · 获赞 221 · 访问量 2万+

猜你喜欢

转载自blog.csdn.net/jjsjsjjdj/article/details/103333954