import numpy as np
from sklearn.linear_model import LinearRegression
import sklearn.datasets as datasets
# Part 1: evaluation metrics for linear regression, using the diabetes data set.
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_squared_log_error,
)
from sklearn.model_selection import train_test_split

diabetes = datasets.load_diabetes()
X = diabetes["data"]
y = diabetes["target"]

# Hold out 20% of the samples for testing. No random_state is given, so the
# split (and therefore every number below) changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Author's note: the features contain both positive and negative values,
# which indicates the data set has already been normalized.

lr = LinearRegression()
lr.fit(X_train, y_train)

# Predictions are rounded to whole numbers before being scored below.
y_ = lr.predict(X_test).round(0)

# R squared, also called the coefficient of determination.
# It is defined as (1 - u/v), where u is the residual sum of squares
# ((y_true - y_pred) ** 2).sum() and v is the total sum of squares
# ((y_true - y_true.mean()) ** 2).sum().
# NOTE: lr.score() predicts from X_test itself, so it does not use the
# rounded predictions stored in y_.
score = lr.score(X_test, y_test)

# The functions imported from sklearn.metrics above are all regression
# evaluation metrics; only the mean absolute error is reported here.
ss = mean_absolute_error(y_test, y_)
print(ss)
# Part 2: KNN classification of the cancer data set
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

cancer = pd.read_csv("./cancer.csv")

# Features: every row, columns from position 2 onward.
X = cancer.iloc[:, 2:]
# Target: the column named "Diagnosis".
y = cancer.Diagnosis

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Declare and train the classifier on the raw (unscaled) features.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# predict() already returns a NumPy array, so there is no `.values` to take.
y_ = knn.predict(X_test)

# The bare expressions below follow the original notebook-style flow: their
# values are displayed in an interactive session and discarded in a script.
knn.score(X_test, y_test)  # evaluation: mean accuracy
accuracy_score(y_test, y_)  # evaluation: the same accuracy from the predictions

# Cross table for inspecting the misclassified samples. margins=True adds the
# row/column totals; the diagonal holds the correctly classified samples.
pd.crosstab(
    index=y_test,
    columns=y_,
    rownames=["确诊"],
    colnames=["预测"],
    margins=True,
)

confusion_matrix(y_test, y_)  # similar to the cross table

# More evaluation metrics for reference.
ss = classification_report(y_test, y_)

# ---------------------------------------------------------------------------
# Preprocessing: StandardScaler standardizes each feature column to zero mean
# and unit variance. The result is a 2-D array, not values limited to 0~1.
s = StandardScaler()
# NOTE(review): the scaler is fitted on all of X before the split, so the test
# rows influence the scaling statistics; consider fitting on the training
# split only.
X2 = s.fit_transform(X)

X2_train, X2_test, y2_train, y2_test = train_test_split(X2, y, test_size=0.2)

knn2 = KNeighborsClassifier()
# fit() needs the training labels as well as the training features.
knn2.fit(X2_train, y2_train)
y_2 = knn2.predict(X2_test)
accuracy_score(y2_test, y_2)