机器学习初篇:良/恶性乳腺癌肿瘤预测

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接: https://blog.csdn.net/qq_44198436/article/details/100592387
# Load the training and test samples
import pandas as pd
import matplotlib.pyplot as plt

df_train = pd.read_csv('learn/breast-cancer-train.csv')
df_test = pd.read_csv('learn/breast-cancer-test.csv')

# Split the test set by label: Class == 2 is benign (negative),
# Class == 4 is malignant (positive). Only the two plotted columns are kept.
scatter_columns = ['Clump Thickness', 'Uniformity of Cell Size']
x_column, y_column = scatter_columns
is_benign = df_test['Class'] == 2
is_malignant = df_test['Class'] == 4
df_test_negative = df_test.loc[is_benign, scatter_columns]
df_test_positive = df_test.loc[is_malignant, scatter_columns]

# Scatter plot: red 'o' markers for benign, black 'x' markers for malignant
plt.scatter(
    df_test_negative[x_column],
    df_test_negative[y_column],
    marker='o',
    s=200,
    c='red',
)
plt.scatter(
    df_test_positive[x_column],
    df_test_positive[y_column],
    marker='x',
    s=150,
    c='black',
)

# Logistic-regression classifier
from sklearn.linear_model import LogisticRegression
import numpy as np

lr = LogisticRegression()
lr_1 = LogisticRegression()  # second instance; created here but not fitted in this section

# Fit on four features of the training set, then print the score obtained
# on the same four features of the test set.
feature_columns = [
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Marginal Adhesion',
    'Bland Chromatin',
]
lr.fit(df_train[feature_columns], df_train['Class'])
test_score = lr.score(df_test[feature_columns], df_test['Class'])
print('最后结果:', test_score)

# To compare individual predictions with the true labels, inspect
# lr.predict(df_test[feature_columns]) against np.array(df_test['Class']).

# Decision boundary of a logistic-regression model in the plotted plane:
# a*x + b*y + c = 0, i.e. y = (-c - a*x) / b.
#
# The scatter plot shows only 'Clump Thickness' (x) against
# 'Uniformity of Cell Size' (y), so the line has to come from a model fitted
# on exactly those two columns. Taking two coefficients and the intercept of
# a model fitted on more features ignores the remaining features' terms and
# does not give that model's boundary in this plane.
plot_features = ['Clump Thickness', 'Uniformity of Cell Size']
lr_1.fit(df_train[plot_features], df_train['Class'])

intercept = lr_1.intercept_
coef = lr_1.coef_[0, :]  # first (and only, for a two-class problem) row of coefficients
lx = np.arange(0, 12)
ly = (-intercept - lx * coef[0]) / coef[1]
plt.plot(lx, ly, c='yellow')

# Axis labels match the columns actually plotted on each axis
plt.xlabel('Clump Thickness')
plt.ylabel('Uniformity of Cell Size')

# Display the figure
plt.show()




猜你喜欢

转载自blog.csdn.net/qq_44198436/article/details/100592387