# Prepend a column of ones so theta[0] acts as the intercept (bias) term.
data1.insert(0, 'ones', 1)
X = data1.values[:, :-1]   # design matrix: ones column + exam-score features
y = data1.values[:, -1]    # labels: last column (admission 0/1, per the plots below)
# Initialise one parameter per feature column.
# BUG FIX: the original line was missing the closing parenthesis.
theta = np.zeros(X.shape[1])
def sigmoid(x):
    """Logistic function 1 / (1 + e^{-x}); works elementwise on ndarrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def costFunction(theta, X, y):
    """Logistic-regression cross-entropy cost (mean negative log-likelihood).

    Parameters
    ----------
    theta : parameter vector, shape (n_features,)
    X : design matrix, shape (n_samples, n_features)
    y : binary labels (0/1), shape (n_samples,)

    Returns
    -------
    float : mean of -y*log(h) - (1-y)*log(1-h) over the samples.
    """
    # Hoisted so sigmoid(X @ theta) is evaluated once instead of twice.
    h = sigmoid(np.dot(X, theta))
    return np.mean(-y * np.log(h) - (1 - y) * np.log(1 - h))
# Sanity check: with theta = 0 every hypothesis is 0.5, so the cost is ln(2).
print(costFunction(theta, X, y))  # notebook output: 0.6931471805599453
# Define the gradient function (used by the optimizer below)
def gradient(theta, X, y):
    """Gradient of the logistic-regression cost with respect to theta.

    Returns (1/m) * X^T (sigmoid(X theta) - y), shape (n_features,).
    Note: this only evaluates the gradient; it does not take a descent step.
    """
    return (1/X.shape[0]) * np.dot(X.T, sigmoid(np.dot(X, theta)) - y)
# Notebook output of gradient(theta, X, y) at theta = 0:
# array([ -0.1 , -12.00921659, -11.26284221])
# Solve for theta with a scipy optimizer
import scipy.optimize as opt

# Minimise the cost with Newton-CG, supplying the analytic gradient as `jac`.
# BUG FIX: the original fused `res` onto the same line as the assignment
# (a SyntaxError), and `theta_result` -- used below -- was never defined.
res = opt.minimize(fun=costFunction, x0=theta, args=(X, y), jac=gradient, method='Newton-CG')
theta_result = res.x  # optimised parameters, used for validation/prediction/plotting
print(res)
# Notebook output:
#      fun: 0.20349771251305832
#      jac: array([1.68639010e-05, 9.03344162e-04, 8.76022414e-04])
#  message: 'Optimization terminated successfully.'
#     nfev: 71
#     nhev: 0
#      nit: 28
#     njev: 240
#   status: 0
#  success: True
#        x: array([-25.1527642 ,   0.20616308,   0.20140236])
# Validation
# Predicted admission probability for a student with exam scores (45, 85);
# the leading 1 is the intercept feature.
print(sigmoid(np.dot(np.array([1, 45, 85]), theta_result)))  # notebook output: 0.776220348464748
def predict(X, theta):
    """Return hard 0/1 class labels: 1 where sigmoid(X . theta) >= 0.5."""
    probabilities = sigmoid(np.dot(X, theta))
    return (probabilities >= 0.5).astype(int)
y_pred = predict(X, theta_result)

# Per-class precision/recall on the training set.
from sklearn.metrics import classification_report
print(classification_report(y, y_pred))
# Notebook output:
#              precision    recall  f1-score   support
#         0.0       0.87      0.85      0.86        40
#         1.0       0.90      0.92      0.91        60
# avg / total       0.89      0.89      0.89       100
# Decision boundary: sigmoid(theta . x) = 0.5  <=>  theta0 + theta1*x1 + theta2*x2 = 0,
# so x2 = -(theta0 + theta1*x1) / theta2.  Evaluate at the extremes of exam-1 scores.
# BUG FIX: renamed from x/y so the label vector `y` is not clobbered.
x_boundary = [X[:, 1].min(), X[:, 1].max()]
y_boundary = [-(theta_result[0] + theta_result[1] * v) / theta_result[2] for v in x_boundary]

_, ax = plt.subplots(figsize=(10, 6))
# BUG FIX: the original crammed four statements onto one line (SyntaxError).
data1[data1['Admission'] == 0].plot(x='Exam 1 score', y='Exam 2 score', kind='scatter',
                                    c='red', marker='o', ax=ax, label='Not admitted')
data1[data1['Admission'] == 1].plot(x='Exam 1 score', y='Exam 2 score', kind='scatter',
                                    c='blue', marker='x', ax=ax, label='Admitted')
ax.plot(x_boundary, y_boundary)
plt.show()