# A Newbie's Comeback Road in Machine Learning: Day 6

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt

path = 'ex2data2.txt'
data_init = pd.read_csv(path, header=None, names=['Test 1', 'Test 2', 'Accepted'])
data_init.head()

positive2 = data_init[data_init['Accepted'].isin([1])]
negative2 = data_init[data_init['Accepted'].isin([0])]

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive2['Test 1'], positive2['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative2['Test 1'], negative2['Test 2'], s=50, c='r', marker='x', label='Rejected')
ax.legend()
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')
plt.show()

degree = 6
data2 = data_init
x1 = data2['Test 1']
x2 = data2['Test 2']

data2.insert(3, 'ones', 1)

# Map the two raw features into all polynomial terms of x1 and x2 up to the sixth power
for i in range(1, degree + 1):
    for j in range(0, i + 1):
        data2['F' + str(i - j) + str(j)] = np.power(x1, i - j) * np.power(x2, j)

data2.drop('Test 1', axis=1, inplace=True)
data2.drop('Test 2', axis=1, inplace=True)
data2.head()
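The cost and gradient functions below both call a sigmoid helper that was defined earlier in this series; a minimal sketch of it, in case it is not already in scope:

# Logistic (sigmoid) function used by the hypothesis h(x) = g(theta^T x)
def sigmoid(z):
    return 1 / (1 + np.exp(-z))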

# Define the regularized cost function:
def costReg(theta, X, y, learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    first = np.multiply(-y, np.log(sigmoid(X * theta.T)))
    second = np.multiply((1 - y), np.log(1 - sigmoid(X * theta.T)))
    # Regularization term: theta_0 (the bias term) is not penalized
    reg = (learningRate / (2 * len(X))) * np.sum(np.power(theta[:, 1:theta.shape[1]], 2))
    return np.sum(first - second) / len(X) + reg
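For reference, this is the regularized logistic-regression cost that costReg computes, where h_theta(x) = g(theta^T x) is the sigmoid hypothesis, m is the number of training examples, and learningRate plays the role of lambda:

$$J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log\left(h_\theta(x^{(i)})\right) - \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^{2}$$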

# Define the gradient function (with regularization):
def gradientReg(theta, X, y, learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    parameters = int(theta.ravel().shape[1])
    grad = np.zeros(parameters)
    error = sigmoid(X * theta.T) - y
    for i in range(parameters):
        term = np.multiply(error, X[:, i])
        if i == 0:
            # theta_0 is not regularized
            grad[i] = np.sum(term) / len(X)
        else:
            grad[i] = np.sum(term) / len(X) + (learningRate / len(X)) * theta[:, i]
    return grad
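The gradient it implements, with the bias term left unregularized exactly as the i == 0 branch above does:

$$\frac{\partial J(\theta)}{\partial \theta_0} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_0^{(i)}$$

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)} + \frac{\lambda}{m} \theta_j, \qquad j \ge 1$$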


# Initialize X, y, theta
cols = data2.shape[1]
X2 = data2.iloc[:, 1:cols]
y2 = data2.iloc[:, 0:1]
theta2 = np.zeros(cols - 1)
X2 = np.array(X2.values)
y2 = np.array(y2.values)
learningRate = 1

costReg(theta2, X2, y2, learningRate)  # Calculate the initial cost


# Use the optimization library to solve for the parameters
result2 = opt.fmin_tnc(func=costReg, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate))
result2
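The accuracy check that follows reuses the predict function from the previous section of this series; a minimal sketch of it, assuming the usual 0.5 threshold on the sigmoid output:

# Sketch of the earlier predict helper: label 1 when the predicted probability is at least 0.5
def predict(theta, X):
    probability = sigmoid(X * theta.T)
    return [1 if x >= 0.5 else 0 for x in probability]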


# Use the prediction function from the previous section to check the accuracy of our model on the training set
theta_min = np.matrix(result2[0])
predictions = predict(theta_min, X2)
correct = [1 if ((a == 1 and b == 1) or (a == 0 and b == 0)) else 0 for (a, b) in zip(predictions, y2)]
accuracy = sum(map(int, correct)) / len(correct) * 100
print('accuracy = {0}%'.format(accuracy))


# Plot the decision boundary
def hfunc2(theta, x1, x2):
    temp = theta[0][0]
    place = 0
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            temp += np.power(x1, i - j) * np.power(x2, j) * theta[0][place + 1]
            place += 1
    return temp

# Note: hfunc2 is the decision function, i.e. the function that separates the two classes:
# y = theta0 + theta1*x1 + theta2*x2 + theta3*x1^2 + theta4*x1*x2 + theta5*x2^2 + ...

# The decision boundary is the set of points (x1, x2) where this decision function is approximately zero: every point on a dense grid is evaluated with hfunc2, and only those whose absolute value falls below a small threshold are kept and drawn as the yellow points in the figure.
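In symbols (writing phi(x1, x2) here for the degree-6 polynomial feature mapping built above), the plotted boundary is the set of grid points

$$\left\{ (x_1, x_2) \;:\; \left| \theta^{T} \phi(x_1, x_2) \right| < \varepsilon \right\}, \qquad \varepsilon = 2 \times 10^{-3}$$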
def find_decision_boundary(theta):
    t1 = np.linspace(-1, 1.5, 1000)
    t2 = np.linspace(-1, 1.5, 1000)

    coordinates = [(x, y) for x in t1 for y in t2]
    x_cord, y_cord = zip(*coordinates)
    h_val = pd.DataFrame({'x1': x_cord, 'x2': y_cord})
    h_val['hval'] = hfunc2(theta, h_val['x1'], h_val['x2'])

    decision = h_val[np.abs(h_val['hval']) < 2 * 10**-3]
    return decision.x1, decision.x2

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive2['Test 1'], positive2['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative2['Test 1'], negative2['Test 2'], s=50, c='r', marker='x', label='Rejected')
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')

x, y = find_decision_boundary(result2)
plt.scatter(x, y, c='y', s=10, label='Prediction')
ax.legend()
plt.show()

# Overfitting: with lambda = 0 (no regularization) the boundary overfits the training set
learningRate2 = 0
result3 = opt.fmin_tnc(func=costReg, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate2))

# Underfitting: with lambda = 100 the penalty is too strong and the boundary underfits
learningRate2 = 100
result4 = opt.fmin_tnc(func=costReg, x0=theta2, fprime=gradientReg, args=(X2, y2, learningRate2))
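To see the two effects, the plotting code from above can be reused with these results; a sketch (not in the original post) that draws the boundary for the lambda = 0 fit, with result4 swapped in to see the underfit lambda = 100 boundary:

# Sketch: visualize the overfit boundary (lambda = 0); pass result4 instead for lambda = 100
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive2['Test 1'], positive2['Test 2'], s=50, c='b', marker='o', label='Accepted')
ax.scatter(negative2['Test 1'], negative2['Test 2'], s=50, c='r', marker='x', label='Rejected')
ax.set_xlabel('Test 1 Score')
ax.set_ylabel('Test 2 Score')

x, y = find_decision_boundary(result3)
plt.scatter(x, y, c='y', s=10, label='Prediction')
ax.legend()
plt.show()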

Origin blog.csdn.net/ballzy/article/details/104430708