%%
% For this exercise, suppose that a high school has a dataset representing
% 40 students who were admitted to college and 40 students who were not admitted.
% Each (x^{(i)}, y^{(i)}) training example contains a student's scores on two
% standardized exams and a label indicating whether the student was admitted.
% Your task is to build a binary classification model that estimates college
% admission chances based on a student's scores on the two exams.
% In the training data:
% a. The first column of the x array holds all Test 1 scores,
%    and the second column holds all Test 2 scores.
% b. The y vector uses '1' to label a student who was admitted
%    and '0' to label a student who was not admitted.
%%
clear all;
close all;
clc;

%% Load the original data
% X : [80x2] (Test 1 score, Test 2 score)
% Y : [80x1] (1 = admitted, 0 = not admitted)
X = load('ex4x.dat');
Y = load('ex4y.dat');

% Number of examples (rows) and features (columns) before the intercept is added
[row, col] = size(X);

% Prepend the intercept column of ones: X becomes row x (col+1), i.e. 80x3 here
X = [ones(row, 1), X];

%% Plot the training data
% Use different markers for positives and negatives
positive = find(Y);
negative = find(Y == 0);
plot(X(positive, 2), X(positive, 3), 'b+');
hold on;
plot(X(negative, 2), X(negative, 3), 'r*');
xlabel('Exam1'); ylabel('Exam2')

%% Initialize the fitting parameters
% theta : [3x1] column vector of zeros, so that X * theta is well defined
theta = zeros(col + 1, 1);

%% Define the sigmoid function
% An anonymous function handle; no separate function file is required.
% It works element-wise, so it accepts scalars, vectors and matrices.
g = @(z) 1.0 ./ (1.0 + exp(-z));

%% Newton's Method
% Newton's method often converges in 5-15 iterations.
MAX_ITR = 7;
J = zeros(MAX_ITR, 1);
% m is the number of training examples
m = row;

for i = 1:MAX_ITR
    % Z : [80x1] linear scores
    Z = X * theta;

    % H : [80x1] hypothesis values, H = g(X * theta)
    H = g(Z);

    % grad : [3x1] gradient of the cost
    grad = (1/m) .* X' * (H - Y);

    % Hesn : [3x3] Hessian, (1/m) * X' * diag(H .* (1 - H)) * X.
    % bsxfun scales each row of X by its weight, which avoids building the
    % 80x80 diagonal matrices and also runs on releases without implicit
    % expansion.
    Hesn = (1/m) .* (X' * bsxfun(@times, H .* (1 - H), X));

    % J(i) : [1x1] cost at the current theta (i.e. before this iteration's
    % update), recorded to check convergence. sum() over a column vector
    % returns a scalar.
    J(i) = (1/m) * sum(-Y .* log(H) - (1 - Y) .* log(1 - H));

    % Newton update; the backslash solves Hesn * step = grad without forming
    % the explicit inverse
    theta = theta - Hesn \ grad;
end

% Display theta
theta

% Probability that a student with score 20 on exam 1 and score 80 on exam 2
% will NOT be admitted
prob = 1 - g([1, 20, 80] * theta)

%% Plot Newton's method result
% Only 2 points are needed to define a line, so choose two endpoints
plot_x = [min(X(:, 2)) - 2, max(X(:, 2)) + 2];
% Decision boundary: theta(1) + theta(2)*x1 + theta(3)*x2 = 0, solved for x2
plot_y = (-1 ./ theta(3)) .* (theta(2) .* plot_x + theta(1));
plot(plot_x, plot_y)
legend('Admitted', 'Not admitted', 'Decision Boundary')
hold off

%% Plot J
% J(i) was recorded before the i-th update, hence the 0-based iteration axis
figure
plot(0:MAX_ITR-1, J, 'o--', 'MarkerFaceColor', 'r', 'MarkerSize', 8)
xlabel('Iteration'); ylabel('J')

% Display J
J
%{
X =
55.5000 69.5000
41.0000 81.5000
53.5000 86.0000
46.0000 84.0000
41.0000 73.5000
51.5000 69.0000
51.0000 62.5000
42.0000 75.0000
53.5000 83.0000
57.5000 71.0000
42.5000 72.5000
41.0000 80.0000
46.0000 82.0000
46.0000 60.5000
49.5000 76.0000
41.0000 76.0000
48.5000 72.5000
51.5000 82.5000
44.5000 70.5000
44.0000 66.0000
33.0000 76.5000
33.5000 78.5000
31.5000 72.0000
33.0000 81.5000
42.0000 59.5000
30.0000 64.0000
61.0000 45.0000
49.0000 79.0000
26.5000 64.5000
34.0000 71.5000
42.0000 83.5000
29.5000 74.5000
39.5000 70.0000
51.5000 66.0000
41.5000 71.5000
42.5000 79.5000
35.0000 59.5000
38.5000 73.5000
32.0000 81.5000
46.0000 60.5000
36.5000 53.0000
36.5000 53.5000
24.0000 60.5000
19.0000 57.5000
34.5000 60.0000
37.5000 64.5000
35.5000 51.0000
37.0000 50.5000
21.5000 42.0000
35.5000 58.5000
26.5000 68.5000
26.5000 55.5000
18.5000 67.0000
40.0000 67.0000
32.5000 71.5000
39.0000 71.5000
43.0000 55.5000
22.0000 54.0000
36.0000 62.5000
31.0000 55.5000
38.5000 76.0000
40.0000 75.0000
37.5000 63.0000
24.5000 58.0000
30.0000 67.0000
33.0000 56.0000
56.5000 61.0000
41.0000 57.0000
49.5000 63.0000
34.5000 72.5000
32.5000 69.0000
36.0000 73.0000
27.0000 53.5000
41.0000 63.5000
29.5000 52.5000
20.0000 65.5000
38.0000 65.0000
18.5000 74.5000
16.0000 72.5000
33.5000 68.0000
Y =
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
%}
% For this exercise, suppose that a high school has a dataset representing
% 40 students who were admitted to college and 40 students who were not admitted.
% Each (x^{(i)}, y^{(i)}) training example contains a student's scores on two
% standardized exams and a label indicating whether the student was admitted.
% Your task is to build a binary classification model that estimates college
% admission chances based on a student's scores on the two exams.
% In the training data:
% a. The first column of the x array holds all Test 1 scores,
%    and the second column holds all Test 2 scores.
% b. The y vector uses '1' to label a student who was admitted
%    and '0' to label a student who was not admitted.
% The raw data is listed at the end of this file.
clear all;
close all;
clc;

%% Load the original data
% X : [80x2] (Test 1 score, Test 2 score)
% Y : [80x1] (1 = admitted, 0 = not admitted)
X = load('ex4x.dat');
Y = load('ex4y.dat');

% Number of examples (rows) and features (columns) before the intercept is added
[row, col] = size(X);

% Prepend the intercept column of ones: X becomes row x (col+1), i.e. 80x3 here
X = [ones(row, 1), X];

%% Plot the training data
% Use different markers for positives and negatives
positive = find(Y);
negative = find(Y == 0);
plot(X(positive, 2), X(positive, 3), 'b+');
hold on;
plot(X(negative, 2), X(negative, 3), 'r*');
xlabel('Exam1'); ylabel('Exam2')

%% Initialize the fitting parameters
% theta : [3x1] column vector of zeros, so that X * theta is well defined
theta = zeros(col + 1, 1);

%% Define the sigmoid function
% An anonymous function handle replaces the deprecated inline() object; like
% inline, it needs no separate function file. It works element-wise.
g = @(z) 1.0 ./ (1.0 + exp(-z));

%% Newton's Method
% Newton's method often converges in 5-15 iterations.
MAX_ITR = 7;
J = zeros(MAX_ITR, 1);
% m is the number of training examples
m = row;

for i = 1:MAX_ITR
    % Z : [80x1] linear scores
    Z = X * theta;

    % H : [80x1] hypothesis values, H = g(X * theta)
    H = g(Z);

    % grad : [3x1] gradient of the cost
    grad = (1/m) .* X' * (H - Y);

    % Hesn : [3x3] Hessian, (1/m) * X' * diag(H) * diag(1 - H) * X.
    % The two diagonal factors collapse to the weight vector H .* (1 - H);
    % bsxfun scales each row of X by its weight, which avoids building the
    % 80x80 diagonal matrices and also runs on releases without implicit
    % expansion.
    Hesn = (1/m) .* (X' * bsxfun(@times, H .* (1 - H), X));

    % J(i) : [1x1] cost at the current theta (i.e. before this iteration's
    % update), recorded to check convergence. sum() over a column vector
    % returns a scalar.
    J(i) = (1/m) * sum(-Y .* log(H) - (1 - Y) .* log(1 - H));

    % Newton update; left division solves Hesn * step = grad, which is
    % mathematically inv(Hesn) * grad without forming the inverse
    theta = theta - Hesn \ grad;
end

% Display theta
theta

% Probability that a student with score 20 on exam 1 and score 80 on exam 2
% will NOT be admitted
prob = 1 - g([1, 20, 80] * theta)

%% Plot Newton's method result
% Only 2 points are needed to define a line, so choose two endpoints
plot_x = [min(X(:, 2)) - 2, max(X(:, 2)) + 2];
% Decision boundary: theta(1) + theta(2)*x1 + theta(3)*x2 = 0, solved for x2
plot_y = (-1 ./ theta(3)) .* (theta(2) .* plot_x + theta(1));
plot(plot_x, plot_y)
legend('Admitted', 'Not admitted', 'Decision Boundary')
hold off

%% Plot J
% J(i) was recorded before the i-th update, hence the 0-based iteration axis
figure
plot(0:MAX_ITR-1, J, 'o--', 'MarkerFaceColor', 'r', 'MarkerSize', 8)
xlabel('Iteration'); ylabel('J')

% Display J
J
%{
X =
55.5000 69.5000
41.0000 81.5000
53.5000 86.0000
46.0000 84.0000
41.0000 73.5000
51.5000 69.0000
51.0000 62.5000
42.0000 75.0000
53.5000 83.0000
57.5000 71.0000
42.5000 72.5000
41.0000 80.0000
46.0000 82.0000
46.0000 60.5000
49.5000 76.0000
41.0000 76.0000
48.5000 72.5000
51.5000 82.5000
44.5000 70.5000
44.0000 66.0000
33.0000 76.5000
33.5000 78.5000
31.5000 72.0000
33.0000 81.5000
42.0000 59.5000
30.0000 64.0000
61.0000 45.0000
49.0000 79.0000
26.5000 64.5000
34.0000 71.5000
42.0000 83.5000
29.5000 74.5000
39.5000 70.0000
51.5000 66.0000
41.5000 71.5000
42.5000 79.5000
35.0000 59.5000
38.5000 73.5000
32.0000 81.5000
46.0000 60.5000
36.5000 53.0000
36.5000 53.5000
24.0000 60.5000
19.0000 57.5000
34.5000 60.0000
37.5000 64.5000
35.5000 51.0000
37.0000 50.5000
21.5000 42.0000
35.5000 58.5000
26.5000 68.5000
26.5000 55.5000
18.5000 67.0000
40.0000 67.0000
32.5000 71.5000
39.0000 71.5000
43.0000 55.5000
22.0000 54.0000
36.0000 62.5000
31.0000 55.5000
38.5000 76.0000
40.0000 75.0000
37.5000 63.0000
24.5000 58.0000
30.0000 67.0000
33.0000 56.0000
56.5000 61.0000
41.0000 57.0000
49.5000 63.0000
34.5000 72.5000
32.5000 69.0000
36.0000 73.0000
27.0000 53.5000
41.0000 63.5000
29.5000 52.5000
20.0000 65.5000
38.0000 65.0000
18.5000 74.5000
16.0000 72.5000
33.5000 68.0000
Y =
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
%}