Adaboost Matlab 实现

算法介绍：Adaboost (Adaptive boosting) 是由 Yoav Freund 和 Robert Schapire 在1995年在 "A Short Introduction to Boosting" 论文中提出。论文链接.

参考书籍：机器学习实战

这一程序随机生成数据集和标签，通过Adaboost算法训练弱分类器，最终合并为一强分类器，通过画出阈值分割线，直观显示出Adaboost的分割方式。

代码实现

这里附上我的Adaboost的matlab实现。

% Writen by: Weichen GU
% Date     : 2019/03/09
function Adaboost
    % Initialization
    clc;
    clear;
    cla;
    % Global range definition
    global lowT;
    global highT;
    lowT = 0; highT = 3000;
    % Generate data and labels
    % [1] random labels
    [data, label] = createData(50,2,lowT,highT);
    % [2] Rectangle shape region labels
    %label  = (data(:,1)-data(:,2) > 0)*2 - 1;
    % Generate data
    %[data, ~] = createData(500,2,lowT,highT);
    % [3] Circle shape region labels
    %label  = (((data(:,1)-1500).^2+(data(:,2)-1500).^2 -1000^2) > 0)*2 - 1;
    % [4] Hyperbolic shape region labels
    %label  = (((data(:,1)-1500).^2-(data(:,2)-1500).^2 -300^2) > 0)*2 - 1;
    % Draw the original figure of data with different labels
    figure(1);
    clf;
    format rat
    hold on
    plot( data(label==1,1),data(label==1,2),'b+','LineWidth',3,'MarkerSize',8);
    plot(data(label==-1,1),data(label==-1,2),'ro','LineWidth',3,'MarkerSize',8);
    xlabel('x');ylabel('y');axis([lowT highT lowT highT]);
    title('Orignal data');
    hold off
    % Draw the predicted data and labels
    figure(2);
    clf;
    hold on
    % Using Adaboost algorithm to train the data
    [~,~,strongClassifierLabel]=AdaboostTrain(data,label,10000);
    % Plot the predicted value and labels
    plot( data(strongClassifierLabel==1,1),data(strongClassifierLabel==1,2),'b+','LineWidth',3,'MarkerSize',8);
    plot(data(strongClassifierLabel==-1,1),data(strongClassifierLabel==-1,2),'ro','LineWidth',3,'MarkerSize',8);
    xlabel('x');ylabel('y');axis([lowT highT lowT highT]);
    title('Predict data and corresponding split line');
    hold off
    
end

%--- This function is to create sample data ---%
function [data,label] = createData(m,n,lowT,highT)
    % Default parameters
    if ~nargin
        lowT = 1;highT = 100;m = 2;n = 10;
    end
    % The samples should be less than the total samples
    if((m*n)>highT^2)
       return; 
    end
    data = [];label = [];
    for i = 1:1:m
        s = [];
        t = 1;
        while 1
            temp = randi([lowT, highT],1);
            if(isempty(find(s==temp, 1)))
                s=[s temp];
                t = t+1;
            end
            if(t>n)
                break;
            end
        end
    data = [data;s];
    label = randi([0, 1],m,1);
    end
    % Create labels (-1,1)
    label = label*2-1;
end

%--- weakClassifier function ---%
function [retLabel] = weakClassifier(data,dimension, threshVal,threshMethod)  
    % Weak classifier
    if ~threshMethod    % 0 - lowThresh, 1- highThresh
        threshMethod = 0;
    end
    retLabel = zeros(length(data),1)-1;
    data_temp = data(:,dimension);
    retLabel(data_temp>threshVal)=1;
    % highThresh
    if(threshMethod==1)
        retLabel = -retLabel;
    end
end

function [bestWeakClassifier, minErr, bestLabel]=selectBestStump(data, label,D)
    [m, n] = size(data);
    step = 2*m; % Step definion
    bestWeakClassifier = []; % Best weak classifier in one loop
    bestLabel = zeros(m,1);  % Best predicted label
    minErr = inf;            % Initialize minimum error
    success=0;               % This is for draw the split line(threshold) of every loop
    for i=1:1:n
        minValue = min(data(:,i));  % Get the minimum value 
        maxValue = max(data(:,i));  % and the maximum value
        stepValue = (maxValue-minValue)/step;       % Split the difference by the steps
        % Traverse the values
        for j = -1:step+1
            % Low threshold and high threshold
            for k = 0:1:1
                threshVal = minValue+stepValue*j;
                % for every threshold get the predictlabel
                predictLabel = weakClassifier(data,i,threshVal,k);
                % assign error data to 1 and right data to0
                errData = ones(m,1);
                errData(label==predictLabel)=0;
                % Get the weighted error by multipling D and errData
                weightedErr =D'*errData;
                % Get the minumum weightedErr in one loop
                if (weightedErr<minErr)
                    success=1;
                    dimemsion = i;
                    thresh_temp = threshVal;
                    minErr = weightedErr;
                    bestLabel = predictLabel;
                    bestWeakClassifier={'dimension' i;'thresh' threshVal};
                end
            end
        end
    end
        %  Draw the split line(threshold) in one loop
        if (success ==1)
         drawline(dimemsion,thresh_temp,step);
        end
end
%--- This function is to draw the split line in every loop ---%
function [ok] = drawline(i,threshVal,step)
    global lowT;global highT;
    % i = 1 horizontal axis
    if(i==1)
            cx=threshVal*ones(1,step+1);
            cy=lowT:(highT-lowT)/step:highT;
            plot(cx,cy,'m:','lineWidth',2)
    end
    % i = 2 vertical axis
    if(i==2)
        cy=threshVal*ones(1,step+1);
        cx=lowT:(highT-lowT)/step:highT;
        plot(cx,cy,'m:','lineWidth',2)
    end
    % Return value
    ok = 1;
end
%--- Simple realization method for Adaboost algorithm ---%
function [weakClassifier,err,strongClassifier] = AdaboostTrain(data,label,numIteration)
    [m, ~] = size(data);  
    weakClassifier = {};
    D = ones(m,1)/m;
    combinedWeakClassifier = zeros(m,1);
    for i = 1:1:numIteration
        [bestWeakClassifier, minErr, bestLabel]=selectBestStump(data, label,D);
        alpha= 0.5*log((1-minErr)/minErr);
        bestWeakClassifier(3,:) =  {'alpha' alpha};
        % Store weak classifier parameters
        weakClassifier=[weakClassifier;bestWeakClassifier];
        % Get D value
        D = D.*exp(-alpha*label.*bestLabel);
        Z_t = 2*sqrt(minErr*(1-minErr));
        % Normalize D, we can also divide D by sum(D)
        D = D/Z_t;
        % Combine weak classifier
        combinedWeakClassifier = combinedWeakClassifier+alpha*bestLabel;
        % Generate strong classifier
        strongClassifier = sign(combinedWeakClassifier);
        [~,err]=symerr(label,strongClassifier);
        % Display error rate
        err = vpa(err)
        % If error = 0, then return
        if(err==0.0)
            break;
        end
    end
end