机器学习之逻辑回归实现二分类器

机器学习之逻辑回归实现二分类器

损失函数

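$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right],\qquad h_\theta(x)=\frac{1}{1+e^{-\theta^{T}x}}$$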

对损失函数求theta的偏导

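$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)})-y^{(i)}\right)x_j^{(i)}$$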

类似于线性回归,根据偏导数和学习率 $\alpha$ 更新参数 theta:$\theta_j := \theta_j - \alpha\,\dfrac{\partial J(\theta)}{\partial \theta_j}$

python实现

# Import the required libraries and the dataset loader
import numpy as np
from sklearn import datasets

# Load the iris dataset
data = datasets.load_iris()
# This is a binary problem, so only the first 100 samples are needed
# (50 samples per class)
first_two_classes = slice(None, 100)
train_x = data['data'][first_two_classes]
# Class labels of those samples, reshaped into a column vector
train_y = data['target'][first_two_classes].reshape((-1, 1))

# Import the logisticRegression class; its implementation is listed below
from logisticRegression import logisticRegression

lR = logisticRegression(train_x, train_y, alpha=0.1, epoches=20)
# Run training through the class's train method
lR.train()

logisticRegression.py

import numpy as np
import matplotlib.pyplot as plt


class logisticRegression(object):
    """Binary logistic-regression classifier trained by batch gradient descent.

    The model is ``sigmoid(x . w)`` with one weight per feature column and no
    separate bias term. Each call to :meth:`network` performs one full-batch
    update and records the cross-entropy cost in ``self._cost``.

    Note that both the recorded cost and the gradient are *sums* over the
    training samples (they are not divided by the sample count), so the
    effective step size scales with the number of samples.
    """

    # Added inside the logarithms of the cost so that log(0) is never taken.
    _LOG_EPS = 0.00001
    # Coefficient of the weight-decay term added to the gradient.
    _WEIGHT_DECAY = 0.01

    def __init__(self, train_x, train_y, alpha, epoches=100):
        """Store the training data and draw small random initial weights.

        Args:
            train_x: 2-D array-like of shape (n_samples, n_features).
            train_y: 0/1 labels, either 1-D of length n_samples or a
                column of shape (n_samples, 1).
            alpha: learning rate used in the weight update.
            epoches: number of update steps performed by :meth:`train`.

        Raises:
            ValueError: if the number of labels differs from the number of
                samples.
        """
        self._x = np.asarray(train_x)
        # Always keep the labels as a column vector: a 1-D vector would make
        # ``y_hat - y`` broadcast to an (n, n) matrix and corrupt the update.
        self._y = np.asarray(train_y).reshape((-1, 1))
        if self._x.shape[0] != self._y.shape[0]:
            raise ValueError(
                "train_x has %d samples but train_y has %d labels"
                % (self._x.shape[0], self._y.shape[0])
            )
        self._w = 0.1 * np.random.randn(self._x.shape[1], 1)
        self._alpha = alpha
        self._epoches = epoches
        self._cost = []

    def hx2(self, x):
        """Return hard 0/1 predictions (as floats) for the rows of ``x``.

        A sample is assigned to class 1 when its predicted probability is
        at least 0.5.
        """
        return (self.hx(x) >= 0.5).astype(float)

    def hx(self, x):
        """Return the predicted probability of class 1 for the rows of ``x``."""
        z = np.asarray(x).dot(self._w)
        # exp(-|z|) lies in (0, 1] and cannot overflow; pick the algebraically
        # equivalent sigmoid form for each sign of z.
        shrink = np.exp(-np.abs(z))
        return np.where(z >= 0, 1 / (1 + shrink), shrink / (1 + shrink))

    def network(self,):
        """Run one full-batch gradient-descent step and record the cost."""
        y_hat = self.hx(self._x)
        err = y_hat - self._y
        # Cross-entropy summed over all samples (weight decay is not included
        # in the recorded cost).
        J = -np.sum(
            self._y * np.log(y_hat + self._LOG_EPS)
            + (1 - self._y) * np.log(1 - y_hat + self._LOG_EPS)
        )
        self._cost.append(J)
        delta_w = self._x.T.dot(err)
        self._w -= self._alpha * (delta_w + self._WEIGHT_DECAY * self._w)
        # Progress output: the weights after this step.
        print(self._w)

    def plotCost(self,):
        """Plot the recorded cost against the update-step index."""
        plt.plot(self._cost)
        plt.xlabel("epoches")
        plt.ylabel("cost")
        plt.title("LogisticRegression")
        plt.show()

    def plot_y_hat(self, y_hat):
        """Scatter-plot the training samples coloured by the labels ``y_hat``."""
        self._plot(y_hat)
        plt.title("predict_label")
        plt.legend(["predict_label-1", "predict_label-0"])
        plt.show()

    def plot_y(self,):
        """Scatter-plot the training samples coloured by their true labels."""
        self._plot(self._y)
        plt.title("true_label")
        plt.legend(["true_label-1", "true_label-0"])
        plt.show()

    def _plot(self, y):
        """Scatter the first two feature columns, split by the labels ``y``.

        Label-1 samples are drawn first ('+', blue), then label-0 samples
        ('o', yellow); the legends set by the callers rely on that order.
        """
        labels = np.asarray(y).ravel()
        position_zero = labels == 0
        position_one = labels == 1
        plt.scatter(self._x[position_one, 0], self._x[position_one, 1],
                    marker='+', label='+', color='b')
        plt.scatter(self._x[position_zero, 0], self._x[position_zero, 1],
                    marker='o', label='-', color='y')
        plt.xlabel("feature_1")
        plt.ylabel("feature_2")

    def acc(self, y_hat, y):
        """Return the fraction of entries of ``y_hat`` that equal ``y``.

        Both arguments are flattened first, so 1-D vectors and (n, 1)
        columns can be mixed freely.

        Raises:
            ValueError: if the two inputs hold a different number of labels.
            ZeroDivisionError: if the inputs are empty.
        """
        predicted = np.asarray(y_hat).ravel()
        actual = np.asarray(y).ravel()
        if predicted.shape != actual.shape:
            raise ValueError(
                "y_hat has %d labels but y has %d"
                % (predicted.size, actual.size)
            )
        return np.count_nonzero(predicted == actual) / actual.size

    def train(self, plot=True):
        """Run ``epoches`` update steps, then optionally show the plots.

        Args:
            plot: when True (the default) show the predicted-label plot,
                the true-label plot and the cost curve after training.
                Pass False to train without opening any figure.
        """
        for _ in range(self._epoches):
            self.network()
        if plot:
            self.plot_y_hat(self.hx2(self._x))
            self.plot_y()
            self.plotCost()

运行结果

样本的预测类别
这里写图片描述
样本的真实类别
这里写图片描述
损失函数追踪
这里写图片描述

总结

从三张图不难看出逻辑回归的效果还是很不错的,大概七八步就能收敛,这也得益于iris数据的分类效果本身就比较好。

猜你喜欢

转载自blog.csdn.net/qq_35327651/article/details/79915264