Logistic Regression (Gradient Ascent)

At each iteration, gradient ascent moves the current parameters a small step in the direction of the gradient; after many iterations this converges to the final solution. Random sampling can be used during the ascent: the result is about as good, but far fewer compute resources are needed. This stochastic gradient ascent is also an online algorithm: it can update the parameters as soon as each new sample arrives, without re-reading the entire dataset for a batch computation. A sketch of the stochastic variant follows.
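As a rough sketch of that stochastic variant (my illustration, not part of the original post: the name StochGradAscent, the decaying alpha schedule, and num_passes=150 are all assumptions), each update uses a single randomly chosen sample instead of the whole data matrix:

import random
import numpy as np

def StochGradAscent(data_arr, class_labels, num_passes=150):
  # data_arr: (m, n) ndarray with the constant-1.0 bias column already prepended
  # class_labels: list of 0/1 labels, one per row of data_arr
  m, n = np.shape(data_arr)
  weights = np.ones(n)
  for j in range(num_passes):
    index_pool = list(range(m))
    for i in range(m):
      alpha = 4 / (1.0 + j + i) + 0.01 # decaying step size damps oscillation
      rand_pos = random.randint(0, len(index_pool) - 1)
      k = index_pool[rand_pos] # pick one remaining sample at random
      h = 1.0 / (1 + np.exp(-np.dot(data_arr[k], weights)))
      error = class_labels[k] - h # scalar error on this single sample
      weights = weights + alpha * error * data_arr[k]
      del index_pool[rand_pos] # sample without replacement within a pass
  return weights

Each update costs O(n) instead of O(mn), which is where the resource saving comes from; in a genuinely online setting, the inner update can simply be run once on each new sample as it arrives.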

# -*- coding:UTF-8 -*-
import matplotlib.pyplot as plt
import numpy as np

def LoadDataSet():
  # Each line of testSet.txt holds two feature values and a 0/1 class label.
  # A constant 1.0 is prepended to every sample so that weight[0] acts as the bias term.
  data_mat, label_mat = [], []
  with open("testSet.txt") as f:
    for row in f:
      row_tmp = row.strip().split()
      data_mat.append([1.0, float(row_tmp[0]), float(row_tmp[1])])
      label_mat.append(int(row_tmp[2]))
  return data_mat, label_mat

def Sigmoid(inX):
  # logistic function: squashes any real-valued input into (0, 1)
  return 1.0 / (1 + np.exp(-inX))
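One practical caveat (mine, not the original post's): np.exp(-inX) overflows for large negative inputs and NumPy emits a RuntimeWarning. A numerically stable sketch, with the name StableSigmoid made up here and the input assumed to be convertible to a float ndarray:

def StableSigmoid(inX):
  # Same curve as Sigmoid, but exp() is only ever evaluated on
  # non-positive values, so it cannot overflow.
  inX = np.asarray(inX, dtype=float)
  out = np.empty_like(inX)
  pos = inX >= 0
  out[pos] = 1.0 / (1.0 + np.exp(-inX[pos]))
  exp_neg = np.exp(inX[~pos]) # here inX[~pos] < 0, so exp() stays in (0, 1)
  out[~pos] = exp_neg / (1.0 + exp_neg) # sigma(x) = e^x / (1 + e^x) for x < 0
  return out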

def GradAscent(DataMatInput, ClassLabels):
  data_mat = np.mat(DataMatInput)
  label_mat = np.mat(ClassLabels).transpose() # transpose into an (m, 1) column vector
  m, n = np.shape(data_mat) # matrix size: m rows (samples), n columns (features)
  alpha = 0.001 # step size
  cycles = 500 # number of iterations
  weight = np.ones((n,1))
  for i in range(cycles):
    tmp_mat = Sigmoid(data_mat * weight) # predictions h, an (m, 1) vector
    error = label_mat - tmp_mat # (m, 1) error vector y - h
    weight = weight + alpha * data_mat.transpose() * error # transpose the data so the shapes line up: (n, m) x (m, 1) -> (n, 1)
  return weight.getA() # convert back from matrix to ndarray
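
Why does multiplying the error vector by the transposed data matrix move the weights along the gradient? A short derivation sketch (standard logistic-regression algebra; the original post defers this to its reference link below): with predictions $h_i = \sigma(x_i^\top w)$ and log-likelihood

$$\ell(w) = \sum_{i=1}^{m} \big[ y_i \log h_i + (1 - y_i) \log(1 - h_i) \big],$$

the sigmoid identity $\sigma'(z) = \sigma(z)\,(1 - \sigma(z))$ cancels the denominators when differentiating, leaving

$$\nabla_w \ell(w) = \sum_{i=1}^{m} (y_i - h_i)\, x_i = X^\top (y - h).$$

The right-hand side is exactly data_mat.transpose() * error, so each loop iteration above is one gradient-ascent step on the log-likelihood with step size alpha.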

def run():
  data_mat, label_mat = LoadDataSet()
  weight = GradAscent(data_mat, label_mat)
  data_arr = np.array(data_mat)
  n = np.shape(data_mat)[0] # number of samples
  xcord1, ycord1, xcord2, ycord2 = [], [], [], []
  for i in range(n):
    if int(label_mat[i]) == 1:
      xcord1.append(data_arr[i, 1])
      ycord1.append(data_arr[i, 2])
    else:
      xcord2.append(data_arr[i, 1])
      ycord2.append(data_arr[i, 2])
  fig = plt.figure()
  ax = fig.add_subplot(111)  # add a subplot: 1 row, 1 column, first panel
  ax.scatter(xcord1, ycord1, s=20, c='red', marker='s', alpha=.5)
  ax.scatter(xcord2, ycord2, s=20, c='green', alpha=.5)
  x = np.arange(-3.0, 3.0, 0.1)
  y = (-weight[0] - weight[1] * x) / weight[2] # decision boundary: solve w0 + w1*x1 + w2*x2 = 0 for x2
  ax.plot(x, y)
  plt.title('logistic')
  plt.xlabel('X1')
  plt.ylabel('X2')
  plt.show()

run()
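
The code above only plots the learned weights; to actually classify a new point, a minimal sketch (the Classify name and the conventional 0.5 threshold are my additions, not the post's):

def Classify(inX, weight):
  # inX: feature vector [1.0, x1, x2]; weight: the (3, 1) ndarray returned by GradAscent
  prob = Sigmoid(np.dot(inX, weight)[0])
  return 1 if prob > 0.5 else 0

For example, Classify([1.0, 0.5, 6.0], weight) returns the predicted class (0 or 1) for the point (x1, x2) = (0.5, 6.0).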

Reference:

https://blog.csdn.net/c406495762/article/details/77723333#1__238 (answers why error * data alone is enough to move along the gradient; the derivation sketch after GradAscent above summarizes the same point)

Reposted from www.cnblogs.com/lalalatianlalu/p/11335509.html