统计学第二章--感知机

查看数据集

import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# Load the iris dataset into a DataFrame (one column per feature) and
# append the class label as an extra column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target

# Replace the dataset's own feature names with shorter column names.
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
print(df.label.value_counts())

# Scatter the two sepal features for rows 0-49 and rows 50-99 as separate
# series (presumably one iris class each -- confirm against the label
# counts printed above).
first_group = df[:50]
second_group = df[50:100]
plt.scatter(first_group['sepal length'], first_group['sepal width'], label='one')
plt.scatter(second_group['sepal length'], second_group['sepal width'], label='two')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()

sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)

从上图可以发现:在四个维度的特征中,只用其中两个维度(sepal length 和 sepal width)就可以把前两类数据线性分开。

import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# Load the iris dataset into a DataFrame and append the class label.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target

# (The column renaming and exploratory scatter plot are intentionally
# omitted in this snippet.)

# Keep the first 100 rows and three columns: the first two features and
# the label (last column).
data = np.array(df.iloc[:100, [0, 1, -1]])
print(data)

# Split into the feature matrix (all but the last column) and the labels.
X = data[:, :-1]
y = data[:, -1]
print(type(X))

# Recode the labels for the perceptron: 1 stays +1, everything else is -1.
y = np.where(y == 1, 1, -1)

# Perceptron for two-class data with labels +1 / -1.
# The decision function is the linear form w . x + b.
class Model:
    """Perceptron binary classifier trained with per-sample updates.

    The weight vector is created on the first call to ``fit`` (all ones,
    one entry per feature column of the training data), so the class no
    longer depends on a module-level ``data`` array.

    Args:
        l_rate: Step size applied to every weight/bias update.
        max_iter: Optional cap on the number of passes over the training
            data. ``None`` (the default) keeps the original behaviour of
            looping until a full pass produces no misclassification, which
            never terminates if the data are not linearly separable.
    """

    def __init__(self, l_rate=0.1, max_iter=None):
        # Weights are sized lazily in ``fit`` from the training data.
        self.w = None
        self.b = 0
        self.l_rate = l_rate
        self.max_iter = max_iter

    def sign(self, x, w, b):
        """Return the raw activation ``np.dot(x, w) + b``.

        Despite its name this does not apply a sign function; callers
        compare the returned value against zero themselves.
        """
        return np.dot(x, w) + b

    def fit(self, X_train, y_train):
        """Train the weights and bias with per-sample perceptron updates.

        Args:
            X_train: 2-D array-like of shape (n_samples, n_features).
            y_train: Sequence of n_samples labels; the update rule treats
                a sample as misclassified when ``y * activation <= 0``.

        Returns:
            The string ``'Perceptron Model!'``.

        Raises:
            ValueError: If ``X_train`` is not 2-D or the number of samples
                and labels differ.
        """
        X_train = np.asarray(X_train)
        if X_train.ndim != 2:
            raise ValueError('X_train must be 2-D (n_samples, n_features)')
        if len(X_train) != len(y_train):
            raise ValueError('X_train and y_train must have the same length')

        if self.w is None:
            # Same initial values as before (ones, float32); only the size
            # now comes from the training data instead of a global.
            self.w = np.ones(X_train.shape[1], dtype=np.float32)
            print('self.w', self.w)

        epoch = 0
        while self.max_iter is None or epoch < self.max_iter:
            wrong_count = 0
            for x_i, y_i in zip(X_train, y_train):
                # A non-positive margin means the sample is on the wrong
                # side of (or exactly on) the current boundary.
                if y_i * self.sign(x_i, self.w, self.b) <= 0:
                    self.w = self.w + self.l_rate * (y_i * x_i)
                    self.b = self.b + self.l_rate * y_i
                    wrong_count += 1
            if wrong_count == 0:
                # A clean pass over every sample: training has converged.
                break
            epoch += 1
        return 'Perceptron Model!'

    def score(self):
        """Placeholder; not implemented and currently returns ``None``."""
        pass

# Train the perceptron on the two selected features.
perceptron = Model()
perceptron.fit(X, y)

# The learned boundary satisfies w[0]*x + w[1]*y + b = 0; solve it for y
# at evenly spaced x values so it can be drawn as a line.
x_points = np.linspace(4, 7, 10)
print('x_points=', x_points)
y_ = -(perceptron.w[0] * x_points + perceptron.b) / perceptron.w[1]
plt.plot(x_points, y_)

# Draw the two groups of samples as round markers. The fmt string is the
# marker-only 'o' because the colour is already given by the ``color``
# keyword; the previous 'bo' defined the colour twice and made matplotlib
# warn that the keyword takes precedence.
plt.plot(data[:50, 0], data[:50, 1], 'o', color='blue', label='one')
plt.plot(data[50:100, 0], data[50:100, 1], 'o', color='orange', label='two')
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend()
plt.show()

统计学第二章--感知机

猜你喜欢