3.3 编程实现对率回归，并给出西瓜数据集 3.0α 上的结果。
数据集
密度 | 甜度 | 好(1) or 坏(0) |
0.676 | 0.460 | 1 |
0.774 | 0.376 | 1 |
0.634 | 0.264 | 1 |
0.608 | 0.318 | 1 |
0.556 | 0.215 | 1 |
0.403 | 0.237 | 1 |
0.481 | 0.149 | 1 |
0.437 | 0.211 | 1 |
0.666 | 0.091 | 0 |
0.243 | 0.267 | 0 |
0.245 | 0.057 | 0 |
0.343 | 0.099 | 0 |
0.639 | 0.161 | 0 |
0.657 | 0.198 | 0 |
0.360 | 0.370 | 0 |
0.593 | 0.042 | 0 |
0.719 | 0.103 | 0 |
import string
import math
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of *x*.

    The result lies in [0, 1]. The computation is split by the sign of
    *x* so that the exponential is only ever taken of a non-positive
    number; the naive form ``1 / (1 + e**(-x))`` raises OverflowError for
    strongly negative inputs such as ``x = -1000``.
    """
    if x >= 0:
        # exp(-x) <= 1 here, so it cannot overflow.
        return 1 / (1 + math.exp(-x))
    # For negative x use the algebraically equal form e^x / (1 + e^x);
    # exp(x) < 1 and simply underflows towards 0.0 for very negative x.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)
def classify(x):
    """Map a score *x* to a class label.

    Returns 0 when ``x <= 0.5`` and 1 otherwise, so a score of exactly
    0.5 is assigned to class 0.
    """
    return 0 if x <= 0.5 else 1
def object_function(w1, w2, x1, x2, b):
    """Return the model output for one two-feature sample.

    Computes the linear score ``w1 * x1 + w2 * x2 + b`` and passes it
    through ``sigmoid``.
    """
    linear_score = w1 * x1 + w2 * x2 + b
    return sigmoid(linear_score)
# Load the data: each non-blank line holds three whitespace-separated
# fields, "density sugar label".
training_set = list()
# Raw string: the path value is unchanged, but it no longer relies on "\D"
# and "\w" being unrecognised escape sequences (newer Pythons warn on those).
# The with-block guarantees the file handle is closed after reading.
with open(r"D:\Desktop\watermelon.txt", "r") as file:
    for line in file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing on the three-way unpack below.
            continue
        density, sugar, label = fields
        training_set.append((density, sugar, label))
num = len(training_set)
if num == 0:
    # Without samples the accuracy computation below would divide by zero.
    raise ValueError("training set is empty: no samples were read")

# Convert the text fields to numbers once, rather than on every epoch.
samples = [
    (float(density), float(sugar), int(label))
    for density, sugar, label in training_set
]

# Training: batch gradient descent on the parameters w1, w2 and b.
w1, w2 = 0.0, 0.0
b = 1.0
learning_rate = 0.5
for epoch in range(100000):
    correct = 0.0
    deltaW1, deltaW2, deltaB = 0.0, 0.0, 0.0
    for x1, x2, y in samples:
        predict_y = object_function(w1, w2, x1, x2, b)
        # Count correct predictions made with this epoch's starting weights.
        correct += (classify(predict_y) == y)
        # Accumulate the update over all samples; it is applied only
        # after the whole pass, so every sample sees the same weights.
        temp = learning_rate * (predict_y - y)
        deltaW1 += temp * x1
        deltaW2 += temp * x2
        deltaB += temp
    w1 -= deltaW1
    w2 -= deltaW2
    b -= deltaB
# Report the accuracy measured during the last epoch and the final parameters.
print("epoch %d has accuracy: %f%%" % (epoch, float(correct / num) * 100))
print("w1 = %f, w2 = %f, b = %f" % (w1, w2, b))
最终的结果为：
epoch 99999 has accuracy: 70.588235%
w1 = 3.156182, w2 = 12.541840, b = -4.431327