Gradient Descent and the L1 Norm

Suppose the feature matrix X in the dataset is a 300×10 matrix, where 300 is the number of samples and 10 is the number of features.
The target matrix Y is a 300×1 matrix.
The mapping from features to targets goes through a weight matrix W (10×1),
so this is a linear regression with model y = Xw.
We run gradient descent on it to drive the loss down and fit the data.
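Concretely, with the squared-error loss, the gradient used by the optimizer below follows in one line:

L(W) = ||Y − X·W||²,  ∂L/∂W = −2·Xᵀ·(Y − X·W),  update step: W ← W − lr·(∂L/∂W)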

import numpy as np
def get_data(file_name):
    # Read a whitespace-separated numeric matrix, one row per line.
    with open(file_name) as fil:
        X = []
        for X_one_line in fil:
            if X_one_line.strip():
                X.append([float(i) for i in X_one_line.split()])
    return np.array(X)

def get_weight():
    # Read space-separated initial weights from stdin as a column vector.
    W_input = input("Enter the initial weight vector: ")
    W_orig = [[float(i)] for i in W_input.split()]
    return np.array(W_orig)

def train_test_sep(X,Y,train_size):
    num_of_data = X.shape[0]                     # number of samples (300)
    num_of_train = int(num_of_data*train_size)   # number of training samples (270)
    X_train = X[0:num_of_train]
    X_test = X[num_of_train:]
    Y_train = Y[0:num_of_train]
    Y_test = Y[num_of_train:]
    return (X_train,X_test,Y_train,Y_test)


def model(X,W):
    # Linear model: Y_pred = X @ W
    Y_pred = np.dot(X,W)
    return Y_pred

def evaluation(Y_pred,Y):
    # Sum of squared errors between predictions and targets.
    loss = np.sum(np.multiply(Y-Y_pred,Y-Y_pred))
    print(loss)

# Loss values recorded from one run (apparently the final training and test losses):
# 1.0198594030883626e-08
# 1.40506601300693e-09
def optimizer(Y,W,X,lr,iter_num):
    for i in range(iter_num):
        loss_grad = 2 * (-X.T) @ (Y - X @ W)  # gradient of the squared-error loss
        W -= lr * loss_grad                   # gradient-descent step
        loss = np.sum(np.multiply(Y - X@W, Y - X@W))
        print(i,loss)
    return W


if __name__ == "__main__":
    X = get_data("x.txt")          # feature matrix
    Y = get_data("y.txt")          # target matrix
    W = get_weight()               # initial weights
    W_right = get_data("w.txt")    # ground-truth weights
    learning_rate = 100
    iter_num = 600
    train_size = 0.9
    dataset = train_test_sep(X,Y,train_size)     # (X_train, X_test, Y_train, Y_test)
    print("###Training:---")
    print("Loss before optimization:",end="")    # loss before gradient descent
    evaluation(model(dataset[0], W), dataset[2])
    print("###Optimizing:---")
    W_after_optimizer = optimizer(dataset[2],W,dataset[0],learning_rate,iter_num)
    print("weight after training:\n",W_after_optimizer)
    print("weight in answer:\n",W_right)

    print("###Testing:---")
    evaluation(model(dataset[1],W_after_optimizer),dataset[3])


# example stdin input for the initial weights: 1 2 3 4 5 6 7 8 9 10
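The files x.txt, y.txt and w.txt are assumed to already exist. Here is a minimal sketch for generating compatible synthetic files; the 300×10 shapes follow the description above, while the value ranges and the seed are assumptions of mine:

import numpy as np

rng = np.random.default_rng(0)                # fixed seed, arbitrary choice
W_true = rng.uniform(-1, 1, size=(10, 1))     # ground-truth weights, 10x1
X = rng.uniform(-1, 1, size=(300, 10))        # feature matrix, 300x10
Y = X @ W_true                                # targets, 300x1, from y = Xw

np.savetxt("x.txt", X)                        # whitespace-separated rows,
np.savetxt("y.txt", Y)                        # matching what get_data() parses
np.savetxt("w.txt", W_true)

One caveat: the learning rate must match the data scale. With features of this magnitude, lr = 100 as in the script above would diverge; something around 1e-3 converges. The author's original files presumably contain far smaller values.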

Next, consider changing the model to y = W1·x + W2·x² + W3·x³ (squares and cubes taken element-wise).
The results are not good...
It is not overfitting either: the fit is already poor on the training set,
and even worse on the test set...
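The loss keeps the same squared-error form, L = ||Y − X@W1 − (X*X)@W2 − (X*X*X)@W3||², so with residual r = Y − Y_pred each weight matrix gets a gradient of the same shape as before:

∂L/∂W1 = −2·Xᵀ·r,  ∂L/∂W2 = −2·(X*X)ᵀ·r,  ∂L/∂W3 = −2·(X*X*X)ᵀ·r

which is exactly what optimizer2 computes below.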

import numpy as np
def get_data(file_name):
    # Read a whitespace-separated numeric matrix, one row per line.
    with open(file_name) as fil:
        X = []
        for X_one_line in fil:
            if X_one_line.strip():
                X.append([float(i) for i in X_one_line.split()])
    return np.array(X)

def get_weight(n):
    # Read initial weight vectors from stdin; n is the number of
    # weight matrices the chosen model needs (1 or 3).
    if n == 1:
        W_input = input("Enter the initial weight vector: ")
        return np.array([[float(i)] for i in W_input.split()])
    if n == 3:
        W1_input = input("Enter the initial weight vector W1: ")
        W1_orig = np.array([[float(j)] for j in W1_input.split()])
        W2_input = input("Enter the initial weight vector W2: ")
        W2_orig = np.array([[float(j)] for j in W2_input.split()])
        W3_input = input("Enter the initial weight vector W3: ")
        W3_orig = np.array([[float(j)] for j in W3_input.split()])
        return W1_orig,W2_orig,W3_orig

def train_test_sep(X,Y,train_size):
    num_of_data = X.shape[0]                     # number of samples (300)
    num_of_train = int(num_of_data*train_size)   # number of training samples (270)
    X_train = X[0:num_of_train]
    X_test = X[num_of_train:]
    Y_train = Y[0:num_of_train]
    Y_test = Y[num_of_train:]
    return (X_train,X_test,Y_train,Y_test)


def model(X,W):
    # Linear model: Y_pred = X @ W
    Y_pred = np.dot(X,W)
    return Y_pred


def model2(X,W1,W2,W3):
    # Cubic model: Y_pred = X @ W1 + (X*X) @ W2 + (X*X*X) @ W3
    Y_pred = np.dot(X,W1) + np.dot(X*X,W2) + np.dot(X*X*X,W3)
    return Y_pred

def evaluation(Y_pred,Y):
    # Sum of squared errors between predictions and targets.
    loss = np.sum(np.multiply(Y-Y_pred,Y-Y_pred))
    print(loss)


def optimizer(Y,W,X,lr,iter_num):
    for i in range(iter_num):
        loss_grad = 2 * (-X.T) @ (Y - X @ W)  # gradient of the squared-error loss
        W -= lr * loss_grad                   # gradient-descent step
        loss = np.sum(np.multiply(Y - X@W, Y - X@W))
        print(i,loss)
    return W


def optimizer2(Y,W1,W2,W3,X,lr,iter_num):
    for i in range(iter_num):
        common_part = Y - X@W1 - (X*X)@W2 - (X*X*X)@W3   # residual Y - Y_pred
        Loss_grad_W1 = 2*(-X.T)@common_part              # gradient w.r.t. W1
        Loss_grad_W2 = 2*(-(X*X).T)@common_part          # gradient w.r.t. W2
        Loss_grad_W3 = 2*(-(X*X*X).T)@common_part        # gradient w.r.t. W3
        W1 -= lr*Loss_grad_W1
        W2 -= lr*Loss_grad_W2
        W3 -= lr*Loss_grad_W3
        Y_pred = np.dot(X,W1) + np.dot(X*X,W2) + np.dot(X*X*X,W3)
        loss = np.sum((Y-Y_pred)*(Y-Y_pred))
        print(i,loss)
    return W1,W2,W3


if __name__ == "__main__":
    X = get_data("x.txt")          # feature matrix
    Y = get_data("y.txt")          # target matrix
    # W = get_weight(1)            # initial weights for the linear model
    W1,W2,W3 = get_weight(3)       # initial weights for the cubic model
    W_right = get_data("w.txt")    # ground-truth weights
    learning_rate = 1e-4
    iter_num = 100000
    train_size = 0.9
    dataset = train_test_sep(X,Y,train_size)     # (X_train, X_test, Y_train, Y_test)
    print("###Training:---")
    print("Loss before optimization:",end="")    # loss before gradient descent
    # evaluation(model(dataset[0], W), dataset[2])
    evaluation(model2(dataset[0],W1,W2,W3),dataset[2])
    print("###Optimizing:---")
    W1_after,W2_after,W3_after = optimizer2(dataset[2],W1,W2,W3,dataset[0],learning_rate,iter_num)
    # W_after_optimizer = optimizer(dataset[2],W,dataset[0],learning_rate,iter_num)
    # print("weight after training:\n",W_after_optimizer)
    print("W1 after training:\n",W1_after)
    print("W2 after training:\n", W2_after)
    print("W3 after training:\n", W3_after)
    # print("weight in answer:\n",W_right)

    print("###Testing:---")
    # evaluation(model(dataset[1],W_after_optimizer),dataset[3])
    evaluation(model2(dataset[1], W1_after, W2_after, W3_after), dataset[3])

# example stdin input for W1, W2 and W3 (10 values each):
# 1 2 3 4 5 6 7 8 9 10
# 1 2 3 4 5 6 7 8 9 10
# 1 2 3 4 5 6 7 8 9 10
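A hedged guess at why the cubic model trains so poorly: the x, x² and x³ feature blocks live on very different scales, so no single learning rate suits all three weight matrices. Standardizing the stacked polynomial features first (the same StandardScaler used in the Lasso example below) usually makes plain gradient descent far better behaved. A minimal sketch under that assumption:

import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.loadtxt("x.txt")                          # same file the script reads
X_poly = np.hstack([X, X * X, X * X * X])        # [x | x^2 | x^3], 300x30
X_poly = StandardScaler().fit_transform(X_poly)  # zero mean, unit variance per column
# X_poly can now be trained with the single-matrix optimizer() above,
# using one weight matrix W of shape (30, 1) instead of W1, W2, W3.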

Suppose overfitting had occurred...
After reading through detailed explanations of the L1 and L2 norms, and an expert's article with scikit-learn examples of L1/L2 regularization,
let's apply L1 regularization to the iris dataset:
Explanation:
In linear regression or logistic regression (which is just classification...), the model can generally be written as y = w1x1 + w2x2 + w3x3 + w4x4 + w0.
coef_ holds the feature weights w1 through w4.
intercept_ is w0, the offset, i.e. the bias.
The final result [ 0. -0. 0.16129057 0.32571651]
shows that the Lasso (L1) penalty is very helpful for feature selection,
and demonstrates Lasso's sparsity.
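For reference, scikit-learn's Lasso minimizes

(1 / (2·n_samples)) · ||Y − X·w||² + alpha · ||w||₁

so the alpha=.3 used below sets the strength of the L1 term, and it is that term that pushes small coefficients exactly to zero.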

from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
import numpy as np
import sys

np.set_printoptions(threshold=sys.maxsize)   # print arrays in full
iris = load_iris()
scaler = StandardScaler()
print(iris)
print("-"*30)
X = scaler.fit_transform(iris["data"])       # standardize the four features
print(X)
print("-"*30)
Y = iris["target"]
print(Y)
print("-"*30)
names = iris["feature_names"]
print(names)
print("-"*30)
lasso = Lasso(alpha=.3)                      # L1-regularized linear regression
print(lasso.fit(X, Y).coef_)                 # feature weights w1..w4
print(lasso.fit(X, Y).intercept_)            # bias w0
# print("Lasso model: ", pretty_print_linear(lasso.coef_, names, sort=True))

Reposted from blog.csdn.net/jokerxsy/article/details/104717936