[Hands-On] Softmax Regression

Copyright notice: this is an original post by the author and may not be reproduced without permission. https://blog.csdn.net/shenziheng1/article/details/83062131

The code and data have been uploaded and are available for download: https://download.csdn.net/download/shenziheng1/10721992

1. Training Code
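
The code below implements standard softmax regression trained with batch gradient ascent. As a quick recap of the model it fits (the notation here is mine, not the original author's): with $k$ classes and a weight matrix $W = [w_1, \dots, w_k]$, the predicted probability of class $j$ for a sample $x$ is

$$p(y = j \mid x) = \frac{\exp(w_j^{\top} x)}{\sum_{l=1}^{k} \exp(w_l^{\top} x)}$$

and one gradient-ascent step on the average log-likelihood is

$$W \leftarrow W + \frac{\alpha}{m} X^{\top} (Y - P)$$

where $X$ is the $m \times n$ feature matrix, $Y$ the one-hot label matrix, and $P$ the matrix of predicted probabilities. The err matrix in gradient_ascent holds exactly $Y - P$ after its inner loop.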

import numpy as np


def load_data(inputfile):
    """Load tab-separated samples: feature columns first, class label in the last column."""
    f = open(inputfile)
    feature_data = []
    label_data = []
    for line in f.readlines():
        feature_tmp = []
        feature_tmp.append(1)  # bias / offset term
        lines = line.strip().split("\t")
        for i in range(len(lines) - 1):
            feature_tmp.append(float(lines[i]))
        label_data.append(int(lines[-1]))
        feature_data.append(feature_tmp)
    f.close()
    # features (m x n), labels (m x 1), and the number of distinct classes k
    return np.mat(feature_data), np.mat(label_data).T, len(set(label_data))


def gradient_ascent(feature_data, label_data, k, maxCycle, alpha):
    m, n = np.shape(feature_data)
    weights = np.mat(np.ones((n, k)))  # one weight column per class
    i = 0
    while i <= maxCycle:
        err = np.exp(feature_data * weights)
        if i % 100 == 0:
            print("\t-------iter: ", i, ", cost: ", cost(err, label_data))
        rowsum = -err.sum(axis=1)
        rowsum = rowsum.repeat(k, axis=1)
        err = err / rowsum  # err now holds -p(y=j|x), the negated softmax probabilities
        for x in range(m):
            err[x, label_data[x, 0]] += 1  # err becomes (1{y=j} - p), i.e. Y - P
        weights = weights + (alpha / m) * feature_data.T * err
        i += 1
    return weights


def cost(err, label_data):
    """Average cross-entropy; err holds the unnormalized exp scores."""
    m = np.shape(err)[0]
    sum_cost = 0.0
    for i in range(m):
        p = err[i, label_data[i, 0]] / np.sum(err[i, :])  # softmax probability of the true class
        if p > 0:
            sum_cost -= np.log(p)
    return sum_cost / m


def save_model(file_name, weights):
    f_w = open(file_name, "w")
    m, n = np.shape(weights)
    for i in range(m):
        w_tmp = []
        for j in range(n):
            w_tmp.append(str(weights[i, j]))
        f_w.write("\t".join(w_tmp) + "\n")  # one tab-separated row per input dimension
    f_w.close()


if __name__ == "__main__":
    inputfile = "SoftInput.txt"
    # load the training data
    print("--------load training data---------")
    feature, label, k = load_data(inputfile)
    # train the Softmax model
    print("--------training Softmax model--------")
    weights = gradient_ascent(feature, label, k, 100000, 0.4)
    # save the final model
    print("--------saving Softmax model--------")
    save_model("weights", weights)

2. Test Code

import numpy as np
import random as rd


def load_data(num, m):
    """Generate num random test samples with m columns; column 0 stays 1 as the bias."""
    testData = np.mat(np.ones((num, m)))
    for i in range(num):
        testData[i, 1] = rd.random() * 6 - 3  # feature 1 in [-3, 3)
        testData[i, 2] = rd.random() * 15     # feature 2 in [0, 15)
    return testData


def load_weights(weights_path):
    f = open(weights_path)
    w = []
    for line in f.readlines():
        w_tmp = []
        lines = line.strip().split("\t")
        for x in lines:
            w_tmp.append(float(x))
        w.append(w_tmp)
    f.close()
    weights = np.mat(w)
    m, n = np.shape(weights)
    return weights, m, n


def predict(test_data, weights):
    h = test_data * weights
    # the class with the largest raw score also has the largest softmax
    # probability, so normalization is unnecessary for classification
    return h.argmax(axis=1)


def save_result(file_name, result):
    f_result = open(file_name, "w")
    m = np.shape(result)[0]
    for i in range(m):
        f_result.write(str(result[i, 0]) + "\n")
    f_result.close()


if __name__ == "__main__":
    print("--------load model--------")
    w, m, n = load_weights("weights")
    print("--------prediction--------")
    test_data = load_data(4000, m)
    result = predict(test_data, w)
    print("--------save results--------")
    save_result("result", result)  # output file name "result" is illustrative
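
The two scripts communicate only through the tab-separated "weights" file, so it is worth sanity-checking that save_model and load_weights round-trip. A minimal sketch, assuming the scripts are importable as modules (the names softmax_train and softmax_test are placeholders):

import numpy as np

from softmax_train import save_model   # placeholder module names for the two scripts above
from softmax_test import load_weights

weights = np.mat(np.random.rand(3, 4))  # n=3 inputs (incl. bias), k=4 classes
save_model("weights_check", weights)
loaded, m, n = load_weights("weights_check")

assert (m, n) == np.shape(weights)
assert np.allclose(loaded, weights)     # the text round-trip preserves the values
print("save_model / load_weights round-trip OK")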

3. Supplementary Notes

  • set(): builds an unordered collection of unique elements; it supports membership tests, removes duplicates, and can compute intersections, unions, differences, and so on.
>>> x = set('runoob')
>>> y = set('google')
>>> x, y
({'b', 'n', 'o', 'r', 'u'}, {'e', 'g', 'l', 'o'})  # duplicates removed (display order is arbitrary)
>>> x & y   # intersection
{'o'}
>>> x | y   # union
{'b', 'e', 'g', 'l', 'n', 'o', 'r', 'u'}
>>> x - y   # difference
{'b', 'n', 'r', 'u'}
  • sum(axis): sums the entries of an array. By default (axis=None) np.sum adds up every element, while axis=0 sums down each column and axis=1 sums across each row; a fuller session follows the example below.
>>> np.sum([[0, 1, 2], [2, 1, 3]], axis=1)
array([3, 6])
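
For comparison, an illustrative interpreter session covering all three cases:

>>> a = np.array([[0, 1, 2], [2, 1, 3]])
>>> np.sum(a)            # default axis=None: sum of every element
9
>>> np.sum(a, axis=0)    # down each column
array([2, 2, 5])
>>> np.sum(a, axis=1)    # across each row
array([3, 6])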
