Softmax Regression

Implementing softmax regression by hand (with MXNet and d2lzh)

# First, import the required packages
%matplotlib inline
import d2lzh as d2l
from mxnet import autograd, nd

# Use the Fashion-MNIST dataset with a batch size of 256
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
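
Before moving on, it helps to confirm what one batch looks like. A quick check (my own sketch, not part of the original post): each batch should hold 256 single-channel 28 x 28 images plus their labels.

for X, y in train_iter:
    print(X.shape, y.shape)  # expect (256, 1, 28, 28) and (256,)
    break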

Initializing the model parameters

num_inputs = 784   # flattened image size: height * width = 28 * 28 (check an image's .shape)
num_outputs = 10   # number of image classes

w = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs))
b = nd.zeros(num_outputs)

# Attach gradient buffers to the model parameters
w.attach_grad()
b.attach_grad()
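
As a quick sanity check (a sketch of my own, not from the original post), attach_grad allocates the buffers that backward() writes into; a toy forward/backward pass confirms the gradients land in w.grad and b.grad:

with autograd.record():
    out = (nd.dot(nd.ones((1, num_inputs)), w) + b).sum()
out.backward()
print(w.grad.shape, b.grad.shape)  # (784, 10) (10,)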

Implementing the softmax operation

X = nd.array([[1, 2, 3], [4, 5, 6]])
# axis=0 sums down the columns, axis=1 sums across the rows
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)

def softmax(X):
    X_exp = nd.exp(X)                             # exponentiate every element
    partition = X_exp.sum(axis=1, keepdims=True)  # row-wise normalizing constant
    return X_exp / partition                      # each row now sums to 1

X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)
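
In notation, softmax(X)_ij = exp(X_ij) / sum_k exp(X_ik), and the result is unchanged if a constant is subtracted from every entry of a row. For completeness, a numerically stable variant (my own sketch, not part of the original post) subtracts the row maximum first so nd.exp cannot overflow on large logits:

def stable_softmax(X):
    # Subtracting the per-row max leaves the output unchanged
    # but keeps nd.exp from overflowing on large inputs
    X_shifted = X - X.max(axis=1, keepdims=True)
    X_exp = nd.exp(X_shifted)
    return X_exp / X_exp.sum(axis=1, keepdims=True)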

Defining the model

This function flattens each image into a vector of length num_inputs, applies the linear transform, and then the softmax.

def net(X):
	return softmax(nd.dot(X.reshape((-1,num_inputs)),w) + b)
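
A quick shape check (a sketch under the setup above): pushing one test batch through the untrained net should yield one probability row per image, each row summing to 1.

for X, y in test_iter:
    break
out = net(X)
print(out.shape)        # (256, 10)
print(out.sum(axis=1))  # every entry ≈ 1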

Defining the loss function

Here y_hat holds the predicted probabilities of 2 examples over 3 classes, and y holds their true labels. nd.pick(y_hat, y) selects, for each row, the probability the model assigned to the true class, so cross_entropy is its negative log.

y_hat = nd.array([[0.1,0.3,0.6],[0.3,0.2,0.5]])
y = nd.array([0,2],dtype='int32')

def cross_entropy(y_hat,y):
	return -nd.pick(y_hat,y).log()
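
For the arrays above, nd.pick selects y_hat[0, 0] = 0.1 and y_hat[1, 2] = 0.5, so the two losses work out to:

cross_entropy(y_hat, y)  # [-log(0.1), -log(0.5)] ≈ [2.3026, 0.6931]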

Computing classification accuracy

y_hat.argmax(axis=1) returns the index of the largest element in each row, and the result has the same shape as y. With the y_hat and y above, the predicted classes are 2 and 2 against labels 0 and 2, so accuracy below returns 0.5.

def accuracy(y_hat,y):
	return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
accuracy(y_hat,y)

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        y = y.astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n

evaluate_accuracy(test_iter, net)  # ≈ 0.1 before training: randomly initialized parameters guess at chance over 10 classes

Training the model

num_epochs, lr = 5, 0.1  # number of epochs and the learning rate

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, trainer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            with autograd.record():  # record the forward pass for automatic differentiation
                y_hat = net(X)
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)  # mini-batch stochastic gradient descent
            else:
                trainer.step(batch_size)
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [w, b], lr)
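
With training done, it is worth eyeballing a few predictions. A minimal sketch, assuming d2lzh exposes the get_fashion_mnist_labels and show_fashion_mnist helpers from the d2l book:

for X, y in test_iter:
    break  # grab one test batch

true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

# Show the first 9 images, titled "true label / predicted label"
d2l.show_fashion_mnist(X[0:9], titles[0:9])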
			

Reposted from blog.csdn.net/qq_43802454/article/details/121309081