MXNet - softmax regression

Manually implement softmax regression

# First, import the required packages
%matplotlib inline
import d2lzh as d2l
from mxnet import autograd, nd
# Use the Fashion-MNIST dataset with a batch size of 256
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
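It can help to peek at one batch to confirm what the loader returns. A quick check, assuming the d2lzh loader yields (features, labels) pairs of NDArrays with each image already converted to a 1x28x28 tensor:

for X, y in train_iter:
	print(X.shape, y.shape)  # expected: (256, 1, 28, 28) and (256,)
	break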

Initialize model parameters

num_inputs = 784  # each image is 28*28 pixels, so height * width = 784; you can confirm this from the image's shape
num_outputs = 10  # number of image classes

w = nd.random.normal(scale=0.01, shape=(num_inputs,num_outputs))
b = nd.zeros(num_outputs)

# Attach gradients to the model parameters
w.attach_grad()
b.attach_grad()

Implement softmax operation

X = nd.array([[1,2,3],[4,5,6]])
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)  # axis=0 sums each column, axis=1 sums each row

def softmax(X):
	X_exp = nd.exp(X)
	partition = X_exp.sum(axis=1,keepdims=True)
	return X_exp / partition
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)
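Note that nd.exp can overflow for large inputs. That is not a problem in this example, but a common variant (not in the original post) subtracts each row's maximum before exponentiating, which leaves the result unchanged:

def stable_softmax(X):
	# exp(x - m) / sum(exp(x - m)) equals softmax(x) but avoids overflow
	X_exp = nd.exp(X - X.max(axis=1, keepdims=True))
	return X_exp / X_exp.sum(axis=1, keepdims=True)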

Define the model.
This function flattens each image into a vector of length num_inputs, then applies the linear transformation and the softmax.

def net(X):
	return softmax(nd.dot(X.reshape((-1,num_inputs)),w) + b)
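As a quick sanity check (again using a batch from train_iter), the output should have one row per example and one column per class, and each row should sum to 1:

for X, _ in train_iter:
	print(net(X).shape)  # expected: (256, 10)
	break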

Define the loss function
Here y_hat holds the predicted probabilities of 2 samples over 3 categories, and y holds their true labels.
nd.pick selects each sample's predicted probability for its true label; the cross-entropy loss is the negative log of that probability.

y_hat = nd.array([[0.1,0.3,0.6],[0.3,0.2,0.5]])
y = nd.array([0,2],dtype='int32')

def cross_entropy(y_hat,y):
	return -nd.pick(y_hat,y).log()
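For the two samples above, nd.pick selects y_hat[0, 0] = 0.1 and y_hat[1, 2] = 0.5, so the losses are -log(0.1) ≈ 2.303 and -log(0.5) ≈ 0.693:

cross_entropy(y_hat, y)  # approximately [2.3026, 0.6931]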

Calculate classification accuracy
y_hat.argmax(axis=1) returns the index of the largest element in each row, i.e. the predicted class, and the result has the same shape as the variable y.

def accuracy(y_hat,y):
	return (y_hat.argmax(axis=1) == y.astype('float32')).mean().asscalar()
accuracy(y_hat,y)
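Here argmax(axis=1) gives predicted classes [2, 2] while the true labels are [0, 2], so only the second sample is classified correctly and the call returns 0.5.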

def evaluate_accuracy(data_iter,net):
	acc_sum, n = 0.0, 0
	for X, y in data_iter:
		y = y.astype('float32')
		acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
		n += y.size
	return acc_sum / n
evaluate_accuracy(test_iter,net)
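Because w was initialized randomly and there are 10 classes, the untrained model should score close to 0.1 on the test set, about the same as random guessing.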

Training model

num_epochs, lr = 5, 0.1  # number of epochs and learning rate

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
	for epoch in range(num_epochs):
		train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
		for X, y in train_iter:
			with autograd.record():  # record operations for automatic differentiation
				y_hat = net(X)
				l = loss(y_hat,y).sum()
			l.backward()
			if trainer is None:
				d2l.sgd(params, lr, batch_size)
			else:
				trainer.step(batch_size)
			y = y.astype('float32')
			train_l_sum += l.asscalar()
			train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
			n += y.size
		test_acc = evaluate_accuracy(test_iter,net)
		print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f' % (epoch+1, train_l_sum / n, train_acc_sum / n, test_acc))
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [w, b], lr)
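
After training, the model can be used to predict on the test set. A short sketch, assuming the d2lzh helpers get_fashion_mnist_labels and show_fashion_mnist are available as in the original d2l book:

for X, y in test_iter:
	break
true_labels = d2l.get_fashion_mnist_labels(y.asnumpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(axis=1).asnumpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]
d2l.show_fashion_mnist(X[0:9], titles[0:9])  # show the first 9 test images with true and predicted labels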
			


Origin blog.csdn.net/qq_43802454/article/details/121309081