This post is a detailed, line-by-line explanation of the MATLAB code for a GAN network:
Part 1: predefined settings
clear; clc; % clc clears the Command Window (purely for readability); clear removes variables from the workspace. The two are different.
%%% load data set
train_x=load('Normalization_wbc.txt');% train_x is the real data we want the GAN to learn to imitate.
[m,n]=size(train_x);% m is the number of rows (samples) of train_x, n is the number of columns (features).
%%% define the model
generator=nnsetup([30,15,30]);% the first 30 is the number of neurons in the first layer and must equal the dimension of train_x; the last 30 must also equal the dimension of train_x, since the generator outputs fake samples of the same size.
discriminator=nnsetup([30,15,1]);% the first layer must have as many neurons as the generator's last layer; the last layer has a single neuron whose output is the probability that a sample comes from the real data.
%%parameter settings
batch_size=m; % batch_size is the number of samples fed in per training step; the data set here is small, so the whole set is used as one batch.
iteration=100;% number of training iterations.
images_num=m;% total number of training samples.
batch_num=floor(images_num / batch_size);% number of batches per iteration (1 here, since batch_size = m).
learning_rate=0.0001;
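The file Normalization_wbc.txt is assumed to already contain data scaled to the [0,1] range. For reference, here is a minimal sketch of how such a file could be produced with column-wise min-max scaling; the raw file name wbc.txt is only an assumption for illustration:
raw = load('wbc.txt');% hypothetical raw data file
[r, c] = size(raw);
scaled = (raw - repmat(min(raw), r, 1)) ./ repmat(max(raw) - min(raw), r, 1);% scale each column to [0,1]
save('Normalization_wbc.txt', 'scaled', '-ascii');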
Part 2: build the neural network from the predefined architecture
function nn=nnsetup(architecture)
nn.architecture= architecture;% store the predefined network structure in the nn (neural network) struct
nn.layers_count= numel(nn.architecture);% number of layers in the network
%%%%%%% parameters required by the Adam optimizer %%%
nn.t=0;
nn.beta1=0.9;
nn.beta2=0.999;
nn.epsilon=10^(-8);
%%%%%%%%%%%%%%%%%%%%%%%
for i=2:nn.layers_count
nn.layers{i}.w=normrnd(0,0.02,nn.architecture(i-1),nn.architecture(i));% normrnd draws random numbers from a normal distribution: the first argument is the mean (0), the second is sigma (0.02), and the last two give the size of the generated matrix. For example, if they are 30 and 15, a 30*15 matrix is generated.
nn.layers{i}.b = normrnd(0, 0.02, 1, nn.architecture(i));% generate the bias
nn.layers{i}.w_m = 0;% first-moment estimate of the weight gradient (used by Adam), initialized to 0
nn.layers{i}.w_v = 0;% second-moment estimate of the weight gradient
nn.layers{i}.b_m = 0;% first-moment estimate of the bias gradient
nn.layers{i}.b_v = 0;% second-moment estimate of the bias gradient
end
end
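As a quick check of what nnsetup produces, we can build the generator and inspect the weight shapes (only functions defined in this post are used):
g = nnsetup([30,15,30]);
size(g.layers{2}.w)% 30 x 15
size(g.layers{3}.w)% 15 x 30
size(g.layers{3}.b)% 1 x 30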
Part 3: Forward Propagation
function nn=nnff(nn,x)
nn.layers{1}.a=x;% the input x becomes the activation of the first layer
for i=2:nn.layers_count %%%%nn.layers_count is already the number of layers of the network when it is passed into nn
input=nn.layers{i-1}.a;
w=nn.layers{i}.w;
b=nn.layers{i}.b;
nn.layers{i}.z=input * w +repmat(b,size(input,1),1);% z = a*w + b, with the bias replicated across the batch
if i~=nn.layers_count
nn.layers{i}.a=relu(nn.layers{i}.z);% hidden layers use the ReLU activation
else
nn.layers{i}.a=sigmoid(nn.layers{i}.z);% the last layer uses the sigmoid activation
end
end
end
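For example, feeding random noise through a freshly built generator confirms the output shape and that the final sigmoid keeps the values in (0,1):
g = nnsetup([30,15,30]);
g = nnff(g, rand(5,30));% 5 samples of 30-dimensional noise
out = g.layers{g.layers_count}.a;
size(out)% 5 x 30
[min(out(:)), max(out(:))]% both values lie strictly between 0 and 1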
Part 4: backpropagation, which is split into backpropagation for the discriminator and backpropagation for the generator
A. Backpropagation of the discriminator
function nn=nnbp_d(nn, y_h, y)
% The discriminator's input is the generator's output (the fake data) together with the real data train_x we have at hand.
n=nn.layers_count;
nn.layers{n}.d=delta_sigmoid_cross_entropy(y_h,y); % nn.layers{n}.d is the delta (error term) of the last layer
for i=n-1:-1:2% i runs from n-1 down to 2 in steps of -1, propagating the delta backwards through the hidden layers
d=nn.layers{i+1}.d;
w=nn.layers{i+1}.w;
z=nn.layers{i}.z;
nn.layers{i}.d=d*w' .*delta_relu(z);% delta of layer i: the delta of layer i+1 pushed back through its weights, times the ReLU derivative
end
for i=2:n
d=nn.layers{i}.d;
a=nn.layers{i-1}.a;
nn.layers{i}.dw=a'*d /size(d,1);% gradient of the weights, averaged over the batch
nn.layers{i}.db=mean(d,1);% gradient of the bias, averaged over the batch
end
end
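To convince ourselves that nnbp_d really computes the gradient of the mean sigmoid cross entropy, a small finite-difference check can be run; this is a sketch using only the functions defined in this post, and the layer/row/column indices are arbitrary:
d_net = nnsetup([30,15,1]);
x = rand(8,30); y = ones(8,1);% 8 samples, all labelled 1
d_net = nnff(d_net, x);
logits = d_net.layers{d_net.layers_count}.z;
d_net = nnbp_d(d_net, logits, y);% analytic gradients stored in .dw
i = 2; r = 1; c = 1; eps_fd = 1e-5;% which weight to perturb
w0 = d_net.layers{i}.w(r,c);
d_net.layers{i}.w(r,c) = w0 + eps_fd;
net_p = nnff(d_net, x);
loss_p = sigmoid_cross_entropy(net_p.layers{net_p.layers_count}.z, y);
d_net.layers{i}.w(r,c) = w0 - eps_fd;
net_m = nnff(d_net, x);
loss_m = sigmoid_cross_entropy(net_m.layers{net_m.layers_count}.z, y);
d_net.layers{i}.w(r,c) = w0;% restore the original weight
numeric = (loss_p - loss_m) / (2*eps_fd);
analytic = d_net.layers{i}.dw(r,c);
fprintf('numeric %.6g vs analytic %.6g\n', numeric, analytic);% the two values should agree closely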
Part 5: backpropagation of the generator
function g_net=nnbp_g(g_net,d_net)
n=g_net.layers_count;
a=g_net.layers{n}.a;
g_net.layers{n}.d=d_net.layers{2}.d*d_net.layers{2}.w' .* (a .*(1-a));% the error flows back from the discriminator's first hidden layer through its input weights; a.*(1-a) is the derivative of the generator's sigmoid output
for i=n-1:-1:2
d=g_net.layers{i+1}.d;
w=g_net.layers{i+1}.w;
z=g_net.layers{i}.z;
g_net.layers{i}.d=d*w' .* delta_relu(z);
end
%Calculate partial derivatives
for i=2:n
d=g_net.layers{i}.d;
a=g_net.layers{i-1}.a;
g_net.layers{i}.dw=a'*d/size(d,1);
g_net.layers{i}.db=mean(d,1);
end
end
Part 6: activation functions and loss functions
%%%sigmoid activation function
function output=sigmoid(x)
output=1 ./(1+exp(-x));
end
%%%relu activation function
function output=relu(x)
output=max(x,0);
end
%%% Leaky ReLU activation function (defined here but not used elsewhere in this code)
function output = Leaky_ReLU(x)
a=2;% negative inputs are divided by a
output = max(x,0) + min(x,0)/a;% element-wise form, so it also works on matrices
end
%%%% loss functions
% derivative of relu with respect to x
function output=delta_relu(x)
output=max(x,0);
output(output>0)=1;
end
%%%%sigmoid cross entropy loss function
function result=sigmoid_cross_entropy(logits,labels)
result=max(logits,0) -logits .*labels +log(1+exp(-abs(logits)));% numerically stable form of the binary cross entropy on logits
result=mean(result);
end
%%% Derivative of sigmoid cross entropy to logits
function result=delta_sigmoid_cross_entropy(logits, labels)
% the derivative is sigmoid(logits) - labels, computed in a numerically stable way
temp1=max(logits,0);
temp1(temp1>0)=1;% indicator: 1 where logits > 0, 0 elsewhere
temp2=ones(size(logits));% +1 where logits <= 0
temp2(logits>0)=-1;% -1 where logits > 0
result=temp1-labels+exp(-abs(logits)) ./ (1+exp(-abs(logits))) .* temp2;
end
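The two functions above are the numerically stable form of the binary cross entropy on logits and its derivative sigmoid(logits) - labels. For moderate logit values they can be checked against the naive formulas directly:
logits = randn(5,1); labels = double(rand(5,1) > 0.5);
p = sigmoid(logits);
naive = mean(-labels .* log(p) - (1-labels) .* log(1-p));
stable = sigmoid_cross_entropy(logits, labels);
grad = delta_sigmoid_cross_entropy(logits, labels);
disp([naive stable])% the two loss values should match
disp([grad, p - labels])% the two gradient columns should match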
Part 7: the Adam optimizer
% Adam optimizer
function nn = nnapplygrade(nn, learning_rate)
n = nn.layers_count;
nn.t = nn.t+1;
beta1 = nn.beta1;
beta2 = nn.beta2;
lr = learning_rate * sqrt(1-nn.beta2^nn.t) / (1-nn.beta1^nn.t);% fold Adam's bias correction into the step size
for i = 2:n
dw = nn.layers{i}.dw;
db = nn.layers{i}.db;
% update the weights and biases with Adam
nn.layers{i}.w_m = beta1 * nn.layers{i}.w_m + (1-beta1) * dw;
nn.layers{i}.w_v = beta2 * nn.layers{i}.w_v + (1-beta2) * (dw.*dw);
nn.layers{i}.w = nn.layers{i}.w -lr * nn.layers{i}.w_m ./ (sqrt(nn.layers{i}.w_v) + nn.epsilon);
nn.layers{i}.b_m = beta1 * nn.layers{i}.b_m + (1-beta1) * db;
nn.layers{i}.b_v = beta2 * nn.layers{i}.b_v + (1-beta2) * (db .* db);
nn.layers{i}.b = nn.layers{i}.b -lr * nn.layers{i}.b_m ./ (sqrt(nn.layers{i}.b_v) + nn.epsilon);
end
end
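Note that Adam's bias correction is folded into the step size lr instead of being applied to w_m and w_v separately; up to where epsilon enters, this is equivalent to the standard formulation. A tiny usage sketch with a hand-filled gradient (the gradient values are made up purely for illustration) shows how nnapplygrade consumes the .dw and .db fields:
net = nnsetup([3,1]);
net.layers{2}.dw = 0.1 * ones(3,1);% pretend gradient for the weights
net.layers{2}.db = 0.1;% pretend gradient for the bias
w_before = net.layers{2}.w;
net = nnapplygrade(net, 0.001);
disp(w_before - net.layers{2}.w)% on the first step each weight moves by roughly the learning rate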
Part 8: the main course, training the GAN
for i=1:iteration
kk=randperm(images_num);% shuffled sample indices (unused here, since the whole set is one batch)
images_real=train_x;% the real samples
noise=unifrnd(0,1,m,30);% uniform noise in [0,1]: m rows of 30-dimensional noise as the generator's input
generator=nnff(generator,noise);% forward pass of the noise through the generator
images_fake=generator.layers{generator.layers_count}.a;% the generator's output, i.e. the fake samples
discriminator=nnff(discriminator,images_fake);% forward pass of the fake samples through the discriminator
logits_fake=discriminator.layers{discriminator.layers_count}.z;% discriminator logits for the fake samples
discriminator=nnbp_d(discriminator, logits_fake, ones(batch_size,1));% backprop through the discriminator with label 1: the generator wants the fakes to be judged as real
generator=nnbp_g(generator, discriminator);% propagate the error back into the generator
generator=nnapplygrade(generator,learning_rate);% update the generator's weights with Adam
%%%%%%% now update the discriminator
generator=nnff(generator,noise);% regenerate the fake samples with the updated generator
images_fake=generator.layers{generator.layers_count}.a;
images=[images_fake;images_real];% concatenate fake samples (first half) and real samples (second half)
discriminator=nnff(discriminator,images);% forward pass of the combined batch through the discriminator
logits=discriminator.layers{discriminator.layers_count}.z;% discriminator logits for the combined batch
labels = [zeros(batch_size,1); ones(batch_size,1)];% labels: 0 for the fake samples in front, 1 for the real samples behind
discriminator = nnbp_d(discriminator, logits, labels); % here the logits are compared with the true labels; contrast this with the generator update above, where the fake logits were compared with all-ones labels.
discriminator = nnapplygrade(discriminator, learning_rate);% update the weights of the discriminator network
%---- record the losses
c_loss(i,:) = sigmoid_cross_entropy(logits(1:batch_size), ones(batch_size,1));% generator loss: fake logits against label 1
d_loss(i,:) = sigmoid_cross_entropy(logits, labels); % discriminator loss: all logits against the true labels
end
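After training, the two loss curves can be plotted and new synthetic samples drawn from the trained generator, for example:
figure;
plot(1:iteration, c_loss, 1:iteration, d_loss);
legend('generator loss', 'discriminator loss');
xlabel('iteration'); ylabel('loss');
noise = unifrnd(0, 1, m, 30);% fresh noise
generator = nnff(generator, noise);
samples_fake = generator.layers{generator.layers_count}.a;% m synthetic rows with the same dimension as train_x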