Andrew Ng, Deep Learning, Course 5, Week 1: Dinosaurus Island -- Character level language model final - v1

import numpy as np
from random import shuffle

def clip(gradients, maxValue):
    dWaa, dWax, dWya, db, dby = gradients['dWaa'], gradients['dWax'], gradients['dWya'], gradients['db'], gradients['dby']
    ### START CODE HERE ###
    # Clip to mitigate exploding gradients, loop over [dWax, dWaa, dWya, db, dby]. (≈2 lines)
    for gradient in [dWax, dWaa, dWya, db, dby]:
        np.clip(gradient, -maxValue, maxValue, out=gradient)
    ### END CODE HERE ###
    gradients = {"dWaa": dWaa, "dWax": dWax, "dWya": dWya, "db": db, "dby": dby}
    return gradients
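A quick sanity check for clip; the shapes and the scale factor in this toy gradient dictionary are hypothetical, chosen only to illustrate that np.clip with out= modifies the arrays in place:

# Hypothetical toy gradients; shapes and values are arbitrary, only the keys matter.
np.random.seed(3)
toy_gradients = {"dWax": np.random.randn(5, 3) * 10,
                 "dWaa": np.random.randn(5, 5) * 10,
                 "dWya": np.random.randn(2, 5) * 10,
                 "db":   np.random.randn(5, 1) * 10,
                 "dby":  np.random.randn(2, 1) * 10}
clipped = clip(toy_gradients, 10)
# Every entry now lies in [-10, 10]; np.clip(..., out=gradient) clipped the arrays in place.
print(clipped["dWaa"].min(), clipped["dWaa"].max())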
def sample(parameters, char_to_ix, seed):
    # Retrieve parameters and relevant shapes from "parameters" dictionary
    Waa, Wax, Wya, by, b = parameters['Waa'], parameters['Wax'], parameters['Wya'], parameters['by'], parameters['b']
    vocab_size = by.shape[0]
    n_a = Waa.shape[1]
    ### START CODE HERE ###
    # Step 1: Create the one-hot vector x for the first character (initializing the sequence generation). (≈1 line)
    x = np.zeros((vocab_size, 1))
    # Step 1': Initialize a_prev as zeros (≈1 line)
    a_prev = np.zeros((n_a, 1))
    # Create an empty list of indices; this will hold the indices of the characters we generate (≈1 line)
    indices = []
    # idx is a flag used to detect the newline character; we initialize it to -1
    idx = -1
    # Loop over time-steps t. At each time-step, sample a character from a probability distribution and append
    # its index to "indices". We stop if we reach 50 characters (which should be very unlikely with a
    # well-trained model); this helps with debugging and prevents an infinite loop.
    counter = 0
    newline_character = char_to_ix['\n']
    while (idx != newline_character and counter != 50):
        # Step 2: Forward propagate x using the equations (1), (2) and (3)
        a = np.tanh(np.dot(Wax, x) + np.dot(Waa, a_prev) + b)
        z = np.dot(Wya, a) + by
        y = softmax(z)
        # for grading purposes
        np.random.seed(counter + seed)
        # Step 3: Sample the index of a character within the vocabulary from the probability distribution y
        idx = np.random.choice(range(len(y)), p=y.ravel())
        # Append the index to "indices"
        indices.append(idx)
        # Step 4: Overwrite the input character as the one corresponding to the sampled index.
        x = np.zeros((vocab_size, 1))
        x[idx] = 1
        # Update "a_prev" to be "a"
        a_prev = a
        # for grading purposes
        seed += 1
        counter += 1
    ### END CODE HERE ###
    if (counter == 50):
        indices.append(char_to_ix['\n'])
    return indices
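sample() relies on a softmax helper that the assignment's utils module provides but this post does not show. A minimal sketch of such a helper, using the usual numerically-stabilized formulation (the version in utils may differ in detail):

def softmax(z):
    # Subtract the max for numerical stability, then normalize into a probability distribution.
    e_z = np.exp(z - np.max(z))
    return e_z / e_z.sum(axis=0)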
def optimize(X, Y, a_prev, parameters, learning_rate = 0.01):
    ### START CODE HERE ###
    # Forward propagate through time (≈1 line)
    loss, cache = rnn_forward(X, Y, a_prev, parameters)
    # Backpropagate through time (≈1 line)
    gradients, a = rnn_backward(X, Y, parameters, cache)
    # Clip your gradients between -5 (min) and 5 (max) (≈1 line)
    gradients = clip(gradients, 5)
    # Update parameters (≈1 line)
    parameters = update_parameters(parameters, gradients, learning_rate)
    ### END CODE HERE ###
    return loss, gradients, a[len(X) - 1]
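optimize() also depends on rnn_forward, rnn_backward and update_parameters, which come from the assignment's utils module and are not reproduced in this post. The update step is simple enough to sketch; assuming the same parameter and gradient keys used above, a plain gradient-descent update might look like this (a sketch, not necessarily the course's exact code):

def update_parameters(parameters, gradients, lr):
    # One step of gradient descent on every weight matrix and bias vector.
    parameters['Wax'] += -lr * gradients['dWax']
    parameters['Waa'] += -lr * gradients['dWaa']
    parameters['Wya'] += -lr * gradients['dWya']
    parameters['b']   += -lr * gradients['db']
    parameters['by']  += -lr * gradients['dby']
    return parameters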
def model(data, ix_to_char, char_to_ix, num_iterations = 35000, n_a = 50, dino_names = 7, vocab_size = 27):
    # Retrieve n_x and n_y from vocab_size
    n_x, n_y = vocab_size, vocab_size
    # Initialize parameters
    parameters = initialize_parameters(n_a, n_x, n_y)
    # Initialize loss (this is required because we want to smooth our loss, don't worry about it)
    loss = get_initial_loss(vocab_size, dino_names)
    # Build list of all dinosaur names (training examples).
    with open("dinos.txt") as f:
        examples = f.readlines()
    examples = [x.lower().strip() for x in examples]
    # Shuffle list of all dinosaur names
    shuffle(examples)
    # Initialize the hidden state of your RNN
    a_prev = np.zeros((n_a, 1))
    # Optimization loop
    for j in range(num_iterations):
        ### START CODE HERE ###
        # Define one training example (X, Y); see the worked example in the comments below (≈ 2 lines)
        index = j % len(examples)
        X = [None] + [char_to_ix[ch] for ch in examples[index]]
        Y = X[1:] + [char_to_ix["\n"]]
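        # Worked example (hypothetical name): with the assignment's sorted vocabulary, '\n' is
        # index 0 and 'a'..'z' are 1..26, so if examples[index] == "turi" then
        #   X = [None, 20, 21, 18, 9]  and  Y = [20, 21, 18, 9, 0].
        # The leading None makes the first input x<1> a zero vector, and Y is X shifted one step
        # to the left with the newline index appended as the end-of-name label.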
        # Perform one optimization step: Forward-prop -> Backward-prop -> Clip -> Update parameters
        # Choose a learning rate of 0.01
        curr_loss, gradients, a_prev = optimize(X, Y, a_prev, parameters)
        ### END CODE HERE ###
        # Use a smoothing trick (an exponentially weighted average) to keep the reported loss stable.
        loss = smooth(loss, curr_loss)
        # Every 2000 iterations, generate "n" dinosaur names with sample() to check whether the model is learning properly
        if j % 2000 == 0:
            print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
            # The number of dinosaur names to print
            seed = 0
            for name in range(dino_names):
                # Sample indices and print them
                sampled_indices = sample(parameters, char_to_ix, seed)
                print_sample(sampled_indices, ix_to_char)
                seed += 1  # To get the same result for grading purposes, increment the seed by one.
            print('\n')
    return parameters
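For completeness, the notebook builds the character vocabulary from dinos.txt before calling model(); a minimal sketch of that driver code (numpy and shuffle were imported at the top, and initialize_parameters, rnn_forward, rnn_backward, smooth, get_initial_loss and print_sample are assumed to come from the assignment's utils module):

data = open('dinos.txt', 'r').read().lower()
chars = sorted(list(set(data)))                 # 26 letters plus '\n', so vocab_size = 27
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}

parameters = model(data, ix_to_char, char_to_ix)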

Result (sample output from the "Writing like Shakespeare" poem generator at the end of the notebook):

Here is your poem: 

Forsooth this maketh no sense  hin the wire,
i ende that hy hary cume canctore,
sure mont of which relaven to be besing,
and eye with hist horoure you doth thy sided's frved,
have casy thou wirt wateath of this conness,
dide yours denerted beaution, to the my and ell fites,
thou taken both worse inensefted bitpy yous,,
 
sray i ipenthanders shill to pizino breased.
and care thou hist' owr that ever so nas,
by suce your chnect w


Reposted from blog.csdn.net/qq_31119155/article/details/81078624