Coursera-Deep Learning Specialization 课程之(五):Sequence Models: -weak1编程作业 (第三部分)

Improvise a Jazz Solo with an LSTM Network

from __future__ import print_function
import IPython
import sys
from music21 import *
import numpy as np
from grammar import *
from qa import *
from preprocess import * 
from music_utils import *
from data_utils import *
from keras.models import load_model, Model
from keras.layers import Dense, Activation, Dropout, Input, LSTM, Reshape, Lambda, RepeatVector
from keras.initializers import glorot_uniform
from keras.utils import to_categorical
from keras.optimizers import Adam
from keras import backend as K

1 - Problem statement

这里写图片描述

1.1 - Dataset

IPython.display.Audio('./data/30s_seq.mp3')#爵士乐

爵士乐:training_example.mp3

X, Y, n_values, indices_values = load_music_utils()
print('shape of X:', X.shape)
print('number of training examples:', X.shape[0])
print('Tx (length of sequence):', X.shape[1])
print('total # of unique values:', n_values)
print('Shape of Y:', Y.shape)

shape of X: (60, 30, 78)
number of training examples: 60
Tx (length of sequence): 30
total # of unique values: 78
Shape of Y: (30, 60, 78)

1.2 - Overview of our model

这里写图片描述

2 - Building the model

# GRADED FUNCTION: djmodel

def djmodel(Tx, n_a, n_values):
    """
    Implement the model

    Arguments:
    Tx -- length of the sequence in a corpus
    n_a -- the number of activations used in our model
    n_values -- number of unique values in the music data 

    Returns:
    model -- a keras model with the 
    """

    # Define the input of your model with a shape 
    X = Input(shape=(Tx, n_values))

    # Define s0, initial hidden state for the decoder LSTM
    a0 = Input(shape=(n_a,), name='a0')
    c0 = Input(shape=(n_a,), name='c0')
    a = a0
    c = c0

    ### START CODE HERE ### 
    # Step 1: Create empty list to append the outputs while you iterate (≈1 line)
    outputs = []

    # Step 2: Loop
    for t in range(Tx):

        # Step 2.A: select the "t"th time step vector from X. 
        x = Lambda(lambda x: X[:,t,:])(X)
        # Step 2.B: Use reshapor to reshape x to be (1, n_values) (≈1 line)
        x = reshapor(x)
        # Step 2.C: Perform one step of the LSTM_cell
        a, _, c = LSTM_cell(x, initial_state=[a, c])
        # Step 2.D: Apply densor to the hidden state output of LSTM_Cell
        out = densor(a)
        # Step 2.E: add the output to "outputs"
        outputs.append(out)

    # Step 3: Create model instance
    model = Model([X,a0,c0],outputs)

    ### END CODE HERE ###

    return model
model = djmodel(Tx = 30 , n_a = 64, n_values = 78)opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay=0.01)

model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])m = 60
a0 = np.zeros((m, n_a))
c0 = np.zeros((m, n_a))
model.fit([X, a0, c0], list(Y), epochs=100)

60/60 [==============================] - 0s - loss: 5.9642 - dense_2_loss_1: 3.7577

3 - Generating music

这里写图片描述

def music_inference_model(LSTM_cell, densor, n_values = 78, n_a = 64, Ty = 100):
### START CODE HERE ###
    # Step 1: Create an empty list of "outputs" to later store your predicted values (≈1 line)
    outputs = []

    # Step 2: Loop over Ty and generate a value at every time step
    for t in range(Ty):

        # Step 2.A: Perform one step of LSTM_cell (≈1 line)
        a, _, c = LSTM_cell(x, initial_state=[a, c])

        # Step 2.B: Apply Dense layer to the hidden state output of the LSTM_cell (≈1 line)
        out = densor(a)

        # Step 2.C: Append the prediction "out" to "outputs". out.shape = (None, 78) (≈1 line)
        outputs.append(out)

        # Step 2.D: Select the next value according to "out", and set "x" to be the one-hot representation of the
        #           selected value, which will be passed as the input to LSTM_cell on the next step. We have provided 
        #           the line of code you need to do this. 
        x = Lambda(one_hot)(out)

    # Step 3: Create model instance with the correct "inputs" and "outputs" (≈1 line)
    inference_model = Model([x0,a0,c0],outputs)

    ### END CODE HERE ###

    return inference_model
inference_model = music_inference_model(LSTM_cell, densor, n_values = 78, n_a = 64, Ty = 50)
x_initializer = np.zeros((1, 1, 78))
a_initializer = np.zeros((1, n_a))
c_initializer = np.zeros((1, n_a))
def predict_and_sample(inference_model, x_initializer = x_initializer, a_initializer = a_initializer, 
                       c_initializer = c_initializer):
### START CODE HERE ###
    # Step 1: Use your inference model to predict an output sequence given x_initializer, a_initializer and c_initializer.
    pred = inference_model.predict([x_initializer, a_initializer, c_initializer])
    # Step 2: Convert "pred" into an np.array() of indices with the maximum probabilities
    indices = np.argmax(pred,axis=-1)
    # Step 3: Convert indices to one-hot vectors, the shape of the results should be (1, )
    results = to_categorical(indices,num_classes=x_initializer.shape[-1])
    ### END CODE HERE ###

    return results, indices                      
results, indices = predict_and_sample(inference_model, x_initializer, a_initializer, c_initializer)
print("np.argmax(results[12]) =", np.argmax(results[12]))
print("np.argmax(results[17]) =", np.argmax(results[17]))
print("list(indices[12:18]) =", list(indices[12:18]))

np.argmax(results[12]) = 42
np.argmax(results[17]) = 76
list(indices[12:18]) = [array([42]), array([76]), array([51]), array([30]), array([42]), array([76])]

3.3 - Generate music

out_stream = generate_music(inference_model)

Predicting new values for different set of chords.
Generated 51 sounds using the predicted values for the set of chords (“1”) and after pruning
Generated 51 sounds using the predicted values for the set of chords (“2”) and after pruning
Generated 51 sounds using the predicted values for the set of chords (“3”) and after pruning
Generated 51 sounds using the predicted values for the set of chords (“4”) and after pruning
Generated 50 sounds using the predicted values for the set of chords (“5”) and after pruning
Your generated music is saved in output/my_music.midi

IPython.display.Audio('./data/30s_trained_model.mp3')

训练结果:30s_trained_model.mp3

发布了34 篇原创文章 · 获赞 4 · 访问量 3万+

猜你喜欢

转载自blog.csdn.net/leaeason/article/details/79719821
今日推荐