Implementing ST-LSTM in Lua Torch

Reference article: [https://www.csdn.net/article/2015-09-14/2825693]
Original article: [https://apaszke.github.io/lstm-explained.html]

1. Defining the inputs

    -- there will be 4*n+1 inputs
    local inputs = {}                         -- create an empty table
    table.insert(inputs, nn.Identity()())     -- x
    for L = 1, n do
        -- nn.Identity() simply passes its input through (a placeholder node for the input data)
        table.insert(inputs, nn.Identity()()) -- prev_cj[L]
        table.insert(inputs, nn.Identity()()) -- prev_hj[L]
    end
    for L = 1, n do
        table.insert(inputs, nn.Identity()()) -- prev_ct[L]
        table.insert(inputs, nn.Identity()()) -- prev_ht[L]
    end

    local x, input_size_L 

In a standard LSTM, the previous state fed into the cell is just one pair (c, h); in an ST-LSTM it is split into two parts, the temporal direction t and the spatial direction j.

The elements of the input table are unpacked into prev_cj, prev_hj, prev_ct and prev_ht (a concrete index layout is sketched after the code block below).

    local outputs = {}

    for L = 1, n do
        -- c,h from previous steps
        local prev_cj = inputs[L*2]
        local prev_hj = inputs[L*2+1]

        local prev_ct = inputs[n*2+L*2]
        local prev_ht = inputs[n*2+L*2+1]

        -- the input to this layer
        if (L == 1) then
            x = inputs[1]
            input_size_L = input_size
        else
            x = outputs[(L-1)*2] 
            if dropout > 0 then x = nn.Dropout(dropout)(x) end -- apply dropout, if any
            input_size_L = rnn_size
        end
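
To make the index arithmetic concrete, here is how the inputs table is laid out for a hypothetical n = 2 (the names follow the snippet above; this is only an illustration, not part of the original code):

    -- hypothetical layout for n = 2 (4*n+1 = 9 entries):
    -- inputs[1] = x
    -- inputs[2] = prev_cj[1], inputs[3] = prev_hj[1]
    -- inputs[4] = prev_cj[2], inputs[5] = prev_hj[2]
    -- inputs[6] = prev_ct[1], inputs[7] = prev_ht[1]
    -- inputs[8] = prev_ct[2], inputs[9] = prev_ht[2]
    --
    -- hence for layer L: prev_cj = inputs[2*L],       prev_hj = inputs[2*L+1]
    --                    prev_ct = inputs[2*n + 2*L], prev_ht = inputs[2*n + 2*L+1]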

2. Linear transformation of the inputs

rnn_size is the number of hidden units. After the linear transformation, the result is split into 5 parts (a standard LSTM uses 4).

        -- evaluate the input sums at once for efficiency
        local i2h  = nn.Linear(input_size_L, 5 * rnn_size)(x):annotate{name = 'i2h_'  .. L}
        local h2hj = nn.Linear(rnn_size,     5 * rnn_size)(prev_hj):annotate{name = 'h2hj_' .. L}
        local h2ht = nn.Linear(rnn_size,     5 * rnn_size)(prev_ht):annotate{name = 'h2ht_' .. L}
        local all_input_sums = nn.CAddTable()({i2h, h2hj, h2ht})

        local reshaped = nn.Reshape(5, rnn_size)(all_input_sums)
        local n1, n2, n3, n4, n5 = nn.SplitTable(2)(reshaped):split(5)
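
In equation form, this block computes a single pre-activation vector (the weight names below are generic notation rather than identifiers from the code; i2h, h2hj and h2ht hold the three terms):

$$a = W_{x}\,x + W_{hj}\,h_{j}^{prev} + W_{ht}\,h_{t}^{prev} + b \;\in\; \mathbb{R}^{5 \cdot \text{rnn\_size}}$$

which is then reshaped to 5 x rnn_size and split into five chunks n1, ..., n5 of size rnn_size each.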

3. Nonlinear transformation of the inputs

        -- decode the gates
        local in_gate       = nn.Sigmoid()(n1)
        local forget_gate_j = nn.Sigmoid()(n2)
        local forget_gate_t = nn.Sigmoid()(n3)
        local out_gate      = nn.Sigmoid()(n4)
        -- decode the write inputs
        local in_transform  = nn.Tanh()(n5)
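
Writing the five chunks as a_1, ..., a_5, the gates computed above are (with the logistic sigmoid sigma):

$$i = \sigma(a_1),\quad f_j = \sigma(a_2),\quad f_t = \sigma(a_3),\quad o = \sigma(a_4),\quad u = \tanh(a_5)$$

Here f_j and f_t are separate forget gates for the spatial (j) and temporal (t) predecessors, and u is the candidate cell input (in_transform in the code).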

4. State update

        local next_c = nn.CAddTable()({
            nn.CMulTable()({forget_gate_j, prev_cj}),
            nn.CMulTable()({forget_gate_t, prev_ct}),
            nn.CMulTable()({in_gate,  in_transform})  })

        -- gated cells form the output
        local next_h = nn.CMulTable()({out_gate, nn.Tanh()(next_c)})

        table.insert(outputs, next_c)
        table.insert(outputs, next_h)
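
In equation form, with the notation from above, the update is

$$c = f_j \odot c_{j}^{prev} + f_t \odot c_{t}^{prev} + i \odot u, \qquad h = o \odot \tanh(c)$$

so the single forget term of a standard LSTM is replaced by the sum of the two gated predecessor cells.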

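The excerpt above ends inside the layer loop. A minimal sketch of how the graph is usually closed and wrapped into a module, following the nngraph convention of the referenced article (the closing end and the gModule call below are assumptions, not part of the original excerpt):

    end  -- closes the "for L = 1, n do" loop

    -- wrap the whole graph into one module: it maps the 4*n+1 input nodes
    -- to the 2*n output nodes (next_c and next_h for every layer)
    local stlstm = nn.gModule(inputs, outputs)

    -- hypothetical forward pass for n = 1 and a batch of size 1:
    -- local out = stlstm:forward({x, prev_cj, prev_hj, prev_ct, prev_ht})
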

Reposted from blog.csdn.net/u013608336/article/details/78930181