序列列表与稀疏矩阵的转换

import numpy as np
import string
# Lowercase ASCII alphabet used as the symbol vocabulary.
chars = string.ascii_lowercase
chars
# 'abcdefghijklmnopqrstuvwxyz'
# Lookup tables: letter -> position in chars, and position -> letter.
char2num = dict(zip(chars, range(len(chars))))
num2char = dict(enumerate(chars))
# Encode each word as the list of its letters' positions in chars,
# e.g. 'abc' -> [0, 1, 2] and 'de' -> [3, 4].
inputs = ['hello', 'tom']
inputs = [[char2num[letter] for letter in word] for word in inputs]
inputs
# [[7, 4, 11, 11, 14], [19, 14, 12]]
# Convert a list of sequences into a sparse-matrix triple.
def sparse_tuple_from(sequences, dtype=np.int32):
    """Build an (indices, values, shape) sparse representation of sequences.

    Args:
        sequences: A sized collection of sized iterables, one per row.
            Rows may have different lengths and may be empty.
        dtype: NumPy dtype used for the returned values array.

    Returns:
        A tuple (indices, values, shape) where
        - indices is an int64 array of shape (N, 2) holding one
          (row, position) pair per element, in row-major order;
        - values is a 1-D array of the N elements, cast to dtype;
        - shape is the int64 array [number of rows, longest row length].

        When there are no elements at all, indices has shape (0, 2) and the
        second entry of shape is 0.
    """
    indices = []
    values = []
    max_len = 0
    for n, seq in enumerate(sequences):
        seq_len = len(seq)
        indices.extend((n, pos) for pos in range(seq_len))
        values.extend(seq)
        # Track the widest row directly instead of taking max() over the
        # index array, which raises ValueError when there are no elements.
        max_len = max(max_len, seq_len)

    # reshape keeps the (N, 2) layout even when N == 0.
    indices = np.asarray(indices, dtype=np.int64).reshape(-1, 2)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)
    return indices, values, shape
# Encode the example sequences as an (indices, values, shape) triple.
sparse_tensor=sparse_tuple_from(inputs)
sparse_tensor
# Displayed value of sparse_tensor:
# (array([[0, 0],
#            [0, 1],
#            [0, 2],
#            [0, 3],
#            [0, 4],
#            [1, 0],
#            [1, 1],
#            [1, 2]], dtype=int64),
#     array([ 7,  4, 11, 11, 14, 19, 14, 12]),
#     array([2, 5], dtype=int64))
def decode_sparse_tensor(sparse_tensor):
    """Turn an (indices, values[, shape]) sparse triple back into row lists.

    Args:
        sparse_tensor: An indexable whose item 0 is a sequence of
            (row, position) index pairs and whose item 1 holds the value
            for each pair at the same offset. If a third item is present,
            its first entry is taken as the total number of rows.

    Returns:
        A list with one list of values per row. Values keep the order in
        which they appear in the input. Rows that have no entries come back
        as empty lists rather than being dropped, so empty sequences survive
        a round trip; with no entries and no shape the result is [].
    """
    indices, values = sparse_tensor[0], sparse_tensor[1]

    # Pre-create every row announced by the dense shape so that rows with
    # no entries (including trailing ones) are still represented.
    num_rows = int(sparse_tensor[2][0]) if len(sparse_tensor) > 2 else 0
    result = [[] for _ in range(num_rows)]

    for offset, i_and_index in enumerate(indices):
        row = int(i_and_index[0])
        # Grow on demand when no shape was given or it understates the rows.
        while row >= len(result):
            result.append([])
        result[row].append(values[offset])
    return result
# Recover the per-sequence index lists from the sparse triple.
decoded_indexes=decode_sparse_tensor(sparse_tensor)
decoded_indexes
#[[7, 4, 11, 11, 14], [19, 14, 12]]
# Map every index back to its letter through num2char.
outputs=[[num2char[index] for index in item] for item in decoded_indexes]
outputs
#[['h', 'e', 'l', 'l', 'o'], ['t', 'o', 'm']]

猜你喜欢

转载自blog.csdn.net/shuishou07/article/details/78923590
今日推荐