youtubeDNN模型实现1-SparseFeature封装

1 特征封装

SparseFeature

1.1 namedtuple 使用

from collections import namedtuple

# namedtuple builds a lightweight, immutable record class whose fields can be
# read by attribute name as well as by position.
Users = namedtuple("User", ["name", "sex", "age"])
user = Users(name="harry", sex="male", age=22)
# Access the fields by name.
print(user.name, user.sex, user.age)
输出:harry male 22

1.2 SparseFeat 和 VarLenSparseFeat

SparseFeat 继承 namedtuple,并在 __new__ 中设置各字段的默认值。

from collections import namedtuple

import tensorflow as tf
from tensorflow import keras
# Turn off eager execution through the TF1 compatibility API so that the
# code below runs in graph mode.
tf.compat.v1.disable_eager_execution()
class SparseFeat(
    namedtuple(
        "SparseFeat",
        [
            "name",
            "vocabulary_size",
            "embedding_dim",
            "use_hash",
            "dtype",
            "embedding_initializer",
            "embedding_name",
            "group_name",
            "trainable",
        ],
    )
):
    """Immutable description of a single-valued sparse (categorical) feature.

    The class is a namedtuple whose ``__new__`` fills in defaults:

    * ``embedding_dim == "auto"`` is replaced by
      ``6 * int(vocabulary_size ** 0.25)``.
    * ``embedding_initializer is None`` is replaced by a Keras
      ``RandomNormal(mean=0.0, stddev=0.0001, seed=2022)`` initializer.
    * ``embedding_name is None`` falls back to ``name``.

    Instances hash by ``name`` only.
    """

    __slots__ = ()

    def __new__(
        cls,
        name,
        vocabulary_size,
        embedding_dim,
        use_hash=False,
        dtype="int32",
        embedding_initializer=None,
        embedding_name=None,
        group_name="default_group",
        trainable=True,
    ):
        # Resolve the "auto" sentinel into a concrete width derived from the
        # vocabulary size; any other value is stored unchanged.
        resolved_dim = (
            6 * int(vocabulary_size ** 0.25)
            if embedding_dim == "auto"
            else embedding_dim
        )

        # Only build the default initializer when the caller supplied none.
        initializer = embedding_initializer
        if initializer is None:
            initializer = keras.initializers.RandomNormal(
                mean=0.0, stddev=0.0001, seed=2022
            )

        # The embedding name defaults to the feature name.
        table_name = name if embedding_name is None else embedding_name

        return super(SparseFeat, cls).__new__(
            cls,
            name=name,
            vocabulary_size=vocabulary_size,
            embedding_dim=resolved_dim,
            use_hash=use_hash,
            dtype=dtype,
            embedding_initializer=initializer,
            embedding_name=table_name,
            group_name=group_name,
            trainable=trainable,
        )

    def __hash__(self):
        # Hash on the feature name alone.
        return hash(self.name)

class VarLenSparseFeat(namedtuple("VarLenSparseFeat"
                        ,["sparsefeat","maxlen",'combiner','length_name','weight_name','weight_norm'])):
    """Immutable description of a variable-length (sequence) sparse feature.

    Wraps a ``sparsefeat`` object and adds the sequence-specific fields
    ``maxlen``, ``combiner``, ``length_name``, ``weight_name`` and
    ``weight_norm``. The attributes of the wrapped feature (``name``,
    ``vocabulary_size``, ...) are exposed as read-only properties, so the
    object can be used wherever those attributes are read.

    Instances hash by the wrapped feature's ``name``.
    """
    __slots__=()

    def __new__(cls,sparsefeat,maxlen,combiner='mean',length_name=None,weight_name=None,weight_norm=True):
        return super(VarLenSparseFeat,cls).__new__(cls,sparsefeat,maxlen,combiner,length_name,weight_name,weight_norm)

    # The tuple itself only stores ``sparsefeat``; the properties below
    # delegate to it. Without ``name`` the original ``__hash__`` raised
    # AttributeError, which made instances unusable in dicts and sets.
    @property
    def name(self):
        return self.sparsefeat.name

    @property
    def vocabulary_size(self):
        return self.sparsefeat.vocabulary_size

    @property
    def embedding_dim(self):
        return self.sparsefeat.embedding_dim

    @property
    def use_hash(self):
        return self.sparsefeat.use_hash

    @property
    def dtype(self):
        return self.sparsefeat.dtype

    @property
    def embedding_initializer(self):
        return self.sparsefeat.embedding_initializer

    @property
    def embedding_name(self):
        return self.sparsefeat.embedding_name

    @property
    def group_name(self):
        return self.sparsefeat.group_name

    @property
    def trainable(self):
        return self.sparsefeat.trainable

    def __hash__(self):
        # Hash on the wrapped feature's name.
        return hash(self.name)

2 特征封装应用

将需要的类别特征封装在 SparseFeat 和 VarLenSparseFeat 中,示例如下:

# Vocabulary size per categorical feature, keyed by feature name.
feature_max_idx = {
    'user_id': 4,
    'movie_id': 208,
    'gender': 3,
    'age': 4,
    'occupation': 4,
    'zip': 4,
}
# Every feature in this example uses the same embedding width.
embedding_dim = 16

# User side: the single-valued features first, in a fixed order ...
user_feature_columns = [
    SparseFeat(feat_name, feature_max_idx[feat_name], embedding_dim)
    for feat_name in ('user_id', "gender", "age", "occupation", "zip")
]
# ... followed by the watch-history sequence, which is given the embedding
# name "movie_id" (maxlen 50, 'mean' combiner, length input 'hist_len').
user_feature_columns.append(
    VarLenSparseFeat(
        SparseFeat(
            'hist_movie_id',
            feature_max_idx['movie_id'],
            embedding_dim,
            embedding_name="movie_id",
        ),
        50,
        'mean',
        'hist_len',
    )
)

# Item side: only the movie id.
item_feature_columns = [
    SparseFeat('movie_id', feature_max_idx['movie_id'], embedding_dim),
]

猜你喜欢

转载自blog.csdn.net/weixin_42529756/article/details/127095973