谨此记录,mark
- 时间:2024.01.20
- 问题排查耗时:约 2h
报错如下:
InvalidArgumentError (see above for traceback): indices[48,9] = 1656 is not in [0, 1656) [[node Deep/embedding_lookup (defined at /apsara/TempRoot/Odps/tbcrm_algorithm_dev_202401200852275gtjap1qosod_4666727b_b21c_4cb6_9dd3_8cf7b352d11f_AlgoTask_0_0/[email protected]#21/tensorflow_jobs/AlimiCTR/models/deepfm.py:71) = GatherV2[Taxis=DT_INT32, Tindices=DT_INT64, Tparams=DT_FLOAT, _device="/job:chief/replica:0/task:0/device:CPU:0"](Deep/concat, IteratorGetNext:2, Deep/embedding_lookup/axis)]] [[{{node clip_by_global_norm/mul_5_S265}} = _Recv[client_terminated=false, recv_device="/job:ps/replica:0/task:1/device:CPU:0", send_device="/job:chief/replica:0/task:0/device:CPU:0", send_device_incarnation=3940832609751527887, tensor_name="edge_741_clip_by_global_norm/mul_5", tensor_type=DT_FLOAT, _device="/job:ps/replica:0/task:1/device:CPU:0"]()]]
解决方法:
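sparse_encoding_size 为类别变量的分桶数量。报错显示索引 1656 不在 embedding 表的合法范围 [0, 1656) 内,即表只有 1656 行、而输入中出现了等于行数的索引;因此 embedding 表的行数(emb size)要设为 sparse_encoding_size + 1: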
self.sparse_encoding_size = self.get_sparse_encoding_size() self.set_model_hyperparameter('sparse_encoding_size', self.sparse_encoding_size + 1) with tf.variable_scope(DNN_SCOPE, partitioner=self.partitioner()): input_dense = features["dense_main"] input_sparse = features['sparse_main'] sparse_emb_table = get_token_embeddings(sparse_token_size, embedding_dim=embedding_dim, variable_name="sparse_emb", zero_pad=True) sparse_emb = tf.nn.embedding_lookup(sparse_emb_table, input_sparse)
其中,get_token_embeddings 的定义如下:
def get_token_embeddings(vocab_size, embedding_dim, variable_name="embedding_dim", zero_pad=True):
    """Build a token-embedding table, optionally with an all-zero row 0.

    The table is obtained via ``tf.get_variable`` in whatever variable scope
    is active at the call site; this function does not open a scope itself.

    Args:
        vocab_size: Number of rows in the table. Indices looked up in the
            result must lie in ``[0, vocab_size)``.
        embedding_dim: Width of each embedding vector.
        variable_name: Name handed to ``tf.get_variable``.
        zero_pad: When true, row 0 of the returned tensor is replaced by
            zeros (the variable's own row 0 is left out of the result).

    Returns:
        The ``(vocab_size, embedding_dim)`` float32 embedding table: the
        variable itself when ``zero_pad`` is false, otherwise a tensor whose
        first row is zeros followed by rows ``1:`` of the variable.
    """
    initializer = tf.truncated_normal_initializer(stddev=0.05, dtype=tf.float32)
    table = tf.get_variable(
        variable_name,
        dtype=tf.float32,
        shape=(vocab_size, embedding_dim),
        initializer=initializer,
    )
    if not zero_pad:
        return table

    # Swap the learned row 0 for a constant zero row so index 0 maps to zeros.
    zero_row = tf.zeros(shape=[1, embedding_dim])
    learned_rows = table[1:, :]
    return tf.concat((zero_row, learned_rows), axis=0)