mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-02 09:18:49 +02:00
b389c3d302
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
139 lines
4.4 KiB
Python
139 lines
4.4 KiB
Python
import os
|
|
from typing import Dict
|
|
|
|
from twitter.deepbird.projects.magic_recs.libs.model_utils import filter_nans_and_infs
|
|
import twml
|
|
from twml.layers import full_sparse, sparse_max_norm
|
|
|
|
from .params import FeaturesParams, GraphParams, SparseFeaturesParams
|
|
|
|
import tensorflow as tf
|
|
from tensorflow import Tensor
|
|
import tensorflow.compat.v1 as tf1
|
|
|
|
|
|
FEAT_CONFIG_DEFAULT_VAL = 0
|
|
DEFAULT_FEATURE_LIST_PATH = "./feature_list_default.yaml"
|
|
FEATURE_LIST_DEFAULT_PATH = os.path.join(
|
|
os.path.dirname(os.path.realpath(__file__)), DEFAULT_FEATURE_LIST_PATH
|
|
)
|
|
|
|
|
|
def get_feature_config(data_spec_path=None, feature_list_provided=[], params: GraphParams = None):
|
|
|
|
a_string_feat_list = [feat for feat, feat_type in feature_list_provided if feat_type != "S"]
|
|
|
|
builder = twml.contrib.feature_config.FeatureConfigBuilder(
|
|
data_spec_path=data_spec_path, debug=False
|
|
)
|
|
|
|
builder = builder.extract_feature_group(
|
|
feature_regexes=a_string_feat_list,
|
|
group_name="continuous_features",
|
|
default_value=FEAT_CONFIG_DEFAULT_VAL,
|
|
type_filter=["CONTINUOUS"],
|
|
)
|
|
|
|
builder = builder.extract_feature_group(
|
|
feature_regexes=a_string_feat_list,
|
|
group_name="binary_features",
|
|
type_filter=["BINARY"],
|
|
)
|
|
|
|
if params.model.features.sparse_features:
|
|
builder = builder.extract_features_as_hashed_sparse(
|
|
feature_regexes=a_string_feat_list,
|
|
hash_space_size_bits=params.model.features.sparse_features.bits,
|
|
type_filter=["DISCRETE", "STRING", "SPARSE_BINARY"],
|
|
output_tensor_name="sparse_not_continuous",
|
|
)
|
|
|
|
builder = builder.extract_features_as_hashed_sparse(
|
|
feature_regexes=[feat for feat, feat_type in feature_list_provided if feat_type == "S"],
|
|
hash_space_size_bits=params.model.features.sparse_features.bits,
|
|
type_filter=["SPARSE_CONTINUOUS"],
|
|
output_tensor_name="sparse_continuous",
|
|
)
|
|
|
|
builder = builder.add_labels([task.label for task in params.tasks] + ["label.ntabDislike"])
|
|
|
|
if params.weight:
|
|
builder = builder.define_weight(params.weight)
|
|
|
|
return builder.build()
|
|
|
|
|
|
def dense_features(features: Dict[str, Tensor], training: bool) -> Tensor:
|
|
"""
|
|
Performs feature transformations on the raw dense features (continuous and binary).
|
|
"""
|
|
with tf.name_scope("dense_features"):
|
|
x = filter_nans_and_infs(features["continuous_features"])
|
|
|
|
x = tf.sign(x) * tf.math.log(tf.abs(x) + 1)
|
|
x = tf1.layers.batch_normalization(
|
|
x, momentum=0.9999, training=training, renorm=training, axis=1
|
|
)
|
|
x = tf.clip_by_value(x, -5, 5)
|
|
|
|
transformed_continous_features = tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)
|
|
|
|
binary_features = filter_nans_and_infs(features["binary_features"])
|
|
binary_features = tf.dtypes.cast(binary_features, tf.float32)
|
|
|
|
output = tf.concat([transformed_continous_features, binary_features], axis=1)
|
|
|
|
return output
|
|
|
|
|
|
def sparse_features(
|
|
features: Dict[str, Tensor], training: bool, params: SparseFeaturesParams
|
|
) -> Tensor:
|
|
"""
|
|
Performs feature transformations on the raw sparse features.
|
|
"""
|
|
|
|
with tf.name_scope("sparse_features"):
|
|
with tf.name_scope("sparse_not_continuous"):
|
|
sparse_not_continuous = full_sparse(
|
|
inputs=features["sparse_not_continuous"],
|
|
output_size=params.embedding_size,
|
|
use_sparse_grads=training,
|
|
use_binary_values=False,
|
|
)
|
|
|
|
with tf.name_scope("sparse_continuous"):
|
|
shape_enforced_input = twml.util.limit_sparse_tensor_size(
|
|
sparse_tf=features["sparse_continuous"], input_size_bits=params.bits, mask_indices=False
|
|
)
|
|
|
|
normalized_continuous_sparse = sparse_max_norm(
|
|
inputs=shape_enforced_input, is_training=training
|
|
)
|
|
|
|
sparse_continuous = full_sparse(
|
|
inputs=normalized_continuous_sparse,
|
|
output_size=params.embedding_size,
|
|
use_sparse_grads=training,
|
|
use_binary_values=False,
|
|
)
|
|
|
|
output = tf.concat([sparse_not_continuous, sparse_continuous], axis=1)
|
|
|
|
return output
|
|
|
|
|
|
def get_features(features: Dict[str, Tensor], training: bool, params: FeaturesParams) -> Tensor:
|
|
"""
|
|
Performs feature transformations on the dense and sparse features and combine the resulting
|
|
tensors into a single one.
|
|
"""
|
|
with tf.name_scope("features"):
|
|
x = dense_features(features, training)
|
|
tf1.logging.info(f"Dense features: {x.shape}")
|
|
|
|
if params.sparse_features:
|
|
x = tf.concat([x, sparse_features(features, training, params.sparse_features)], axis=1)
|
|
|
|
return x
|