mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-02 17:28:45 +02:00
b389c3d302
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
177 lines
5.8 KiB
Python
177 lines
5.8 KiB
Python
import os
|
|
|
|
from twitter.deepbird.projects.magic_recs.libs.metric_fn_utils import USER_AGE_FEATURE_NAME
|
|
from twitter.deepbird.projects.magic_recs.libs.model_utils import read_config
|
|
from twml.contrib import feature_config as contrib_feature_config
|
|
|
|
|
|
# checkstyle: noqa
|
|
|
|
# Passed as `default_value` when the "continuous" feature group is extracted.
FEAT_CONFIG_DEFAULT_VAL = -1.23456789

# Hash-space size (in bits) used for the hashed sparse tensors when no `opt`
# namespace is supplied to the config builders.
DEFAULT_INPUT_SIZE_BITS = 18

# Directory holding this module; the default feature-list YAML files are
# resolved relative to it rather than to the current working directory.
_MODULE_DIR = os.path.dirname(os.path.realpath(__file__))

# Relative name and resolved location of the default feature list.
DEFAULT_FEATURE_LIST_PATH = "./feature_list_default.yaml"
FEATURE_LIST_DEFAULT_PATH = os.path.join(_MODULE_DIR, DEFAULT_FEATURE_LIST_PATH)

# Relative name and resolved location of the light-ranking feature list.
DEFAULT_FEATURE_LIST_LIGHT_RANKING_PATH = "./feature_list_light_ranking.yaml"
FEATURE_LIST_DEFAULT_LIGHT_RANKING_PATH = os.path.join(
    _MODULE_DIR, DEFAULT_FEATURE_LIST_LIGHT_RANKING_PATH
)
|
|
|
|
# Load both default feature lists once, at import time. Each is kept as the
# `.items()` view of whatever `read_config` returns, so consumers iterate over
# (key, value) pairs. The default list is read first, then the light-ranking one.
FEATURE_LIST_DEFAULT, FEATURE_LIST_LIGHT_RANKING_DEFAULT = (
    read_config(config_path).items()
    for config_path in (
        FEATURE_LIST_DEFAULT_PATH,
        FEATURE_LIST_DEFAULT_LIGHT_RANKING_PATH,
    )
)
|
|
|
|
|
|
# Label column used when a single label is requested.
LABELS = ["label"]

# Label columns per task name for the multi-task (MTL) feature config.
LABELS_MTL = {
    "OONC": ["label"],
    "OONC_Engagement": [
        "label",
        "label.engagement",
    ],
}

# Label columns per task name for the light-ranking (LR) feature config.
LABELS_LR = {
    "Sent": ["label.sent"],
    "HeavyRankPosition": ["meta.ranking.is_top3"],
    "HeavyRankProbability": ["meta.ranking.weighted_oonc_model_score"],
}
|
|
|
|
|
|
def _get_new_feature_config_base(
    data_spec_path,
    labels,
    add_sparse_continous=True,
    add_gbdt=True,
    add_user_id=False,
    add_timestamp=False,
    add_user_age=False,
    feature_list_provided=None,
    opt=None,
    run_light_ranking_group_metrics_in_bq=False,
):
    """Build a twml feature config from a data spec and a feature list.

    Every feature-list entry is read by index: ``entry[0]`` is used as a
    feature regex and ``entry[1]`` as a type tag. Entries tagged ``"S"`` feed
    the ``sparse_continuous`` tensor; all other entries feed both the
    ``continuous`` group and the ``sparse_no_continuous`` tensor (the builder's
    ``type_filter`` decides which data-spec features land in each).

    Args:
      data_spec_path: A string indicating the path of the data_spec.json file,
        which could be either a local path or a hdfs path.
      labels: A list of strings indicating the names of the labels in the
        data spec.
      add_sparse_continous: A bool indicating if the sparse_continuous features
        need to be included. (The parameter name is kept as-is for callers.)
      add_gbdt: A bool indicating if the gbdt features (those matching
        ``ads\\..*``) need to be included.
      add_user_id: A bool indicating if the ``meta.user_id`` feature needs to
        be included.
      add_timestamp: A bool indicating if the ``meta.timestamp`` feature needs
        to be included. This is useful for sequential models and meta
        learning models.
      add_user_age: A bool indicating if the user age feature needs to be
        included.
      feature_list_provided: An iterable of feature entries that need to be
        included. If ``None`` or empty, FEATURE_LIST_DEFAULT is used.
      opt: A namespace of arguments indicating the hyperparameters. Only
        ``opt.input_size_bits`` is read here; when ``opt`` is ``None``,
        DEFAULT_INPUT_SIZE_BITS is used.
      run_light_ranking_group_metrics_in_bq: A bool indicating if the trace id
        and heavy ranker score need to be included so that group metrics can
        be computed in BigQuery.

    Returns:
      A twml feature config object.
    """
    input_size_bits = DEFAULT_INPUT_SIZE_BITS if opt is None else opt.input_size_bits

    # Materialise the input once: the list is scanned twice below, so a
    # one-shot iterable would otherwise come up empty on the second pass.
    # Any empty input (not just a literal `[]`) falls back to the default.
    provided = [] if feature_list_provided is None else list(feature_list_provided)
    feature_list = provided if provided else list(FEATURE_LIST_DEFAULT)

    non_sparse_continuous_regexes = [entry[0] for entry in feature_list if entry[1] != "S"]

    builder = contrib_feature_config.FeatureConfigBuilder(data_spec_path=data_spec_path)

    builder = builder.extract_feature_group(
        feature_regexes=non_sparse_continuous_regexes,
        group_name="continuous",
        default_value=FEAT_CONFIG_DEFAULT_VAL,
        type_filter=["CONTINUOUS"],
    )

    builder = builder.extract_features_as_hashed_sparse(
        feature_regexes=non_sparse_continuous_regexes,
        output_tensor_name="sparse_no_continuous",
        hash_space_size_bits=input_size_bits,
        type_filter=["BINARY", "DISCRETE", "STRING", "SPARSE_BINARY"],
    )

    if add_gbdt:
        builder = builder.extract_features_as_hashed_sparse(
            # Raw string: `\.` is a regex escape, not a Python string escape.
            feature_regexes=[r"ads\..*"],
            output_tensor_name="gbdt_sparse",
            hash_space_size_bits=input_size_bits,
        )

    if add_sparse_continous:
        sparse_continuous_regexes = [entry[0] for entry in feature_list if entry[1] == "S"]

        builder = builder.extract_features_as_hashed_sparse(
            feature_regexes=sparse_continuous_regexes,
            output_tensor_name="sparse_continuous",
            hash_space_size_bits=input_size_bits,
            type_filter=["SPARSE_CONTINUOUS"],
        )

    if add_user_id:
        builder = builder.extract_feature("meta.user_id")
    if add_timestamp:
        builder = builder.extract_feature("meta.timestamp")
    if add_user_age:
        builder = builder.extract_feature(USER_AGE_FEATURE_NAME)

    if run_light_ranking_group_metrics_in_bq:
        builder = builder.extract_feature("meta.trace_id")
        builder = builder.extract_feature("meta.ranking.weighted_oonc_model_score")

    builder = builder.add_labels(labels).define_weight("meta.weight")

    return builder.build()
|
|
|
|
|
|
def get_feature_config_with_sparse_continuous(
    data_spec_path,
    feature_list_provided=[],
    opt=None,
    add_user_id=False,
    add_timestamp=False,
    add_user_age=False,
):
    """Build the feature config with sparse-continuous features enabled.

    The labels are looked up in LABELS_MTL using ``opt.task_name``; when
    ``opt`` is ``None`` or has no (non-``None``) ``task_name``, the task
    defaults to ``"OONC"``.

    Args:
      data_spec_path: Path of the data spec, forwarded to the base builder.
      feature_list_provided: Feature list forwarded to the base builder.
      opt: Optional namespace of arguments; ``task_name`` is read here and the
        namespace is forwarded to the base builder.
      add_user_id: Whether to include the user id feature.
      add_timestamp: Whether to include the timestamp feature.
      add_user_age: Whether to include the user age feature.

    Returns:
      The object returned by ``_get_new_feature_config_base``.

    Raises:
      ValueError: If the task name is not a key of LABELS_MTL.
    """
    requested_task = getattr(opt, "task_name", None)
    task_name = "OONC" if requested_task is None else requested_task

    if task_name in LABELS_MTL:
        return _get_new_feature_config_base(
            data_spec_path=data_spec_path,
            labels=LABELS_MTL[task_name],
            add_sparse_continous=True,
            add_user_id=add_user_id,
            add_timestamp=add_timestamp,
            add_user_age=add_user_age,
            feature_list_provided=feature_list_provided,
            opt=opt,
        )

    raise ValueError("Invalid Task Name !")
|
|
|
|
|
|
def get_feature_config_light_ranking(
    data_spec_path,
    feature_list_provided=[],
    opt=None,
    add_user_id=True,
    add_timestamp=False,
    add_user_age=False,
    add_gbdt=False,
    run_light_ranking_group_metrics_in_bq=False,
):
    """Build the feature config for light ranking.

    The labels are looked up in LABELS_LR using ``opt.task_name``; when
    ``opt`` is ``None`` or has no (non-``None``) ``task_name``, the task
    defaults to ``"HeavyRankPosition"``. Sparse-continuous features are
    always disabled for this config.

    Args:
      data_spec_path: Path of the data spec, forwarded to the base builder.
      feature_list_provided: Feature list to use; when empty,
        FEATURE_LIST_LIGHT_RANKING_DEFAULT is used instead.
      opt: Optional namespace of arguments; ``task_name`` is read here and the
        namespace is forwarded to the base builder.
      add_user_id: Whether to include the user id feature.
      add_timestamp: Whether to include the timestamp feature.
      add_user_age: Whether to include the user age feature.
      add_gbdt: Whether to include the gbdt features.
      run_light_ranking_group_metrics_in_bq: Whether to include the extra
        features used to compute group metrics in BigQuery.

    Returns:
      The object returned by ``_get_new_feature_config_base``.

    Raises:
      ValueError: If the task name is not a key of LABELS_LR.
    """
    requested_task = getattr(opt, "task_name", None)
    task_name = "HeavyRankPosition" if requested_task is None else requested_task

    # Validate the task before touching the feature list, as callers may rely
    # on the ValueError being raised first.
    if task_name not in LABELS_LR:
        raise ValueError("Invalid Task Name !")

    feature_list = feature_list_provided or FEATURE_LIST_LIGHT_RANKING_DEFAULT

    return _get_new_feature_config_base(
        data_spec_path=data_spec_path,
        labels=LABELS_LR[task_name],
        add_sparse_continous=False,
        add_gbdt=add_gbdt,
        add_user_id=add_user_id,
        add_timestamp=add_timestamp,
        add_user_age=add_user_age,
        feature_list_provided=feature_list,
        opt=opt,
        run_light_ranking_group_metrics_in_bq=run_light_ranking_group_metrics_in_bq,
    )
|