# Mirror of https://github.com/twitter/the-algorithm-ml.git
# Synced 2024-11-16 13:19:23 +01:00
# 478 lines, 14 KiB, YAML
# Training-loop settings: step counts, checkpoint/log cadence, output
# directory, and evaluation timeout.
training:
  num_train_steps: 10
  num_eval_steps: 5
  checkpoint_every_n: 5
  train_log_every_n: 1
  eval_log_every_n: 1
  # ${HOME} is left unexpanded by YAML itself; presumably substituted by the
  # config loader - confirm.
  save_dir: ${HOME}/tmp/runs/recap_local_debug
  eval_timeout_in_s: 7200
# Model definition: a shared backbone, input featurization, and one tower
# per task under `tasks`. The backbone and every tower expose the same key
# set (affine_map, dcn_config, dlrm_config, mask_net_config, mlp_config,
# pos_weight); unused alternatives are set to null.
model:
  backbone:
    affine_map: null
    dcn_config: null
    dlrm_config: null
    # The backbone is the only component in this file with a non-null
    # mask_net_config: four identical mask blocks followed by an MLP.
    mask_net_config:
      mask_blocks:
        - aggregation_size: 1024
          input_layer_norm: false
          output_size: 1024
          reduction_factor: null
        - aggregation_size: 1024
          input_layer_norm: false
          output_size: 1024
          reduction_factor: null
        - aggregation_size: 1024
          input_layer_norm: false
          output_size: 1024
          reduction_factor: null
        - aggregation_size: 1024
          input_layer_norm: false
          output_size: 1024
          reduction_factor: null
      mlp:
        batch_norm: null
        dropout: null
        final_layer_activation: true
        layer_sizes:
          - 2048
      use_parallel: true
    mlp_config: null
    pos_weight: 1.0
  featurization_config:
    clip_log1p_abs_config: null
    double_norm_log_config:
      batch_norm_config:
        affine: true
        momentum: 0.01
      clip_magnitude: 5.0
      layer_norm_config:
        axis: -1
        center: true
        epsilon: 0.0
        scale: true
    feature_names_to_concat:
      - binary
    log1p_abs_config: null
    z_score_log_config: null
  large_embeddings: null
  multi_task_type: share_all
  position_debias_config: null
  small_embeddings: null
  stratifiers: null
  # Per-task towers. Each uses mlp_config with layer sizes 256 -> 128 -> 1;
  # only is_negative_feedback_v2 and is_report_tweet_clicked set dropout.
  tasks:
    recap.engagement.is_favorited:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_good_clicked_convo_desc_favorited_or_replied:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_good_clicked_convo_desc_v2:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_negative_feedback_v2:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout:
          rate: 0.1
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_profile_clicked_and_profile_engaged:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_replied:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_replied_reply_engaged_by_author:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_report_tweet_clicked:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout:
          rate: 0.2
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_retweeted:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
    recap.engagement.is_video_playback_50:
      affine_map: null
      dcn_config: null
      dlrm_config: null
      mask_net_config: null
      mlp_config:
        batch_norm:
          affine: false
          momentum: 0.1
        dropout: null
        final_layer_activation: false
        layer_sizes:
          - 256
          - 128
          - 1
      pos_weight: 1.0
# Training dataset settings. The anchors defined here (&data_root,
# &seg_dense_schema, &data_tasks, &preprocess) are aliased by the
# validation_data section, so this section must stay in the same YAML
# document and precede it.
train_data:
  global_batch_size: 128
  dataset_service_compression: AUTO
  # Quoted because the value contains a `*` glob; ${HOME} is not expanded by
  # YAML - presumably handled by the config loader, confirm.
  inputs: &data_root "${HOME}/tmp/recap_local_random_data/*.gz"
  seg_dense_schema: &seg_dense_schema
    schema_path: "${TML_BASE}/projects/home/recap/config/home_recap_2022/segdense.json"
    renamed_features:
      "continuous": "home_recap_2022_cont__segdense_vals"
      "binary": "home_recap_2022_binary__segdense_vals"
      "discrete": "home_recap_2022_discrete__segdense_vals"
      "author_embedding": "original_author.timelines.twhin_author_follow_embeddings.twhin_author_follow_embeddings"
      "user_embedding": "user.timelines.twhin_user_follow_embeddings.twhin_user_follow_embeddings"
      "user_eng_embedding": "user.timelines.twhin_user_engagement_embeddings.twhin_user_engagement_embeddings"
      "meta__author_id": "meta.author_id"
      "meta__user_id": "meta.user_id"
      "meta__tweet_id": "meta.tweet_id"
  # Task map with empty per-task settings.
  # NOTE(review): is_bookmarked, is_shared and is_tweet_detail_dwelled_15_sec
  # appear here but have no tower under model.tasks in this file - confirm
  # that is intended.
  tasks: &data_tasks
    "recap.engagement.is_bookmarked": {}
    "recap.engagement.is_favorited": {}
    "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied": {}
    "recap.engagement.is_good_clicked_convo_desc_v2": {}
    "recap.engagement.is_negative_feedback_v2": {}
    "recap.engagement.is_profile_clicked_and_profile_engaged": {}
    "recap.engagement.is_replied": {}
    "recap.engagement.is_replied_reply_engaged_by_author": {}
    "recap.engagement.is_report_tweet_clicked": {}
    "recap.engagement.is_retweeted": {}
    "recap.engagement.is_shared": {}
    "recap.engagement.is_tweet_detail_dwelled_15_sec": {}
    "recap.engagement.is_video_playback_50": {}
  preprocess: &preprocess
    truncate_and_slice:
      continuous_feature_truncation: 2117
      binary_feature_truncation: 59
# Evaluation datasets, keyed by split name. Every entry aliases the anchors
# defined under train_data (*data_root, *seg_dense_schema, *data_tasks,
# *preprocess) and shares one batch size via &eval_batch_size.
#
# Per-task entries merge the full task map with `<<: *data_tasks` and then
# override a single task with down-sampling rates; explicit keys take
# precedence over merged ones. Merge keys are a YAML 1.1 extension, so the
# loader must support them.
validation_data:
  validation: &validation
    global_batch_size: &eval_batch_size 128
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks: *data_tasks
    preprocess: *preprocess
  train:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks: *data_tasks
    preprocess: *preprocess
  recap.engagement.is_favorited:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_favorited":
        pos_downsampling_rate: 0.8387
        neg_downsampling_rate: 0.01
    evaluation_tasks:
      - "recap.engagement.is_favorited"
    preprocess: *preprocess
  recap.engagement.is_good_clicked_convo_desc_favorited_or_replied:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied":
        pos_downsampling_rate: 0.9164
        neg_downsampling_rate: 0.00195
    evaluation_tasks:
      - "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied"
    preprocess: *preprocess
  recap.engagement.is_good_clicked_convo_desc_v2:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_good_clicked_convo_desc_v2":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.00174
    evaluation_tasks:
      - "recap.engagement.is_good_clicked_convo_desc_v2"
    preprocess: *preprocess
  recap.engagement.is_negative_feedback_v2:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_negative_feedback_v2":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.00280
    evaluation_tasks:
      - "recap.engagement.is_negative_feedback_v2"
    preprocess: *preprocess
  recap.engagement.is_profile_clicked_and_profile_engaged:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_profile_clicked_and_profile_engaged":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.0015
    evaluation_tasks:
      - "recap.engagement.is_profile_clicked_and_profile_engaged"
    preprocess: *preprocess
  recap.engagement.is_replied:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_replied":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.005
    evaluation_tasks:
      - "recap.engagement.is_replied"
    preprocess: *preprocess
  recap.engagement.is_replied_reply_engaged_by_author:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_replied_reply_engaged_by_author":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.001
    evaluation_tasks:
      - "recap.engagement.is_replied_reply_engaged_by_author"
    preprocess: *preprocess
  recap.engagement.is_report_tweet_clicked:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_report_tweet_clicked":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.000014
    evaluation_tasks:
      - "recap.engagement.is_report_tweet_clicked"
    preprocess: *preprocess
  recap.engagement.is_retweeted:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_retweeted":
        pos_downsampling_rate: 0.9561
        neg_downsampling_rate: 0.004
    evaluation_tasks:
      - "recap.engagement.is_retweeted"
    preprocess: *preprocess
  recap.engagement.is_video_playback_50:
    global_batch_size: *eval_batch_size
    inputs: *data_root
    seg_dense_schema: *seg_dense_schema
    tasks:
      <<: *data_tasks
      "recap.engagement.is_video_playback_50":
        pos_downsampling_rate: 1.0
        neg_downsampling_rate: 0.00427
    evaluation_tasks:
      - "recap.engagement.is_video_playback_50"
    preprocess: *preprocess

# Optimizer settings: Adam hyper-parameters plus learning-rate schedules.
# Each schedule mapping lists four alternatives (constant,
# linear_ramp_to_constant, linear_ramp_to_cosine, piecewise_constant); in
# this file only linear_ramp_to_constant is non-null everywhere.
optimizer:
  adam:
    beta_1: 0.95
    beta_2: 0.999
    epsilon: 1.0e-07
  multi_task_learning_rates:
    backbone_learning_rate:
      constant: null
      linear_ramp_to_constant:
        learning_rate: 0.0001
        num_ramp_steps: 1000
      linear_ramp_to_cosine: null
      piecewise_constant: null
    # One schedule per task tower, keyed by the same task names used under
    # model.tasks.
    tower_learning_rates:
      recap.engagement.is_favorited:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0008
          num_ramp_steps: 5000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_good_clicked_convo_desc_favorited_or_replied:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0001
          num_ramp_steps: 2000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_good_clicked_convo_desc_v2:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0002
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_negative_feedback_v2:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0005
          num_ramp_steps: 5000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_profile_clicked_and_profile_engaged:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0003
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_replied:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.001
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_replied_reply_engaged_by_author:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0001
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_report_tweet_clicked:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0001
          num_ramp_steps: 3000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_retweeted:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.0001
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
      recap.engagement.is_video_playback_50:
        constant: null
        linear_ramp_to_constant:
          learning_rate: 0.003
          num_ramp_steps: 1000
        linear_ramp_to_cosine: null
        piecewise_constant: null
  single_task_learning_rate: null