From 4df87a278e9853a7fb790a85616a3ee8005a9ef2 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 14 Apr 2023 10:15:14 -0700 Subject: [PATCH 01/11] Latest navi open source refresh latest code change including the global thread pool Closes twitter/the-algorithm#452 Closes twitter/the-algorithm#505 --- navi/README.md | 5 +++ navi/dr_transform/Cargo.toml | 7 +++-- navi/dr_transform/src/lib.rs | 1 + navi/navi/Cargo.toml | 21 ++++++++----- navi/navi/scripts/run_onnx.sh | 9 +++--- navi/navi/src/bin/navi_onnx.rs | 15 ++++++++- navi/navi/src/bootstrap.rs | 3 ++ navi/navi/src/cli_args.rs | 10 +++--- navi/navi/src/lib.rs | 1 + navi/navi/src/onnx_model.rs | 54 ++++++++++++++++++-------------- navi/navi/src/predict_service.rs | 52 ++++++++++++++++-------------- navi/segdense/Cargo.toml | 2 +- 12 files changed, 112 insertions(+), 68 deletions(-) diff --git a/navi/README.md b/navi/README.md index 9a4326d96..4e7d325f7 100644 --- a/navi/README.md +++ b/navi/README.md @@ -31,6 +31,11 @@ In navi/navi, you can run the following commands: - `scripts/run_onnx.sh` for [Onnx](https://onnx.ai/) Do note that you need to create a models directory and create some versions, preferably using epoch time, e.g., `1679693908377`. +so the models structure looks like: + models/ + -web_click + - 1809000 + - 1809010 ## Build You can adapt the above scripts to build using Cargo. 
diff --git a/navi/dr_transform/Cargo.toml b/navi/dr_transform/Cargo.toml index 47f097eb9..cff73375b 100644 --- a/navi/dr_transform/Cargo.toml +++ b/navi/dr_transform/Cargo.toml @@ -3,7 +3,6 @@ name = "dr_transform" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" @@ -12,7 +11,6 @@ bpr_thrift = { path = "../thrift_bpr_adapter/thrift/"} segdense = { path = "../segdense/"} thrift = "0.17.0" ndarray = "0.15" -ort = {git ="https://github.com/pykeio/ort.git", tag="v1.14.2"} base64 = "0.20.0" npyz = "0.7.2" log = "0.4.17" @@ -21,6 +19,11 @@ prometheus = "0.13.1" once_cell = "1.17.0" rand = "0.8.5" itertools = "0.10.5" +anyhow = "1.0.70" +[target.'cfg(not(target_os="linux"))'.dependencies] +ort = {git ="https://github.com/pykeio/ort.git", features=["profiling"], tag="v1.14.6"} +[target.'cfg(target_os="linux")'.dependencies] +ort = {git ="https://github.com/pykeio/ort.git", features=["profiling", "tensorrt", "cuda", "copy-dylibs"], tag="v1.14.6"} [dev-dependencies] criterion = "0.3.0" diff --git a/navi/dr_transform/src/lib.rs b/navi/dr_transform/src/lib.rs index 25b7cd2d3..ea3b25a55 100644 --- a/navi/dr_transform/src/lib.rs +++ b/navi/dr_transform/src/lib.rs @@ -3,3 +3,4 @@ pub mod converter; #[cfg(test)] mod test; pub mod util; +pub extern crate ort; diff --git a/navi/navi/Cargo.toml b/navi/navi/Cargo.toml index a942b1ae4..e355ea2a7 100644 --- a/navi/navi/Cargo.toml +++ b/navi/navi/Cargo.toml @@ -1,8 +1,7 @@ [package] name = "navi" -version = "2.0.42" +version = "2.0.45" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [[bin]] name = "navi" @@ -16,12 +15,19 @@ required-features=["torch"] name = "navi_onnx" path = "src/bin/navi_onnx.rs" required-features=["onnx"] +[[bin]] +name = "navi_onnx_test" +path = "src/bin/bin_tests/navi_onnx_test.rs" 
+[[bin]] +name = "navi_torch_test" +path = "src/bin/bin_tests/navi_torch_test.rs" +required-features=["torch"] [features] default=[] navi_console=[] torch=["tch"] -onnx=["ort"] +onnx=[] tf=["tensorflow"] [dependencies] itertools = "0.10.5" @@ -47,6 +53,7 @@ parking_lot = "0.12.1" rand = "0.8.5" rand_pcg = "0.3.1" random = "0.12.2" +x509-parser = "0.15.0" sha256 = "1.0.3" tonic = { version = "0.6.2", features=['compression', 'tls'] } tokio = { version = "1.17.0", features = ["macros", "rt-multi-thread", "fs", "process"] } @@ -55,16 +62,12 @@ npyz = "0.7.3" base64 = "0.21.0" histogram = "0.6.9" tch = {version = "0.10.3", optional = true} -tensorflow = { version = "0.20.0", optional = true } +tensorflow = { version = "0.18.0", optional = true } once_cell = {version = "1.17.1"} ndarray = "0.15" serde = "1.0.154" serde_json = "1.0.94" dr_transform = { path = "../dr_transform"} -[target.'cfg(not(target_os="linux"))'.dependencies] -ort = {git ="https://github.com/pykeio/ort.git", features=["profiling"], optional = true, tag="v1.14.2"} -[target.'cfg(target_os="linux")'.dependencies] -ort = {git ="https://github.com/pykeio/ort.git", features=["profiling", "tensorrt", "cuda", "copy-dylibs"], optional = true, tag="v1.14.2"} [build-dependencies] tonic-build = {version = "0.6.2", features=['prost', "compression"] } [profile.release] @@ -74,3 +77,5 @@ ndarray-rand = "0.14.0" tokio-test = "*" assert_cmd = "2.0" criterion = "0.4.0" + + diff --git a/navi/navi/scripts/run_onnx.sh b/navi/navi/scripts/run_onnx.sh index ae6ff10b6..cc8695f4a 100644 --- a/navi/navi/scripts/run_onnx.sh +++ b/navi/navi/scripts/run_onnx.sh @@ -1,10 +1,9 @@ #!/bin/sh #RUST_LOG=debug LD_LIBRARY_PATH=so/onnx/lib target/release/navi_onnx --port 30 --num-worker-threads 8 --intra-op-parallelism 8 --inter-op-parallelism 8 \ RUST_LOG=info LD_LIBRARY_PATH=so/onnx/lib cargo run --bin navi_onnx --features onnx -- \ - --port 30 --num-worker-threads 8 --intra-op-parallelism 8 --inter-op-parallelism 8 \ + --port 8030 
--num-worker-threads 8 \ --model-check-interval-secs 30 \ - --model-dir models/int8 \ - --output caligrated_probabilities \ - --input "" \ --modelsync-cli "echo" \ - --onnx-ep-options use_arena=true + --onnx-ep-options use_arena=true \ + --model-dir models/prod_home --output caligrated_probabilities --input "" --intra-op-parallelism 8 --inter-op-parallelism 8 --max-batch-size 1 --batch-time-out-millis 1 \ + --model-dir models/prod_home1 --output caligrated_probabilities --input "" --intra-op-parallelism 8 --inter-op-parallelism 8 --max-batch-size 1 --batch-time-out-millis 1 \ diff --git a/navi/navi/src/bin/navi_onnx.rs b/navi/navi/src/bin/navi_onnx.rs index ac73a3d16..03b1ea2aa 100644 --- a/navi/navi/src/bin/navi_onnx.rs +++ b/navi/navi/src/bin/navi_onnx.rs @@ -1,11 +1,24 @@ use anyhow::Result; +use log::info; use navi::cli_args::{ARGS, MODEL_SPECS}; use navi::onnx_model::onnx::OnnxModel; use navi::{bootstrap, metrics}; fn main() -> Result<()> { env_logger::init(); - assert_eq!(MODEL_SPECS.len(), ARGS.inter_op_parallelism.len()); + info!("global: {:?}", ARGS.onnx_global_thread_pool_options); + let assert_session_params = if ARGS.onnx_global_thread_pool_options.is_empty() { + // std::env::set_var("OMP_NUM_THREADS", "1"); + info!("now we use per session thread pool"); + MODEL_SPECS.len() + } + else { + info!("now we use global thread pool"); + 0 + }; + assert_eq!(assert_session_params, ARGS.inter_op_parallelism.len()); + assert_eq!(assert_session_params, ARGS.inter_op_parallelism.len()); + metrics::register_custom_metrics(); bootstrap::bootstrap(OnnxModel::new) } diff --git a/navi/navi/src/bootstrap.rs b/navi/navi/src/bootstrap.rs index 5841270ec..56215292f 100644 --- a/navi/navi/src/bootstrap.rs +++ b/navi/navi/src/bootstrap.rs @@ -207,6 +207,9 @@ impl PredictionService for PredictService { PredictResult::DropDueToOverload => Err(Status::resource_exhausted("")), PredictResult::ModelNotFound(idx) => { Err(Status::not_found(format!("model index {}", idx))) + }, + 
PredictResult::ModelNotReady(idx) => { + Err(Status::unavailable(format!("model index {}", idx))) } PredictResult::ModelVersionNotFound(idx, version) => Err( Status::not_found(format!("model index:{}, version {}", idx, version)), diff --git a/navi/navi/src/cli_args.rs b/navi/navi/src/cli_args.rs index f022d6d4a..300de580f 100644 --- a/navi/navi/src/cli_args.rs +++ b/navi/navi/src/cli_args.rs @@ -87,13 +87,11 @@ pub struct Args { pub intra_op_parallelism: Vec, #[clap( long, - default_value = "14", help = "number of threads to parallelize computations of the graph" )] pub inter_op_parallelism: Vec, #[clap( long, - default_value = "serving_default", help = "signature of a serving. only TF" )] pub serving_sig: Vec, @@ -107,10 +105,12 @@ pub struct Args { help = "max warmup records to use. warmup only implemented for TF" )] pub max_warmup_records: usize, + #[clap(long, value_parser = Args::parse_key_val::, value_delimiter=',')] + pub onnx_global_thread_pool_options: Vec<(String, String)>, #[clap( - long, - default_value = "true", - help = "when to use graph parallelization. only for ONNX" + long, + default_value = "true", + help = "when to use graph parallelization. 
only for ONNX" )] pub onnx_use_parallel_mode: String, // #[clap(long, default_value = "false")] diff --git a/navi/navi/src/lib.rs b/navi/navi/src/lib.rs index e91284d64..3536b5d60 100644 --- a/navi/navi/src/lib.rs +++ b/navi/navi/src/lib.rs @@ -146,6 +146,7 @@ pub enum PredictResult { Ok(Vec, i64), DropDueToOverload, ModelNotFound(usize), + ModelNotReady(usize), ModelVersionNotFound(usize, i64), } diff --git a/navi/navi/src/onnx_model.rs b/navi/navi/src/onnx_model.rs index 991fab83a..a0d75c8c9 100644 --- a/navi/navi/src/onnx_model.rs +++ b/navi/navi/src/onnx_model.rs @@ -13,21 +13,22 @@ pub mod onnx { use dr_transform::converter::{BatchPredictionRequestToTorchTensorConverter, Converter}; use itertools::Itertools; use log::{debug, info}; - use ort::environment::Environment; - use ort::session::Session; - use ort::tensor::InputTensor; - use ort::{ExecutionProvider, GraphOptimizationLevel, SessionBuilder}; + use dr_transform::ort::environment::Environment; + use dr_transform::ort::session::Session; + use dr_transform::ort::tensor::InputTensor; + use dr_transform::ort::{ExecutionProvider, GraphOptimizationLevel, SessionBuilder}; + use dr_transform::ort::LoggingLevel; use serde_json::Value; use std::fmt::{Debug, Display}; use std::sync::Arc; use std::{fmt, fs}; use tokio::time::Instant; - lazy_static! { pub static ref ENVIRONMENT: Arc = Arc::new( Environment::builder() .with_name("onnx home") - .with_log_level(ort::LoggingLevel::Error) + .with_log_level(LoggingLevel::Error) + .with_global_thread_pool(ARGS.onnx_global_thread_pool_options.clone()) .build() .unwrap() ); @@ -101,23 +102,30 @@ pub mod onnx { let meta_info = format!("{}/{}/{}", ARGS.model_dir[idx], version, META_INFO); let mut builder = SessionBuilder::new(&ENVIRONMENT)? .with_optimization_level(GraphOptimizationLevel::Level3)? - .with_parallel_execution(ARGS.onnx_use_parallel_mode == "true")? 
- .with_inter_threads( - utils::get_config_or( - model_config, - "inter_op_parallelism", - &ARGS.inter_op_parallelism[idx], - ) - .parse()?, - )? - .with_intra_threads( - utils::get_config_or( - model_config, - "intra_op_parallelism", - &ARGS.intra_op_parallelism[idx], - ) - .parse()?, - )? + .with_parallel_execution(ARGS.onnx_use_parallel_mode == "true")?; + if ARGS.onnx_global_thread_pool_options.is_empty() { + builder = builder + .with_inter_threads( + utils::get_config_or( + model_config, + "inter_op_parallelism", + &ARGS.inter_op_parallelism[idx], + ) + .parse()?, + )? + .with_intra_threads( + utils::get_config_or( + model_config, + "intra_op_parallelism", + &ARGS.intra_op_parallelism[idx], + ) + .parse()?, + )?; + } + else { + builder = builder.with_disable_per_session_threads()?; + } + builder = builder .with_memory_pattern(ARGS.onnx_use_memory_pattern == "true")? .with_execution_providers(&OnnxModel::ep_choices())?; match &ARGS.profiling { diff --git a/navi/navi/src/predict_service.rs b/navi/navi/src/predict_service.rs index 25ba4b848..8650662cf 100644 --- a/navi/navi/src/predict_service.rs +++ b/navi/navi/src/predict_service.rs @@ -1,7 +1,7 @@ use anyhow::{anyhow, Result}; use arrayvec::ArrayVec; use itertools::Itertools; -use log::{error, info, warn}; +use log::{error, info}; use std::fmt::{Debug, Display}; use std::string::String; use std::sync::Arc; @@ -179,17 +179,17 @@ impl PredictService { //initialize the latest version array let mut cur_versions = vec!["".to_owned(); MODEL_SPECS.len()]; loop { - let config = utils::read_config(&meta_file).unwrap_or_else(|e| { - warn!("config file {} not found due to: {}", meta_file, e); - Value::Null - }); info!("***polling for models***"); //nice deliminter - info!("config:{}", config); if let Some(ref cli) = ARGS.modelsync_cli { if let Err(e) = call_external_modelsync(cli, &cur_versions).await { error!("model sync cli running error:{}", e) } } + let config = utils::read_config(&meta_file).unwrap_or_else(|e| { + 
info!("config file {} not found due to: {}", meta_file, e); + Value::Null + }); + info!("config:{}", config); for (idx, cur_version) in cur_versions.iter_mut().enumerate() { let model_dir = &ARGS.model_dir[idx]; PredictService::scan_load_latest_model_from_model_dir( @@ -229,26 +229,32 @@ impl PredictService { let no_more_msg = match msg { Ok(PredictMessage::Predict(model_spec_at, version, val, resp, ts)) => { if let Some(model_predictors) = all_model_predictors.get_mut(model_spec_at) { - match version { - None => model_predictors[0].push(val, resp, ts), - Some(the_version) => match model_predictors - .iter_mut() - .find(|x| x.model.version() == the_version) - { - None => resp - .send(PredictResult::ModelVersionNotFound( - model_spec_at, - the_version, - )) - .unwrap_or_else(|e| { - error!("cannot send back version error: {:?}", e) - }), - Some(predictor) => predictor.push(val, resp, ts), - }, + if model_predictors.is_empty() { + resp.send(PredictResult::ModelNotReady(model_spec_at)) + .unwrap_or_else(|e| error!("cannot send back model not ready error: {:?}", e)); + } + else { + match version { + None => model_predictors[0].push(val, resp, ts), + Some(the_version) => match model_predictors + .iter_mut() + .find(|x| x.model.version() == the_version) + { + None => resp + .send(PredictResult::ModelVersionNotFound( + model_spec_at, + the_version, + )) + .unwrap_or_else(|e| { + error!("cannot send back version error: {:?}", e) + }), + Some(predictor) => predictor.push(val, resp, ts), + }, + } } } else { resp.send(PredictResult::ModelNotFound(model_spec_at)) - .unwrap_or_else(|e| error!("cannot send back model error: {:?}", e)) + .unwrap_or_else(|e| error!("cannot send back model not found error: {:?}", e)) } MPSC_CHANNEL_SIZE.dec(); false diff --git a/navi/segdense/Cargo.toml b/navi/segdense/Cargo.toml index 4adbf2bc1..1c8abc58c 100644 --- a/navi/segdense/Cargo.toml +++ b/navi/segdense/Cargo.toml @@ -3,9 +3,9 @@ name = "segdense" version = "0.1.0" edition = "2021" -# See 
more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +env_logger = "0.10.0" serde = { version = "1.0.104", features = ["derive"] } serde_json = "1.0.48" log = "0.4.17" From 23fa75d4068edb15a90c93369943987c631ae98c Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 14 Apr 2023 15:28:51 -0700 Subject: [PATCH 02/11] [Medium][UUA] Clean up BCE in UUA This is to clean up the BCE adapters and services in UUA since BCE no longer exists. --- .../adapter/behavioral_client_event/BUILD | 13 -- .../BaseBCEAdapter.scala | 96 ------------ .../BehavioralClientEventAdapter.scala | 39 ----- .../ImpressionBCEAdapter.scala | 34 ----- .../ProfileImpressionBCEAdapter.scala | 52 ------- .../TweetImpressionBCEAdapter.scala | 84 ----------- .../BehavioralClientEventAdapterSpec.scala | 139 ------------------ .../BehavioralClientEventService.scala | 25 ---- ...ProcessorBehavioralClientEventModule.scala | 87 ----------- 9 files changed, 569 deletions(-) delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BUILD delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BaseBCEAdapter.scala delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BehavioralClientEventAdapter.scala delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ImpressionBCEAdapter.scala delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ProfileImpressionBCEAdapter.scala delete mode 100644 unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/TweetImpressionBCEAdapter.scala delete mode 100644 
unified_user_actions/adapter/src/test/scala/com/twitter/unified_user_actions/adapter/BehavioralClientEventAdapterSpec.scala delete mode 100644 unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/BehavioralClientEventService.scala delete mode 100644 unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/module/KafkaProcessorBehavioralClientEventModule.scala diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BUILD b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BUILD deleted file mode 100644 index 5c3030625..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -scala_library( - sources = [ - "*.scala", - ], - tags = ["bazel-compatible"], - dependencies = [ - "client-events/thrift/src/thrift/storage/twitter/behavioral_event:behavioral_event-scala", - "kafka/finagle-kafka/finatra-kafka/src/main/scala", - "unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter:base", - "unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/common", - "unified_user_actions/thrift/src/main/thrift/com/twitter/unified_user_actions:unified_user_actions-scala", - ], -) diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BaseBCEAdapter.scala b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BaseBCEAdapter.scala deleted file mode 100644 index ba81e9469..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BaseBCEAdapter.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.unified_user_actions.adapter.behavioral_client_event - 
-import com.twitter.client_event_entities.serverside_context_key.latest.thriftscala.FlattenedServersideContextKey -import com.twitter.storage.behavioral_event.thriftscala.EventLogContext -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.adapter.common.AdapterUtils -import com.twitter.unified_user_actions.thriftscala.ActionType -import com.twitter.unified_user_actions.thriftscala.BreadcrumbTweet -import com.twitter.unified_user_actions.thriftscala.ClientEventNamespace -import com.twitter.unified_user_actions.thriftscala.EventMetadata -import com.twitter.unified_user_actions.thriftscala.Item -import com.twitter.unified_user_actions.thriftscala.ProductSurface -import com.twitter.unified_user_actions.thriftscala.ProductSurfaceInfo -import com.twitter.unified_user_actions.thriftscala.SourceLineage -import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction -import com.twitter.unified_user_actions.thriftscala.UserIdentifier - -case class ProductSurfaceRelated( - productSurface: Option[ProductSurface], - productSurfaceInfo: Option[ProductSurfaceInfo]) - -trait BaseBCEAdapter { - def toUUA(e: FlattenedEventLog): Seq[UnifiedUserAction] - - protected def getUserIdentifier(c: EventLogContext): UserIdentifier = - UserIdentifier( - userId = c.userId, - guestIdMarketing = c.guestIdMarketing - ) - - protected def getEventMetadata(e: FlattenedEventLog): EventMetadata = - EventMetadata( - sourceLineage = SourceLineage.BehavioralClientEvents, - sourceTimestampMs = - e.context.driftAdjustedEventCreatedAtMs.getOrElse(e.context.eventCreatedAtMs), - receivedTimestampMs = AdapterUtils.currentTimestampMs, - // Client UI language or from Gizmoduck which is what user set in Twitter App. - // Please see more at https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/finatra-internal/international/src/main/scala/com/twitter/finatra/international/LanguageIdentifier.scala - // The format should be ISO 639-1. 
- language = e.context.languageCode.map(AdapterUtils.normalizeLanguageCode), - // Country code could be IP address (geoduck) or User registration country (gizmoduck) and the former takes precedence. - // We don’t know exactly which one is applied, unfortunately, - // see https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/finatra-internal/international/src/main/scala/com/twitter/finatra/international/CountryIdentifier.scala - // The format should be ISO_3166-1_alpha-2. - countryCode = e.context.countryCode.map(AdapterUtils.normalizeCountryCode), - clientAppId = e.context.clientApplicationId, - clientVersion = e.context.clientVersion, - clientPlatform = e.context.clientPlatform, - viewHierarchy = e.v1ViewTypeHierarchy, - clientEventNamespace = Some( - ClientEventNamespace( - page = e.page, - section = e.section, - element = e.element, - action = e.actionName, - subsection = e.subsection - )), - breadcrumbViews = e.v1BreadcrumbViewTypeHierarchy, - breadcrumbTweets = e.v1BreadcrumbTweetIds.map { breadcrumbs => - breadcrumbs.map { breadcrumb => - BreadcrumbTweet( - tweetId = breadcrumb.serversideContextId.toLong, - sourceComponent = breadcrumb.sourceComponent) - } - } - ) - - protected def getBreadcrumbTweetIds( - breadcrumbTweetIds: Option[Seq[FlattenedServersideContextKey]] - ): Seq[BreadcrumbTweet] = - breadcrumbTweetIds - .getOrElse(Nil).map(breadcrumb => { - BreadcrumbTweet( - tweetId = breadcrumb.serversideContextId.toLong, - sourceComponent = breadcrumb.sourceComponent) - }) - - protected def getBreadcrumbViews(breadcrumbView: Option[Seq[String]]): Seq[String] = - breadcrumbView.getOrElse(Nil) - - protected def getUnifiedUserAction( - event: FlattenedEventLog, - actionType: ActionType, - item: Item, - productSurface: Option[ProductSurface] = None, - productSurfaceInfo: Option[ProductSurfaceInfo] = None - ): UnifiedUserAction = - UnifiedUserAction( - userIdentifier = getUserIdentifier(event.context), - actionType = actionType, - item = item, - 
eventMetadata = getEventMetadata(event), - productSurface = productSurface, - productSurfaceInfo = productSurfaceInfo - ) -} diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BehavioralClientEventAdapter.scala b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BehavioralClientEventAdapter.scala deleted file mode 100644 index f2dbb5917..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/BehavioralClientEventAdapter.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.unified_user_actions.adapter.behavioral_client_event - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.kafka.serde.UnKeyed -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.adapter.AbstractAdapter -import com.twitter.unified_user_actions.thriftscala._ - -class BehavioralClientEventAdapter - extends AbstractAdapter[FlattenedEventLog, UnKeyed, UnifiedUserAction] { - - import BehavioralClientEventAdapter._ - - override def adaptOneToKeyedMany( - input: FlattenedEventLog, - statsReceiver: StatsReceiver = NullStatsReceiver - ): Seq[(UnKeyed, UnifiedUserAction)] = - adaptEvent(input).map { e => (UnKeyed, e) } -} - -object BehavioralClientEventAdapter { - def adaptEvent(e: FlattenedEventLog): Seq[UnifiedUserAction] = - // See go/bcecoverage for event namespaces, usage and coverage details - Option(e) - .map { e => - (e.page, e.actionName) match { - case (Some("tweet_details"), Some("impress")) => - TweetImpressionBCEAdapter.TweetDetails.toUUA(e) - case (Some("fullscreen_video"), Some("impress")) => - TweetImpressionBCEAdapter.FullscreenVideo.toUUA(e) - case (Some("fullscreen_image"), Some("impress")) => - TweetImpressionBCEAdapter.FullscreenImage.toUUA(e) - 
case (Some("profile"), Some("impress")) => - ProfileImpressionBCEAdapter.Profile.toUUA(e) - case _ => Nil - } - }.getOrElse(Nil) -} diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ImpressionBCEAdapter.scala b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ImpressionBCEAdapter.scala deleted file mode 100644 index 4c608c8c6..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ImpressionBCEAdapter.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.unified_user_actions.adapter.behavioral_client_event - -import com.twitter.client.behavioral_event.action.impress.latest.thriftscala.Impress -import com.twitter.client_event_entities.serverside_context_key.latest.thriftscala.FlattenedServersideContextKey -import com.twitter.unified_user_actions.thriftscala.Item - -trait ImpressionBCEAdapter extends BaseBCEAdapter { - type ImpressedItem <: Item - - def getImpressedItem( - context: FlattenedServersideContextKey, - impression: Impress - ): ImpressedItem - - /** - * The start time of an impression in milliseconds since epoch. In BCE, the impression - * tracking clock will start immediately after the page is visible with no initial delay. - */ - def getImpressedStartTimestamp(impression: Impress): Long = - impression.visibilityPctDwellStartMs - - /** - * The end time of an impression in milliseconds since epoch. In BCE, the impression - * tracking clock will end before the user exit the page. - */ - def getImpressedEndTimestamp(impression: Impress): Long = - impression.visibilityPctDwellEndMs - - /** - * The UI component that hosted the impressed item. 
- */ - def getImpressedUISourceComponent(context: FlattenedServersideContextKey): String = - context.sourceComponent -} diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ProfileImpressionBCEAdapter.scala b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ProfileImpressionBCEAdapter.scala deleted file mode 100644 index ef072f1b1..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/ProfileImpressionBCEAdapter.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.unified_user_actions.adapter.behavioral_client_event - -import com.twitter.client.behavioral_event.action.impress.latest.thriftscala.Impress -import com.twitter.client_event_entities.serverside_context_key.latest.thriftscala.FlattenedServersideContextKey -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.thriftscala.ActionType -import com.twitter.unified_user_actions.thriftscala.ClientProfileV2Impression -import com.twitter.unified_user_actions.thriftscala.Item -import com.twitter.unified_user_actions.thriftscala.ProductSurface -import com.twitter.unified_user_actions.thriftscala.ProfileActionInfo -import com.twitter.unified_user_actions.thriftscala.ProfileInfo -import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction - -object ProfileImpressionBCEAdapter { - val Profile = new ProfileImpressionBCEAdapter() -} - -class ProfileImpressionBCEAdapter extends ImpressionBCEAdapter { - override type ImpressedItem = Item.ProfileInfo - - override def toUUA(e: FlattenedEventLog): Seq[UnifiedUserAction] = - (e.v2Impress, e.v1UserIds) match { - case (Some(v2Impress), Some(v1UserIds)) => - v1UserIds.map { user => - getUnifiedUserAction( - event = e, - actionType = ActionType.ClientProfileV2Impression, - item = getImpressedItem(user, 
v2Impress), - productSurface = Some(ProductSurface.ProfilePage) - ) - } - case _ => Nil - } - - override def getImpressedItem( - context: FlattenedServersideContextKey, - impression: Impress - ): ImpressedItem = - Item.ProfileInfo( - ProfileInfo( - actionProfileId = context.serversideContextId.toLong, - profileActionInfo = Some( - ProfileActionInfo.ClientProfileV2Impression( - ClientProfileV2Impression( - impressStartTimestampMs = getImpressedStartTimestamp(impression), - impressEndTimestampMs = getImpressedEndTimestamp(impression), - sourceComponent = getImpressedUISourceComponent(context) - ) - ) - ) - )) -} diff --git a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/TweetImpressionBCEAdapter.scala b/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/TweetImpressionBCEAdapter.scala deleted file mode 100644 index f7d51900b..000000000 --- a/unified_user_actions/adapter/src/main/scala/com/twitter/unified_user_actions/adapter/behavioral_client_event/TweetImpressionBCEAdapter.scala +++ /dev/null @@ -1,84 +0,0 @@ -package com.twitter.unified_user_actions.adapter.behavioral_client_event - -import com.twitter.client.behavioral_event.action.impress.latest.thriftscala.Impress -import com.twitter.client_event_entities.serverside_context_key.latest.thriftscala.FlattenedServersideContextKey -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.thriftscala.ActionType -import com.twitter.unified_user_actions.thriftscala.ClientTweetV2Impression -import com.twitter.unified_user_actions.thriftscala.Item -import com.twitter.unified_user_actions.thriftscala.ProductSurface -import com.twitter.unified_user_actions.thriftscala.TweetActionInfo -import com.twitter.unified_user_actions.thriftscala.TweetInfo -import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction - -object 
TweetImpressionBCEAdapter { - val TweetDetails = new TweetImpressionBCEAdapter(ActionType.ClientTweetV2Impression) - val FullscreenVideo = new TweetImpressionBCEAdapter( - ActionType.ClientTweetVideoFullscreenV2Impression) - val FullscreenImage = new TweetImpressionBCEAdapter( - ActionType.ClientTweetImageFullscreenV2Impression) -} - -class TweetImpressionBCEAdapter(actionType: ActionType) extends ImpressionBCEAdapter { - override type ImpressedItem = Item.TweetInfo - - override def toUUA(e: FlattenedEventLog): Seq[UnifiedUserAction] = - (actionType, e.v2Impress, e.v1TweetIds, e.v1BreadcrumbTweetIds) match { - case (ActionType.ClientTweetV2Impression, Some(v2Impress), Some(v1TweetIds), _) => - toUUAEvents(e, v2Impress, v1TweetIds) - case ( - ActionType.ClientTweetVideoFullscreenV2Impression, - Some(v2Impress), - _, - Some(v1BreadcrumbTweetIds)) => - toUUAEvents(e, v2Impress, v1BreadcrumbTweetIds) - case ( - ActionType.ClientTweetImageFullscreenV2Impression, - Some(v2Impress), - _, - Some(v1BreadcrumbTweetIds)) => - toUUAEvents(e, v2Impress, v1BreadcrumbTweetIds) - case _ => Nil - } - - private def toUUAEvents( - e: FlattenedEventLog, - v2Impress: Impress, - v1TweetIds: Seq[FlattenedServersideContextKey] - ): Seq[UnifiedUserAction] = - v1TweetIds.map { tweet => - getUnifiedUserAction( - event = e, - actionType = actionType, - item = getImpressedItem(tweet, v2Impress), - productSurface = getProductSurfaceRelated.productSurface, - productSurfaceInfo = getProductSurfaceRelated.productSurfaceInfo - ) - } - - override def getImpressedItem( - context: FlattenedServersideContextKey, - impression: Impress - ): ImpressedItem = - Item.TweetInfo( - TweetInfo( - actionTweetId = context.serversideContextId.toLong, - tweetActionInfo = Some( - TweetActionInfo.ClientTweetV2Impression( - ClientTweetV2Impression( - impressStartTimestampMs = getImpressedStartTimestamp(impression), - impressEndTimestampMs = getImpressedEndTimestamp(impression), - sourceComponent = 
getImpressedUISourceComponent(context) - ) - )) - )) - - private def getProductSurfaceRelated: ProductSurfaceRelated = - actionType match { - case ActionType.ClientTweetV2Impression => - ProductSurfaceRelated( - productSurface = Some(ProductSurface.TweetDetailsPage), - productSurfaceInfo = None) - case _ => ProductSurfaceRelated(productSurface = None, productSurfaceInfo = None) - } -} diff --git a/unified_user_actions/adapter/src/test/scala/com/twitter/unified_user_actions/adapter/BehavioralClientEventAdapterSpec.scala b/unified_user_actions/adapter/src/test/scala/com/twitter/unified_user_actions/adapter/BehavioralClientEventAdapterSpec.scala deleted file mode 100644 index 3d834c89b..000000000 --- a/unified_user_actions/adapter/src/test/scala/com/twitter/unified_user_actions/adapter/BehavioralClientEventAdapterSpec.scala +++ /dev/null @@ -1,139 +0,0 @@ -package com.twitter.unified_user_actions.adapter - -import com.twitter.inject.Test -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.adapter.TestFixtures.BCEFixture -import com.twitter.unified_user_actions.adapter.behavioral_client_event.BehavioralClientEventAdapter -import com.twitter.unified_user_actions.thriftscala._ -import com.twitter.util.Time -import org.scalatest.prop.TableDrivenPropertyChecks - -class BehavioralClientEventAdapterSpec extends Test with TableDrivenPropertyChecks { - - test("basic event conversion should be correct") { - new BCEFixture { - Time.withTimeAt(frozenTime) { _ => - val tests = Table( - ("event", "expected", "description"), - ( - makeBCEEvent(), - makeUUAImpressEvent(productSurface = Some(ProductSurface.TweetDetailsPage)), - "tweet_details conversion"), - (makeBCEProfileImpressEvent(), makeUUAProfileImpressEvent(), "profile conversion"), - ( - makeBCEVideoFullscreenImpressEvent(), - makeUUAVideoFullscreenImpressEvent(), - "fullscreen_video conversion"), - ( - makeBCEImageFullscreenImpressEvent(), - 
makeUUAImageFullscreenImpressEvent(), - "fullscreen_image conversion"), - ) - forEvery(tests) { (input: FlattenedEventLog, expected: UnifiedUserAction, desc: String) => - assert(Seq(expected) === BehavioralClientEventAdapter.adaptEvent(input), desc) - } - } - } - } - - test( - "tweet_details is NOT missing productSurface[Info] when empty breadcrumb components and breadcrumbs tweets id") { - new BCEFixture { - Time.withTimeAt(frozenTime) { _ => - val input = makeBCEEvent(v1BreadcrumbViewTypeHierarchy = None, v1BreadcrumbTweetIds = None) - val expected = - makeUUAImpressEvent( - productSurface = Some(ProductSurface.TweetDetailsPage), - breadcrumbViews = None, - breadcrumbTweets = None) - val actual = BehavioralClientEventAdapter.adaptEvent(input) - - assert(Seq(expected) === actual) - } - } - } - - test("tweet_details is not missing productSurface[Info] when only breadcrumb tweets is empty") { - new BCEFixture { - Time.withTimeAt(frozenTime) { _ => - val input = makeBCEEvent(v1BreadcrumbTweetIds = None) - val expected = makeUUAImpressEvent( - productSurface = Some(ProductSurface.TweetDetailsPage), - breadcrumbViews = Some(viewBreadcrumbs), - breadcrumbTweets = None - ) - val actual = BehavioralClientEventAdapter.adaptEvent(input) - - assert(Seq(expected) === actual) - } - } - } - - test("unsupported events should be skipped") { - new BCEFixture { - val unsupportedPage = "unsupported_page" - val unsupportedAction = "unsupported_action" - val supportedNamespaces = Table( - ("page", "actions"), - ("tweet_details", Seq("impress")), - ("profile", Seq("impress")), - ) - - forAll(supportedNamespaces) { (page: String, actions: Seq[String]) => - actions.foreach { supportedAction => - assert( - BehavioralClientEventAdapter - .adaptEvent( - makeBCEEvent( - currentPage = Some(unsupportedPage), - actionName = Some(supportedAction))).isEmpty) - - assert(BehavioralClientEventAdapter - .adaptEvent( - makeBCEEvent(currentPage = Some(page), actionName = 
Some(unsupportedAction))).isEmpty) - } - } - } - } - - test("event w/ missing info should be skipped") { - new BCEFixture { - val eventsWithMissingInfo = Table( - ("event", "description"), - (null.asInstanceOf[FlattenedEventLog], "null event"), - (makeBCEEvent(v2Impress = None), "impression event missing v2Impress"), - (makeBCEEvent(v1TweetIds = None), "tweet event missing v1TweetIds"), - (makeBCEProfileImpressEvent(v1UserIds = None), "profile event missing v1UserIds"), - ( - makeBCEVideoFullscreenImpressEvent(v1BreadcrumbTweetIds = None), - "fullscreen_video event missing v1BreadcrumbTweetIds"), - ( - makeBCEImageFullscreenImpressEvent(v1BreadcrumbTweetIds = None), - "fullscreen_image event missing v1BreadcrumbTweetIds"), - ) - - forEvery(eventsWithMissingInfo) { (event: FlattenedEventLog, desc: String) => - assert( - BehavioralClientEventAdapter - .adaptEvent(event).isEmpty, - desc) - } - } - } - - test("use eventCreateAtMs when driftAdjustedTimetampMs is empty") { - new BCEFixture { - Time.withTimeAt(frozenTime) { _ => - val input = makeBCEEvent( - context = makeBCEContext(driftAdjustedEventCreatedAtMs = None) - ) - val expected = makeUUAImpressEvent( - createTs = eventCreatedTime, - productSurface = Some(ProductSurface.TweetDetailsPage)) - val actual = BehavioralClientEventAdapter.adaptEvent(input) - - assert(Seq(expected) === actual) - } - } - } -} diff --git a/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/BehavioralClientEventService.scala b/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/BehavioralClientEventService.scala deleted file mode 100644 index 43ca35ad1..000000000 --- a/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/BehavioralClientEventService.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.unified_user_actions.service; - -import com.twitter.finatra.decider.modules.DeciderModule -import com.twitter.finatra.kafka.serde.UnKeyed 
-import com.twitter.inject.server.TwitterServer -import com.twitter.kafka.client.processor.AtLeastOnceProcessor -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.service.module.KafkaProcessorBehavioralClientEventModule - -object BehavioralClientEventServiceMain extends BehavioralClientEventService - -class BehavioralClientEventService extends TwitterServer { - override val modules = Seq( - KafkaProcessorBehavioralClientEventModule, - DeciderModule - ) - - override protected def setup(): Unit = {} - - override protected def start(): Unit = { - val processor = injector.instance[AtLeastOnceProcessor[UnKeyed, FlattenedEventLog]] - closeOnExit(processor) - processor.start() - } -} diff --git a/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/module/KafkaProcessorBehavioralClientEventModule.scala b/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/module/KafkaProcessorBehavioralClientEventModule.scala deleted file mode 100644 index 463c691e6..000000000 --- a/unified_user_actions/service/src/main/scala/com/twitter/unified_user_actions/service/module/KafkaProcessorBehavioralClientEventModule.scala +++ /dev/null @@ -1,87 +0,0 @@ -package com.twitter.unified_user_actions.service.module - -import com.google.inject.Provides -import com.twitter.decider.Decider -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.kafka.serde.UnKeyed -import com.twitter.finatra.kafka.serde.UnKeyedSerde -import com.twitter.inject.annotations.Flag -import com.twitter.inject.TwitterModule -import com.twitter.kafka.client.processor.AtLeastOnceProcessor -import com.twitter.storage.behavioral_event.thriftscala.FlattenedEventLog -import com.twitter.unified_user_actions.adapter.behavioral_client_event.BehavioralClientEventAdapter -import com.twitter.unified_user_actions.kafka.CompressionTypeFlag -import 
com.twitter.unified_user_actions.kafka.serde.NullableScalaSerdes -import com.twitter.util.Duration -import com.twitter.util.StorageUnit -import com.twitter.util.logging.Logging -import javax.inject.Singleton - -object KafkaProcessorBehavioralClientEventModule extends TwitterModule with Logging { - override def modules = Seq(FlagsModule) - - private val adapter: BehavioralClientEventAdapter = new BehavioralClientEventAdapter - private final val processorName: String = "uuaProcessor" - - @Provides - @Singleton - def providesKafkaProcessor( - decider: Decider, - @Flag(FlagsModule.cluster) cluster: String, - @Flag(FlagsModule.kafkaSourceCluster) kafkaSourceCluster: String, - @Flag(FlagsModule.kafkaDestCluster) kafkaDestCluster: String, - @Flag(FlagsModule.kafkaSourceTopic) kafkaSourceTopic: String, - @Flag(FlagsModule.kafkaSinkTopics) kafkaSinkTopics: Seq[String], - @Flag(FlagsModule.kafkaGroupId) kafkaGroupId: String, - @Flag(FlagsModule.kafkaProducerClientId) kafkaProducerClientId: String, - @Flag(FlagsModule.kafkaMaxPendingRequests) kafkaMaxPendingRequests: Int, - @Flag(FlagsModule.kafkaWorkerThreads) kafkaWorkerThreads: Int, - @Flag(FlagsModule.commitInterval) commitInterval: Duration, - @Flag(FlagsModule.maxPollRecords) maxPollRecords: Int, - @Flag(FlagsModule.maxPollInterval) maxPollInterval: Duration, - @Flag(FlagsModule.sessionTimeout) sessionTimeout: Duration, - @Flag(FlagsModule.fetchMax) fetchMax: StorageUnit, - @Flag(FlagsModule.batchSize) batchSize: StorageUnit, - @Flag(FlagsModule.linger) linger: Duration, - @Flag(FlagsModule.bufferMem) bufferMem: StorageUnit, - @Flag(FlagsModule.compressionType) compressionTypeFlag: CompressionTypeFlag, - @Flag(FlagsModule.retries) retries: Int, - @Flag(FlagsModule.retryBackoff) retryBackoff: Duration, - @Flag(FlagsModule.requestTimeout) requestTimeout: Duration, - @Flag(FlagsModule.enableTrustStore) enableTrustStore: Boolean, - @Flag(FlagsModule.trustStoreLocation) trustStoreLocation: String, - statsReceiver: 
StatsReceiver, - ): AtLeastOnceProcessor[UnKeyed, FlattenedEventLog] = { - KafkaProcessorProvider.provideDefaultAtLeastOnceProcessor( - name = processorName, - kafkaSourceCluster = kafkaSourceCluster, - kafkaGroupId = kafkaGroupId, - kafkaSourceTopic = kafkaSourceTopic, - sourceKeyDeserializer = UnKeyedSerde.deserializer, - sourceValueDeserializer = NullableScalaSerdes - .Thrift[FlattenedEventLog](statsReceiver.counter("deserializerErrors")).deserializer, - commitInterval = commitInterval, - maxPollRecords = maxPollRecords, - maxPollInterval = maxPollInterval, - sessionTimeout = sessionTimeout, - fetchMax = fetchMax, - processorMaxPendingRequests = kafkaMaxPendingRequests, - processorWorkerThreads = kafkaWorkerThreads, - adapter = adapter, - kafkaSinkTopics = kafkaSinkTopics, - kafkaDestCluster = kafkaDestCluster, - kafkaProducerClientId = kafkaProducerClientId, - batchSize = batchSize, - linger = linger, - bufferMem = bufferMem, - compressionType = compressionTypeFlag.compressionType, - retries = retries, - retryBackoff = retryBackoff, - requestTimeout = requestTimeout, - statsReceiver = statsReceiver, - trustStoreLocationOpt = if (enableTrustStore) Some(trustStoreLocation) else None, - decider = decider, - zone = ZoneFiltering.zoneMapping(cluster), - ) - } -} From 31e82d6474cf47b3695bf919c44c94d146192a03 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Wed, 5 Apr 2023 16:08:19 -0700 Subject: [PATCH 03/11] improvements from external prs -fix corner case where dr converter failed when initializing Closes twitter/the-algorithm#550 --- navi/dr_transform/src/all_config.rs | 3 +- navi/dr_transform/src/converter.rs | 262 ++++++++++-------- .../tensorflow/core/framework/full_type.proto | 10 +- .../tensorflow/core/framework/function.proto | 2 +- .../tensorflow/core/framework/node_def.proto | 2 +- .../tensorflow/core/framework/op_def.proto | 4 +- .../core/framework/step_stats.proto | 2 +- .../tensorflow/core/framework/tensor.proto | 2 +- 
.../tensorflow/core/protobuf/config.proto | 8 +- .../core/protobuf/coordination_service.proto | 2 +- .../tensorflow/core/protobuf/debug.proto | 2 +- .../core/protobuf/debug_event.proto | 6 +- .../distributed_runtime_payloads.proto | 2 +- .../core/protobuf/eager_service.proto | 2 +- .../tensorflow/core/protobuf/master.proto | 2 +- .../core/protobuf/saved_object_graph.proto | 2 +- .../core/protobuf/tensor_bundle.proto | 2 +- .../tensorflow/core/protobuf/worker.proto | 4 +- .../tensorflow_serving/apis/logging.proto | 2 +- .../file_system_storage_path_source.proto | 4 +- .../config/model_server_config.proto | 4 +- navi/navi/src/bootstrap.rs | 26 +- navi/navi/src/metrics.rs | 7 + navi/navi/src/onnx_model.rs | 2 +- navi/navi/src/predict_service.rs | 40 ++- navi/segdense/src/error.rs | 56 ++-- navi/segdense/src/lib.rs | 4 +- navi/segdense/src/main.rs | 23 +- navi/segdense/src/mapper.rs | 4 +- ...segdense_transform_spec_home_recap_2022.rs | 1 - navi/segdense/src/util.rs | 57 ++-- 31 files changed, 305 insertions(+), 244 deletions(-) diff --git a/navi/dr_transform/src/all_config.rs b/navi/dr_transform/src/all_config.rs index 29451bfd4..d5c52c362 100644 --- a/navi/dr_transform/src/all_config.rs +++ b/navi/dr_transform/src/all_config.rs @@ -44,5 +44,6 @@ pub struct RenamedFeatures { } pub fn parse(json_str: &str) -> Result { - serde_json::from_str(json_str) + let all_config: AllConfig = serde_json::from_str(json_str)?; + Ok(all_config) } diff --git a/navi/dr_transform/src/converter.rs b/navi/dr_transform/src/converter.rs index 578d766fd..3097aedc0 100644 --- a/navi/dr_transform/src/converter.rs +++ b/navi/dr_transform/src/converter.rs @@ -2,6 +2,9 @@ use std::collections::BTreeSet; use std::fmt::{self, Debug, Display}; use std::fs; +use crate::all_config; +use crate::all_config::AllConfig; +use anyhow::{bail, Context}; use bpr_thrift::data::DataRecord; use bpr_thrift::prediction_service::BatchPredictionRequest; use bpr_thrift::tensor::GeneralTensor; @@ -16,8 +19,6 @@ use 
segdense::util; use thrift::protocol::{TBinaryInputProtocol, TSerializable}; use thrift::transport::TBufferChannel; -use crate::{all_config, all_config::AllConfig}; - pub fn log_feature_match( dr: &DataRecord, seg_dense_config: &DensificationTransformSpec, @@ -28,20 +29,24 @@ pub fn log_feature_match( for (feature_id, feature_value) in dr.continuous_features.as_ref().unwrap() { debug!( - "{dr_type} - Continuous Datarecord => Feature ID: {feature_id}, Feature value: {feature_value}" + "{} - Continous Datarecord => Feature ID: {}, Feature value: {}", + dr_type, feature_id, feature_value ); for input_feature in &seg_dense_config.cont.input_features { if input_feature.feature_id == *feature_id { - debug!("Matching input feature: {input_feature:?}") + debug!("Matching input feature: {:?}", input_feature) } } } for feature_id in dr.binary_features.as_ref().unwrap() { - debug!("{dr_type} - Binary Datarecord => Feature ID: {feature_id}"); + debug!( + "{} - Binary Datarecord => Feature ID: {}", + dr_type, feature_id + ); for input_feature in &seg_dense_config.binary.input_features { if input_feature.feature_id == *feature_id { - debug!("Found input feature: {input_feature:?}") + debug!("Found input feature: {:?}", input_feature) } } } @@ -90,18 +95,19 @@ impl BatchPredictionRequestToTorchTensorConverter { model_version: &str, reporting_feature_ids: Vec<(i64, &str)>, register_metric_fn: Option, - ) -> BatchPredictionRequestToTorchTensorConverter { - let all_config_path = format!("{model_dir}/{model_version}/all_config.json"); - let seg_dense_config_path = - format!("{model_dir}/{model_version}/segdense_transform_spec_home_recap_2022.json"); - let seg_dense_config = util::load_config(&seg_dense_config_path); + ) -> anyhow::Result { + let all_config_path = format!("{}/{}/all_config.json", model_dir, model_version); + let seg_dense_config_path = format!( + "{}/{}/segdense_transform_spec_home_recap_2022.json", + model_dir, model_version + ); + let seg_dense_config = 
util::load_config(&seg_dense_config_path)?; let all_config = all_config::parse( &fs::read_to_string(&all_config_path) - .unwrap_or_else(|error| panic!("error loading all_config.json - {error}")), - ) - .unwrap(); + .with_context(|| "error loading all_config.json - ")?, + )?; - let feature_mapper = util::load_from_parsed_config_ref(&seg_dense_config); + let feature_mapper = util::load_from_parsed_config(seg_dense_config.clone())?; let user_embedding_feature_id = Self::get_feature_id( &all_config @@ -131,11 +137,11 @@ impl BatchPredictionRequestToTorchTensorConverter { let (discrete_feature_metrics, continuous_feature_metrics) = METRICS.get_or_init(|| { let discrete = HistogramVec::new( HistogramOpts::new(":navi:feature_id:discrete", "Discrete Feature ID values") - .buckets(Vec::from([ - 0.0f64, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, + .buckets(Vec::from(&[ + 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 250.0, 300.0, 500.0, 1000.0, 10000.0, 100000.0, - ])), + ] as &'static [f64])), &["feature_id"], ) .expect("metric cannot be created"); @@ -144,18 +150,18 @@ impl BatchPredictionRequestToTorchTensorConverter { ":navi:feature_id:continuous", "continuous Feature ID values", ) - .buckets(Vec::from([ - 0.0f64, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, - 120.0, 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 250.0, 300.0, - 500.0, 1000.0, 10000.0, 100000.0, - ])), + .buckets(Vec::from(&[ + 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0, 110.0, 120.0, + 130.0, 140.0, 150.0, 160.0, 170.0, 180.0, 190.0, 200.0, 250.0, 300.0, 500.0, + 1000.0, 10000.0, 100000.0, + ] as &'static [f64])), &["feature_id"], ) .expect("metric cannot be created"); - if let Some(r) = register_metric_fn { + register_metric_fn.map(|r| { r(&discrete); r(&continuous); - } + }); (discrete, continuous) }); @@ -164,13 +170,16 @@ impl 
BatchPredictionRequestToTorchTensorConverter { for (feature_id, feature_type) in reporting_feature_ids.iter() { match *feature_type { - "discrete" => discrete_features_to_report.insert(*feature_id), - "continuous" => continuous_features_to_report.insert(*feature_id), - _ => panic!("Invalid feature type {feature_type} for reporting metrics!"), + "discrete" => discrete_features_to_report.insert(feature_id.clone()), + "continuous" => continuous_features_to_report.insert(feature_id.clone()), + _ => bail!( + "Invalid feature type {} for reporting metrics!", + feature_type + ), }; } - BatchPredictionRequestToTorchTensorConverter { + Ok(BatchPredictionRequestToTorchTensorConverter { all_config, seg_dense_config, all_config_path, @@ -183,7 +192,7 @@ impl BatchPredictionRequestToTorchTensorConverter { continuous_features_to_report, discrete_feature_metrics, continuous_feature_metrics, - } + }) } fn get_feature_id(feature_name: &str, seg_dense_config: &Root) -> i64 { @@ -218,43 +227,45 @@ impl BatchPredictionRequestToTorchTensorConverter { let mut working_set = vec![0 as f32; total_size]; let mut bpr_start = 0; for (bpr, &bpr_end) in bprs.iter().zip(batch_size) { - if bpr.common_features.is_some() - && bpr.common_features.as_ref().unwrap().tensors.is_some() - && bpr - .common_features - .as_ref() - .unwrap() - .tensors - .as_ref() - .unwrap() - .contains_key(&feature_id) - { - let source_tensor = bpr - .common_features - .as_ref() - .unwrap() - .tensors - .as_ref() - .unwrap() - .get(&feature_id) - .unwrap(); - let tensor = match source_tensor { - GeneralTensor::FloatTensor(float_tensor) => - //Tensor::of_slice( + if bpr.common_features.is_some() { + if bpr.common_features.as_ref().unwrap().tensors.is_some() { + if bpr + .common_features + .as_ref() + .unwrap() + .tensors + .as_ref() + .unwrap() + .contains_key(&feature_id) { - float_tensor - .floats - .iter() - .map(|x| x.into_inner() as f32) - .collect::>() - } - _ => vec![0 as f32; cols], - }; + let source_tensor = bpr + 
.common_features + .as_ref() + .unwrap() + .tensors + .as_ref() + .unwrap() + .get(&feature_id) + .unwrap(); + let tensor = match source_tensor { + GeneralTensor::FloatTensor(float_tensor) => + //Tensor::of_slice( + { + float_tensor + .floats + .iter() + .map(|x| x.into_inner() as f32) + .collect::>() + } + _ => vec![0 as f32; cols], + }; - // since the tensor is found in common feature, add it in all batches - for row in bpr_start..bpr_end { - for col in 0..cols { - working_set[row * cols + col] = tensor[col]; + // since the tensor is found in common feature, add it in all batches + for row in bpr_start..bpr_end { + for col in 0..cols { + working_set[row * cols + col] = tensor[col]; + } + } } } } @@ -298,9 +309,9 @@ impl BatchPredictionRequestToTorchTensorConverter { // (INT64 --> INT64, DataRecord.discrete_feature) fn get_continuous(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { // These need to be part of model schema - let rows = batch_ends[batch_ends.len() - 1]; - let cols = 5293; - let full_size = rows * cols; + let rows: usize = batch_ends[batch_ends.len() - 1]; + let cols: usize = 5293; + let full_size: usize = rows * cols; let default_val = f32::NAN; let mut tensor = vec![default_val; full_size]; @@ -325,15 +336,18 @@ impl BatchPredictionRequestToTorchTensorConverter { .unwrap(); for feature in common_features { - if let Some(f_info) = self.feature_mapper.get(feature.0) { - let idx = f_info.index_within_tensor as usize; - if idx < cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index = r * cols + idx; - tensor[flat_index] = feature.1.into_inner() as f32; + match self.feature_mapper.get(feature.0) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + if idx < cols { + // Set value in each row + for r in bpr_start..bpr_end { + let flat_index: usize = r * cols + idx; + tensor[flat_index] = feature.1.into_inner() as f32; + } } } + None => (), } if 
self.continuous_features_to_report.contains(feature.0) { self.continuous_feature_metrics @@ -349,24 +363,28 @@ impl BatchPredictionRequestToTorchTensorConverter { // Process the batch of datarecords for r in bpr_start..bpr_end { - let dr: &DataRecord = &bpr.individual_features_list[r - bpr_start]; + let dr: &DataRecord = + &bpr.individual_features_list[usize::try_from(r - bpr_start).unwrap()]; if dr.continuous_features.is_some() { for feature in dr.continuous_features.as_ref().unwrap() { - if let Some(f_info) = self.feature_mapper.get(feature.0) { - let idx = f_info.index_within_tensor as usize; - let flat_index = r * cols + idx; - if flat_index < tensor.len() && idx < cols { - tensor[flat_index] = feature.1.into_inner() as f32; + match self.feature_mapper.get(&feature.0) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + let flat_index: usize = r * cols + idx; + if flat_index < tensor.len() && idx < cols { + tensor[flat_index] = feature.1.into_inner() as f32; + } } + None => (), } if self.continuous_features_to_report.contains(feature.0) { self.continuous_feature_metrics .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner()) + .observe(feature.1.into_inner() as f64) } else if self.discrete_features_to_report.contains(feature.0) { self.discrete_feature_metrics .with_label_values(&[feature.0.to_string().as_str()]) - .observe(feature.1.into_inner()) + .observe(feature.1.into_inner() as f64) } } } @@ -383,10 +401,10 @@ impl BatchPredictionRequestToTorchTensorConverter { fn get_binary(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { // These need to be part of model schema - let rows = batch_ends[batch_ends.len() - 1]; - let cols = 149; - let full_size = rows * cols; - let default_val = 0; + let rows: usize = batch_ends[batch_ends.len() - 1]; + let cols: usize = 149; + let full_size: usize = rows * cols; + let default_val: i64 = 0; let mut v = vec![default_val; full_size]; @@ -410,15 
+428,18 @@ impl BatchPredictionRequestToTorchTensorConverter { .unwrap(); for feature in common_features { - if let Some(f_info) = self.feature_mapper.get(feature) { - let idx = f_info.index_within_tensor as usize; - if idx < cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index = r * cols + idx; - v[flat_index] = 1; + match self.feature_mapper.get(feature) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + if idx < cols { + // Set value in each row + for r in bpr_start..bpr_end { + let flat_index: usize = r * cols + idx; + v[flat_index] = 1; + } } } + None => (), } } } @@ -428,10 +449,13 @@ impl BatchPredictionRequestToTorchTensorConverter { let dr: &DataRecord = &bpr.individual_features_list[r - bpr_start]; if dr.binary_features.is_some() { for feature in dr.binary_features.as_ref().unwrap() { - if let Some(f_info) = self.feature_mapper.get(feature) { - let idx = f_info.index_within_tensor as usize; - let flat_index = r * cols + idx; - v[flat_index] = 1; + match self.feature_mapper.get(&feature) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + let flat_index: usize = r * cols + idx; + v[flat_index] = 1; + } + None => (), } } } @@ -448,10 +472,10 @@ impl BatchPredictionRequestToTorchTensorConverter { #[allow(dead_code)] fn get_discrete(&self, bprs: &[BatchPredictionRequest], batch_ends: &[usize]) -> InputTensor { // These need to be part of model schema - let rows = batch_ends[batch_ends.len() - 1]; - let cols = 320; - let full_size = rows * cols; - let default_val = 0; + let rows: usize = batch_ends[batch_ends.len() - 1]; + let cols: usize = 320; + let full_size: usize = rows * cols; + let default_val: i64 = 0; let mut v = vec![default_val; full_size]; @@ -475,15 +499,18 @@ impl BatchPredictionRequestToTorchTensorConverter { .unwrap(); for feature in common_features { - if let Some(f_info) = self.feature_mapper.get(feature.0) { - let idx = f_info.index_within_tensor as usize; - if idx < 
cols { - // Set value in each row - for r in bpr_start..bpr_end { - let flat_index = r * cols + idx; - v[flat_index] = *feature.1; + match self.feature_mapper.get(feature.0) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + if idx < cols { + // Set value in each row + for r in bpr_start..bpr_end { + let flat_index: usize = r * cols + idx; + v[flat_index] = *feature.1; + } } } + None => (), } if self.discrete_features_to_report.contains(feature.0) { self.discrete_feature_metrics @@ -495,15 +522,18 @@ impl BatchPredictionRequestToTorchTensorConverter { // Process the batch of datarecords for r in bpr_start..bpr_end { - let dr: &DataRecord = &bpr.individual_features_list[r]; + let dr: &DataRecord = &bpr.individual_features_list[usize::try_from(r).unwrap()]; if dr.discrete_features.is_some() { for feature in dr.discrete_features.as_ref().unwrap() { - if let Some(f_info) = self.feature_mapper.get(feature.0) { - let idx = f_info.index_within_tensor as usize; - let flat_index = r * cols + idx; - if flat_index < v.len() && idx < cols { - v[flat_index] = *feature.1; + match self.feature_mapper.get(&feature.0) { + Some(f_info) => { + let idx = f_info.index_within_tensor as usize; + let flat_index: usize = r * cols + idx; + if flat_index < v.len() && idx < cols { + v[flat_index] = *feature.1; + } } + None => (), } if self.discrete_features_to_report.contains(feature.0) { self.discrete_feature_metrics @@ -569,7 +599,7 @@ impl Converter for BatchPredictionRequestToTorchTensorConverter { .map(|bpr| bpr.individual_features_list.len()) .scan(0usize, |acc, e| { //running total - *acc += e; + *acc = *acc + e; Some(*acc) }) .collect::>(); diff --git a/navi/navi/proto/tensorflow/core/framework/full_type.proto b/navi/navi/proto/tensorflow/core/framework/full_type.proto index e8175ed3d..ddf05ec8f 100644 --- a/navi/navi/proto/tensorflow/core/framework/full_type.proto +++ b/navi/navi/proto/tensorflow/core/framework/full_type.proto @@ -122,7 +122,7 @@ enum FullTypeId 
{ // TFT_TENSOR[TFT_INT32, TFT_UNKNOWN] // is a Tensor of int32 element type and unknown shape. // - // TODO: Define TFT_SHAPE and add more examples. + // TODO(mdan): Define TFT_SHAPE and add more examples. TFT_TENSOR = 1000; // Array (or tensorflow::TensorList in the variant type registry). @@ -178,7 +178,7 @@ enum FullTypeId { // object (for now). // The bool element type. - // TODO + // TODO(mdan): Quantized types, legacy representations (e.g. ref) TFT_BOOL = 200; // Integer element types. TFT_UINT8 = 201; @@ -195,7 +195,7 @@ enum FullTypeId { TFT_DOUBLE = 211; TFT_BFLOAT16 = 215; // Complex element types. - // TODO: Represent as TFT_COMPLEX[TFT_DOUBLE] instead? + // TODO(mdan): Represent as TFT_COMPLEX[TFT_DOUBLE] instead? TFT_COMPLEX64 = 212; TFT_COMPLEX128 = 213; // The string element type. @@ -240,7 +240,7 @@ enum FullTypeId { // ownership is in the true sense: "the op argument representing the lock is // available". // Mutex locks are the dynamic counterpart of control dependencies. - // TODO: Properly document this thing. + // TODO(mdan): Properly document this thing. // // Parametrization: TFT_MUTEX_LOCK[]. TFT_MUTEX_LOCK = 10202; @@ -271,6 +271,6 @@ message FullTypeDef { oneof attr { string s = 3; int64 i = 4; - // TODO: list/tensor, map? Need to reconcile with TFT_RECORD, etc. + // TODO(mdan): list/tensor, map? Need to reconcile with TFT_RECORD, etc. } } diff --git a/navi/navi/proto/tensorflow/core/framework/function.proto b/navi/navi/proto/tensorflow/core/framework/function.proto index efa3c9aeb..6e59df718 100644 --- a/navi/navi/proto/tensorflow/core/framework/function.proto +++ b/navi/navi/proto/tensorflow/core/framework/function.proto @@ -23,7 +23,7 @@ message FunctionDefLibrary { // with a value. When a GraphDef has a call to a function, it must // have binding for every attr defined in the signature. // -// TODO: +// TODO(zhifengc): // * device spec, etc. 
message FunctionDef { // The definition of the function's name, arguments, return values, diff --git a/navi/navi/proto/tensorflow/core/framework/node_def.proto b/navi/navi/proto/tensorflow/core/framework/node_def.proto index 801759817..705e90aa3 100644 --- a/navi/navi/proto/tensorflow/core/framework/node_def.proto +++ b/navi/navi/proto/tensorflow/core/framework/node_def.proto @@ -61,7 +61,7 @@ message NodeDef { // one of the names from the corresponding OpDef's attr field). // The values must have a type matching the corresponding OpDef // attr's type field. - // TODO: Add some examples here showing best practices. + // TODO(josh11b): Add some examples here showing best practices. map attr = 5; message ExperimentalDebugInfo { diff --git a/navi/navi/proto/tensorflow/core/framework/op_def.proto b/navi/navi/proto/tensorflow/core/framework/op_def.proto index a53fdf028..b71f5ce87 100644 --- a/navi/navi/proto/tensorflow/core/framework/op_def.proto +++ b/navi/navi/proto/tensorflow/core/framework/op_def.proto @@ -96,7 +96,7 @@ message OpDef { // Human-readable description. string description = 4; - // TODO: bool is_optional? + // TODO(josh11b): bool is_optional? // --- Constraints --- // These constraints are only in effect if specified. Default is no @@ -139,7 +139,7 @@ message OpDef { // taking input from multiple devices with a tree of aggregate ops // that aggregate locally within each device (and possibly within // groups of nearby devices) before communicating. - // TODO: Implement that optimization. + // TODO(josh11b): Implement that optimization. 
bool is_aggregate = 16; // for things like add // Other optimizations go here, like diff --git a/navi/navi/proto/tensorflow/core/framework/step_stats.proto b/navi/navi/proto/tensorflow/core/framework/step_stats.proto index 62238234d..762487f02 100644 --- a/navi/navi/proto/tensorflow/core/framework/step_stats.proto +++ b/navi/navi/proto/tensorflow/core/framework/step_stats.proto @@ -53,7 +53,7 @@ message MemoryStats { // Time/size stats recorded for a single execution of a graph node. message NodeExecStats { - // TODO: Use some more compact form of node identity than + // TODO(tucker): Use some more compact form of node identity than // the full string name. Either all processes should agree on a // global id (cost_id?) for each node, or we should use a hash of // the name. diff --git a/navi/navi/proto/tensorflow/core/framework/tensor.proto b/navi/navi/proto/tensorflow/core/framework/tensor.proto index 2d4b593be..eb057b127 100644 --- a/navi/navi/proto/tensorflow/core/framework/tensor.proto +++ b/navi/navi/proto/tensorflow/core/framework/tensor.proto @@ -16,7 +16,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framewo message TensorProto { DataType dtype = 1; - // Shape of the tensor. TODO: sort out the 0-rank issues. + // Shape of the tensor. TODO(touts): sort out the 0-rank issues. TensorShapeProto tensor_shape = 2; // Only one of the representations below is set, one of "tensor_contents" and diff --git a/navi/navi/proto/tensorflow/core/protobuf/config.proto b/navi/navi/proto/tensorflow/core/protobuf/config.proto index ff78e1f22..e454309fc 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/config.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/config.proto @@ -532,7 +532,7 @@ message ConfigProto { // We removed the flag client_handles_error_formatting. Marking the tag // number as reserved. 
- // TODO: Should we just remove this tag so that it can be + // TODO(shikharagarwal): Should we just remove this tag so that it can be // used in future for other purpose? reserved 2; @@ -576,7 +576,7 @@ message ConfigProto { // - If isolate_session_state is true, session states are isolated. // - If isolate_session_state is false, session states are shared. // - // TODO: Add a single API that consistently treats + // TODO(b/129330037): Add a single API that consistently treats // isolate_session_state and ClusterSpec propagation. bool share_session_state_in_clusterspec_propagation = 8; @@ -704,7 +704,7 @@ message ConfigProto { // Options for a single Run() call. message RunOptions { - // TODO Turn this into a TraceOptions proto which allows + // TODO(pbar) Turn this into a TraceOptions proto which allows // tracing to be controlled in a more orthogonal manner? enum TraceLevel { NO_TRACE = 0; @@ -781,7 +781,7 @@ message RunMetadata { repeated GraphDef partition_graphs = 3; message FunctionGraphs { - // TODO: Include some sort of function/cache-key identifier? + // TODO(nareshmodi): Include some sort of function/cache-key identifier? repeated GraphDef partition_graphs = 1; GraphDef pre_optimization_graph = 2; diff --git a/navi/navi/proto/tensorflow/core/protobuf/coordination_service.proto b/navi/navi/proto/tensorflow/core/protobuf/coordination_service.proto index e190bb028..730fb8c10 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/coordination_service.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/coordination_service.proto @@ -194,7 +194,7 @@ service CoordinationService { // Report error to the task. RPC sets the receiving instance of coordination // service agent to error state permanently. - // TODO: Consider splitting this into a different RPC service. + // TODO(b/195990880): Consider splitting this into a different RPC service. 
rpc ReportErrorToAgent(ReportErrorToAgentRequest) returns (ReportErrorToAgentResponse); diff --git a/navi/navi/proto/tensorflow/core/protobuf/debug.proto b/navi/navi/proto/tensorflow/core/protobuf/debug.proto index 1cc76f1ed..2fabd0319 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/debug.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/debug.proto @@ -46,7 +46,7 @@ message DebugTensorWatch { // are to be debugged, the callers of Session::Run() must use distinct // debug_urls to make sure that the streamed or dumped events do not overlap // among the invocations. - // TODO: More visible documentation of this in g3docs. + // TODO(cais): More visible documentation of this in g3docs. repeated string debug_urls = 4; // Do not error out if debug op creation fails (e.g., due to dtype diff --git a/navi/navi/proto/tensorflow/core/protobuf/debug_event.proto b/navi/navi/proto/tensorflow/core/protobuf/debug_event.proto index b68f45d4d..5530004d7 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/debug_event.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/debug_event.proto @@ -12,7 +12,7 @@ option java_package = "org.tensorflow.util"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; // Available modes for extracting debugging information from a Tensor. -// TODO: Document the detailed column names and semantics in a separate +// TODO(cais): Document the detailed column names and semantics in a separate // markdown file once the implementation settles. enum TensorDebugMode { UNSPECIFIED = 0; @@ -223,7 +223,7 @@ message DebuggedDevice { // A debugger-generated ID for the device. Guaranteed to be unique within // the scope of the debugged TensorFlow program, including single-host and // multi-host settings. - // TODO: Test the uniqueness guarantee in multi-host settings. + // TODO(cais): Test the uniqueness guarantee in multi-host settings. 
int32 device_id = 2; } @@ -264,7 +264,7 @@ message Execution { // field with the DebuggedDevice messages. repeated int32 output_tensor_device_ids = 9; - // TODO support, add more fields + // TODO(cais): When backporting to V1 Session.run() support, add more fields // such as fetches and feeds. } diff --git a/navi/navi/proto/tensorflow/core/protobuf/distributed_runtime_payloads.proto b/navi/navi/proto/tensorflow/core/protobuf/distributed_runtime_payloads.proto index c19da9d82..ddb346afa 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/distributed_runtime_payloads.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/distributed_runtime_payloads.proto @@ -7,7 +7,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobu // Used to serialize and transmit tensorflow::Status payloads through // grpc::Status `error_details` since grpc::Status lacks payload API. -// TODO: Use GRPC API once supported. +// TODO(b/204231601): Use GRPC API once supported. message GrpcPayloadContainer { map payloads = 1; } diff --git a/navi/navi/proto/tensorflow/core/protobuf/eager_service.proto b/navi/navi/proto/tensorflow/core/protobuf/eager_service.proto index 9d658c7d9..204acf6b1 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/eager_service.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/eager_service.proto @@ -172,7 +172,7 @@ message WaitQueueDoneRequest { } message WaitQueueDoneResponse { - // TODO: Consider adding NodeExecStats here to be able to + // TODO(nareshmodi): Consider adding NodeExecStats here to be able to // propagate some stats. } diff --git a/navi/navi/proto/tensorflow/core/protobuf/master.proto b/navi/navi/proto/tensorflow/core/protobuf/master.proto index 60555cd58..e1732a932 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/master.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/master.proto @@ -94,7 +94,7 @@ message ExtendSessionRequest { } message ExtendSessionResponse { - // TODO: Return something about the operation? 
+ // TODO(mrry): Return something about the operation? // The new version number for the extended graph, to be used in the next call // to ExtendSession. diff --git a/navi/navi/proto/tensorflow/core/protobuf/saved_object_graph.proto b/navi/navi/proto/tensorflow/core/protobuf/saved_object_graph.proto index 70b31f0e6..a59ad0ed2 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/saved_object_graph.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/saved_object_graph.proto @@ -176,7 +176,7 @@ message SavedBareConcreteFunction { // allows the ConcreteFunction to be called with nest structure inputs. This // field may not be populated. If this field is absent, the concrete function // can only be called with flat inputs. - // TODO: support calling saved ConcreteFunction with structured + // TODO(b/169361281): support calling saved ConcreteFunction with structured // inputs in C++ SavedModel API. FunctionSpec function_spec = 4; } diff --git a/navi/navi/proto/tensorflow/core/protobuf/tensor_bundle.proto b/navi/navi/proto/tensorflow/core/protobuf/tensor_bundle.proto index 4433afae2..999195cc9 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/tensor_bundle.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/tensor_bundle.proto @@ -17,7 +17,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobu // Special header that is associated with a bundle. // -// TODO: maybe in the future, we can add information about +// TODO(zongheng,zhifengc): maybe in the future, we can add information about // which binary produced this checkpoint, timestamp, etc. Sometime, these can be // valuable debugging information. 
And if needed, these can be used as defensive // information ensuring reader (binary version) of the checkpoint and the writer diff --git a/navi/navi/proto/tensorflow/core/protobuf/worker.proto b/navi/navi/proto/tensorflow/core/protobuf/worker.proto index 0df080c77..18d60b568 100644 --- a/navi/navi/proto/tensorflow/core/protobuf/worker.proto +++ b/navi/navi/proto/tensorflow/core/protobuf/worker.proto @@ -188,7 +188,7 @@ message DeregisterGraphRequest { } message DeregisterGraphResponse { - // TODO: Optionally add summary stats for the graph. + // TODO(mrry): Optionally add summary stats for the graph. } //////////////////////////////////////////////////////////////////////////////// @@ -294,7 +294,7 @@ message RunGraphResponse { // If the request asked for execution stats, the cost graph, or the partition // graphs, these are returned here. - // TODO: Package these in a RunMetadata instead. + // TODO(suharshs): Package these in a RunMetadata instead. StepStats step_stats = 2; CostGraphDef cost_graph = 3; repeated GraphDef partition_graph = 4; diff --git a/navi/navi/proto/tensorflow_serving/apis/logging.proto b/navi/navi/proto/tensorflow_serving/apis/logging.proto index 9d304f44d..6298bb4b2 100644 --- a/navi/navi/proto/tensorflow_serving/apis/logging.proto +++ b/navi/navi/proto/tensorflow_serving/apis/logging.proto @@ -13,5 +13,5 @@ message LogMetadata { SamplingConfig sampling_config = 2; // List of tags used to load the relevant MetaGraphDef from SavedModel. repeated string saved_model_tags = 3; - // TODO: Add more metadata as mentioned in the bug. + // TODO(b/33279154): Add more metadata as mentioned in the bug. 
} diff --git a/navi/navi/proto/tensorflow_serving/config/file_system_storage_path_source.proto b/navi/navi/proto/tensorflow_serving/config/file_system_storage_path_source.proto index 8d8541d4f..add7aa2a2 100644 --- a/navi/navi/proto/tensorflow_serving/config/file_system_storage_path_source.proto +++ b/navi/navi/proto/tensorflow_serving/config/file_system_storage_path_source.proto @@ -58,7 +58,7 @@ message FileSystemStoragePathSourceConfig { // A single servable name/base_path pair to monitor. // DEPRECATED: Use 'servables' instead. - // TODO: Stop using these fields, and ultimately remove them here. + // TODO(b/30898016): Stop using these fields, and ultimately remove them here. string servable_name = 1 [deprecated = true]; string base_path = 2 [deprecated = true]; @@ -76,7 +76,7 @@ message FileSystemStoragePathSourceConfig { // check for a version to appear later.) // DEPRECATED: Use 'servable_versions_always_present' instead, which includes // this behavior. - // TODO: Remove 2019-10-31 or later. + // TODO(b/30898016): Remove 2019-10-31 or later. bool fail_if_zero_versions_at_startup = 4 [deprecated = true]; // If true, the servable is always expected to exist on the underlying diff --git a/navi/navi/proto/tensorflow_serving/config/model_server_config.proto b/navi/navi/proto/tensorflow_serving/config/model_server_config.proto index 0f80aa1c7..cadc2b6e6 100644 --- a/navi/navi/proto/tensorflow_serving/config/model_server_config.proto +++ b/navi/navi/proto/tensorflow_serving/config/model_server_config.proto @@ -9,7 +9,7 @@ import "tensorflow_serving/config/logging_config.proto"; option cc_enable_arenas = true; // The type of model. -// TODO: DEPRECATED. +// TODO(b/31336131): DEPRECATED. enum ModelType { MODEL_TYPE_UNSPECIFIED = 0 [deprecated = true]; TENSORFLOW = 1 [deprecated = true]; @@ -31,7 +31,7 @@ message ModelConfig { string base_path = 2; // Type of model. - // TODO: DEPRECATED. Please use 'model_platform' instead. + // TODO(b/31336131): DEPRECATED. 
Please use 'model_platform' instead. ModelType model_type = 3 [deprecated = true]; // Type of model (e.g. "tensorflow"). diff --git a/navi/navi/src/bootstrap.rs b/navi/navi/src/bootstrap.rs index 56215292f..1f767f17e 100644 --- a/navi/navi/src/bootstrap.rs +++ b/navi/navi/src/bootstrap.rs @@ -1,5 +1,6 @@ use anyhow::Result; use log::{info, warn}; +use x509_parser::{prelude::{parse_x509_pem}, parse_x509_certificate}; use std::collections::HashMap; use tokio::time::Instant; use tonic::{ @@ -27,6 +28,7 @@ use crate::cli_args::{ARGS, INPUTS, OUTPUTS}; use crate::metrics::{ NAVI_VERSION, NUM_PREDICTIONS, NUM_REQUESTS_FAILED, NUM_REQUESTS_FAILED_BY_MODEL, NUM_REQUESTS_RECEIVED, NUM_REQUESTS_RECEIVED_BY_MODEL, RESPONSE_TIME_COLLECTOR, + CERT_EXPIRY_EPOCH }; use crate::predict_service::{Model, PredictService}; use crate::tf_proto::tensorflow_serving::model_spec::VersionChoice::Version; @@ -233,6 +235,12 @@ impl PredictionService for PredictService { } } +// A function that takes a timestamp as input and returns a ticker stream +fn report_expiry(expiry_time: i64) { + info!("Certificate expires at epoch: {:?}", expiry_time); + CERT_EXPIRY_EPOCH.set(expiry_time as i64); +} + pub fn bootstrap(model_factory: ModelFactory) -> Result<()> { info!("package: {}, version: {}, args: {:?}", NAME, VERSION, *ARGS); //we follow SemVer. 
So here we assume MAJOR.MINOR.PATCH @@ -249,6 +257,7 @@ pub fn bootstrap(model_factory: ModelFactory) -> Result<()> { ); } + tokio::runtime::Builder::new_multi_thread() .thread_name("async worker") .worker_threads(ARGS.num_worker_threads) @@ -266,6 +275,21 @@ pub fn bootstrap(model_factory: ModelFactory) -> Result<()> { let mut builder = if ARGS.ssl_dir.is_empty() { Server::builder() } else { + // Read the pem file as a string + let pem_str = std::fs::read_to_string(format!("{}/server.crt", ARGS.ssl_dir)).unwrap(); + let res = parse_x509_pem(&pem_str.as_bytes()); + match res { + Ok((rem, pem_2)) => { + assert!(rem.is_empty()); + assert_eq!(pem_2.label, String::from("CERTIFICATE")); + let res_x509 = parse_x509_certificate(&pem_2.contents); + info!("Certificate label: {}", pem_2.label); + assert!(res_x509.is_ok()); + report_expiry(res_x509.unwrap().1.validity().not_after.timestamp()); + }, + _ => panic!("PEM parsing failed: {:?}", res), + } + let key = tokio::fs::read(format!("{}/server.key", ARGS.ssl_dir)) .await .expect("can't find key file"); @@ -281,7 +305,7 @@ pub fn bootstrap(model_factory: ModelFactory) -> Result<()> { let identity = Identity::from_pem(pem.clone(), key); let client_ca_cert = Certificate::from_pem(pem.clone()); let tls = ServerTlsConfig::new() - .identity(identity) + .identity(identity) .client_ca_root(client_ca_cert); Server::builder() .tls_config(tls) diff --git a/navi/navi/src/metrics.rs b/navi/navi/src/metrics.rs index 7cc9e6fcf..373f84f0f 100644 --- a/navi/navi/src/metrics.rs +++ b/navi/navi/src/metrics.rs @@ -171,6 +171,9 @@ lazy_static! 
{ &["model_name"] ) .expect("metric can be created"); + pub static ref CERT_EXPIRY_EPOCH: IntGauge = + IntGauge::new(":navi:cert_expiry_epoch", "Timestamp when the current cert expires") + .expect("metric can be created"); } pub fn register_custom_metrics() { @@ -249,6 +252,10 @@ pub fn register_custom_metrics() { REGISTRY .register(Box::new(CONVERTER_TIME_COLLECTOR.clone())) .expect("collector can be registered"); + REGISTRY + .register(Box::new(CERT_EXPIRY_EPOCH.clone())) + .expect("collector can be registered"); + } pub fn register_dynamic_metrics(c: &HistogramVec) { diff --git a/navi/navi/src/onnx_model.rs b/navi/navi/src/onnx_model.rs index a0d75c8c9..18f116570 100644 --- a/navi/navi/src/onnx_model.rs +++ b/navi/navi/src/onnx_model.rs @@ -189,7 +189,7 @@ pub mod onnx { &version, reporting_feature_ids, Some(metrics::register_dynamic_metrics), - )), + )?), }; onnx_model.warmup()?; Ok(onnx_model) diff --git a/navi/navi/src/predict_service.rs b/navi/navi/src/predict_service.rs index 8650662cf..fc355d7ea 100644 --- a/navi/navi/src/predict_service.rs +++ b/navi/navi/src/predict_service.rs @@ -24,7 +24,7 @@ use serde_json::{self, Value}; pub trait Model: Send + Sync + Display + Debug + 'static { fn warmup(&self) -> Result<()>; - //TODO: refactor this to return Vec>, i.e. + //TODO: refactor this to return vec>, i.e. //we have the underlying runtime impl to split the response to each client. 
//It will eliminate some inefficient memory copy in onnx_model.rs as well as simplify code fn do_predict( @@ -222,8 +222,8 @@ impl PredictService { .map(|b| b.parse().unwrap()) .collect::>(); let no_msg_wait_millis = *batch_time_out_millis.iter().min().unwrap(); - let mut all_model_predictors = - ArrayVec::, MAX_VERSIONS_PER_MODEL>, MAX_NUM_MODELS>::new(); + let mut all_model_predictors: ArrayVec::, MAX_VERSIONS_PER_MODEL>, MAX_NUM_MODELS> = + (0 ..MAX_NUM_MODELS).map( |_| ArrayVec::, MAX_VERSIONS_PER_MODEL>::new()).collect(); loop { let msg = rx.try_recv(); let no_more_msg = match msg { @@ -272,27 +272,23 @@ impl PredictService { queue_reset_ts: Instant::now(), queue_earliest_rq_ts: Instant::now(), }; - if idx < all_model_predictors.len() { - metrics::NEW_MODEL_SNAPSHOT - .with_label_values(&[&MODEL_SPECS[idx]]) - .inc(); + assert!(idx < all_model_predictors.len()); + metrics::NEW_MODEL_SNAPSHOT + .with_label_values(&[&MODEL_SPECS[idx]]) + .inc(); - info!("now we serve updated model: {}", predictor.model); - //we can do this since the vector is small - let predictors = &mut all_model_predictors[idx]; - if predictors.len() == ARGS.versions_per_model { - predictors.remove(predictors.len() - 1); - } - predictors.insert(0, predictor); - } else { - info!("now we serve new model: {:}", predictor.model); - let mut predictors = - ArrayVec::, MAX_VERSIONS_PER_MODEL>::new(); - predictors.push(predictor); - all_model_predictors.push(predictors); - //check the invariant that we always push the last model to the end - assert_eq!(all_model_predictors.len(), idx + 1) + //we can do this since the vector is small + let predictors = &mut all_model_predictors[idx]; + if predictors.len() == 0 { + info!("now we serve new model: {}", predictor.model); } + else { + info!("now we serve updated model: {}", predictor.model); + } + if predictors.len() == ARGS.versions_per_model { + predictors.remove(predictors.len() - 1); + } + predictors.insert(0, predictor); false } 
Err(TryRecvError::Empty) => true, diff --git a/navi/segdense/src/error.rs b/navi/segdense/src/error.rs index d997b6933..4c1d9af7d 100644 --- a/navi/segdense/src/error.rs +++ b/navi/segdense/src/error.rs @@ -5,39 +5,49 @@ use std::fmt::Display; */ #[derive(Debug)] pub enum SegDenseError { - IoError(std::io::Error), - Json(serde_json::Error), - JsonMissingRoot, - JsonMissingObject, - JsonMissingArray, - JsonArraySize, - JsonMissingInputFeature, + IoError(std::io::Error), + Json(serde_json::Error), + JsonMissingRoot, + JsonMissingObject, + JsonMissingArray, + JsonArraySize, + JsonMissingInputFeature, } impl Display for SegDenseError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - SegDenseError::IoError(io_error) => write!(f, "{}", io_error), - SegDenseError::Json(serde_json) => write!(f, "{}", serde_json), - SegDenseError::JsonMissingRoot => write!(f, "{}", "SegDense JSON: Root Node note found!"), - SegDenseError::JsonMissingObject => write!(f, "{}", "SegDense JSON: Object note found!"), - SegDenseError::JsonMissingArray => write!(f, "{}", "SegDense JSON: Array Node note found!"), - SegDenseError::JsonArraySize => write!(f, "{}", "SegDense JSON: Array size not as expected!"), - SegDenseError::JsonMissingInputFeature => write!(f, "{}", "SegDense JSON: Missing input feature!"), + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SegDenseError::IoError(io_error) => write!(f, "{}", io_error), + SegDenseError::Json(serde_json) => write!(f, "{}", serde_json), + SegDenseError::JsonMissingRoot => { + write!(f, "{}", "SegDense JSON: Root Node note found!") + } + SegDenseError::JsonMissingObject => { + write!(f, "{}", "SegDense JSON: Object note found!") + } + SegDenseError::JsonMissingArray => { + write!(f, "{}", "SegDense JSON: Array Node note found!") + } + SegDenseError::JsonArraySize => { + write!(f, "{}", "SegDense JSON: Array size not as expected!") + } + SegDenseError::JsonMissingInputFeature 
=> { + write!(f, "{}", "SegDense JSON: Missing input feature!") + } + } } - } } impl std::error::Error for SegDenseError {} impl From for SegDenseError { - fn from(err: std::io::Error) -> Self { - SegDenseError::IoError(err) - } + fn from(err: std::io::Error) -> Self { + SegDenseError::IoError(err) + } } impl From for SegDenseError { - fn from(err: serde_json::Error) -> Self { - SegDenseError::Json(err) - } + fn from(err: serde_json::Error) -> Self { + SegDenseError::Json(err) + } } diff --git a/navi/segdense/src/lib.rs b/navi/segdense/src/lib.rs index 476411702..f9930da64 100644 --- a/navi/segdense/src/lib.rs +++ b/navi/segdense/src/lib.rs @@ -1,4 +1,4 @@ pub mod error; -pub mod segdense_transform_spec_home_recap_2022; pub mod mapper; -pub mod util; \ No newline at end of file +pub mod segdense_transform_spec_home_recap_2022; +pub mod util; diff --git a/navi/segdense/src/main.rs b/navi/segdense/src/main.rs index 1515df101..d8f7f8bc4 100644 --- a/navi/segdense/src/main.rs +++ b/navi/segdense/src/main.rs @@ -5,19 +5,18 @@ use segdense::error::SegDenseError; use segdense::util; fn main() -> Result<(), SegDenseError> { - env_logger::init(); - let args: Vec = env::args().collect(); - - let schema_file_name: &str = if args.len() == 1 { - "json/compact.json" - } else { - &args[1] - }; + env_logger::init(); + let args: Vec = env::args().collect(); - let json_str = fs::read_to_string(schema_file_name)?; + let schema_file_name: &str = if args.len() == 1 { + "json/compact.json" + } else { + &args[1] + }; - util::safe_load_config(&json_str)?; + let json_str = fs::read_to_string(schema_file_name)?; - Ok(()) + util::safe_load_config(&json_str)?; + + Ok(()) } - diff --git a/navi/segdense/src/mapper.rs b/navi/segdense/src/mapper.rs index f640f2aeb..f5a1d6532 100644 --- a/navi/segdense/src/mapper.rs +++ b/navi/segdense/src/mapper.rs @@ -19,13 +19,13 @@ pub struct FeatureMapper { impl FeatureMapper { pub fn new() -> FeatureMapper { FeatureMapper { - map: HashMap::new() + map: 
HashMap::new(), } } } pub trait MapWriter { - fn set(&mut self, feature_id: i64, info: FeatureInfo); + fn set(&mut self, feature_id: i64, info: FeatureInfo); } pub trait MapReader { diff --git a/navi/segdense/src/segdense_transform_spec_home_recap_2022.rs b/navi/segdense/src/segdense_transform_spec_home_recap_2022.rs index a3b3513f8..ff6d3ae17 100644 --- a/navi/segdense/src/segdense_transform_spec_home_recap_2022.rs +++ b/navi/segdense/src/segdense_transform_spec_home_recap_2022.rs @@ -164,7 +164,6 @@ pub struct ComplexFeatureTypeTransformSpec { pub tensor_shape: Vec, } - #[derive(Default, Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "camelCase")] pub struct InputFeatureMapRecord { diff --git a/navi/segdense/src/util.rs b/navi/segdense/src/util.rs index 5d020cea3..116725189 100644 --- a/navi/segdense/src/util.rs +++ b/navi/segdense/src/util.rs @@ -1,23 +1,23 @@ +use log::debug; use std::fs; -use log::{debug}; -use serde_json::{Value, Map}; +use serde_json::{Map, Value}; use crate::error::SegDenseError; -use crate::mapper::{FeatureMapper, FeatureInfo, MapWriter}; +use crate::mapper::{FeatureInfo, FeatureMapper, MapWriter}; use crate::segdense_transform_spec_home_recap_2022::{self as seg_dense, InputFeature}; -pub fn load_config(file_name: &str) -> seg_dense::Root { - let json_str = fs::read_to_string(file_name).expect( - &format!("Unable to load segdense file {}", file_name)); - let seg_dense_config = parse(&json_str).expect( - &format!("Unable to parse segdense file {}", file_name)); - return seg_dense_config; +pub fn load_config(file_name: &str) -> Result { + let json_str = fs::read_to_string(file_name)?; + // &format!("Unable to load segdense file {}", file_name)); + let seg_dense_config = parse(&json_str)?; + // &format!("Unable to parse segdense file {}", file_name)); + Ok(seg_dense_config) } pub fn parse(json_str: &str) -> Result { let root: seg_dense::Root = serde_json::from_str(json_str)?; - return Ok(root); + Ok(root) } /** @@ 
-44,15 +44,8 @@ pub fn safe_load_config(json_str: &str) -> Result load_from_parsed_config(root) } -pub fn load_from_parsed_config_ref(root: &seg_dense::Root) -> FeatureMapper { - load_from_parsed_config(root.clone()).unwrap_or_else( - |error| panic!("Error loading all_config.json - {}", error)) -} - // Perf note : make 'root' un-owned -pub fn load_from_parsed_config(root: seg_dense::Root) -> - Result { - +pub fn load_from_parsed_config(root: seg_dense::Root) -> Result { let v = root.input_features_map; // Do error check @@ -86,7 +79,7 @@ pub fn load_from_parsed_config(root: seg_dense::Root) -> Some(info) => { debug!("{:?}", info); fm.set(feature_id, info) - }, + } None => (), } } @@ -94,19 +87,22 @@ pub fn load_from_parsed_config(root: seg_dense::Root) -> Ok(fm) } #[allow(dead_code)] -fn add_feature_info_to_mapper(feature_mapper: &mut FeatureMapper, input_features: &Vec) { +fn add_feature_info_to_mapper( + feature_mapper: &mut FeatureMapper, + input_features: &Vec, +) { for input_feature in input_features.iter() { - let feature_id = input_feature.feature_id; - let feature_info = to_feature_info(input_feature); - - match feature_info { - Some(info) => { - debug!("{:?}", info); - feature_mapper.set(feature_id, info) - }, - None => (), + let feature_id = input_feature.feature_id; + let feature_info = to_feature_info(input_feature); + + match feature_info { + Some(info) => { + debug!("{:?}", info); + feature_mapper.set(feature_id, info) } + None => (), } + } } pub fn to_feature_info(input_feature: &seg_dense::InputFeature) -> Option { @@ -139,7 +135,7 @@ pub fn to_feature_info(input_feature: &seg_dense::InputFeature) -> Option 0, 3 => 2, _ => -1, - } + }, }; if input_feature.index < 0 { @@ -156,4 +152,3 @@ pub fn to_feature_info(input_feature: &seg_dense::InputFeature) -> Option Date: Wed, 26 Apr 2023 19:38:55 -0700 Subject: [PATCH 04/11] User Signals in Candidate Sourcing Stage Add the overview readme about how Twitter uses user signals in candidate retrieval. 
--- RETREIVAL_SIGNALS.md | 51 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 RETREIVAL_SIGNALS.md diff --git a/RETREIVAL_SIGNALS.md b/RETREIVAL_SIGNALS.md new file mode 100644 index 000000000..6f064bc46 --- /dev/null +++ b/RETREIVAL_SIGNALS.md @@ -0,0 +1,51 @@ +# Signals for Candidate Sources + +## Overview + +The candidate sourcing stage within the Twitter Recommendation algorithm serves to significantly narrow down the number of items from approximately 1 billion to just a few thousand. This process utilizes Twitter user behavior as the primary input for the algorithm. This document comprehensively enumerates all the signals used during the candidate sourcing phase. + +| Signals | Description | +| :-------------------- | :-------------------------------------------------------------------- | +| Author Follow | The accounts which the user explicitly follows. | +| Author Unfollow | The accounts which the user recently unfollowed. | +| Author Mute | The accounts which the user has muted. | +| Author Block | The accounts which the user has blocked. | +| Tweet Favorite | The tweets for which the user clicked the like button. | +| Tweet Unfavorite | The tweets for which the user clicked the unlike button. | +| Retweet | The tweets which the user retweeted. | +| Quote Tweet | The tweets which the user retweeted with comments. | +| Tweet Reply | The tweets which the user replied to. | +| Tweet Share | The tweets for which the user clicked the share button. | +| Tweet Bookmark | The tweets for which the user clicked the bookmark button. | +| Tweet Click | The tweets which the user clicked and viewed the tweet detail page. | +| Tweet Video Watch | The video tweets which the user watched for a certain number of seconds or a certain percentage. | +| Tweet Don't like | The tweets for which the user clicked the "Not interested in this tweet" button. | +| Tweet Report | The tweets for which the user clicked the "Report Tweet" button. | +| Notification Open | The push notification tweets which the user opened. 
| +| Ntab click | The tweets which the user clicked on the Notifications page. | +| User AddressBook | The author account identifiers in the user's address book. | + +## Usage Details + +Twitter uses these user signals as training labels and/or ML features in each of the candidate sourcing algorithms. The following table shows how they are used in each component. + +| Signals | USS | SimClusters | TwHin | UTEG | FRS | Light Ranking | +| :-------------------- | :----------------- | :----------------- | :----------------- | :----------------- | :----------------- | :----------------- | +| Author Follow | Features | Features / Labels | Features / Labels | Features | Features / Labels | N/A | +| Author Unfollow | Features | N/A | N/A | N/A | N/A | N/A | +| Author Mute | Features | N/A | N/A | N/A | Features | N/A | +| Author Block | Features | N/A | N/A | N/A | Features | N/A | +| Tweet Favorite | Features | Features | Features / Labels | Features | Features / Labels | Features / Labels | +| Tweet Unfavorite | Features | Features | N/A | N/A | N/A | N/A | +| Retweet | Features | N/A | Features / Labels | Features | Features / Labels | Features / Labels | +| Quote Tweet | Features | N/A | Features / Labels | Features | Features / Labels | Features / Labels | +| Tweet Reply | Features | N/A | Features | Features | Features / Labels | Features | +| Tweet Share | Features | N/A | N/A | N/A | Features | N/A | +| Tweet Bookmark | Features | N/A | N/A | N/A | N/A | N/A | +| Tweet Click | Features | N/A | N/A | N/A | Features | Labels | +| Tweet Video Watch | Features | Features | N/A | N/A | N/A | Labels | +| Tweet Don't like | Features | N/A | N/A | N/A | N/A | N/A | +| Tweet Report | Features | N/A | N/A | N/A | N/A | N/A | +| Notification Open | Features | Features | Features | N/A | Features | N/A | +| Ntab click | Features | Features | Features | N/A | Features | N/A | +| User AddressBook | N/A | N/A | N/A | N/A | Features | N/A | \ No newline at end of file From 
197bf2c563c0695143346ac04adca5c36e7f0e2a Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Thu, 27 Apr 2023 13:58:07 -0700 Subject: [PATCH 05/11] Open-sourcing Timelines Aggregation Framework Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models. --- README.md | 1 + .../BCELabelTransformFromUUADataRecord.scala | 68 + .../prediction/common/aggregates/BUILD | 353 ++++++ .../aggregates/FeatureSelectorConfig.scala | 121 ++ .../prediction/common/aggregates/README.md | 6 + .../RecapUserFeatureAggregation.scala | 415 ++++++ .../RectweetUserFeatureAggregation.scala | 52 + .../TimelinesAggregationConfig.scala | 80 ++ .../TimelinesAggregationConfigDetails.scala | 579 +++++++++ .../TimelinesAggregationConfigTrait.scala | 50 + ...TimelinesAggregationKeyValInjections.scala | 48 + .../TimelinesAggregationSources.scala | 45 + .../real_time/AuthorFeaturesAdapter.scala | 70 ++ .../common/aggregates/real_time/BUILD | 199 +++ .../common/aggregates/real_time/Event.scala | 11 + .../real_time/FeatureStoreUtils.scala | 53 + .../real_time/LocallyReplicatedStore.scala | 79 ++ .../real_time/StormAggregateSourceUtils.scala | 254 ++++ .../TimelinesOnlineAggregationConfig.scala | 34 + ...TimelinesOnlineAggregationConfigBase.scala | 1112 +++++++++++++++++ .../TimelinesOnlineAggregationSources.scala | 5 + .../TimelinesRealTimeAggregatesJob.scala | 182 +++ .../TimelinesStormAggregateSource.scala | 185 +++ .../real_time/TweetFeaturesAdapter.scala | 35 + .../TweetFeaturesReadableStore.scala | 53 + .../aggregates/real_time/TypeSafeRunner.scala | 7 + .../real_time/UserFeaturesAdapter.scala | 108 ++ .../real_time/UserFeaturesReadableStore.scala | 37 + .../timelines/prediction/features/README.md | 6 + .../features/client_log_event/BUILD | 11 + .../ClientLogEventDataRecordFeatures.scala | 169 +++ .../prediction/features/common/BUILD | 11 + .../features/common/CombinedFeatures.scala | 536 
++++++++ .../common/NonHomeLabelFeatures.scala | 97 ++ .../common/TimelinesSharedFeatures.scala | 759 +++++++++++ .../features/engagement_features/BUILD | 12 + .../EngagementFeatures.scala | 246 ++++ .../prediction/features/escherbird/BUILD | 19 + .../escherbird/EscherbirdFeatures.scala | 19 + .../EscherbirdFeaturesConverter.scala | 19 + .../features/followsource/BUILD.bazel | 7 + .../followsource/FollowSourceFeatures.scala | 53 + .../timelines/prediction/features/itl/BUILD | 9 + .../prediction/features/itl/ITLFeatures.scala | 575 +++++++++ .../prediction/features/list_features/BUILD | 9 + .../features/list_features/ListFeatures.scala | 24 + .../prediction/features/p_home_latest/BUILD | 9 + .../HomeLatestUserFeatures.scala | 49 + .../timelines/prediction/features/ppmi/BUILD | 8 + .../features/ppmi/PpmiFeatures.scala | 7 + .../prediction/features/real_graph/BUILD | 15 + ...lGraphDataRecordFeatureStoreFeatures.scala | 232 ++++ .../RealGraphDataRecordFeatures.scala | 534 ++++++++ .../timelines/prediction/features/recap/BUILD | 9 + .../features/recap/RecapFeatures.scala | 967 ++++++++++++++ .../features/recap/RecapFeaturesUtils.scala | 29 + .../prediction/features/request_context/BUILD | 9 + .../RequestContextFeatures.scala | 57 + .../prediction/features/simcluster/BUILD | 13 + .../simcluster/SimclusterFeatures.scala | 61 + .../simcluster/SimclusterTweetFeatures.scala | 150 +++ .../SimclustersScoresFeatures.scala | 43 + .../prediction/features/socialproof/BUILD | 15 + .../socialproof/SocialProofFeatures.scala | 172 +++ .../prediction/features/time_features/BUILD | 10 + .../TimeDataRecordFeatures.scala | 111 ++ .../features/two_hop_features/BUILD | 10 + .../two_hop_features/TwoHopFeatures.scala | 93 ++ .../TwoHopFeaturesConfig.scala | 30 + .../prediction/features/user_health/BUILD | 10 + .../user_health/UserHealthFeatures.scala | 23 + .../AggregateGroup.scala | 124 ++ .../AggregateSource.scala | 9 + .../AggregateStore.scala | 5 + .../AggregationConfig.scala | 5 + 
.../AggregationKey.scala | 50 + .../ml_util/aggregation_framework/BUILD | 101 ++ .../DataRecordAggregationMonoid.scala | 92 ++ .../aggregation_framework/KeyedRecord.scala | 27 + .../OfflineAggregateInjections.scala | 46 + .../OfflineAggregateSource.scala | 21 + .../OfflineAggregateStore.scala | 128 ++ .../ml_util/aggregation_framework/README.md | 39 + .../aggregation_framework/StoreConfig.scala | 68 + .../aggregation_framework/StoreRegister.scala | 13 + .../TypedAggregateGroup.scala | 486 +++++++ .../ml_util/aggregation_framework/Utils.scala | 122 ++ .../conversion/AggregatesV2Adapter.scala | 165 +++ .../AggregatesV2FeatureSource.scala | 171 +++ .../aggregation_framework/conversion/BUILD | 71 ++ .../conversion/CombineCountsPolicy.scala | 223 ++++ .../conversion/DataSetPipeSketchJoin.scala | 46 + .../conversion/PickFirstRecordPolicy.scala | 26 + .../conversion/PickTopCtrPolicy.scala | 226 ++++ .../SparseBinaryAggregateJoin.scala | 199 +++ .../conversion/SparseBinaryMergePolicy.scala | 81 ++ .../SparseBinaryMultipleAggregateJoin.scala | 109 ++ .../docs/AUTOMATED_COMMIT_FILES | 5 + .../docs/aggregation.rst | 167 +++ .../aggregation_framework/docs/batch.rst | 215 ++++ .../aggregation_framework/docs/conf.py | 59 + .../aggregation_framework/docs/index.rst | 11 + .../aggregation_framework/docs/joining.rst | 72 ++ .../aggregation_framework/docs/real-time.rst | 327 +++++ .../docs/troubleshooting.rst | 117 ++ .../ml_util/aggregation_framework/heron/BUILD | 74 ++ .../NighthawkUnderlyingStoreConfig.scala | 31 + .../heron/OnlineAggregationConfigTrait.scala | 28 + .../heron/OnlineAggregationStoresTrait.scala | 6 + .../heron/RealTimeAggregateStore.scala | 50 + .../heron/RealTimeAggregatesJobBase.scala | 301 +++++ .../heron/RealTimeAggregatesJobConfig.scala | 79 ++ .../heron/StormAggregateSource.scala | 27 + .../heron/UserReindexingNighthawkStore.scala | 309 +++++ .../aggregation_framework/heron/package.scala | 8 + .../job/AggregatesV2Job.scala | 163 +++ 
.../ml_util/aggregation_framework/job/BUILD | 19 + .../job/DataRecordFeatureCounter.scala | 39 + .../metrics/AggregateFeature.scala | 51 + .../metrics/AggregationMetric.scala | 184 +++ .../metrics/AggregationMetricCommon.scala | 55 + .../aggregation_framework/metrics/BUILD | 15 + .../metrics/ConversionUtils.scala | 5 + .../metrics/CountMetric.scala | 41 + .../metrics/EasyMetric.scala | 34 + .../metrics/FeatureCache.scala | 72 ++ .../metrics/LastResetMetric.scala | 107 ++ .../metrics/LatestMetric.scala | 69 + .../metrics/MaxMetric.scala | 64 + .../metrics/SumLikeMetric.scala | 66 + .../metrics/SumMetric.scala | 52 + .../metrics/SumSqMetric.scala | 53 + .../metrics/TimedValue.scala | 14 + .../metrics/TimedValueAggregationMetric.scala | 90 ++ .../aggregation_framework/package.scala | 19 + .../ml_util/aggregation_framework/query/BUILD | 12 + .../query/ScopedAggregateBuilder.scala | 159 +++ .../scalding/AggregateFeaturesMerger.scala | 213 ++++ .../AggregatesStoreComparisonJob.scala | 200 +++ .../scalding/AggregatesV2ScaldingJob.scala | 216 ++++ .../scalding/AggregationKeyOrdering.scala | 17 + .../aggregation_framework/scalding/BUILD | 72 ++ .../scalding/DeletedUserPruner.scala | 97 ++ .../scalding/MostRecentVersionedStore.scala | 100 ++ .../scalding/sources/BUILD | 26 + .../sources/ScaldingAggregateSource.scala | 77 ++ 146 files changed, 16429 insertions(+) create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/README.md create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala create mode 100644 
src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala create mode 100644 
src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala create mode 100644 src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/README.md create mode 100644 src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/common/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala create mode 100644 
src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel create mode 100644 src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/itl/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/list_features/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/recap/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/request_context/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala create mode 100644 
src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/time_features/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala create mode 100644 src/scala/com/twitter/timelines/prediction/features/user_health/BUILD create mode 100644 src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/AggregateGroup.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/AggregateSource.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/AggregateStore.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/AggregationConfig.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/AggregationKey.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/DataRecordAggregationMonoid.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/KeyedRecord.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateInjections.scala 
create mode 100644 timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateSource.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateStore.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/README.md create mode 100644 timelines/data_processing/ml_util/aggregation_framework/StoreConfig.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/StoreRegister.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/TypedAggregateGroup.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/Utils.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2Adapter.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2FeatureSource.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/CombineCountsPolicy.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/DataSetPipeSketchJoin.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/PickFirstRecordPolicy.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/PickTopCtrPolicy.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryAggregateJoin.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMergePolicy.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMultipleAggregateJoin.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/AUTOMATED_COMMIT_FILES create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/aggregation.rst 
create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/batch.rst create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/conf.py create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/index.rst create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/joining.rst create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/real-time.rst create mode 100644 timelines/data_processing/ml_util/aggregation_framework/docs/troubleshooting.rst create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/NighthawkUnderlyingStoreConfig.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationConfigTrait.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationStoresTrait.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregateStore.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobBase.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobConfig.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/StormAggregateSource.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/UserReindexingNighthawkStore.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/heron/package.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/job/AggregatesV2Job.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/job/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/job/DataRecordFeatureCounter.scala create mode 100644 
timelines/data_processing/ml_util/aggregation_framework/metrics/AggregateFeature.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/AggregationMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/AggregationMetricCommon.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/ConversionUtils.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/CountMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/EasyMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/FeatureCache.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/LastResetMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/LatestMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/MaxMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/SumLikeMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/SumMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/SumSqMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValue.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValueAggregationMetric.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/package.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/query/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/query/ScopedAggregateBuilder.scala create mode 100644 
timelines/data_processing/ml_util/aggregation_framework/scalding/AggregateFeaturesMerger.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesStoreComparisonJob.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesV2ScaldingJob.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/AggregationKeyOrdering.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/DeletedUserPruner.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/MostRecentVersionedStore.scala create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/sources/BUILD create mode 100644 timelines/data_processing/ml_util/aggregation_framework/scalding/sources/ScaldingAggregateSource.scala diff --git a/README.md b/README.md index 79a7e6135..691c3a96c 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | | [topic-social-proof](topic-social-proof/README.md) | Identifies topics related to individual Tweets. | | Software framework | [navi](navi/README.md) | High performance, machine learning model serving written in Rust. | | | [product-mixer](product-mixer/README.md) | Software framework for building feeds of content. | +| | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. | | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. | The product surface currently included in this repository is the For You Timeline. 
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala new file mode 100644 index 000000000..6adf6eaf8 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/BCELabelTransformFromUUADataRecord.scala @@ -0,0 +1,68 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api.ITransform +import com.twitter.ml.api.constant.SharedFeatures +import java.lang.{Double => JDouble} + +import com.twitter.timelines.prediction.common.adapters.AdapterConsumer +import com.twitter.timelines.prediction.common.adapters.EngagementLabelFeaturesDataRecordUtils +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.RichDataRecord +import com.twitter.timelines.suggests.common.engagement.thriftscala.EngagementType +import com.twitter.timelines.suggests.common.engagement.thriftscala.Engagement +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import com.twitter.timelines.prediction.features.common.CombinedFeatures + +/** + * Transforms BCE event UUA data records that contain only continuous dwell time into data records that contain the corresponding binary label features. + * The input UUA data records have USER_ID, SOURCE_TWEET_ID, TIMESTAMP and + * zero or one of the (TWEET_DETAIL_DWELL_TIME_MS, PROFILE_DWELL_TIME_MS, FULLSCREEN_VIDEO_DWELL_TIME_MS) features. + * We use whichever dwell TIME_MS feature is present to tell the engagements apart, + * and then re-use the function in EngagementTypeConverter to add the binary label to the data record.
+ **/ + +object BCELabelTransformFromUUADataRecord extends ITransform { + + val dwellTimeFeatureToEngagementMap = Map( + TimelinesSharedFeatures.TWEET_DETAIL_DWELL_TIME_MS -> EngagementType.TweetDetailDwell, + TimelinesSharedFeatures.PROFILE_DWELL_TIME_MS -> EngagementType.ProfileDwell, + TimelinesSharedFeatures.FULLSCREEN_VIDEO_DWELL_TIME_MS -> EngagementType.FullscreenVideoDwell + ) + + def dwellFeatureToEngagement( + rdr: RichDataRecord, + dwellTimeFeature: Feature[JDouble], + engagementType: EngagementType + ): Option[Engagement] = { + if (rdr.hasFeature(dwellTimeFeature)) { + Some( + Engagement( + engagementType = engagementType, + timestampMs = rdr.getFeatureValue(SharedFeatures.TIMESTAMP), + weight = Some(rdr.getFeatureValue(dwellTimeFeature)) + )) + } else { + None + } + } + override def transformContext(featureContext: FeatureContext): FeatureContext = { + featureContext.addFeatures( + (CombinedFeatures.TweetDetailDwellEngagements ++ CombinedFeatures.ProfileDwellEngagements ++ CombinedFeatures.FullscreenVideoDwellEngagements).toSeq: _*) + } + override def transform(record: DataRecord): Unit = { + val rdr = new RichDataRecord(record) + val engagements = dwellTimeFeatureToEngagementMap + .map { + case (dwellTimeFeature, engagementType) => + dwellFeatureToEngagement(rdr, dwellTimeFeature, engagementType) + }.flatten.toSeq + + // Re-use BCE( behavior client events) label conversion in EngagementTypeConverter to align with BCE labels generation for offline training data + EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures( + rdr, + Some(engagements), + AdapterConsumer.Combined) + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD b/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD new file mode 100644 index 000000000..01c930e8e --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD @@ -0,0 +1,353 @@ +create_datasets( + base_name = "original_author_aggregates", + 
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/original_author_aggregates/1556496000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.OriginalAuthor", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "twitter_wide_user_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_aggregates/1556496000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUser", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "twitter_wide_user_author_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_author_aggregates/1556323200000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUserAuthor", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = 
"(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_aggregates/1556150400000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_author_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_author_aggregates/1556064000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserAuthor", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "aggregates_canary", + fallback_path = "gs://user.timelines.dp.gcp.twttr.net//canaries/processed/aggregates_v2/user_aggregates/1622851200000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = 
"com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_engager_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_engager_aggregates/1556496000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserEngager", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_original_author_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1556496000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserOriginalAuthor", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "author_topic_aggregates", + fallback_path = 
"viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/author_topic_aggregates/1589932800000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.AuthorTopic", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_topic_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_topic_aggregates/1590278400000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserTopic", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_inferred_topic_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_inferred_topic_aggregates/1599696000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserInferredTopic", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, 
com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_mention_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_mention_aggregates/1556582400000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMention", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_request_dow_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_dow_aggregates/1556236800000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestDow", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +create_datasets( + base_name = "user_request_hour_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_hour_aggregates/1556150400000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + 
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestHour", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + + +create_datasets( + base_name = "user_list_aggregates", + fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_list_aggregates/1590624000000", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserList", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + + +create_datasets( + base_name = "user_media_understanding_annotation_aggregates", + key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey", + platform = "java8", + role = "timelines", + scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMediaUnderstandingAnnotation", + segment_type = "snapshot", + tags = ["bazel-compatible"], + val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)", + scala_dependencies = [ + ":injections", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) + +scala_library( + sources = [ + "BCELabelTransformFromUUADataRecord.scala", + "FeatureSelectorConfig.scala", + "RecapUserFeatureAggregation.scala", + "RectweetUserFeatureAggregation.scala", + "TimelinesAggregationConfig.scala", + 
"TimelinesAggregationConfigDetails.scala", + "TimelinesAggregationConfigTrait.scala", + "TimelinesAggregationSources.scala", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + ":aggregates_canary-scala", + ":author_topic_aggregates-scala", + ":original_author_aggregates-scala", + ":twitter_wide_user_aggregates-scala", + ":twitter_wide_user_author_aggregates-scala", + ":user_aggregates-scala", + ":user_author_aggregates-scala", + ":user_engager_aggregates-scala", + ":user_inferred_topic_aggregates-scala", + ":user_list_aggregates-scala", + ":user_media_understanding_annotation_aggregates-scala", + ":user_mention_aggregates-scala", + ":user_original_author_aggregates-scala", + ":user_request_dow_aggregates-scala", + ":user_request_hour_aggregates-scala", + ":user_topic_aggregates-scala", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/java/com/twitter/ml/api/matcher", + "src/scala/com/twitter/common/text/util", + "src/scala/com/twitter/dal/client/dataset", + "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core", + "src/scala/com/twitter/scalding_internal/multiformat/format", + "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter", + "src/scala/com/twitter/timelines/prediction/features/client_log_event", + "src/scala/com/twitter/timelines/prediction/features/common", + "src/scala/com/twitter/timelines/prediction/features/engagement_features", + "src/scala/com/twitter/timelines/prediction/features/escherbird", + "src/scala/com/twitter/timelines/prediction/features/itl", + "src/scala/com/twitter/timelines/prediction/features/list_features", + "src/scala/com/twitter/timelines/prediction/features/p_home_latest", + "src/scala/com/twitter/timelines/prediction/features/real_graph", + "src/scala/com/twitter/timelines/prediction/features/recap", + "src/scala/com/twitter/timelines/prediction/features/request_context", + 
"src/scala/com/twitter/timelines/prediction/features/simcluster", + "src/scala/com/twitter/timelines/prediction/features/time_features", + "src/scala/com/twitter/timelines/prediction/transform/filter", + "src/thrift/com/twitter/timelines/suggests/common:engagement-scala", + "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java", + "util/util-core:scala", + ], +) + +scala_library( + name = "injections", + sources = [ + "FeatureSelectorConfig.scala", + "RecapUserFeatureAggregation.scala", + "RectweetUserFeatureAggregation.scala", + "TimelinesAggregationConfigDetails.scala", + "TimelinesAggregationConfigTrait.scala", + "TimelinesAggregationKeyValInjections.scala", + "TimelinesAggregationSources.scala", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/java/com/twitter/ml/api/matcher", + "src/scala/com/twitter/common/text/util", + "src/scala/com/twitter/dal/client/dataset", + "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core", + "src/scala/com/twitter/scalding_internal/multiformat/format", + "src/scala/com/twitter/timelines/prediction/features/client_log_event", + "src/scala/com/twitter/timelines/prediction/features/common", + "src/scala/com/twitter/timelines/prediction/features/engagement_features", + "src/scala/com/twitter/timelines/prediction/features/escherbird", + "src/scala/com/twitter/timelines/prediction/features/itl", + "src/scala/com/twitter/timelines/prediction/features/list_features", + "src/scala/com/twitter/timelines/prediction/features/p_home_latest", + "src/scala/com/twitter/timelines/prediction/features/real_graph", + "src/scala/com/twitter/timelines/prediction/features/recap", + "src/scala/com/twitter/timelines/prediction/features/request_context", + "src/scala/com/twitter/timelines/prediction/features/semantic_core_features", + 
"src/scala/com/twitter/timelines/prediction/features/simcluster", + "src/scala/com/twitter/timelines/prediction/features/time_features", + "src/scala/com/twitter/timelines/prediction/transform/filter", + "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java", + "util/util-core:scala", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala new file mode 100644 index 000000000..1c91ef16c --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/FeatureSelectorConfig.scala @@ -0,0 +1,121 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.matcher.FeatureMatcher +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import scala.collection.JavaConverters._ + +object FeatureSelectorConfig { + val BasePairsToStore = Seq( + ("twitter_wide_user_aggregate.pair", "*"), + ("twitter_wide_user_author_aggregate.pair", "*"), + ("user_aggregate_v5.continuous.pair", "*"), + ("user_aggregate_v7.pair", "*"), + ("user_author_aggregate_v2.pair", "recap.earlybird.*"), + ("user_author_aggregate_v2.pair", "recap.searchfeature.*"), + ("user_author_aggregate_v2.pair", "recap.tweetfeature.embeds*"), + ("user_author_aggregate_v2.pair", "recap.tweetfeature.link_count*"), + ("user_author_aggregate_v2.pair", "engagement_features.in_network.*"), + ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_reply.*"), + ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_retweet.*"), + ("user_author_aggregate_v2.pair", "recap.tweetfeature.num_mentions.*"), + ("user_author_aggregate_v5.pair", "*"), + ("user_author_aggregate_tweetsource_v1.pair", "*"), + ("user_engager_aggregate.pair", "*"), + ("user_mention_aggregate.pair", "*"), + ("user_request_context_aggregate.dow.pair", "*"), + 
("user_request_context_aggregate.hour.pair", "*"), + ("user_aggregate_v6.pair", "*"), + ("user_original_author_aggregate_v1.pair", "*"), + ("user_original_author_aggregate_v2.pair", "*"), + ("original_author_aggregate_v1.pair", "*"), + ("original_author_aggregate_v2.pair", "*"), + ("author_topic_aggregate.pair", "*"), + ("user_list_aggregate.pair", "*"), + ("user_topic_aggregate.pair", "*"), + ("user_topic_aggregate_v2.pair", "*"), + ("user_inferred_topic_aggregate.pair", "*"), + ("user_inferred_topic_aggregate_v2.pair", "*"), + ("user_media_annotation_aggregate.pair", "*"), + ("user_media_annotation_aggregate.pair", "*"), + ("user_author_good_click_aggregate.pair", "*"), + ("user_engager_good_click_aggregate.pair", "*") + ) + val PairsToStore = BasePairsToStore ++ Seq( + ("user_aggregate_v2.pair", "*"), + ("user_aggregate_v5.boolean.pair", "*"), + ("user_aggregate_tweetsource_v1.pair", "*"), + ) + + + val LabelsToStore = Seq( + "any_label", + "recap.engagement.is_favorited", + "recap.engagement.is_retweeted", + "recap.engagement.is_replied", + "recap.engagement.is_open_linked", + "recap.engagement.is_profile_clicked", + "recap.engagement.is_clicked", + "recap.engagement.is_photo_expanded", + "recap.engagement.is_video_playback_50", + "recap.engagement.is_video_quality_viewed", + "recap.engagement.is_replied_reply_impressed_by_author", + "recap.engagement.is_replied_reply_favorited_by_author", + "recap.engagement.is_replied_reply_replied_by_author", + "recap.engagement.is_report_tweet_clicked", + "recap.engagement.is_block_clicked", + "recap.engagement.is_mute_clicked", + "recap.engagement.is_dont_like", + "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied", + "recap.engagement.is_good_clicked_convo_desc_v2", + "itl.engagement.is_favorited", + "itl.engagement.is_retweeted", + "itl.engagement.is_replied", + "itl.engagement.is_open_linked", + "itl.engagement.is_profile_clicked", + "itl.engagement.is_clicked", + "itl.engagement.is_photo_expanded", + 
"itl.engagement.is_video_playback_50" + ) + + val PairGlobsToStore = for { + (prefix, suffix) <- PairsToStore + label <- LabelsToStore + } yield FeatureMatcher.glob(prefix + "." + label + "." + suffix) + + val BaseAggregateV2FeatureSelector = FeatureMatcher + .none() + .or( + FeatureMatcher.glob("meta.user_id"), + FeatureMatcher.glob("meta.author_id"), + FeatureMatcher.glob("entities.original_author_id"), + FeatureMatcher.glob("entities.topic_id"), + FeatureMatcher + .glob("entities.inferred_topic_ids" + TypedAggregateGroup.SparseFeatureSuffix), + FeatureMatcher.glob("timelines.meta.list_id"), + FeatureMatcher.glob("list.id"), + FeatureMatcher + .glob("engagement_features.user_ids.public" + TypedAggregateGroup.SparseFeatureSuffix), + FeatureMatcher + .glob("entities.users.mentioned_screen_names" + TypedAggregateGroup.SparseFeatureSuffix), + FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_dont_like.*"), + FeatureMatcher.glob("user_author_aggregate_v2.pair.any_label.recap.tweetfeature.has_*"), + FeatureMatcher.glob("request_context.country_code"), + FeatureMatcher.glob("request_context.timestamp_gmt_dow"), + FeatureMatcher.glob("request_context.timestamp_gmt_hour"), + FeatureMatcher.glob( + "semantic_core.media_understanding.high_recall.non_sensitive.entity_ids" + TypedAggregateGroup.SparseFeatureSuffix) + ) + + val AggregatesV2ProdFeatureSelector = BaseAggregateV2FeatureSelector + .orList(PairGlobsToStore.asJava) + + val ReducedPairGlobsToStore = (for { + (prefix, suffix) <- BasePairsToStore + label <- LabelsToStore + } yield FeatureMatcher.glob(prefix + "." + label + "." 
+ suffix)) ++ Seq( + FeatureMatcher.glob("user_aggregate_v2.pair.any_label.*"), + FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_favorited.*"), + FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_photo_expanded.*"), + FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_profile_clicked.*") + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md b/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md new file mode 100644 index 000000000..0bae21a14 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/README.md @@ -0,0 +1,6 @@ +## Timelines Aggregation Jobs + +This directory contains the specific definition of aggregate jobs that generate features used by the Heavy Ranker. +The primary files of interest are [`TimelinesAggregationConfigDetails.scala`](TimelinesAggregationConfigDetails.scala), which contains the definitions for the batch aggregate jobs, and [`real_time/TimelinesOnlineAggregationConfigBase.scala`](real_time/TimelinesOnlineAggregationConfigBase.scala), which contains the definitions for the real-time aggregate jobs. + +The aggregation framework that these jobs are based on is [here](../../../../../../../../timelines/data_processing/ml_util/aggregation_framework).
\ No newline at end of file diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala new file mode 100644 index 000000000..657d5a713 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/RecapUserFeatureAggregation.scala @@ -0,0 +1,415 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.Feature +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures +import com.twitter.timelines.prediction.features.real_graph.RealGraphDataRecordFeatures +import com.twitter.timelines.prediction.features.recap.RecapFeatures +import com.twitter.timelines.prediction.features.time_features.TimeDataRecordFeatures + +object RecapUserFeatureAggregation { + val RecapFeaturesForAggregation: Set[Feature[_]] = + Set( + RecapFeatures.HAS_IMAGE, + RecapFeatures.HAS_VIDEO, + RecapFeatures.FROM_MUTUAL_FOLLOW, + RecapFeatures.HAS_CARD, + RecapFeatures.HAS_NEWS, + RecapFeatures.REPLY_COUNT, + RecapFeatures.FAV_COUNT, + RecapFeatures.RETWEET_COUNT, + RecapFeatures.BLENDER_SCORE, + RecapFeatures.CONVERSATIONAL_COUNT, + RecapFeatures.IS_BUSINESS_SCORE, + RecapFeatures.CONTAINS_MEDIA, + RecapFeatures.RETWEET_SEARCHER, + RecapFeatures.REPLY_SEARCHER, + RecapFeatures.MENTION_SEARCHER, + RecapFeatures.REPLY_OTHER, + RecapFeatures.RETWEET_OTHER, + RecapFeatures.MATCH_UI_LANG, + RecapFeatures.MATCH_SEARCHER_MAIN_LANG, + RecapFeatures.MATCH_SEARCHER_LANGS, + RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT, + RecapFeatures.TEXT_SCORE, + RealGraphDataRecordFeatures.NUM_RETWEETS_EWMA, + RealGraphDataRecordFeatures.NUM_RETWEETS_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_RETWEETS_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_RETWEETS_DAYS_SINCE_LAST, + 
RealGraphDataRecordFeatures.NUM_FAVORITES_EWMA, + RealGraphDataRecordFeatures.NUM_FAVORITES_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_FAVORITES_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_FAVORITES_DAYS_SINCE_LAST, + RealGraphDataRecordFeatures.NUM_MENTIONS_EWMA, + RealGraphDataRecordFeatures.NUM_MENTIONS_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_MENTIONS_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_MENTIONS_DAYS_SINCE_LAST, + RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_EWMA, + RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_DAYS_SINCE_LAST, + RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_EWMA, + RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_DAYS_SINCE_LAST, + RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_EWMA, + RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_ELAPSED_DAYS, + RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_DAYS_SINCE_LAST, + RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_EWMA, + RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_NON_ZERO_DAYS, + RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_ELAPSED_DAYS, + RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST + ) + + val RecapLabelsForAggregation: Set[Feature.Binary] = + Set( + RecapFeatures.IS_FAVORITED, + RecapFeatures.IS_RETWEETED, + RecapFeatures.IS_CLICKED, + RecapFeatures.IS_PROFILE_CLICKED, + RecapFeatures.IS_OPEN_LINKED + ) + + val DwellDuration: Set[Feature[_]] = + Set( + TimelinesSharedFeatures.DWELL_TIME_MS, + ) + + val UserFeaturesV2: Set[Feature[_]] = RecapFeaturesForAggregation ++ Set( + RecapFeatures.HAS_VINE, + RecapFeatures.HAS_PERISCOPE, + RecapFeatures.HAS_PRO_VIDEO, + RecapFeatures.HAS_VISIBLE_LINK, + RecapFeatures.BIDIRECTIONAL_FAV_COUNT, + 
RecapFeatures.UNIDIRECTIONAL_FAV_COUNT, + RecapFeatures.BIDIRECTIONAL_REPLY_COUNT, + RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT, + RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT, + RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT, + RecapFeatures.EMBEDS_URL_COUNT, + RecapFeatures.EMBEDS_IMPRESSION_COUNT, + RecapFeatures.VIDEO_VIEW_COUNT, + RecapFeatures.IS_RETWEET, + RecapFeatures.IS_REPLY, + RecapFeatures.IS_EXTENDED_REPLY, + RecapFeatures.HAS_LINK, + RecapFeatures.HAS_TREND, + RecapFeatures.LINK_LANGUAGE, + RecapFeatures.NUM_HASHTAGS, + RecapFeatures.NUM_MENTIONS, + RecapFeatures.IS_SENSITIVE, + RecapFeatures.HAS_MULTIPLE_MEDIA, + RecapFeatures.USER_REP, + RecapFeatures.FAV_COUNT_V2, + RecapFeatures.RETWEET_COUNT_V2, + RecapFeatures.REPLY_COUNT_V2, + RecapFeatures.LINK_COUNT, + EngagementDataRecordFeatures.InNetworkFavoritesCount, + EngagementDataRecordFeatures.InNetworkRetweetsCount, + EngagementDataRecordFeatures.InNetworkRepliesCount + ) + + val UserAuthorFeaturesV2: Set[Feature[_]] = Set( + RecapFeatures.HAS_IMAGE, + RecapFeatures.HAS_VINE, + RecapFeatures.HAS_PERISCOPE, + RecapFeatures.HAS_PRO_VIDEO, + RecapFeatures.HAS_VIDEO, + RecapFeatures.HAS_CARD, + RecapFeatures.HAS_NEWS, + RecapFeatures.HAS_VISIBLE_LINK, + RecapFeatures.REPLY_COUNT, + RecapFeatures.FAV_COUNT, + RecapFeatures.RETWEET_COUNT, + RecapFeatures.BLENDER_SCORE, + RecapFeatures.CONVERSATIONAL_COUNT, + RecapFeatures.IS_BUSINESS_SCORE, + RecapFeatures.CONTAINS_MEDIA, + RecapFeatures.RETWEET_SEARCHER, + RecapFeatures.REPLY_SEARCHER, + RecapFeatures.MENTION_SEARCHER, + RecapFeatures.REPLY_OTHER, + RecapFeatures.RETWEET_OTHER, + RecapFeatures.MATCH_UI_LANG, + RecapFeatures.MATCH_SEARCHER_MAIN_LANG, + RecapFeatures.MATCH_SEARCHER_LANGS, + RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT, + RecapFeatures.TEXT_SCORE, + RecapFeatures.BIDIRECTIONAL_FAV_COUNT, + RecapFeatures.UNIDIRECTIONAL_FAV_COUNT, + RecapFeatures.BIDIRECTIONAL_REPLY_COUNT, + RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT, + 
RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT, + RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT, + RecapFeatures.EMBEDS_URL_COUNT, + RecapFeatures.EMBEDS_IMPRESSION_COUNT, + RecapFeatures.VIDEO_VIEW_COUNT, + RecapFeatures.IS_RETWEET, + RecapFeatures.IS_REPLY, + RecapFeatures.HAS_LINK, + RecapFeatures.HAS_TREND, + RecapFeatures.LINK_LANGUAGE, + RecapFeatures.NUM_HASHTAGS, + RecapFeatures.NUM_MENTIONS, + RecapFeatures.IS_SENSITIVE, + RecapFeatures.HAS_MULTIPLE_MEDIA, + RecapFeatures.FAV_COUNT_V2, + RecapFeatures.RETWEET_COUNT_V2, + RecapFeatures.REPLY_COUNT_V2, + RecapFeatures.LINK_COUNT, + EngagementDataRecordFeatures.InNetworkFavoritesCount, + EngagementDataRecordFeatures.InNetworkRetweetsCount, + EngagementDataRecordFeatures.InNetworkRepliesCount + ) + + val UserAuthorFeaturesV2Count: Set[Feature[_]] = Set( + RecapFeatures.HAS_IMAGE, + RecapFeatures.HAS_VINE, + RecapFeatures.HAS_PERISCOPE, + RecapFeatures.HAS_PRO_VIDEO, + RecapFeatures.HAS_VIDEO, + RecapFeatures.HAS_CARD, + RecapFeatures.HAS_NEWS, + RecapFeatures.HAS_VISIBLE_LINK, + RecapFeatures.FAV_COUNT, + RecapFeatures.CONTAINS_MEDIA, + RecapFeatures.RETWEET_SEARCHER, + RecapFeatures.REPLY_SEARCHER, + RecapFeatures.MENTION_SEARCHER, + RecapFeatures.REPLY_OTHER, + RecapFeatures.RETWEET_OTHER, + RecapFeatures.MATCH_UI_LANG, + RecapFeatures.MATCH_SEARCHER_MAIN_LANG, + RecapFeatures.MATCH_SEARCHER_LANGS, + RecapFeatures.IS_RETWEET, + RecapFeatures.IS_REPLY, + RecapFeatures.HAS_LINK, + RecapFeatures.HAS_TREND, + RecapFeatures.IS_SENSITIVE, + RecapFeatures.HAS_MULTIPLE_MEDIA, + EngagementDataRecordFeatures.InNetworkFavoritesCount + ) + + val UserTopicFeaturesV2Count: Set[Feature[_]] = Set( + RecapFeatures.HAS_IMAGE, + RecapFeatures.HAS_VIDEO, + RecapFeatures.HAS_CARD, + RecapFeatures.HAS_NEWS, + RecapFeatures.FAV_COUNT, + RecapFeatures.CONTAINS_MEDIA, + RecapFeatures.RETWEET_SEARCHER, + RecapFeatures.REPLY_SEARCHER, + RecapFeatures.MENTION_SEARCHER, + RecapFeatures.REPLY_OTHER, + RecapFeatures.RETWEET_OTHER, + 
RecapFeatures.MATCH_UI_LANG, + RecapFeatures.MATCH_SEARCHER_MAIN_LANG, + RecapFeatures.MATCH_SEARCHER_LANGS, + RecapFeatures.IS_RETWEET, + RecapFeatures.IS_REPLY, + RecapFeatures.HAS_LINK, + RecapFeatures.HAS_TREND, + RecapFeatures.IS_SENSITIVE, + EngagementDataRecordFeatures.InNetworkFavoritesCount, + EngagementDataRecordFeatures.InNetworkRetweetsCount, + TimelinesSharedFeatures.NUM_CAPS, + TimelinesSharedFeatures.ASPECT_RATIO_DEN, + TimelinesSharedFeatures.NUM_NEWLINES, + TimelinesSharedFeatures.IS_360, + TimelinesSharedFeatures.IS_MANAGED, + TimelinesSharedFeatures.IS_MONETIZABLE, + TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE, + TimelinesSharedFeatures.HAS_TITLE, + TimelinesSharedFeatures.HAS_DESCRIPTION, + TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION, + TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION + ) + + val UserFeaturesV5Continuous: Set[Feature[_]] = Set( + TimelinesSharedFeatures.QUOTE_COUNT, + TimelinesSharedFeatures.VISIBLE_TOKEN_RATIO, + TimelinesSharedFeatures.WEIGHTED_FAV_COUNT, + TimelinesSharedFeatures.WEIGHTED_RETWEET_COUNT, + TimelinesSharedFeatures.WEIGHTED_REPLY_COUNT, + TimelinesSharedFeatures.WEIGHTED_QUOTE_COUNT, + TimelinesSharedFeatures.EMBEDS_IMPRESSION_COUNT_V2, + TimelinesSharedFeatures.EMBEDS_URL_COUNT_V2, + TimelinesSharedFeatures.DECAYED_FAVORITE_COUNT, + TimelinesSharedFeatures.DECAYED_RETWEET_COUNT, + TimelinesSharedFeatures.DECAYED_REPLY_COUNT, + TimelinesSharedFeatures.DECAYED_QUOTE_COUNT, + TimelinesSharedFeatures.FAKE_FAVORITE_COUNT, + TimelinesSharedFeatures.FAKE_RETWEET_COUNT, + TimelinesSharedFeatures.FAKE_REPLY_COUNT, + TimelinesSharedFeatures.FAKE_QUOTE_COUNT, + TimeDataRecordFeatures.LAST_FAVORITE_SINCE_CREATION_HRS, + TimeDataRecordFeatures.LAST_RETWEET_SINCE_CREATION_HRS, + TimeDataRecordFeatures.LAST_REPLY_SINCE_CREATION_HRS, + TimeDataRecordFeatures.LAST_QUOTE_SINCE_CREATION_HRS, + TimeDataRecordFeatures.TIME_SINCE_LAST_FAVORITE_HRS, + TimeDataRecordFeatures.TIME_SINCE_LAST_RETWEET_HRS, + 
TimeDataRecordFeatures.TIME_SINCE_LAST_REPLY_HRS, + TimeDataRecordFeatures.TIME_SINCE_LAST_QUOTE_HRS + ) + + val UserFeaturesV5Boolean: Set[Feature[_]] = Set( + TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG, + TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG, + TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG, + TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG, + TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG, + TimelinesSharedFeatures.LABEL_SPAM_FLAG, + TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG, + TimelinesSharedFeatures.PERISCOPE_EXISTS, + TimelinesSharedFeatures.PERISCOPE_IS_LIVE, + TimelinesSharedFeatures.PERISCOPE_HAS_BEEN_FEATURED, + TimelinesSharedFeatures.PERISCOPE_IS_CURRENTLY_FEATURED, + TimelinesSharedFeatures.PERISCOPE_IS_FROM_QUALITY_SOURCE, + TimelinesSharedFeatures.HAS_QUOTE + ) + + val UserAuthorFeaturesV5: Set[Feature[_]] = Set( + TimelinesSharedFeatures.HAS_QUOTE, + TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG, + TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG, + TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG, + TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG, + TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG, + TimelinesSharedFeatures.LABEL_SPAM_FLAG, + TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG + ) + + val UserTweetSourceFeaturesV1Continuous: Set[Feature[_]] = Set( + TimelinesSharedFeatures.NUM_CAPS, + TimelinesSharedFeatures.NUM_WHITESPACES, + TimelinesSharedFeatures.TWEET_LENGTH, + TimelinesSharedFeatures.ASPECT_RATIO_DEN, + TimelinesSharedFeatures.ASPECT_RATIO_NUM, + TimelinesSharedFeatures.BIT_RATE, + TimelinesSharedFeatures.HEIGHT_1, + TimelinesSharedFeatures.HEIGHT_2, + TimelinesSharedFeatures.HEIGHT_3, + TimelinesSharedFeatures.HEIGHT_4, + TimelinesSharedFeatures.VIDEO_DURATION, + TimelinesSharedFeatures.WIDTH_1, + TimelinesSharedFeatures.WIDTH_2, + TimelinesSharedFeatures.WIDTH_3, + TimelinesSharedFeatures.WIDTH_4, + TimelinesSharedFeatures.NUM_MEDIA_TAGS + ) + + val UserTweetSourceFeaturesV1Boolean: Set[Feature[_]] = Set( + 
TimelinesSharedFeatures.HAS_QUESTION, + TimelinesSharedFeatures.RESIZE_METHOD_1, + TimelinesSharedFeatures.RESIZE_METHOD_2, + TimelinesSharedFeatures.RESIZE_METHOD_3, + TimelinesSharedFeatures.RESIZE_METHOD_4 + ) + + val UserTweetSourceFeaturesV2Continuous: Set[Feature[_]] = Set( + TimelinesSharedFeatures.NUM_EMOJIS, + TimelinesSharedFeatures.NUM_EMOTICONS, + TimelinesSharedFeatures.NUM_NEWLINES, + TimelinesSharedFeatures.NUM_STICKERS, + TimelinesSharedFeatures.NUM_FACES, + TimelinesSharedFeatures.NUM_COLOR_PALLETTE_ITEMS, + TimelinesSharedFeatures.VIEW_COUNT, + TimelinesSharedFeatures.TWEET_LENGTH_TYPE + ) + + val UserTweetSourceFeaturesV2Boolean: Set[Feature[_]] = Set( + TimelinesSharedFeatures.IS_360, + TimelinesSharedFeatures.IS_MANAGED, + TimelinesSharedFeatures.IS_MONETIZABLE, + TimelinesSharedFeatures.IS_EMBEDDABLE, + TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE, + TimelinesSharedFeatures.HAS_TITLE, + TimelinesSharedFeatures.HAS_DESCRIPTION, + TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION, + TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION + ) + + val UserAuthorTweetSourceFeaturesV1: Set[Feature[_]] = Set( + TimelinesSharedFeatures.HAS_QUESTION, + TimelinesSharedFeatures.TWEET_LENGTH, + TimelinesSharedFeatures.VIDEO_DURATION, + TimelinesSharedFeatures.NUM_MEDIA_TAGS + ) + + val UserAuthorTweetSourceFeaturesV2: Set[Feature[_]] = Set( + TimelinesSharedFeatures.NUM_CAPS, + TimelinesSharedFeatures.NUM_WHITESPACES, + TimelinesSharedFeatures.ASPECT_RATIO_DEN, + TimelinesSharedFeatures.ASPECT_RATIO_NUM, + TimelinesSharedFeatures.BIT_RATE, + TimelinesSharedFeatures.TWEET_LENGTH_TYPE, + TimelinesSharedFeatures.NUM_EMOJIS, + TimelinesSharedFeatures.NUM_EMOTICONS, + TimelinesSharedFeatures.NUM_NEWLINES, + TimelinesSharedFeatures.NUM_STICKERS, + TimelinesSharedFeatures.NUM_FACES, + TimelinesSharedFeatures.IS_360, + TimelinesSharedFeatures.IS_MANAGED, + TimelinesSharedFeatures.IS_MONETIZABLE, + TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE, + 
TimelinesSharedFeatures.HAS_TITLE, + TimelinesSharedFeatures.HAS_DESCRIPTION, + TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION, + TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION + ) + + val UserAuthorTweetSourceFeaturesV2Count: Set[Feature[_]] = Set( + TimelinesSharedFeatures.NUM_CAPS, + TimelinesSharedFeatures.ASPECT_RATIO_DEN, + TimelinesSharedFeatures.NUM_NEWLINES, + TimelinesSharedFeatures.IS_360, + TimelinesSharedFeatures.IS_MANAGED, + TimelinesSharedFeatures.IS_MONETIZABLE, + TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE, + TimelinesSharedFeatures.HAS_TITLE, + TimelinesSharedFeatures.HAS_DESCRIPTION, + TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION, + TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION + ) + + val LabelsV2: Set[Feature.Binary] = RecapLabelsForAggregation ++ Set( + RecapFeatures.IS_REPLIED, + RecapFeatures.IS_PHOTO_EXPANDED, + RecapFeatures.IS_VIDEO_PLAYBACK_50 + ) + + val TwitterWideFeatures: Set[Feature[_]] = Set( + RecapFeatures.IS_REPLY, + TimelinesSharedFeatures.HAS_QUOTE, + RecapFeatures.HAS_MENTION, + RecapFeatures.HAS_HASHTAG, + RecapFeatures.HAS_LINK, + RecapFeatures.HAS_CARD, + RecapFeatures.CONTAINS_MEDIA + ) + + val TwitterWideLabels: Set[Feature.Binary] = Set( + RecapFeatures.IS_FAVORITED, + RecapFeatures.IS_RETWEETED, + RecapFeatures.IS_REPLIED + ) + + val ReciprocalLabels: Set[Feature.Binary] = Set( + RecapFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR, + RecapFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR, + RecapFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR + ) + + val NegativeEngagementLabels: Set[Feature.Binary] = Set( + RecapFeatures.IS_REPORT_TWEET_CLICKED, + RecapFeatures.IS_BLOCK_CLICKED, + RecapFeatures.IS_MUTE_CLICKED, + RecapFeatures.IS_DONT_LIKE + ) + + val GoodClickLabels: Set[Feature.Binary] = Set( + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1, + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2, + ) +} diff --git 
a/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala new file mode 100644 index 000000000..12835ef1f --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/RectweetUserFeatureAggregation.scala @@ -0,0 +1,52 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.Feature +import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures +import com.twitter.timelines.prediction.features.itl.ITLFeatures + +object RectweetUserFeatureAggregation { + val RectweetLabelsForAggregation: Set[Feature.Binary] = + Set( + ITLFeatures.IS_FAVORITED, + ITLFeatures.IS_RETWEETED, + ITLFeatures.IS_REPLIED, + ITLFeatures.IS_CLICKED, + ITLFeatures.IS_PROFILE_CLICKED, + ITLFeatures.IS_OPEN_LINKED, + ITLFeatures.IS_PHOTO_EXPANDED, + ITLFeatures.IS_VIDEO_PLAYBACK_50 + ) + + val TweetFeatures: Set[Feature[_]] = Set( + ITLFeatures.HAS_IMAGE, + ITLFeatures.HAS_CARD, + ITLFeatures.HAS_NEWS, + ITLFeatures.REPLY_COUNT, + ITLFeatures.FAV_COUNT, + ITLFeatures.REPLY_COUNT, + ITLFeatures.RETWEET_COUNT, + ITLFeatures.MATCHES_UI_LANG, + ITLFeatures.MATCHES_SEARCHER_MAIN_LANG, + ITLFeatures.MATCHES_SEARCHER_LANGS, + ITLFeatures.TEXT_SCORE, + ITLFeatures.LINK_LANGUAGE, + ITLFeatures.NUM_HASHTAGS, + ITLFeatures.NUM_MENTIONS, + ITLFeatures.IS_SENSITIVE, + ITLFeatures.HAS_VIDEO, + ITLFeatures.HAS_LINK, + ITLFeatures.HAS_VISIBLE_LINK, + EngagementDataRecordFeatures.InNetworkFavoritesCount + // nice to have, but currently not hydrated in the RecommendedTweet payload + //EngagementDataRecordFeatures.InNetworkRetweetsCount, + //EngagementDataRecordFeatures.InNetworkRepliesCount + ) + + val ReciprocalLabels: Set[Feature.Binary] = Set( + ITLFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR, + ITLFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR, + 
ITLFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR, + ITLFeatures.IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR, + ITLFeatures.IS_REPLIED_REPLY_QUOTED_BY_AUTHOR + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala new file mode 100644 index 000000000..e6581e32e --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfig.scala @@ -0,0 +1,80 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.dal.client.dataset.KeyValDALDataset +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.FeatureContext +import com.twitter.scalding_internal.multiformat.format.keyval +import com.twitter.summingbird.batch.BatchID +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsPolicy +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateDataRecordStore +import scala.collection.JavaConverters._ + +object TimelinesAggregationConfig extends TimelinesAggregationConfigTrait { + override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2" + + def storeToDatasetMap: Map[String, KeyValDALDataset[ + keyval.KeyVal[AggregationKey, (BatchID, DataRecord)] + ]] = Map( + AuthorTopicAggregateStore -> AuthorTopicAggregatesScalaDataset, + UserTopicAggregateStore -> UserTopicAggregatesScalaDataset, + UserInferredTopicAggregateStore -> UserInferredTopicAggregatesScalaDataset, + UserAggregateStore -> UserAggregatesScalaDataset, + UserAuthorAggregateStore -> UserAuthorAggregatesScalaDataset, + UserOriginalAuthorAggregateStore -> UserOriginalAuthorAggregatesScalaDataset, + 
OriginalAuthorAggregateStore -> OriginalAuthorAggregatesScalaDataset, + UserEngagerAggregateStore -> UserEngagerAggregatesScalaDataset, + UserMentionAggregateStore -> UserMentionAggregatesScalaDataset, + TwitterWideUserAggregateStore -> TwitterWideUserAggregatesScalaDataset, + TwitterWideUserAuthorAggregateStore -> TwitterWideUserAuthorAggregatesScalaDataset, + UserRequestHourAggregateStore -> UserRequestHourAggregatesScalaDataset, + UserRequestDowAggregateStore -> UserRequestDowAggregatesScalaDataset, + UserListAggregateStore -> UserListAggregatesScalaDataset, + UserMediaUnderstandingAnnotationAggregateStore -> UserMediaUnderstandingAnnotationAggregatesScalaDataset, + ) + + override def mkPhysicalStore(store: AggregateStore): AggregateStore = store match { + case s: OfflineAggregateDataRecordStore => + s.toOfflineAggregateDataRecordStoreWithDAL(storeToDatasetMap(s.name)) + case _ => throw new IllegalArgumentException("Unsupported logical dataset type.") + } + + object CombineCountPolicies { + val EngagerCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_engager_aggregate") + val EngagerGoodClickCountsPolicy: CombineCountsPolicy = mkCountsPolicy( + "user_engager_good_click_aggregate") + val RectweetEngagerCountsPolicy: CombineCountsPolicy = + mkCountsPolicy("rectweet_user_engager_aggregate") + val MentionCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_mention_aggregate") + val RectweetSimclustersTweetCountsPolicy: CombineCountsPolicy = + mkCountsPolicy("rectweet_user_simcluster_tweet_aggregate") + val UserInferredTopicCountsPolicy: CombineCountsPolicy = + mkCountsPolicy("user_inferred_topic_aggregate") + val UserInferredTopicV2CountsPolicy: CombineCountsPolicy = + mkCountsPolicy("user_inferred_topic_aggregate_v2") + val UserMediaUnderstandingAnnotationCountsPolicy: CombineCountsPolicy = + mkCountsPolicy("user_media_annotation_aggregate") + + private[this] def mkCountsPolicy(prefix: String): CombineCountsPolicy = { + val features = 
TimelinesAggregationConfig.aggregatesToCompute + .filter(_.aggregatePrefix == prefix) + .flatMap(_.allOutputFeatures) + CombineCountsPolicy( + topK = 2, + aggregateContextToPrecompute = new FeatureContext(features.asJava), + hardLimit = Some(20) + ) + } + } +} + +object TimelinesAggregationCanaryConfig extends TimelinesAggregationConfigTrait { + override def outputHdfsPath: String = "/user/timelines/canaries/processed/aggregates_v2" + + override def mkPhysicalStore(store: AggregateStore): AggregateStore = store match { + case s: OfflineAggregateDataRecordStore => + s.toOfflineAggregateDataRecordStoreWithDAL(dalDataset = AggregatesCanaryScalaDataset) + case _ => throw new IllegalArgumentException("Unsupported logical dataset type.") + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala new file mode 100644 index 000000000..aa439deda --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigDetails.scala @@ -0,0 +1,579 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.conversions.DurationOps._ +import com.twitter.ml.api.constant.SharedFeatures.AUTHOR_ID +import com.twitter.ml.api.constant.SharedFeatures.USER_ID +import com.twitter.timelines.data_processing.ml_util.aggregation_framework._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics._ +import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform +import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveAuthorIdZero +import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveUserIdZero +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures +import 
com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures.RichUnifyPublicEngagersTransform +import com.twitter.timelines.prediction.features.list_features.ListFeatures +import com.twitter.timelines.prediction.features.recap.RecapFeatures +import com.twitter.timelines.prediction.features.request_context.RequestContextFeatures +import com.twitter.timelines.prediction.features.semantic_core_features.SemanticCoreFeatures +import com.twitter.timelines.prediction.transform.filter.FilterInNetworkTransform +import com.twitter.timelines.prediction.transform.filter.FilterImageTweetTransform +import com.twitter.timelines.prediction.transform.filter.FilterVideoTweetTransform +import com.twitter.timelines.prediction.transform.filter.FilterOutImageVideoTweetTransform +import com.twitter.util.Duration + +trait TimelinesAggregationConfigDetails extends Serializable { + + import TimelinesAggregationSources._ + + def outputHdfsPath: String + + /** + * Converts the given logical store to a physical store. The reason we do not specify the + * physical store directly with the [[AggregateGroup]] is because of a cyclic dependency when + * creating physical stores that are DalDataset with PersonalDataType annotations derived from + * the [[AggregateGroup]]. 
+ * + */ + def mkPhysicalStore(store: AggregateStore): AggregateStore + + def defaultMaxKvSourceFailures: Int = 100 + + val timelinesOfflineAggregateSink = new OfflineStoreCommonConfig { + override def apply(startDate: String) = OfflineAggregateStoreCommonConfig( + outputHdfsPathPrefix = outputHdfsPath, + dummyAppId = "timelines_aggregates_v2_ro", + dummyDatasetPrefix = "timelines_aggregates_v2_ro", + startDate = startDate + ) + } + + val UserAggregateStore = "user_aggregates" + val UserAuthorAggregateStore = "user_author_aggregates" + val UserOriginalAuthorAggregateStore = "user_original_author_aggregates" + val OriginalAuthorAggregateStore = "original_author_aggregates" + val UserEngagerAggregateStore = "user_engager_aggregates" + val UserMentionAggregateStore = "user_mention_aggregates" + val TwitterWideUserAggregateStore = "twitter_wide_user_aggregates" + val TwitterWideUserAuthorAggregateStore = "twitter_wide_user_author_aggregates" + val UserRequestHourAggregateStore = "user_request_hour_aggregates" + val UserRequestDowAggregateStore = "user_request_dow_aggregates" + val UserListAggregateStore = "user_list_aggregates" + val AuthorTopicAggregateStore = "author_topic_aggregates" + val UserTopicAggregateStore = "user_topic_aggregates" + val UserInferredTopicAggregateStore = "user_inferred_topic_aggregates" + val UserMediaUnderstandingAnnotationAggregateStore = + "user_media_understanding_annotation_aggregates" + val AuthorCountryCodeAggregateStore = "author_country_code_aggregates" + val OriginalAuthorCountryCodeAggregateStore = "original_author_country_code_aggregates" + + /** + * Step 3: Configure all aggregates to compute. + * Note that different subsets of aggregates in this list + * can be launched by different summingbird job instances. + * Any given job can be responsible for a set of AggregateGroup + * configs whose outputStores share the same exact startDate. 
+ * AggregateGroups that do not share the same inputSource, + * outputStore or startDate MUST be launched using different + * summingbird jobs and passed in a different --start-time argument + * See science/scalding/mesos/timelines/prod.yaml for an example + * of how to configure your own job. + */ + val negativeDownsampleTransform = + DownsampleTransform( + negativeSamplingRate = 0.03, + keepLabels = RecapUserFeatureAggregation.LabelsV2) + val negativeRecTweetDownsampleTransform = DownsampleTransform( + negativeSamplingRate = 0.03, + keepLabels = RectweetUserFeatureAggregation.RectweetLabelsForAggregation + ) + + val userAggregatesV2: AggregateGroup = + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_aggregate_v2", + preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */ + keys = Set(USER_ID), + features = RecapUserFeatureAggregation.UserFeaturesV2, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric, SumMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userAuthorAggregatesV2: Set[AggregateGroup] = { + + /** + * NOTE: We need to remove records from out-of-network authors from the recap input + * records (which now include out-of-network records as well after merging recap and + * rectweet models) that are used to compute user-author aggregates. This is necessary + * to limit the growth rate of user-author aggregates. 
+ */ + val allFeatureAggregates = Set( + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_author_aggregate_v2", + preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero), + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.UserAuthorFeaturesV2, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(SumMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAuthorAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + ) + + val countAggregates: Set[AggregateGroup] = Set( + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_author_aggregate_v2", + preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero), + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.UserAuthorFeaturesV2Count, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAuthorAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + ) + + allFeatureAggregates ++ countAggregates + } + + val userAggregatesV5Continuous: AggregateGroup = + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_aggregate_v5.continuous", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID), + features = RecapUserFeatureAggregation.UserFeaturesV5Continuous, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric, SumMetric, SumSqMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAggregateStore, + startDate = "2016-07-15 00:00", + 
commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userAuthorAggregatesV5: AggregateGroup = + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_author_aggregate_v5", + preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero), + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.UserAuthorFeaturesV5, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAuthorAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val tweetSourceUserAuthorAggregatesV1: AggregateGroup = + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_author_aggregate_tweetsource_v1", + preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero), + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.UserAuthorTweetSourceFeaturesV1, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric, SumMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAuthorAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userEngagerAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_engager_aggregate", + keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserEngagerAggregateStore, + startDate = 
"2016-09-02 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + preTransforms = Seq( + RichRemoveUserIdZero, + RichUnifyPublicEngagersTransform + ) + ) + + val userMentionAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */ + aggregatePrefix = "user_mention_aggregate", + keys = Set(USER_ID, RecapFeatures.MENTIONED_SCREEN_NAMES), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserMentionAggregateStore, + startDate = "2017-03-01 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + val twitterWideUserAggregates = AggregateGroup( + inputSource = timelinesDailyTwitterWideSource, + preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */ + aggregatePrefix = "twitter_wide_user_aggregate", + keys = Set(USER_ID), + features = RecapUserFeatureAggregation.TwitterWideFeatures, + labels = RecapUserFeatureAggregation.TwitterWideLabels, + metrics = Set(CountMetric, SumMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = TwitterWideUserAggregateStore, + startDate = "2016-12-28 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val twitterWideUserAuthorAggregates = AggregateGroup( + inputSource = timelinesDailyTwitterWideSource, + preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */ + aggregatePrefix = "twitter_wide_user_author_aggregate", + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.TwitterWideFeatures, + labels = RecapUserFeatureAggregation.TwitterWideLabels, + metrics = 
Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = TwitterWideUserAuthorAggregateStore, + startDate = "2016-12-28 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + /** + * User-HourOfDay and User-DayOfWeek aggregations, both for recap and rectweet + */ + val userRequestHourAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_request_context_aggregate.hour", + preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform), + keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_HOUR), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserRequestHourAggregateStore, + startDate = "2017-08-01 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userRequestDowAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_request_context_aggregate.dow", + preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform), + keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_DOW), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserRequestDowAggregateStore, + startDate = "2017-08-01 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val authorTopicAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "author_topic_aggregate", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(AUTHOR_ID, 
TimelinesSharedFeatures.TOPIC_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = AuthorTopicAggregateStore, + startDate = "2020-05-19 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userTopicAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_topic_aggregate", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserTopicAggregateStore, + startDate = "2020-05-23 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userTopicAggregatesV2 = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_topic_aggregate_v2", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID), + features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count, + labels = RecapUserFeatureAggregation.LabelsV2, + includeAnyFeature = false, + includeAnyLabel = false, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserTopicAggregateStore, + startDate = "2020-05-23 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userInferredTopicAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_inferred_topic_aggregate", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID, 
TimelinesSharedFeatures.INFERRED_TOPIC_IDS), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserInferredTopicAggregateStore, + startDate = "2020-09-09 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userInferredTopicAggregatesV2 = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_inferred_topic_aggregate_v2", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID, TimelinesSharedFeatures.INFERRED_TOPIC_IDS), + features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count, + labels = RecapUserFeatureAggregation.LabelsV2, + includeAnyFeature = false, + includeAnyLabel = false, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserInferredTopicAggregateStore, + startDate = "2020-09-09 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val userReciprocalEngagementAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_aggregate_v6", + preTransforms = Seq(RichRemoveUserIdZero), + keys = Set(USER_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.ReciprocalLabels, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + val userOriginalAuthorReciprocalEngagementAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_original_author_aggregate_v1", + 
preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero), + keys = Set(USER_ID, TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.ReciprocalLabels, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserOriginalAuthorAggregateStore, + startDate = "2018-12-26 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + val originalAuthorReciprocalEngagementAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "original_author_aggregate_v1", + preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero), + keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.ReciprocalLabels, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = OriginalAuthorAggregateStore, + startDate = "2023-02-25 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + val originalAuthorNegativeEngagementAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "original_author_aggregate_v2", + preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero), + keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.NegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = OriginalAuthorAggregateStore, + startDate = "2023-02-25 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + includeAnyLabel = false + ) + + val 
userListAggregates: AggregateGroup = + AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_list_aggregate", + keys = Set(USER_ID, ListFeatures.LIST_ID), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserListAggregateStore, + startDate = "2020-05-28 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + preTransforms = Seq(RichRemoveUserIdZero) + ) + + val userMediaUnderstandingAnnotationAggregates: AggregateGroup = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_media_annotation_aggregate", + preTransforms = Seq(RichRemoveUserIdZero), + keys = + Set(USER_ID, SemanticCoreFeatures.mediaUnderstandingHighRecallNonSensitiveEntityIdsFeature), + features = Set.empty, + labels = RecapUserFeatureAggregation.LabelsV2, + metrics = Set(CountMetric), + halfLives = Set(50.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserMediaUnderstandingAnnotationAggregateStore, + startDate = "2021-03-20 00:00", + commonConfig = timelinesOfflineAggregateSink + )) + ) + + val userAuthorGoodClickAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_author_good_click_aggregate", + preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero), + keys = Set(USER_ID, AUTHOR_ID), + features = RecapUserFeatureAggregation.UserAuthorFeaturesV2, + labels = RecapUserFeatureAggregation.GoodClickLabels, + metrics = Set(SumMetric), + halfLives = Set(14.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserAuthorAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )) + ) + + val 
userEngagerGoodClickAggregates = AggregateGroup( + inputSource = timelinesDailyRecapMinimalSource, + aggregatePrefix = "user_engager_good_click_aggregate", + keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds), + features = Set.empty, + labels = RecapUserFeatureAggregation.GoodClickLabels, + metrics = Set(CountMetric), + halfLives = Set(14.days), + outputStore = mkPhysicalStore( + OfflineAggregateDataRecordStore( + name = UserEngagerAggregateStore, + startDate = "2016-09-02 00:00", + commonConfig = timelinesOfflineAggregateSink, + maxKvSourceFailures = defaultMaxKvSourceFailures + )), + preTransforms = Seq( + RichRemoveUserIdZero, + RichUnifyPublicEngagersTransform + ) + ) + +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala new file mode 100644 index 000000000..6fb2e07b7 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationConfigTrait.scala @@ -0,0 +1,50 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationConfig +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup + +trait TimelinesAggregationConfigTrait + extends TimelinesAggregationConfigDetails + with AggregationConfig { + private val aggregateGroups = Set( + authorTopicAggregates, + userTopicAggregates, + userTopicAggregatesV2, + userInferredTopicAggregates, + userInferredTopicAggregatesV2, + userAggregatesV2, + userAggregatesV5Continuous, + userReciprocalEngagementAggregates, + userAuthorAggregatesV5, + userOriginalAuthorReciprocalEngagementAggregates, + originalAuthorReciprocalEngagementAggregates, + tweetSourceUserAuthorAggregatesV1, + 
userEngagerAggregates, + userMentionAggregates, + twitterWideUserAggregates, + twitterWideUserAuthorAggregates, + userRequestHourAggregates, + userRequestDowAggregates, + userListAggregates, + userMediaUnderstandingAnnotationAggregates, + ) ++ userAuthorAggregatesV2 + + val aggregatesToComputeList: Set[List[TypedAggregateGroup[_]]] = + aggregateGroups.map(_.buildTypedAggregateGroups()) + + override val aggregatesToCompute: Set[TypedAggregateGroup[_]] = aggregatesToComputeList.flatten + + /* + * Feature selection config to save storage space and manhattan query bandwidth. + * Only the most important features found using offline RCE simulations are used + * when actually training and serving. This selector is used by + * [[com.twitter.timelines.data_processing.jobs.timeline_ranking_user_features.TimelineRankingAggregatesV2FeaturesProdJob]] + * but defined here to keep it in sync with the config that computes the aggregates. + */ + val AggregatesV2FeatureSelector = FeatureSelectorConfig.AggregatesV2ProdFeatureSelector + + def filterAggregatesGroups(storeNames: Set[String]): Set[AggregateGroup] = { + aggregateGroups.filter(aggregateGroup => storeNames.contains(aggregateGroup.outputStore.name)) + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala new file mode 100644 index 000000000..1f2433b53 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationKeyValInjections.scala @@ -0,0 +1,48 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.DataRecord +import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection +import com.twitter.summingbird.batch.BatchID +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.{ + AggregateStore, + AggregationKey, + OfflineAggregateInjections, + 
TypedAggregateGroup +} + +object TimelinesAggregationKeyValInjections extends TimelinesAggregationConfigTrait { + + import OfflineAggregateInjections.getInjection + + type KVInjection = KeyValInjection[AggregationKey, (BatchID, DataRecord)] + + val AuthorTopic: KVInjection = getInjection(filter(AuthorTopicAggregateStore)) + val UserTopic: KVInjection = getInjection(filter(UserTopicAggregateStore)) + val UserInferredTopic: KVInjection = getInjection(filter(UserInferredTopicAggregateStore)) + val User: KVInjection = getInjection(filter(UserAggregateStore)) + val UserAuthor: KVInjection = getInjection(filter(UserAuthorAggregateStore)) + val UserOriginalAuthor: KVInjection = getInjection(filter(UserOriginalAuthorAggregateStore)) + val OriginalAuthor: KVInjection = getInjection(filter(OriginalAuthorAggregateStore)) + val UserEngager: KVInjection = getInjection(filter(UserEngagerAggregateStore)) + val UserMention: KVInjection = getInjection(filter(UserMentionAggregateStore)) + val TwitterWideUser: KVInjection = getInjection(filter(TwitterWideUserAggregateStore)) + val TwitterWideUserAuthor: KVInjection = getInjection(filter(TwitterWideUserAuthorAggregateStore)) + val UserRequestHour: KVInjection = getInjection(filter(UserRequestHourAggregateStore)) + val UserRequestDow: KVInjection = getInjection(filter(UserRequestDowAggregateStore)) + val UserList: KVInjection = getInjection(filter(UserListAggregateStore)) + val UserMediaUnderstandingAnnotation: KVInjection = getInjection( + filter(UserMediaUnderstandingAnnotationAggregateStore)) + + private def filter(storeName: String): Set[TypedAggregateGroup[_]] = { + val groups = aggregatesToCompute.filter(_.outputStore.name == storeName) + require(groups.nonEmpty) + groups + } + + override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2" + + // Since this object is not used to execute any online or offline aggregates job, but is meant + // to store all PDT enabled KeyValInjections, we do not need to construct 
a physical store. + // We use the identity operation as a default. + override def mkPhysicalStore(store: AggregateStore): AggregateStore = store +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala new file mode 100644 index 000000000..c799f22fa --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/TimelinesAggregationSources.scala @@ -0,0 +1,45 @@ +package com.twitter.timelines.prediction.common.aggregates + +import com.twitter.ml.api.constant.SharedFeatures.TIMESTAMP +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateSource +import com.twitter.timelines.prediction.features.p_home_latest.HomeLatestUserAggregatesFeatures +import timelines.data_processing.ad_hoc.recap.data_record_preparation.RecapDataRecordsAggMinimalJavaDataset + +/** + * Any update here should be in sync with [[TimelinesFeatureGroups]] and [[AggMinimalDataRecordGeneratorJob]]. 
+ */ +object TimelinesAggregationSources { + + /** + * This is the recap data records after post-processing in [[GenerateRecapAggMinimalDataRecordsJob]] + */ + val timelinesDailyRecapMinimalSource = OfflineAggregateSource( + name = "timelines_daily_recap", + timestampFeature = TIMESTAMP, + dalDataSet = Some(RecapDataRecordsAggMinimalJavaDataset), + scaldingSuffixType = Some("dal"), + withValidation = true + ) + val timelinesDailyTwitterWideSource = OfflineAggregateSource( + name = "timelines_daily_twitter_wide", + timestampFeature = TIMESTAMP, + scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/twitter_wide_data_records"), + scaldingSuffixType = Some("daily"), + withValidation = true + ) + + val timelinesDailyListTimelineSource = OfflineAggregateSource( + name = "timelines_daily_list_timeline", + timestampFeature = TIMESTAMP, + scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/all_features/list"), + scaldingSuffixType = Some("hourly"), + withValidation = true + ) + + val timelinesDailyHomeLatestSource = OfflineAggregateSource( + name = "timelines_daily_home_latest", + timestampFeature = HomeLatestUserAggregatesFeatures.AGGREGATE_TIMESTAMP_MS, + scaldingHdfsPath = Some("/user/timelines/processed/p_home_latest/user_aggregates"), + scaldingSuffixType = Some("daily") + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala new file mode 100644 index 000000000..7cefc67b9 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/AuthorFeaturesAdapter.scala @@ -0,0 +1,70 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState +import com.twitter.ml.api.Feature.Binary +import com.twitter.ml.api.{DataRecord, Feature, FeatureContext, RichDataRecord} +import 
com.twitter.ml.featurestore.catalog.entities.core.Author +import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity +import com.twitter.ml.featurestore.lib.data.PredictionRecord +import com.twitter.ml.featurestore.lib.feature.{BoundFeature, BoundFeatureSet} +import com.twitter.ml.featurestore.lib.{UserId, Discrete => FSDiscrete} +import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase +import java.lang.{Boolean => JBoolean} +import java.util +import scala.collection.JavaConverters._ + +object AuthorFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] { + val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] = UserActivity.UserState.bind(Author) + val UserFeaturesSet: BoundFeatureSet = BoundFeatureSet(UserStateBoundFeature) + + /** + * Boolean features about the author's user state; NEAR_ZERO, VERY_LIGHT and LIGHT all map to IS_USER_LIGHT. + * enum UserState { + * NEW = 0, + * NEAR_ZERO = 1, + * VERY_LIGHT = 2, + * LIGHT = 3, + * MEDIUM_TWEETER = 4, + * MEDIUM_NON_TWEETER = 5, + * HEAVY_NON_TWEETER = 6, + * HEAVY_TWEETER = 7 + * }(persisted='true') + */ + val IS_USER_NEW = new Binary("timelines.author.user_state.is_user_new", Set(UserState).asJava) + val IS_USER_LIGHT = new Binary("timelines.author.user_state.is_user_light", Set(UserState).asJava) + val IS_USER_MEDIUM_TWEETER = + new Binary("timelines.author.user_state.is_user_medium_tweeter", Set(UserState).asJava) + val IS_USER_MEDIUM_NON_TWEETER = + new Binary("timelines.author.user_state.is_user_medium_non_tweeter", Set(UserState).asJava) + val IS_USER_HEAVY_NON_TWEETER = + new Binary("timelines.author.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava) + val IS_USER_HEAVY_TWEETER = + new Binary("timelines.author.user_state.is_user_heavy_tweeter", Set(UserState).asJava) + val userStateToFeatureMap: Map[Long, Binary] = Map( + 0L -> IS_USER_NEW, + 1L -> IS_USER_LIGHT, + 2L -> IS_USER_LIGHT, + 3L -> IS_USER_LIGHT, + 4L -> IS_USER_MEDIUM_TWEETER, + 5L -> IS_USER_MEDIUM_NON_TWEETER, + 6L -> 
IS_USER_HEAVY_NON_TWEETER, + 7L -> IS_USER_HEAVY_TWEETER + ) + + val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet + + private val allFeatures: Seq[Feature[_]] = UserStateBooleanFeatures.toSeq + override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*) + override def commonFeatures: Set[Feature[_]] = Set.empty + + override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = { + val newRecord = new RichDataRecord(new DataRecord) + record + .getFeatureValue(UserStateBoundFeature) + .flatMap { userState => userStateToFeatureMap.get(userState.value) }.foreach { + booleanFeature => newRecord.setFeatureValue[JBoolean](booleanFeature, true) + } + + List(newRecord.getRecord).asJava + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD new file mode 100644 index 000000000..93f39405d --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/BUILD @@ -0,0 +1,199 @@ +heron_binary( + name = "heron-without-jass", + main = "com.twitter.timelines.prediction.common.aggregates.real_time.TypeSafeRunner", + oss = True, + platform = "java8", + runtime_platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + ":real_time", + "3rdparty/jvm/org/slf4j:slf4j-jdk14", + ], +) + +jvm_app( + name = "rta_heron", + binary = ":heron-without-jass", + bundles = [ + bundle( + fileset = ["resources/jaas.conf"], + ), + ], + tags = [ + "bazel-compatible", + "bazel-only", + ], +) + +scala_library( + sources = ["*.scala"], + platform = "java8", + strict_deps = False, + tags = ["bazel-compatible"], + dependencies = [ + ":online-configs", + "3rdparty/src/jvm/com/twitter/summingbird:storm", + "src/java/com/twitter/heron/util", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + 
"src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features", + "src/scala/com/twitter/ml/api/util", + "src/scala/com/twitter/storehaus_internal/memcache", + "src/scala/com/twitter/storehaus_internal/util", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + "src/scala/com/twitter/summingbird_internal/runner/storm", + "src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2", + "src/scala/com/twitter/timelines/prediction/adapters/client_log_event", + "src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr", + "src/scala/com/twitter/timelines/prediction/features/client_log_event", + "src/scala/com/twitter/timelines/prediction/features/common", + "src/scala/com/twitter/timelines/prediction/features/list_features", + "src/scala/com/twitter/timelines/prediction/features/recap", + "src/scala/com/twitter/timelines/prediction/features/user_health", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/timelines/suggests/common:record-scala", + "timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework/heron", + "timelines/data_processing/ml_util/aggregation_framework/job", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "timelines/data_processing/ml_util/transforms", + "timelines/src/main/scala/com/twitter/timelines/clients/memcache_common", + "util/util-core:scala", + ], +) + +scala_library( + name = "online-configs", + sources = [ + "AuthorFeaturesAdapter.scala", + "Event.scala", + "FeatureStoreUtils.scala", + "StormAggregateSourceUtils.scala", + "TimelinesOnlineAggregationConfig.scala", + "TimelinesOnlineAggregationConfigBase.scala", + "TimelinesOnlineAggregationSources.scala", + 
"TimelinesStormAggregateSource.scala", + "TweetFeaturesReadableStore.scala", + "UserFeaturesAdapter.scala", + "UserFeaturesReadableStore.scala", + ], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + ":base-config", + "3rdparty/src/jvm/com/twitter/scalding:db", + "3rdparty/src/jvm/com/twitter/storehaus:core", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "3rdparty/src/jvm/com/twitter/summingbird:online", + "3rdparty/src/jvm/com/twitter/summingbird:storm", + "abuse/detection/src/main/thrift/com/twitter/abuse/detection/mention_interactions:thrift-scala", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "snowflake/src/main/thrift:thrift-scala", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features", + "src/scala/com/twitter/ml/api/util:datarecord", + "src/scala/com/twitter/ml/featurestore/catalog/datasets/geo:geo-user-location", + "src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features", + "src/scala/com/twitter/ml/featurestore/catalog/entities/core", + "src/scala/com/twitter/ml/featurestore/catalog/features/core:user", + "src/scala/com/twitter/ml/featurestore/catalog/features/geo", + "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity", + "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info", + "src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores", + "src/scala/com/twitter/ml/featurestore/lib/data", + "src/scala/com/twitter/ml/featurestore/lib/dataset/offline", + "src/scala/com/twitter/ml/featurestore/lib/export/strato:app-names", + "src/scala/com/twitter/ml/featurestore/lib/feature", + "src/scala/com/twitter/ml/featurestore/lib/online", + "src/scala/com/twitter/ml/featurestore/lib/params", + "src/scala/com/twitter/storehaus_internal/util", + 
"src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + "src/scala/com/twitter/summingbird_internal/runner/storm", + "src/scala/com/twitter/summingbird_internal/sources/common", + "src/scala/com/twitter/summingbird_internal/sources/common/remote:ClientEventSourceScrooge", + "src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2", + "src/scala/com/twitter/timelines/prediction/adapters/client_log_event", + "src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr", + "src/scala/com/twitter/timelines/prediction/common/adapters:base", + "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter", + "src/scala/com/twitter/timelines/prediction/common/aggregates", + "src/scala/com/twitter/timelines/prediction/features/client_log_event", + "src/scala/com/twitter/timelines/prediction/features/common", + "src/scala/com/twitter/timelines/prediction/features/list_features", + "src/scala/com/twitter/timelines/prediction/features/recap", + "src/scala/com/twitter/timelines/prediction/features/user_health", + "src/thrift/com/twitter/clientapp/gen:clientapp-scala", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/timelines/suggests/common:engagement-java", + "src/thrift/com/twitter/timelines/suggests/common:engagement-scala", + "src/thrift/com/twitter/timelines/suggests/common:record-scala", + "src/thrift/com/twitter/timelineservice/injection:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala", + "strato/src/main/scala/com/twitter/strato/client", + "timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache", + "timelines/data_processing/ad_hoc/suggests/common:raw_training_data_creator", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + 
"timelines/data_processing/ml_util/aggregation_framework/heron:configs", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "timelines/data_processing/ml_util/transforms", + "timelines/data_processing/util:rich-request", + "tweetsource/common/src/main/thrift:thrift-scala", + "twitter-server-internal/src/main/scala", + "unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/config", + "unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/summingbird", + "unified_user_actions/thrift/src/main/thrift/com/twitter/unified_user_actions:unified_user_actions-scala", + "util/util-core:scala", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) + +scala_library( + name = "base-config", + sources = [ + "AuthorFeaturesAdapter.scala", + "TimelinesOnlineAggregationConfigBase.scala", + "TweetFeaturesAdapter.scala", + "UserFeaturesAdapter.scala", + ], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/resources/com/twitter/timelines/prediction/common/aggregates/real_time", + "src/scala/com/twitter/ml/api/util:datarecord", + "src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features", + "src/scala/com/twitter/ml/featurestore/catalog/entities/core", + "src/scala/com/twitter/ml/featurestore/catalog/features/core:user", + "src/scala/com/twitter/ml/featurestore/catalog/features/geo", + "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity", + "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info", + "src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores", + "src/scala/com/twitter/ml/featurestore/lib/data", + "src/scala/com/twitter/ml/featurestore/lib/feature", + "src/scala/com/twitter/timelines/prediction/common/adapters:base", + 
"src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter", + "src/scala/com/twitter/timelines/prediction/common/aggregates", + "src/scala/com/twitter/timelines/prediction/features/client_log_event", + "src/scala/com/twitter/timelines/prediction/features/common", + "src/scala/com/twitter/timelines/prediction/features/list_features", + "src/scala/com/twitter/timelines/prediction/features/recap", + "src/scala/com/twitter/timelines/prediction/features/user_health", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/ml/api:feature_context-java", + "src/thrift/com/twitter/timelines/suggests/common:engagement-scala", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework/heron:base-config", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "timelines/data_processing/ml_util/transforms", + "util/util-core:scala", + "util/util-core:util-core-util", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala new file mode 100644 index 000000000..1bd697d0d --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/Event.scala @@ -0,0 +1,11 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +private[real_time] sealed trait Event[T] { def event: T } + +private[real_time] case class HomeEvent[T](override val event: T) extends Event[T] + +private[real_time] case class ProfileEvent[T](override val event: T) extends Event[T] + +private[real_time] case class SearchEvent[T](override val event: T) extends Event[T] + +private[real_time] case class UuaEvent[T](override val event: T) extends Event[T] diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala 
b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala new file mode 100644 index 000000000..156d9d35f --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/FeatureStoreUtils.scala @@ -0,0 +1,53 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.ml.featurestore.catalog.datasets.magicrecs.UserFeaturesDataset +import com.twitter.ml.featurestore.catalog.datasets.geo.GeoUserLocationDataset +import com.twitter.ml.featurestore.lib.dataset.DatasetParams +import com.twitter.ml.featurestore.lib.export.strato.FeatureStoreAppNames +import com.twitter.ml.featurestore.lib.online.FeatureStoreClient +import com.twitter.ml.featurestore.lib.params.FeatureStoreParams +import com.twitter.strato.client.{Client, Strato} +import com.twitter.strato.opcontext.Attribution.ManhattanAppId +import com.twitter.util.Duration + +private[real_time] object FeatureStoreUtils { + private def mkStratoClient(serviceIdentifier: ServiceIdentifier): Client = + Strato.client + .withMutualTls(serviceIdentifier) + .withRequestTimeout(Duration.fromMilliseconds(50)) + .build() + + private val featureStoreParams: FeatureStoreParams = + FeatureStoreParams( + perDataset = Map( + UserFeaturesDataset.id -> + DatasetParams( + stratoSuffix = Some(FeatureStoreAppNames.Timelines), + attributions = Seq(ManhattanAppId("athena", "timelines_aggregates_v2_features_by_user")) + ), + GeoUserLocationDataset.id -> + DatasetParams( + attributions = Seq(ManhattanAppId("starbuck", "timelines_geo_features_by_user")) + ) + ) + ) + + def mkFeatureStoreClient( + serviceIdentifier: ServiceIdentifier, + statsReceiver: StatsReceiver + ): FeatureStoreClient = { + com.twitter.server.Init() // necessary in order to use WilyNS path + + val stratoClient: Client = mkStratoClient(serviceIdentifier) + val 
featureStoreClient: FeatureStoreClient = FeatureStoreClient( + featureSet = + UserFeaturesAdapter.UserFeaturesSet ++ AuthorFeaturesAdapter.UserFeaturesSet ++ TweetFeaturesAdapter.TweetFeaturesSet, + client = stratoClient, + statsReceiver = statsReceiver, + featureStoreParams = featureStoreParams + ) + featureStoreClient + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala new file mode 100644 index 000000000..42f86fa4f --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/LocallyReplicatedStore.scala @@ -0,0 +1,79 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.storehaus.ReplicatedReadableStore +import com.twitter.storehaus.Store +import com.twitter.timelines.clients.memcache_common._ +import com.twitter.timelines.util.FailOpenHandler +import com.twitter.util.Future + +object ServedFeaturesMemcacheConfigBuilder { + def getTwCacheDestination(cluster: String, isProd: Boolean = false): String = + if (!isProd) { + s"/srv#/test/$cluster/cache//twemcache_timelines_served_features_cache" + } else { + s"/srv#/prod/$cluster/cache/timelines_served_features" + } + + /** + * @param cluster The DC of the cache that this client will send requests to. This + * can be different from the DC where the summingbird job is running. + * @param isProd Whether this client is part of a production summingbird job, as + * different access points will need to be chosen. 
+ */ + def build(cluster: String, isProd: Boolean = false): StorehausMemcacheConfig = + StorehausMemcacheConfig( + destName = getTwCacheDestination(cluster, isProd), + keyPrefix = "", + requestTimeout = 200.milliseconds, + numTries = 2, + globalTimeout = 400.milliseconds, + tcpConnectTimeout = 200.milliseconds, + connectionAcquisitionTimeout = 200.milliseconds, + numPendingRequests = 1000, + isReadOnly = false + ) +} + +/** + * If lookup key does not exist locally, make a call to the replicated store(s). + * If value exists remotely, write the first returned value to the local store + * and return it. Map any exceptions to None so that the subsequent operations + * may proceed. + */ +class LocallyReplicatedStore[-K, V]( + localStore: Store[K, V], + remoteStore: ReplicatedReadableStore[K, V], + scopedStatsReceiver: StatsReceiver) + extends Store[K, V] { + private[this] val failOpenHandler = new FailOpenHandler(scopedStatsReceiver.scope("failOpen")) + private[this] val localFailsCounter = scopedStatsReceiver.counter("localFails") + private[this] val localWritesCounter = scopedStatsReceiver.counter("localWrites") + private[this] val remoteFailsCounter = scopedStatsReceiver.counter("remoteFails") + + override def get(k: K): Future[Option[V]] = + failOpenHandler { + localStore + .get(k) + .flatMap { + case Some(v) => Future.value(Some(v)) + case _ => { + localFailsCounter.incr() + val replicatedOptFu = remoteStore.get(k) + // async, fire-and-forget write-back to the local store if the remote result is not empty + replicatedOptFu.onSuccess { + case Some(v) => { + localWritesCounter.incr() + localStore.put((k, Some(v))) + } + case _ => { + remoteFailsCounter.incr() + () + } + } + replicatedOptFu + } + } + } { _: Throwable => Future.None } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala new file mode 100644 index 000000000..e72d3392b --- /dev/null 
+++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/StormAggregateSourceUtils.scala @@ -0,0 +1,254 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.DataRecordMerger +import com.twitter.ml.api.Feature +import com.twitter.ml.api.RichDataRecord +import com.twitter.ml.featurestore.catalog.entities.core.Author +import com.twitter.ml.featurestore.catalog.entities.core.Tweet +import com.twitter.ml.featurestore.catalog.entities.core.User +import com.twitter.ml.featurestore.lib.online.FeatureStoreClient +import com.twitter.summingbird.Producer +import com.twitter.summingbird.storm.Storm +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import java.lang.{Long => JLong} + +import com.twitter.unified_user_actions.thriftscala.ActionType +import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction + +private[real_time] object StormAggregateSourceUtils { + type UserId = Long + type AuthorId = Long + type TweetId = Long + + /** + * Attaches a [[FeatureStoreClient]] to the underlying [[Producer]]. The FeatureStoreClient + * hydrates additional user, author and tweet features (each join is gated by `jobConfig`). + * + * @param underlyingProducer converts a stream of [[com.twitter.clientapp.thriftscala.LogEvent]] + * to a stream of [[DataRecord]]. 
+ */ + def wrapByFeatureStoreClient( + underlyingProducer: Producer[Storm, Event[DataRecord]], + jobConfig: RealTimeAggregatesJobConfig, + scopedStatsReceiver: StatsReceiver + ): Producer[Storm, Event[DataRecord]] = { + lazy val keyDataRecordCounter = scopedStatsReceiver.counter("keyDataRecord") + lazy val keyFeatureCounter = scopedStatsReceiver.counter("keyFeature") + lazy val leftDataRecordCounter = scopedStatsReceiver.counter("leftDataRecord") + lazy val rightDataRecordCounter = scopedStatsReceiver.counter("rightDataRecord") + lazy val mergeNumFeaturesCounter = scopedStatsReceiver.counter("mergeNumFeatures") + lazy val authorKeyDataRecordCounter = scopedStatsReceiver.counter("authorKeyDataRecord") + lazy val authorKeyFeatureCounter = scopedStatsReceiver.counter("authorKeyFeature") + lazy val authorLeftDataRecordCounter = scopedStatsReceiver.counter("authorLeftDataRecord") + lazy val authorRightDataRecordCounter = scopedStatsReceiver.counter("authorRightDataRecord") + lazy val authorMergeNumFeaturesCounter = scopedStatsReceiver.counter("authorMergeNumFeatures") + lazy val tweetKeyDataRecordCounter = + scopedStatsReceiver.counter("tweetKeyDataRecord") + lazy val tweetKeyFeatureCounter = scopedStatsReceiver.counter("tweetKeyFeature") + lazy val tweetLeftDataRecordCounter = + scopedStatsReceiver.counter("tweetLeftDataRecord") + lazy val tweetRightDataRecordCounter = + scopedStatsReceiver.counter("tweetRightDataRecord") + lazy val tweetMergeNumFeaturesCounter = + scopedStatsReceiver.counter("tweetMergeNumFeatures") + + @transient lazy val featureStoreClient: FeatureStoreClient = + FeatureStoreUtils.mkFeatureStoreClient( + serviceIdentifier = jobConfig.serviceIdentifier, + statsReceiver = scopedStatsReceiver + ) + + lazy val joinUserFeaturesDataRecordProducer = + if (jobConfig.keyedByUserEnabled) { + lazy val keyedByUserFeaturesStormService: Storm#Service[Set[UserId], DataRecord] = + Storm.service( + new UserFeaturesReadableStore( + featureStoreClient = 
featureStoreClient, + userEntity = User, + userFeaturesAdapter = UserFeaturesAdapter + ) + ) + + leftJoinDataRecordProducer( + keyFeature = SharedFeatures.USER_ID, + leftDataRecordProducer = underlyingProducer, + rightStormService = keyedByUserFeaturesStormService, + keyDataRecordCounter = keyDataRecordCounter, + keyFeatureCounter = keyFeatureCounter, + leftDataRecordCounter = leftDataRecordCounter, + rightDataRecordCounter = rightDataRecordCounter, + mergeNumFeaturesCounter = mergeNumFeaturesCounter + ) + } else { + underlyingProducer + } + + lazy val joinAuthorFeaturesDataRecordProducer = + if (jobConfig.keyedByAuthorEnabled) { + lazy val keyedByAuthorFeaturesStormService: Storm#Service[Set[AuthorId], DataRecord] = + Storm.service( + new UserFeaturesReadableStore( + featureStoreClient = featureStoreClient, + userEntity = Author, + userFeaturesAdapter = AuthorFeaturesAdapter + ) + ) + + leftJoinDataRecordProducer( + keyFeature = TimelinesSharedFeatures.SOURCE_AUTHOR_ID, + leftDataRecordProducer = joinUserFeaturesDataRecordProducer, + rightStormService = keyedByAuthorFeaturesStormService, + keyDataRecordCounter = authorKeyDataRecordCounter, + keyFeatureCounter = authorKeyFeatureCounter, + leftDataRecordCounter = authorLeftDataRecordCounter, + rightDataRecordCounter = authorRightDataRecordCounter, + mergeNumFeaturesCounter = authorMergeNumFeaturesCounter + ) + } else { + joinUserFeaturesDataRecordProducer + } + + lazy val joinTweetFeaturesDataRecordProducer = { + if (jobConfig.keyedByTweetEnabled) { + lazy val keyedByTweetFeaturesStormService: Storm#Service[Set[TweetId], DataRecord] = + Storm.service( + new TweetFeaturesReadableStore( + featureStoreClient = featureStoreClient, + tweetEntity = Tweet, + tweetFeaturesAdapter = TweetFeaturesAdapter + ) + ) + + leftJoinDataRecordProducer( + keyFeature = TimelinesSharedFeatures.SOURCE_TWEET_ID, + leftDataRecordProducer = joinAuthorFeaturesDataRecordProducer, + rightStormService = keyedByTweetFeaturesStormService, + 
keyDataRecordCounter = tweetKeyDataRecordCounter, + keyFeatureCounter = tweetKeyFeatureCounter, + leftDataRecordCounter = tweetLeftDataRecordCounter, + rightDataRecordCounter = tweetRightDataRecordCounter, + mergeNumFeaturesCounter = tweetMergeNumFeaturesCounter + ) + } else { + joinAuthorFeaturesDataRecordProducer + } + } + + joinTweetFeaturesDataRecordProducer + } + + private[this] lazy val DataRecordMerger = new DataRecordMerger + + /** + * Make the join key from the client event data record. + * @param keyFeature Feature to extract join key value: USER_ID, SOURCE_TWEET_ID, etc. + * @param record DataRecord containing client engagement and basic tweet-side features + * @return A singleton set holding the key value when the record has `keyFeature`, otherwise an + * empty set; the caller pairs it with the record for the subsequent leftJoin operation. + */ + private[this] def mkKey( + keyFeature: Feature[JLong], + record: DataRecord, + keyDataRecordCounter: Counter, + keyFeatureCounter: Counter + ): Set[Long] = { + keyDataRecordCounter.incr() + val richRecord = new RichDataRecord(record) + if (richRecord.hasFeature(keyFeature)) { + keyFeatureCounter.incr() + val key: Long = richRecord.getFeatureValue(keyFeature).toLong + Set(key) + } else { + Set.empty[Long] + } + } + + /** + * After the leftJoin, merge the client event data record and the joined data record + * into a single data record used for further aggregation. 
+ */ + private[this] def mergeDataRecord( + leftRecord: Event[DataRecord], + rightRecordOpt: Option[DataRecord], + leftDataRecordCounter: Counter, + rightDataRecordCounter: Counter, + mergeNumFeaturesCounter: Counter + ): Event[DataRecord] = { + leftDataRecordCounter.incr() + rightRecordOpt.foreach { rightRecord => + rightDataRecordCounter.incr() + DataRecordMerger.merge(leftRecord.event, rightRecord) + mergeNumFeaturesCounter.incr(new RichDataRecord(leftRecord.event).numFeatures()) + } + leftRecord + } + + private[this] def leftJoinDataRecordProducer( + keyFeature: Feature[JLong], + leftDataRecordProducer: Producer[Storm, Event[DataRecord]], + rightStormService: Storm#Service[Set[Long], DataRecord], + keyDataRecordCounter: => Counter, + keyFeatureCounter: => Counter, + leftDataRecordCounter: => Counter, + rightDataRecordCounter: => Counter, + mergeNumFeaturesCounter: => Counter + ): Producer[Storm, Event[DataRecord]] = { + val keyedLeftDataRecordProducer: Producer[Storm, (Set[Long], Event[DataRecord])] = + leftDataRecordProducer.map { + case dataRecord: HomeEvent[DataRecord] => + val key = mkKey( + keyFeature = keyFeature, + record = dataRecord.event, + keyDataRecordCounter = keyDataRecordCounter, + keyFeatureCounter = keyFeatureCounter + ) + (key, dataRecord) + case dataRecord: ProfileEvent[DataRecord] => + val key = Set.empty[Long] + (key, dataRecord) + case dataRecord: SearchEvent[DataRecord] => + val key = Set.empty[Long] + (key, dataRecord) + case dataRecord: UuaEvent[DataRecord] => + val key = Set.empty[Long] + (key, dataRecord) + } + + keyedLeftDataRecordProducer + .leftJoin(rightStormService) + .map { + case (_, (leftRecord, rightRecordOpt)) => + mergeDataRecord( + leftRecord = leftRecord, + rightRecordOpt = rightRecordOpt, + leftDataRecordCounter = leftDataRecordCounter, + rightDataRecordCounter = rightDataRecordCounter, + mergeNumFeaturesCounter = mergeNumFeaturesCounter + ) + } + } + + /** + * Filter Unified User Actions events to include only actions 
that have a home timeline visit prior to landing on the page + */ + def isUuaBCEEventsFromHome(event: UnifiedUserAction): Boolean = { + def breadcrumbViewsContain(view: String): Boolean = + event.eventMetadata.breadcrumbViews.exists(_.contains(view)) + + (event.actionType) match { + case ActionType.ClientTweetV2Impression if breadcrumbViewsContain("home") => + true + case ActionType.ClientTweetVideoFullscreenV2Impression + if (breadcrumbViewsContain("home") && breadcrumbViewsContain("video")) => + true + case ActionType.ClientProfileV2Impression if breadcrumbViewsContain("home") => + true + case _ => false + } + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala new file mode 100644 index 000000000..8d7a41d21 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfig.scala @@ -0,0 +1,34 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.conversions.DurationOps._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.{ + OnlineAggregationStoresTrait, + RealTimeAggregateStore +} + +object TimelinesOnlineAggregationConfig + extends TimelinesOnlineAggregationDefinitionsTrait + with OnlineAggregationStoresTrait { + + import TimelinesOnlineAggregationSources._ + + override lazy val ProductionStore = RealTimeAggregateStore( + memcacheDataSet = "timelines_real_time_aggregates", + isProd = true, + cacheTTL = 5.days + ) + + override lazy val StagingStore = RealTimeAggregateStore( + memcacheDataSet = "twemcache_timelines_real_time_aggregates", + isProd = false, + cacheTTL = 5.days + ) + + override lazy val inputSource = timelinesOnlineAggregateSource + + /** + * AggregatesToCompute: This defines the complete set of aggregates to be + * computed by 
the aggregation job and to be stored in memcache. + */ + override lazy val AggregatesToCompute = ProdAggregates ++ StagingAggregates +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala new file mode 100644 index 000000000..0d7c072e2 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationConfigBase.scala @@ -0,0 +1,1112 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.conversions.DurationOps._ +import com.twitter.ml.api.Feature +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateSource +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.OnlineAggregationConfigTrait +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.CountMetric +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.SumMetric +import com.twitter.timelines.data_processing.ml_util.transforms.BinaryUnion +import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform +import com.twitter.timelines.data_processing.ml_util.transforms.IsNewUserTransform +import com.twitter.timelines.data_processing.ml_util.transforms.IsPositionTransform +import com.twitter.timelines.data_processing.ml_util.transforms.LogTransform +import com.twitter.timelines.data_processing.ml_util.transforms.PositionCase +import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform +import 
com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveUnverifiedUserTransform +import com.twitter.timelines.prediction.features.client_log_event.ClientLogEventDataRecordFeatures +import com.twitter.timelines.prediction.features.common.CombinedFeatures +import com.twitter.timelines.prediction.features.common.CombinedFeatures._ +import com.twitter.timelines.prediction.features.common.ProfileLabelFeatures +import com.twitter.timelines.prediction.features.common.SearchLabelFeatures +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_FIVE +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_ONE +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.IS_TOP_TEN +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures.LOG_POSITION +import com.twitter.timelines.prediction.features.list_features.ListFeatures +import com.twitter.timelines.prediction.features.recap.RecapFeatures +import com.twitter.util.Duration +import java.lang.{Boolean => JBoolean} +import java.lang.{Long => JLong} +import scala.io.Source + +object TimelinesOnlineAggregationUtils { + val TweetLabels: Set[Feature[JBoolean]] = CombinedFeatures.EngagementsRealTime + val TweetCoreLabels: Set[Feature[JBoolean]] = CombinedFeatures.CoreEngagements + val TweetDwellLabels: Set[Feature[JBoolean]] = CombinedFeatures.DwellEngagements + val TweetCoreAndDwellLabels: Set[Feature[JBoolean]] = TweetCoreLabels ++ TweetDwellLabels + val PrivateEngagementLabelsV2: Set[Feature[JBoolean]] = CombinedFeatures.PrivateEngagementsV2 + val ProfileCoreLabels: Set[Feature[JBoolean]] = ProfileLabelFeatures.CoreEngagements + val ProfileNegativeEngagementLabels: Set[Feature[JBoolean]] = + ProfileLabelFeatures.NegativeEngagements + val ProfileNegativeEngagementUnionLabels: Set[Feature[JBoolean]] = Set( + 
ProfileLabelFeatures.IS_NEGATIVE_FEEDBACK_UNION) + val SearchCoreLabels: Set[Feature[JBoolean]] = SearchLabelFeatures.CoreEngagements + val TweetNegativeEngagementLabels: Set[Feature[JBoolean]] = + CombinedFeatures.NegativeEngagementsRealTime + val TweetNegativeEngagementDontLikeLabels: Set[Feature[JBoolean]] = + CombinedFeatures.NegativeEngagementsRealTimeDontLike + val TweetNegativeEngagementSecondaryLabels: Set[Feature[JBoolean]] = + CombinedFeatures.NegativeEngagementsSecondary + val AllTweetNegativeEngagementLabels: Set[Feature[JBoolean]] = + TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels ++ TweetNegativeEngagementSecondaryLabels + val UserAuthorEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.UserAuthorEngagements + val ShareEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.ShareEngagements + val BookmarkEngagementLabels: Set[Feature[JBoolean]] = CombinedFeatures.BookmarkEngagements + val AllBCEDwellLabels: Set[Feature[JBoolean]] = + CombinedFeatures.TweetDetailDwellEngagements ++ CombinedFeatures.ProfileDwellEngagements ++ CombinedFeatures.FullscreenVideoDwellEngagements + val AllTweetUnionLabels: Set[Feature[JBoolean]] = Set( + CombinedFeatures.IS_IMPLICIT_POSITIVE_FEEDBACK_UNION, + CombinedFeatures.IS_EXPLICIT_POSITIVE_FEEDBACK_UNION, + CombinedFeatures.IS_ALL_NEGATIVE_FEEDBACK_UNION + ) + val AllTweetLabels: Set[Feature[JBoolean]] = + TweetLabels ++ TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels ++ ProfileCoreLabels ++ ProfileNegativeEngagementLabels ++ ProfileNegativeEngagementUnionLabels ++ UserAuthorEngagementLabels ++ SearchCoreLabels ++ ShareEngagementLabels ++ BookmarkEngagementLabels ++ PrivateEngagementLabelsV2 ++ AllBCEDwellLabels ++ AllTweetUnionLabels + + def addFeatureFilterFromResource( + prodGroup: AggregateGroup, + aggRemovalPath: String + ): AggregateGroup = { + // Returns a copy of prodGroup whose aggExclusionRegex holds every line of the + // resource at aggRemovalPath. The lines are materialised eagerly (toList) so + // that the underlying Source can be closed before returning. + val resource = Source.fromResource(aggRemovalPath) +
try prodGroup.copy(aggExclusionRegex = resource.getLines.toList) + finally resource.close() + } +} + +trait TimelinesOnlineAggregationDefinitionsTrait extends OnlineAggregationConfigTrait { + import TimelinesOnlineAggregationUtils._ + + def inputSource: AggregateSource + def ProductionStore: AggregateStore + def StagingStore: AggregateStore + + val TweetFeatures: Set[Feature[_]] = Set( + ClientLogEventDataRecordFeatures.HasConsumerVideo, + ClientLogEventDataRecordFeatures.PhotoCount + ) + val CandidateTweetSourceFeatures: Set[Feature[_]] = Set( + ClientLogEventDataRecordFeatures.FromRecap, + ClientLogEventDataRecordFeatures.FromRecycled, + ClientLogEventDataRecordFeatures.FromActivity, + ClientLogEventDataRecordFeatures.FromSimcluster, + ClientLogEventDataRecordFeatures.FromErg, + ClientLogEventDataRecordFeatures.FromCroon, + ClientLogEventDataRecordFeatures.FromList, + ClientLogEventDataRecordFeatures.FromRecTopic + ) + + def createStagingGroup(prodGroup: AggregateGroup): AggregateGroup = + prodGroup.copy( + outputStore = StagingStore + ) + + // Aggregate user engagements/features by tweet Id. + val tweetEngagement30MinuteCountsProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate user engagements/features by tweet Id.
+ val tweetVerifiedDontLikeEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v6", + preTransforms = Seq(RichRemoveUnverifiedUserTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val tweetNegativeEngagement6HourCounts = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v2", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val tweetVerifiedNegativeEngagementCounts = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v7", + preTransforms = Seq(RichRemoveUnverifiedUserTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val promotedTweetEngagementRealTimeCounts = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v3.is_promoted", + preTransforms = Seq( + DownsampleTransform( + negativeSamplingRate = 0.0, + keepLabels = Set(ClientLogEventDataRecordFeatures.IsPromoted))), + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(2.hours, 24.hours), + outputStore = ProductionStore, + includeAnyFeature = false, + 
includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate total engagement counts by tweet Id for non-public + * engagements. Similar to EB's public engagement counts. + */ + val tweetEngagementTotalCountsProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val tweetNegativeEngagementTotalCounts = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v2", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = TweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's user id. + */ + val userEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v1", + keys = Set(SharedFeatures.USER_ID), + features = TweetFeatures, + labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's user id. 
+ */ + val userEngagementRealTimeAggregatesV2 = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v2", + keys = Set(SharedFeatures.USER_ID), + features = ClientLogEventDataRecordFeatures.TweetFeaturesV2, + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate author's user state features grouped by viewer's user id. + */ + val userEngagementAuthorUserStateRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v3", + preTransforms = Seq.empty, + keys = Set(SharedFeatures.USER_ID), + features = AuthorFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate author's user state features grouped by viewer's user id. + */ + val userNegativeEngagementAuthorUserStateRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v4", + preTransforms = Seq.empty, + keys = Set(SharedFeatures.USER_ID), + features = AuthorFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's user id, with 48 hour halfLife. 
+ */ + val userEngagement48HourRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v5", + keys = Set(SharedFeatures.USER_ID), + features = TweetFeatures, + labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(48.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate negative engagements by author's user state features grouped by viewer's user id, with 72 hour halfLife. + */ + val userNegativeEngagementAuthorUserState72HourRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_aggregates_v6", + preTransforms = Seq.empty, + keys = Set(SharedFeatures.USER_ID), + features = AuthorFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(72.hours), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate features grouped by source author id: for each author, aggregate features are created + * to quantify engagements (fav, reply, etc.) that the author's tweets have received. + */ + val authorEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = TweetLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate features grouped by source author id: for each author, aggregate features are created + * to quantify negative engagements (mute, block, etc.) that the author's tweets have received.
+ * + * This aggregate group is not used in Home, but it is used in Follow Recommendation Service so we need to keep it for now. + * + */ + val authorNegativeEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_aggregates_v2", + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = TweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate features grouped by source author id: for each author, aggregate features are created + * to quantify negative engagements (don't like) that the author's tweets have received from + * verified users. + */ + val authorVerifiedNegativeEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_aggregates_v3", + preTransforms = Seq(RichRemoveUnverifiedUserTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet engagement labels (no extra features) grouped by topic id. + */ + val topicEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_aggregates_v1", + keys = Set(TimelinesSharedFeatures.TOPIC_ID), + features = Set.empty, + labels = TweetLabels ++ AllTweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate user engagements / user state by topic id.
+ */ + val topicEngagementUserStateRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_aggregates_v2", + keys = Set(TimelinesSharedFeatures.TOPIC_ID), + features = UserFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate user negative engagements / user state by topic id. + */ + val topicNegativeEngagementUserStateRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_aggregates_v3", + keys = Set(TimelinesSharedFeatures.TOPIC_ID), + features = UserFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by topic id like real_time_topic_aggregates_v1 but 24hour halfLife + */ + val topicEngagement24HourRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_aggregates_v4", + keys = Set(TimelinesSharedFeatures.TOPIC_ID), + features = Set.empty, + labels = TweetLabels ++ AllTweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate user engagements / user state by tweet Id. 
+ val tweetEngagementUserStateRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v3", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = UserFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate user engagements / user gender by tweet Id. + val tweetEngagementGenderRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v4", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = UserFeaturesAdapter.GenderBooleanFeatures, + labels = + TweetCoreAndDwellLabels ++ TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate user negative engagements / user state by tweet Id. + val tweetNegativeEngagementUserStateRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v5", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = UserFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate user negative engagements / user state by tweet Id. 
+ val tweetVerifiedNegativeEngagementUserStateRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_aggregates_v8", + preTransforms = Seq(RichRemoveUnverifiedUserTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = UserFeaturesAdapter.UserStateBooleanFeatures, + labels = TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet engagement labels and candidate tweet source features grouped by user id. + */ + val userCandidateTweetSourceEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_candidate_tweet_source_aggregates_v1", + keys = Set(SharedFeatures.USER_ID), + features = CandidateTweetSourceFeatures, + labels = TweetCoreAndDwellLabels ++ NegativeEngagementsRealTimeDontLike, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet engagement labels and candidate tweet source features grouped by user id. 
+ */ + val userCandidateTweetSourceEngagement48HourRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_candidate_tweet_source_aggregates_v2", + keys = Set(SharedFeatures.USER_ID), + features = CandidateTweetSourceFeatures, + labels = TweetCoreAndDwellLabels ++ NegativeEngagementsRealTimeDontLike, + metrics = Set(CountMetric), + halfLives = Set(48.hours), + outputStore = ProductionStore, + includeAnyFeature = false, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's user id on Profile engagements + */ + val userProfileEngagementRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "profile_real_time_user_aggregates_v1", + preTransforms = Seq(IsNewUserTransform), + keys = Set(SharedFeatures.USER_ID), + features = TweetFeatures, + labels = ProfileCoreLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val NegativeEngagementsUnionTransform = RichITransform( + BinaryUnion( + featuresToUnify = ProfileNegativeEngagementLabels, + outputFeature = ProfileLabelFeatures.IS_NEGATIVE_FEEDBACK_UNION + )) + + /** + * Aggregate tweet features grouped by viewer's user id on Profile negative engagements. 
+ */ + val userProfileNegativeEngagementRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "profile_negative_engagement_real_time_user_aggregates_v1", + preTransforms = Seq(NegativeEngagementsUnionTransform), + keys = Set(SharedFeatures.USER_ID), + features = Set.empty, + labels = ProfileNegativeEngagementLabels ++ ProfileNegativeEngagementUnionLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 72.hours, 14.day), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's and author's user ids and on Profile engagements + */ + val userAuthorProfileEngagementRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "user_author_profile_real_time_aggregates_v1", + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = ProfileCoreLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours, 72.hours), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate tweet features grouped by viewer's and author's user ids and on negative Profile engagements + */ + val userAuthorProfileNegativeEngagementRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "user_author_profile_negative_engagement_real_time_aggregates_v1", + preTransforms = Seq(NegativeEngagementsUnionTransform), + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = ProfileNegativeEngagementUnionLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 72.hours, 14.day), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val 
newUserAuthorEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_new_user_author_aggregates_v1", + preTransforms = Seq(IsNewUserTransform), + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = TweetCoreAndDwellLabels ++ Set( + IS_CLICKED, + IS_PROFILE_CLICKED, + IS_PHOTO_EXPANDED + ), + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val userAuthorEngagementRealTimeAggregatesProd = { + // Computing user-author real-time aggregates is very expensive so we + // take the union of all major negative feedback engagements to create + // a single negative label (IS_NEGATIVE_FEEDBACK_UNION) for aggregation. We also include a number of + // core positive engagements. + val BinaryUnionNegativeEngagements = + BinaryUnion( + featuresToUnify = AllTweetNegativeEngagementLabels, + outputFeature = IS_NEGATIVE_FEEDBACK_UNION + ) + val BinaryUnionNegativeEngagementsTransform = RichITransform(BinaryUnionNegativeEngagements) + + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_author_aggregates_v1", + preTransforms = Seq(BinaryUnionNegativeEngagementsTransform), + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = UserAuthorEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 1.day), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + } + + /** + * Aggregate tweet features grouped by list id.
+ */ + val listEngagementRealTimeAggregatesProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_list_aggregates_v1", + keys = Set(ListFeatures.LIST_ID), + features = Set.empty, + labels = + TweetCoreAndDwellLabels ++ TweetNegativeEngagementLabels ++ TweetNegativeEngagementDontLikeLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate features grouped by topic of tweet and country from user's location + val topicCountryRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_country_aggregates_v1", + keys = Set(TimelinesSharedFeatures.TOPIC_ID, UserFeaturesAdapter.USER_COUNTRY_ID), + features = Set.empty, + labels = + TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels ++ PrivateEngagementLabelsV2 ++ ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 72.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate features grouped by TweetId_Country from user's location + val tweetCountryRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_country_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID), + features = Set.empty, + labels = TweetCoreAndDwellLabels ++ AllTweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = true, + includeTimestampFeature = false, + ) + + // Additional aggregate features grouped by TweetId_Country from user's location + val tweetCountryPrivateEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_country_aggregates_v2", + keys = 
Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID), + features = Set.empty, + labels = PrivateEngagementLabelsV2 ++ ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 72.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Aggregate features grouped by TweetId_Country from user's location + val tweetCountryVerifiedNegativeEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_country_aggregates_v3", + preTransforms = Seq(RichRemoveUnverifiedUserTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID, UserFeaturesAdapter.USER_COUNTRY_ID), + features = Set.empty, + labels = AllTweetNegativeEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, Duration.Top), + outputStore = ProductionStore, + includeAnyLabel = true, + includeTimestampFeature = false, + ) + + object positionTranforms extends IsPositionTransform { + override val isInPositionRangeFeature: Seq[PositionCase] = + Seq(PositionCase(1, IS_TOP_ONE), PositionCase(5, IS_TOP_FIVE), PositionCase(10, IS_TOP_TEN)) + override val decodedPositionFeature: Feature.Discrete = + ClientLogEventDataRecordFeatures.InjectedPosition + } + + val userPositionEngagementsCountsProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_position_based_user_aggregates_v1", + keys = Set(SharedFeatures.USER_ID), + features = Set(IS_TOP_ONE, IS_TOP_FIVE, IS_TOP_TEN), + labels = TweetCoreAndDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + preTransforms = Seq(positionTranforms), + includeAnyLabel = false, + includeAnyFeature = false, + includeTimestampFeature = false, + ) + + val userPositionEngagementsSumProd = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_position_based_user_sum_aggregates_v2", + 
keys = Set(SharedFeatures.USER_ID), + features = Set(LOG_POSITION), + labels = TweetCoreAndDwellLabels, + metrics = Set(SumMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + preTransforms = + Seq(new LogTransform(ClientLogEventDataRecordFeatures.InjectedPosition, LOG_POSITION)), + includeAnyLabel = false, + includeAnyFeature = false, + includeTimestampFeature = false, + ) + + // Aggregates for share engagements + val tweetShareEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_share_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val userShareEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_share_aggregates_v1", + keys = Set(SharedFeatures.USER_ID), + features = Set.empty, + labels = ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val userAuthorShareEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_author_share_aggregates_v1", + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val topicShareEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_topic_share_aggregates_v1", + keys = 
Set(TimelinesSharedFeatures.TOPIC_ID), + features = Set.empty, + labels = ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val authorShareEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_share_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = ShareEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + // Bookmark RTAs + val tweetBookmarkEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_bookmark_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = BookmarkEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val userBookmarkEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_bookmark_aggregates_v1", + keys = Set(SharedFeatures.USER_ID), + features = Set.empty, + labels = BookmarkEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val userAuthorBookmarkEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_author_bookmark_aggregates_v1", + keys = Set(SharedFeatures.USER_ID, TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = BookmarkEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + 
outputStore = ProductionStore, + includeAnyFeature = true, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val authorBookmarkEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_bookmark_aggregates_v1", + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = Set.empty, + labels = BookmarkEngagementLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate on user level dwell labels from BCE + */ + val userBCEDwellEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_user_bce_dwell_aggregates", + keys = Set(SharedFeatures.USER_ID), + features = Set.empty, + labels = AllBCEDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + /** + * Aggregate on tweet level dwell labels from BCE + */ + val tweetBCEDwellEngagementsRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_tweet_bce_dwell_aggregates", + keys = Set(TimelinesSharedFeatures.SOURCE_TWEET_ID), + features = Set.empty, + labels = AllBCEDwellLabels, + metrics = Set(CountMetric), + halfLives = Set(30.minutes, 24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeTimestampFeature = false, + ) + + val ImplicitPositiveEngagementsUnionTransform = RichITransform( + BinaryUnion( + featuresToUnify = CombinedFeatures.ImplicitPositiveEngagements, + outputFeature = CombinedFeatures.IS_IMPLICIT_POSITIVE_FEEDBACK_UNION + ) + ) + + val ExplicitPositiveEngagementsUnionTransform = RichITransform( + BinaryUnion( + featuresToUnify = CombinedFeatures.ExplicitPositiveEngagements, + outputFeature = 
CombinedFeatures.IS_EXPLICIT_POSITIVE_FEEDBACK_UNION + ) + ) + + val AllNegativeEngagementsUnionTransform = RichITransform( + BinaryUnion( + featuresToUnify = CombinedFeatures.AllNegativeEngagements, + outputFeature = CombinedFeatures.IS_ALL_NEGATIVE_FEEDBACK_UNION + ) + ) + + /** + * Aggregate features for author content preference + */ + val authorContentPreferenceRealTimeAggregates = + AggregateGroup( + inputSource = inputSource, + aggregatePrefix = "real_time_author_content_preference_aggregates", + preTransforms = Seq( + ImplicitPositiveEngagementsUnionTransform, + ExplicitPositiveEngagementsUnionTransform, + AllNegativeEngagementsUnionTransform), + keys = Set(TimelinesSharedFeatures.SOURCE_AUTHOR_ID), + features = + ClientLogEventDataRecordFeatures.AuthorContentPreferenceTweetTypeFeatures ++ AuthorFeaturesAdapter.UserStateBooleanFeatures, + labels = AllTweetUnionLabels, + metrics = Set(CountMetric), + halfLives = Set(24.hours), + outputStore = ProductionStore, + includeAnyLabel = false, + includeAnyFeature = false, + ) + + val FeaturesGeneratedByPreTransforms = Set(LOG_POSITION, IS_TOP_TEN, IS_TOP_FIVE, IS_TOP_ONE) + + val ProdAggregateGroups = Set( + tweetEngagement30MinuteCountsProd, + tweetEngagementTotalCountsProd, + tweetNegativeEngagement6HourCounts, + tweetNegativeEngagementTotalCounts, + userEngagementRealTimeAggregatesProd, + userEngagement48HourRealTimeAggregatesProd, + userNegativeEngagementAuthorUserStateRealTimeAggregates, + userNegativeEngagementAuthorUserState72HourRealTimeAggregates, + authorEngagementRealTimeAggregatesProd, + topicEngagementRealTimeAggregatesProd, + topicEngagement24HourRealTimeAggregatesProd, + tweetEngagementUserStateRealTimeAggregatesProd, + tweetNegativeEngagementUserStateRealTimeAggregates, + userProfileEngagementRealTimeAggregates, + newUserAuthorEngagementRealTimeAggregatesProd, + userAuthorEngagementRealTimeAggregatesProd, + listEngagementRealTimeAggregatesProd, + tweetCountryRealTimeAggregates, + 
tweetShareEngagementsRealTimeAggregates, + userShareEngagementsRealTimeAggregates, + userAuthorShareEngagementsRealTimeAggregates, + topicShareEngagementsRealTimeAggregates, + authorShareEngagementsRealTimeAggregates, + tweetBookmarkEngagementsRealTimeAggregates, + userBookmarkEngagementsRealTimeAggregates, + userAuthorBookmarkEngagementsRealTimeAggregates, + authorBookmarkEngagementsRealTimeAggregates, + topicCountryRealTimeAggregates, + tweetCountryPrivateEngagementsRealTimeAggregates, + userBCEDwellEngagementsRealTimeAggregates, + tweetBCEDwellEngagementsRealTimeAggregates, + authorContentPreferenceRealTimeAggregates, + authorVerifiedNegativeEngagementRealTimeAggregatesProd, + tweetVerifiedDontLikeEngagementRealTimeAggregatesProd, + tweetVerifiedNegativeEngagementCounts, + tweetVerifiedNegativeEngagementUserStateRealTimeAggregates, + tweetCountryVerifiedNegativeEngagementsRealTimeAggregates + ).map( + addFeatureFilterFromResource( + _, + "com/twitter/timelines/prediction/common/aggregates/real_time/aggregates_to_drop.txt")) + + val StagingAggregateGroups = ProdAggregateGroups.map(createStagingGroup) + + /** + * Contains the fully typed aggregate groups from which important + * values can be derived e.g. the features to be computed, halflives etc. + */ + override val ProdAggregates = ProdAggregateGroups.flatMap(_.buildTypedAggregateGroups()) + + override val StagingAggregates = StagingAggregateGroups.flatMap(_.buildTypedAggregateGroups()) + + + override val ProdCommonAggregates = ProdAggregates + .filter(_.keysToAggregate == Set(SharedFeatures.USER_ID)) + + /** + * This defines the set of selected features from a candidate + * that we'd like to send to the served features cache by TLM. + * These should include interesting and necessary features that + * cannot be extracted from LogEvents only by the real-time aggregates + * job. If you are adding new AggregateGroups requiring TLM-side + * candidate features, make sure to add them here. 
+ */ + val candidateFeaturesToCache: Set[Feature[_]] = Set( + TimelinesSharedFeatures.SOURCE_AUTHOR_ID, + RecapFeatures.HASHTAGS, + RecapFeatures.MENTIONED_SCREEN_NAMES, + RecapFeatures.URL_DOMAINS + ) +} + +/** + * This config should only be used to access the aggregate features constructed by the + * aggregation config, and not for implementing an online real-time aggregates job. + */ +object TimelinesOnlineAggregationFeaturesOnlyConfig + extends TimelinesOnlineAggregationDefinitionsTrait { + + private[real_time] case class DummyAggregateSource(name: String, timestampFeature: Feature[JLong]) + extends AggregateSource + + private[real_time] case class DummyAggregateStore(name: String) extends AggregateStore + + override lazy val inputSource = DummyAggregateSource( + name = "timelines_rta", + timestampFeature = SharedFeatures.TIMESTAMP + ) + override lazy val ProductionStore = DummyAggregateStore("timelines_rta") + override lazy val StagingStore = DummyAggregateStore("timelines_rta") + + override lazy val AggregatesToCompute = ProdAggregates ++ StagingAggregates +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala new file mode 100644 index 000000000..71e97a1b1 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesOnlineAggregationSources.scala @@ -0,0 +1,5 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +object TimelinesOnlineAggregationSources { + val timelinesOnlineAggregateSource = new TimelinesStormAggregateSource +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala new file mode 100644 index 000000000..e386d4da1 --- /dev/null 
+++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesRealTimeAggregatesJob.scala @@ -0,0 +1,182 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.DefaultStatsReceiver +import com.twitter.summingbird.Options +import com.twitter.summingbird.online.option.FlatMapParallelism +import com.twitter.summingbird.online.option.SourceParallelism +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron._ +import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform +import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform +import com.twitter.timelines.data_processing.ml_util.transforms.UserDownsampleTransform + +import com.twitter.timelines.prediction.common.aggregates.BCELabelTransformFromUUADataRecord + +/** + * Sets up relevant topology parameters. Our primary goal is to handle the + * LogEvent stream and aggregate (sum) on the parsed DataRecords without falling + * behind. Our constraint is the resulting write (and read) QPS to the backing + * memcache store. + * + * If the job is falling behind, add more flatMappers and/or Summers after + * inspecting the viz panels for the respective job (go/heron-ui). An increase in + * Summers (and/or aggregation keys and features in the config) results in an + * increase in memcache QPS (go/cb and search for our cache). Adjust with CacheSize + * settings until QPS is well-controlled. + * + */ +object TimelinesRealTimeAggregatesJobConfigs extends RealTimeAggregatesJobConfigs { + import TimelinesOnlineAggregationUtils._ + + /** + * We remove input records that do not contain a label/engagement as defined in AllTweetLabels, which includes + * explicit user engagements including public, private and impression events.
By avoiding ingesting records without + * engagements, we guarantee that no distribution shifts occur in computed aggregate features when we add a new spout + * to input aggregate sources. Counterfactual signal is still available since we aggregate on explicit dwell + * engagements. + */ + val NegativeDownsampleTransform = + DownsampleTransform( + negativeSamplingRate = 0.0, + keepLabels = AllTweetLabels, + positiveSamplingRate = 1.0) + + /** + * We downsample positive engagements for devel topology to reduce traffic, aiming for equivalent of 10% of prod traffic. + * First apply consistent downsampling to 10% of users, and then apply downsampling to remove records without + * explicit labels. We apply user-consistent sampling to more closely approximate prod query patterns. + */ + val StagingUserBasedDownsampleTransform = + UserDownsampleTransform( + availability = 1000, + featureName = "rta_devel" + ) + + override val Prod = RealTimeAggregatesJobConfig( + appId = "summingbird_timelines_rta", + topologyWorkers = 1450, + sourceCount = 120, + flatMapCount = 1800, + summerCount = 3850, + cacheSize = 200, + containerRamGigaBytes = 54, + name = "timelines_real_time_aggregates", + teamName = "timelines", + teamEmail = "", + // If one component is hitting GC limit at prod, tune componentToMetaSpaceSizeMap. + // Except for Source bolts. Tune componentToRamGigaBytesMap for Source bolts instead. + componentToMetaSpaceSizeMap = Map( + "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M", + "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M" + ), + // If either component is hitting memory limit at prod + // its memory need to increase: either increase total memory of container (containerRamGigaBytes), + // or allocate more memory for one component while keeping total memory unchanged.
+ componentToRamGigaBytesMap = Map( + "Tail-FlatMap-Source" -> 3, // Home source + "Tail-FlatMap-Source.2" -> 3, // Profile source + "Tail-FlatMap-Source.3" -> 3, // Search source + "Tail-FlatMap-Source.4" -> 3, // UUA source + "Tail-FlatMap" -> 8 + // Tail will use the leftover memory in the container. + // Make sure to tune topologyWorkers and containerRamGigaBytes such that this is greater than 10 GB. + ), + topologyNamedOptions = Map( + "TL_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(120)), + "PROFILE_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(30)), + "SEARCH_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(10)), + "UUA_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(10)), + "COMBINED_PRODUCER" -> Options() + .set(FlatMapParallelism(1800)) + ), + // The UUA datarecord for BCE events inputted will not have binary labels populated. + // BCELabelTransform will set the datarecord with binary BCE dwell labels features based on the corresponding dwell_time_ms. + // It's important to have the BCELabelTransformFromUUADataRecord before NegativeDownsampleTransform + // because NegativeDownsampleTransform will remove datarecord that contains no features from AllTweetLabels. + onlinePreTransforms = + Seq(RichITransform(BCELabelTransformFromUUADataRecord), NegativeDownsampleTransform) + ) + + /** + * we downsample 10% computation of devel RTA based on [[StagingUserBasedDownsampleTransform]]. + * To better test scalability of topology, we reduce computing resource of components "Tail-FlatMap" + * and "Tail" to be 10% of prod but keep computing resource of component "Tail-FlatMap-Source" unchanged. + * NOTE: figures quoted here earlier (flatMapCount=110, summerCount=105, sourceCount=100, topologyWorkers=63) are stale; the values set below are authoritative.
+ */ + override val Devel = RealTimeAggregatesJobConfig( + appId = "summingbird_timelines_rta_devel", + topologyWorkers = 120, + sourceCount = 120, + flatMapCount = 150, + summerCount = 300, + cacheSize = 200, + containerRamGigaBytes = 54, + name = "timelines_real_time_aggregates_devel", + teamName = "timelines", + teamEmail = "", + // If one component is hitting GC limit at prod, tune componentToMetaSpaceSizeMap + // Except for Source bolts. Tune componentToRamGigaBytesMap for Source bolts instead. + componentToMetaSpaceSizeMap = Map( + "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M", + "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M" + ), + // If either component is hitting memory limit at prod + // its memory need to increase: either increase total memory of container (containerRamGigaBytes), + // or allocate more memory for one component while keeping total memory unchanged. + componentToRamGigaBytesMap = Map( + "Tail-FlatMap-Source" -> 3, // Home source + "Tail-FlatMap-Source.2" -> 3, // Profile source + "Tail-FlatMap-Source.3" -> 3, // Search source + "Tail-FlatMap-Source.4" -> 3, // UUA source + "Tail-FlatMap" -> 8 + // Tail will use the leftover memory in the container. + // Make sure to tune topologyWorkers and containerRamGigaBytes such that this is greater than 10 GB.
+ ), + topologyNamedOptions = Map( + "TL_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(120)), + "PROFILE_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(30)), + "SEARCH_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(10)), + "UUA_EVENTS_SOURCE" -> Options() + .set(SourceParallelism(10)), + "COMBINED_PRODUCER" -> Options() + .set(FlatMapParallelism(150)) + ), + // It's important to have the BCELabelTransformFromUUADataRecord before NegativeDownsampleTransform + onlinePreTransforms = Seq( + StagingUserBasedDownsampleTransform, + RichITransform(BCELabelTransformFromUUADataRecord), + NegativeDownsampleTransform), + enableUserReindexingNighthawkBtreeStore = true, + enableUserReindexingNighthawkHashStore = true, + userReindexingNighthawkBtreeStoreConfig = NighthawkUnderlyingStoreConfig( + serversetPath = + "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_btree_test_api", + // NOTE: table names are prefixed to every pkey so keep it short + tableName = "u_r_v1", // (u)ser_(r)eindexing_v1 + // keep ttl <= 1 day because it's keyed on user, and we will have limited hit rates beyond 1 day + cacheTTL = 1.day + ), + userReindexingNighthawkHashStoreConfig = NighthawkUnderlyingStoreConfig( + // For prod: "/s/cache-user/nighthawk_timelines_real_time_aggregates_hash_api", + serversetPath = + "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_hash_test_api", + // NOTE: table names are prefixed to every pkey so keep it short + tableName = "u_r_v1", // (u)ser_(r)eindexing_v1 + // keep ttl <= 1 day because it's keyed on user, and we will have limited hit rates beyond 1 day + cacheTTL = 1.day + ) + ) +} + +object TimelinesRealTimeAggregatesJob extends RealTimeAggregatesJobBase { + override lazy val statsReceiver = DefaultStatsReceiver.scope("timelines_real_time_aggregates") + override lazy val jobConfigs = TimelinesRealTimeAggregatesJobConfigs + override lazy val aggregatesToCompute =
TimelinesOnlineAggregationConfig.AggregatesToCompute +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala new file mode 100644 index 000000000..2e096dc07 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TimelinesStormAggregateSource.scala @@ -0,0 +1,185 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.clientapp.thriftscala.LogEvent +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.summingbird._ +import com.twitter.summingbird.storm.Storm +import com.twitter.summingbird_internal.sources.AppId +import com.twitter.summingbird_internal.sources.storm.remote.ClientEventSourceScrooge2 +import com.twitter.timelines.data_processing.ad_hoc.suggests.common.AllScribeProcessor +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.StormAggregateSource +import com.twitter.timelines.prediction.adapters.client_log_event.ClientLogEventAdapter +import com.twitter.timelines.prediction.adapters.client_log_event.ProfileClientLogEventAdapter +import com.twitter.timelines.prediction.adapters.client_log_event.SearchClientLogEventAdapter +import com.twitter.timelines.prediction.adapters.client_log_event.UuaEventAdapter +import com.twitter.unified_user_actions.client.config.KafkaConfigs +import com.twitter.unified_user_actions.client.summingbird.UnifiedUserActionsSourceScrooge +import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction +import
scala.collection.JavaConverters._ + +/** + * Storm Producer for client events generated on Home, Profile, and Search, plus Unified User Actions (UUA) events + */ +class TimelinesStormAggregateSource extends StormAggregateSource { + + override val name = "timelines_rta" + override val timestampFeature = SharedFeatures.TIMESTAMP + + private lazy val TimelinesClientEventSourceName = "TL_EVENTS_SOURCE" + private lazy val ProfileClientEventSourceName = "PROFILE_EVENTS_SOURCE" + private lazy val SearchClientEventSourceName = "SEARCH_EVENTS_SOURCE" + private lazy val UuaEventSourceName = "UUA_EVENTS_SOURCE" + private lazy val CombinedProducerName = "COMBINED_PRODUCER" + private lazy val FeatureStoreProducerName = "FEATURE_STORE_PRODUCER" + + private def isNewUserEvent(event: LogEvent): Boolean = { + event.logBase.flatMap(_.userId).flatMap(SnowflakeId.timeFromIdOpt).exists(_.untilNow < 30.days) + } + + private def mkDataRecords(event: LogEvent, dataRecordCounter: Counter): Seq[DataRecord] = { + val dataRecords: Seq[DataRecord] = + if (AllScribeProcessor.isValidSuggestTweetEvent(event)) { + ClientLogEventAdapter.adaptToDataRecords(event).asScala + } else { + Seq.empty[DataRecord] + } + dataRecordCounter.incr(dataRecords.size) + dataRecords + } + + private def mkProfileDataRecords( + event: LogEvent, + dataRecordCounter: Counter + ): Seq[DataRecord] = { + val dataRecords: Seq[DataRecord] = + ProfileClientLogEventAdapter.adaptToDataRecords(event).asScala + dataRecordCounter.incr(dataRecords.size) + dataRecords + } + + private def mkSearchDataRecords( + event: LogEvent, + dataRecordCounter: Counter + ): Seq[DataRecord] = { + val dataRecords: Seq[DataRecord] = + SearchClientLogEventAdapter.adaptToDataRecords(event).asScala + dataRecordCounter.incr(dataRecords.size) + dataRecords + } + + private def mkUuaDataRecords( + event: UnifiedUserAction, + dataRecordCounter: Counter + ): Seq[DataRecord] = { + val dataRecords: Seq[DataRecord] = + UuaEventAdapter.adaptToDataRecords(event).asScala +
dataRecordCounter.incr(dataRecords.size) + dataRecords + } + + override def build( + statsReceiver: StatsReceiver, + jobConfig: RealTimeAggregatesJobConfig + ): Producer[Storm, DataRecord] = { + lazy val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName) + lazy val dataRecordCounter = scopedStatsReceiver.counter("dataRecord") + + // Home Timeline Engagements + // Step 1: => LogEvent + lazy val clientEventProducer: Producer[Storm, HomeEvent[LogEvent]] = + ClientEventSourceScrooge2( + appId = AppId(jobConfig.appId), + topic = "julep_client_event_suggests", + resumeAtLastReadOffset = false, + enableTls = true + ).source.map(HomeEvent[LogEvent]).name(TimelinesClientEventSourceName) + + // Profile Engagements + // Step 1: => LogEvent + lazy val profileClientEventProducer: Producer[Storm, ProfileEvent[LogEvent]] = + ClientEventSourceScrooge2( + appId = AppId(jobConfig.appId), + topic = "julep_client_event_profile_real_time_engagement_metrics", + resumeAtLastReadOffset = false, + enableTls = true + ).source + .map(ProfileEvent[LogEvent]) + .name(ProfileClientEventSourceName) + + // Search Engagements + // Step 1: => LogEvent + // Only process events for all users to save resource (NOTE(review): no user filter is applied here; confirm intent) + lazy val searchClientEventProducer: Producer[Storm, SearchEvent[LogEvent]] = + ClientEventSourceScrooge2( + appId = AppId(jobConfig.appId), + topic = "julep_client_event_search_real_time_engagement_metrics", + resumeAtLastReadOffset = false, + enableTls = true + ).source + .map(SearchEvent[LogEvent]) + .name(SearchClientEventSourceName) + + // Unified User Actions (includes Home and other product surfaces) + lazy val uuaEventProducer: Producer[Storm, UuaEvent[UnifiedUserAction]] = + UnifiedUserActionsSourceScrooge( + appId = AppId(jobConfig.appId), + parallelism = 10, + kafkaConfig = KafkaConfigs.ProdUnifiedUserActionsEngagementOnly + ).source + .filter(StormAggregateSourceUtils.isUuaBCEEventsFromHome(_)) + .map(UuaEvent[UnifiedUserAction]) + .name(UuaEventSourceName) + + //
Combined + // Step 2: + // (a) Combine + // (b) Transform LogEvent / UnifiedUserAction => Seq[DataRecord] + // (c) Apply sampler + lazy val combinedClientEventDataRecordProducer: Producer[Storm, Event[DataRecord]] = + profileClientEventProducer // This becomes the bottom branch + .merge(clientEventProducer) // This becomes the middle branch + .merge(searchClientEventProducer) + .merge(uuaEventProducer) // This becomes the top + .flatMap { // LogEvent / UnifiedUserAction => Seq[DataRecord] + case e: HomeEvent[LogEvent] => + mkDataRecords(e.event, dataRecordCounter).map(HomeEvent[DataRecord]) + case e: ProfileEvent[LogEvent] => + mkProfileDataRecords(e.event, dataRecordCounter).map(ProfileEvent[DataRecord]) + case e: SearchEvent[LogEvent] => + mkSearchDataRecords(e.event, dataRecordCounter).map(SearchEvent[DataRecord]) + case e: UuaEvent[UnifiedUserAction] => + mkUuaDataRecords( + e.event, + dataRecordCounter + ).map(UuaEvent[DataRecord]) + } + .flatMap { // Apply sampler + case e: HomeEvent[DataRecord] => + jobConfig.sequentiallyTransform(e.event).map(HomeEvent[DataRecord]) + case e: ProfileEvent[DataRecord] => + jobConfig.sequentiallyTransform(e.event).map(ProfileEvent[DataRecord]) + case e: SearchEvent[DataRecord] => + jobConfig.sequentiallyTransform(e.event).map(SearchEvent[DataRecord]) + case e: UuaEvent[DataRecord] => + jobConfig.sequentiallyTransform(e.event).map(UuaEvent[DataRecord]) + } + .name(CombinedProducerName) + + // Step 3: Join with Feature Store features + lazy val featureStoreDataRecordProducer: Producer[Storm, DataRecord] = + StormAggregateSourceUtils + .wrapByFeatureStoreClient( + underlyingProducer = combinedClientEventDataRecordProducer, + jobConfig = jobConfig, + scopedStatsReceiver = scopedStatsReceiver + ).map(_.event).name(FeatureStoreProducerName) + + featureStoreDataRecordProducer + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala
b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala new file mode 100644 index 000000000..0d5c06d7c --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesAdapter.scala @@ -0,0 +1,35 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.featurestore.catalog.entities.core.Tweet +import com.twitter.ml.featurestore.catalog.features.trends.TweetTrendsScores +import com.twitter.ml.featurestore.lib.TweetId +import com.twitter.ml.featurestore.lib.data.PredictionRecord +import com.twitter.ml.featurestore.lib.data.PredictionRecordAdapter +import com.twitter.ml.featurestore.lib.feature.BoundFeature +import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet +import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase +import java.util +import scala.collection.JavaConverters._ + +object TweetFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] { + + private val ContinuousFeatureMap: Map[BoundFeature[TweetId, Double], Feature.Continuous] = Map() /* empty map, so TweetFeaturesSet and AllFeatures derived from it below are empty as well */ + + val TweetFeaturesSet: BoundFeatureSet = new BoundFeatureSet(ContinuousFeatureMap.keys.toSet) + + val AllFeatures: Seq[Feature[_]] = + ContinuousFeatureMap.values.toSeq + + private val adapter = PredictionRecordAdapter.oneToOne(TweetFeaturesSet) + + override def getFeatureContext: FeatureContext = new FeatureContext(AllFeatures: _*) + + override def commonFeatures: Set[Feature[_]] = Set.empty + + override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = { + List(adapter.adaptToDataRecord(record)).asJava + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala
b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala new file mode 100644 index 000000000..b461e179a --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TweetFeaturesReadableStore.scala @@ -0,0 +1,53 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.ml.api.DataRecord +import com.twitter.ml.featurestore.lib.TweetId +import com.twitter.ml.featurestore.lib.data.PredictionRecord +import com.twitter.ml.featurestore.lib.entity.Entity +import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest} +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase +import com.twitter.util.Future +import scala.collection.JavaConverters._ + +class TweetFeaturesReadableStore( + featureStoreClient: FeatureStoreClient, + tweetEntity: Entity[TweetId], + tweetFeaturesAdapter: TimelinesAdapterBase[PredictionRecord]) + extends ReadableStore[Set[Long], DataRecord] { /* each key is a set of tweet ids; multiGet sends one featureStoreClient call covering all keys */ + + override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = { + val orderedKeys: Seq[K] = keys.toSeq /* fix an order so responses can be matched back to keys by index */ + val featureStoreRequests: Seq[FeatureStoreRequest] = getFeatureStoreRequests(orderedKeys) + val predictionRecordsFut: Future[Seq[PredictionRecord]] = featureStoreClient( + featureStoreRequests) + + getDataRecordMap(orderedKeys, predictionRecordsFut) + } + + private def getFeatureStoreRequests[K <: Set[Long]]( + orderedKeys: Seq[K] + ): Seq[FeatureStoreRequest] = { + orderedKeys.map { key: Set[Long] => + FeatureStoreRequest( + entityIds = key.map { tweetId => tweetEntity.withId(TweetId(tweetId)) }.toSeq + ) + } + } + + private def getDataRecordMap[K <: Set[Long]]( + orderedKeys: Seq[K], + predictionRecordsFut: Future[Seq[PredictionRecord]] + ): Map[K, Future[Option[DataRecord]]] = { + orderedKeys.zipWithIndex.map { + case (tweetIdSet, index) => + val dataRecordFutOpt:
Future[Option[DataRecord]] = predictionRecordsFut.map { + predictionRecords => /* lift(index) gives None when the response holds no record at this key's position */ + predictionRecords.lift(index).flatMap { predictionRecordAtIndex: PredictionRecord => + tweetFeaturesAdapter.adaptToDataRecords(predictionRecordAtIndex).asScala.headOption + } + } + (tweetIdSet, dataRecordFutOpt) + }.toMap + } +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala new file mode 100644 index 000000000..92b6618e4 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/TypeSafeRunner.scala @@ -0,0 +1,7 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.summingbird_internal.runner.storm.GenericRunner + +object TypeSafeRunner { + def main(args: Array[String]): Unit = GenericRunner(args, TimelinesRealTimeAggregatesJob(_)) +} diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala new file mode 100644 index 000000000..8ff39938c --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesAdapter.scala @@ -0,0 +1,108 @@ +package com.twitter.timelines.prediction.common.aggregates.real_time + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType.InferredGender +import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState +import com.twitter.ml.api.Feature.Binary +import com.twitter.ml.api.Feature.Text +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api.RichDataRecord +import com.twitter.ml.featurestore.catalog.entities.core.User +import com.twitter.ml.featurestore.catalog.features.core.UserAccount +import
com.twitter.ml.featurestore.catalog.features.geo.UserLocation
+import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity
+import com.twitter.ml.featurestore.lib.EntityId
+import com.twitter.ml.featurestore.lib.data.PredictionRecord
+import com.twitter.ml.featurestore.lib.feature.BoundFeature
+import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
+import com.twitter.ml.featurestore.lib.UserId
+import com.twitter.ml.featurestore.lib.{Discrete => FSDiscrete}
+import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
+import com.twitter.timelines.prediction.features.user_health.UserHealthFeatures
+import java.lang.{Boolean => JBoolean}
+import java.lang.{String => JString}
+import java.util
+import scala.collection.JavaConverters._
+
+/**
+ * Adapter from a feature store [[PredictionRecord]] to a single [[DataRecord]] that carries a
+ * user-state boolean, the user's country code and the verified-union flag.
+ */
+object UserFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {
+  // Feature store user-state feature, bound to the User entity.
+  val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] = UserActivity.UserState.bind(User)
+
+  /**
+   * Boolean features about viewer's user state.
+   * enum UserState {
+   *   NEW = 0,
+   *   NEAR_ZERO = 1,
+   *   VERY_LIGHT = 2,
+   *   LIGHT = 3,
+   *   MEDIUM_TWEETER = 4,
+   *   MEDIUM_NON_TWEETER = 5,
+   *   HEAVY_NON_TWEETER = 6,
+   *   HEAVY_TWEETER = 7
+   * }(persisted='true')
+   */
+  val IS_USER_NEW = new Binary("timelines.user_state.is_user_new", Set(UserState).asJava)
+  val IS_USER_LIGHT = new Binary("timelines.user_state.is_user_light", Set(UserState).asJava)
+  val IS_USER_MEDIUM_TWEETER =
+    new Binary("timelines.user_state.is_user_medium_tweeter", Set(UserState).asJava)
+  val IS_USER_MEDIUM_NON_TWEETER =
+    new Binary("timelines.user_state.is_user_medium_non_tweeter", Set(UserState).asJava)
+  val IS_USER_HEAVY_NON_TWEETER =
+    new Binary("timelines.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava)
+  val IS_USER_HEAVY_TWEETER =
+    new Binary("timelines.user_state.is_user_heavy_tweeter", Set(UserState).asJava)
+
+  // Lookup from the discrete user-state value to the boolean feature to set.
+  // Values 1, 2 and 3 (NEAR_ZERO, VERY_LIGHT, LIGHT in the enum above) share IS_USER_LIGHT.
+  val userStateToFeatureMap: Map[Long, Binary] = Map(
+    0L -> IS_USER_NEW,
+    1L -> IS_USER_LIGHT,
+    2L -> IS_USER_LIGHT,
+    3L -> IS_USER_LIGHT,
+    4L -> IS_USER_MEDIUM_TWEETER,
+    5L -> IS_USER_MEDIUM_NON_TWEETER,
+    6L -> IS_USER_HEAVY_NON_TWEETER,
+    7L -> IS_USER_HEAVY_TWEETER
+  )
+
+  // Distinct boolean features reachable through userStateToFeatureMap.
+  val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet
+
+  // Output text feature for the country code, and the feature store feature it is read from.
+  val USER_COUNTRY_ID = new Text("geo.user_location.country_code")
+  val UserCountryCodeFeature: BoundFeature[UserId, String] =
+    UserLocation.CountryCodeAlpha2.bind(User)
+  val UserLocationFeatures: Set[Feature[_]] = Set(USER_COUNTRY_ID)
+
+  // Verification flags that are OR-ed together into UserHealthFeatures.IsUserVerifiedUnion.
+  private val UserVerifiedFeaturesSet = Set(
+    UserAccount.IsUserVerified.bind(User),
+    UserAccount.IsUserBlueVerified.bind(User),
+    UserAccount.IsUserGoldVerified.bind(User),
+    UserAccount.IsUserGrayVerified.bind(User)
+  )
+
+  // All feature store features read by adaptToDataRecords.
+  val UserFeaturesSet: BoundFeatureSet =
+    BoundFeatureSet(UserStateBoundFeature, UserCountryCodeFeature) ++
+      BoundFeatureSet(UserVerifiedFeaturesSet.asInstanceOf[Set[BoundFeature[_ <: EntityId, _]]])
+
+  // NOTE(review): GenderBooleanFeatures is not defined anywhere in the visible part of this
+  // object; presumably it is inherited or was dropped. Confirm that this still compiles.
+  private val allFeatures: Seq[Feature[_]] =
+    UserStateBooleanFeatures.toSeq ++ GenderBooleanFeatures.toSeq ++
+      UserLocationFeatures.toSeq ++ Seq(UserHealthFeatures.IsUserVerifiedUnion)
+
+  override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*)
+  override def commonFeatures: Set[Feature[_]] = Set.empty
+
+  /**
+   * Builds a one-element list holding a new data record with:
+   *  - the boolean for the record's user state, when the state value has an entry in
+   *    userStateToFeatureMap;
+   *  - the country code, when the record has one;
+   *  - IsUserVerifiedUnion, true when any verified feature is true and false otherwise.
+   */
+  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
+    val richRecord = new RichDataRecord(new DataRecord)
+
+    for {
+      state <- record.getFeatureValue(UserStateBoundFeature)
+      stateFeature <- userStateToFeatureMap.get(state.value)
+    } richRecord.setFeatureValue[JBoolean](stateFeature, true)
+
+    for (countryCode <- record.getFeatureValue(UserCountryCodeFeature)) {
+      richRecord.setFeatureValue[JString](USER_COUNTRY_ID, countryCode)
+    }
+
+    // A verified feature that is missing from the record counts as false.
+    val anyVerified = UserVerifiedFeaturesSet.exists { verifiedFeature =>
+      record.getFeatureValue(verifiedFeature).getOrElse(false)
+    }
+    richRecord.setFeatureValue[JBoolean](UserHealthFeatures.IsUserVerifiedUnion, anyVerified)
+
+    List(richRecord.getRecord).asJava
+  }
+}
diff --git a/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala
new file mode 100644
index 000000000..c1931c32b
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/UserFeaturesReadableStore.scala
@@ -0,0 +1,37 @@
+package com.twitter.timelines.prediction.common.aggregates.real_time
+
+import com.twitter.ml.api.DataRecord
+import com.twitter.ml.featurestore.lib.UserId
+import com.twitter.ml.featurestore.lib.data.PredictionRecord
+import com.twitter.ml.featurestore.lib.entity.Entity
+import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest}
+import com.twitter.storehaus.ReadableStore
+import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
+import com.twitter.util.Future
+import
scala.collection.JavaConverters._
+
+/**
+ * [[ReadableStore]] keyed by sets of user ids. Each key becomes one feature store request and
+ * the matching [[PredictionRecord]] is converted to a [[DataRecord]] by `userFeaturesAdapter`.
+ */
+class UserFeaturesReadableStore(
+  featureStoreClient: FeatureStoreClient,
+  userEntity: Entity[UserId],
+  userFeaturesAdapter: TimelinesAdapterBase[PredictionRecord])
+    extends ReadableStore[Set[Long], DataRecord] {
+
+  /**
+   * Sends one batch of requests (one per key) to the feature store client and returns, for each
+   * key, a future of the first data record adapted from the response at the same position.
+   *
+   * @param keys sets of user ids; every set is looked up with a single request
+   * @return a map from each key to a future that resolves to None when the response holds no
+   *         record at the key's position or the adapter returns an empty list
+   */
+  override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = {
+    // A Seq fixes the iteration order so keys and responses can be paired by index.
+    val orderedKeys = keys.toSeq
+    val featureStoreRequests: Seq[FeatureStoreRequest] = orderedKeys.map { key: Set[Long] =>
+      FeatureStoreRequest(
+        entityIds = key.map(userId => userEntity.withId(UserId(userId))).toSeq
+      )
+    }
+    val predictionRecordsFut: Future[Seq[PredictionRecord]] = featureStoreClient(
+      featureStoreRequests)
+
+    orderedKeys.zipWithIndex.map {
+      case (userIds, index) =>
+        val dataRecordFutOpt = predictionRecordsFut.map { predictionRecords =>
+          // lift(index) yields None instead of throwing IndexOutOfBoundsException when the
+          // response holds fewer records than there are keys.
+          predictionRecords.lift(index).flatMap { predictionRecord: PredictionRecord =>
+            userFeaturesAdapter.adaptToDataRecords(predictionRecord).asScala.headOption
+          }
+        }
+        (userIds, dataRecordFutOpt)
+    }.toMap
+  }
+}
diff --git a/src/scala/com/twitter/timelines/prediction/features/README.md b/src/scala/com/twitter/timelines/prediction/features/README.md
new file mode 100644
index 000000000..d42639a77
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/README.md
@@ -0,0 +1,6 @@
+## Prediction Features
+
+This directory contains a collection of `Features` (`com.twitter.ml.api.Feature`) which are definitions of feature names and datatypes which allow the features to be efficiently processed and passed to the different ranking models.
+By predefining the features with their names and datatypes, when features are being generated, scribed or used to score they can be identified with only a hash of their name.
+
+Not all of these features are used in the model, many are experimental or deprecated.
\ No newline at end of file
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD b/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD
new file mode 100644
index 000000000..3d3c34092
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/client_log_event/BUILD
@@ -0,0 +1,11 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/scala/com/twitter/suggests/controller_data",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+        "src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala
new file mode 100644
index 000000000..cccb99998
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/client_log_event/ClientLogEventDataRecordFeatures.scala
@@ -0,0 +1,169 @@
+package com.twitter.timelines.prediction.features.client_log_event
+
+import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
+import com.twitter.ml.api.Feature
+import com.twitter.ml.api.Feature.Binary
+import com.twitter.ml.api.Feature.Continuous
+import com.twitter.ml.api.Feature.Discrete
+import scala.collection.JavaConverters._
+import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.thriftscala.CandidateTweetSourceId
+
+/**
+ * Feature definitions in the `client_log_event.tweet.*` namespace, together with lookup tables
+ * and named groupings over those features.
+ */
+object ClientLogEventDataRecordFeatures {
+
+  // Features declared with personal data type annotations.
+  val HasConsumerVideo =
+    new Binary(
+      "client_log_event.tweet.has_consumer_video",
+      Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
+  val PhotoCount =
+    new Continuous(
+      "client_log_event.tweet.photo_count",
+      Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
+  val HasImage =
+    new Binary(
+      "client_log_event.tweet.has_image",
+      Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
+  val IsReply =
+    new Binary("client_log_event.tweet.is_reply", Set(PublicReplies, PrivateReplies).asJava)
+  val IsRetweet =
+    new Binary("client_log_event.tweet.is_retweet", Set(PublicRetweets, PrivateRetweets).asJava)
+  val IsPromoted =
+    new Binary(
+      "client_log_event.tweet.is_promoted",
+      Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
+  val HasVisibleLink =
+    new Binary(
+      "client_log_event.tweet.has_visible_link",
+      Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
+  val HasHashtag =
+    new Binary(
+      "client_log_event.tweet.has_hashtag",
+      Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
+
+  // Features declared by name only.
+  val FromMutualFollow = new Binary("client_log_event.tweet.from_mutual_follow")
+  val IsInNetwork = new Binary("client_log_event.tweet.is_in_network")
+  val IsNotInNetwork = new Binary("client_log_event.tweet.is_not_in_network")
+
+  // Source flags; these are the values of CandidateTweetSourceIdFeatureMap below.
+  val FromRecap = new Binary("client_log_event.tweet.from_recap")
+  val FromRecycled = new Binary("client_log_event.tweet.from_recycled")
+  val FromActivity = new Binary("client_log_event.tweet.from_activity")
+  val FromSimcluster = new Binary("client_log_event.tweet.from_simcluster")
+  val FromErg = new Binary("client_log_event.tweet.from_erg")
+  val FromCroon = new Binary("client_log_event.tweet.from_croon")
+  val FromList = new Binary("client_log_event.tweet.from_list")
+  val FromRecTopic = new Binary("client_log_event.tweet.from_rec_topic")
+
+  val InjectedPosition = new Discrete("client_log_event.tweet.injectedPosition")
+  val TextOnly = new Binary("client_log_event.tweet.text_only")
+
+  // Social-context and topic flags.
+  val HasLikedBySocialContext = new Binary("client_log_event.tweet.has_liked_by_social_context")
+  val HasFollowedBySocialContext =
+    new Binary("client_log_event.tweet.has_followed_by_social_context")
+  val HasTopicSocialContext = new Binary("client_log_event.tweet.has_topic_social_context")
+  val IsFollowedTopicTweet = new Binary("client_log_event.tweet.is_followed_topic_tweet")
+  val IsRecommendedTopicTweet = new Binary("client_log_event.tweet.is_recommended_topic_tweet")
+
+  // Tweet-age flags, named after their thresholds.
+  val IsTweetAgeLessThan15Seconds =
+    new Binary("client_log_event.tweet.tweet_age_less_than_15_seconds")
+  val IsTweetAgeLessThanOrEqualTo30Minutes =
+    new Binary("client_log_event.tweet.tweet_age_lte_30_minutes")
+  val IsTweetAgeLessThanOrEqualTo1Hour = new Binary("client_log_event.tweet.tweet_age_lte_1_hour")
+  val IsTweetAgeLessThanOrEqualTo6Hours = new Binary("client_log_event.tweet.tweet_age_lte_6_hours")
+  val IsTweetAgeLessThanOrEqualTo12Hours =
+    new Binary("client_log_event.tweet.tweet_age_lte_12_hours")
+  val IsTweetAgeGreaterThanOrEqualTo24Hours =
+    new Binary("client_log_event.tweet.tweet_age_gte_24_hours")
+
+  // Favorite-count and retweet-count flags, named after their thresholds.
+  val HasGreaterThanOrEqualTo100Favs = new Binary("client_log_event.tweet.has_gte_100_favs")
+  val HasGreaterThanOrEqualTo1KFavs = new Binary("client_log_event.tweet.has_gte_1k_favs")
+  val HasGreaterThanOrEqualTo10KFavs = new Binary("client_log_event.tweet.has_gte_10k_favs")
+  val HasGreaterThanOrEqualTo100KFavs = new Binary("client_log_event.tweet.has_gte_100k_favs")
+  val HasGreaterThanOrEqualTo10Retweets = new Binary("client_log_event.tweet.has_gte_10_retweets")
+  val HasGreaterThanOrEqualTo100Retweets = new Binary("client_log_event.tweet.has_gte_100_retweets")
+  val HasGreaterThanOrEqualTo1KRetweets = new Binary("client_log_event.tweet.has_gte_1k_retweets")
+
+  /** Lookup from a string key to the corresponding binary feature. */
+  val TweetTypeToFeatureMap: Map[String, Binary] = Map(
+    "link" -> HasVisibleLink,
+    "hashtag" -> HasHashtag,
+    "mutual_follow" -> FromMutualFollow,
+    "in_network" -> IsInNetwork,
+    "text_only" -> TextOnly,
+    "has_liked_by_social_context" -> HasLikedBySocialContext,
+    "has_followed_by_social_context" -> HasFollowedBySocialContext,
+    "has_topic_social_context" -> HasTopicSocialContext,
+    "is_followed_topic_tweet" -> IsFollowedTopicTweet,
+    "is_recommended_topic_tweet" -> IsRecommendedTopicTweet,
+    "tweet_age_less_than_15_seconds" -> IsTweetAgeLessThan15Seconds,
+    "tweet_age_lte_30_minutes" -> IsTweetAgeLessThanOrEqualTo30Minutes,
+    "tweet_age_lte_1_hour" -> IsTweetAgeLessThanOrEqualTo1Hour,
+    "tweet_age_lte_6_hours" -> IsTweetAgeLessThanOrEqualTo6Hours,
+    "tweet_age_lte_12_hours" -> IsTweetAgeLessThanOrEqualTo12Hours,
+    "tweet_age_gte_24_hours" -> IsTweetAgeGreaterThanOrEqualTo24Hours,
+    "has_gte_100_favs" -> HasGreaterThanOrEqualTo100Favs,
+    "has_gte_1k_favs" -> HasGreaterThanOrEqualTo1KFavs,
+    "has_gte_10k_favs" -> HasGreaterThanOrEqualTo10KFavs,
+    "has_gte_100k_favs" -> HasGreaterThanOrEqualTo100KFavs,
+    "has_gte_10_retweets" -> HasGreaterThanOrEqualTo10Retweets,
+    "has_gte_100_retweets" -> HasGreaterThanOrEqualTo100Retweets,
+    "has_gte_1k_retweets" -> HasGreaterThanOrEqualTo1KRetweets
+  )
+
+  /**
+   * Lookup from a CandidateTweetSourceId value to its source flag. CroonTopicTweet and
+   * CroonTweet both map to FromCroon.
+   */
+  val CandidateTweetSourceIdFeatureMap: Map[Int, Binary] = Map(
+    CandidateTweetSourceId.RecapTweet.value -> FromRecap,
+    CandidateTweetSourceId.RecycledTweet.value -> FromRecycled,
+    CandidateTweetSourceId.RecommendedTweet.value -> FromActivity,
+    CandidateTweetSourceId.Simcluster.value -> FromSimcluster,
+    CandidateTweetSourceId.ErgTweet.value -> FromErg,
+    CandidateTweetSourceId.CroonTopicTweet.value -> FromCroon,
+    CandidateTweetSourceId.CroonTweet.value -> FromCroon,
+    CandidateTweetSourceId.ListTweet.value -> FromList,
+    CandidateTweetSourceId.RecommendedTopicTweet.value -> FromRecTopic
+  )
+
+  val TweetFeaturesV2: Set[Feature[_]] = Set(
+    HasImage,
+    IsReply,
+    IsRetweet,
+    HasVisibleLink,
+    HasHashtag,
+    FromMutualFollow,
+    IsInNetwork
+  )
+
+  // NOTE(review): HasVisibleLink is listed twice below. The Set collapses the duplicate, so it
+  // is harmless, but confirm that no other feature was intended in its place.
+  val ContentTweetTypeFeatures: Set[Feature[_]] = Set(
+    HasImage,
+    HasVisibleLink,
+    HasHashtag,
+    TextOnly,
+    HasVisibleLink
+  )
+
+  val FreshnessTweetTypeFeatures: Set[Feature[_]] = Set(
+    IsTweetAgeLessThan15Seconds,
+    IsTweetAgeLessThanOrEqualTo30Minutes,
+    IsTweetAgeLessThanOrEqualTo1Hour,
+    IsTweetAgeLessThanOrEqualTo6Hours,
+    IsTweetAgeLessThanOrEqualTo12Hours,
+    IsTweetAgeGreaterThanOrEqualTo24Hours
+  )
+
+  val SocialProofTweetTypeFeatures: Set[Feature[_]] = Set(
+    HasLikedBySocialContext,
+    HasFollowedBySocialContext,
+    HasTopicSocialContext
+  )
+
+  val TopicTweetPreferenceTweetTypeFeatures: Set[Feature[_]] = Set(
+    IsFollowedTopicTweet,
+    IsRecommendedTopicTweet
+  )
+
+  val TweetPopularityTweetTypeFeatures: Set[Feature[_]] = Set(
+    HasGreaterThanOrEqualTo100Favs,
+    HasGreaterThanOrEqualTo1KFavs,
+    HasGreaterThanOrEqualTo10KFavs,
+    HasGreaterThanOrEqualTo100KFavs,
+    HasGreaterThanOrEqualTo10Retweets,
+    HasGreaterThanOrEqualTo100Retweets,
+    HasGreaterThanOrEqualTo1KRetweets
+  )
+
+  val UserGraphInteractionTweetTypeFeatures: Set[Feature[_]] = Set(
+    IsInNetwork,
+    FromMutualFollow,
+    IsNotInNetwork,
+    IsPromoted
+  )
+
+  // Union of the six tweet-type groupings defined above.
+  val UserContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
+    ContentTweetTypeFeatures ++ FreshnessTweetTypeFeatures ++ SocialProofTweetTypeFeatures ++ TopicTweetPreferenceTweetTypeFeatures ++ TweetPopularityTweetTypeFeatures ++ UserGraphInteractionTweetTypeFeatures
+
+  // The three network flags plus the content grouping.
+  val AuthorContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
+    Set(IsInNetwork, FromMutualFollow, IsNotInNetwork) ++ ContentTweetTypeFeatures
+}
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/BUILD b/src/scala/com/twitter/timelines/prediction/features/common/BUILD
new file mode 100644
index 000000000..bfbe764c7
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/common/BUILD
@@ -0,0 +1,11 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+        "src/thrift/com/twitter/ml/api:data-java",
+        "timelines/data_processing/ml_util/aggregation_framework:common_types",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala
b/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala new file mode 100644 index 000000000..d995fe2b0 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/common/CombinedFeatures.scala @@ -0,0 +1,536 @@ +package com.twitter.timelines.prediction.features.common + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureType +import com.twitter.ml.api.Feature.Binary +import java.lang.{Boolean => JBoolean} +import scala.collection.JavaConverters._ + +object CombinedFeatures { + val IS_CLICKED = + new Binary("timelines.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_DWELLED = + new Binary("timelines.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava) + val IS_DWELLED_IN_BOUNDS_V1 = new Binary( + "timelines.engagement.is_dwelled_in_bounds_v1", + Set(TweetsViewed, EngagementsPrivate).asJava) + val IS_FAVORITED = new Binary( + "timelines.engagement.is_favorited", + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_FOLLOWED = new Binary( + "timelines.engagement.is_followed", + Set(EngagementsPrivate, EngagementsPublic, Follow).asJava) + val IS_IMPRESSED = + new Binary("timelines.engagement.is_impressed", Set(TweetsViewed, EngagementsPrivate).asJava) + val IS_OPEN_LINKED = new Binary( + "timelines.engagement.is_open_linked", + Set(EngagementsPrivate, LinksClickedOn).asJava) + val IS_PHOTO_EXPANDED = new Binary( + "timelines.engagement.is_photo_expanded", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED = new Binary( + "timelines.engagement.is_profile_clicked", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_QUOTED = new Binary( + "timelines.engagement.is_quoted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED = new Binary( + 
"timelines.engagement.is_replied", + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_RETWEETED = new Binary( + "timelines.engagement.is_retweeted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_RETWEETED_WITHOUT_QUOTE = new Binary( + "timelines.enagagement.is_retweeted_without_quote", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_SHARE_DM_CLICKED = + new Binary("timelines.engagement.is_tweet_share_dm_clicked", Set(EngagementsPrivate).asJava) + val IS_SHARE_DM_SENT = + new Binary("timelines.engagement.is_tweet_share_dm_sent", Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_25 = new Binary( + "timelines.engagement.is_video_playback_25", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_50 = new Binary( + "timelines.engagement.is_video_playback_50", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_75 = new Binary( + "timelines.engagement.is_video_playback_75", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_95 = new Binary( + "timelines.engagement.is_video_playback_95", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_COMPLETE = new Binary( + "timelines.engagement.is_video_playback_complete", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_START = new Binary( + "timelines.engagement.is_video_playback_start", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_VIEWED = new Binary( + "timelines.engagement.is_video_viewed", + Set(MediaEngagementActivities, EngagementsPrivate).asJava) + val IS_VIDEO_QUALITY_VIEWED = new Binary( + "timelines.engagement.is_video_quality_viewed", + Set(MediaEngagementActivities, EngagementsPrivate).asJava + ) + // v1: post click engagements: fav, reply + val IS_GOOD_CLICKED_CONVO_DESC_V1 = new 
Binary( + "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied", + Set( + TweetsClicked, + PublicLikes, + PrivateLikes, + PublicReplies, + PrivateReplies, + EngagementsPrivate, + EngagementsPublic).asJava) + // v2: post click engagements: click + val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_v2", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs", + Set( + TweetsClicked, + PublicLikes, + PrivateLikes, + PublicReplies, + PrivateReplies, + EngagementsPrivate, + EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_favorited", + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_replied", + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_retweeted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_clicked", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_followed", + Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_share_dm_clicked", + Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_profile_clicked", + Set(EngagementsPrivate).asJava) + + val 
IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_uam_gt_0", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_uam_gt_1", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_uam_gt_2", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary( + "timelines.engagement.is_good_clicked_convo_desc_uam_gt_3", + Set(EngagementsPrivate, EngagementsPublic).asJava) + + val IS_TWEET_DETAIL_DWELLED = new Binary( + "timelines.engagement.is_tweet_detail_dwelled", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary( + "timelines.engagement.is_tweet_detail_dwelled_8_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary( + "timelines.engagement.is_tweet_detail_dwelled_15_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary( + "timelines.engagement.is_tweet_detail_dwelled_25_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary( + "timelines.engagement.is_tweet_detail_dwelled_30_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_PROFILE_DWELLED = new Binary( + "timelines.engagement.is_profile_dwelled", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_10_SEC = new Binary( + "timelines.engagement.is_profile_dwelled_10_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_20_SEC = new Binary( + "timelines.engagement.is_profile_dwelled_20_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_30_SEC = new Binary( + 
"timelines.engagement.is_profile_dwelled_30_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED = new Binary( + "timelines.engagement.is_fullscreen_video_dwelled", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary( + "timelines.engagement.is_fullscreen_video_dwelled_5_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary( + "timelines.engagement.is_fullscreen_video_dwelled_10_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary( + "timelines.engagement.is_fullscreen_video_dwelled_20_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary( + "timelines.engagement.is_fullscreen_video_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_15_SEC = new Binary( + "timelines.engagement.is_link_dwelled_15_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_30_SEC = new Binary( + "timelines.engagement.is_link_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_60_SEC = new Binary( + "timelines.engagement.is_link_dwelled_60_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_HOME_LATEST_VISITED = + new Binary("timelines.engagement.is_home_latest_visited", Set(EngagementsPrivate).asJava) + + val IS_BOOKMARKED = + new Binary("timelines.engagement.is_bookmarked", Set(EngagementsPrivate).asJava) + val IS_SHARED = + new Binary("timelines.engagement.is_shared", Set(EngagementsPrivate).asJava) + val 
IS_SHARE_MENU_CLICKED = + new Binary("timelines.engagement.is_share_menu_clicked", Set(EngagementsPrivate).asJava) + + // Negative engagements + val IS_DONT_LIKE = new Binary("timelines.engagement.is_dont_like", Set(EngagementsPrivate).asJava) + val IS_BLOCK_CLICKED = new Binary( + "timelines.engagement.is_block_clicked", + Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava) + val IS_BLOCK_DIALOG_BLOCKED = new Binary( + "timelines.engagement.is_block_dialog_blocked", + Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava) + val IS_MUTE_CLICKED = new Binary( + "timelines.engagement.is_mute_clicked", + Set(Mutes, TweetsClicked, EngagementsPrivate).asJava) + val IS_MUTE_DIALOG_MUTED = + new Binary("timelines.engagement.is_mute_dialog_muted", Set(Mutes, EngagementsPrivate).asJava) + val IS_REPORT_TWEET_CLICKED = new Binary( + "timelines.engagement.is_report_tweet_clicked", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_CARET_CLICKED = + new Binary("timelines.engagement.is_caret_clicked", Set(EngagementsPrivate).asJava) + val IS_NOT_ABOUT_TOPIC = + new Binary("timelines.engagement.is_not_about_topic", Set(EngagementsPrivate).asJava) + val IS_NOT_RECENT = + new Binary("timelines.engagement.is_not_recent", Set(EngagementsPrivate).asJava) + val IS_NOT_RELEVANT = + new Binary("timelines.engagement.is_not_relevant", Set(EngagementsPrivate).asJava) + val IS_SEE_FEWER = + new Binary("timelines.engagement.is_see_fewer", Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC = + new Binary("timelines.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava) + val IS_FOLLOW_TOPIC = + new Binary("timelines.engagement.is_follow_topic", Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN_TOPIC = + new Binary("timelines.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava) + val IS_NEGATIVE_FEEDBACK = + new Binary("timelines.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava) + val 
IS_IMPLICIT_POSITIVE_FEEDBACK_UNION = + new Binary( + "timelines.engagement.is_implicit_positive_feedback_union", + Set(EngagementsPrivate).asJava) + val IS_EXPLICIT_POSITIVE_FEEDBACK_UNION = + new Binary( + "timelines.engagement.is_explicit_positive_feedback_union", + Set(EngagementsPrivate).asJava) + val IS_ALL_NEGATIVE_FEEDBACK_UNION = + new Binary( + "timelines.engagement.is_all_negative_feedback_union", + Set(EngagementsPrivate).asJava) + // Reciprocal engagements for reply forward engagement + val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_impressed_by_author", + Set(EngagementsPrivate).asJava) + val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_favorited_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava) + val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_quoted_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava) + val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_replied_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava) + val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_retweeted_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava) + val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_blocked_by_author", + Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_followed_by_author", + Set(EngagementsPrivate, EngagementsPublic, Follow).asJava) + val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_unfollowed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val 
IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_muted_by_author", + Set(Mutes, EngagementsPrivate).asJava) + val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary( + "timelines.engagement.is_replied_reply_reported_by_author", + Set(EngagementsPrivate).asJava) + + // Reciprocal engagements for fav forward engagement + val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary( + "timelines.engagement.is_favorited_fav_favorited_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava + ) + val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary( + "timelines.engagement.is_favorited_fav_replied_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava + ) + val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary( + "timelines.engagement.is_favorited_fav_retweeted_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava + ) + val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary( + "timelines.engagement.is_favorited_fav_followed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava + ) + + // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.) 
at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_follow", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_fav", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_reply", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_retweet", + Set( + ProfilesViewed, + ProfilesClicked, + EngagementsPrivate, + PrivateRetweets, + PublicRetweets).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_tweet_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_share_dm_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of all binary features above + val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_engaged", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava) + + // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_user_report_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary( + 
"timelines.engagement.is_profile_clicked_and_profile_tweet_report_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_mute", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary( + "timelines.engagement.is_profile_clicked_and_profile_block", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of bad profile click engagements and existing negative feedback + val IS_NEGATIVE_FEEDBACK_V2 = new Binary( + "timelines.engagement.is_negative_feedback_v2", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_NEGATIVE_FEEDBACK_UNION = new Binary( + "timelines.engagement.is_negative_feedback_union", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // don't like, mute or profile page -> mute + val IS_WEAK_NEGATIVE_FEEDBACK = new Binary( + "timelines.engagement.is_weak_negative_feedback", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // report, block or profile page -> report, block + val IS_STRONG_NEGATIVE_FEEDBACK = new Binary( + "timelines.engagement.is_strong_negative_feedback", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // engagement for following user from any surface area + val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary( + "timelines.engagement.is_followed_from_any_surface_area", + Set(EngagementsPublic, EngagementsPrivate).asJava) + val IS_RELEVANCE_PROMPT_YES_CLICKED = new Binary( + "timelines.engagement.is_relevance_prompt_yes_clicked", + Set(EngagementsPublic, EngagementsPrivate).asJava) + + // Reply downvote engagements + val IS_REPLY_DOWNVOTED = + new Binary("timelines.engagement.is_reply_downvoted", Set(EngagementsPrivate).asJava) + val IS_REPLY_DOWNVOTE_REMOVED = + new 
Binary("timelines.engagement.is_reply_downvote_removed", Set(EngagementsPrivate).asJava) + + /** + * Contains all engagements that are used/consumed by real-time + * aggregates summingbird jobs. These engagements need to be + * extractable from [[ClientEvent]]. + */ + val EngagementsRealTime: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_DWELLED, + IS_FAVORITED, + IS_FOLLOWED, + IS_OPEN_LINKED, + IS_PHOTO_EXPANDED, + IS_PROFILE_CLICKED, + IS_QUOTED, + IS_REPLIED, + IS_RETWEETED, + IS_RETWEETED_WITHOUT_QUOTE, + IS_SHARE_DM_CLICKED, + IS_SHARE_DM_SENT, + IS_VIDEO_PLAYBACK_50, + IS_VIDEO_VIEWED, + IS_VIDEO_QUALITY_VIEWED + ) + + val NegativeEngagementsRealTime: Set[Feature[JBoolean]] = Set( + IS_REPORT_TWEET_CLICKED, + IS_BLOCK_CLICKED, + IS_MUTE_CLICKED + ) + + val NegativeEngagementsRealTimeDontLike: Set[Feature[JBoolean]] = Set( + IS_DONT_LIKE + ) + + val NegativeEngagementsSecondary: Set[Feature[JBoolean]] = Set( + IS_NOT_INTERESTED_IN_TOPIC, + IS_NOT_ABOUT_TOPIC, + IS_NOT_RECENT, + IS_NOT_RELEVANT, + IS_SEE_FEWER, + IS_UNFOLLOW_TOPIC + ) + + val PrivateEngagements: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_DWELLED, + IS_OPEN_LINKED, + IS_PHOTO_EXPANDED, + IS_PROFILE_CLICKED, + IS_QUOTED, + IS_VIDEO_PLAYBACK_50, + IS_VIDEO_QUALITY_VIEWED + ) + + val ImpressedEngagements: Set[Feature[JBoolean]] = Set( + IS_IMPRESSED + ) + + val PrivateEngagementsV2: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_OPEN_LINKED, + IS_PHOTO_EXPANDED, + IS_PROFILE_CLICKED, + IS_VIDEO_PLAYBACK_50, + IS_VIDEO_QUALITY_VIEWED + ) ++ ImpressedEngagements + + val CoreEngagements: Set[Feature[JBoolean]] = Set( + IS_FAVORITED, + IS_REPLIED, + IS_RETWEETED + ) + + val DwellEngagements: Set[Feature[JBoolean]] = Set( + IS_DWELLED + ) + + val PrivateCoreEngagements: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_OPEN_LINKED, + IS_PHOTO_EXPANDED, + IS_VIDEO_PLAYBACK_50, + IS_VIDEO_QUALITY_VIEWED + ) + + val ConditionalEngagements: Set[Feature[JBoolean]] = Set( + 
IS_GOOD_CLICKED_CONVO_DESC_V1, + IS_GOOD_CLICKED_CONVO_DESC_V2, + IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S + ) + + val ShareEngagements: Set[Feature[JBoolean]] = Set( + IS_SHARED, + IS_SHARE_MENU_CLICKED + ) + + val BookmarkEngagements: Set[Feature[JBoolean]] = Set( + IS_BOOKMARKED + ) + + val TweetDetailDwellEngagements: Set[Feature[JBoolean]] = Set( + IS_TWEET_DETAIL_DWELLED, + IS_TWEET_DETAIL_DWELLED_8_SEC, + IS_TWEET_DETAIL_DWELLED_15_SEC, + IS_TWEET_DETAIL_DWELLED_25_SEC, + IS_TWEET_DETAIL_DWELLED_30_SEC + ) + + val ProfileDwellEngagements: Set[Feature[JBoolean]] = Set( + IS_PROFILE_DWELLED, + IS_PROFILE_DWELLED_10_SEC, + IS_PROFILE_DWELLED_20_SEC, + IS_PROFILE_DWELLED_30_SEC + ) + + val FullscreenVideoDwellEngagements: Set[Feature[JBoolean]] = Set( + IS_FULLSCREEN_VIDEO_DWELLED, + IS_FULLSCREEN_VIDEO_DWELLED_5_SEC, + IS_FULLSCREEN_VIDEO_DWELLED_10_SEC, + IS_FULLSCREEN_VIDEO_DWELLED_20_SEC, + IS_FULLSCREEN_VIDEO_DWELLED_30_SEC + ) + + // Please do not add new engagements here until having estimated the impact + // to capacity requirements. User-author real-time aggregates have a very + // large key space. 
+ val UserAuthorEngagements: Set[Feature[JBoolean]] = CoreEngagements ++ DwellEngagements ++ Set( + IS_CLICKED, + IS_PROFILE_CLICKED, + IS_PHOTO_EXPANDED, + IS_VIDEO_PLAYBACK_50, + IS_NEGATIVE_FEEDBACK_UNION + ) + + val ImplicitPositiveEngagements: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_DWELLED, + IS_OPEN_LINKED, + IS_PROFILE_CLICKED, + IS_QUOTED, + IS_VIDEO_PLAYBACK_50, + IS_VIDEO_QUALITY_VIEWED, + IS_TWEET_DETAIL_DWELLED, + IS_GOOD_CLICKED_CONVO_DESC_V1, + IS_GOOD_CLICKED_CONVO_DESC_V2, + IS_SHARED, + IS_SHARE_MENU_CLICKED, + IS_SHARE_DM_SENT, + IS_SHARE_DM_CLICKED + ) + + val ExplicitPositiveEngagements: Set[Feature[JBoolean]] = CoreEngagements ++ Set( + IS_FOLLOWED, + IS_QUOTED + ) + + val AllNegativeEngagements: Set[Feature[JBoolean]] = + NegativeEngagementsRealTime ++ NegativeEngagementsRealTimeDontLike ++ Set( + IS_NOT_RECENT, + IS_NOT_RELEVANT, + IS_SEE_FEWER + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala new file mode 100644 index 000000000..369b48b39 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/common/NonHomeLabelFeatures.scala @@ -0,0 +1,97 @@ +package com.twitter.timelines.prediction.features.common + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature +import com.twitter.ml.api.Feature.Binary +import java.lang.{Boolean => JBoolean} +import scala.collection.JavaConverters._ + +object ProfileLabelFeatures { + private val prefix = "profile" + + val IS_CLICKED = + new Binary(s"${prefix}.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_DWELLED = + new Binary(s"${prefix}.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava) + val IS_FAVORITED = new Binary( + s"${prefix}.engagement.is_favorited", + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val 
IS_REPLIED = new Binary( + s"${prefix}.engagement.is_replied", + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_RETWEETED = new Binary( + s"${prefix}.engagement.is_retweeted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + + // Negative engagements + val IS_DONT_LIKE = + new Binary(s"${prefix}.engagement.is_dont_like", Set(EngagementsPrivate).asJava) + val IS_BLOCK_CLICKED = new Binary( + s"${prefix}.engagement.is_block_clicked", + Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava) + val IS_MUTE_CLICKED = new Binary( + s"${prefix}.engagement.is_mute_clicked", + Set(Mutes, TweetsClicked, EngagementsPrivate).asJava) + val IS_REPORT_TWEET_CLICKED = new Binary( + s"${prefix}.engagement.is_report_tweet_clicked", + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_NEGATIVE_FEEDBACK_UNION = new Binary( + s"${prefix}.engagement.is_negative_feedback_union", + Set(EngagementsPrivate, Blocks, Mutes, TweetsClicked, EngagementsPublic).asJava) + + val CoreEngagements: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_DWELLED, + IS_FAVORITED, + IS_REPLIED, + IS_RETWEETED + ) + + val NegativeEngagements: Set[Feature[JBoolean]] = Set( + IS_DONT_LIKE, + IS_BLOCK_CLICKED, + IS_MUTE_CLICKED, + IS_REPORT_TWEET_CLICKED + ) + +} + +object SearchLabelFeatures { + private val prefix = "search" + + val IS_CLICKED = + new Binary(s"${prefix}.engagement.is_clicked", Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_DWELLED = + new Binary(s"${prefix}.engagement.is_dwelled", Set(TweetsViewed, EngagementsPrivate).asJava) + val IS_FAVORITED = new Binary( + s"${prefix}.engagement.is_favorited", + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED = new Binary( + s"${prefix}.engagement.is_replied", + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_RETWEETED = new Binary( + 
s"${prefix}.engagement.is_retweeted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_PROFILE_CLICKED_SEARCH_RESULT_USER = new Binary( + s"${prefix}.engagement.is_profile_clicked_search_result_user", + Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET = new Binary( + s"${prefix}.engagement.is_profile_clicked_search_result_tweet", + Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_TYPEAHEAD_USER = new Binary( + s"${prefix}.engagement.is_profile_clicked_typeahead_user", + Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava) + + val CoreEngagements: Set[Feature[JBoolean]] = Set( + IS_CLICKED, + IS_DWELLED, + IS_FAVORITED, + IS_REPLIED, + IS_RETWEETED, + IS_PROFILE_CLICKED_SEARCH_RESULT_USER, + IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET, + IS_PROFILE_CLICKED_TYPEAHEAD_USER + ) +} +// Add Tweet Detail labels later diff --git a/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala new file mode 100644 index 000000000..99698530f --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/common/TimelinesSharedFeatures.scala @@ -0,0 +1,759 @@ +package com.twitter.timelines.prediction.features.common + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature.Binary +import com.twitter.ml.api.Feature.Continuous +import com.twitter.ml.api.Feature.Discrete +import com.twitter.ml.api.Feature.SparseBinary +import com.twitter.ml.api.Feature.SparseContinuous +import com.twitter.ml.api.Feature.Text +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import scala.collection.JavaConverters._ + +object TimelinesSharedFeatures extends TimelinesSharedFeatures("") +object InReplyToTweetTimelinesSharedFeatures 
extends TimelinesSharedFeatures("in_reply_to_tweet") + +/** + * Defines shared features + */ +class TimelinesSharedFeatures(prefix: String) { + private def name(featureName: String): String = { + if (prefix.nonEmpty) { + s"$prefix.$featureName" + } else { + featureName + } + } + + // meta + val EXPERIMENT_META = new SparseBinary( + name("timelines.meta.experiment_meta"), + Set(ExperimentId, ExperimentName).asJava) + + // historically used in the "combined models" to distinguish in-network and out of network tweets. + // now the feature denotes which adapter (recap or rectweet) was used to generate the datarecords. + // and is used by the data collection pipeline to split the training data. + val INJECTION_TYPE = new Discrete(name("timelines.meta.injection_type")) + + // Used to indicate which injection module this is + val INJECTION_MODULE_NAME = new Text(name("timelines.meta.injection_module_name")) + + val LIST_ID = new Discrete(name("timelines.meta.list_id")) + val LIST_IS_PINNED = new Binary(name("timelines.meta.list_is_pinned")) + + // internal id per each PS request. mainly to join back common features and candidate features later + val PREDICTION_REQUEST_ID = new Discrete(name("timelines.meta.prediction_request_id")) + // internal id per each TLM request. 
mainly to deduplicate re-served cached tweets in logging + val SERVED_REQUEST_ID = new Discrete(name("timelines.meta.served_request_id")) + // internal id used for join key in kafka logging, equal to servedRequestId if tweet is cached, + // else equal to predictionRequestId + val SERVED_ID = new Discrete(name("timelines.meta.served_id")) + val REQUEST_JOIN_ID = new Discrete(name("timelines.meta.request_join_id")) + + // Internal boolean flag per tweet, whether the tweet is served from RankedTweetsCache: TQ-14050 + // this feature should not be trained on, blacklisted in feature_config: D838346 + val IS_READ_FROM_CACHE = new Binary(name("timelines.meta.is_read_from_cache")) + + // model score discounts + val PHOTO_DISCOUNT = new Continuous(name("timelines.score_discounts.photo")) + val VIDEO_DISCOUNT = new Continuous(name("timelines.score_discounts.video")) + val TWEET_HEIGHT_DISCOUNT = new Continuous(name("timelines.score_discounts.tweet_height")) + val TOXICITY_DISCOUNT = new Continuous(name("timelines.score_discounts.toxicity")) + + // engagements + val ENGAGEMENT_TYPE = new Discrete(name("timelines.engagement.type")) + val PREDICTED_IS_FAVORITED = + new Continuous(name("timelines.engagement_predicted.is_favorited"), Set(EngagementScore).asJava) + val PREDICTED_IS_RETWEETED = + new Continuous(name("timelines.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava) + val PREDICTED_IS_QUOTED = + new Continuous(name("timelines.engagement_predicted.is_quoted"), Set(EngagementScore).asJava) + val PREDICTED_IS_REPLIED = + new Continuous(name("timelines.engagement_predicted.is_replied"), Set(EngagementScore).asJava) + val PREDICTED_IS_OPEN_LINKED = new Continuous( + name("timelines.engagement_predicted.is_open_linked"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_OPEN_LINK = new Continuous( + name("timelines.engagement_predicted.is_good_open_link"), + Set(EngagementScore).asJava) + val PREDICTED_IS_PROFILE_CLICKED = new Continuous( + 
name("timelines.engagement_predicted.is_profile_clicked"), + Set(EngagementScore).asJava + ) + val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Continuous( + name("timelines.engagement_predicted.is_profile_clicked_and_profile_engaged"), + Set(EngagementScore).asJava + ) + val PREDICTED_IS_CLICKED = + new Continuous(name("timelines.engagement_predicted.is_clicked"), Set(EngagementScore).asJava) + val PREDICTED_IS_PHOTO_EXPANDED = new Continuous( + name("timelines.engagement_predicted.is_photo_expanded"), + Set(EngagementScore).asJava + ) + val PREDICTED_IS_FOLLOWED = + new Continuous(name("timelines.engagement_predicted.is_followed"), Set(EngagementScore).asJava) + val PREDICTED_IS_DONT_LIKE = + new Continuous(name("timelines.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava) + val PREDICTED_IS_VIDEO_PLAYBACK_50 = new Continuous( + name("timelines.engagement_predicted.is_video_playback_50"), + Set(EngagementScore).asJava + ) + val PREDICTED_IS_VIDEO_QUALITY_VIEWED = new Continuous( + name("timelines.engagement_predicted.is_video_quality_viewed"), + Set(EngagementScore).asJava + ) + val PREDICTED_IS_GOOD_CLICKED_V1 = new Continuous( + name("timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_CLICKED_V2 = new Continuous( + name("timelines.engagement_predicted.is_good_clicked_convo_desc_v2"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC = new Continuous( + name("timelines.engagement_predicted.is_tweet_detail_dwelled_8_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC = new Continuous( + name("timelines.engagement_predicted.is_tweet_detail_dwelled_15_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC = new Continuous( + name("timelines.engagement_predicted.is_tweet_detail_dwelled_25_sec"), + Set(EngagementScore).asJava) + val 
PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC = new Continuous( + name("timelines.engagement_predicted.is_tweet_detail_dwelled_30_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Continuous( + name( + "timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Continuous( + name("timelines.engagement_predicted.is_favorited_fav_engaged_by_author"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_REPORT_TWEET_CLICKED = + new Continuous( + name("timelines.engagement_predicted.is_report_tweet_clicked"), + Set(EngagementScore).asJava) + val PREDICTED_IS_NEGATIVE_FEEDBACK = new Continuous( + name("timelines.engagement_predicted.is_negative_feedback"), + Set(EngagementScore).asJava) + val PREDICTED_IS_NEGATIVE_FEEDBACK_V2 = new Continuous( + name("timelines.engagement_predicted.is_negative_feedback_v2"), + Set(EngagementScore).asJava) + val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK = new Continuous( + name("timelines.engagement_predicted.is_weak_negative_feedback"), + Set(EngagementScore).asJava) + val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK = new Continuous( + name("timelines.engagement_predicted.is_strong_negative_feedback"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_DWELLED_IN_BOUNDS_V1 = new Continuous( + name("timelines.engagement_predicted.is_dwelled_in_bounds_v1"), + Set(EngagementScore).asJava) + val PREDICTED_DWELL_NORMALIZED_OVERALL = new Continuous( + name("timelines.engagement_predicted.dwell_normalized_overall"), + Set(EngagementScore).asJava) + val PREDICTED_DWELL_CDF = + new Continuous(name("timelines.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava) + val PREDICTED_DWELL_CDF_OVERALL = new Continuous( + name("timelines.engagement_predicted.dwell_cdf_overall"), + Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED = + new 
Continuous(name("timelines.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava) + + val PREDICTED_IS_HOME_LATEST_VISITED = new Continuous( + name("timelines.engagement_predicted.is_home_latest_visited"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_BOOKMARKED = new Continuous( + name("timelines.engagement_predicted.is_bookmarked"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_SHARED = + new Continuous(name("timelines.engagement_predicted.is_shared"), Set(EngagementScore).asJava) + val PREDICTED_IS_SHARE_MENU_CLICKED = new Continuous( + name("timelines.engagement_predicted.is_share_menu_clicked"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_PROFILE_DWELLED_20_SEC = new Continuous( + name("timelines.engagement_predicted.is_profile_dwelled_20_sec"), + Set(EngagementScore).asJava) + + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Continuous( + name("timelines.engagement_predicted.is_fullscreen_video_dwelled_5_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Continuous( + name("timelines.engagement_predicted.is_fullscreen_video_dwelled_10_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Continuous( + name("timelines.engagement_predicted.is_fullscreen_video_dwelled_20_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Continuous( + name("timelines.engagement_predicted.is_fullscreen_video_dwelled_30_sec"), + Set(EngagementScore).asJava) + + // Please use this timestamp, not the `meta.timestamp`, for the actual served timestamp. + val SERVED_TIMESTAMP = + new Discrete("timelines.meta.timestamp.served", Set(PrivateTimestamp).asJava) + + // timestamp when the engagement has occurred. 
do not train on these features + val TIMESTAMP_FAVORITED = + new Discrete("timelines.meta.timestamp.engagement.favorited", Set(PublicTimestamp).asJava) + val TIMESTAMP_RETWEETED = + new Discrete("timelines.meta.timestamp.engagement.retweeted", Set(PublicTimestamp).asJava) + val TIMESTAMP_REPLIED = + new Discrete("timelines.meta.timestamp.engagement.replied", Set(PublicTimestamp).asJava) + val TIMESTAMP_PROFILE_CLICKED = new Discrete( + "timelines.meta.timestamp.engagement.profile_clicked", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_CLICKED = + new Discrete("timelines.meta.timestamp.engagement.clicked", Set(PrivateTimestamp).asJava) + val TIMESTAMP_PHOTO_EXPANDED = + new Discrete("timelines.meta.timestamp.engagement.photo_expanded", Set(PrivateTimestamp).asJava) + val TIMESTAMP_DWELLED = + new Discrete("timelines.meta.timestamp.engagement.dwelled", Set(PrivateTimestamp).asJava) + val TIMESTAMP_VIDEO_PLAYBACK_50 = new Discrete( + "timelines.meta.timestamp.engagement.video_playback_50", + Set(PrivateTimestamp).asJava) + // reply engaged by author + val TIMESTAMP_REPLY_FAVORITED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.reply_favorited_by_author", + Set(PublicTimestamp).asJava) + val TIMESTAMP_REPLY_REPLIED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.reply_replied_by_author", + Set(PublicTimestamp).asJava) + val TIMESTAMP_REPLY_RETWEETED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.reply_retweeted_by_author", + Set(PublicTimestamp).asJava) + // fav engaged by author + val TIMESTAMP_FAV_FAVORITED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.fav_favorited_by_author", + Set(PublicTimestamp).asJava) + val TIMESTAMP_FAV_REPLIED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.fav_replied_by_author", + Set(PublicTimestamp).asJava) + val TIMESTAMP_FAV_RETWEETED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.fav_retweeted_by_author", + 
Set(PublicTimestamp).asJava) + val TIMESTAMP_FAV_FOLLOWED_BY_AUTHOR = new Discrete( + "timelines.meta.timestamp.engagement.fav_followed_by_author", + Set(PublicTimestamp).asJava) + // good click + val TIMESTAMP_GOOD_CLICK_CONVO_DESC_FAVORITED = new Discrete( + "timelines.meta.timestamp.engagement.good_click_convo_desc_favorited", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_GOOD_CLICK_CONVO_DESC_REPLIIED = new Discrete( + "timelines.meta.timestamp.engagement.good_click_convo_desc_replied", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_GOOD_CLICK_CONVO_DESC_PROFILE_CLICKED = new Discrete( + "timelines.meta.timestamp.engagement.good_click_convo_desc_profiile_clicked", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_NEGATIVE_FEEDBACK = new Discrete( + "timelines.meta.timestamp.engagement.negative_feedback", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_REPORT_TWEET_CLICK = + new Discrete( + "timelines.meta.timestamp.engagement.report_tweet_click", + Set(PrivateTimestamp).asJava) + val TIMESTAMP_IMPRESSED = + new Discrete("timelines.meta.timestamp.engagement.impressed", Set(PublicTimestamp).asJava) + val TIMESTAMP_TWEET_DETAIL_DWELLED = + new Discrete( + "timelines.meta.timestamp.engagement.tweet_detail_dwelled", + Set(PublicTimestamp).asJava) + val TIMESTAMP_PROFILE_DWELLED = + new Discrete("timelines.meta.timestamp.engagement.profile_dwelled", Set(PublicTimestamp).asJava) + val TIMESTAMP_FULLSCREEN_VIDEO_DWELLED = + new Discrete( + "timelines.meta.timestamp.engagement.fullscreen_video_dwelled", + Set(PublicTimestamp).asJava) + val TIMESTAMP_LINK_DWELLED = + new Discrete("timelines.meta.timestamp.engagement.link_dwelled", Set(PublicTimestamp).asJava) + + // these are used to dup and split the negative instances during streaming processing (kafka) + val TRAINING_FOR_FAVORITED = + new Binary("timelines.meta.training_data.for_favorited", Set(EngagementId).asJava) + val TRAINING_FOR_RETWEETED = + new Binary("timelines.meta.training_data.for_retweeted", 
Set(EngagementId).asJava) + val TRAINING_FOR_REPLIED = + new Binary("timelines.meta.training_data.for_replied", Set(EngagementId).asJava) + val TRAINING_FOR_PROFILE_CLICKED = + new Binary("timelines.meta.training_data.for_profile_clicked", Set(EngagementId).asJava) + val TRAINING_FOR_CLICKED = + new Binary("timelines.meta.training_data.for_clicked", Set(EngagementId).asJava) + val TRAINING_FOR_PHOTO_EXPANDED = + new Binary("timelines.meta.training_data.for_photo_expanded", Set(EngagementId).asJava) + val TRAINING_FOR_VIDEO_PLAYBACK_50 = + new Binary("timelines.meta.training_data.for_video_playback_50", Set(EngagementId).asJava) + val TRAINING_FOR_NEGATIVE_FEEDBACK = + new Binary("timelines.meta.training_data.for_negative_feedback", Set(EngagementId).asJava) + val TRAINING_FOR_REPORTED = + new Binary("timelines.meta.training_data.for_reported", Set(EngagementId).asJava) + val TRAINING_FOR_DWELLED = + new Binary("timelines.meta.training_data.for_dwelled", Set(EngagementId).asJava) + val TRAINING_FOR_SHARED = + new Binary("timelines.meta.training_data.for_shared", Set(EngagementId).asJava) + val TRAINING_FOR_SHARE_MENU_CLICKED = + new Binary("timelines.meta.training_data.for_share_menu_clicked", Set(EngagementId).asJava) + + // Warning: do not train on these features + val PREDICTED_SCORE = new Continuous(name("timelines.score"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_FAV = new Continuous(name("timelines.score.fav"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_RETWEET = + new Continuous(name("timelines.score.retweet"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_REPLY = + new Continuous(name("timelines.score.reply"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_OPEN_LINK = + new Continuous(name("timelines.score.open_link"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_GOOD_OPEN_LINK = + new Continuous(name("timelines.score.good_open_link"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_PROFILE_CLICK = + new 
Continuous(name("timelines.score.profile_click"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DETAIL_EXPAND = + new Continuous(name("timelines.score.detail_expand"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_PHOTO_EXPAND = + new Continuous(name("timelines.score.photo_expand"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_PLAYBACK_50 = + new Continuous(name("timelines.score.playback_50"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_VIDEO_QUALITY_VIEW = + new Continuous(name("timelines.score.video_quality_view"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DONT_LIKE = + new Continuous(name("timelines.score.dont_like"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_PROFILE_CLICKED_AND_PROFILE_ENGAGED = + new Continuous( + name("timelines.score.profile_clicked_and_profile_engaged"), + Set(EngagementScore).asJava) + val PREDICTED_SCORE_GOOD_CLICKED_V1 = + new Continuous(name("timelines.score.good_clicked_v1"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_GOOD_CLICKED_V2 = + new Continuous(name("timelines.score.good_clicked_v2"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DWELL = + new Continuous(name("timelines.score.dwell"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DWELL_CDF = + new Continuous(name("timelines.score.dwell_cfd"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DWELL_CDF_OVERALL = + new Continuous(name("timelines.score.dwell_cfd_overall"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_DWELL_NORMALIZED_OVERALL = + new Continuous(name("timelines.score.dwell_normalized_overall"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_NEGATIVE_FEEDBACK = + new Continuous(name("timelines.score.negative_feedback"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_NEGATIVE_FEEDBACK_V2 = + new Continuous(name("timelines.score.negative_feedback_v2"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_WEAK_NEGATIVE_FEEDBACK = + new Continuous(name("timelines.score.weak_negative_feedback"), 
Set(EngagementScore).asJava) + val PREDICTED_SCORE_STRONG_NEGATIVE_FEEDBACK = + new Continuous(name("timelines.score.strong_negative_feedback"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_REPORT_TWEET_CLICKED = + new Continuous(name("timelines.score.report_tweet_clicked"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_UNFOLLOW_TOPIC = + new Continuous(name("timelines.score.unfollow_topic"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_FOLLOW = + new Continuous(name("timelines.score.follow"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_RELEVANCE_PROMPT_YES_CLICKED = + new Continuous( + name("timelines.score.relevance_prompt_yes_clicked"), + Set(EngagementScore).asJava) + val PREDICTED_SCORE_BOOKMARK = + new Continuous(name("timelines.score.bookmark"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_SHARE = + new Continuous(name("timelines.score.share"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_SHARE_MENU_CLICK = + new Continuous(name("timelines.score.share_menu_click"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_PROFILE_DWELLED = + new Continuous(name("timelines.score.good_profile_dwelled"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_TWEET_DETAIL_DWELLED = + new Continuous(name("timelines.score.tweet_detail_dwelled"), Set(EngagementScore).asJava) + val PREDICTED_SCORE_FULLSCREEN_VIDEO_DWELL = + new Continuous(name("timelines.score.fullscreen_video_dwell"), Set(EngagementScore).asJava) + + // hydrated in TimelinesSharedFeaturesAdapter that recap adapter calls + val ORIGINAL_AUTHOR_ID = new Discrete(name("entities.original_author_id"), Set(UserId).asJava) + val SOURCE_AUTHOR_ID = new Discrete(name("entities.source_author_id"), Set(UserId).asJava) + val SOURCE_TWEET_ID = new Discrete(name("entities.source_tweet_id"), Set(TweetId).asJava) + val TOPIC_ID = new Discrete(name("entities.topic_id"), Set(SemanticcoreClassification).asJava) + val INFERRED_TOPIC_IDS = + new SparseBinary(name("entities.inferred_topic_ids"), 
Set(SemanticcoreClassification).asJava) + val INFERRED_TOPIC_ID = TypedAggregateGroup.sparseFeature(INFERRED_TOPIC_IDS) + + val WEIGHTED_FAV_COUNT = new Continuous( + name("timelines.earlybird.weighted_fav_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val WEIGHTED_RETWEET_COUNT = new Continuous( + name("timelines.earlybird.weighted_retweet_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val WEIGHTED_REPLY_COUNT = new Continuous( + name("timelines.earlybird.weighted_reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val WEIGHTED_QUOTE_COUNT = new Continuous( + name("timelines.earlybird.weighted_quote_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val EMBEDS_IMPRESSION_COUNT_V2 = new Continuous( + name("timelines.earlybird.embeds_impression_count_v2"), + Set(CountOfImpression).asJava) + val EMBEDS_URL_COUNT_V2 = new Continuous( + name("timelines.earlybird.embeds_url_count_v2"), + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val DECAYED_FAVORITE_COUNT = new Continuous( + name("timelines.earlybird.decayed_favorite_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val DECAYED_RETWEET_COUNT = new Continuous( + name("timelines.earlybird.decayed_retweet_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val DECAYED_REPLY_COUNT = new Continuous( + name("timelines.earlybird.decayed_reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val DECAYED_QUOTE_COUNT = new Continuous( + name("timelines.earlybird.decayed_quote_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val FAKE_FAVORITE_COUNT = new Continuous( + name("timelines.earlybird.fake_favorite_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val FAKE_RETWEET_COUNT = new Continuous( + name("timelines.earlybird.fake_retweet_count"), + Set(CountOfPrivateRetweets, 
CountOfPublicRetweets).asJava) + val FAKE_REPLY_COUNT = new Continuous( + name("timelines.earlybird.fake_reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val FAKE_QUOTE_COUNT = new Continuous( + name("timelines.earlybird.fake_quote_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val QUOTE_COUNT = new Continuous( + name("timelines.earlybird.quote_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + + // Safety features + val LABEL_ABUSIVE_FLAG = + new Binary(name("timelines.earlybird.label_abusive_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_ABUSIVE_HI_RCL_FLAG = + new Binary(name("timelines.earlybird.label_abusive_hi_rcl_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_DUP_CONTENT_FLAG = + new Binary(name("timelines.earlybird.label_dup_content_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_NSFW_HI_PRC_FLAG = + new Binary(name("timelines.earlybird.label_nsfw_hi_prc_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_NSFW_HI_RCL_FLAG = + new Binary(name("timelines.earlybird.label_nsfw_hi_rcl_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_SPAM_FLAG = + new Binary(name("timelines.earlybird.label_spam_flag"), Set(TweetSafetyLabels).asJava) + val LABEL_SPAM_HI_RCL_FLAG = + new Binary(name("timelines.earlybird.label_spam_hi_rcl_flag"), Set(TweetSafetyLabels).asJava) + + // Periscope features + val PERISCOPE_EXISTS = new Binary( + name("timelines.earlybird.periscope_exists"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val PERISCOPE_IS_LIVE = new Binary( + name("timelines.earlybird.periscope_is_live"), + Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava) + val PERISCOPE_HAS_BEEN_FEATURED = new Binary( + name("timelines.earlybird.periscope_has_been_featured"), + Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava) + val PERISCOPE_IS_CURRENTLY_FEATURED = new Binary( + name("timelines.earlybird.periscope_is_currently_featured"), + 
Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava + ) + val PERISCOPE_IS_FROM_QUALITY_SOURCE = new Binary( + name("timelines.earlybird.periscope_is_from_quality_source"), + Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava + ) + + val VISIBLE_TOKEN_RATIO = new Continuous(name("timelines.earlybird.visible_token_ratio")) + val HAS_QUOTE = new Binary( + name("timelines.earlybird.has_quote"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val IS_COMPOSER_SOURCE_CAMERA = new Binary( + name("timelines.earlybird.is_composer_source_camera"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + + val EARLYBIRD_SCORE = new Continuous( + name("timelines.earlybird_score"), + Set(EngagementScore).asJava + ) // separating from the rest of "timelines.earlybird." namespace + + val DWELL_TIME_MS = new Continuous( + name("timelines.engagement.dwell_time_ms"), + Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava) + + val TWEET_DETAIL_DWELL_TIME_MS = new Continuous( + name("timelines.engagement.tweet_detail_dwell_time_ms"), + Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava) + + val PROFILE_DWELL_TIME_MS = new Continuous( + name("timelines.engagement.profile_dwell_time_ms"), + Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava) + + val FULLSCREEN_VIDEO_DWELL_TIME_MS = new Continuous( + name("timelines.engagement.fullscreen_video_dwell_time_ms"), + Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava) + + val LINK_DWELL_TIME_MS = new Continuous( + name("timelines.engagement.link_dwell_time_ms"), + Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava) + + val ASPECT_RATIO_DEN = new Continuous( + name("tweetsource.tweet.media.aspect_ratio_den"), + Set(MediaFile, MediaProcessingInformation).asJava) + val ASPECT_RATIO_NUM = new Continuous( + 
name("tweetsource.tweet.media.aspect_ratio_num"), + Set(MediaFile, MediaProcessingInformation).asJava) + val BIT_RATE = new Continuous( + name("tweetsource.tweet.media.bit_rate"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HEIGHT_2 = new Continuous( + name("tweetsource.tweet.media.height_2"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HEIGHT_1 = new Continuous( + name("tweetsource.tweet.media.height_1"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HEIGHT_3 = new Continuous( + name("tweetsource.tweet.media.height_3"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HEIGHT_4 = new Continuous( + name("tweetsource.tweet.media.height_4"), + Set(MediaFile, MediaProcessingInformation).asJava) + val RESIZE_METHOD_1 = new Discrete( + name("tweetsource.tweet.media.resize_method_1"), + Set(MediaFile, MediaProcessingInformation).asJava) + val RESIZE_METHOD_2 = new Discrete( + name("tweetsource.tweet.media.resize_method_2"), + Set(MediaFile, MediaProcessingInformation).asJava) + val RESIZE_METHOD_3 = new Discrete( + name("tweetsource.tweet.media.resize_method_3"), + Set(MediaFile, MediaProcessingInformation).asJava) + val RESIZE_METHOD_4 = new Discrete( + name("tweetsource.tweet.media.resize_method_4"), + Set(MediaFile, MediaProcessingInformation).asJava) + val VIDEO_DURATION = new Continuous( + name("tweetsource.tweet.media.video_duration"), + Set(MediaFile, MediaProcessingInformation).asJava) + val WIDTH_1 = new Continuous( + name("tweetsource.tweet.media.width_1"), + Set(MediaFile, MediaProcessingInformation).asJava) + val WIDTH_2 = new Continuous( + name("tweetsource.tweet.media.width_2"), + Set(MediaFile, MediaProcessingInformation).asJava) + val WIDTH_3 = new Continuous( + name("tweetsource.tweet.media.width_3"), + Set(MediaFile, MediaProcessingInformation).asJava) + val WIDTH_4 = new Continuous( + name("tweetsource.tweet.media.width_4"), + Set(MediaFile, MediaProcessingInformation).asJava) + val NUM_MEDIA_TAGS = 
new Continuous( + name("tweetsource.tweet.media.num_tags"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val MEDIA_TAG_SCREEN_NAMES = new SparseBinary( + name("tweetsource.tweet.media.tag_screen_names"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val STICKER_IDS = new SparseBinary( + name("tweetsource.tweet.media.sticker_ids"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + + val NUM_COLOR_PALLETTE_ITEMS = new Continuous( + name("tweetsource.v2.tweet.media.num_color_pallette_items"), + Set(MediaFile, MediaProcessingInformation).asJava) + val COLOR_1_RED = new Continuous( + name("tweetsource.v2.tweet.media.color_1_red"), + Set(MediaFile, MediaProcessingInformation).asJava) + val COLOR_1_BLUE = new Continuous( + name("tweetsource.v2.tweet.media.color_1_blue"), + Set(MediaFile, MediaProcessingInformation).asJava) + val COLOR_1_GREEN = new Continuous( + name("tweetsource.v2.tweet.media.color_1_green"), + Set(MediaFile, MediaProcessingInformation).asJava) + val COLOR_1_PERCENTAGE = new Continuous( + name("tweetsource.v2.tweet.media.color_1_percentage"), + Set(MediaFile, MediaProcessingInformation).asJava) + val MEDIA_PROVIDERS = new SparseBinary( + name("tweetsource.v2.tweet.media.providers"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val IS_360 = new Binary( + name("tweetsource.v2.tweet.media.is_360"), + Set(MediaFile, MediaProcessingInformation).asJava) + val VIEW_COUNT = + new Continuous(name("tweetsource.v2.tweet.media.view_count"), Set(MediaContentMetrics).asJava) + val IS_MANAGED = new Binary( + name("tweetsource.v2.tweet.media.is_managed"), + Set(MediaFile, MediaProcessingInformation).asJava) + val IS_MONETIZABLE = new Binary( + name("tweetsource.v2.tweet.media.is_monetizable"), + Set(MediaFile, MediaProcessingInformation).asJava) + val IS_EMBEDDABLE = new Binary( + name("tweetsource.v2.tweet.media.is_embeddable"), + 
Set(MediaFile, MediaProcessingInformation).asJava) + val CLASSIFICATION_LABELS = new SparseContinuous( + name("tweetsource.v2.tweet.media.classification_labels"), + Set(MediaFile, MediaProcessingInformation).asJava) + + val NUM_STICKERS = new Continuous( + name("tweetsource.v2.tweet.media.num_stickers"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val NUM_FACES = new Continuous( + name("tweetsource.v2.tweet.media.num_faces"), + Set(MediaFile, MediaProcessingInformation).asJava) + val FACE_AREAS = new Continuous( + name("tweetsource.v2.tweet.media.face_areas"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_SELECTED_PREVIEW_IMAGE = new Binary( + name("tweetsource.v2.tweet.media.has_selected_preview_image"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_TITLE = new Binary( + name("tweetsource.v2.tweet.media.has_title"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_DESCRIPTION = new Binary( + name("tweetsource.v2.tweet.media.has_description"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_VISIT_SITE_CALL_TO_ACTION = new Binary( + name("tweetsource.v2.tweet.media.has_visit_site_call_to_action"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_APP_INSTALL_CALL_TO_ACTION = new Binary( + name("tweetsource.v2.tweet.media.has_app_install_call_to_action"), + Set(MediaFile, MediaProcessingInformation).asJava) + val HAS_WATCH_NOW_CALL_TO_ACTION = new Binary( + name("tweetsource.v2.tweet.media.has_watch_now_call_to_action"), + Set(MediaFile, MediaProcessingInformation).asJava) + + val NUM_CAPS = + new Continuous(name("tweetsource.tweet.text.num_caps"), Set(PublicTweets, PrivateTweets).asJava) + val TWEET_LENGTH = + new Continuous(name("tweetsource.tweet.text.length"), Set(PublicTweets, PrivateTweets).asJava) + val TWEET_LENGTH_TYPE = new Discrete( + name("tweetsource.tweet.text.length_type"), + Set(PublicTweets, PrivateTweets).asJava) + val NUM_WHITESPACES = 
new Continuous( + name("tweetsource.tweet.text.num_whitespaces"), + Set(PublicTweets, PrivateTweets).asJava) + val HAS_QUESTION = + new Binary(name("tweetsource.tweet.text.has_question"), Set(PublicTweets, PrivateTweets).asJava) + val NUM_NEWLINES = new Continuous( + name("tweetsource.tweet.text.num_newlines"), + Set(PublicTweets, PrivateTweets).asJava) + val EMOJI_TOKENS = new SparseBinary( + name("tweetsource.v3.tweet.text.emoji_tokens"), + Set(PublicTweets, PrivateTweets).asJava) + val EMOTICON_TOKENS = new SparseBinary( + name("tweetsource.v3.tweet.text.emoticon_tokens"), + Set(PublicTweets, PrivateTweets).asJava) + val NUM_EMOJIS = new Continuous( + name("tweetsource.v3.tweet.text.num_emojis"), + Set(PublicTweets, PrivateTweets).asJava) + val NUM_EMOTICONS = new Continuous( + name("tweetsource.v3.tweet.text.num_emoticons"), + Set(PublicTweets, PrivateTweets).asJava) + val POS_UNIGRAMS = new SparseBinary( + name("tweetsource.v3.tweet.text.pos_unigrams"), + Set(PublicTweets, PrivateTweets).asJava) + val POS_BIGRAMS = new SparseBinary( + name("tweetsource.v3.tweet.text.pos_bigrams"), + Set(PublicTweets, PrivateTweets).asJava) + val TEXT_TOKENS = new SparseBinary( + name("tweetsource.v4.tweet.text.tokens"), + Set(PublicTweets, PrivateTweets).asJava) + + // Health features model scores (see go/toxicity, go/pblock, go/pspammytweet) + val PBLOCK_SCORE = + new Continuous(name("timelines.earlybird.pblock_score"), Set(TweetSafetyScores).asJava) + val TOXICITY_SCORE = + new Continuous(name("timelines.earlybird.toxicity_score"), Set(TweetSafetyScores).asJava) + val EXPERIMENTAL_HEALTH_MODEL_SCORE_1 = + new Continuous( + name("timelines.earlybird.experimental_health_model_score_1"), + Set(TweetSafetyScores).asJava) + val EXPERIMENTAL_HEALTH_MODEL_SCORE_2 = + new Continuous( + name("timelines.earlybird.experimental_health_model_score_2"), + Set(TweetSafetyScores).asJava) + val EXPERIMENTAL_HEALTH_MODEL_SCORE_3 = + new Continuous( + 
name("timelines.earlybird.experimental_health_model_score_3"), + Set(TweetSafetyScores).asJava) + val EXPERIMENTAL_HEALTH_MODEL_SCORE_4 = + new Continuous( + name("timelines.earlybird.experimental_health_model_score_4"), + Set(TweetSafetyScores).asJava) + val PSPAMMY_TWEET_SCORE = + new Continuous(name("timelines.earlybird.pspammy_tweet_score"), Set(TweetSafetyScores).asJava) + val PREPORTED_TWEET_SCORE = + new Continuous(name("timelines.earlybird.preported_tweet_score"), Set(TweetSafetyScores).asJava) + + // where record was displayed e.g. recap vs ranked timeline vs recycled + // (do NOT use for training in prediction, since this is set post-scoring) + // This differs from TimelinesSharedFeatures.INJECTION_TYPE, which is only + // set to Recap or Rectweet, and is available pre-scoring. + // This also differs from TimeFeatures.IS_TWEET_RECYCLED, which is set + // pre-scoring and indicates if a tweet is being considered for recycling. + // In contrast, DISPLAY_SUGGEST_TYPE == RecycledTweet means the tweet + // was actually served in a recycled tweet module. The two should currently + // have the same value, but need not in future, so please only use + // IS_TWEET_RECYCLED/CANDIDATE_TWEET_SOURCE_ID for training models and + // only use DISPLAY_SUGGEST_TYPE for offline analysis of tweets actually + // served in recycled modules. + val DISPLAY_SUGGEST_TYPE = new Discrete(name("recap.display.suggest_type")) + + // Candidate tweet source id - related to DISPLAY_SUGGEST_TYPE above, but this is a + // property of the candidate rather than display location so is safe to use + // in model training, unlike DISPLAY_SUGGEST_TYPE. 
+ val CANDIDATE_TWEET_SOURCE_ID = + new Discrete(name("timelines.meta.candidate_tweet_source_id"), Set(TweetId).asJava) + + // Was at least 50% of this tweet in the user's viewport for at least 500 ms, + // OR did the user engage with the tweet publicly or privately + val IS_LINGER_IMPRESSION = + new Binary(name("timelines.engagement.is_linger_impression"), Set(EngagementsPrivate).asJava) + + // Features to create rollups + val LANGUAGE_GROUP = new Discrete(name("timelines.tweet.text.language_group")) + + // The final position index of the tweet being trained on in the timeline + // served from TLM (could still change later in TLS-API), as recorded by + // PositionIndexLoggingEnvelopeTransform. + val FINAL_POSITION_INDEX = new Discrete(name("timelines.display.final_position_index")) + + // The traceId of the timeline request, can be used to group tweets in the same response. + val TRACE_ID = new Discrete(name("timelines.display.trace_id"), Set(TfeTransactionId).asJava) + + // Whether this tweet was randomly injected into the timeline or not, for exploration purposes + val IS_RANDOM_TWEET = new Binary(name("timelines.display.is_random_tweet")) + + // Whether this tweet was reordered with softmax ranking for explore/exploit, and needs to + // be excluded from exploit only holdback + val IS_SOFTMAX_RANKING_TWEET = new Binary(name("timelines.display.is_softmax_ranking_tweet")) + + // Whether the user viewing the tweet has disabled ranked timeline. + val IS_RANKED_TIMELINE_DISABLER = new Binary( + name("timelines.user_features.is_ranked_timeline_disabler"), + Set(AnnotationValue, GeneralSettings).asJava) + + // Whether the user viewing the tweet was one of those released from DDG 4205 control + // as part of http://go/shrink-4205 process to shrink the quality features holdback. 
+ val IS_USER_RELEASED_FROM_QUALITY_HOLDBACK = new Binary( + name("timelines.user_features.is_released_from_quality_holdback"), + Set(ExperimentId, ExperimentName).asJava) + + val INITIAL_PREDICTION_FAV = + new Continuous(name("timelines.initial_prediction.fav"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_RETWEET = + new Continuous(name("timelines.initial_prediction.retweet"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_REPLY = + new Continuous(name("timelines.initial_prediction.reply"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_OPEN_LINK = + new Continuous(name("timelines.initial_prediction.open_link"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_PROFILE_CLICK = + new Continuous(name("timelines.initial_prediction.profile_click"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_VIDEO_PLAYBACK_50 = new Continuous( + name("timelines.initial_prediction.video_playback_50"), + Set(EngagementScore).asJava) + val INITIAL_PREDICTION_DETAIL_EXPAND = + new Continuous(name("timelines.initial_prediction.detail_expand"), Set(EngagementScore).asJava) + val INITIAL_PREDICTION_PHOTO_EXPAND = + new Continuous(name("timelines.initial_prediction.photo_expand"), Set(EngagementScore).asJava) + + val VIEWER_FOLLOWS_ORIGINAL_AUTHOR = + new Binary(name("timelines.viewer_follows_original_author"), Set(Follow).asJava) + + val IS_TOP_ONE = new Binary(name("timelines.position.is_top_one")) + val IS_TOP_FIVE = + new Binary(name(featureName = "timelines.position.is_top_five")) + val IS_TOP_TEN = + new Binary(name(featureName = "timelines.position.is_top_ten")) + + val LOG_POSITION = + new Continuous(name(featureName = "timelines.position.log_10")) + +} diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD new file mode 100644 index 000000000..f6caadea0 --- /dev/null +++ 
b/src/scala/com/twitter/timelines/prediction/features/engagement_features/BUILD
@@ -0,0 +1,12 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+        "src/thrift/com/twitter/timelineservice/server/suggests/features/engagement_features:thrift-scala",
+        "timelines/data_processing/ml_util/aggregation_framework:common_types",
+        "timelines/data_processing/ml_util/transforms",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala
new file mode 100644
index 000000000..e65c9db20
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/engagement_features/EngagementFeatures.scala
@@ -0,0 +1,274 @@
+package com.twitter.timelines.prediction.features.engagement_features
+
+import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
+import com.twitter.logging.Logger
+import com.twitter.ml.api.DataRecord
+import com.twitter.ml.api.Feature
+import com.twitter.ml.api.Feature.Continuous
+import com.twitter.ml.api.Feature.SparseBinary
+import com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform
+import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
+import com.twitter.timelines.data_processing.ml_util.transforms.SparseBinaryUnion
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
+import com.twitter.timelineservice.suggests.features.engagement_features.thriftscala.{
+  EngagementFeatures => ThriftEngagementFeatures
+}
+import com.twitter.timelineservice.suggests.features.engagement_features.v1.thriftscala.{
+  EngagementFeatures => ThriftEngagementFeaturesV1
+}
+import scala.collection.JavaConverters._
+
+/**
+ * Companion of the [[EngagementFeatures]] case class. Declares the `EngagementFeature` kinds
+ * that key the feature maps in `EngagementDataRecordFeatures` and converts from Thrift.
+ */
+object EngagementFeatures {
+  private[this] val logger = Logger.get(getClass.getSimpleName)
+
+  // Kinds of feature available for each engagement type (favorites, retweets, replies).
+  sealed trait EngagementFeature
+  case object Count extends EngagementFeature
+  case object RealGraphWeightAverage extends EngagementFeature
+  case object RealGraphWeightMax extends EngagementFeature
+  case object RealGraphWeightMin extends EngagementFeature
+  case object RealGraphWeightMissing extends EngagementFeature
+  case object RealGraphWeightVariance extends EngagementFeature
+  case object UserIds extends EngagementFeature
+
+  /**
+   * Converts the Thrift union into an [[EngagementFeatures]].
+   *
+   * Only the V1 variant is handled; `realGraphWeightByUser` is not populated here and keeps
+   * its default (empty map).
+   *
+   * @return the converted features, or None (after logging an error) for any other variant
+   */
+  def fromThrift(thriftEngagementFeatures: ThriftEngagementFeatures): Option[EngagementFeatures] = {
+    thriftEngagementFeatures match {
+      case thriftEngagementFeaturesV1: ThriftEngagementFeatures.V1 =>
+        Some(
+          EngagementFeatures(
+            favoritedBy = thriftEngagementFeaturesV1.v1.favoritedBy,
+            retweetedBy = thriftEngagementFeaturesV1.v1.retweetedBy,
+            repliedBy = thriftEngagementFeaturesV1.v1.repliedBy,
+          )
+        )
+      case _ => {
+        logger.error("Unexpected EngagementFeatures version found.")
+        None
+      }
+    }
+  }
+
+  // Instance with no engagers and no Real Graph weights (all constructor defaults).
+  val empty: EngagementFeatures = EngagementFeatures()
+}
+
+/**
+ * Contains user IDs who have engaged with a target entity, such as a Tweet,
+ * and any additional data needed for derived features.
+ */
+case class EngagementFeatures(
+  favoritedBy: Seq[Long] = Nil,
+  retweetedBy: Seq[Long] = Nil,
+  repliedBy: Seq[Long] = Nil,
+  realGraphWeightByUser: Map[Long, Double] = Map.empty) {
+  // True when no user has favorited, retweeted or replied; `realGraphWeightByUser` is ignored.
+  def isEmpty: Boolean = favoritedBy.isEmpty && retweetedBy.isEmpty && repliedBy.isEmpty
+  def nonEmpty: Boolean = !isEmpty
+  // Thrift (V1) form of the three engager lists; `realGraphWeightByUser` is not included.
+  def toLogThrift: ThriftEngagementFeatures.V1 =
+    ThriftEngagementFeatures.V1(
+      ThriftEngagementFeaturesV1(
+        favoritedBy = favoritedBy,
+        retweetedBy = retweetedBy,
+        repliedBy = repliedBy
+      )
+    )
+}
+
+/**
+ * Represents engagement features derived from the Real Graph weight.
+ *
+ * These features are from the perspective of the source user, who is viewing their
+ * timeline, to the destination users (or user), who created engagements.
+ *
+ * @param count number of engagements present
+ * @param max max score of the engaging users
+ * @param mean average score of the engaging users
+ * @param min minimum score of the engaging users
+ * @param missing for engagements present, how many Real Graph scores were missing
+ * @param variance variance of scores of the engaging users
+ */
+case class RealGraphDerivedEngagementFeatures(
+  count: Int,
+  max: Double,
+  mean: Double,
+  min: Double,
+  missing: Int,
+  variance: Double)
+
+/**
+ * Feature definitions (names under `engagement_features.`) for favorites, retweets and replies,
+ * grouped per engagement type, plus a transform over the per-type engager user ID features.
+ */
+object EngagementDataRecordFeatures {
+  import EngagementFeatures._
+
+  val FavoritedByUserIds = new SparseBinary(
+    "engagement_features.user_ids.favorited_by",
+    Set(UserId, PrivateLikes, PublicLikes).asJava)
+  val RetweetedByUserIds = new SparseBinary(
+    "engagement_features.user_ids.retweeted_by",
+    Set(UserId, PrivateRetweets, PublicRetweets).asJava)
+  val RepliedByUserIds = new SparseBinary(
+    "engagement_features.user_ids.replied_by",
+    Set(UserId, PrivateReplies, PublicReplies).asJava)
+
+  val InNetworkFavoritesCount = new Continuous(
+    "engagement_features.in_network.favorites.count",
+    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
+  val InNetworkRetweetsCount = new Continuous(
+    "engagement_features.in_network.retweets.count",
+    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
+  val InNetworkRepliesCount = new Continuous(
+    "engagement_features.in_network.replies.count",
+    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
+
+  // real graph derived features
+  val InNetworkFavoritesAvgRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.favorites.avg_weight",
+    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
+  )
+  val InNetworkFavoritesMaxRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.favorites.max_weight",
+    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
+  )
+  val InNetworkFavoritesMinRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.favorites.min_weight",
+    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
+  )
+  val InNetworkFavoritesRealGraphWeightMissing = new Continuous(
+    "engagement_features.real_graph.favorites.missing"
+  )
+  val InNetworkFavoritesRealGraphWeightVariance = new Continuous(
+    "engagement_features.real_graph.favorites.weight_variance"
+  )
+
+  val InNetworkRetweetsMaxRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.retweets.max_weight",
+    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
+  )
+  val InNetworkRetweetsMinRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.retweets.min_weight",
+    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
+  )
+  val InNetworkRetweetsAvgRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.retweets.avg_weight",
+    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
+  )
+  val InNetworkRetweetsRealGraphWeightMissing = new Continuous(
+    "engagement_features.real_graph.retweets.missing"
+  )
+  val InNetworkRetweetsRealGraphWeightVariance = new Continuous(
+    "engagement_features.real_graph.retweets.weight_variance"
+  )
+
+  val InNetworkRepliesMaxRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.replies.max_weight",
+    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
+  )
+  val InNetworkRepliesMinRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.replies.min_weight",
+    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
+  )
+  val InNetworkRepliesAvgRealGraphWeight = new Continuous(
+    "engagement_features.real_graph.replies.avg_weight",
+    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
+  )
+  val InNetworkRepliesRealGraphWeightMissing = new Continuous(
+    "engagement_features.real_graph.replies.missing"
+  )
+  val InNetworkRepliesRealGraphWeightVariance = new Continuous(
+    "engagement_features.real_graph.replies.weight_variance"
+  )
+
+  /**
+   * The features of one engagement type, keyed by `EngagementFeature` kind.
+   * `allFeatures` lists the continuous features followed by the sparse binary ones.
+   */
+  sealed trait FeatureGroup {
+    def continuousFeatures: Map[EngagementFeature, Continuous]
+    def sparseBinaryFeatures: Map[EngagementFeature, SparseBinary]
+    def allFeatures: Seq[Feature[_]] =
+      (continuousFeatures.values ++ sparseBinaryFeatures.values).toSeq
+  }
+
+  case object Favorites extends FeatureGroup {
+    override val continuousFeatures: Map[EngagementFeature, Continuous] =
+      Map(
+        Count -> InNetworkFavoritesCount,
+        RealGraphWeightAverage -> InNetworkFavoritesAvgRealGraphWeight,
+        RealGraphWeightMax -> InNetworkFavoritesMaxRealGraphWeight,
+        RealGraphWeightMin -> InNetworkFavoritesMinRealGraphWeight,
+        RealGraphWeightMissing -> InNetworkFavoritesRealGraphWeightMissing,
+        RealGraphWeightVariance -> InNetworkFavoritesRealGraphWeightVariance
+      )
+
+    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
+      Map(UserIds -> FavoritedByUserIds)
+  }
+
+  case object Retweets extends FeatureGroup {
+    override val continuousFeatures: Map[EngagementFeature, Continuous] =
+      Map(
+        Count -> InNetworkRetweetsCount,
+        RealGraphWeightAverage -> InNetworkRetweetsAvgRealGraphWeight,
+        RealGraphWeightMax -> InNetworkRetweetsMaxRealGraphWeight,
+        RealGraphWeightMin -> InNetworkRetweetsMinRealGraphWeight,
+        RealGraphWeightMissing -> InNetworkRetweetsRealGraphWeightMissing,
+        RealGraphWeightVariance -> InNetworkRetweetsRealGraphWeightVariance
+      )
+
+    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
+      Map(UserIds -> RetweetedByUserIds)
+  }
+
+  case object Replies extends FeatureGroup {
+    override val continuousFeatures: Map[EngagementFeature, Continuous] =
+      Map(
+        Count -> InNetworkRepliesCount,
+        RealGraphWeightAverage -> InNetworkRepliesAvgRealGraphWeight,
+        RealGraphWeightMax -> InNetworkRepliesMaxRealGraphWeight,
+        RealGraphWeightMin -> InNetworkRepliesMinRealGraphWeight,
+        RealGraphWeightMissing -> InNetworkRepliesRealGraphWeightMissing,
+        RealGraphWeightVariance -> InNetworkRepliesRealGraphWeightVariance
+      )
+
+    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] =
+      Map(UserIds -> RepliedByUserIds)
+  }
+
+  // Per-type engager ID features, and the output feature of UnifyPublicEngagersTransform below.
+  val PublicEngagerSets = Set(FavoritedByUserIds, RetweetedByUserIds, RepliedByUserIds)
+  val PublicEngagementUserIds = new SparseBinary(
+    "engagement_features.user_ids.public",
+    Set(UserId, EngagementsPublic).asJava
+  )
+  val ENGAGER_ID = TypedAggregateGroup.sparseFeature(PublicEngagementUserIds)
+
+  // NOTE(review): presumably writes the union of `featuresToUnify` into `outputFeature`;
+  // confirm against SparseBinaryUnion.
+  val UnifyPublicEngagersTransform = SparseBinaryUnion(
+    featuresToUnify = PublicEngagerSets,
+    outputFeature = PublicEngagementUserIds
+  )
+
+  // Wraps UnifyPublicEngagersTransform with RichITransform behind the OneToSomeTransform interface.
+  object RichUnifyPublicEngagersTransform extends OneToSomeTransform {
+    override def apply(dataRecord: DataRecord): Option[DataRecord] =
+      RichITransform(EngagementDataRecordFeatures.UnifyPublicEngagersTransform)(dataRecord)
+    override def featuresToTransform: Set[Feature[_]] =
+      EngagementDataRecordFeatures.UnifyPublicEngagersTransform.featuresToUnify.toSet
+  }
+}
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD b/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD
new file mode 100644
index 000000000..c28786b77
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/escherbird/BUILD
@@ -0,0 +1,20 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+        "src/thrift/com/twitter/tweetypie:tweet-scala",
+    ],
+)
+
+scala_library(
+    name = "escherbird-features",
+    sources = ["EscherbirdFeatures.scala"],
+    tags = ["bazel-only"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala
new file mode 100644
index 000000000..3aaf9b856
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeatures.scala
@@ -0,0 +1,19 @@
+package com.twitter.timelines.prediction.features.escherbird
+
+import
com.twitter.dal.personal_data.thriftjava.PersonalDataType._
+import com.twitter.ml.api.Feature
+import java.util.{Set => JSet}
+import scala.collection.JavaConverters._
+
+object EscherbirdFeatures {
+  val TweetGroupIds = new Feature.SparseBinary("escherbird.tweet_group_ids")
+  val TweetDomainIds = new Feature.SparseBinary("escherbird.tweet_domain_ids", Set(DomainId).asJava)
+  val TweetEntityIds =
+    new Feature.SparseBinary("escherbird.tweet_entity_ids", Set(SemanticcoreClassification).asJava)
+}
+
+case class EscherbirdFeatures(
+  tweetId: Long,
+  tweetGroupIds: JSet[String],
+  tweetDomainIds: JSet[String],
+  tweetEntityIds: JSet[String])
diff --git a/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala
new file mode 100644
index 000000000..bd3333a03
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/escherbird/EscherbirdFeaturesConverter.scala
@@ -0,0 +1,34 @@
+package com.twitter.timelines.prediction.features.escherbird
+
+import com.twitter.tweetypie.thriftscala.Tweet
+import scala.collection.JavaConverters._
+
+/**
+ * Converts the Escherbird entity annotations of a Tweet into [[EscherbirdFeatures]].
+ */
+object EscherbirdFeaturesConverter {
+  // Annotations whose domain ID is in this set are dropped by `fromTweet`.
+  val DeprecatedOrTestDomains = Set(1L, 5L, 7L, 9L, 14L, 19L, 20L, 31L)
+
+  /**
+   * Builds the features for `tweet`.
+   *
+   * @return None when the tweet carries no `escherbirdEntityAnnotations`; otherwise the
+   *         distinct group, domain and entity IDs (as strings) of every annotation whose
+   *         domain is not in [[DeprecatedOrTestDomains]].
+   */
+  def fromTweet(tweet: Tweet): Option[EscherbirdFeatures] =
+    for (annotationContainer <- tweet.escherbirdEntityAnnotations) yield {
+      val keptAnnotations = annotationContainer.entityAnnotations
+        .filter(annotation => !DeprecatedOrTestDomains(annotation.domainId))
+      // An entity is only unique within a given domain, so its ID is prefixed with the domain ID.
+      val entityIds = keptAnnotations.map(a => s"${a.domainId}.${a.entityId}").toSet.asJava
+      val domainIds = keptAnnotations.map(_.domainId.toString).toSet.asJava
+      val groupIds = keptAnnotations.map(_.groupId.toString).toSet.asJava
+      EscherbirdFeatures(
+        tweetId = tweet.id,
+        tweetGroupIds = groupIds,
+        tweetDomainIds = domainIds,
+        tweetEntityIds = entityIds)
+    }
+}
diff --git
a/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel b/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel
new file mode 100644
index 000000000..0ee33acdb
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/followsource/BUILD.bazel
@@ -0,0 +1,7 @@
+scala_library(
+    sources = ["*.scala"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala
new file mode 100644
index 000000000..012103b14
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/followsource/FollowSourceFeatures.scala
@@ -0,0 +1,58 @@
+package com.twitter.timelines.prediction.features.followsource
+
+import com.twitter.ml.api.Feature
+import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
+import scala.collection.JavaConverters._
+
+/**
+ * Features describing a follow event: the kind of follow action, when it happened, the client
+ * and surface it came from, and up to two actions/sources that preceded it. Every feature name
+ * is under the `follow_source.` prefix.
+ */
+object FollowSourceFeatures {
+
+  // Corresponds to an algorithm constant from com.twitter.hermit.profile.HermitProfileConstants
+  val FollowSourceAlgorithm = new Feature.Text("follow_source.algorithm")
+
+  // Type of follow action: one of "unfollow", "follow", "follow_back", "follow_many", "follow_all"
+  val FollowAction = new Feature.Text(
+    "follow_source.action",
+    Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
+
+  // Millisecond timestamp when follow occurred
+  val FollowTimestamp =
+    new Feature.Discrete("follow_source.follow_timestamp", Set(Follow, PrivateTimestamp).asJava)
+
+  // Age of follow (in minutes)
+  val FollowAgeMinutes =
+    new Feature.Continuous("follow_source.follow_age_minutes", Set(Follow).asJava)
+
+  // Tweet ID of tweet details page from where follow happened (if applicable)
+  val FollowCauseTweetId = new Feature.Discrete("follow_source.cause_tweet_id", Set(TweetId).asJava)
+
+  // String representation of follow client (android, web, iphone, etc). Derived from "client"
+  // portion of client event namespace.
+  val FollowClientId = new Feature.Text("follow_source.client_id", Set(ClientType).asJava)
+
+  // If the follow happens via a profile's Following or Followers,
+  // the id of the profile owner is recorded here.
+  val FollowAssociationId =
+    new Feature.Discrete("follow_source.association_id", Set(Follow, UserId).asJava)
+
+  // The "friendly name" here is computed using FollowSourceUtil.getSource. It represents
+  // a grouping on a few client events that reflect where the event occurred. For example,
+  // events on the tweet details page are grouped using "tweetDetails":
+  //   case (Some("web"), Some("permalink"), _, _, _) => "tweetDetails"
+  //   case (Some("iphone"), Some("tweet"), _, _, _) => "tweetDetails"
+  //   case (Some("android"), Some("tweet"), _, _, _) => "tweetDetails"
+  val FollowSourceFriendlyName = new Feature.Text("follow_source.friendly_name", Set(Follow).asJava)
+
+  // Up to two sources and actions that preceded the follow (for example, a profile visit
+  // through a mention click, which itself was on a tweet detail page reached through a tweet
+  // click in the Home tab). See go/followsource for more details and examples.
+  // The "source" here is computed using FollowSourceUtil.getSource
+  val PreFollowAction1 = new Feature.Text("follow_source.pre_follow_action_1", Set(Follow).asJava)
+  val PreFollowAction2 = new Feature.Text("follow_source.pre_follow_action_2", Set(Follow).asJava)
+  val PreFollowSource1 = new Feature.Text("follow_source.pre_follow_source_1", Set(Follow).asJava)
+  val PreFollowSource2 = new Feature.Text("follow_source.pre_follow_source_2", Set(Follow).asJava)
+}
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/BUILD b/src/scala/com/twitter/timelines/prediction/features/itl/BUILD
new file mode 100644
index 000000000..6fc497bf3
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/itl/BUILD
@@ -0,0 +1,9 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
+    ],
+)
diff --git a/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala
new file mode 100644
index 000000000..3351e5c11
--- /dev/null
+++ b/src/scala/com/twitter/timelines/prediction/features/itl/ITLFeatures.scala
@@ -0,0 +1,575 @@
+package com.twitter.timelines.prediction.features.itl
+
+import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
+import com.twitter.ml.api.Feature.Binary
+import com.twitter.ml.api.Feature.Continuous
+import com.twitter.ml.api.Feature.Discrete
+import com.twitter.ml.api.Feature.SparseBinary
+import scala.collection.JavaConverters._
+
+object ITLFeatures {
+  // engagement
+  val IS_RETWEETED =
+    new Binary("itl.engagement.is_retweeted", Set(PublicRetweets, PrivateRetweets).asJava)
+  val IS_FAVORITED =
+    new Binary("itl.engagement.is_favorited", Set(PublicLikes, PrivateLikes).asJava)
+  val IS_REPLIED =
+    new Binary("itl.engagement.is_replied", Set(PublicReplies,
PrivateReplies).asJava) + // v1: post click engagements: fav, reply + val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_favorited_or_replied", + Set( + PublicLikes, + PrivateLikes, + PublicReplies, + PrivateReplies, + EngagementsPrivate, + EngagementsPublic).asJava) + // v2: post click engagements: click + val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_v2", + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_favorited", + Set(PublicLikes, PrivateLikes).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_replied", + Set(PublicReplies, PrivateReplies).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_retweeted", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_clicked", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = + new Binary("itl.engagement.is_good_clicked_convo_desc_followed", Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_share_dm_clicked", + Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary( + "itl.engagement.is_good_clicked_convo_desc_profile_clicked", + Set(EngagementsPrivate).asJava) + + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_uam_gt_0", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_uam_gt_1", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val 
IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_uam_gt_2", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary( + "itl.engagement.is_good_clicked_convo_desc_uam_gt_3", + Set(EngagementsPrivate, EngagementsPublic).asJava) + + val IS_TWEET_DETAIL_DWELLED = new Binary( + "itl.engagement.is_tweet_detail_dwelled", + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary( + "itl.engagement.is_tweet_detail_dwelled_8_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary( + "itl.engagement.is_tweet_detail_dwelled_15_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary( + "itl.engagement.is_tweet_detail_dwelled_25_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary( + "itl.engagement.is_tweet_detail_dwelled_30_sec", + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_PROFILE_DWELLED = new Binary( + "itl.engagement.is_profile_dwelled", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_10_SEC = new Binary( + "itl.engagement.is_profile_dwelled_10_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_20_SEC = new Binary( + "itl.engagement.is_profile_dwelled_20_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_30_SEC = new Binary( + "itl.engagement.is_profile_dwelled_30_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED = new Binary( + "itl.engagement.is_fullscreen_video_dwelled", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary( + "itl.engagement.is_fullscreen_video_dwelled_5_sec", + 
Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary( + "itl.engagement.is_fullscreen_video_dwelled_10_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary( + "itl.engagement.is_fullscreen_video_dwelled_20_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary( + "itl.engagement.is_fullscreen_video_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_15_SEC = new Binary( + "itl.engagement.is_link_dwelled_15_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_30_SEC = new Binary( + "itl.engagement.is_link_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_60_SEC = new Binary( + "itl.engagement.is_link_dwelled_60_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_QUOTED = + new Binary("itl.engagement.is_quoted", Set(PublicRetweets, PrivateRetweets).asJava) + val IS_RETWEETED_WITHOUT_QUOTE = new Binary( + "itl.engagement.is_retweeted_without_quote", + Set(PublicRetweets, PrivateRetweets).asJava) + val IS_CLICKED = new Binary( + "itl.engagement.is_clicked", + Set(EngagementsPrivate, TweetsClicked, LinksClickedOn).asJava) + val IS_PROFILE_CLICKED = new Binary( + "itl.engagement.is_profile_clicked", + Set(EngagementsPrivate, TweetsClicked, ProfilesViewed, ProfilesClicked).asJava) + val IS_DWELLED = new Binary("itl.engagement.is_dwelled", Set(EngagementsPrivate).asJava) + val IS_DWELLED_IN_BOUNDS_V1 = + new Binary("itl.engagement.is_dwelled_in_bounds_v1", Set(EngagementsPrivate).asJava) + val DWELL_NORMALIZED_OVERALL = + new 
Continuous("itl.engagement.dwell_normalized_overall", Set(EngagementsPrivate).asJava) + val DWELL_CDF_OVERALL = + new Continuous("itl.engagement.dwell_cdf_overall", Set(EngagementsPrivate).asJava) + val DWELL_CDF = new Continuous("itl.engagement.dwell_cdf", Set(EngagementsPrivate).asJava) + + val IS_DWELLED_1S = new Binary("itl.engagement.is_dwelled_1s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_2S = new Binary("itl.engagement.is_dwelled_2s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_3S = new Binary("itl.engagement.is_dwelled_3s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_4S = new Binary("itl.engagement.is_dwelled_4s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_5S = new Binary("itl.engagement.is_dwelled_5s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_6S = new Binary("itl.engagement.is_dwelled_6s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_7S = new Binary("itl.engagement.is_dwelled_7s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_8S = new Binary("itl.engagement.is_dwelled_8s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_9S = new Binary("itl.engagement.is_dwelled_9s", Set(EngagementsPrivate).asJava) + val IS_DWELLED_10S = new Binary("itl.engagement.is_dwelled_10s", Set(EngagementsPrivate).asJava) + + val IS_SKIPPED_1S = new Binary("itl.engagement.is_skipped_1s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_2S = new Binary("itl.engagement.is_skipped_2s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_3S = new Binary("itl.engagement.is_skipped_3s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_4S = new Binary("itl.engagement.is_skipped_4s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_5S = new Binary("itl.engagement.is_skipped_5s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_6S = new Binary("itl.engagement.is_skipped_6s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_7S = new Binary("itl.engagement.is_skipped_7s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_8S = new 
Binary("itl.engagement.is_skipped_8s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_9S = new Binary("itl.engagement.is_skipped_9s", Set(EngagementsPrivate).asJava) + val IS_SKIPPED_10S = new Binary("itl.engagement.is_skipped_10s", Set(EngagementsPrivate).asJava) + + val IS_FOLLOWED = + new Binary("itl.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_IMPRESSED = new Binary("itl.engagement.is_impressed", Set(EngagementsPrivate).asJava) + val IS_OPEN_LINKED = + new Binary("itl.engagement.is_open_linked", Set(EngagementsPrivate, LinksClickedOn).asJava) + val IS_PHOTO_EXPANDED = new Binary( + "itl.engagement.is_photo_expanded", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_VIDEO_VIEWED = + new Binary("itl.engagement.is_video_viewed", Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_VIDEO_PLAYBACK_50 = new Binary( + "itl.engagement.is_video_playback_50", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_VIDEO_QUALITY_VIEWED = new Binary( + "itl.engagement.is_video_quality_viewed", + Set(EngagementsPrivate, EngagementsPublic).asJava + ) + val IS_BOOKMARKED = + new Binary("itl.engagement.is_bookmarked", Set(EngagementsPrivate).asJava) + val IS_SHARED = + new Binary("itl.engagement.is_shared", Set(EngagementsPrivate).asJava) + val IS_SHARE_MENU_CLICKED = + new Binary("itl.engagement.is_share_menu_clicked", Set(EngagementsPrivate).asJava) + + // Negative engagements + val IS_DONT_LIKE = + new Binary("itl.engagement.is_dont_like", Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_BLOCK_CLICKED = new Binary( + "itl.engagement.is_block_clicked", + Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava) + val IS_BLOCK_DIALOG_BLOCKED = new Binary( + "itl.engagement.is_block_dialog_blocked", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_MUTE_CLICKED = + new Binary("itl.engagement.is_mute_clicked", Set(TweetsClicked, EngagementsPrivate).asJava) + val 
IS_MUTE_DIALOG_MUTED = + new Binary("itl.engagement.is_mute_dialog_muted", Set(EngagementsPrivate).asJava) + val IS_REPORT_TWEET_CLICKED = new Binary( + "itl.engagement.is_report_tweet_clicked", + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_CARET_CLICKED = + new Binary("itl.engagement.is_caret_clicked", Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_NOT_ABOUT_TOPIC = + new Binary("itl.engagement.is_not_about_topic", Set(EngagementsPrivate).asJava) + val IS_NOT_RECENT = + new Binary("itl.engagement.is_not_recent", Set(EngagementsPrivate).asJava) + val IS_NOT_RELEVANT = + new Binary("itl.engagement.is_not_relevant", Set(EngagementsPrivate).asJava) + val IS_SEE_FEWER = + new Binary("itl.engagement.is_see_fewer", Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC = + new Binary("itl.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava) + val IS_FOLLOW_TOPIC = + new Binary("itl.engagement.is_follow_topic", Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN_TOPIC = + new Binary("itl.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava) + val IS_HOME_LATEST_VISITED = + new Binary("itl.engagement.is_home_latest_visited", Set(EngagementsPrivate).asJava) + + // This derived label is the logical OR of IS_DONT_LIKE, IS_BLOCK_CLICKED, IS_MUTE_CLICKED and IS_REPORT_TWEET_CLICKED + val IS_NEGATIVE_FEEDBACK = + new Binary("itl.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava) + + // Reciprocal engagements for reply forward engagement + val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_impressed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_favorited_by_author", + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_quoted_by_author", + 
Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_replied_by_author", + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_retweeted_by_author", + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_blocked_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_followed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_unfollowed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_muted_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_reported_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + + // This derived label is the logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED + val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary( + "itl.engagement.is_replied_reply_engaged_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava) + + // Reciprocal engagements for fav forward engagement + val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary( + "itl.engagement.is_favorited_fav_favorited_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava + ) + val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary( + "itl.engagement.is_favorited_fav_replied_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, 
PublicReplies).asJava + ) + val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary( + "itl.engagement.is_favorited_fav_retweeted_by_author", + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava + ) + val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary( + "itl.engagement.is_favorited_fav_followed_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava + ) + // This derived label is the logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED + val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary( + "itl.engagement.is_favorited_fav_engaged_by_author", + Set(EngagementsPrivate, EngagementsPublic).asJava + ) + + // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.) at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary( + "itl.engagement.is_profile_clicked_and_profile_follow", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary( + "itl.engagement.is_profile_clicked_and_profile_fav", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary( + "itl.engagement.is_profile_clicked_and_profile_reply", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary( + "itl.engagement.is_profile_clicked_and_profile_retweet", + Set( + ProfilesViewed, + ProfilesClicked, + EngagementsPrivate, + PrivateRetweets, + PublicRetweets).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary( + "itl.engagement.is_profile_clicked_and_profile_tweet_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary( + "itl.engagement.is_profile_clicked_and_profile_share_dm_click", + Set(ProfilesViewed, 
ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of all binary features above + val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary( + "itl.engagement.is_profile_clicked_and_profile_engaged", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava) + + // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary( + "itl.engagement.is_profile_clicked_and_profile_user_report_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary( + "itl.engagement.is_profile_clicked_and_profile_tweet_report_click", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary( + "itl.engagement.is_profile_clicked_and_profile_mute", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary( + "itl.engagement.is_profile_clicked_and_profile_block", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of bad profile click engagements and existing negative feedback + val IS_NEGATIVE_FEEDBACK_V2 = new Binary( + "itl.engagement.is_negative_feedback_v2", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // engagement for following user from any surface area + val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary( + "itl.engagement.is_followed_from_any_surface_area", + Set(EngagementsPublic, EngagementsPrivate).asJava) + + // Relevance prompt tweet engagements + val IS_RELEVANCE_PROMPT_YES_CLICKED = + new Binary("itl.engagement.is_relevance_prompt_yes_clicked", Set(EngagementsPrivate).asJava) + + // Reply downvote engagements + val IS_REPLY_DOWNVOTED = + new Binary("itl.engagement.is_reply_downvoted", 
Set(EngagementsPrivate).asJava) + val IS_REPLY_DOWNVOTE_REMOVED = + new Binary("itl.engagement.is_reply_downvote_removed", Set(EngagementsPrivate).asJava) + + // Features from RecommendedTweet; all are named "itl.recommended_tweet_features.*" + val RECTWEET_SCORE = new Continuous("itl.recommended_tweet_features.rectweet_score") + val NUM_FAVORITING_USERS = new Continuous("itl.recommended_tweet_features.num_favoriting_users") + val NUM_FOLLOWING_USERS = new Continuous("itl.recommended_tweet_features.num_following_users") + val CONTENT_SOURCE_TYPE = new Discrete("itl.recommended_tweet_features.content_source_type") + + val RECOS_SCORE = new Continuous( + "itl.recommended_tweet_features.recos_score", + Set(EngagementScore, UsersRealGraphScore, UsersSalsaScore).asJava) + val AUTHOR_REALGRAPH_SCORE = new Continuous( + "itl.recommended_tweet_features.realgraph_score", + Set(UsersRealGraphScore).asJava) + val AUTHOR_SARUS_SCORE = new Continuous( + "itl.recommended_tweet_features.sarus_score", + Set(EngagementScore, UsersSalsaScore).asJava) + + val NUM_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.num_interacting_users", + Set(EngagementScore).asJava + ) + val MAX_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.max_realgraph_score_of_interacting_users", + Set(UsersRealGraphScore, EngagementScore).asJava + ) + val SUM_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.sum_realgraph_score_of_interacting_users", + Set(UsersRealGraphScore, EngagementScore).asJava + ) + val AVG_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.avg_realgraph_score_of_interacting_users", + Set(UsersRealGraphScore, EngagementScore).asJava + ) + val MAX_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.max_sarus_score_of_interacting_users", + Set(EngagementScore, UsersSalsaScore).asJava + ) + val SUM_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous( + 
"itl.recommended_tweet_features.sum_sarus_score_of_interacting_users", + Set(EngagementScore, UsersSalsaScore).asJava + ) + val AVG_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous( + "itl.recommended_tweet_features.avg_sarus_score_of_interacting_users", + Set(EngagementScore, UsersSalsaScore).asJava + ) + + val NUM_INTERACTING_FOLLOWINGS = new Continuous( + "itl.recommended_tweet_features.num_interacting_followings", + Set(EngagementScore).asJava + ) + + // features from HydratedTweetFeatures + val REAL_GRAPH_WEIGHT = + new Continuous("itl.hydrated_tweet_features.real_graph_weight", Set(UsersRealGraphScore).asJava) + val SARUS_GRAPH_WEIGHT = new Continuous("itl.hydrated_tweet_features.sarus_graph_weight") + val FROM_TOP_ENGAGED_USER = new Binary("itl.hydrated_tweet_features.from_top_engaged_user") + val FROM_TOP_INFLUENCER = new Binary("itl.hydrated_tweet_features.from_top_influencer") + val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous( // NOTE(review): identifier is spelled INTERSTED while the feature name below says "interested"; renaming would affect callers, so confirm before touching + "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_author_known_for" + ) + val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous( + "itl.hydrated_tweet_features.topic_sim_searcher_author_both_interested_in" + ) + val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous( + "itl.hydrated_tweet_features.topic_sim_searcher_author_both_known_for" + ) + val USER_REP = new Continuous("itl.hydrated_tweet_features.user_rep") + val NORMALIZED_PARUS_SCORE = new Continuous("itl.hydrated_tweet_features.normalized_parus_score") + val CONTAINS_MEDIA = new Binary("itl.hydrated_tweet_features.contains_media") + val FROM_NEARBY = new Binary("itl.hydrated_tweet_features.from_nearby") + val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous( + "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_tweet" + ) + val MATCHES_UI_LANG = new Binary( + "itl.hydrated_tweet_features.matches_ui_lang", + Set(ProvidedLanguage, InferredLanguage).asJava) + val MATCHES_SEARCHER_MAIN_LANG = new Binary( + 
"itl.hydrated_tweet_features.matches_searcher_main_lang", + Set(ProvidedLanguage, InferredLanguage).asJava + ) + val MATCHES_SEARCHER_LANGS = new Binary( + "itl.hydrated_tweet_features.matches_searcher_langs", + Set(ProvidedLanguage, InferredLanguage).asJava) + val HAS_CARD = new Binary( + "itl.hydrated_tweet_features.has_card", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_IMAGE = new Binary( + "itl.hydrated_tweet_features.has_image", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_NATIVE_IMAGE = new Binary( + "itl.hydrated_tweet_features.has_native_image", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VIDEO = new Binary("itl.hydrated_tweet_features.has_video") + val HAS_CONSUMER_VIDEO = new Binary( + "itl.hydrated_tweet_features.has_consumer_video", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_PRO_VIDEO = new Binary( + "itl.hydrated_tweet_features.has_pro_video", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_PERISCOPE = new Binary( + "itl.hydrated_tweet_features.has_periscope", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VINE = new Binary( + "itl.hydrated_tweet_features.has_vine", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_NATIVE_VIDEO = new Binary( + "itl.hydrated_tweet_features.has_native_video", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_LINK = new Binary( + "itl.hydrated_tweet_features.has_link", + Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val LINK_COUNT = new Continuous( + "itl.hydrated_tweet_features.link_count", + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val URL_DOMAINS = new SparseBinary( + 
"itl.hydrated_tweet_features.url_domains", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VISIBLE_LINK = new Binary( + "itl.hydrated_tweet_features.has_visible_link", + Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_NEWS = new Binary( + "itl.hydrated_tweet_features.has_news", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_TREND = new Binary( + "itl.hydrated_tweet_features.has_trend", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val BLENDER_SCORE = + new Continuous("itl.hydrated_tweet_features.blender_score", Set(EngagementScore).asJava) + val PARUS_SCORE = + new Continuous("itl.hydrated_tweet_features.parus_score", Set(EngagementScore).asJava) + val TEXT_SCORE = + new Continuous("itl.hydrated_tweet_features.text_score", Set(EngagementScore).asJava) + val BIDIRECTIONAL_REPLY_COUNT = new Continuous( + "itl.hydrated_tweet_features.bidirectional_reply_count", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val UNIDIRECTIONAL_REPLY_COUNT = new Continuous( + "itl.hydrated_tweet_features.unidirectional_reply_count", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val BIDIRECTIONAL_RETWEET_COUNT = new Continuous( + "itl.hydrated_tweet_features.bidirectional_retweet_count", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous( + "itl.hydrated_tweet_features.unidirectional_retweet_count", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val BIDIRECTIONAL_FAV_COUNT = new Continuous( + "itl.hydrated_tweet_features.bidirectional_fav_count", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val UNIDIRECTIONAL_FAV_COUNT = new Continuous( + "itl.hydrated_tweet_features.unidirectional_fav_count", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val CONVERSATION_COUNT = new 
Continuous("itl.hydrated_tweet_features.conversation_count") + val FAV_COUNT = new Continuous( + "itl.hydrated_tweet_features.fav_count", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val REPLY_COUNT = new Continuous( + "itl.hydrated_tweet_features.reply_count", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val RETWEET_COUNT = new Continuous( + "itl.hydrated_tweet_features.retweet_count", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val PREV_USER_TWEET_ENGAGEMENT = new Continuous( // NOTE(review): the feature name below is spelled "enagagement"; presumably it must stay as-is because the string identifies the feature, so confirm before changing + "itl.hydrated_tweet_features.prev_user_tweet_enagagement", + Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava + ) + val IS_SENSITIVE = new Binary("itl.hydrated_tweet_features.is_sensitive") + val HAS_MULTIPLE_MEDIA = new Binary( + "itl.hydrated_tweet_features.has_multiple_media", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary( + "itl.hydrated_tweet_features.has_multiple_hashtag_or_trend", + Set( + UserVisibleFlag, + CountOfPrivateTweetEntitiesAndMetadata, + CountOfPublicTweetEntitiesAndMetadata).asJava) + val IS_AUTHOR_PROFILE_EGG = + new Binary("itl.hydrated_tweet_features.is_author_profile_egg", Set(ProfileImage).asJava) + val IS_AUTHOR_NEW = + new Binary("itl.hydrated_tweet_features.is_author_new", Set(UserType, UserState).asJava) + val NUM_MENTIONS = new Continuous( + "itl.hydrated_tweet_features.num_mentions", + Set( + UserVisibleFlag, + CountOfPrivateTweetEntitiesAndMetadata, + CountOfPublicTweetEntitiesAndMetadata).asJava) + val NUM_HASHTAGS = new Continuous( + "itl.hydrated_tweet_features.num_hashtags", + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val LANGUAGE = new Discrete( + "itl.hydrated_tweet_features.language", + Set(ProvidedLanguage, InferredLanguage).asJava) + val LINK_LANGUAGE = new Continuous( // NOTE(review): declared Continuous while LANGUAGE just above is Discrete; confirm this is intended + "itl.hydrated_tweet_features.link_language", + Set(ProvidedLanguage, 
InferredLanguage).asJava) + val IS_AUTHOR_NSFW = + new Binary("itl.hydrated_tweet_features.is_author_nsfw", Set(UserType).asJava) + val IS_AUTHOR_SPAM = + new Binary("itl.hydrated_tweet_features.is_author_spam", Set(UserType).asJava) + val IS_AUTHOR_BOT = new Binary("itl.hydrated_tweet_features.is_author_bot", Set(UserType).asJava) + val IS_OFFENSIVE = new Binary("itl.hydrated_tweet_features.is_offensive") + val FROM_VERIFIED_ACCOUNT = + new Binary("itl.hydrated_tweet_features.from_verified_account", Set(UserVerifiedFlag).asJava) + val EMBEDS_IMPRESSION_COUNT = new Continuous( + "itl.hydrated_tweet_features.embeds_impression_count", + Set(CountOfImpression).asJava) + val EMBEDS_URL_COUNT = + new Continuous("itl.hydrated_tweet_features.embeds_url_count", Set(UrlFoundFlag).asJava) + val FAV_COUNT_V2 = new Continuous( // NOTE(review): the three *_V2 counts are named "recap.earlybird.*" rather than "itl.*"; presumably the names are shared with another feature set, so confirm + "recap.earlybird.fav_count_v2", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val RETWEET_COUNT_V2 = new Continuous( + "recap.earlybird.retweet_count_v2", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val REPLY_COUNT_V2 = new Continuous( + "recap.earlybird.reply_count_v2", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD new file mode 100644 index 000000000..6fc497bf3 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/list_features/BUILD @@ -0,0 +1,9 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala new file mode 100644 index 000000000..ffb00d1f6 --- /dev/null +++ 
b/src/scala/com/twitter/timelines/prediction/features/list_features/ListFeatures.scala @@ -0,0 +1,24 @@ +package com.twitter.timelines.prediction.features.list_features + +import com.twitter.ml.api.Feature.{Binary, Discrete} +import com.twitter.ml.api.FeatureContext +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import scala.collection.JavaConverters._ +// Declares one Discrete and three Binary features named "list.*", plus a FeatureContext that bundles them. +object ListFeatures { + + // list.id is used for list tweet injections in home. timelines.meta.list_id is used for list tweets in list timeline. + val LIST_ID = new Discrete("list.id") + + val VIEWER_IS_OWNER = + new Binary("list.viewer.is_owner", Set(ListsNonpublicList, ListsPublicList).asJava) + val VIEWER_IS_SUBSCRIBER = new Binary("list.viewer.is_subscriber") + val IS_PINNED_LIST = new Binary("list.is_pinned") + // FeatureContext built from all four features declared above. + val featureContext = new FeatureContext( + LIST_ID, + VIEWER_IS_OWNER, + VIEWER_IS_SUBSCRIBER, + IS_PINNED_LIST + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD new file mode 100644 index 000000000..6fc497bf3 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/BUILD @@ -0,0 +1,9 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala new file mode 100644 index 000000000..65d721a05 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/p_home_latest/HomeLatestUserFeatures.scala @@ -0,0 +1,49 @@ +package com.twitter.timelines.prediction.features.p_home_latest + +import com.twitter.ml.api.Feature.{Continuous, Discrete} +import 
com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import scala.collection.JavaConverters._ + +object HomeLatestUserFeatures { + val LAST_LOGIN_TIMESTAMP_MS = + new Discrete("home_latest.user_feature.last_login_timestamp_ms", Set(PrivateTimestamp).asJava) +} + +object HomeLatestUserAggregatesFeatures { + + /** + * Used as `timestampFeature` in `OfflineAggregateSource` required by feature aggregations, set to + * the `dateRange` end timestamp by default + */ + val AGGREGATE_TIMESTAMP_MS = + new Discrete("home_latest.user_feature.aggregate_timestamp_ms", Set(PrivateTimestamp).asJava) + val HOME_TOP_IMPRESSIONS = + new Continuous("home_latest.user_feature.home_top_impressions", Set(CountOfImpression).asJava) + val HOME_LATEST_IMPRESSIONS = + new Continuous( + "home_latest.user_feature.home_latest_impressions", + Set(CountOfImpression).asJava) + val HOME_TOP_LAST_LOGIN_TIMESTAMP_MS = + new Discrete( + "home_latest.user_feature.home_top_last_login_timestamp_ms", + Set(PrivateTimestamp).asJava) + val HOME_LATEST_LAST_LOGIN_TIMESTAMP_MS = + new Discrete( + "home_latest.user_feature.home_latest_last_login_timestamp_ms", + Set(PrivateTimestamp).asJava) + val HOME_LATEST_MOST_RECENT_CLICK_TIMESTAMP_MS = + new Discrete( + "home_latest.user_feature.home_latest_most_recent_click_timestamp_ms", + Set(PrivateTimestamp).asJava) +} + +case class HomeLatestUserFeatures(userId: Long, lastLoginTimestampMs: Long) + +case class HomeLatestUserAggregatesFeatures( + userId: Long, + aggregateTimestampMs: Long, + homeTopImpressions: Option[Double], + homeLatestImpressions: Option[Double], + homeTopLastLoginTimestampMs: Option[Long], + homeLatestLastLoginTimestampMs: Option[Long], + homeLatestMostRecentClickTimestampMs: Option[Long]) diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD b/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD new file mode 100644 index 000000000..babba31bb --- /dev/null +++ 
b/src/scala/com/twitter/timelines/prediction/features/ppmi/BUILD @@ -0,0 +1,8 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala new file mode 100644 index 000000000..7e6d1dea8 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/ppmi/PpmiFeatures.scala @@ -0,0 +1,7 @@ +package com.twitter.timelines.prediction.features.ppmi + +import com.twitter.ml.api.Feature.Continuous + +object PpmiDataRecordFeatures { + val PPMI_SCORE = new Continuous("ppmi.source_author.score") +} diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD b/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD new file mode 100644 index 000000000..868acec21 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/real_graph/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/scala/com/twitter/ml/featurestore/catalog/entities/core", + "src/scala/com/twitter/ml/featurestore/catalog/entities/timelines", + "src/scala/com/twitter/ml/featurestore/catalog/features/timelines:realgraph", + "src/scala/com/twitter/ml/featurestore/lib/entity", + "src/scala/com/twitter/ml/featurestore/lib/feature", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/timelines/real_graph:real_graph-scala", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala new file mode 100644 index 000000000..7c52349aa --- /dev/null +++ 
b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatureStoreFeatures.scala @@ -0,0 +1,232 @@ +package com.twitter.timelines.prediction.features.real_graph + +import com.twitter.ml.featurestore.catalog.entities.core.UserAuthor +import com.twitter.ml.featurestore.catalog.features.timelines.RealGraph +import com.twitter.ml.featurestore.lib.EdgeEntityId +import com.twitter.ml.featurestore.lib.UserId +import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet +import com.twitter.ml.featurestore.lib.feature.Feature +import com.twitter.ml.featurestore.lib.feature.FeatureSet + +object RealGraphDataRecordFeatureStoreFeatures { + val boundUserAuthorfeatureSet: BoundFeatureSet = FeatureSet( + RealGraph.DestId, + RealGraph.AddressBookEmail.DaysSinceLast, + RealGraph.AddressBookEmail.ElapsedDays, + RealGraph.AddressBookEmail.Ewma, + RealGraph.AddressBookEmail.IsMissing, + RealGraph.AddressBookEmail.Mean, + RealGraph.AddressBookEmail.NonZeroDays, + RealGraph.AddressBookEmail.Variance, + RealGraph.AddressBookInBoth.DaysSinceLast, + RealGraph.AddressBookInBoth.ElapsedDays, + RealGraph.AddressBookInBoth.Ewma, + RealGraph.AddressBookInBoth.IsMissing, + RealGraph.AddressBookInBoth.Mean, + RealGraph.AddressBookInBoth.NonZeroDays, + RealGraph.AddressBookInBoth.Variance, + RealGraph.AddressBookMutualEdgeEmail.DaysSinceLast, + RealGraph.AddressBookMutualEdgeEmail.ElapsedDays, + RealGraph.AddressBookMutualEdgeEmail.Ewma, + RealGraph.AddressBookMutualEdgeEmail.IsMissing, + RealGraph.AddressBookMutualEdgeEmail.Mean, + RealGraph.AddressBookMutualEdgeEmail.NonZeroDays, + RealGraph.AddressBookMutualEdgeEmail.Variance, + RealGraph.AddressBookMutualEdgeInBoth.DaysSinceLast, + RealGraph.AddressBookMutualEdgeInBoth.ElapsedDays, + RealGraph.AddressBookMutualEdgeInBoth.Ewma, + RealGraph.AddressBookMutualEdgeInBoth.IsMissing, + RealGraph.AddressBookMutualEdgeInBoth.Mean, + RealGraph.AddressBookMutualEdgeInBoth.NonZeroDays, + 
RealGraph.AddressBookMutualEdgeInBoth.Variance, + RealGraph.AddressBookMutualEdgePhone.DaysSinceLast, + RealGraph.AddressBookMutualEdgePhone.ElapsedDays, + RealGraph.AddressBookMutualEdgePhone.Ewma, + RealGraph.AddressBookMutualEdgePhone.IsMissing, + RealGraph.AddressBookMutualEdgePhone.Mean, + RealGraph.AddressBookMutualEdgePhone.NonZeroDays, + RealGraph.AddressBookMutualEdgePhone.Variance, + RealGraph.AddressBookPhone.DaysSinceLast, + RealGraph.AddressBookPhone.ElapsedDays, + RealGraph.AddressBookPhone.Ewma, + RealGraph.AddressBookPhone.IsMissing, + RealGraph.AddressBookPhone.Mean, + RealGraph.AddressBookPhone.NonZeroDays, + RealGraph.AddressBookPhone.Variance, + RealGraph.DirectMessages.DaysSinceLast, + RealGraph.DirectMessages.ElapsedDays, + RealGraph.DirectMessages.Ewma, + RealGraph.DirectMessages.IsMissing, + RealGraph.DirectMessages.Mean, + RealGraph.DirectMessages.NonZeroDays, + RealGraph.DirectMessages.Variance, + RealGraph.DwellTime.DaysSinceLast, + RealGraph.DwellTime.ElapsedDays, + RealGraph.DwellTime.Ewma, + RealGraph.DwellTime.IsMissing, + RealGraph.DwellTime.Mean, + RealGraph.DwellTime.NonZeroDays, + RealGraph.DwellTime.Variance, + RealGraph.Follow.DaysSinceLast, + RealGraph.Follow.ElapsedDays, + RealGraph.Follow.Ewma, + RealGraph.Follow.IsMissing, + RealGraph.Follow.Mean, + RealGraph.Follow.NonZeroDays, + RealGraph.Follow.Variance, + RealGraph.InspectedStatuses.DaysSinceLast, + RealGraph.InspectedStatuses.ElapsedDays, + RealGraph.InspectedStatuses.Ewma, + RealGraph.InspectedStatuses.IsMissing, + RealGraph.InspectedStatuses.Mean, + RealGraph.InspectedStatuses.NonZeroDays, + RealGraph.InspectedStatuses.Variance, + RealGraph.Likes.DaysSinceLast, + RealGraph.Likes.ElapsedDays, + RealGraph.Likes.Ewma, + RealGraph.Likes.IsMissing, + RealGraph.Likes.Mean, + RealGraph.Likes.NonZeroDays, + RealGraph.Likes.Variance, + RealGraph.LinkClicks.DaysSinceLast, + RealGraph.LinkClicks.ElapsedDays, + RealGraph.LinkClicks.Ewma, + RealGraph.LinkClicks.IsMissing, + 
RealGraph.LinkClicks.Mean, + RealGraph.LinkClicks.NonZeroDays, + RealGraph.LinkClicks.Variance, + RealGraph.Mentions.DaysSinceLast, + RealGraph.Mentions.ElapsedDays, + RealGraph.Mentions.Ewma, + RealGraph.Mentions.IsMissing, + RealGraph.Mentions.Mean, + RealGraph.Mentions.NonZeroDays, + RealGraph.Mentions.Variance, + RealGraph.MutualFollow.DaysSinceLast, + RealGraph.MutualFollow.ElapsedDays, + RealGraph.MutualFollow.Ewma, + RealGraph.MutualFollow.IsMissing, + RealGraph.MutualFollow.Mean, + RealGraph.MutualFollow.NonZeroDays, + RealGraph.MutualFollow.Variance, + RealGraph.NumTweetQuotes.DaysSinceLast, + RealGraph.NumTweetQuotes.ElapsedDays, + RealGraph.NumTweetQuotes.Ewma, + RealGraph.NumTweetQuotes.IsMissing, + RealGraph.NumTweetQuotes.Mean, + RealGraph.NumTweetQuotes.NonZeroDays, + RealGraph.NumTweetQuotes.Variance, + RealGraph.PhotoTags.DaysSinceLast, + RealGraph.PhotoTags.ElapsedDays, + RealGraph.PhotoTags.Ewma, + RealGraph.PhotoTags.IsMissing, + RealGraph.PhotoTags.Mean, + RealGraph.PhotoTags.NonZeroDays, + RealGraph.PhotoTags.Variance, + RealGraph.ProfileViews.DaysSinceLast, + RealGraph.ProfileViews.ElapsedDays, + RealGraph.ProfileViews.Ewma, + RealGraph.ProfileViews.IsMissing, + RealGraph.ProfileViews.Mean, + RealGraph.ProfileViews.NonZeroDays, + RealGraph.ProfileViews.Variance, + RealGraph.Retweets.DaysSinceLast, + RealGraph.Retweets.ElapsedDays, + RealGraph.Retweets.Ewma, + RealGraph.Retweets.IsMissing, + RealGraph.Retweets.Mean, + RealGraph.Retweets.NonZeroDays, + RealGraph.Retweets.Variance, + RealGraph.SmsFollow.DaysSinceLast, + RealGraph.SmsFollow.ElapsedDays, + RealGraph.SmsFollow.Ewma, + RealGraph.SmsFollow.IsMissing, + RealGraph.SmsFollow.Mean, + RealGraph.SmsFollow.NonZeroDays, + RealGraph.SmsFollow.Variance, + RealGraph.TweetClicks.DaysSinceLast, + RealGraph.TweetClicks.ElapsedDays, + RealGraph.TweetClicks.Ewma, + RealGraph.TweetClicks.IsMissing, + RealGraph.TweetClicks.Mean, + RealGraph.TweetClicks.NonZeroDays, + RealGraph.TweetClicks.Variance, + 
RealGraph.Weight + ).bind(UserAuthor) + + private[this] val edgeFeatures: Seq[RealGraph.EdgeFeature] = Seq( + RealGraph.AddressBookEmail, + RealGraph.AddressBookInBoth, + RealGraph.AddressBookMutualEdgeEmail, + RealGraph.AddressBookMutualEdgeInBoth, + RealGraph.AddressBookMutualEdgePhone, + RealGraph.AddressBookPhone, + RealGraph.DirectMessages, + RealGraph.DwellTime, + RealGraph.Follow, + RealGraph.InspectedStatuses, + RealGraph.Likes, + RealGraph.LinkClicks, + RealGraph.Mentions, + RealGraph.MutualFollow, + RealGraph.PhotoTags, + RealGraph.ProfileViews, + RealGraph.Retweets, + RealGraph.SmsFollow, + RealGraph.TweetClicks + ) + + val htlDoubleFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Double]] = { + val features = edgeFeatures.flatMap { ef => + Seq(ef.Ewma, ef.Mean, ef.Variance) + } ++ Seq(RealGraph.Weight) + features.toSet + } + + val htlLongFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Long]] = { + val features = edgeFeatures.flatMap { ef => + Seq(ef.DaysSinceLast, ef.ElapsedDays, ef.NonZeroDays) + } + features.toSet + } + + private val edgeFeatureToLegacyName = Map( + RealGraph.AddressBookEmail -> "num_address_book_email", + RealGraph.AddressBookInBoth -> "num_address_book_in_both", + RealGraph.AddressBookMutualEdgeEmail -> "num_address_book_mutual_edge_email", + RealGraph.AddressBookMutualEdgeInBoth -> "num_address_book_mutual_edge_in_both", + RealGraph.AddressBookMutualEdgePhone -> "num_address_book_mutual_edge_phone", + RealGraph.AddressBookPhone -> "num_address_book_phone", + RealGraph.DirectMessages -> "direct_messages", + RealGraph.DwellTime -> "total_dwell_time", + RealGraph.Follow -> "num_follow", + RealGraph.InspectedStatuses -> "num_inspected_tweets", + RealGraph.Likes -> "num_favorites", + RealGraph.LinkClicks -> "num_link_clicks", + RealGraph.Mentions -> "num_mentions", + RealGraph.MutualFollow -> "num_mutual_follow", + RealGraph.PhotoTags -> "num_photo_tags", + RealGraph.ProfileViews -> "num_profile_views", + RealGraph.Retweets -> 
"num_retweets", + RealGraph.SmsFollow -> "num_sms_follow", + RealGraph.TweetClicks -> "num_tweet_clicks", + ) + + def convertFeatureToLegacyName( + prefix: String, + variance: String = "variance" + ): Map[Feature[EdgeEntityId[UserId, UserId], _ >: Long with Double <: AnyVal], String] = + edgeFeatureToLegacyName.flatMap { + case (k, v) => + Seq( + k.NonZeroDays -> s"${prefix}.${v}.non_zero_days", + k.DaysSinceLast -> s"${prefix}.${v}.days_since_last", + k.ElapsedDays -> s"${prefix}.${v}.elapsed_days", + k.Ewma -> s"${prefix}.${v}.ewma", + k.Mean -> s"${prefix}.${v}.mean", + k.Variance -> s"${prefix}.${v}.${variance}", + ) + } ++ Map( + RealGraph.Weight -> (prefix + ".weight") + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala new file mode 100644 index 000000000..4c1915944 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/real_graph/RealGraphDataRecordFeatures.scala @@ -0,0 +1,534 @@ +package com.twitter.timelines.prediction.features.real_graph + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature._ +import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphEdgeFeature +import scala.collection.JavaConverters._ + + +object RealGraphDataRecordFeatures { + // the source user id + val SRC_ID = new Discrete("realgraph.src_id", Set(UserId).asJava) + // the destination user id + val DST_ID = new Discrete("realgraph.dst_id", Set(UserId).asJava) + // real graph weight + val WEIGHT = new Continuous("realgraph.weight", Set(UsersRealGraphScore).asJava) + // the number of retweets that the source user sent to the destination user + val NUM_RETWEETS_MEAN = + new Continuous("realgraph.num_retweets.mean", Set(PrivateRetweets, PublicRetweets).asJava) + val NUM_RETWEETS_EWMA = + new Continuous("realgraph.num_retweets.ewma", Set(PrivateRetweets, 
PublicRetweets).asJava) + val NUM_RETWEETS_VARIANCE = + new Continuous("realgraph.num_retweets.variance", Set(PrivateRetweets, PublicRetweets).asJava) + val NUM_RETWEETS_NON_ZERO_DAYS = new Continuous( + "realgraph.num_retweets.non_zero_days", + Set(PrivateRetweets, PublicRetweets).asJava) + val NUM_RETWEETS_ELAPSED_DAYS = new Continuous( + "realgraph.num_retweets.elapsed_days", + Set(PrivateRetweets, PublicRetweets).asJava) + val NUM_RETWEETS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_retweets.days_since_last", + Set(PrivateRetweets, PublicRetweets).asJava) + val NUM_RETWEETS_IS_MISSING = + new Binary("realgraph.num_retweets.is_missing", Set(PrivateRetweets, PublicRetweets).asJava) + // the number of favorites that the source user sent to the destination user + val NUM_FAVORITES_MEAN = + new Continuous("realgraph.num_favorites.mean", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_EWMA = + new Continuous("realgraph.num_favorites.ewma", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_VARIANCE = + new Continuous("realgraph.num_favorites.variance", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_NON_ZERO_DAYS = + new Continuous("realgraph.num_favorites.non_zero_days", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_ELAPSED_DAYS = + new Continuous("realgraph.num_favorites.elapsed_days", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_DAYS_SINCE_LAST = + new Continuous("realgraph.num_favorites.days_since_last", Set(PublicLikes, PrivateLikes).asJava) + val NUM_FAVORITES_IS_MISSING = + new Binary("realgraph.num_favorites.is_missing", Set(PublicLikes, PrivateLikes).asJava) + // the number of mentions that the source user sent to the destination user + val NUM_MENTIONS_MEAN = + new Continuous("realgraph.num_mentions.mean", Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_MENTIONS_EWMA = + new Continuous("realgraph.num_mentions.ewma", Set(EngagementsPrivate, EngagementsPublic).asJava) + val
NUM_MENTIONS_VARIANCE = new Continuous( + "realgraph.num_mentions.variance", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_MENTIONS_NON_ZERO_DAYS = new Continuous( + "realgraph.num_mentions.non_zero_days", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_MENTIONS_ELAPSED_DAYS = new Continuous( + "realgraph.num_mentions.elapsed_days", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_MENTIONS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_mentions.days_since_last", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_MENTIONS_IS_MISSING = new Binary( + "realgraph.num_mentions.is_missing", + Set(EngagementsPrivate, EngagementsPublic).asJava) + // the number of direct messages that the source user sent to the destination user + val NUM_DIRECT_MESSAGES_MEAN = new Continuous( + "realgraph.num_direct_messages.mean", + Set(DmEntitiesAndMetadata, CountOfDms).asJava) + val NUM_DIRECT_MESSAGES_EWMA = new Continuous( + "realgraph.num_direct_messages.ewma", + Set(DmEntitiesAndMetadata, CountOfDms).asJava) + val NUM_DIRECT_MESSAGES_VARIANCE = new Continuous( + "realgraph.num_direct_messages.variance", + Set(DmEntitiesAndMetadata, CountOfDms).asJava) + val NUM_DIRECT_MESSAGES_NON_ZERO_DAYS = new Continuous( + "realgraph.num_direct_messages.non_zero_days", + Set(DmEntitiesAndMetadata, CountOfDms).asJava + ) + val NUM_DIRECT_MESSAGES_ELAPSED_DAYS = new Continuous( + "realgraph.num_direct_messages.elapsed_days", + Set(DmEntitiesAndMetadata, CountOfDms).asJava + ) + val NUM_DIRECT_MESSAGES_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_direct_messages.days_since_last", + Set(DmEntitiesAndMetadata, CountOfDms).asJava + ) + val NUM_DIRECT_MESSAGES_IS_MISSING = new Binary( + "realgraph.num_direct_messages.is_missing", + Set(DmEntitiesAndMetadata, CountOfDms).asJava) + // the number of tweet clicks that the source user sent to the destination user + val NUM_TWEET_CLICKS_MEAN = + new 
Continuous("realgraph.num_tweet_clicks.mean", Set(TweetsClicked).asJava) + val NUM_TWEET_CLICKS_EWMA = + new Continuous("realgraph.num_tweet_clicks.ewma", Set(TweetsClicked).asJava) + val NUM_TWEET_CLICKS_VARIANCE = + new Continuous("realgraph.num_tweet_clicks.variance", Set(TweetsClicked).asJava) + val NUM_TWEET_CLICKS_NON_ZERO_DAYS = + new Continuous("realgraph.num_tweet_clicks.non_zero_days", Set(TweetsClicked).asJava) + val NUM_TWEET_CLICKS_ELAPSED_DAYS = + new Continuous("realgraph.num_tweet_clicks.elapsed_days", Set(TweetsClicked).asJava) + val NUM_TWEET_CLICKS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_tweet_clicks.days_since_last", + Set(TweetsClicked).asJava + ) + val NUM_TWEET_CLICKS_IS_MISSING = + new Binary("realgraph.num_tweet_clicks.is_missing", Set(TweetsClicked).asJava) + // the number of link clicks that the source user sent to the destination user + val NUM_LINK_CLICKS_MEAN = + new Continuous("realgraph.num_link_clicks.mean", Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_EWMA = + new Continuous("realgraph.num_link_clicks.ewma", Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_VARIANCE = + new Continuous("realgraph.num_link_clicks.variance", Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_NON_ZERO_DAYS = new Continuous( + "realgraph.num_link_clicks.non_zero_days", + Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_ELAPSED_DAYS = new Continuous( + "realgraph.num_link_clicks.elapsed_days", + Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_link_clicks.days_since_last", + Set(CountOfTweetEntitiesClicked).asJava) + val NUM_LINK_CLICKS_IS_MISSING = + new Binary("realgraph.num_link_clicks.is_missing", Set(CountOfTweetEntitiesClicked).asJava) + // the number of profile views that the source user sent to the destination user + val NUM_PROFILE_VIEWS_MEAN = + new Continuous("realgraph.num_profile_views.mean", 
Set(ProfilesViewed).asJava) + val NUM_PROFILE_VIEWS_EWMA = + new Continuous("realgraph.num_profile_views.ewma", Set(ProfilesViewed).asJava) + val NUM_PROFILE_VIEWS_VARIANCE = + new Continuous("realgraph.num_profile_views.variance", Set(ProfilesViewed).asJava) + val NUM_PROFILE_VIEWS_NON_ZERO_DAYS = + new Continuous("realgraph.num_profile_views.non_zero_days", Set(ProfilesViewed).asJava) + val NUM_PROFILE_VIEWS_ELAPSED_DAYS = + new Continuous("realgraph.num_profile_views.elapsed_days", Set(ProfilesViewed).asJava) + val NUM_PROFILE_VIEWS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_profile_views.days_since_last", + Set(ProfilesViewed).asJava + ) + val NUM_PROFILE_VIEWS_IS_MISSING = + new Binary("realgraph.num_profile_views.is_missing", Set(ProfilesViewed).asJava) + // the total dwell time the source user spends on the target user's tweets + val TOTAL_DWELL_TIME_MEAN = + new Continuous("realgraph.total_dwell_time.mean", Set(CountOfImpression).asJava) + val TOTAL_DWELL_TIME_EWMA = + new Continuous("realgraph.total_dwell_time.ewma", Set(CountOfImpression).asJava) + val TOTAL_DWELL_TIME_VARIANCE = + new Continuous("realgraph.total_dwell_time.variance", Set(CountOfImpression).asJava) + val TOTAL_DWELL_TIME_NON_ZERO_DAYS = + new Continuous("realgraph.total_dwell_time.non_zero_days", Set(CountOfImpression).asJava) + val TOTAL_DWELL_TIME_ELAPSED_DAYS = + new Continuous("realgraph.total_dwell_time.elapsed_days", Set(CountOfImpression).asJava) + val TOTAL_DWELL_TIME_DAYS_SINCE_LAST = new Continuous( + "realgraph.total_dwell_time.days_since_last", + Set(CountOfImpression).asJava + ) + val TOTAL_DWELL_TIME_IS_MISSING = + new Binary("realgraph.total_dwell_time.is_missing", Set(CountOfImpression).asJava) + // the number of the target user's tweets that the source user has inspected + val NUM_INSPECTED_TWEETS_MEAN = + new Continuous("realgraph.num_inspected_tweets.mean", Set(CountOfImpression).asJava) + val NUM_INSPECTED_TWEETS_EWMA = + new 
Continuous("realgraph.num_inspected_tweets.ewma", Set(CountOfImpression).asJava) + val NUM_INSPECTED_TWEETS_VARIANCE = + new Continuous("realgraph.num_inspected_tweets.variance", Set(CountOfImpression).asJava) + val NUM_INSPECTED_TWEETS_NON_ZERO_DAYS = new Continuous( + "realgraph.num_inspected_tweets.non_zero_days", + Set(CountOfImpression).asJava + ) + val NUM_INSPECTED_TWEETS_ELAPSED_DAYS = new Continuous( + "realgraph.num_inspected_tweets.elapsed_days", + Set(CountOfImpression).asJava + ) + val NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_inspected_tweets.days_since_last", + Set(CountOfImpression).asJava + ) + val NUM_INSPECTED_TWEETS_IS_MISSING = + new Binary("realgraph.num_inspected_tweets.is_missing", Set(CountOfImpression).asJava) + // the number of photos in which the source user has tagged the target user + val NUM_PHOTO_TAGS_MEAN = new Continuous( + "realgraph.num_photo_tags.mean", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_EWMA = new Continuous( + "realgraph.num_photo_tags.ewma", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_VARIANCE = new Continuous( + "realgraph.num_photo_tags.variance", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_NON_ZERO_DAYS = new Continuous( + "realgraph.num_photo_tags.non_zero_days", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_ELAPSED_DAYS = new Continuous( + "realgraph.num_photo_tags.elapsed_days", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_photo_tags.days_since_last", + Set(EngagementsPrivate, EngagementsPublic).asJava) + val NUM_PHOTO_TAGS_IS_MISSING = new Binary( + "realgraph.num_photo_tags.is_missing", + Set(EngagementsPrivate, EngagementsPublic).asJava) + + val NUM_FOLLOW_MEAN = new Continuous( + "realgraph.num_follow.mean", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + 
val NUM_FOLLOW_EWMA = new Continuous( + "realgraph.num_follow.ewma", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_FOLLOW_VARIANCE = new Continuous( + "realgraph.num_follow.variance", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_FOLLOW_NON_ZERO_DAYS = new Continuous( + "realgraph.num_follow.non_zero_days", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_FOLLOW_ELAPSED_DAYS = new Continuous( + "realgraph.num_follow.elapsed_days", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_FOLLOW_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_follow.days_since_last", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_FOLLOW_IS_MISSING = new Binary( + "realgraph.num_follow.is_missing", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + // the number of blocks that the source user sent to the destination user + val NUM_BLOCKS_MEAN = + new Continuous("realgraph.num_blocks.mean", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_EWMA = + new Continuous("realgraph.num_blocks.ewma", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_VARIANCE = + new Continuous("realgraph.num_blocks.variance", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_NON_ZERO_DAYS = + new Continuous("realgraph.num_blocks.non_zero_days", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_ELAPSED_DAYS = + new Continuous("realgraph.num_blocks.elapsed_days", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_DAYS_SINCE_LAST = + new Continuous("realgraph.num_blocks.days_since_last", Set(CountOfBlocks).asJava) + val NUM_BLOCKS_IS_MISSING = + new Binary("realgraph.num_blocks.is_missing", Set(CountOfBlocks).asJava) + // the number of mutes that the source user sent to the destination user + val NUM_MUTES_MEAN = + new Continuous("realgraph.num_mutes.mean", Set(CountOfMutes).asJava) + val NUM_MUTES_EWMA = + new 
Continuous("realgraph.num_mutes.ewma", Set(CountOfMutes).asJava) + val NUM_MUTES_VARIANCE = + new Continuous("realgraph.num_mutes.variance", Set(CountOfMutes).asJava) + val NUM_MUTES_NON_ZERO_DAYS = + new Continuous("realgraph.num_mutes.non_zero_days", Set(CountOfMutes).asJava) + val NUM_MUTES_ELAPSED_DAYS = + new Continuous("realgraph.num_mutes.elapsed_days", Set(CountOfMutes).asJava) + val NUM_MUTES_DAYS_SINCE_LAST = + new Continuous("realgraph.num_mutes.days_since_last", Set(CountOfMutes).asJava) + val NUM_MUTES_IS_MISSING = + new Binary("realgraph.num_mutes.is_missing", Set(CountOfMutes).asJava) + // the number of report as abuses that the source user sent to the destination user + val NUM_REPORTS_AS_ABUSES_MEAN = + new Continuous("realgraph.num_report_as_abuses.mean", Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_EWMA = + new Continuous("realgraph.num_report_as_abuses.ewma", Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_VARIANCE = + new Continuous("realgraph.num_report_as_abuses.variance", Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_NON_ZERO_DAYS = + new Continuous("realgraph.num_report_as_abuses.non_zero_days", Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_ELAPSED_DAYS = + new Continuous("realgraph.num_report_as_abuses.elapsed_days", Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_DAYS_SINCE_LAST = + new Continuous( + "realgraph.num_report_as_abuses.days_since_last", + Set(CountOfAbuseReports).asJava) + val NUM_REPORTS_AS_ABUSES_IS_MISSING = + new Binary("realgraph.num_report_as_abuses.is_missing", Set(CountOfAbuseReports).asJava) + // the number of report as spams that the source user sent to the destination user + val NUM_REPORTS_AS_SPAMS_MEAN = + new Continuous( + "realgraph.num_report_as_spams.mean", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_EWMA = + new Continuous( + "realgraph.num_report_as_spams.ewma", + Set(CountOfAbuseReports, 
SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_VARIANCE = + new Continuous( + "realgraph.num_report_as_spams.variance", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_NON_ZERO_DAYS = + new Continuous( + "realgraph.num_report_as_spams.non_zero_days", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_ELAPSED_DAYS = + new Continuous( + "realgraph.num_report_as_spams.elapsed_days", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_DAYS_SINCE_LAST = + new Continuous( + "realgraph.num_report_as_spams.days_since_last", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + val NUM_REPORTS_AS_SPAMS_IS_MISSING = + new Binary( + "realgraph.num_report_as_spams.is_missing", + Set(CountOfAbuseReports, SafetyRelationships).asJava) + + val NUM_MUTUAL_FOLLOW_MEAN = new Continuous( + "realgraph.num_mutual_follow.mean", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_EWMA = new Continuous( + "realgraph.num_mutual_follow.ewma", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_VARIANCE = new Continuous( + "realgraph.num_mutual_follow.variance", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_NON_ZERO_DAYS = new Continuous( + "realgraph.num_mutual_follow.non_zero_days", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_ELAPSED_DAYS = new Continuous( + "realgraph.num_mutual_follow.elapsed_days", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + 
PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_mutual_follow.days_since_last", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + val NUM_MUTUAL_FOLLOW_IS_MISSING = new Binary( + "realgraph.num_mutual_follow.is_missing", + Set( + Follow, + PrivateAccountsFollowedBy, + PublicAccountsFollowedBy, + PrivateAccountsFollowing, + PublicAccountsFollowing).asJava + ) + + val NUM_SMS_FOLLOW_MEAN = new Continuous( + "realgraph.num_sms_follow.mean", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_EWMA = new Continuous( + "realgraph.num_sms_follow.ewma", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_VARIANCE = new Continuous( + "realgraph.num_sms_follow.variance", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_NON_ZERO_DAYS = new Continuous( + "realgraph.num_sms_follow.non_zero_days", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_ELAPSED_DAYS = new Continuous( + "realgraph.num_sms_follow.elapsed_days", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_sms_follow.days_since_last", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + val NUM_SMS_FOLLOW_IS_MISSING = new Binary( + "realgraph.num_sms_follow.is_missing", + Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava) + + val NUM_ADDRESS_BOOK_EMAIL_MEAN = + new Continuous("realgraph.num_address_book_email.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_EMAIL_EWMA = + new Continuous("realgraph.num_address_book_email.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_EMAIL_VARIANCE = + new 
Continuous("realgraph.num_address_book_email.variance", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_EMAIL_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_email.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_EMAIL_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_email.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_EMAIL_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_email.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_EMAIL_IS_MISSING = + new Binary("realgraph.num_address_book_email.is_missing", Set(AddressBook).asJava) + + val NUM_ADDRESS_BOOK_IN_BOTH_MEAN = + new Continuous("realgraph.num_address_book_in_both.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_IN_BOTH_EWMA = + new Continuous("realgraph.num_address_book_in_both.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_IN_BOTH_VARIANCE = new Continuous( + "realgraph.num_address_book_in_both.variance", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_IN_BOTH_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_in_both.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_IN_BOTH_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_in_both.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_IN_BOTH_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_in_both.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_IN_BOTH_IS_MISSING = new Binary( + "realgraph.num_address_book_in_both.is_missing", + Set(AddressBook).asJava + ) + + val NUM_ADDRESS_BOOK_PHONE_MEAN = + new Continuous("realgraph.num_address_book_phone.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_PHONE_EWMA = + new Continuous("realgraph.num_address_book_phone.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_PHONE_VARIANCE = + new Continuous("realgraph.num_address_book_phone.variance", Set(AddressBook).asJava) + val 
NUM_ADDRESS_BOOK_PHONE_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_phone.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_PHONE_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_phone.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_PHONE_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_phone.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_PHONE_IS_MISSING = + new Binary("realgraph.num_address_book_phone.is_missing", Set(AddressBook).asJava) + + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_MEAN = + new Continuous("realgraph.num_address_book_mutual_edge_email.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_EWMA = + new Continuous("realgraph.num_address_book_mutual_edge_email.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_VARIANCE = + new Continuous("realgraph.num_address_book_mutual_edge_email.variance", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_email.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_email.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_mutual_edge_email.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_IS_MISSING = + new Binary("realgraph.num_address_book_mutual_edge_email.is_missing", Set(AddressBook).asJava) + + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_MEAN = + new Continuous("realgraph.num_address_book_mutual_edge_in_both.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_EWMA = + new Continuous("realgraph.num_address_book_mutual_edge_in_both.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_VARIANCE = new 
Continuous( + "realgraph.num_address_book_mutual_edge_in_both.variance", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_in_both.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_in_both.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_mutual_edge_in_both.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_IS_MISSING = new Binary( + "realgraph.num_address_book_mutual_edge_in_both.is_missing", + Set(AddressBook).asJava + ) + + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_MEAN = + new Continuous("realgraph.num_address_book_mutual_edge_phone.mean", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_EWMA = + new Continuous("realgraph.num_address_book_mutual_edge_phone.ewma", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_VARIANCE = + new Continuous("realgraph.num_address_book_mutual_edge_phone.variance", Set(AddressBook).asJava) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_NON_ZERO_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_phone.non_zero_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_ELAPSED_DAYS = new Continuous( + "realgraph.num_address_book_mutual_edge_phone.elapsed_days", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_DAYS_SINCE_LAST = new Continuous( + "realgraph.num_address_book_mutual_edge_phone.days_since_last", + Set(AddressBook).asJava + ) + val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_IS_MISSING = + new Binary("realgraph.num_address_book_mutual_edge_phone.is_missing", Set(AddressBook).asJava) +} +/** Pairs an optional RealGraphEdgeFeature with seven features: Continuous mean, ewma, variance, nonZeroDays, elapsedDays and daysSinceLast, plus a Binary isMissing. */ +case class RealGraphEdgeDataRecordFeatures( + edgeFeatureOpt: Option[RealGraphEdgeFeature], + meanFeature: Continuous, +
ewmaFeature: Continuous, + varianceFeature: Continuous, + nonZeroDaysFeature: Continuous, + elapsedDaysFeature: Continuous, + daysSinceLastFeature: Continuous, + isMissingFeature: Binary) diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/BUILD b/src/scala/com/twitter/timelines/prediction/features/recap/BUILD new file mode 100644 index 000000000..6fc497bf3 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/recap/BUILD @@ -0,0 +1,9 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala new file mode 100644 index 000000000..c8ee6da7d --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeatures.scala @@ -0,0 +1,967 @@ +package com.twitter.timelines.prediction.features.recap + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature.Binary +import com.twitter.ml.api.Feature.Continuous +import com.twitter.ml.api.Feature.Discrete +import com.twitter.ml.api.Feature.SparseBinary +import com.twitter.ml.api.Feature.Text +import scala.collection.JavaConverters._ +/** Two instances of RecapFeatures: one built with an empty prefix, one with the prefix in_reply_to_tweet. */ +object RecapFeatures extends RecapFeatures("") +object InReplyToRecapFeatures extends RecapFeatures("in_reply_to_tweet") +/** Declares feature constants; any name passed through name(...) picks up this instance's prefix. */ +class RecapFeatures(prefix: String) { + /** Returns featureName unchanged when prefix is empty; otherwise returns prefix, a dot, then featureName. */ private def name(featureName: String): String = { + if (prefix.nonEmpty) { + s"$prefix.$featureName" + } else { + featureName + } + } + + val IS_IPAD_CLIENT = new Binary(name("recap.client.is_ipad"), Set(ClientType).asJava) + val IS_WEB_CLIENT = new Binary(name("recap.client.is_web"), Set(ClientType).asJava) + val IS_IPHONE_CLIENT = new Binary(name("recap.client.is_phone"), Set(ClientType).asJava)
+ val IS_ANDROID_CLIENT = new Binary(name("recap.client.is_android"), Set(ClientType).asJava) + val IS_ANDROID_TABLET_CLIENT = + new Binary(name("recap.client.is_android_tablet"), Set(ClientType).asJava) + + // features from userAgent + val CLIENT_NAME = new Text(name("recap.user_agent.client_name"), Set(ClientType).asJava) + val CLIENT_SOURCE = new Discrete(name("recap.user_agent.client_source"), Set(ClientType).asJava) + val CLIENT_VERSION = new Text(name("recap.user_agent.client_version"), Set(ClientVersion).asJava) + val CLIENT_VERSION_CODE = + new Text(name("recap.user_agent.client_version_code"), Set(ClientVersion).asJava) + val DEVICE = new Text(name("recap.user_agent.device"), Set(DeviceType).asJava) + val FROM_DOG_FOOD = new Binary(name("recap.meta.from_dog_food"), Set(UserAgent).asJava) + val FROM_TWITTER_CLIENT = + new Binary(name("recap.user_agent.from_twitter_client"), Set(UserAgent).asJava) + val MANUFACTURER = new Text(name("recap.user_agent.manufacturer"), Set(UserAgent).asJava) + val MODEL = new Text(name("recap.user_agent.model"), Set(UserAgent).asJava) + val NETWORK_CONNECTION = + new Discrete(name("recap.user_agent.network_connection"), Set(UserAgent).asJava) + val SDK_VERSION = new Text(name("recap.user_agent.sdk_version"), Set(AppId, UserAgent).asJava) + + // engagement + val IS_RETWEETED = new Binary( + name("recap.engagement.is_retweeted"), + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_FAVORITED = new Binary( + name("recap.engagement.is_favorited"), + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED = new Binary( + name("recap.engagement.is_replied"), + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + // v1: post click engagements: fav, reply + val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_favorited_or_replied"), + Set( + PublicLikes, + PrivateLikes, + 
PublicReplies, + PrivateReplies, + EngagementsPrivate, + EngagementsPublic).asJava) + // v2: post click engagements: click + val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_v2"), + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_favorited"), + Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_replied"), + Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_retweeted"), + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_clicked"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_followed"), + Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_share_dm_clicked"), + Set(EngagementsPrivate).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_profile_clicked"), + Set(EngagementsPrivate).asJava) + + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_uam_gt_0"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_uam_gt_1"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary( + 
name("recap.engagement.is_good_clicked_convo_desc_uam_gt_2"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary( + name("recap.engagement.is_good_clicked_convo_desc_uam_gt_3"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + + val IS_TWEET_DETAIL_DWELLED = new Binary( + name("recap.engagement.is_tweet_detail_dwelled"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary( + name("recap.engagement.is_tweet_detail_dwelled_8_sec"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary( + name("recap.engagement.is_tweet_detail_dwelled_15_sec"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary( + name("recap.engagement.is_tweet_detail_dwelled_25_sec"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary( + name("recap.engagement.is_tweet_detail_dwelled_30_sec"), + Set(TweetsClicked, EngagementsPrivate).asJava) + + val IS_PROFILE_DWELLED = new Binary( + "recap.engagement.is_profile_dwelled", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_10_SEC = new Binary( + "recap.engagement.is_profile_dwelled_10_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_20_SEC = new Binary( + "recap.engagement.is_profile_dwelled_20_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_DWELLED_30_SEC = new Binary( + "recap.engagement.is_profile_dwelled_30_sec", + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED = new Binary( + "recap.engagement.is_fullscreen_video_dwelled", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary( + "recap.engagement.is_fullscreen_video_dwelled_5_sec", + 
Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary( + "recap.engagement.is_fullscreen_video_dwelled_10_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary( + "recap.engagement.is_fullscreen_video_dwelled_20_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary( + "recap.engagement.is_fullscreen_video_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_15_SEC = new Binary( + "recap.engagement.is_link_dwelled_15_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_30_SEC = new Binary( + "recap.engagement.is_link_dwelled_30_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_LINK_DWELLED_60_SEC = new Binary( + "recap.engagement.is_link_dwelled_60_sec", + Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava) + + val IS_QUOTED = new Binary( + name("recap.engagement.is_quoted"), + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_RETWEETED_WITHOUT_QUOTE = new Binary( + name("recap.engagement.is_retweeted_without_quote"), + Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava) + val IS_CLICKED = + new Binary(name("recap.engagement.is_clicked"), Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_DWELLED = new Binary(name("recap.engagement.is_dwelled"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_IN_BOUNDS_V1 = + new Binary(name("recap.engagement.is_dwelled_in_bounds_v1"), Set(EngagementsPrivate).asJava) + val DWELL_NORMALIZED_OVERALL = new Continuous( + name("recap.engagement.dwell_normalized_overall"), + 
Set(EngagementsPrivate).asJava) + val DWELL_CDF_OVERALL = + new Continuous(name("recap.engagement.dwell_cdf_overall"), Set(EngagementsPrivate).asJava) + val DWELL_CDF = new Continuous(name("recap.engagement.dwell_cdf"), Set(EngagementsPrivate).asJava) + + val IS_DWELLED_1S = + new Binary(name("recap.engagement.is_dwelled_1s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_2S = + new Binary(name("recap.engagement.is_dwelled_2s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_3S = + new Binary(name("recap.engagement.is_dwelled_3s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_4S = + new Binary(name("recap.engagement.is_dwelled_4s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_5S = + new Binary(name("recap.engagement.is_dwelled_5s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_6S = + new Binary(name("recap.engagement.is_dwelled_6s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_7S = + new Binary(name("recap.engagement.is_dwelled_7s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_8S = + new Binary(name("recap.engagement.is_dwelled_8s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_9S = + new Binary(name("recap.engagement.is_dwelled_9s"), Set(EngagementsPrivate).asJava) + val IS_DWELLED_10S = + new Binary(name("recap.engagement.is_dwelled_10s"), Set(EngagementsPrivate).asJava) + + val IS_SKIPPED_1S = + new Binary(name("recap.engagement.is_skipped_1s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_2S = + new Binary(name("recap.engagement.is_skipped_2s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_3S = + new Binary(name("recap.engagement.is_skipped_3s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_4S = + new Binary(name("recap.engagement.is_skipped_4s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_5S = + new Binary(name("recap.engagement.is_skipped_5s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_6S = + new Binary(name("recap.engagement.is_skipped_6s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_7S = + new 
Binary(name("recap.engagement.is_skipped_7s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_8S = + new Binary(name("recap.engagement.is_skipped_8s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_9S = + new Binary(name("recap.engagement.is_skipped_9s"), Set(EngagementsPrivate).asJava) + val IS_SKIPPED_10S = + new Binary(name("recap.engagement.is_skipped_10s"), Set(EngagementsPrivate).asJava) + + val IS_IMPRESSED = + new Binary(name("recap.engagement.is_impressed"), Set(EngagementsPrivate).asJava) + val IS_FOLLOWED = + new Binary("recap.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_PROFILE_CLICKED = new Binary( + name("recap.engagement.is_profile_clicked"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_OPEN_LINKED = new Binary( + name("recap.engagement.is_open_linked"), + Set(EngagementsPrivate, LinksClickedOn).asJava) + val IS_PHOTO_EXPANDED = + new Binary(name("recap.engagement.is_photo_expanded"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_VIEWED = + new Binary(name("recap.engagement.is_video_viewed"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_START = + new Binary(name("recap.engagement.is_video_playback_start"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_25 = + new Binary(name("recap.engagement.is_video_playback_25"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_50 = + new Binary(name("recap.engagement.is_video_playback_50"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_75 = + new Binary(name("recap.engagement.is_video_playback_75"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_95 = + new Binary(name("recap.engagement.is_video_playback_95"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_PLAYBACK_COMPLETE = + new Binary(name("recap.engagement.is_video_playback_complete"), Set(EngagementsPrivate).asJava) + val IS_VIDEO_VIEWED_AND_PLAYBACK_50 = new Binary( + name("recap.engagement.is_video_viewed_and_playback_50"), + 
Set(EngagementsPrivate).asJava) + val IS_VIDEO_QUALITY_VIEWED = new Binary( + name("recap.engagement.is_video_quality_viewed"), + Set(EngagementsPrivate).asJava + ) + val IS_TWEET_SHARE_DM_CLICKED = + new Binary(name("recap.engagement.is_tweet_share_dm_clicked"), Set(EngagementsPrivate).asJava) + val IS_TWEET_SHARE_DM_SENT = + new Binary(name("recap.engagement.is_tweet_share_dm_sent"), Set(EngagementsPrivate).asJava) + val IS_BOOKMARKED = + new Binary(name("recap.engagement.is_bookmarked"), Set(EngagementsPrivate).asJava) + val IS_SHARED = + new Binary(name("recap.engagement.is_shared"), Set(EngagementsPrivate).asJava) + val IS_SHARE_MENU_CLICKED = + new Binary(name("recap.engagement.is_share_menu_clicked"), Set(EngagementsPrivate).asJava) + + // Negative engagements + val IS_DONT_LIKE = + new Binary(name("recap.engagement.is_dont_like"), Set(EngagementsPrivate).asJava) + val IS_BLOCK_CLICKED = new Binary( + name("recap.engagement.is_block_clicked"), + Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava) + val IS_BLOCK_DIALOG_BLOCKED = new Binary( + name("recap.engagement.is_block_dialog_blocked"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_MUTE_CLICKED = new Binary( + name("recap.engagement.is_mute_clicked"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_MUTE_DIALOG_MUTED = + new Binary(name("recap.engagement.is_mute_dialog_muted"), Set(EngagementsPrivate).asJava) + val IS_REPORT_TWEET_CLICKED = new Binary( + name("recap.engagement.is_report_tweet_clicked"), + Set(TweetsClicked, EngagementsPrivate).asJava) + val IS_NEGATIVE_FEEDBACK = + new Binary("recap.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava) + val IS_NOT_ABOUT_TOPIC = + new Binary(name("recap.engagement.is_not_about_topic"), Set(EngagementsPrivate).asJava) + val IS_NOT_RECENT = + new Binary(name("recap.engagement.is_not_recent"), Set(EngagementsPrivate).asJava) + val IS_NOT_RELEVANT = + new Binary(name("recap.engagement.is_not_relevant"), 
Set(EngagementsPrivate).asJava) + val IS_SEE_FEWER = + new Binary(name("recap.engagement.is_see_fewer"), Set(EngagementsPrivate).asJava) + val IS_TOPIC_SPEC_NEG_ENGAGEMENT = + new Binary("recap.engagement.is_topic_spec_neg_engagement", Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC = + new Binary("recap.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC_EXPLICIT_POSITIVE_LABEL = + new Binary( + "recap.engagement.is_unfollow_topic_explicit_positive_label", + Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC_IMPLICIT_POSITIVE_LABEL = + new Binary( + "recap.engagement.is_unfollow_topic_implicit_positive_label", + Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC_STRONG_EXPLICIT_NEGATIVE_LABEL = + new Binary( + "recap.engagement.is_unfollow_topic_strong_explicit_negative_label", + Set(EngagementsPrivate).asJava) + val IS_UNFOLLOW_TOPIC_EXPLICIT_NEGATIVE_LABEL = + new Binary( + "recap.engagement.is_unfollow_topic_explicit_negative_label", + Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN = + new Binary("recap.engagement.is_not_interested_in", Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN_EXPLICIT_POSITIVE_LABEL = + new Binary( + "recap.engagement.is_not_interested_in_explicit_positive_label", + Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN_EXPLICIT_NEGATIVE_LABEL = + new Binary( + "recap.engagement.is_not_interested_in_explicit_negative_label", + Set(EngagementsPrivate).asJava) + val IS_CARET_CLICKED = + new Binary(name("recap.engagement.is_caret_clicked"), Set(EngagementsPrivate).asJava) + val IS_FOLLOW_TOPIC = + new Binary("recap.engagement.is_follow_topic", Set(EngagementsPrivate).asJava) + val IS_NOT_INTERESTED_IN_TOPIC = + new Binary("recap.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava) + val IS_HOME_LATEST_VISITED = + new Binary(name("recap.engagement.is_home_latest_visited"), Set(EngagementsPrivate).asJava) + + // Relevance prompt tweet 
engagements + val IS_RELEVANCE_PROMPT_YES_CLICKED = new Binary( + name("recap.engagement.is_relevance_prompt_yes_clicked"), + Set(EngagementsPrivate).asJava) + val IS_RELEVANCE_PROMPT_NO_CLICKED = new Binary( + name("recap.engagement.is_relevance_prompt_no_clicked"), + Set(EngagementsPrivate).asJava) + val IS_RELEVANCE_PROMPT_IMPRESSED = new Binary( + name("recap.engagement.is_relevance_prompt_impressed"), + Set(EngagementsPrivate).asJava) + + // Reciprocal engagements for reply forward engagement + val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_impressed_by_author"), + Set(EngagementsPrivate).asJava) + val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_favorited_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava) + val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_quoted_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava) + val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_replied_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava) + val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_retweeted_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava) + val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_blocked_by_author"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_followed_by_author"), + Set(EngagementsPrivate, EngagementsPublic, Follow).asJava) + val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_unfollowed_by_author"), + Set(EngagementsPrivate, 
EngagementsPublic).asJava) + val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_muted_by_author"), + Set(EngagementsPrivate).asJava) + val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_reported_by_author"), + Set(EngagementsPrivate).asJava) + + // This derived label is the logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED + val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary( + name("recap.engagement.is_replied_reply_engaged_by_author"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + + // Reciprocal engagements for fav forward engagement + val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary( + name("recap.engagement.is_favorited_fav_favorited_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava + ) + val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary( + name("recap.engagement.is_favorited_fav_replied_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava + ) + val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary( + name("recap.engagement.is_favorited_fav_retweeted_by_author"), + Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava + ) + /* Follow label: annotated with the Follow personal data type, as IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR is, rather than the retweet types. */ val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary( + name("recap.engagement.is_favorited_fav_followed_by_author"), + Set(EngagementsPrivate, EngagementsPublic, Follow).asJava + ) + // This derived label is the logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED + val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary( + name("recap.engagement.is_favorited_fav_engaged_by_author"), + Set(EngagementsPrivate, EngagementsPublic).asJava) + + // define good profile click by considering following engagements (follow, fav, reply, retweet, etc.)
at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_follow"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_fav"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_reply"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_retweet"), + Set( + ProfilesViewed, + ProfilesClicked, + EngagementsPrivate, + PrivateRetweets, + PublicRetweets).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_tweet_click"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_share_dm_click"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of all binary features above + val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_engaged"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava) + + // define bad profile click by considering following engagements (user report, tweet report, mute, block, etc) at profile page + val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_user_report_click"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary( 
+ name("recap.engagement.is_profile_clicked_and_profile_tweet_report_click"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_mute"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary( + name("recap.engagement.is_profile_clicked_and_profile_block"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // This derived label is the union of bad profile click engagements and existing negative feedback + val IS_NEGATIVE_FEEDBACK_V2 = new Binary( + name("recap.engagement.is_negative_feedback_v2"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_STRONG_NEGATIVE_FEEDBACK = new Binary( + name("recap.engagement.is_strong_negative_feedback"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + val IS_WEAK_NEGATIVE_FEEDBACK = new Binary( + name("recap.engagement.is_weak_negative_feedback"), + Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava) + // engagement for following user from any surface area + val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary( + "recap.engagement.is_followed_from_any_surface_area", + Set(EngagementsPublic, EngagementsPrivate).asJava) + + // Reply downvote engagements + val IS_REPLY_DOWNVOTED = + new Binary(name("recap.engagement.is_reply_downvoted"), Set(EngagementsPrivate).asJava) + val IS_REPLY_DOWNVOTE_REMOVED = + new Binary(name("recap.engagement.is_reply_downvote_removed"), Set(EngagementsPrivate).asJava) + + // Other engagements + val IS_GOOD_OPEN_LINK = new Binary( + name("recap.engagement.is_good_open_link"), + Set(EngagementsPrivate, LinksClickedOn).asJava) + val IS_ENGAGED = new Binary( + name("recap.engagement.any"), + Set(EngagementsPrivate, EngagementsPublic).asJava + ) // Deprecated - to be removed shortly + val IS_EARLYBIRD_UNIFIED_ENGAGEMENT = new Binary( + 
name("recap.engagement.is_unified_engagement"), + Set(EngagementsPrivate, EngagementsPublic).asJava + ) // A subset of IS_ENGAGED specifically intended for use in earlybird models + + // features from ThriftTweetFeatures + val PREV_USER_TWEET_ENGAGEMENT = new Continuous( + name("recap.tweetfeature.prev_user_tweet_enagagement"), + Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava) + val IS_SENSITIVE = new Binary(name("recap.tweetfeature.is_sensitive")) + val HAS_MULTIPLE_MEDIA = new Binary( + name("recap.tweetfeature.has_multiple_media"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val IS_AUTHOR_PROFILE_EGG = new Binary(name("recap.tweetfeature.is_author_profile_egg")) + val IS_AUTHOR_NEW = + new Binary(name("recap.tweetfeature.is_author_new"), Set(UserState, UserType).asJava) + val NUM_MENTIONS = new Continuous( + name("recap.tweetfeature.num_mentions"), + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val HAS_MENTION = new Binary(name("recap.tweetfeature.has_mention"), Set(UserVisibleFlag).asJava) + val NUM_HASHTAGS = new Continuous( + name("recap.tweetfeature.num_hashtags"), + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val HAS_HASHTAG = new Binary( + name("recap.tweetfeature.has_hashtag"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val LINK_LANGUAGE = new Continuous( + name("recap.tweetfeature.link_language"), + Set(ProvidedLanguage, InferredLanguage).asJava) + val IS_AUTHOR_NSFW = + new Binary(name("recap.tweetfeature.is_author_nsfw"), Set(UserSafetyLabels, UserType).asJava) + val IS_AUTHOR_SPAM = + new Binary(name("recap.tweetfeature.is_author_spam"), Set(UserSafetyLabels, UserType).asJava) + val IS_AUTHOR_BOT = + new Binary(name("recap.tweetfeature.is_author_bot"), Set(UserSafetyLabels, UserType).asJava) + val SIGNATURE = + new Discrete(name("recap.tweetfeature.signature"), 
Set(DigitalSignatureNonrepudiation).asJava) + val LANGUAGE = new Discrete( + name("recap.tweetfeature.language"), + Set(ProvidedLanguage, InferredLanguage).asJava) + val FROM_INACTIVE_USER = + new Binary(name("recap.tweetfeature.from_inactive_user"), Set(UserActiveFlag).asJava) + val PROBABLY_FROM_FOLLOWED_AUTHOR = new Binary(name("recap.v3.tweetfeature.probably_from_follow")) + val FROM_MUTUAL_FOLLOW = new Binary(name("recap.tweetfeature.from_mutual_follow")) + val USER_REP = new Continuous(name("recap.tweetfeature.user_rep")) + val FROM_VERIFIED_ACCOUNT = + new Binary(name("recap.tweetfeature.from_verified_account"), Set(UserVerifiedFlag).asJava) + val IS_BUSINESS_SCORE = new Continuous(name("recap.tweetfeature.is_business_score")) + val HAS_CONSUMER_VIDEO = new Binary( + name("recap.tweetfeature.has_consumer_video"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_PRO_VIDEO = new Binary( + name("recap.tweetfeature.has_pro_video"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VINE = new Binary( + name("recap.tweetfeature.has_vine"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_PERISCOPE = new Binary( + name("recap.tweetfeature.has_periscope"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_NATIVE_VIDEO = new Binary( + name("recap.tweetfeature.has_native_video"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_NATIVE_IMAGE = new Binary( + name("recap.tweetfeature.has_native_image"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_CARD = new Binary( + name("recap.tweetfeature.has_card"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_IMAGE = new Binary( + name("recap.tweetfeature.has_image"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val 
HAS_NEWS = new Binary( + name("recap.tweetfeature.has_news"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VIDEO = new Binary( + name("recap.tweetfeature.has_video"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_VISIBLE_LINK = new Binary( + name("recap.tweetfeature.has_visible_link"), + Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val LINK_COUNT = new Continuous( + name("recap.tweetfeature.link_count"), + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + val HAS_LINK = new Binary( + name("recap.tweetfeature.has_link"), + Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val IS_OFFENSIVE = new Binary(name("recap.tweetfeature.is_offensive")) + val HAS_TREND = new Binary( + name("recap.tweetfeature.has_trend"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary( + name("recap.tweetfeature.has_multiple_hashtag_or_trend"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val URL_DOMAINS = new SparseBinary( + name("recap.tweetfeature.url_domains"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val CONTAINS_MEDIA = new Binary( + name("recap.tweetfeature.contains_media"), + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val RETWEET_SEARCHER = new Binary(name("recap.tweetfeature.retweet_searcher")) + val REPLY_SEARCHER = new Binary(name("recap.tweetfeature.reply_searcher")) + val MENTION_SEARCHER = + new Binary(name("recap.tweetfeature.mention_searcher"), Set(UserVisibleFlag).asJava) + val REPLY_OTHER = + new Binary(name("recap.tweetfeature.reply_other"), Set(PublicReplies, PrivateReplies).asJava) + val RETWEET_OTHER = new Binary( + name("recap.tweetfeature.retweet_other"), + 
Set(PublicRetweets, PrivateRetweets).asJava) + val IS_REPLY = + new Binary(name("recap.tweetfeature.is_reply"), Set(PublicReplies, PrivateReplies).asJava) + val IS_RETWEET = + new Binary(name("recap.tweetfeature.is_retweet"), Set(PublicRetweets, PrivateRetweets).asJava) + val IS_EXTENDED_REPLY = new Binary( + name("recap.tweetfeature.is_extended_reply"), + Set(PublicReplies, PrivateReplies).asJava) + val MATCH_UI_LANG = new Binary( + name("recap.tweetfeature.match_ui_lang"), + Set(ProvidedLanguage, InferredLanguage).asJava) + val MATCH_SEARCHER_MAIN_LANG = new Binary( + name("recap.tweetfeature.match_searcher_main_lang"), + Set(ProvidedLanguage, InferredLanguage).asJava) + val MATCH_SEARCHER_LANGS = new Binary( + name("recap.tweetfeature.match_searcher_langs"), + Set(ProvidedLanguage, InferredLanguage).asJava) + val BIDIRECTIONAL_REPLY_COUNT = new Continuous( + name("recap.tweetfeature.bidirectional_reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val UNIDIRECTIONAL_REPLY_COUNT = new Continuous( + name("recap.tweetfeature.unidirectional_reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val BIDIRECTIONAL_RETWEET_COUNT = new Continuous( + name("recap.tweetfeature.bidirectional_retweet_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous( + name("recap.tweetfeature.unidirectional_retweet_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val BIDIRECTIONAL_FAV_COUNT = new Continuous( + name("recap.tweetfeature.bidirectional_fav_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val UNIDIRECTIONAL_FAV_COUNT = new Continuous( + name("recap.tweetfeature.unidirectiona_fav_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val CONVERSATIONAL_COUNT = new Continuous( + name("recap.tweetfeature.conversational_count"), + Set(CountOfPrivateTweets, CountOfPublicTweets).asJava) + // tweet impressions on an 
embedded tweet + val EMBEDS_IMPRESSION_COUNT = new Continuous( + name("recap.tweetfeature.embeds_impression_count"), + Set(CountOfImpression).asJava) + // number of URLs that embed the tweet + val EMBEDS_URL_COUNT = new Continuous( + name("recap.tweetfeature.embeds_url_count"), + Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava) + // currently only counts views on Snappy and Amplify pro videos. Counts for other videos forthcoming + val VIDEO_VIEW_COUNT = new Continuous( + name("recap.tweetfeature.video_view_count"), + Set( + CountOfTweetEntitiesClicked, + CountOfPrivateTweetEntitiesAndMetadata, + CountOfPublicTweetEntitiesAndMetadata, + EngagementsPrivate, + EngagementsPublic).asJava + ) + val TWEET_COUNT_FROM_USER_IN_SNAPSHOT = new Continuous( + name("recap.tweetfeature.tweet_count_from_user_in_snapshot"), + Set(CountOfPrivateTweets, CountOfPublicTweets).asJava) + val NORMALIZED_PARUS_SCORE = + new Continuous("recap.tweetfeature.normalized_parus_score", Set(EngagementScore).asJava) + val PARUS_SCORE = new Continuous("recap.tweetfeature.parus_score", Set(EngagementScore).asJava) + val REAL_GRAPH_WEIGHT = + new Continuous("recap.tweetfeature.real_graph_weight", Set(UsersRealGraphScore).asJava) + val SARUS_GRAPH_WEIGHT = new Continuous("recap.tweetfeature.sarus_graph_weight") + val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous( + "recap.tweetfeature.topic_sim_searcher_interested_in_author_known_for") + val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous( + "recap.tweetfeature.topic_sim_searcher_author_both_interested_in") + val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous( + "recap.tweetfeature.topic_sim_searcher_author_both_known_for") + val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous( + "recap.tweetfeature.topic_sim_searcher_interested_in_tweet") + val IS_RETWEETER_PROFILE_EGG = + new Binary(name("recap.v2.tweetfeature.is_retweeter_profile_egg"), 
Set(UserType).asJava) + val IS_RETWEETER_NEW = + new Binary(name("recap.v2.tweetfeature.is_retweeter_new"), Set(UserType, UserState).asJava) + val IS_RETWEETER_BOT = + new Binary( + name("recap.v2.tweetfeature.is_retweeter_bot"), + Set(UserType, UserSafetyLabels).asJava) + val IS_RETWEETER_NSFW = + new Binary( + name("recap.v2.tweetfeature.is_retweeter_nsfw"), + Set(UserType, UserSafetyLabels).asJava) + val IS_RETWEETER_SPAM = + new Binary( + name("recap.v2.tweetfeature.is_retweeter_spam"), + Set(UserType, UserSafetyLabels).asJava) + val RETWEET_OF_MUTUAL_FOLLOW = new Binary( + name("recap.v2.tweetfeature.retweet_of_mutual_follow"), + Set(PublicRetweets, PrivateRetweets).asJava) + val SOURCE_AUTHOR_REP = new Continuous(name("recap.v2.tweetfeature.source_author_rep")) + val IS_RETWEET_OF_REPLY = new Binary( + name("recap.v2.tweetfeature.is_retweet_of_reply"), + Set(PublicRetweets, PrivateRetweets).asJava) + val RETWEET_DIRECTED_AT_USER_IN_FIRST_DEGREE = new Binary( + name("recap.v2.tweetfeature.is_retweet_directed_at_user_in_first_degree"), + Set(PublicRetweets, PrivateRetweets, Follow).asJava) + val MENTIONED_SCREEN_NAMES = new SparseBinary( + "entities.users.mentioned_screen_names", + Set(DisplayName, UserVisibleFlag).asJava) + val MENTIONED_SCREEN_NAME = new Text( + "entities.users.mentioned_screen_names.member", + Set(DisplayName, UserVisibleFlag).asJava) + val HASHTAGS = new SparseBinary( + "entities.hashtags", + Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava) + val URL_SLUGS = new SparseBinary(name("recap.linkfeature.url_slugs"), Set(UrlFoundFlag).asJava) + + // features from ThriftSearchResultMetadata + val REPLY_COUNT = new Continuous( + name("recap.searchfeature.reply_count"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + val RETWEET_COUNT = new Continuous( + name("recap.searchfeature.retweet_count"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val FAV_COUNT = new Continuous( + 
name("recap.searchfeature.fav_count"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val BLENDER_SCORE = new Continuous(name("recap.searchfeature.blender_score")) + val TEXT_SCORE = new Continuous(name("recap.searchfeature.text_score")) + + // features related to content source + val SOURCE_TYPE = new Discrete(name("recap.source.type")) + + // features from addressbook + // the author is in the user's email addressbook + val USER_TO_AUTHOR_EMAIL_REACHABLE = + new Binary(name("recap.addressbook.user_to_author_email_reachable"), Set(AddressBook).asJava) + // the author is in the user's phone addressbook + val USER_TO_AUTHOR_PHONE_REACHABLE = + new Binary(name("recap.addressbook.user_to_author_phone_reachable"), Set(AddressBook).asJava) + // the user is in the author's email addressbook + val AUTHOR_TO_USER_EMAIL_REACHABLE = + new Binary(name("recap.addressbook.author_to_user_email_reachable"), Set(AddressBook).asJava) + // the user is in the author's phone addressbook + val AUTHOR_TO_USER_PHONE_REACHABLE = + new Binary(name("recap.addressbook.author_to_user_phone_reachable"), Set(AddressBook).asJava) + + // predicted engagement (these features are used by prediction service to return the predicted engagement probability) + // these should match the names in engagement_to_score_feature_mapping + val PREDICTED_IS_FAVORITED = + new Continuous(name("recap.engagement_predicted.is_favorited"), Set(EngagementScore).asJava) + val PREDICTED_IS_RETWEETED = + new Continuous(name("recap.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava) + val PREDICTED_IS_QUOTED = + new Continuous(name("recap.engagement_predicted.is_quoted"), Set(EngagementScore).asJava) + val PREDICTED_IS_REPLIED = + new Continuous(name("recap.engagement_predicted.is_replied"), Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_OPEN_LINK = new Continuous( + name("recap.engagement_predicted.is_good_open_link"), + Set(EngagementScore).asJava) + val PREDICTED_IS_PROFILE_CLICKED = new 
Continuous( + name("recap.engagement_predicted.is_profile_clicked"), + Set(EngagementScore).asJava) + val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Continuous( + name("recap.engagement_predicted.is_profile_clicked_and_profile_engaged"), + Set(EngagementScore).asJava) + val PREDICTED_IS_CLICKED = + new Continuous(name("recap.engagement_predicted.is_clicked"), Set(EngagementScore).asJava) + val PREDICTED_IS_PHOTO_EXPANDED = new Continuous( + name("recap.engagement_predicted.is_photo_expanded"), + Set(EngagementScore).asJava) + val PREDICTED_IS_DONT_LIKE = + new Continuous(name("recap.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava) + val PREDICTED_IS_VIDEO_PLAYBACK_50 = new Continuous( + name("recap.engagement_predicted.is_video_playback_50"), + Set(EngagementScore).asJava) + val PREDICTED_IS_VIDEO_QUALITY_VIEWED = new Continuous( + name("recap.engagement_predicted.is_video_quality_viewed"), + Set(EngagementScore).asJava) + val PREDICTED_IS_BOOKMARKED = + new Continuous(name("recap.engagement_predicted.is_bookmarked"), Set(EngagementScore).asJava) + val PREDICTED_IS_SHARED = + new Continuous(name("recap.engagement_predicted.is_shared"), Set(EngagementScore).asJava) + val PREDICTED_IS_SHARE_MENU_CLICKED = + new Continuous( + name("recap.engagement_predicted.is_share_menu_clicked"), + Set(EngagementScore).asJava) + val PREDICTED_IS_PROFILE_DWELLED_20_SEC = new Continuous( + name("recap.engagement_predicted.is_profile_dwelled_20_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Continuous( + name("recap.engagement_predicted.is_fullscreen_video_dwelled_5_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Continuous( + name("recap.engagement_predicted.is_fullscreen_video_dwelled_10_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Continuous( + 
name("recap.engagement_predicted.is_fullscreen_video_dwelled_20_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Continuous( + name("recap.engagement_predicted.is_fullscreen_video_dwelled_30_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_UNIFIED_ENGAGEMENT = new Continuous( + name("recap.engagement_predicted.is_unified_engagement"), + Set(EngagementScore).asJava) + val PREDICTED_IS_COMPOSE_TRIGGERED = new Continuous( + name("recap.engagement_predicted.is_compose_triggered"), + Set(EngagementScore).asJava) + val PREDICTED_IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Continuous( + name("recap.engagement_predicted.is_replied_reply_impressed_by_author"), + Set(EngagementScore).asJava) + val PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Continuous( + name("recap.engagement_predicted.is_replied_reply_engaged_by_author"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_CLICKED_V1 = new Continuous( + name("recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_CLICKED_V2 = new Continuous( + name("recap.engagement_predicted.is_good_clicked_convo_desc_v2"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC = new Continuous( + name("recap.engagement_predicted.is_tweet_detail_dwelled_8_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC = new Continuous( + name("recap.engagement_predicted.is_tweet_detail_dwelled_15_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC = new Continuous( + name("recap.engagement_predicted.is_tweet_detail_dwelled_25_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC = new Continuous( + name("recap.engagement_predicted.is_tweet_detail_dwelled_30_sec"), + Set(EngagementScore).asJava) + val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Continuous( + 
name("recap.engagement_predicted.is_favorited_fav_engaged_by_author"), + Set(EngagementScore).asJava) + val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Continuous( + name( + "recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"), + Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_IN_BOUNDS_V1 = new Continuous( + name("recap.engagement_predicted.is_dwelled_in_bounds_v1"), + Set(EngagementScore).asJava) + val PREDICTED_DWELL_NORMALIZED_OVERALL = new Continuous( + name("recap.engagement_predicted.dwell_normalized_overall"), + Set(EngagementScore).asJava) + val PREDICTED_DWELL_CDF = + new Continuous(name("recap.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava) + val PREDICTED_DWELL_CDF_OVERALL = new Continuous( + name("recap.engagement_predicted.dwell_cdf_overall"), + Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED = + new Continuous(name("recap.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava) + + val PREDICTED_IS_DWELLED_1S = + new Continuous(name("recap.engagement_predicted.is_dwelled_1s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_2S = + new Continuous(name("recap.engagement_predicted.is_dwelled_2s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_3S = + new Continuous(name("recap.engagement_predicted.is_dwelled_3s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_4S = + new Continuous(name("recap.engagement_predicted.is_dwelled_4s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_5S = + new Continuous(name("recap.engagement_predicted.is_dwelled_5s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_6S = + new Continuous(name("recap.engagement_predicted.is_dwelled_6s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_7S = + new Continuous(name("recap.engagement_predicted.is_dwelled_7s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_8S = + new 
Continuous(name("recap.engagement_predicted.is_dwelled_8s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_9S = + new Continuous(name("recap.engagement_predicted.is_dwelled_9s"), Set(EngagementScore).asJava) + val PREDICTED_IS_DWELLED_10S = + new Continuous(name("recap.engagement_predicted.is_dwelled_10s"), Set(EngagementScore).asJava) + + val PREDICTED_IS_SKIPPED_1S = + new Continuous(name("recap.engagement_predicted.is_skipped_1s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_2S = + new Continuous(name("recap.engagement_predicted.is_skipped_2s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_3S = + new Continuous(name("recap.engagement_predicted.is_skipped_3s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_4S = + new Continuous(name("recap.engagement_predicted.is_skipped_4s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_5S = + new Continuous(name("recap.engagement_predicted.is_skipped_5s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_6S = + new Continuous(name("recap.engagement_predicted.is_skipped_6s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_7S = + new Continuous(name("recap.engagement_predicted.is_skipped_7s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_8S = + new Continuous(name("recap.engagement_predicted.is_skipped_8s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_9S = + new Continuous(name("recap.engagement_predicted.is_skipped_9s"), Set(EngagementScore).asJava) + val PREDICTED_IS_SKIPPED_10S = + new Continuous(name("recap.engagement_predicted.is_skipped_10s"), Set(EngagementScore).asJava) + + val PREDICTED_IS_HOME_LATEST_VISITED = new Continuous( + name("recap.engagement_predicted.is_home_latest_visited"), + Set(EngagementScore).asJava) + val PREDICTED_IS_NEGATIVE_FEEDBACK = + new Continuous( + name("recap.engagement_predicted.is_negative_feedback"), + Set(EngagementScore).asJava) + val PREDICTED_IS_NEGATIVE_FEEDBACK_V2 = + new Continuous( + 
name("recap.engagement_predicted.is_negative_feedback_v2"), + Set(EngagementScore).asJava) + val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK = + new Continuous( + name("recap.engagement_predicted.is_weak_negative_feedback"), + Set(EngagementScore).asJava) + val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK = + new Continuous( + name("recap.engagement_predicted.is_strong_negative_feedback"), + Set(EngagementScore).asJava) + val PREDICTED_IS_REPORT_TWEET_CLICKED = + new Continuous( + name("recap.engagement_predicted.is_report_tweet_clicked"), + Set(EngagementScore).asJava) + val PREDICTED_IS_UNFOLLOW_TOPIC = + new Continuous( + name("recap.engagement_predicted.is_unfollow_topic"), + Set(EngagementScore).asJava) + val PREDICTED_IS_RELEVANCE_PROMPT_YES_CLICKED = new Continuous( + name("recap.engagement_predicted.is_relevance_prompt_yes_clicked"), + Set(EngagementScore).asJava) + + // engagement for following user from any surface area + val PREDICTED_IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Continuous( + "recap.engagement_predicted.is_followed_from_any_surface_area", + Set(EngagementScore).asJava) + + + // These are global engagement counts for the Tweets. 
+ val FAV_COUNT_V2 = new Continuous( + name("recap.earlybird.fav_count_v2"), + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava) + val RETWEET_COUNT_V2 = new Continuous( + name("recap.earlybird.retweet_count_v2"), + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava) + val REPLY_COUNT_V2 = new Continuous( + name("recap.earlybird.reply_count_v2"), + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava) + + val HAS_US_POLITICAL_ANNOTATION = new Binary( + name("recap.has_us_political_annotation"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ALL_GROUPS_ANNOTATION = new Binary( + name("recap.has_us_political_all_groups_annotation"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL = new Binary( + name("recap.has_us_political_annotation_high_recall"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V2 = new Binary( + name("recap.has_us_political_annotation_high_recall_v2"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V0 = new Binary( + name("recap.has_us_political_annotation_high_precision_v0"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_BALANCED_PRECISION_RECALL_V0 = new Binary( + name("recap.has_us_political_annotation_balanced_precision_recall_v0"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V3 = new Binary( + name("recap.has_us_political_annotation_high_recall_v3"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V3 = new Binary( + name("recap.has_us_political_annotation_high_precision_v3"), + Set(SemanticcoreClassification).asJava + ) + + val HAS_US_POLITICAL_ANNOTATION_BALANCED_V3 = new Binary( + name("recap.has_us_political_annotation_balanced_v3"), + Set(SemanticcoreClassification).asJava + ) + +} diff --git 
a/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala new file mode 100644 index 000000000..edf152cda --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/recap/RecapFeaturesUtils.scala @@ -0,0 +1,29 @@ +package com.twitter.timelines.prediction.features.recap + +object RecapFeaturesUtils { + // This needs to be updated if an engagement model is added or removed from prediction service. + val scoreFeatureIdsMap: Map[String, Long] = Map( + RecapFeatures.IS_FAVORITED.getFeatureName -> RecapFeatures.PREDICTED_IS_FAVORITED.getFeatureId, + RecapFeatures.IS_REPLIED.getFeatureName -> RecapFeatures.PREDICTED_IS_REPLIED.getFeatureId, + RecapFeatures.IS_RETWEETED.getFeatureName -> RecapFeatures.PREDICTED_IS_RETWEETED.getFeatureId, + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1.getFeatureName -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1.getFeatureId, + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2.getFeatureName -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2.getFeatureId, +// RecapFeatures.IS_NEGATIVE_FEEDBACK_V2.getFeatureName -> RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2.getFeatureId, + RecapFeatures.IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureName -> RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId, + RecapFeatures.IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureName -> RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId + ) + + // This needs to be updated if an engagement model is added or removed from prediction service. 
+ val labelFeatureIdToScoreFeatureIdsMap: Map[Long, Long] = Map( + RecapFeatures.IS_FAVORITED.getFeatureId -> RecapFeatures.PREDICTED_IS_FAVORITED.getFeatureId, + RecapFeatures.IS_REPLIED.getFeatureId -> RecapFeatures.PREDICTED_IS_REPLIED.getFeatureId, + RecapFeatures.IS_RETWEETED.getFeatureId -> RecapFeatures.PREDICTED_IS_RETWEETED.getFeatureId, + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1.getFeatureId -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1.getFeatureId, + RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2.getFeatureId -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2.getFeatureId, + // RecapFeatures.IS_NEGATIVE_FEEDBACK_V2.getFeatureId -> RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2.getFeatureId, + RecapFeatures.IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId -> RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED.getFeatureId, + RecapFeatures.IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId -> RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR.getFeatureId + ) + + val labelFeatureNames: Seq[String] = scoreFeatureIdsMap.keys.toSeq +} diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD b/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD new file mode 100644 index 000000000..6fc497bf3 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/request_context/BUILD @@ -0,0 +1,9 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala new file mode 100644 index 000000000..a7dd28852 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/request_context/RequestContextFeatures.scala 
@@ -0,0 +1,57 @@ +package com.twitter.timelines.prediction.features.request_context + +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api.Feature._ +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import scala.collection.JavaConverters._ + +object RequestContextFeatures { + val COUNTRY_CODE = + new Text("request_context.country_code", Set(PrivateCountryOrRegion, InferredCountry).asJava) + val LANGUAGE_CODE = new Text( + "request_context.language_code", + Set(GeneralSettings, ProvidedLanguage, InferredLanguage).asJava) + val REQUEST_PROVENANCE = new Text("request_context.request_provenance", Set(AppUsage).asJava) + val DISPLAY_WIDTH = new Continuous("request_context.display_width", Set(OtherDeviceInfo).asJava) + val DISPLAY_HEIGHT = new Continuous("request_context.display_height", Set(OtherDeviceInfo).asJava) + val DISPLAY_DPI = new Continuous("request_context.display_dpi", Set(OtherDeviceInfo).asJava) + + // The following features are not Continuous features because, e.g., the continuity between + // hours 23 and 0 cannot be handled that way. Instead, we treat each slice of hours/days + // independently, like a set of sparse binary features. + val TIMESTAMP_GMT_HOUR = + new Discrete("request_context.timestamp_gmt_hour", Set(PrivateTimestamp).asJava) + val TIMESTAMP_GMT_DOW = + new Discrete("request_context.timestamp_gmt_dow", Set(PrivateTimestamp).asJava) + + val IS_GET_INITIAL = new Binary("request_context.is_get_initial") + val IS_GET_MIDDLE = new Binary("request_context.is_get_middle") + val IS_GET_NEWER = new Binary("request_context.is_get_newer") + val IS_GET_OLDER = new Binary("request_context.is_get_older") + + // the following features are not Binary Features because the source field is Option[Boolean], + // and we want to distinguish Some(false) from None. None will be converted to -1. 
+ val IS_POLLING = new Discrete("request_context.is_polling") + val IS_SESSION_START = new Discrete("request_context.is_session_start") + + // Helps distinguish requests from "home" vs "home_latest" (reverse chron home view). + val TIMELINE_KIND = new Text("request_context.timeline_kind") + + val featureContext = new FeatureContext( + COUNTRY_CODE, + LANGUAGE_CODE, + REQUEST_PROVENANCE, + DISPLAY_WIDTH, + DISPLAY_HEIGHT, + DISPLAY_DPI, + TIMESTAMP_GMT_HOUR, + TIMESTAMP_GMT_DOW, + IS_GET_INITIAL, + IS_GET_MIDDLE, + IS_GET_NEWER, + IS_GET_OLDER, + IS_POLLING, + IS_SESSION_START, + TIMELINE_KIND + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD b/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD new file mode 100644 index 000000000..ec194353b --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/simcluster/BUILD @@ -0,0 +1,13 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "src/thrift/com/twitter/timelines/suggests/common:record-scala", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework/conversion:for-timelines", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala new file mode 100644 index 000000000..4d2b4db81 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala @@ -0,0 +1,61 @@ +package com.twitter.timelines.prediction.features.simcluster + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.finagle.stats.StatsReceiver +import 
com.twitter.ml.api.Feature._ +import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import scala.collection.JavaConverters._ + +class SimclusterFeaturesHelper(statsReceiver: StatsReceiver) { + import SimclusterFeatures._ + + private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName) + private[this] val invalidSimclusterModelVersion = scopedStatsReceiver + .counter("invalidSimclusterModelVersion") + + def fromUserClusterInterestsPair( + userInterestClustersPair: (Long, ClustersUserIsInterestedIn) + ): Option[SimclusterFeatures] = { + val (userId, userInterestClusters) = userInterestClustersPair + if (userInterestClusters.knownForModelVersion == SIMCLUSTER_MODEL_VERSION) { + val userInterestClustersFavScores = for { + (clusterId, scores) <- userInterestClusters.clusterIdToScores + favScore <- scores.favScore + } yield (clusterId.toString, favScore) + Some( + SimclusterFeatures( + userId, + userInterestClusters.knownForModelVersion, + userInterestClustersFavScores.toMap + ) + ) + } else { + // We maintain this counter to make sure that the hardcoded modelVersion we are using is correct. + invalidSimclusterModelVersion.incr + None + } + } +} + +object SimclusterFeatures { + // Check http://go/simclustersv2runbook for production versions + // Our models are trained for this specific model version only. 
+ val SIMCLUSTER_MODEL_VERSION = "20M_145K_dec11" + val prefix = s"simcluster.v2.$SIMCLUSTER_MODEL_VERSION" + + val SIMCLUSTER_USER_INTEREST_CLUSTER_SCORES = new SparseContinuous( + s"$prefix.user_interest_cluster_scores", + Set(EngagementScore, InferredInterests).asJava + ) + val SIMCLUSTER_USER_INTEREST_CLUSTER_IDS = new SparseBinary( + s"$prefix.user_interest_cluster_ids", + Set(InferredInterests).asJava + ) + val SIMCLUSTER_MODEL_VERSION_METADATA = new Text("meta.simcluster_version") +} + +case class SimclusterFeatures( + userId: Long, + modelVersion: String, + interestClusterScoresMap: Map[String, Double]) diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala new file mode 100644 index 000000000..355a89c22 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterTweetFeatures.scala @@ -0,0 +1,150 @@ +package com.twitter.timelines.prediction.features.simcluster + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.ml.api.{Feature, FeatureContext} +import com.twitter.ml.api.Feature.{Continuous, SparseBinary, SparseContinuous} +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import com.twitter.timelines.suggests.common.record.thriftscala.SuggestionRecord +import scala.collection.JavaConverters._ + +class SimclusterTweetFeatures(statsReceiver: StatsReceiver) extends CombineCountsBase { + import SimclusterTweetFeatures._ + + private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName) + private[this] val invalidSimclusterModelVersion = scopedStatsReceiver + .counter("invalidSimclusterModelVersion") + private[this] val 
getFeaturesFromOverlappingSimclusterIdsCount = scopedStatsReceiver + .counter("getFeaturesFromOverlappingSimclusterIdsCount") + private[this] val emptySimclusterMaps = scopedStatsReceiver + .counter("emptySimclusterMaps") + private[this] val nonOverlappingSimclusterMaps = scopedStatsReceiver + .counter("nonOverlappingSimclusterMaps") + + // Parameters required by CombineCountsBase + override val topK: Int = 5 + override val hardLimit: Option[Int] = None + override val precomputedCountFeatures: Seq[Feature[_]] = Seq( + SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE, + SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE + ) + + private def getFeaturesFromOverlappingSimclusterIds( + userSimclustersInterestedInMap: Map[String, Double], + tweetSimclustersTopKMap: Map[String, Double] + ): Map[Feature[_], List[Double]] = { + getFeaturesFromOverlappingSimclusterIdsCount.incr + if (userSimclustersInterestedInMap.isEmpty || tweetSimclustersTopKMap.isEmpty) { + emptySimclusterMaps.incr + Map.empty + } else { + val overlappingSimclusterIds = + userSimclustersInterestedInMap.keySet intersect tweetSimclustersTopKMap.keySet + if (overlappingSimclusterIds.isEmpty) { + nonOverlappingSimclusterMaps.incr + Map.empty + } else { + val (combinedScores, tweetScores) = overlappingSimclusterIds.toList.map { id => + val tweetScore = tweetSimclustersTopKMap.getOrElse(id, 0.0) + val combinedScore = userSimclustersInterestedInMap.getOrElse(id, 0.0) * tweetScore + (combinedScore, tweetScore) + }.unzip // List, not Set: equal scores from distinct clusters must not be merged + Map( + SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE -> combinedScores.toList, + SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE -> tweetScores.toList + ) + } + } + } + + def getCountFeaturesValuesMap( + suggestionRecord: SuggestionRecord, + simclustersTweetTopKMap: Map[String, Double] + ): Map[Feature[_], List[Double]] = { + val userSimclustersInterestedInMap = formatUserSimclustersInterestedIn(suggestionRecord) + + val tweetSimclustersTopKMap = formatTweetSimclustersTopK(simclustersTweetTopKMap) + + 
getFeaturesFromOverlappingSimclusterIds(userSimclustersInterestedInMap, tweetSimclustersTopKMap)
+  }
+
+  /**
+   * Keeps only the entries whose cluster id contains the hardcoded
+   * SimclusterFeatures.SIMCLUSTER_MODEL_VERSION.
+   *
+   * @param simclustersMapOpt optional map keyed by cluster id
+   * @return None when the input is None or nothing survives the filter, otherwise the
+   *         surviving entries.
+   */
+  def filterByModelVersion(
+    simclustersMapOpt: Option[Map[String, Double]]
+  ): Option[Map[String, Double]] =
+    simclustersMapOpt.flatMap { simclustersMap =>
+      // The clusterId format is ModelVersion.IntegerClusterId.ScoreType as specified at
+      // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters
+      val matchingVersion = simclustersMap.filter {
+        case (clusterId, _) => clusterId.contains(SimclusterFeatures.SIMCLUSTER_MODEL_VERSION)
+      }
+
+      // All cluster ids in one map are expected to share a model version, so a dropped
+      // entry hints that the hardcoded version is wrong; count those occurrences.
+      val droppedAnyEntry = matchingVersion.size < simclustersMap.size
+      if (droppedAnyEntry) {
+        invalidSimclusterModelVersion.incr
+      }
+
+      Some(matchingVersion).filter(_.nonEmpty)
+    }
+
+  val allFeatures: Seq[Feature[_]] = outputFeaturesPostMerge.toSeq ++ Seq(
+    SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS,
+    SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES)
+  val featureContext = new FeatureContext(allFeatures: _*)
+}
+
+object SimclusterTweetFeatures {
+  val SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS = new SparseBinary(
+    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_ids",
+    Set(InferredInterests).asJava
+  )
+  val SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES = new SparseContinuous(
+    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_scores",
+    Set(EngagementScore, InferredInterests).asJava
+  )
+
+  val SIMCLUSTER_TWEET_TOPK_CLUSTER_ID =
+    TypedAggregateGroup.sparseFeature(SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS)
+
+  val SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE = new Continuous(
+    s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_tweet_score",
+    Set(EngagementScore, InferredInterests).asJava
+  )
+
+  val SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE = new Continuous(
+
s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_combined_score", + Set(EngagementScore, InferredInterests).asJava + ) + + def formatUserSimclustersInterestedIn(suggestionRecord: SuggestionRecord): Map[String, Double] = { + suggestionRecord.userSimclustersInterestedIn + .map { clustersUserIsInterestedIn => + if (clustersUserIsInterestedIn.knownForModelVersion == SimclusterFeatures.SIMCLUSTER_MODEL_VERSION) { + clustersUserIsInterestedIn.clusterIdToScores.collect { + case (clusterId, scores) if scores.favScore.isDefined => + (clusterId.toString, scores.favScore.get) + } + } else Map.empty[String, Double] + }.getOrElse(Map.empty[String, Double]) + .toMap + } + + def formatTweetSimclustersTopK( + simclustersTweetTopKMap: Map[String, Double] + ): Map[String, Double] = { + simclustersTweetTopKMap.collect { + case (clusterId, score) => + // The clusterId format is as specified at + // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters + // and we want to extract the IntegerClusterId. + // The split function takes a regex; therefore, we need to escape . and we also need to escape + // \ since they are both special characters. Hence, the double \\. + val clusterIdSplit = clusterId.split("\\.") + val integerClusterId = clusterIdSplit(1) // The IntegerClusterId is at position 1. 
+ (integerClusterId, score) + } + } +} diff --git a/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala new file mode 100644 index 000000000..0629636c0 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclustersScoresFeatures.scala @@ -0,0 +1,43 @@ +package com.twitter.timelines.prediction.features.simcluster + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType.SemanticcoreClassification +import com.twitter.ml.api.Feature +import com.twitter.ml.api.Feature.Continuous +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsBase +import scala.collection.JavaConverters._ + +object SimclustersScoresFeatures extends CombineCountsBase { + override def topK: Int = 2 + + override def hardLimit: Option[Int] = Some(20) + + val prefix = s"recommendations.sim_clusters_scores" + val TOPIC_CONSUMER_TWEET_EMBEDDING_Cs = new Continuous( + s"$prefix.localized_topic_consumer_tweet_embedding_cosine_similarity", + Set(SemanticcoreClassification).asJava) + val TOPIC_PRODUCER_TWEET_EMBEDDING_Cs = new Continuous( + s"$prefix.topic_producer_tweet_embedding_cosine_similarity", + Set(SemanticcoreClassification).asJava) + val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM = new Continuous( + s"$prefix.user_interested_in_localized_topic_consumer_embedding_cosine_similarity", + Set(SemanticcoreClassification).asJava) + val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT = new Continuous( + s"$prefix.user_interested_in_localized_topic_consumer_embedding_dot_product", + Set(SemanticcoreClassification).asJava) + val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM = new Continuous( + s"$prefix.user_interested_in_localized_topic_producer_embedding_cosine_similarity", + Set(SemanticcoreClassification).asJava) + val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT = new 
Continuous( + s"$prefix.user_interested_in_localized_topic_producer_embedding_dot_product", + Set(SemanticcoreClassification).asJava) + + override def precomputedCountFeatures: Seq[Feature[_]] = + Seq( + TOPIC_CONSUMER_TWEET_EMBEDDING_Cs, + TOPIC_PRODUCER_TWEET_EMBEDDING_Cs, + USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM, + USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT, + USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM, + USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD b/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD new file mode 100644 index 000000000..0c00b1e5b --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/socialproof/BUILD @@ -0,0 +1,15 @@ +scala_library( + name = "socialproof_features", + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/ibm/icu:icu4j", + "src/java/com/twitter/ml/api:api-base", + "src/scala/com/twitter/ml/api/util", + "src/scala/com/twitter/timelines/util", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/timelines/socialproof:socialproof-scala", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala new file mode 100644 index 000000000..163ba7efa --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/socialproof/SocialProofFeatures.scala @@ -0,0 +1,172 @@ +package com.twitter.timelines.prediction.features.socialproof + +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature.Binary +import com.twitter.ml.api.Feature.Continuous +import com.twitter.ml.api.Feature.SparseBinary +import com.twitter.ml.api.util.FDsl._ +import 
com.twitter.timelines.prediction.features.socialproof.SocialProofDataRecordFeatures._ +import com.twitter.timelines.socialproof.thriftscala.SocialProof +import com.twitter.timelines.socialproof.v1.thriftscala.SocialProofType +import com.twitter.timelines.util.CommonTypes.UserId +import scala.collection.JavaConverters._ +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ + +abstract class SocialProofUserGroundTruth(userIds: Seq[UserId], count: Int) { + require( + count >= userIds.size, + "count must be equal to or greater than the number of entries in userIds" + ) + // Using Double as the return type to make it more convenient for these values to be used as + // ML feature values. + val displayedUserCount: Double = userIds.size.toDouble + val undisplayedUserCount: Double = count - userIds.size.toDouble + val totalCount: Double = count.toDouble + + def featureDisplayedUsers: SparseBinary + def featureDisplayedUserCount: Continuous + def featureUndisplayedUserCount: Continuous + def featureTotalUserCount: Continuous + + def setFeatures(rec: DataRecord): Unit = { + rec.setFeatureValue(featureDisplayedUsers, toStringSet(userIds)) + rec.setFeatureValue(featureDisplayedUserCount, displayedUserCount) + rec.setFeatureValue(featureUndisplayedUserCount, undisplayedUserCount) + rec.setFeatureValue(featureTotalUserCount, totalCount) + } + protected def toStringSet(value: Seq[Long]): Set[String] = { + value.map(_.toString).toSet + } +} + +case class FavoritedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0) + extends SocialProofUserGroundTruth(userIds, count) { + + override val featureDisplayedUsers = SocialProofDisplayedFavoritedByUsers + override val featureDisplayedUserCount = SocialProofDisplayedFavoritedByUserCount + override val featureUndisplayedUserCount = SocialProofUndisplayedFavoritedByUserCount + override val featureTotalUserCount = SocialProofTotalFavoritedByUserCount +} + +case class 
RetweetedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0) + extends SocialProofUserGroundTruth(userIds, count) { + + override val featureDisplayedUsers = SocialProofDisplayedRetweetedByUsers + override val featureDisplayedUserCount = SocialProofDisplayedRetweetedByUserCount + override val featureUndisplayedUserCount = SocialProofUndisplayedRetweetedByUserCount + override val featureTotalUserCount = SocialProofTotalRetweetedByUserCount +} + +case class RepliedBySocialProofUserGroundTruth(userIds: Seq[UserId] = Seq.empty, count: Int = 0) + extends SocialProofUserGroundTruth(userIds, count) { + + override val featureDisplayedUsers = SocialProofDisplayedRepliedByUsers + override val featureDisplayedUserCount = SocialProofDisplayedRepliedByUserCount + override val featureUndisplayedUserCount = SocialProofUndisplayedRepliedByUserCount + override val featureTotalUserCount = SocialProofTotalRepliedByUserCount +} + +case class SocialProofFeatures( + hasSocialProof: Boolean, + favoritedBy: FavoritedBySocialProofUserGroundTruth = FavoritedBySocialProofUserGroundTruth(), + retweetedBy: RetweetedBySocialProofUserGroundTruth = RetweetedBySocialProofUserGroundTruth(), + repliedBy: RepliedBySocialProofUserGroundTruth = RepliedBySocialProofUserGroundTruth()) { + + def setFeatures(dataRecord: DataRecord): Unit = + if (hasSocialProof) { + dataRecord.setFeatureValue(HasSocialProof, hasSocialProof) + favoritedBy.setFeatures(dataRecord) + retweetedBy.setFeatures(dataRecord) + repliedBy.setFeatures(dataRecord) + } +} + +object SocialProofFeatures { + def apply(socialProofs: Seq[SocialProof]): SocialProofFeatures = + socialProofs.foldLeft(SocialProofFeatures(hasSocialProof = socialProofs.nonEmpty))( + (prevFeatures, socialProof) => { + val userIds = socialProof.v1.userIds + val count = socialProof.v1.count + socialProof.v1.socialProofType match { + case SocialProofType.FavoritedBy => + prevFeatures.copy(favoritedBy = 
FavoritedBySocialProofUserGroundTruth(userIds, count)) + case SocialProofType.RetweetedBy => + prevFeatures.copy(retweetedBy = RetweetedBySocialProofUserGroundTruth(userIds, count)) + case SocialProofType.RepliedBy => + prevFeatures.copy(repliedBy = RepliedBySocialProofUserGroundTruth(userIds, count)) + case _ => + prevFeatures // skip silently instead of breaking jobs, since this isn't used yet + } + }) +} + +object SocialProofDataRecordFeatures { + val HasSocialProof = new Binary("recap.social_proof.has_social_proof") + + val SocialProofDisplayedFavoritedByUsers = new SparseBinary( + "recap.social_proof.list.displayed.favorited_by", + Set(UserId, PublicLikes, PrivateLikes).asJava + ) + val SocialProofDisplayedFavoritedByUserCount = new Continuous( + "recap.social_proof.count.displayed.favorited_by", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val SocialProofUndisplayedFavoritedByUserCount = new Continuous( + "recap.social_proof.count.undisplayed.favorited_by", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val SocialProofTotalFavoritedByUserCount = new Continuous( + "recap.social_proof.count.total.favorited_by", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + + val SocialProofDisplayedRetweetedByUsers = new SparseBinary( + "recap.social_proof.list.displayed.retweeted_by", + Set(UserId, PublicRetweets, PrivateRetweets).asJava + ) + val SocialProofDisplayedRetweetedByUserCount = new Continuous( + "recap.social_proof.count.displayed.retweeted_by", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val SocialProofUndisplayedRetweetedByUserCount = new Continuous( + "recap.social_proof.count.undisplayed.retweeted_by", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val SocialProofTotalRetweetedByUserCount = new Continuous( + "recap.social_proof.count.total.retweeted_by", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + + val SocialProofDisplayedRepliedByUsers = new SparseBinary( 
+ "recap.social_proof.list.displayed.replied_by", + Set(UserId, PublicReplies, PrivateReplies).asJava + ) + val SocialProofDisplayedRepliedByUserCount = new Continuous( + "recap.social_proof.count.displayed.replied_by", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val SocialProofUndisplayedRepliedByUserCount = new Continuous( + "recap.social_proof.count.undisplayed.replied_by", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val SocialProofTotalRepliedByUserCount = new Continuous( + "recap.social_proof.count.total.replied_by", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + + val AllFeatures = Seq( + HasSocialProof, + SocialProofDisplayedFavoritedByUsers, + SocialProofDisplayedFavoritedByUserCount, + SocialProofUndisplayedFavoritedByUserCount, + SocialProofTotalFavoritedByUserCount, + SocialProofDisplayedRetweetedByUsers, + SocialProofDisplayedRetweetedByUserCount, + SocialProofUndisplayedRetweetedByUserCount, + SocialProofTotalRetweetedByUserCount, + SocialProofDisplayedRepliedByUsers, + SocialProofDisplayedRepliedByUserCount, + SocialProofUndisplayedRepliedByUserCount, + SocialProofTotalRepliedByUserCount + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD new file mode 100644 index 000000000..b5c49af36 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/time_features/BUILD @@ -0,0 +1,10 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/timelines/time_features:time_features-scala", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala 
b/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala new file mode 100644 index 000000000..b398203c3 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala @@ -0,0 +1,111 @@ +package com.twitter.timelines.prediction.features.time_features + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import com.twitter.ml.api.Feature._ +import scala.collection.JavaConverters._ +import com.twitter.util.Duration +import com.twitter.conversions.DurationOps._ + +object TimeDataRecordFeatures { + val TIME_BETWEEN_NON_POLLING_REQUESTS_AVG = new Continuous( + "time_features.time_between_non_polling_requests_avg", + Set(PrivateTimestamp).asJava + ) + val TIME_SINCE_TWEET_CREATION = new Continuous("time_features.time_since_tweet_creation") + val TIME_SINCE_SOURCE_TWEET_CREATION = new Continuous( + "time_features.time_since_source_tweet_creation" + ) + val TIME_SINCE_LAST_NON_POLLING_REQUEST = new Continuous( + "time_features.time_since_last_non_polling_request", + Set(PrivateTimestamp).asJava + ) + val NON_POLLING_REQUESTS_SINCE_TWEET_CREATION = new Continuous( + "time_features.non_polling_requests_since_tweet_creation", + Set(PrivateTimestamp).asJava + ) + val TWEET_AGE_RATIO = new Continuous("time_features.tweet_age_ratio") + val IS_TWEET_RECYCLED = new Binary("time_features.is_tweet_recycled") + // Last Engagement features + val LAST_FAVORITE_SINCE_CREATION_HRS = new Continuous( + "time_features.earlybird.last_favorite_since_creation_hrs", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val LAST_RETWEET_SINCE_CREATION_HRS = new Continuous( + "time_features.earlybird.last_retweet_since_creation_hrs", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val LAST_REPLY_SINCE_CREATION_HRS = new Continuous( + "time_features.earlybird.last_reply_since_creation_hrs", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val 
LAST_QUOTE_SINCE_CREATION_HRS = new Continuous( + "time_features.earlybird.last_quote_since_creation_hrs", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val TIME_SINCE_LAST_FAVORITE_HRS = new Continuous( + "time_features.earlybird.time_since_last_favorite", + Set(CountOfPrivateLikes, CountOfPublicLikes).asJava + ) + val TIME_SINCE_LAST_RETWEET_HRS = new Continuous( + "time_features.earlybird.time_since_last_retweet", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + val TIME_SINCE_LAST_REPLY_HRS = new Continuous( + "time_features.earlybird.time_since_last_reply", + Set(CountOfPrivateReplies, CountOfPublicReplies).asJava + ) + val TIME_SINCE_LAST_QUOTE_HRS = new Continuous( + "time_features.earlybird.time_since_last_quote", + Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava + ) + + val TIME_SINCE_VIEWER_ACCOUNT_CREATION_SECS = + new Continuous( + "time_features.time_since_viewer_account_creation_secs", + Set(AccountCreationTime, AgeOfAccount).asJava) + + val USER_ID_IS_SNOWFLAKE_ID = + new Binary("time_features.time_user_id_is_snowflake_id", Set(UserType).asJava) + + val IS_30_DAY_NEW_USER = + new Binary("time_features.is_day_30_new_user", Set(AccountCreationTime, AgeOfAccount).asJava) + val IS_12_MONTH_NEW_USER = + new Binary("time_features.is_month_12_new_user", Set(AccountCreationTime, AgeOfAccount).asJava) + val ACCOUNT_AGE_INTERVAL = + new Discrete("time_features.account_age_interval", Set(AgeOfAccount).asJava) +} + +object AccountAgeInterval extends Enumeration { + val LTE_1_DAY, GT_1_DAY_LTE_5_DAY, GT_5_DAY_LTE_14_DAY, GT_14_DAY_LTE_30_DAY = Value + + def fromDuration(accountAge: Duration): Option[AccountAgeInterval.Value] = { + accountAge match { + case a if (a <= 1.day) => Some(LTE_1_DAY) + case a if (1.day < a && a <= 5.days) => Some(GT_1_DAY_LTE_5_DAY) + case a if (5.days < a && a <= 14.days) => Some(GT_5_DAY_LTE_14_DAY) + case a if (14.days < a && a <= 30.days) => Some(GT_14_DAY_LTE_30_DAY) + case _ => None + 
} + } +} + +case class TimeFeatures( + isTweetRecycled: Boolean, + timeSinceTweetCreation: Double, + isDay30NewUser: Boolean, + isMonth12NewUser: Boolean, + timeSinceSourceTweetCreation: Double, // same as timeSinceTweetCreation for non-retweets + timeSinceViewerAccountCreationSecs: Option[Double], + timeBetweenNonPollingRequestsAvg: Option[Double] = None, + timeSinceLastNonPollingRequest: Option[Double] = None, + nonPollingRequestsSinceTweetCreation: Option[Double] = None, + tweetAgeRatio: Option[Double] = None, + lastFavSinceCreationHrs: Option[Double] = None, + lastRetweetSinceCreationHrs: Option[Double] = None, + lastReplySinceCreationHrs: Option[Double] = None, + lastQuoteSinceCreationHrs: Option[Double] = None, + timeSinceLastFavoriteHrs: Option[Double] = None, + timeSinceLastRetweetHrs: Option[Double] = None, + timeSinceLastReplyHrs: Option[Double] = None, + timeSinceLastQuoteHrs: Option[Double] = None, + accountAgeInterval: Option[AccountAgeInterval.Value] = None) diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD new file mode 100644 index 000000000..a4ad0eabf --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/BUILD @@ -0,0 +1,10 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "graph-feature-service/src/main/thrift/com/twitter/graph_feature_service:graph_feature_service_thrift-scala", + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala new file mode 100644 index 000000000..03a112578 --- /dev/null +++ 
b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeatures.scala @@ -0,0 +1,93 @@ +package com.twitter.timelines.prediction.features.two_hop_features + +import com.twitter.graph_feature_service.thriftscala.EdgeType +import com.twitter.ml.api.Feature._ +import scala.collection.JavaConverters._ +import TwoHopFeaturesConfig.personalDataTypesMap + +object TwoHopFeaturesDescriptor { + val prefix = "two_hop" + val normalizedPostfix = "normalized" + val leftNodeDegreePostfix = "left_degree" + val rightNodeDegreePostfix = "right_degree" + + type TwoHopFeatureMap = Map[(EdgeType, EdgeType), Continuous] + type TwoHopFeatureNodeDegreeMap = Map[EdgeType, Continuous] + + def apply(edgeTypePairs: Seq[(EdgeType, EdgeType)]): TwoHopFeaturesDescriptor = { + new TwoHopFeaturesDescriptor(edgeTypePairs) + } +} + +class TwoHopFeaturesDescriptor(edgeTypePairs: Seq[(EdgeType, EdgeType)]) { + import TwoHopFeaturesDescriptor._ + + def getLeftEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = { + edgeTypePair._1 + } + + def getLeftEdgeName(edgeTypePair: (EdgeType, EdgeType)): String = { + getLeftEdge(edgeTypePair).originalName.toLowerCase + } + + def getRightEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = { + edgeTypePair._2 + } + + def getRightEdgeName(edgeTypePair: (EdgeType, EdgeType)): String = { + getRightEdge(edgeTypePair).originalName.toLowerCase + } + + val rawFeaturesMap: TwoHopFeatureMap = edgeTypePairs.map(edgeTypePair => { + val leftEdgeType = getLeftEdge(edgeTypePair) + val leftEdgeName = getLeftEdgeName(edgeTypePair) + val rightEdgeType = getRightEdge(edgeTypePair) + val rightEdgeName = getRightEdgeName(edgeTypePair) + val personalDataTypes = ( + personalDataTypesMap.getOrElse(leftEdgeType, Set.empty) ++ + personalDataTypesMap.getOrElse(rightEdgeType, Set.empty) + ).asJava + val rawFeature = new Continuous(s"$prefix.$leftEdgeName.$rightEdgeName", personalDataTypes) + edgeTypePair -> rawFeature + })(collection.breakOut) + + val 
leftNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap = edgeTypePairs.map(edgeTypePair => { + val leftEdgeType = getLeftEdge(edgeTypePair) + val leftEdgeName = getLeftEdgeName(edgeTypePair) + val personalDataTypes = personalDataTypesMap.getOrElse(leftEdgeType, Set.empty).asJava + val leftNodeDegreeFeature = + new Continuous(s"$prefix.$leftEdgeName.$leftNodeDegreePostfix", personalDataTypes) + leftEdgeType -> leftNodeDegreeFeature + })(collection.breakOut) + + val rightNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap = edgeTypePairs.map(edgeTypePair => { + val rightEdgeType = getRightEdge(edgeTypePair) + val rightEdgeName = getRightEdgeName(edgeTypePair) + val personalDataTypes = personalDataTypesMap.getOrElse(rightEdgeType, Set.empty).asJava + val rightNodeDegreeFeature = + new Continuous(s"$prefix.$rightEdgeName.$rightNodeDegreePostfix", personalDataTypes) + rightEdgeType -> rightNodeDegreeFeature + })(collection.breakOut) + + val normalizedFeaturesMap: TwoHopFeatureMap = edgeTypePairs.map(edgeTypePair => { + val leftEdgeType = getLeftEdge(edgeTypePair) + val leftEdgeName = getLeftEdgeName(edgeTypePair) + val rightEdgeType = getRightEdge(edgeTypePair) + val rightEdgeName = getRightEdgeName(edgeTypePair) + val personalDataTypes = ( + personalDataTypesMap.getOrElse(leftEdgeType, Set.empty) ++ + personalDataTypesMap.getOrElse(rightEdgeType, Set.empty) + ).asJava + val normalizedFeature = + new Continuous(s"$prefix.$leftEdgeName.$rightEdgeName.$normalizedPostfix", personalDataTypes) + edgeTypePair -> normalizedFeature + })(collection.breakOut) + + private val rawFeaturesSeq: Seq[Continuous] = rawFeaturesMap.values.toSeq + private val leftNodeDegreeFeaturesSeq: Seq[Continuous] = leftNodeDegreeFeaturesMap.values.toSeq + private val rightNodeDegreeFeaturesSeq: Seq[Continuous] = rightNodeDegreeFeaturesMap.values.toSeq + private val normalizedFeaturesSeq: Seq[Continuous] = normalizedFeaturesMap.values.toSeq + + val featuresSeq: Seq[Continuous] = + rawFeaturesSeq ++ 
leftNodeDegreeFeaturesSeq ++ rightNodeDegreeFeaturesSeq ++ normalizedFeaturesSeq +} diff --git a/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala new file mode 100644 index 000000000..ece502e30 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/two_hop_features/TwoHopFeaturesConfig.scala @@ -0,0 +1,30 @@ +package com.twitter.timelines.prediction.features.two_hop_features + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType +import com.twitter.graph_feature_service.thriftscala.{EdgeType, FeatureType} + +object TwoHopFeaturesConfig { + val leftEdgeTypes = Seq(EdgeType.Following, EdgeType.Favorite, EdgeType.MutualFollow) + val rightEdgeTypes = Seq( + EdgeType.FollowedBy, + EdgeType.FavoritedBy, + EdgeType.RetweetedBy, + EdgeType.MentionedBy, + EdgeType.MutualFollow) + + val edgeTypePairs: Seq[(EdgeType, EdgeType)] = { + for (leftEdgeType <- leftEdgeTypes; rightEdgeType <- rightEdgeTypes) + yield (leftEdgeType, rightEdgeType) + } + + val featureTypes: Seq[FeatureType] = edgeTypePairs.map(pair => FeatureType(pair._1, pair._2)) + + val personalDataTypesMap: Map[EdgeType, Set[PersonalDataType]] = Map( + EdgeType.Following -> Set(PersonalDataType.CountOfFollowersAndFollowees), + EdgeType.Favorite -> Set( + PersonalDataType.CountOfPrivateLikes, + PersonalDataType.CountOfPublicLikes), + EdgeType.MutualFollow -> Set(PersonalDataType.CountOfFollowersAndFollowees), + EdgeType.FollowedBy -> Set(PersonalDataType.CountOfFollowersAndFollowees) + ) +} diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD b/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD new file mode 100644 index 000000000..598e0c066 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/user_health/BUILD @@ -0,0 +1,10 @@ +scala_library( + sources = ["*.scala"], + 
platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/timelines/author_features/user_health:thrift-scala", + ], +) diff --git a/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala b/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala new file mode 100644 index 000000000..7c8c7f8b1 --- /dev/null +++ b/src/scala/com/twitter/timelines/prediction/features/user_health/UserHealthFeatures.scala @@ -0,0 +1,23 @@ +package com.twitter.timelines.prediction.features.user_health + +import com.twitter.ml.api.Feature +import com.twitter.timelines.author_features.user_health.thriftscala.UserState +import com.twitter.dal.personal_data.thriftjava.PersonalDataType.{UserState => UserStatePDT} +import com.twitter.dal.personal_data.thriftjava.PersonalDataType._ +import scala.collection.JavaConverters._ + +object UserHealthFeatures { + val UserState = new Feature.Discrete("user_health.user_state", Set(UserStatePDT, UserType).asJava) + val IsLightMinusUser = + new Feature.Binary("user_health.is_light_minus_user", Set(UserStatePDT, UserType).asJava) + val AuthorState = + new Feature.Discrete("user_health.author_state", Set(UserStatePDT, UserType).asJava) + val NumAuthorFollowers = + new Feature.Continuous("author_health.num_followers", Set(CountOfFollowersAndFollowees).asJava) + val NumAuthorConnectDays = new Feature.Continuous("author_health.num_connect_days") + val NumAuthorConnect = new Feature.Continuous("author_health.num_connect") + + val IsUserVerifiedUnion = new Feature.Binary("user_account.is_user_verified_union") +} + +case class UserHealthFeatures(id: Long, userStateOpt: Option[UserState]) diff --git a/timelines/data_processing/ml_util/aggregation_framework/AggregateGroup.scala b/timelines/data_processing/ml_util/aggregation_framework/AggregateGroup.scala new 
file mode 100644
index 000000000..6797d838a
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/AggregateGroup.scala
@@ -0,0 +1,124 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework
+
+import com.twitter.ml.api._
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetric
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.EasyMetric
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.MaxMetric
+import com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform
+import com.twitter.util.Duration
+import java.lang.{Boolean => JBoolean}
+import java.lang.{Long => JLong}
+import scala.language.existentials
+
+/**
+ * A wrapper for [[com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup]]
+ * (see TypedAggregateGroup.scala) with some convenient syntactic sugar that avoids
+ * the user having to specify different groups for different types of features.
+ * Gets translated into one strongly typed TypedAggregateGroup per feature type
+ * (plus an optional timestamp group) by the buildTypedAggregateGroups() method defined below.
+ *
+ * @param inputSource Source to compute this aggregate over
+ * @param preTransforms Sequence of transforms (declared as [[OneToSomeTransform]]) applied to
+ *   data records pre-aggregation (e.g. discretization, renaming). Defaults to empty.
+ * @param samplingTransformOpt Optional [[OneToSomeTransform]] that samples data record (NOTE(review): no constructor parameter with this name is declared below; possibly stale)
+ * @param aggregatePrefix Prefix to use for naming resultant aggregate features
+ * @param keys Features to group by when computing the aggregates
+ *   (e.g. USER_ID, AUTHOR_ID). These must be either discrete, string or sparse binary.
+ *   Grouping by a sparse binary feature is different than grouping by a discrete or string
+ *   feature. For example, if you have a sparse binary feature WORDS_IN_TWEET which is
+ *   a set of all words in a tweet, then grouping by this feature generates a
+ *   separate aggregate mean/count/etc for each value of the feature (each word), and
+ *   not just a single aggregate count for different "sets of words"
+ * @param features Features to aggregate (e.g. blender_score or is_photo).
+ * @param labels Labels to cross the features with to make pair features, if any.
+ * @param metrics Aggregation metrics to compute (e.g. count, mean)
+ * @param halfLives Half lives to use for the aggregations, to be crossed with the above.
+ *   use Duration.Top for "forever" aggregations over an infinite time window (no decay).
+ * @param outputStore Store to output this aggregate to
+ * @param includeAnyFeature Aggregate label counts for any feature value (defaults to true)
+ * @param includeAnyLabel Aggregate feature counts for any label value (e.g. all impressions; defaults to true)
+ * @param includeTimestampFeature Also compute a max aggregate on the timestamp feature, emitted as one extra group (defaults to false)
+ * @param aggExclusionRegex Sequence of regexes, which define features to exclude (going by the parameter name; it is forwarded unchanged to TypedAggregateGroup, so confirm the matching rules there)
+ */
+case class AggregateGroup(
+  inputSource: AggregateSource,
+  aggregatePrefix: String,
+  keys: Set[Feature[_]],
+  features: Set[Feature[_]],
+  labels: Set[_ <: Feature[JBoolean]],
+  metrics: Set[EasyMetric],
+  halfLives: Set[Duration],
+  outputStore: AggregateStore,
+  preTransforms: Seq[OneToSomeTransform] = Seq.empty,
+  includeAnyFeature: Boolean = true,
+  includeAnyLabel: Boolean = true,
+  includeTimestampFeature: Boolean = false,
+  aggExclusionRegex: Seq[String] = Seq.empty) {
+
+  private def toStrongType[T](
+    metrics: Set[EasyMetric],
+    features: Set[Feature[_]],
+    featureType: FeatureType
+  ): TypedAggregateGroup[_] = {
+    val underlyingMetrics: Set[AggregationMetric[T, _]] =
+      metrics.flatMap(_.forFeatureType[T](featureType))
+    val underlyingFeatures: Set[Feature[T]] = features
+      .map(_.asInstanceOf[Feature[T]])
+
+    TypedAggregateGroup[T](
+      inputSource = inputSource,
+      aggregatePrefix
= aggregatePrefix, + keysToAggregate = keys, + featuresToAggregate = underlyingFeatures, + labels = labels, + metrics = underlyingMetrics, + halfLives = halfLives, + outputStore = outputStore, + preTransforms = preTransforms, + includeAnyFeature, + includeAnyLabel, + aggExclusionRegex + ) + } + + private def timestampTypedAggregateGroup: TypedAggregateGroup[_] = { + val metrics: Set[AggregationMetric[JLong, _]] = + Set(MaxMetric.forFeatureType[JLong](TypedAggregateGroup.timestampFeature.getFeatureType).get) + + TypedAggregateGroup[JLong]( + inputSource = inputSource, + aggregatePrefix = aggregatePrefix, + keysToAggregate = keys, + featuresToAggregate = Set(TypedAggregateGroup.timestampFeature), + labels = Set.empty, + metrics = metrics, + halfLives = Set(Duration.Top), + outputStore = outputStore, + preTransforms = preTransforms, + includeAnyFeature = false, + includeAnyLabel = true, + aggExclusionRegex = Seq.empty + ) + } + + def buildTypedAggregateGroups(): List[TypedAggregateGroup[_]] = { + val typedAggregateGroupsList = { + if (features.isEmpty) { + List(toStrongType(metrics, features, FeatureType.BINARY)) + } else { + features + .groupBy(_.getFeatureType()) + .toList + .map { + case (featureType, features) => + toStrongType(metrics, features, featureType) + } + } + } + + val optionalTimestampTypedAggregateGroup = + if (includeTimestampFeature) List(timestampTypedAggregateGroup) else List() + + typedAggregateGroupsList ++ optionalTimestampTypedAggregateGroup + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/AggregateSource.scala b/timelines/data_processing/ml_util/aggregation_framework/AggregateSource.scala new file mode 100644 index 000000000..7fb239c65 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/AggregateSource.scala @@ -0,0 +1,9 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.ml.api.Feature +import java.lang.{Long => JLong} + +trait AggregateSource 
extends Serializable { + def name: String + def timestampFeature: Feature[JLong] +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/AggregateStore.scala b/timelines/data_processing/ml_util/aggregation_framework/AggregateStore.scala new file mode 100644 index 000000000..1c09b33f0 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/AggregateStore.scala @@ -0,0 +1,5 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +trait AggregateStore extends Serializable { + def name: String +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/AggregationConfig.scala b/timelines/data_processing/ml_util/aggregation_framework/AggregationConfig.scala new file mode 100644 index 000000000..2b117ddbd --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/AggregationConfig.scala @@ -0,0 +1,5 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +trait AggregationConfig { + def aggregatesToCompute: Set[TypedAggregateGroup[_]] +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/AggregationKey.scala b/timelines/data_processing/ml_util/aggregation_framework/AggregationKey.scala new file mode 100644 index 000000000..c3aafef69 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/AggregationKey.scala @@ -0,0 +1,50 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.bijection.Bufferable +import com.twitter.bijection.Injection +import scala.util.Try + +/** + * Case class that represents the "grouping" key for any aggregate feature. 
+ * Used by Summingbird to output aggregates to the key-value "store" using sumByKey() + * + * @discreteFeaturesById All discrete featureids (+ values) that are part of this key + * @textFeaturesById All string featureids (+ values) that are part of this key + * + * Example 1: the user aggregate features in aggregatesv1 all group by USER_ID, + * which is a discrete feature. When storing these features, the key would be: + * + * discreteFeaturesById = Map(hash(USER_ID) -> ), textFeaturesById = Map() + * + * Ex 2: If aggregating grouped by USER_ID, AUTHOR_ID, tweet link url, the key would be: + * + * discreteFeaturesById = Map(hash(USER_ID) -> , hash(AUTHOR_ID) -> ), + * textFeaturesById = Map(hash(URL_FEATURE) -> ) + * + * I could have just used a DataRecord for the key, but I wanted to make it strongly typed + * and only support grouping by discrete and string features, so using a case class instead. + * + * Re: efficiency, storing the hash of the feature in addition to just the feature value + * is somewhat more inefficient than only storing the feature value in the key, but it + * adds flexibility to group multiple types of aggregates in the same output store. If we + * decide this isn't a good tradeoff to make later, we can reverse/refactor this decision. + */ +case class AggregationKey( + discreteFeaturesById: Map[Long, Long], + textFeaturesById: Map[Long, String]) + +/** + * A custom injection for the above case class, + * so that Summingbird knows how to store it in Manhattan. 
+ */ +object AggregationKeyInjection extends Injection[AggregationKey, Array[Byte]] { + /* Injection from tuple representation of AggregationKey to Array[Byte] */ + val featureMapsInjection: Injection[(Map[Long, Long], Map[Long, String]), Array[Byte]] = + Bufferable.injectionOf[(Map[Long, Long], Map[Long, String])] + + def apply(aggregationKey: AggregationKey): Array[Byte] = + featureMapsInjection(AggregationKey.unapply(aggregationKey).get) + + def invert(ab: Array[Byte]): Try[AggregationKey] = + featureMapsInjection.invert(ab).map(AggregationKey.tupled(_)) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/BUILD b/timelines/data_processing/ml_util/aggregation_framework/BUILD new file mode 100644 index 000000000..aff488116 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/BUILD @@ -0,0 +1,101 @@ +scala_library( + name = "common_types", + sources = ["*.scala"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/twitter/algebird:bijection", + "3rdparty/jvm/com/twitter/algebird:core", + "3rdparty/jvm/com/twitter/algebird:util", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:json", + "3rdparty/jvm/com/twitter/bijection:macros", + "3rdparty/jvm/com/twitter/bijection:netty", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/com/twitter/bijection:util", + "3rdparty/jvm/org/apache/thrift:libthrift", + "3rdparty/src/jvm/com/twitter/scalding:date", + "3rdparty/src/jvm/com/twitter/summingbird:batch", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/scala/com/twitter/dal/client/dataset", + "src/scala/com/twitter/ml/api/util:datarecord", + "src/scala/com/twitter/scalding_internal/dalv2/vkvs", + "src/scala/com/twitter/scalding_internal/multiformat/format/keyval", + 
"src/scala/com/twitter/storehaus_internal/manhattan/config", + "src/scala/com/twitter/storehaus_internal/offline", + "src/scala/com/twitter/storehaus_internal/util", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/dal/personal_data:personal_data-scala", + "src/thrift/com/twitter/ml/api:data-java", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "timelines/data_processing/ml_util/transforms", + "util/util-core:util-core-util", + ], +) + +target( + name = "common_online_stores", + dependencies = [ + "src/scala/com/twitter/storehaus_internal/memcache", + ], +) + +target( + name = "common_offline_stores", + dependencies = [ + "src/scala/com/twitter/storehaus_internal/manhattan", + ], +) + +target( + name = "user_job", + dependencies = [ + "timelines/data_processing/ml_util/aggregation_framework/job", + ], +) + +target( + name = "scalding", + dependencies = [ + "timelines/data_processing/ml_util/aggregation_framework/scalding", + ], +) + +target( + name = "conversion", + dependencies = [ + "timelines/data_processing/ml_util/aggregation_framework/conversion", + ], +) + +target( + name = "query", + dependencies = [ + "timelines/data_processing/ml_util/aggregation_framework/query", + ], +) + +target( + name = "heron", + dependencies = [ + "timelines/data_processing/ml_util/aggregation_framework/heron", + ], +) + +target( + dependencies = [ + ":common_offline_stores", + ":common_online_stores", + ":common_types", + ":conversion", + ":heron", + ":query", + ":scalding", + ], +) diff --git a/timelines/data_processing/ml_util/aggregation_framework/DataRecordAggregationMonoid.scala b/timelines/data_processing/ml_util/aggregation_framework/DataRecordAggregationMonoid.scala new file mode 100644 index 000000000..bc37c8e05 --- /dev/null +++ 
b/timelines/data_processing/ml_util/aggregation_framework/DataRecordAggregationMonoid.scala @@ -0,0 +1,92 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.algebird.Monoid +import com.twitter.ml.api._ +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.util.SRichDataRecord +import scala.collection.mutable +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon._ + +/** + * Monoid to aggregate over DataRecord objects. + * + * @param aggregates Set of ''TypedAggregateGroup'' case classes* + * to compute using this monoid (see TypedAggregateGroup.scala) + */ +trait DataRecordMonoid extends Monoid[DataRecord] { + + val aggregates: Set[TypedAggregateGroup[_]] + + def zero(): DataRecord = new DataRecord + + /* + * Add two datarecords using this monoid. + * + * @param left Left datarecord to add + * @param right Right datarecord to add + * @return Sum of the two datarecords as a DataRecord + */ + def plus(left: DataRecord, right: DataRecord): DataRecord = { + val result = zero() + aggregates.foreach(_.mutatePlus(result, left, right)) + val leftTimestamp = getTimestamp(left) + val rightTimestamp = getTimestamp(right) + SRichDataRecord(result).setFeatureValue( + SharedFeatures.TIMESTAMP, + leftTimestamp.max(rightTimestamp) + ) + result + } +} + +case class DataRecordAggregationMonoid(aggregates: Set[TypedAggregateGroup[_]]) + extends DataRecordMonoid { + + private def sumBuffer(buffer: mutable.ArrayBuffer[DataRecord]): Unit = { + val bufferSum = zero() + buffer.toIterator.foreach { value => + val leftTimestamp = getTimestamp(bufferSum) + val rightTimestamp = getTimestamp(value) + aggregates.foreach(_.mutatePlus(bufferSum, bufferSum, value)) + SRichDataRecord(bufferSum).setFeatureValue( + SharedFeatures.TIMESTAMP, + leftTimestamp.max(rightTimestamp) + ) + } + + buffer.clear() + buffer += bufferSum + } + + /* + * Efficient batched aggregation of 
datarecords using + * this monoid + a buffer, for performance. + * + * @param dataRecordIter An iterator of datarecords to sum + * @return A datarecord option containing the sum + */ + override def sumOption(dataRecordIter: TraversableOnce[DataRecord]): Option[DataRecord] = { + if (dataRecordIter.isEmpty) { + None + } else { + var buffer = mutable.ArrayBuffer[DataRecord]() + val BatchSize = 1000 + + dataRecordIter.foreach { u => + if (buffer.size > BatchSize) sumBuffer(buffer) + buffer += u + } + + if (buffer.size > 1) sumBuffer(buffer) + Some(buffer(0)) + } + } +} + +/* + * This class is used when there is no need to use sumBuffer functionality, as in the case of + * online aggregation of datarecords where using a buffer on a small number of datarecords + * would add some performance overhead. + */ +case class DataRecordAggregationMonoidNoBuffer(aggregates: Set[TypedAggregateGroup[_]]) + extends DataRecordMonoid {} diff --git a/timelines/data_processing/ml_util/aggregation_framework/KeyedRecord.scala b/timelines/data_processing/ml_util/aggregation_framework/KeyedRecord.scala new file mode 100644 index 000000000..bb3096767 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/KeyedRecord.scala @@ -0,0 +1,27 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.ml.api.DataRecord + +/** + * Keyed record that is used to represent the aggregation type and its corresponding data record. + * + * @constructor creates a new keyed record. + * + * @param aggregateType the aggregate type + * @param record the data record associated with the key + **/ +case class KeyedRecord(aggregateType: AggregateType.Value, record: DataRecord) + +/** + * Keyed record map with multiple data records. + * + * @constructor creates a new keyed record map.
+ * + * @param aggregateType the aggregate type + * @param recordMap a map with key of type Long and value of type DataRecord + * where the key indicates the index and the value indicating the record + * + **/ +case class KeyedRecordMap( + aggregateType: AggregateType.Value, + recordMap: scala.collection.Map[Long, DataRecord]) diff --git a/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateInjections.scala b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateInjections.scala new file mode 100644 index 000000000..7ab1233c1 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateInjections.scala @@ -0,0 +1,46 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.dal.personal_data.thriftscala.PersonalDataType +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection +import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection.Batched +import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection.JavaCompactThrift +import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection.genericInjection +import com.twitter.summingbird.batch.BatchID +import scala.collection.JavaConverters._ + +object OfflineAggregateInjections { + val offlineDataRecordAggregateInjection: KeyValInjection[AggregationKey, (BatchID, DataRecord)] = + KeyValInjection( + genericInjection(AggregationKeyInjection), + Batched(JavaCompactThrift[DataRecord]) + ) + + private[aggregation_framework] def getPdts[T]( + aggregateGroups: Iterable[T], + featureExtractor: T => Iterable[Feature[_]] + ): Option[Set[PersonalDataType]] = { + val pdts: Set[PersonalDataType] = for { + group <- aggregateGroups.toSet[T] + feature <- featureExtractor(group) + pdtSet <- feature.getPersonalDataTypes.asSet().asScala + javaPdt <- pdtSet.asScala + scalaPdt <- 
PersonalDataType.get(javaPdt.getValue) + } yield { + scalaPdt + } + if (pdts.nonEmpty) Some(pdts) else None + } + + def getInjection( + aggregateGroups: Set[TypedAggregateGroup[_]] + ): KeyValInjection[AggregationKey, (BatchID, DataRecord)] = { + val keyPdts = getPdts[TypedAggregateGroup[_]](aggregateGroups, _.allOutputKeys) + val valuePdts = getPdts[TypedAggregateGroup[_]](aggregateGroups, _.allOutputFeatures) + KeyValInjection( + genericInjection(AggregationKeyInjection, keyPdts), + genericInjection(Batched(JavaCompactThrift[DataRecord]), valuePdts) + ) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateSource.scala b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateSource.scala new file mode 100644 index 000000000..116f553c4 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateSource.scala @@ -0,0 +1,21 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.dal.client.dataset.TimePartitionedDALDataset +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import java.lang.{Long => JLong} + +case class OfflineAggregateSource( + override val name: String, + override val timestampFeature: Feature[JLong], + scaldingHdfsPath: Option[String] = None, + scaldingSuffixType: Option[String] = None, + dalDataSet: Option[TimePartitionedDALDataset[DataRecord]] = None, + withValidation: Boolean = true) // context: https://jira.twitter.biz/browse/TQ-10618 + extends AggregateSource { + /* + * To help transition callers to use DAL.read, we check that either the HDFS + * path is defined, or the dalDataset. Both options cannot be set at the same time.
+ */ + assert(!(scaldingHdfsPath.isDefined && dalDataSet.isDefined)) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateStore.scala b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateStore.scala new file mode 100644 index 000000000..0bba08a94 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/OfflineAggregateStore.scala @@ -0,0 +1,128 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.dal.client.dataset.KeyValDALDataset +import com.twitter.ml.api.DataRecord +import com.twitter.scalding.DateParser +import com.twitter.scalding.RichDate +import com.twitter.scalding_internal.multiformat.format.keyval.KeyVal +import com.twitter.storehaus_internal.manhattan._ +import com.twitter.storehaus_internal.util.ApplicationID +import com.twitter.storehaus_internal.util.DatasetName +import com.twitter.storehaus_internal.util.HDFSPath +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird.batch.Batcher +import com.twitter.summingbird_internal.runner.store_config._ +import java.util.TimeZone +import com.twitter.summingbird.batch.MillisecondBatcher + +/* + * Configuration common to all offline aggregate stores + * + * @param outputHdfsPathPrefix HDFS prefix to store all output aggregate types offline + * @param dummyAppId Dummy manhattan app id required by summingbird (unused) + * @param dummyDatasetPrefix Dummy manhattan dataset prefix required by summingbird (unused) + * @param startDate Start date for summingbird job to begin computing aggregates + */ +case class OfflineAggregateStoreCommonConfig( + outputHdfsPathPrefix: String, + dummyAppId: String, + dummyDatasetPrefix: String, + startDate: String) + +/** + * A trait inherited by any object that defines + * a HDFS prefix to write output data to. E.g. timelines has its own + * output prefix to write aggregates_v2 results, your team can create + * its own. 
+ */ +trait OfflineStoreCommonConfig extends Serializable { + /* + * @param startDate Date to create config for + * @return OfflineAggregateStoreCommonConfig object with all config details for output populated + */ + def apply(startDate: String): OfflineAggregateStoreCommonConfig +} + +/** + * @param name Uniquely identifiable human-readable name for this output store + * @param startDate Start date for this output store from which aggregates should be computed + * @param commonConfig Provider of other common configuration details + * @param batchesToKeep Retention policy on output (number of batches to keep) + */ +abstract class OfflineAggregateStoreBase + extends OfflineStoreOnlyConfig[ManhattanROConfig] + with AggregateStore { + + override def name: String + def startDate: String + def commonConfig: OfflineStoreCommonConfig + def batchesToKeep: Int + def maxKvSourceFailures: Int + + val datedCommonConfig: OfflineAggregateStoreCommonConfig = commonConfig.apply(startDate) + val manhattan: ManhattanROConfig = ManhattanROConfig( + /* This is a sample config, will be replaced with production config later */ + HDFSPath(s"${datedCommonConfig.outputHdfsPathPrefix}/${name}"), + ApplicationID(datedCommonConfig.dummyAppId), + DatasetName(s"${datedCommonConfig.dummyDatasetPrefix}_${name}_1"), + com.twitter.storehaus_internal.manhattan.Adama + ) + + val batcherSize = 24 + val batcher: MillisecondBatcher = Batcher.ofHours(batcherSize) + + val startTime: RichDate = + RichDate(datedCommonConfig.startDate)(TimeZone.getTimeZone("UTC"), DateParser.default) + + val offline: ManhattanROConfig = manhattan +} + +/** + * Defines an aggregates store which is composed of DataRecords + * @param name Uniquely identifiable human-readable name for this output store + * @param startDate Start date for this output store from which aggregates should be computed + * @param commonConfig Provider of other common configuration details + * @param batchesToKeep Retention policy on output (number of 
batches to keep) + */ +case class OfflineAggregateDataRecordStore( + override val name: String, + override val startDate: String, + override val commonConfig: OfflineStoreCommonConfig, + override val batchesToKeep: Int = 7, + override val maxKvSourceFailures: Int = 0) + extends OfflineAggregateStoreBase { + + def toOfflineAggregateDataRecordStoreWithDAL( + dalDataset: KeyValDALDataset[KeyVal[AggregationKey, (BatchID, DataRecord)]] + ): OfflineAggregateDataRecordStoreWithDAL = + OfflineAggregateDataRecordStoreWithDAL( + name = name, + startDate = startDate, + commonConfig = commonConfig, + dalDataset = dalDataset, + maxKvSourceFailures = maxKvSourceFailures + ) +} + +trait withDALDataset { + def dalDataset: KeyValDALDataset[KeyVal[AggregationKey, (BatchID, DataRecord)]] +} + +/** + * Defines an aggregates store which is composed of DataRecords and writes using DAL. + * @param name Uniquely identifiable human-readable name for this output store + * @param startDate Start date for this output store from which aggregates should be computed + * @param commonConfig Provider of other common configuration details + * @param dalDataset The KeyValDALDataset for this output store + * @param batchesToKeep Unused, kept for interface compatibility. You must define a separate Oxpecker + * retention policy to maintain the desired number of versions. 
+ */ +case class OfflineAggregateDataRecordStoreWithDAL( + override val name: String, + override val startDate: String, + override val commonConfig: OfflineStoreCommonConfig, + override val dalDataset: KeyValDALDataset[KeyVal[AggregationKey, (BatchID, DataRecord)]], + override val batchesToKeep: Int = -1, + override val maxKvSourceFailures: Int = 0) + extends OfflineAggregateStoreBase + with withDALDataset diff --git a/timelines/data_processing/ml_util/aggregation_framework/README.md b/timelines/data_processing/ml_util/aggregation_framework/README.md new file mode 100644 index 000000000..ea9a4b446 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/README.md @@ -0,0 +1,39 @@ +Overview +======== + + +The **aggregation framework** is a set of libraries and utilities that allows teams to flexibly +compute aggregate (counting) features in both batch and in real-time. Aggregate features can capture +historical interactions between arbitrary entities (and sets thereof), conditional on provided features +and labels. + +These types of engineered aggregate features have proven to be highly impactful across different teams at Twitter. + + +What are some features we can compute? +-------------------------------------- + +The framework supports computing aggregate features on provided grouping keys. The only constraint is that these keys are sparse binary features (or are sets thereof). + +For example, a common use case is to calculate a user's past engagement history with various types of tweets (photo, video, retweets, etc.), specific authors, specific in-network engagers or any other entity the user has interacted with and that could provide signal. In this case, the underlying aggregation keys are `userId`, `(userId, authorId)` or `(userId, engagerId)`. + +In Timelines and MagicRecs, we also compute custom aggregate engagement counts on every `tweetId`.
Similarly, other aggregations are possible, perhaps on `advertiserId` or `mediaId` as long as the grouping key is sparse binary. + + +What implementations are supported? +----------------------------------- + +Offline, we support the daily batch processing of DataRecords containing all required input features to generate +aggregate features. These are then uploaded to Manhattan for online hydration. + +Online, we support the real-time aggregation of DataRecords through Storm with a backing memcache that can be queried +for the real-time aggregate features. + +Additional documentation exists in the [docs folder](docs) + + +Where is this used? +-------------------- + +The Home Timeline heavy ranker uses a variety of both [batch and real time features](../../../../src/scala/com/twitter/timelines/prediction/common/aggregates/README.md) generated by this framework. +These features are also used for email and other recommendations. \ No newline at end of file diff --git a/timelines/data_processing/ml_util/aggregation_framework/StoreConfig.scala b/timelines/data_processing/ml_util/aggregation_framework/StoreConfig.scala new file mode 100644 index 000000000..703d5893c --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/StoreConfig.scala @@ -0,0 +1,68 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureType + +/** + * Convenience class to describe the stores that make up a particular type of aggregate. + * + * For example, as of 2018/07, user aggregates are generated by merging the individual + * "user_aggregates", "rectweet_user_aggregates", and "twitter_wide_user_aggregates". + * + * @param storeNames Name of the stores. + * @param aggregateType Type of aggregate, usually differentiated by the aggregation key.
+ * @param shouldHash Used at TimelineRankingAggregatesUtil.extractSecondary when extracting the + * secondary key value. + */ +case class StoreConfig[T]( + storeNames: Set[String], + aggregateType: AggregateType.Value, + shouldHash: Boolean = false +)( + implicit storeMerger: StoreMerger) { + require(storeMerger.isValidToMerge(storeNames)) + + private val representativeStore = storeNames.head + + val aggregationKeyIds: Set[Long] = storeMerger.getAggregateKeys(representativeStore) + val aggregationKeyFeatures: Set[Feature[_]] = + storeMerger.getAggregateKeyFeatures(representativeStore) + val secondaryKeyFeatureOpt: Option[Feature[_]] = storeMerger.getSecondaryKey(representativeStore) +} + +trait StoreMerger { + def aggregationConfig: AggregationConfig + + def getAggregateKeyFeatures(storeName: String): Set[Feature[_]] = + aggregationConfig.aggregatesToCompute + .filter(_.outputStore.name == storeName) + .flatMap(_.keysToAggregate) + + def getAggregateKeys(storeName: String): Set[Long] = + TypedAggregateGroup.getKeyFeatureIds(getAggregateKeyFeatures(storeName)) + + def getSecondaryKey(storeName: String): Option[Feature[_]] = { + val keys = getAggregateKeyFeatures(storeName) + require(keys.size <= 2, "Only singleton or binary aggregation keys are supported.") + require(keys.contains(SharedFeatures.USER_ID), "USER_ID must be one of the aggregation keys.") + keys + .filterNot(_ == SharedFeatures.USER_ID) + .headOption + .map { possiblySparseKey => + if (possiblySparseKey.getFeatureType != FeatureType.SPARSE_BINARY) { + possiblySparseKey + } else { + TypedAggregateGroup.sparseFeature(possiblySparseKey) + } + } + } + + /** + * Stores may only be merged if they have the same aggregation key. 
+ */ + def isValidToMerge(storeNames: Set[String]): Boolean = { + val expectedKeyOpt = storeNames.headOption.map(getAggregateKeys) + storeNames.forall(v => getAggregateKeys(v) == expectedKeyOpt.get) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/StoreRegister.scala b/timelines/data_processing/ml_util/aggregation_framework/StoreRegister.scala new file mode 100644 index 000000000..a7e9cd535 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/StoreRegister.scala @@ -0,0 +1,13 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +trait StoreRegister { + def allStores: Set[StoreConfig[_]] + + lazy val storeMap: Map[AggregateType.Value, StoreConfig[_]] = allStores + .map(store => (store.aggregateType, store)) + .toMap + + lazy val storeNameToTypeMap: Map[String, AggregateType.Value] = allStores + .flatMap(store => store.storeNames.map(name => (name, store.aggregateType))) + .toMap +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/TypedAggregateGroup.scala b/timelines/data_processing/ml_util/aggregation_framework/TypedAggregateGroup.scala new file mode 100644 index 000000000..92afc4137 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/TypedAggregateGroup.scala @@ -0,0 +1,486 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.ml.api._ +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregateFeature +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetric +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon._ +import 
com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform +import com.twitter.util.Duration +import com.twitter.util.Try +import java.lang.{Boolean => JBoolean} +import java.lang.{Double => JDouble} +import java.lang.{Long => JLong} +import java.util.{Set => JSet} +import scala.annotation.tailrec +import scala.language.existentials +import scala.collection.JavaConverters._ +import scala.util.matching.Regex + +/** + * A case class containing precomputed data useful to quickly + * process operations over an aggregate. + * + * @param query The underlying feature being aggregated + * @param metric The aggregation metric + * @param outputFeatures The output features that aggregation will produce + * @param outputFeatureIds The precomputed hashes of the above outputFeatures + */ +case class PrecomputedAggregateDescriptor[T]( + query: AggregateFeature[T], + metric: AggregationMetric[T, _], + outputFeatures: List[Feature[_]], + outputFeatureIds: List[JLong]) + +object TypedAggregateGroup { + + /** + * Recursive function that generates all combinations of value + * assignments for a collection of sparse binary features.
+ * + * @param sparseBinaryIdValues list of sparse binary feature ids and possible values they can take + * @return A set of maps, where each map represents one possible assignment of values to ids + */ + def sparseBinaryPermutations( + sparseBinaryIdValues: List[(Long, Set[String])] + ): Set[Map[Long, String]] = sparseBinaryIdValues match { + case (id, values) +: rest => + tailRecSparseBinaryPermutations( + existingPermutations = values.map(value => Map(id -> value)), + remainingIdValues = rest + ) + case Nil => Set.empty + } + + @tailrec private[this] def tailRecSparseBinaryPermutations( + existingPermutations: Set[Map[Long, String]], + remainingIdValues: List[(Long, Set[String])] + ): Set[Map[Long, String]] = remainingIdValues match { + case Nil => existingPermutations + case (id, values) +: rest => + tailRecSparseBinaryPermutations( + existingPermutations.flatMap { existingIdValueMap => + values.map(value => existingIdValueMap ++ Map(id -> value)) + }, + rest + ) + } + + val SparseFeatureSuffix = ".member" + def sparseFeature(sparseBinaryFeature: Feature[_]): Feature[String] = + new Feature.Text( + sparseBinaryFeature.getDenseFeatureName + SparseFeatureSuffix, + AggregationMetricCommon.derivePersonalDataTypes(Some(sparseBinaryFeature))) + + /* Throws exception if obj not an instance of U */ + private[this] def validate[U](obj: Any): U = { + require(obj.isInstanceOf[U]) + obj.asInstanceOf[U] + } + + private[this] def getFeatureOpt[U](dataRecord: DataRecord, feature: Feature[U]): Option[U] = + Option(SRichDataRecord(dataRecord).getFeatureValue(feature)).map(validate[U](_)) + + /** + * Get a mapping from feature ids + * (including individual sparse elements of a sparse feature) to values + * from the given data record, for a given feature type. 
+ * + * @param dataRecord Data record to get features from + * @param keysToAggregate key features to get id-value mappings for + * @param featureType Feature type to get id-value maps for + */ + def getKeyFeatureIdValues[U]( + dataRecord: DataRecord, + keysToAggregate: Set[Feature[_]], + featureType: FeatureType + ): Set[(Long, Option[U])] = { + val featuresOfThisType: Set[Feature[U]] = keysToAggregate + .filter(_.getFeatureType == featureType) + .map(validate[Feature[U]]) + + featuresOfThisType + .map { feature: Feature[U] => + val featureId: Long = getDenseFeatureId(feature) + val featureOpt: Option[U] = getFeatureOpt(dataRecord, feature) + (featureId, featureOpt) + } + } + + // TypedAggregateGroup may transform the aggregate keys for internal use. This method generates + // denseFeatureIds for the transformed feature. + def getDenseFeatureId(feature: Feature[_]): Long = + if (feature.getFeatureType != FeatureType.SPARSE_BINARY) { + feature.getDenseFeatureId + } else { + sparseFeature(feature).getDenseFeatureId + } + + /** + * Return denseFeatureIds for the input features after applying the custom transformation that + * TypedAggregateGroup applies to its keysToAggregate. + * + * @param keysToAggregate key features to get id for + */ + def getKeyFeatureIds(keysToAggregate: Set[Feature[_]]): Set[Long] = + keysToAggregate.map(getDenseFeatureId) + + def checkIfAllKeysExist[U](featureIdValueMap: Map[Long, Option[U]]): Boolean = + featureIdValueMap.forall { case (_, valueOpt) => valueOpt.isDefined } + + def liftOptions[U](featureIdValueMap: Map[Long, Option[U]]): Map[Long, U] = + featureIdValueMap + .flatMap { + case (id, valueOpt) => + valueOpt.map { value => (id, value) } + } + + val timestampFeature: Feature[JLong] = SharedFeatures.TIMESTAMP + + /** + * Builds all valid aggregation keys (for the output store) from + * a datarecord and a spec listing the keys to aggregate. 
There + * can be multiple aggregation keys generated from a single data + * record when grouping by sparse binary features, for which multiple + * values can be set within the data record. + * + * @param dataRecord Data record to read values for key features from + * @return A set of AggregationKeys encoding the values of all keys + */ + def buildAggregationKeys( + dataRecord: DataRecord, + keysToAggregate: Set[Feature[_]] + ): Set[AggregationKey] = { + val discreteAggregationKeys = getKeyFeatureIdValues[Long]( + dataRecord, + keysToAggregate, + FeatureType.DISCRETE + ).toMap + + val textAggregationKeys = getKeyFeatureIdValues[String]( + dataRecord, + keysToAggregate, + FeatureType.STRING + ).toMap + + val sparseBinaryIdValues = getKeyFeatureIdValues[JSet[String]]( + dataRecord, + keysToAggregate, + FeatureType.SPARSE_BINARY + ).map { + case (id, values) => + ( + id, + values + .map(_.asScala.toSet) + .getOrElse(Set.empty[String]) + ) + }.toList + + if (checkIfAllKeysExist(discreteAggregationKeys) && + checkIfAllKeysExist(textAggregationKeys)) { + if (sparseBinaryIdValues.nonEmpty) { + sparseBinaryPermutations(sparseBinaryIdValues).map { sparseBinaryTextKeys => + AggregationKey( + discreteFeaturesById = liftOptions(discreteAggregationKeys), + textFeaturesById = liftOptions(textAggregationKeys) ++ sparseBinaryTextKeys + ) + } + } else { + Set( + AggregationKey( + discreteFeaturesById = liftOptions(discreteAggregationKeys), + textFeaturesById = liftOptions(textAggregationKeys) + ) + ) + } + } else Set.empty[AggregationKey] + } + +} + +/** + * Specifies one or more related aggregate(s) to compute in the summingbird job. + * + * @param inputSource Source to compute this aggregate over + * @param preTransforms Sequence of [[com.twitter.ml.api.RichITransform]] that transform + * data records pre-aggregation (e.g. discretization, renaming) + * @param samplingTransformOpt Optional [[OneToSomeTransform]] that transform data + * record to optional data record (e.g. 
for sampling) before aggregation + * @param aggregatePrefix Prefix to use for naming resultant aggregate features + * @param keysToAggregate Features to group by when computing the aggregates + * (e.g. USER_ID, AUTHOR_ID) + * @param featuresToAggregate Features to aggregate (e.g. blender_score or is_photo) + * @param labels Labels to cross the features with to make pair features, if any. + * Use Label.All if you don't want to cross with a label. + * @param metrics Aggregation metrics to compute (e.g. count, mean) + * @param halfLives Half lives to use for the aggregations, to be crossed with the above. + * Use Duration.Top for "forever" aggregations over an infinite time window (no decay). + * @param outputStore Store to output this aggregate to + * @param includeAnyFeature Aggregate label counts for any feature value + * @param includeAnyLabel Aggregate feature counts for any label value (e.g. all impressions) + * + * The overall config for the summingbird job consists of a list of "AggregateGroup" + * case class objects, which get translated into strongly typed "TypedAggregateGroup" + * case class objects. A single TypedAggregateGroup always groups input data records from + * ''inputSource'' by a single set of aggregation keys (''keysToAggregate''). + * Within these groups, we perform a comprehensive cross of: + * + * ''featuresToAggregate'' x ''labels'' x ''metrics'' x ''halfLives'' + * + * All the resultant aggregate features are assigned a human-readable feature name + * beginning with ''aggregatePrefix'', and are written to DataRecords that get + * aggregated and written to the store specified by ''outputStore''. + * + * Illustrative example.
Suppose we define our spec as follows: + * + * TypedAggregateGroup( + * inputSource = "timelines_recap_daily", + * aggregatePrefix = "user_author_aggregate", + * keysToAggregate = Set(USER_ID, AUTHOR_ID), + * featuresToAggregate = Set(RecapFeatures.TEXT_SCORE, RecapFeatures.BLENDER_SCORE), + * labels = Set(RecapFeatures.IS_FAVORITED, RecapFeatures.IS_REPLIED), + * metrics = Set(CountMetric, MeanMetric), + * halfLives = Set(7.Days, 30.Days), + * outputStore = "user_author_aggregate_store" + * ) + * + * This will process data records from the source named "timelines_recap_daily" + * (see AggregateSource.scala for more details on how to add your own source). + * It will produce a total of 2x2x2x2 = 16 crossed (feature, label) aggregates, plus the + * uncrossed ones added because ''includeAnyFeature'' and ''includeAnyLabel'' default to + * true, named like: + * + * user_author_aggregate.pair.recap.engagement.is_favorited.recap.searchfeature.blender_score.count.7days + * user_author_aggregate.pair.recap.engagement.is_favorited.recap.searchfeature.blender_score.count.30days + * user_author_aggregate.pair.recap.engagement.is_favorited.recap.searchfeature.blender_score.mean.7days + * + * ... (and so on) + * + * and all the result features will be stored in DataRecords, summed up, and written + * to the output store defined by the name "user_author_aggregate_store". + * (see AggregateStore.scala for details on how to add your own store). + * + * If you do not want a full cross, split up your config into multiple TypedAggregateGroup + * objects. Splitting is strongly advised to avoid blowing up and creating invalid + * or unnecessary combinations of aggregate features (note that some combinations + * are useless or invalid e.g. computing the mean of a binary feature). Splitting + * also does not cost anything in terms of real-time performance, because all + * Aggregate objects in the master spec that share the same ''keysToAggregate'', the + * same ''inputSource'' and the same ''outputStore'' are grouped by the summingbird + * job logic and stored into a single DataRecord in the output store.
Overlapping + * aggregates will also automatically be deduplicated so don't worry about overlaps. + */ +case class TypedAggregateGroup[T]( + inputSource: AggregateSource, + aggregatePrefix: String, + keysToAggregate: Set[Feature[_]], + featuresToAggregate: Set[Feature[T]], + labels: Set[_ <: Feature[JBoolean]], + metrics: Set[AggregationMetric[T, _]], + halfLives: Set[Duration], + outputStore: AggregateStore, + preTransforms: Seq[OneToSomeTransform] = Seq.empty, + includeAnyFeature: Boolean = true, + includeAnyLabel: Boolean = true, + aggExclusionRegex: Seq[String] = Seq.empty) { + import TypedAggregateGroup._ + + val compiledRegexes = aggExclusionRegex.map(new Regex(_)) + + // true if should drop, false if should keep + def filterOutAggregateFeature( + feature: PrecomputedAggregateDescriptor[_], + regexes: Seq[Regex] + ): Boolean = { + if (regexes.nonEmpty) + feature.outputFeatures.exists { feature => + regexes.exists { re => re.findFirstMatchIn(feature.getDenseFeatureName).nonEmpty } + } + else false + } + + def buildAggregationKeys( + dataRecord: DataRecord + ): Set[AggregationKey] = { + TypedAggregateGroup.buildAggregationKeys(dataRecord, keysToAggregate) + } + + /** + * This val precomputes descriptors for all individual aggregates in this group + * (of type ''AggregateFeature''). Also precompute hashes of all aggregation + * "output" features generated by these operators for faster + * run-time performance (this turns out to be a primary CPU bottleneck). 
+ * Ex: for the mean operator, "sum" and "count" are output features + */ + val individualAggregateDescriptors: Set[PrecomputedAggregateDescriptor[T]] = { + /* + * By default, in additional to all feature-label crosses, also + * compute in aggregates over each feature and label without crossing + */ + val labelOptions = labels.map(Option(_)) ++ + (if (includeAnyLabel) Set(None) else Set.empty) + val featureOptions = featuresToAggregate.map(Option(_)) ++ + (if (includeAnyFeature) Set(None) else Set.empty) + for { + feature <- featureOptions + label <- labelOptions + metric <- metrics + halfLife <- halfLives + } yield { + val query = AggregateFeature[T](aggregatePrefix, feature, label, halfLife) + + val aggregateOutputFeatures = metric.getOutputFeatures(query) + val aggregateOutputFeatureIds = metric.getOutputFeatureIds(query) + PrecomputedAggregateDescriptor( + query, + metric, + aggregateOutputFeatures, + aggregateOutputFeatureIds + ) + } + }.filterNot(filterOutAggregateFeature(_, compiledRegexes)) + + /* Precomputes a map from all generated aggregate feature ids to their half lives. */ + val continuousFeatureIdsToHalfLives: Map[Long, Duration] = + individualAggregateDescriptors.flatMap { descriptor => + descriptor.outputFeatures + .flatMap { feature => + if (feature.getFeatureType() == FeatureType.CONTINUOUS) { + Try(feature.asInstanceOf[Feature[JDouble]]).toOption + .map(feature => (feature.getFeatureId(), descriptor.query.halfLife)) + } else None + } + }.toMap + + /* + * Sparse binary keys become individual string keys in the output. + * e.g. 
group by "words.in.tweet", output key: "words.in.tweet.member" + */ + val allOutputKeys: Set[Feature[_]] = keysToAggregate.map { key => + if (key.getFeatureType == FeatureType.SPARSE_BINARY) sparseFeature(key) + else key + } + + val allOutputFeatures: Set[Feature[_]] = individualAggregateDescriptors.flatMap { + case PrecomputedAggregateDescriptor( + query, + metric, + outputFeatures, + outputFeatureIds + ) => + outputFeatures + } + + val aggregateContext: FeatureContext = new FeatureContext(allOutputFeatures.toList.asJava) + + /** + * Adds all aggregates in this group found in the two input data records + * into a result, mutating the result. Uses a while loop for an + * approximately 10% gain in speed over a for comprehension. + * + * WARNING: mutates ''result'' + * + * @param result The output data record to mutate + * @param left The left data record to add + * @param right The right data record to add + */ + def mutatePlus(result: DataRecord, left: DataRecord, right: DataRecord): Unit = { + val featureIterator = individualAggregateDescriptors.iterator + while (featureIterator.hasNext) { + val descriptor = featureIterator.next + descriptor.metric.mutatePlus( + result, + left, + right, + descriptor.query, + Some(descriptor.outputFeatureIds) + ) + } + } + + /** + * Apply preTransforms sequentially. If any transform results in a dropped (None) + * DataRecord, then entire tranform sequence will result in a dropped DataRecord. + * Note that preTransforms are order-dependent. + */ + private[this] def sequentiallyTransform(dataRecord: DataRecord): Option[DataRecord] = { + val recordOpt = Option(new DataRecord(dataRecord)) + preTransforms.foldLeft(recordOpt) { + case (Some(previousRecord), preTransform) => + preTransform(previousRecord) + case _ => Option.empty[DataRecord] + } + } + + /** + * Given a data record, apply transforms and fetch the incremental contributions to + * each configured aggregate from this data record, and store these in an output data record. 
+ * + * @param dataRecord Input data record to aggregate. + * @return A set of tuples (AggregationKey, DataRecord) whose first entry is an + * AggregationKey indicating what keys we're grouping by, and whose second entry + * is an output data record with incremental contributions to the aggregate value(s) + */ + def computeAggregateKVPairs(dataRecord: DataRecord): Set[(AggregationKey, DataRecord)] = { + sequentiallyTransform(dataRecord) + .flatMap { dataRecord => + val aggregationKeys = buildAggregationKeys(dataRecord) + val increment = new DataRecord + + val isNonEmptyIncrement = individualAggregateDescriptors + .map { descriptor => + descriptor.metric.setIncrement( + output = increment, + input = dataRecord, + query = descriptor.query, + timestampFeature = inputSource.timestampFeature, + aggregateOutputs = Some(descriptor.outputFeatureIds) + ) + } + .exists(identity) + + if (isNonEmptyIncrement) { + SRichDataRecord(increment).setFeatureValue( + timestampFeature, + getTimestamp(dataRecord, inputSource.timestampFeature) + ) + Some(aggregationKeys.map(key => (key, increment))) + } else { + None + } + } + .getOrElse(Set.empty[(AggregationKey, DataRecord)]) + } + + def outputFeaturesToRenamedOutputFeatures(prefix: String): Map[Feature[_], Feature[_]] = { + require(prefix.nonEmpty) + + allOutputFeatures.map { feature => + if (feature.isSetFeatureName) { + val renamedFeatureName = prefix + feature.getDenseFeatureName + val personalDataTypes = + if (feature.getPersonalDataTypes.isPresent) feature.getPersonalDataTypes.get() + else null + + val renamedFeature = feature.getFeatureType match { + case FeatureType.BINARY => + new Feature.Binary(renamedFeatureName, personalDataTypes) + case FeatureType.DISCRETE => + new Feature.Discrete(renamedFeatureName, personalDataTypes) + case FeatureType.STRING => + new Feature.Text(renamedFeatureName, personalDataTypes) + case FeatureType.CONTINUOUS => + new Feature.Continuous(renamedFeatureName, personalDataTypes) + case 
FeatureType.SPARSE_BINARY => + new Feature.SparseBinary(renamedFeatureName, personalDataTypes) + case FeatureType.SPARSE_CONTINUOUS => + new Feature.SparseContinuous(renamedFeatureName, personalDataTypes) + } + feature -> renamedFeature + } else { + feature -> feature + } + }.toMap + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/Utils.scala b/timelines/data_processing/ml_util/aggregation_framework/Utils.scala new file mode 100644 index 000000000..60196fc62 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/Utils.scala @@ -0,0 +1,122 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework + +import com.twitter.algebird.ScMapMonoid +import com.twitter.algebird.Semigroup +import com.twitter.ml.api._ +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureType +import com.twitter.ml.api.util.SRichDataRecord +import java.lang.{Long => JLong} +import scala.collection.{Map => ScMap} + +object Utils { + val dataRecordMerger: DataRecordMerger = new DataRecordMerger + def EmptyDataRecord: DataRecord = new DataRecord() + + private val random = scala.util.Random + private val keyedDataRecordMapMonoid = { + val dataRecordMergerSg = new Semigroup[DataRecord] { + override def plus(x: DataRecord, y: DataRecord): DataRecord = { + dataRecordMerger.merge(x, y) + x + } + } + new ScMapMonoid[Long, DataRecord]()(dataRecordMergerSg) + } + + def keyFromLong(record: DataRecord, feature: Feature[JLong]): Long = + SRichDataRecord(record).getFeatureValue(feature).longValue + + def keyFromString(record: DataRecord, feature: Feature[String]): Long = + try { + SRichDataRecord(record).getFeatureValue(feature).toLong + } catch { + case _: NumberFormatException => 0L + } + + def keyFromHash(record: DataRecord, feature: Feature[String]): Long = + SRichDataRecord(record).getFeatureValue(feature).hashCode.toLong + + def 
extractSecondary[T]( + record: DataRecord, + secondaryKey: Feature[T], + shouldHash: Boolean = false + ): Long = secondaryKey.getFeatureType match { + case FeatureType.STRING => + if (shouldHash) keyFromHash(record, secondaryKey.asInstanceOf[Feature[String]]) + else keyFromString(record, secondaryKey.asInstanceOf[Feature[String]]) + case FeatureType.DISCRETE => keyFromLong(record, secondaryKey.asInstanceOf[Feature[JLong]]) + case f => throw new IllegalArgumentException(s"Feature type $f is not supported.") + } + + def mergeKeyedRecordOpts(args: Option[KeyedRecord]*): Option[KeyedRecord] = { + val keyedRecords = args.flatten + if (keyedRecords.isEmpty) { + None + } else { + val keys = keyedRecords.map(_.aggregateType) + require(keys.toSet.size == 1, "All merged records must have the same aggregate key.") + val mergedRecord = mergeRecords(keyedRecords.map(_.record): _*) + Some(KeyedRecord(keys.head, mergedRecord)) + } + } + + private def mergeRecords(args: DataRecord*): DataRecord = + if (args.isEmpty) EmptyDataRecord + else { + // can just do foldLeft(new DataRecord) for both cases, but try reusing the EmptyDataRecord singleton as much as possible + args.tail.foldLeft(args.head) { (merged, record) => + dataRecordMerger.merge(merged, record) + merged + } + } + + def mergeKeyedRecordMapOpts( + opt1: Option[KeyedRecordMap], + opt2: Option[KeyedRecordMap], + maxSize: Int = Int.MaxValue + ): Option[KeyedRecordMap] = { + if (opt1.isEmpty && opt2.isEmpty) { + None + } else { + val keys = Seq(opt1, opt2).flatten.map(_.aggregateType) + require(keys.toSet.size == 1, "All merged records must have the same aggregate key.") + val mergedRecordMap = mergeMapOpts(opt1.map(_.recordMap), opt2.map(_.recordMap), maxSize) + Some(KeyedRecordMap(keys.head, mergedRecordMap)) + } + } + + private def mergeMapOpts( + opt1: Option[ScMap[Long, DataRecord]], + opt2: Option[ScMap[Long, DataRecord]], + maxSize: Int = Int.MaxValue + ): ScMap[Long, DataRecord] = { + require(maxSize >= 0) + val 
keySet = opt1.map(_.keySet).getOrElse(Set.empty) ++ opt2.map(_.keySet).getOrElse(Set.empty)
+    val totalSize = keySet.size
+    val rate = if (totalSize <= maxSize) 1.0 else maxSize.toDouble / totalSize
+    val prunedOpt1 = opt1.map(downsample(_, rate))
+    val prunedOpt2 = opt2.map(downsample(_, rate))
+    Seq(prunedOpt1, prunedOpt2).flatten
+      .foldLeft(keyedDataRecordMapMonoid.zero)(keyedDataRecordMapMonoid.plus)
+  }
+
+  /**
+   * Deterministically keeps roughly a `samplingRate` fraction of the entries of `m`.
+   *
+   * Whether an entry survives depends only on its key's hash code and on `samplingRate`,
+   * so two maps sampled at the same rate always agree on which keys they keep.
+   *
+   * @param m            map to downsample
+   * @param samplingRate fraction of entries to keep; values >= 1.0 return `m` unchanged and
+   *                     values <= 0 return an empty map
+   * @return `m` itself, an empty map, or the entries of `m` whose keys were selected
+   */
+  def downsample[K, T](m: ScMap[K, T], samplingRate: Double): ScMap[K, T] = {
+    if (samplingRate >= 1.0) {
+      m
+    } else if (samplingRate <= 0) {
+      Map.empty
+    } else {
+      m.filter {
+        case (key, _) =>
+          // It is important that the same user with the same sampling rate be deterministically
+          // selected or rejected. Otherwise, mergeMapOpts will choose different keys for the
+          // two input maps and their union will be larger than the limit we want.
+          //
+          // A generator private to this key is seeded here rather than re-seeding a shared
+          // one: setSeed followed by nextDouble on a shared generator is not atomic, so
+          // concurrent callers could interleave and get a draw that belongs to another key.
+          // A freshly seeded generator yields the same first draw as setSeed did.
+          val seed = (key.hashCode, samplingRate.hashCode).hashCode
+          new scala.util.Random(seed).nextDouble < samplingRate
+      }
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2Adapter.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2Adapter.scala
new file mode 100644
index 000000000..f5b7d1814
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2Adapter.scala
@@ -0,0 +1,165 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion
+
+import com.twitter.algebird.DecayedValue
+import com.twitter.algebird.DecayedValueMonoid
+import com.twitter.algebird.Monoid
+import com.twitter.ml.api._
+import com.twitter.ml.api.constant.SharedFeatures
+import com.twitter.ml.api.util.FDsl._
+import com.twitter.ml.api.util.SRichDataRecord
+import com.twitter.summingbird.batch.BatchID
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
+import
com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregateFeature +import com.twitter.util.Duration +import java.lang.{Double => JDouble} +import java.lang.{Long => JLong} +import scala.collection.JavaConverters._ +import scala.collection.mutable +import java.{util => ju} + +object AggregatesV2Adapter { + type AggregatesV2Tuple = (AggregationKey, (BatchID, DataRecord)) + + val Epsilon: Double = 1e-6 + val decayedValueMonoid: Monoid[DecayedValue] = DecayedValueMonoid(Epsilon) + + /* + * Decays the storedValue from timestamp -> sourceVersion + * + * @param storedValue value read from the aggregates v2 output store + * @param timestamp timestamp corresponding to store value + * @param sourceVersion timestamp of version to decay all values to uniformly + * @param halfLife Half life duration to use for applying decay + * + * By applying this function, the feature values for all users are decayed + * to sourceVersion. This is important to ensure that a user whose aggregates + * were updated long in the past does not have an artifically inflated count + * compared to one whose aggregates were updated (and hence decayed) more recently. + */ + def decayValueToSourceVersion( + storedValue: Double, + timestamp: Long, + sourceVersion: Long, + halfLife: Duration + ): Double = + if (timestamp > sourceVersion) { + storedValue + } else { + decayedValueMonoid + .plus( + DecayedValue.build(storedValue, timestamp, halfLife.inMilliseconds), + DecayedValue.build(0, sourceVersion, halfLife.inMilliseconds) + ) + .value + } + + /* + * Decays all the aggregate features occurring in the ''inputRecord'' + * to a given timestamp, and mutates the ''outputRecord'' accordingly. + * Note that inputRecord and outputRecord can be the same if you want + * to mutate the input in place, the function does this correctly. 
+ * + * @param inputRecord Input record to get features from + * @param aggregates Aggregates to decay + * @param decayTo Timestamp to decay to + * @param trimThreshold Drop features below this trim threshold + * @param outputRecord Output record to mutate + * @return the mutated outputRecord + */ + def mutateDecay( + inputRecord: DataRecord, + aggregateFeaturesAndHalfLives: List[(Feature[_], Duration)], + decayTo: Long, + trimThreshold: Double, + outputRecord: DataRecord + ): DataRecord = { + val timestamp = inputRecord.getFeatureValue(SharedFeatures.TIMESTAMP).toLong + + aggregateFeaturesAndHalfLives.foreach { + case (aggregateFeature: Feature[_], halfLife: Duration) => + if (aggregateFeature.getFeatureType() == FeatureType.CONTINUOUS) { + val continuousFeature = aggregateFeature.asInstanceOf[Feature[JDouble]] + if (inputRecord.hasFeature(continuousFeature)) { + val storedValue = inputRecord.getFeatureValue(continuousFeature).toDouble + val decayedValue = decayValueToSourceVersion(storedValue, timestamp, decayTo, halfLife) + if (math.abs(decayedValue) > trimThreshold) { + outputRecord.setFeatureValue(continuousFeature, decayedValue) + } + } + } + } + + /* Update timestamp to version (now that we've decayed all aggregates) */ + outputRecord.setFeatureValue(SharedFeatures.TIMESTAMP, decayTo) + + outputRecord + } +} + +class AggregatesV2Adapter( + aggregates: Set[TypedAggregateGroup[_]], + sourceVersion: Long, + trimThreshold: Double) + extends IRecordOneToManyAdapter[AggregatesV2Adapter.AggregatesV2Tuple] { + + import AggregatesV2Adapter._ + + val keyFeatures: List[Feature[_]] = aggregates.flatMap(_.allOutputKeys).toList + val aggregateFeatures: List[Feature[_]] = aggregates.flatMap(_.allOutputFeatures).toList + val timestampFeatures: List[Feature[JLong]] = List(SharedFeatures.TIMESTAMP) + val allFeatures: List[Feature[_]] = keyFeatures ++ aggregateFeatures ++ timestampFeatures + + val featureContext: FeatureContext = new FeatureContext(allFeatures.asJava) + + 
override def getFeatureContext: FeatureContext = featureContext + + val aggregateFeaturesAndHalfLives: List[(Feature[_$3], Duration) forSome { type _$3 }] = + aggregateFeatures.map { aggregateFeature: Feature[_] => + val halfLife = AggregateFeature.parseHalfLife(aggregateFeature) + (aggregateFeature, halfLife) + } + + override def adaptToDataRecords(tuple: AggregatesV2Tuple): ju.List[DataRecord] = tuple match { + case (key: AggregationKey, (batchId: BatchID, record: DataRecord)) => { + val resultRecord = new SRichDataRecord(new DataRecord, featureContext) + + val itr = resultRecord.continuousFeaturesIterator() + val featuresToClear = mutable.Set[Feature[JDouble]]() + while (itr.moveNext()) { + val nextFeature = itr.getFeature + if (!aggregateFeatures.contains(nextFeature)) { + featuresToClear += nextFeature + } + } + + featuresToClear.foreach(resultRecord.clearFeature) + + keyFeatures.foreach { keyFeature: Feature[_] => + if (keyFeature.getFeatureType == FeatureType.DISCRETE) { + resultRecord.setFeatureValue( + keyFeature.asInstanceOf[Feature[JLong]], + key.discreteFeaturesById(keyFeature.getDenseFeatureId) + ) + } else if (keyFeature.getFeatureType == FeatureType.STRING) { + resultRecord.setFeatureValue( + keyFeature.asInstanceOf[Feature[String]], + key.textFeaturesById(keyFeature.getDenseFeatureId) + ) + } + } + + if (record.hasFeature(SharedFeatures.TIMESTAMP)) { + mutateDecay( + record, + aggregateFeaturesAndHalfLives, + sourceVersion, + trimThreshold, + resultRecord) + List(resultRecord.getRecord).asJava + } else { + List.empty[DataRecord].asJava + } + } + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2FeatureSource.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2FeatureSource.scala new file mode 100644 index 000000000..5e196a43e --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/AggregatesV2FeatureSource.scala @@ -0,0 +1,171 @@ +package 
com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.bijection.Injection +import com.twitter.bijection.thrift.CompactThriftCodec +import com.twitter.ml.api.AdaptedFeatureSource +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.IRecordOneToManyAdapter +import com.twitter.ml.api.TypedFeatureSource +import com.twitter.scalding.DateRange +import com.twitter.scalding.RichDate +import com.twitter.scalding.TypedPipe +import com.twitter.scalding.commons.source.VersionedKeyValSource +import com.twitter.scalding.commons.tap.VersionedTap.TapMode +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird_internal.bijection.BatchPairImplicits +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKeyInjection +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import org.apache.hadoop.mapred.JobConf +import scala.collection.JavaConverters._ +import AggregatesV2Adapter._ + +object AggregatesV2AdaptedSource { + val DefaultTrimThreshold = 0 +} + +trait AggregatesV2AdaptedSource extends AggregatesV2AdaptedSourceBase[DataRecord] { + override def storageFormatCodec: Injection[DataRecord, Array[Byte]] = + CompactThriftCodec[DataRecord] + override def toDataRecord(v: DataRecord): DataRecord = v +} + +trait AggregatesV2AdaptedSourceBase[StorageFormat] + extends TypedFeatureSource[AggregatesV2Tuple] + with AdaptedFeatureSource[AggregatesV2Tuple] + with BatchPairImplicits { + + /* Output root path of aggregates v2 job, excluding store name and version */ + def rootPath: String + + /* Name of store under root path to read */ + def storeName: String + + // max bijection failures + def maxFailures: Int = 0 + + /* Aggregate config used to generate above output */ + def aggregates: Set[TypedAggregateGroup[_]] + + /* trimThreshold Trim all 
aggregates below a certain threshold to save memory */
+  def trimThreshold: Double
+
+  /* Converts a value in the store's storage format into a DataRecord */
+  def toDataRecord(v: StorageFormat): DataRecord
+
+  /* Version requested by the concrete source, if any; see versionToUse for how it is applied */
+  def sourceVersionOpt: Option[Long]
+
+  /* When true, versionToUse picks the newest available version <= the requested one */
+  def enableMostRecentBeforeSourceVersion: Boolean = false
+
+  implicit private val aggregationKeyInjection: Injection[AggregationKey, Array[Byte]] =
+    AggregationKeyInjection
+  implicit def storageFormatCodec: Injection[StorageFormat, Array[Byte]]
+
+  /* Only the aggregate groups whose output store is the store being read */
+  private def filteredAggregates = aggregates.filter(_.outputStore.name == storeName)
+  def storePath: String = List(rootPath, storeName).mkString("/")
+
+  /* Source over storePath with no pinned version; used here to enumerate available versions */
+  def mostRecentVkvs: VersionedKeyValSource[_, _] = {
+    VersionedKeyValSource[AggregationKey, (BatchID, StorageFormat)](
+      path = storePath,
+      sourceVersion = None,
+      maxFailures = maxFailures
+    )
+  }
+
+  /* Lists the versions present in the store. Queries the store again on every call. */
+  private def availableVersions: Seq[Long] =
+    mostRecentVkvs
+      .getTap(TapMode.SOURCE)
+      .getStore(new JobConf(true))
+      .getAllVersions()
+      .asScala
+      .map(_.toLong)
+
+  /* Newest version in the store; fails the requirement if the store has no versions. */
+  private def mostRecentVersion: Long = {
+    // Read the listing once so the emptiness check and the max see the same versions
+    // and the store is not queried twice.
+    val versions = availableVersions
+    require(versions.nonEmpty, s"$storeName has no available versions")
+    versions.max
+  }
+
+  /**
+   * Version of the store to read:
+   *  - no requested version: the newest available version;
+   *  - requested version, enableMostRecentBeforeSourceVersion = false: the requested version;
+   *  - requested version, enableMostRecentBeforeSourceVersion = true: the newest available
+   *    version that is <= the requested one.
+   *
+   * Throws IllegalArgumentException if, in the last case, no such version exists.
+   */
+  def versionToUse: Long =
+    sourceVersionOpt match {
+      case Some(sourceVersion) if enableMostRecentBeforeSourceVersion =>
+        // Single listing shared by the filter and the error message.
+        val versions = availableVersions
+        val eligibleVersions = versions.filter(_ <= sourceVersion)
+        if (eligibleVersions.isEmpty) {
+          throw new IllegalArgumentException(
+            "No version older than version: %s, available versions: %s"
+              .format(sourceVersion, versions)
+          )
+        }
+        eligibleVersions.max
+      case Some(sourceVersion) => sourceVersion
+      case None => mostRecentVersion
+    }
+
+  override lazy val adapter: IRecordOneToManyAdapter[AggregatesV2Tuple] =
+    new AggregatesV2Adapter(filteredAggregates, versionToUse, trimThreshold)
+
+  override def getData: TypedPipe[AggregatesV2Tuple] = {
+    val vkvsToUse: VersionedKeyValSource[AggregationKey, (BatchID, StorageFormat)] = {
+      VersionedKeyValSource[AggregationKey, (BatchID, StorageFormat)](
+        path = storePath,
+        sourceVersion = Some(versionToUse),
+
maxFailures = maxFailures + ) + } + TypedPipe.from(vkvsToUse).map { + case (key, (batch, value)) => (key, (batch, toDataRecord(value))) + } + } +} + +/* + * Adapted data record feature source from aggregates v2 manhattan output + * Params documented in parent trait. + */ +case class AggregatesV2FeatureSource( + override val rootPath: String, + override val storeName: String, + override val aggregates: Set[TypedAggregateGroup[_]], + override val trimThreshold: Double = 0, + override val maxFailures: Int = 0, +)( + implicit val dateRange: DateRange) + extends AggregatesV2AdaptedSource { + + // Increment end date by 1 millisec since summingbird output for date D is stored at (D+1)T00 + override val sourceVersionOpt: Some[Long] = Some(dateRange.end.timestamp + 1) +} + +/* + * Reads most recent available AggregatesV2FeatureSource. + * There is no constraint on recency. + * Params documented in parent trait. + */ +case class AggregatesV2MostRecentFeatureSource( + override val rootPath: String, + override val storeName: String, + override val aggregates: Set[TypedAggregateGroup[_]], + override val trimThreshold: Double = AggregatesV2AdaptedSource.DefaultTrimThreshold, + override val maxFailures: Int = 0) + extends AggregatesV2AdaptedSource { + + override val sourceVersionOpt: None.type = None +} + +/* + * Reads most recent available AggregatesV2FeatureSource + * on or before the specified beforeDate. + * Params documented in parent trait. 
+ */ +case class AggregatesV2MostRecentFeatureSourceBeforeDate( + override val rootPath: String, + override val storeName: String, + override val aggregates: Set[TypedAggregateGroup[_]], + override val trimThreshold: Double = AggregatesV2AdaptedSource.DefaultTrimThreshold, + beforeDate: RichDate, + override val maxFailures: Int = 0) + extends AggregatesV2AdaptedSource { + + override val enableMostRecentBeforeSourceVersion = true + override val sourceVersionOpt: Some[Long] = Some(beforeDate.timestamp + 1) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/BUILD b/timelines/data_processing/ml_util/aggregation_framework/conversion/BUILD new file mode 100644 index 000000000..d6c86cc12 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/BUILD @@ -0,0 +1,71 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/algebird:core", + "3rdparty/jvm/com/twitter/algebird:util", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:json", + "3rdparty/jvm/com/twitter/bijection:netty", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/com/twitter/bijection:util", + "3rdparty/jvm/com/twitter/storehaus:algebra", + "3rdparty/jvm/com/twitter/storehaus:core", + "3rdparty/src/jvm/com/twitter/scalding:commons", + "3rdparty/src/jvm/com/twitter/scalding:core", + "3rdparty/src/jvm/com/twitter/scalding:date", + "3rdparty/src/jvm/com/twitter/summingbird:batch", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/scala/com/twitter/ml/api:api-base", + "src/scala/com/twitter/ml/api/util", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + 
"src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:interpretable-model-java", + "src/thrift/com/twitter/summingbird", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "util/util-core:scala", + ], +) + +scala_library( + name = "for-timelines", + sources = [ + "CombineCountsPolicy.scala", + "SparseBinaryMergePolicy.scala", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/algebird:core", + "3rdparty/jvm/com/twitter/algebird:util", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:json", + "3rdparty/jvm/com/twitter/bijection:netty", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/com/twitter/bijection:util", + "3rdparty/jvm/com/twitter/storehaus:algebra", + "3rdparty/jvm/com/twitter/storehaus:core", + "3rdparty/src/jvm/com/twitter/scalding:commons", + "3rdparty/src/jvm/com/twitter/scalding:core", + "3rdparty/src/jvm/com/twitter/scalding:date", + "3rdparty/src/jvm/com/twitter/summingbird:batch", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/thrift/com/twitter/dal/personal_data:personal_data-java", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:interpretable-model-java", + "src/thrift/com/twitter/summingbird", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework/metrics", + "util/util-core:scala", + ], +) diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/CombineCountsPolicy.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/CombineCountsPolicy.scala new file mode 100644 
index 000000000..eb1690231 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/CombineCountsPolicy.scala @@ -0,0 +1,223 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.google.common.annotations.VisibleForTesting +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.TypedCountMetric +import java.lang.{Double => JDouble} +import scala.collection.JavaConverters._ + +case class CombinedFeatures( + sum: Feature[JDouble], + nonzero: Feature[JDouble], + mean: Feature[JDouble], + topK: Seq[Feature[JDouble]]) + +trait CombineCountsBase { + val SparseSum = "sparse_sum" + val SparseNonzero = "sparse_nonzero" + val SparseMean = "sparse_mean" + val SparseTop = "sparse_top" + + def topK: Int + def hardLimit: Option[Int] + def precomputedCountFeatures: Seq[Feature[_]] + + lazy val precomputedFeaturesMap: Map[Feature[_], CombinedFeatures] = + precomputedCountFeatures.map { countFeature => + val derivedPersonalDataTypes = + AggregationMetricCommon.derivePersonalDataTypes(Some(countFeature)) + val sum = new Feature.Continuous( + countFeature.getDenseFeatureName + "." + SparseSum, + derivedPersonalDataTypes) + val nonzero = new Feature.Continuous( + countFeature.getDenseFeatureName + "." + SparseNonzero, + derivedPersonalDataTypes) + val mean = new Feature.Continuous( + countFeature.getDenseFeatureName + "." + SparseMean, + derivedPersonalDataTypes) + val topKFeatures = (1 to topK).map { k => + new Feature.Continuous( + countFeature.getDenseFeatureName + "." 
+ SparseTop + k, + derivedPersonalDataTypes) + } + (countFeature, CombinedFeatures(sum, nonzero, mean, topKFeatures)) + }.toMap + + lazy val outputFeaturesPostMerge: Set[Feature[JDouble]] = + precomputedFeaturesMap.values.flatMap { combinedFeatures: CombinedFeatures => + Seq( + combinedFeatures.sum, + combinedFeatures.nonzero, + combinedFeatures.mean + ) ++ combinedFeatures.topK + }.toSet + + private case class ComputedStats(sum: Double, nonzero: Double, mean: Double) + + private def preComputeStats(featureValues: Seq[Double]): ComputedStats = { + val (sum, nonzero) = featureValues.foldLeft((0.0, 0.0)) { + case ((accSum, accNonzero), value) => + (accSum + value, if (value > 0.0) accNonzero + 1.0 else accNonzero) + } + ComputedStats(sum, nonzero, if (nonzero > 0.0) sum / nonzero else 0.0) + } + + private def computeSortedFeatureValues(featureValues: List[Double]): List[Double] = + featureValues.sortBy(-_) + + private def extractKth(sortedFeatureValues: Seq[Double], k: Int): Double = + sortedFeatureValues + .lift(k - 1) + .getOrElse(0.0) + + private def setContinuousFeatureIfNonZero( + record: SRichDataRecord, + feature: Feature[JDouble], + value: Double + ): Unit = + if (value != 0.0) { + record.setFeatureValue(feature, value) + } + + def hydrateCountFeatures( + richRecord: SRichDataRecord, + features: Seq[Feature[_]], + featureValuesMap: Map[Feature[_], List[Double]] + ): Unit = + for { + feature <- features + featureValues <- featureValuesMap.get(feature) + } { + mergeRecordFromCountFeature( + countFeature = feature, + featureValues = featureValues, + richInputRecord = richRecord + ) + } + + def mergeRecordFromCountFeature( + richInputRecord: SRichDataRecord, + countFeature: Feature[_], + featureValues: List[Double] + ): Unit = { + // In majority of calls to this method from timeline scorer + // the featureValues list is empty. + // While with empty list each operation will be not that expensive, these + // small things do add up. 
By adding early stop here we can avoid sorting + // empty list, allocating several options and making multiple function + // calls. In addition to that, we won't iterate over [1, topK]. + if (featureValues.nonEmpty) { + val sortedFeatureValues = hardLimit + .map { limit => + computeSortedFeatureValues(featureValues).take(limit) + }.getOrElse(computeSortedFeatureValues(featureValues)).toIndexedSeq + val computed = preComputeStats(sortedFeatureValues) + + val combinedFeatures = precomputedFeaturesMap(countFeature) + setContinuousFeatureIfNonZero( + richInputRecord, + combinedFeatures.sum, + computed.sum + ) + setContinuousFeatureIfNonZero( + richInputRecord, + combinedFeatures.nonzero, + computed.nonzero + ) + setContinuousFeatureIfNonZero( + richInputRecord, + combinedFeatures.mean, + computed.mean + ) + (1 to topK).foreach { k => + setContinuousFeatureIfNonZero( + richInputRecord, + combinedFeatures.topK(k - 1), + extractKth(sortedFeatureValues, k) + ) + } + } + } +} + +object CombineCountsPolicy { + def getCountFeatures(aggregateContext: FeatureContext): Seq[Feature[_]] = + aggregateContext.getAllFeatures.asScala.toSeq + .filter { feature => + feature.getFeatureType == FeatureType.CONTINUOUS && + feature.getDenseFeatureName.endsWith(TypedCountMetric[JDouble]().operatorName) + } + + @VisibleForTesting + private[conversion] def getFeatureValues( + dataRecordsWithCounts: List[DataRecord], + countFeature: Feature[_] + ): List[Double] = + dataRecordsWithCounts.map(new SRichDataRecord(_)).flatMap { record => + Option(record.getFeatureValue(countFeature)).map(_.asInstanceOf[JDouble].toDouble) + } +} + +/** + * A merge policy that works whenever all aggregate features are + * counts (computed using CountMetric), and typically represent + * either impressions or engagements. 
For each such input count + * feature, the policy outputs the following (3+k) derived features + * into the output data record: + * + * Sum of the feature's value across all aggregate records + * Number of aggregate records that have the feature set to non-zero + * Mean of the feature's value across all aggregate records + * topK values of the feature across all aggregate records + * + * @param topK topK values to compute + * @param hardLimit when set, records are sorted and only the top values will be used for aggregation if + * the number of records is higher than this hard limit. + */ +case class CombineCountsPolicy( + override val topK: Int, + aggregateContextToPrecompute: FeatureContext, + override val hardLimit: Option[Int] = None) + extends SparseBinaryMergePolicy + with CombineCountsBase { + import CombineCountsPolicy._ + override val precomputedCountFeatures: Seq[Feature[_]] = getCountFeatures( + aggregateContextToPrecompute) + + override def mergeRecord( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord], + aggregateContext: FeatureContext + ): Unit = { + // Assumes aggregateContext === aggregateContextToPrecompute + mergeRecordFromCountFeatures(mutableInputRecord, aggregateRecords, precomputedCountFeatures) + } + + def defaultMergeRecord( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord] + ): Unit = { + mergeRecordFromCountFeatures(mutableInputRecord, aggregateRecords, precomputedCountFeatures) + } + + def mergeRecordFromCountFeatures( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord], + countFeatures: Seq[Feature[_]] + ): Unit = { + val richInputRecord = new SRichDataRecord(mutableInputRecord) + countFeatures.foreach { countFeature => + mergeRecordFromCountFeature( + richInputRecord = richInputRecord, + countFeature = countFeature, + featureValues = getFeatureValues(aggregateRecords, countFeature) + ) + } + } + + override def aggregateFeaturesPostMerge(aggregateContext: FeatureContext):
Set[Feature[_]] = + outputFeaturesPostMerge.map(_.asInstanceOf[Feature[_]]) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/DataSetPipeSketchJoin.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/DataSetPipeSketchJoin.scala new file mode 100644 index 000000000..8d3dd58bb --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/DataSetPipeSketchJoin.scala @@ -0,0 +1,46 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.bijection.Injection +import com.twitter.ml.api._ +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding.TypedPipe + +object DataSetPipeSketchJoin { + val DefaultSketchNumReducers = 500 + val dataRecordMerger: DataRecordMerger = new DataRecordMerger + implicit val str2Byte: String => Array[Byte] = + implicitly[Injection[String, Array[Byte]]].toFunction + + /* Computes a left sketch join on a set of skewed keys. 
*/ + def apply( + inputDataSet: DataSetPipe, + skewedJoinKeys: Product, + joinFeaturesDataSet: DataSetPipe, + sketchNumReducers: Int = DefaultSketchNumReducers + ): DataSetPipe = { + val joinKeyList = skewedJoinKeys.productIterator.toList.asInstanceOf[List[Feature[_]]] + + def makeKey(record: DataRecord): String = + joinKeyList + .map(SRichDataRecord(record).getFeatureValue(_)) + .toString + + def byKey(pipe: DataSetPipe): TypedPipe[(String, DataRecord)] = + pipe.records.map(record => (makeKey(record), record)) + + val joinedRecords = byKey(inputDataSet) + .sketch(sketchNumReducers) + .leftJoin(byKey(joinFeaturesDataSet)) + .values + .map { + case (inputRecord, joinFeaturesOpt) => + joinFeaturesOpt.foreach { joinRecord => dataRecordMerger.merge(inputRecord, joinRecord) } + inputRecord + } + + DataSetPipe( + joinedRecords, + FeatureContext.merge(inputDataSet.featureContext, joinFeaturesDataSet.featureContext) + ) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/PickFirstRecordPolicy.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/PickFirstRecordPolicy.scala new file mode 100644 index 000000000..b022d35b0 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/PickFirstRecordPolicy.scala @@ -0,0 +1,26 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.ml.api._ +import com.twitter.ml.api.FeatureContext +import scala.collection.JavaConverters._ + +/* + * A really bad default merge policy that picks all the aggregate + * features corresponding to the first sparse key value in the list. + * Does not rename any of the aggregate features for simplicity. + * Avoid using this merge policy if at all possible. 
+ */ +object PickFirstRecordPolicy extends SparseBinaryMergePolicy { + val dataRecordMerger: DataRecordMerger = new DataRecordMerger + + override def mergeRecord( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord], + aggregateContext: FeatureContext + ): Unit = + aggregateRecords.headOption + .foreach(aggregateRecord => dataRecordMerger.merge(mutableInputRecord, aggregateRecord)) + + override def aggregateFeaturesPostMerge(aggregateContext: FeatureContext): Set[Feature[_]] = + aggregateContext.getAllFeatures.asScala.toSet +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/PickTopCtrPolicy.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/PickTopCtrPolicy.scala new file mode 100644 index 000000000..94d3ac126 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/PickTopCtrPolicy.scala @@ -0,0 +1,226 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.ml.api._ +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon +import java.lang.{Boolean => JBoolean} +import java.lang.{Double => JDouble} + +case class CtrDescriptor( + engagementFeature: Feature[JDouble], + impressionFeature: Feature[JDouble], + outputFeature: Feature[JDouble]) + +object PickTopCtrBuilderHelper { + + def createCtrDescriptors( + aggregatePrefix: String, + engagementLabels: Set[Feature[JBoolean]], + aggregatesToCompute: Set[TypedAggregateGroup[_]], + outputSuffix: String + ): Set[CtrDescriptor] = { + val aggregateFeatures = aggregatesToCompute + .filter(_.aggregatePrefix == aggregatePrefix) + + val impressionFeature = aggregateFeatures + .flatMap { group => + group.individualAggregateDescriptors + 
.filter(_.query.feature == None) + .filter(_.query.label == None) + .flatMap(_.outputFeatures) + } + .head + .asInstanceOf[Feature[JDouble]] + + val aggregateEngagementFeatures = + aggregateFeatures + .flatMap { group => + group.individualAggregateDescriptors + .filter(_.query.feature == None) + .filter { descriptor => + //TODO: we should remove the need to pass around engagementLabels and just use all the labels available. + descriptor.query.label.exists(engagementLabels.contains(_)) + } + .flatMap(_.outputFeatures) + } + .map(_.asInstanceOf[Feature[JDouble]]) + + aggregateEngagementFeatures + .map { aggregateEngagementFeature => + CtrDescriptor( + engagementFeature = aggregateEngagementFeature, + impressionFeature = impressionFeature, + outputFeature = new Feature.Continuous( + aggregateEngagementFeature.getDenseFeatureName + "." + outputSuffix, + AggregationMetricCommon.derivePersonalDataTypes( + Some(aggregateEngagementFeature), + Some(impressionFeature) + ) + ) + ) + } + } +} + +object PickTopCtrPolicy { + def build( + aggregatePrefix: String, + engagementLabels: Set[Feature[JBoolean]], + aggregatesToCompute: Set[TypedAggregateGroup[_]], + smoothing: Double = 1.0, + outputSuffix: String = "ratio" + ): PickTopCtrPolicy = { + val ctrDescriptors = PickTopCtrBuilderHelper.createCtrDescriptors( + aggregatePrefix = aggregatePrefix, + engagementLabels = engagementLabels, + aggregatesToCompute = aggregatesToCompute, + outputSuffix = outputSuffix + ) + PickTopCtrPolicy( + ctrDescriptors = ctrDescriptors, + smoothing = smoothing + ) + } +} + +object CombinedTopNCtrsByWilsonConfidenceIntervalPolicy { + def build( + aggregatePrefix: String, + engagementLabels: Set[Feature[JBoolean]], + aggregatesToCompute: Set[TypedAggregateGroup[_]], + outputSuffix: String = "ratioWithWCI", + z: Double = 1.96, + topN: Int = 1 + ): CombinedTopNCtrsByWilsonConfidenceIntervalPolicy = { + val ctrDescriptors = PickTopCtrBuilderHelper.createCtrDescriptors( + aggregatePrefix = aggregatePrefix, 
+ engagementLabels = engagementLabels, + aggregatesToCompute = aggregatesToCompute, + outputSuffix = outputSuffix + ) + CombinedTopNCtrsByWilsonConfidenceIntervalPolicy( + ctrDescriptors = ctrDescriptors, + z = z, + topN = topN + ) + } +} + +/* + * A merge policy that picks the aggregate features corresponding to + * the sparse key value with the highest engagement rate (defined + * as the ratio of two specified features, representing engagements + * and impressions). Also outputs the engagement rate to the specified + * outputFeature. + * + * This is an abstract class. We can make variants of this policy by overriding + * the calculateCtr method. + */ + +abstract class PickTopCtrPolicyBase(ctrDescriptors: Set[CtrDescriptor]) + extends SparseBinaryMergePolicy { + + private def getContinuousFeature( + aggregateRecord: DataRecord, + feature: Feature[JDouble] + ): Double = { + Option(SRichDataRecord(aggregateRecord).getFeatureValue(feature)) + .map(_.asInstanceOf[JDouble].toDouble) + .getOrElse(0.0) + } + + /** + * For every provided descriptor, compute the corresponding CTR feature + * and only hydrate this result to the provided input record. 
+ */ + override def mergeRecord( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord], + aggregateContext: FeatureContext + ): Unit = { + ctrDescriptors + .foreach { + case CtrDescriptor(engagementFeature, impressionFeature, outputFeature) => + val sortedCtrs = + aggregateRecords + .map { aggregateRecord => + val impressions = getContinuousFeature(aggregateRecord, impressionFeature) + val engagements = getContinuousFeature(aggregateRecord, engagementFeature) + calculateCtr(impressions, engagements) + } + .sortBy { ctr => -ctr } + combineTopNCtrsToSingleScore(sortedCtrs) + .foreach { score => + SRichDataRecord(mutableInputRecord).setFeatureValue(outputFeature, score) + } + } + } + + protected def calculateCtr(impressions: Double, engagements: Double): Double + + protected def combineTopNCtrsToSingleScore(sortedCtrs: Seq[Double]): Option[Double] + + override def aggregateFeaturesPostMerge(aggregateContext: FeatureContext): Set[Feature[_]] = + ctrDescriptors + .map(_.outputFeature) + .toSet +} + +case class PickTopCtrPolicy(ctrDescriptors: Set[CtrDescriptor], smoothing: Double = 1.0) + extends PickTopCtrPolicyBase(ctrDescriptors) { + require(smoothing > 0.0) + + override def calculateCtr(impressions: Double, engagements: Double): Double = + (1.0 * engagements) / (smoothing + impressions) + + override def combineTopNCtrsToSingleScore(sortedCtrs: Seq[Double]): Option[Double] = + sortedCtrs.headOption +} + +case class CombinedTopNCtrsByWilsonConfidenceIntervalPolicy( + ctrDescriptors: Set[CtrDescriptor], + z: Double = 1.96, + topN: Int = 1) + extends PickTopCtrPolicyBase(ctrDescriptors) { + + private val zSquared = z * z + private val zSquaredDiv2 = zSquared / 2.0 + private val zSquaredDiv4 = zSquared / 4.0 + + /** + * calculates the lower bound of wilson score interval. 
which roughly says "the actual engagement + * rate is at least this value" with confidence designated by the z-score: + * https://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval + */ + override def calculateCtr(rawImpressions: Double, engagements: Double): Double = { + // just in case engagements happens to be more than impressions... + val impressions = Math.max(rawImpressions, engagements) + + if (impressions > 0.0) { + val p = engagements / impressions + (p + + zSquaredDiv2 / impressions + - z * Math.sqrt( + (p * (1.0 - p) + zSquaredDiv4 / impressions) / impressions)) / (1.0 + zSquared / impressions) + + } else 0.0 + } + + /** + * takes the topN engagement rates, and returns the joint probability as {1.0 - Π(1.0 - p)} + * + * e.g. let's say you have 0.6 chance of clicking on a tweet shared by the user A. + * you also have 0.3 chance of clicking on a tweet shared by the user B. + * seeing a tweet shared by both A and B will not lead to 0.9 chance of you clicking on it. + * but you could say that you have 0.4*0.7 chance of NOT clicking on that tweet. 
+ */ + override def combineTopNCtrsToSingleScore(sortedCtrs: Seq[Double]): Option[Double] = + if (sortedCtrs.nonEmpty) { + val inverseLogP = sortedCtrs + .take(topN).map { p => Math.log(1.0 - p) }.sum + Some(1.0 - Math.exp(inverseLogP)) + } else None + +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryAggregateJoin.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryAggregateJoin.scala new file mode 100644 index 000000000..10c6a9096 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryAggregateJoin.scala @@ -0,0 +1,199 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.ml.api._ +import com.twitter.ml.api.Feature +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding.typed.TypedPipe +import com.twitter.scalding.typed.UnsortedGrouped +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import java.util.{Set => JSet} +import scala.collection.JavaConverters._ + +object SparseBinaryAggregateJoin { + import TypedAggregateGroup._ + + def makeKey(record: DataRecord, joinKeyList: List[Feature[_]]): String = { + joinKeyList.map { + case sparseKey: Feature.SparseBinary => + SRichDataRecord(record).getFeatureValue(sparseFeature(sparseKey)) + case nonSparseKey: Feature[_] => + SRichDataRecord(record).getFeatureValue(nonSparseKey) + }.toString + } + + /** + * @param record Data record to get all possible sparse aggregate keys from + * @param joinKeyList List of join key features (some can be sparse and some non-sparse) + * @return A list of string keys to use for joining + */ + def makeKeyPermutations(record: DataRecord, joinKeyList: List[Feature[_]]): List[String] = { + val allIdValues = joinKeyList.flatMap { + case sparseKey: Feature.SparseBinary => { + val id = sparseKey.getDenseFeatureId + val valuesOpt =
Option(SRichDataRecord(record).getFeatureValue(sparseKey)) + .map(_.asInstanceOf[JSet[String]].asScala.toSet) + valuesOpt.map { (id, _) } + } + case nonSparseKey: Feature[_] => { + val id = nonSparseKey.getDenseFeatureId + Option(SRichDataRecord(record).getFeatureValue(nonSparseKey)).map { value => + (id, Set(value.toString)) + } + } + } + sparseBinaryPermutations(allIdValues).toList.map { idValues => + joinKeyList.map { key => idValues.getOrElse(key.getDenseFeatureId, "") }.toString + } + } + + private[this] def mkKeyIndexedAggregates( + joinFeaturesDataSet: DataSetPipe, + joinKeyList: List[Feature[_]] + ): TypedPipe[(String, DataRecord)] = + joinFeaturesDataSet.records + .map { record => (makeKey(record, joinKeyList), record) } + + private[this] def mkKeyIndexedInput( + inputDataSet: DataSetPipe, + joinKeyList: List[Feature[_]] + ): TypedPipe[(String, DataRecord)] = + inputDataSet.records + .flatMap { record => + for { + key <- makeKeyPermutations(record, joinKeyList) + } yield { (key, record) } + } + + private[this] def mkKeyIndexedInputWithUniqueId( + inputDataSet: DataSetPipe, + joinKeyList: List[Feature[_]], + uniqueIdFeatureList: List[Feature[_]] + ): TypedPipe[(String, String)] = + inputDataSet.records + .flatMap { record => + for { + key <- makeKeyPermutations(record, joinKeyList) + } yield { (key, makeKey(record, uniqueIdFeatureList)) } + } + + private[this] def mkRecordIndexedAggregates( + keyIndexedInput: TypedPipe[(String, DataRecord)], + keyIndexedAggregates: TypedPipe[(String, DataRecord)] + ): UnsortedGrouped[DataRecord, List[DataRecord]] = + keyIndexedInput + .join(keyIndexedAggregates) + .map { case (_, (inputRecord, aggregateRecord)) => (inputRecord, aggregateRecord) } + .group + .toList + + private[this] def mkRecordIndexedAggregatesWithUniqueId( + keyIndexedInput: TypedPipe[(String, String)], + keyIndexedAggregates: TypedPipe[(String, DataRecord)] + ): UnsortedGrouped[String, List[DataRecord]] = + keyIndexedInput + .join(keyIndexedAggregates) + 
.map { case (_, (inputId, aggregateRecord)) => (inputId, aggregateRecord) } + .group + .toList + + def mkJoinedDataSet( + inputDataSet: DataSetPipe, + joinFeaturesDataSet: DataSetPipe, + recordIndexedAggregates: UnsortedGrouped[DataRecord, List[DataRecord]], + mergePolicy: SparseBinaryMergePolicy + ): TypedPipe[DataRecord] = + inputDataSet.records + .map(record => (record, ())) + .leftJoin(recordIndexedAggregates) + .map { + case (inputRecord, (_, aggregateRecordsOpt)) => + aggregateRecordsOpt + .map { aggregateRecords => + mergePolicy.mergeRecord( + inputRecord, + aggregateRecords, + joinFeaturesDataSet.featureContext + ) + inputRecord + } + .getOrElse(inputRecord) + } + + def mkJoinedDataSetWithUniqueId( + inputDataSet: DataSetPipe, + joinFeaturesDataSet: DataSetPipe, + recordIndexedAggregates: UnsortedGrouped[String, List[DataRecord]], + mergePolicy: SparseBinaryMergePolicy, + uniqueIdFeatureList: List[Feature[_]] + ): TypedPipe[DataRecord] = + inputDataSet.records + .map(record => (makeKey(record, uniqueIdFeatureList), record)) + .leftJoin(recordIndexedAggregates) + .map { + case (_, (inputRecord, aggregateRecordsOpt)) => + aggregateRecordsOpt + .map { aggregateRecords => + mergePolicy.mergeRecord( + inputRecord, + aggregateRecords, + joinFeaturesDataSet.featureContext + ) + inputRecord + } + .getOrElse(inputRecord) + } + + /** + * If uniqueIdFeatures is non-empty and the join keys include a sparse binary + * key, the join will use this set of keys as a unique id to reduce + * memory consumption. You should need this option only for + * memory-intensive joins to avoid OOM errors. 
+ */ + def apply( + inputDataSet: DataSetPipe, + joinKeys: Product, + joinFeaturesDataSet: DataSetPipe, + mergePolicy: SparseBinaryMergePolicy = PickFirstRecordPolicy, + uniqueIdFeaturesOpt: Option[Product] = None + ): DataSetPipe = { + val joinKeyList = joinKeys.productIterator.toList.asInstanceOf[List[Feature[_]]] + val sparseBinaryJoinKeySet = + joinKeyList.toSet.filter(_.getFeatureType() == FeatureType.SPARSE_BINARY) + val containsSparseBinaryKey = !sparseBinaryJoinKeySet.isEmpty + if (containsSparseBinaryKey) { + val uniqueIdFeatureList = uniqueIdFeaturesOpt + .map(uniqueIdFeatures => + uniqueIdFeatures.productIterator.toList.asInstanceOf[List[Feature[_]]]) + .getOrElse(List.empty[Feature[_]]) + val keyIndexedAggregates = mkKeyIndexedAggregates(joinFeaturesDataSet, joinKeyList) + val joinedDataSet = if (uniqueIdFeatureList.isEmpty) { + val keyIndexedInput = mkKeyIndexedInput(inputDataSet, joinKeyList) + val recordIndexedAggregates = + mkRecordIndexedAggregates(keyIndexedInput, keyIndexedAggregates) + mkJoinedDataSet(inputDataSet, joinFeaturesDataSet, recordIndexedAggregates, mergePolicy) + } else { + val keyIndexedInput = + mkKeyIndexedInputWithUniqueId(inputDataSet, joinKeyList, uniqueIdFeatureList) + val recordIndexedAggregates = + mkRecordIndexedAggregatesWithUniqueId(keyIndexedInput, keyIndexedAggregates) + mkJoinedDataSetWithUniqueId( + inputDataSet, + joinFeaturesDataSet, + recordIndexedAggregates, + mergePolicy, + uniqueIdFeatureList + ) + } + + DataSetPipe( + joinedDataSet, + mergePolicy.mergeContext( + inputDataSet.featureContext, + joinFeaturesDataSet.featureContext + ) + ) + } else { + inputDataSet.joinWithSmaller(joinKeys, joinFeaturesDataSet) { _.pass } + } + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMergePolicy.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMergePolicy.scala new file mode 100644 index 000000000..7201e39a2 --- /dev/null +++ 
b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMergePolicy.scala @@ -0,0 +1,81 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.ml.api._ +import com.twitter.ml.api.FeatureContext +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import scala.collection.JavaConverters._ + +/** + * When using the aggregates framework to group by sparse binary keys, + * we generate different aggregate feature values for each possible + * value of the sparse key. Hence, when joining back the aggregate + * features with a training data set, each individual training record + * has multiple aggregate features to choose from, for each value taken + * by the sparse key(s) in the training record. The merge policy trait + * below specifies how to condense/combine this variable number of + * aggregate features into a constant number of features for training. + * Some simple policies might be: pick the first feature set (randomly), + * pick the top sorted by some attribute, or take some average. + * + * Example: suppose we group by (ADVERTISER_ID, INTEREST_ID) where INTEREST_ID + * is the sparse key, and compute a "CTR" aggregate feature for each such + * pair measuring the click through rate on ads with (ADVERTISER_ID, INTEREST_ID). + * Say we have the following aggregate records: + * + * (ADVERTISER_ID = 1, INTEREST_ID = 1, CTR = 5%) + * (ADVERTISER_ID = 1, INTEREST_ID = 2, CTR = 15%) + * (ADVERTISER_ID = 2, INTEREST_ID = 1, CTR = 1%) + * (ADVERTISER_ID = 2, INTEREST_ID = 2, CTR = 10%) + * ... + * At training time, each training record has one value for ADVERTISER_ID, but it + * has multiple values for INTEREST_ID e.g. 
+ * + * (ADVERTISER_ID = 1, INTEREST_IDS = (1,2)) + * + * There are multiple potential CTRs we can get when joining in the aggregate features: + * in this case 2 values (5% and 15%) but in general it could be many depending on how + * many interests the user has. When joining back the CTR features, the merge policy says how to + * combine all these CTRs to engineer features. + * + * "Pick first" would say - pick some random CTR (whatever is first in the list, maybe 5%) + * for training (probably not a good policy). "Sort by CTR" could be a policy + * that just picks the top CTR and uses it as a feature (here 15%). Similarly, you could + * imagine "Top K sorted by CTR" (use both 5 and 15%) or "Avg CTR" (10%) or other policies, + * all of which are defined as objects/case classes that override this trait. + */ +trait SparseBinaryMergePolicy { + + /** + * @param mutableInputRecord Input record to add aggregates to + * @param aggregateRecords Aggregate feature records + * @param aggregateContext Context for aggregate records + */ + def mergeRecord( + mutableInputRecord: DataRecord, + aggregateRecords: List[DataRecord], + aggregateContext: FeatureContext + ): Unit + + def aggregateFeaturesPostMerge(aggregateContext: FeatureContext): Set[Feature[_]] + + /** + * @param inputContext Context for input record + * @param aggregateContext Context for aggregate records + * @return Context for record returned by mergeRecord() + */ + def mergeContext( + inputContext: FeatureContext, + aggregateContext: FeatureContext + ): FeatureContext = new FeatureContext( + (inputContext.getAllFeatures.asScala.toSet ++ aggregateFeaturesPostMerge( + aggregateContext)).toSeq.asJava + ) + + def allOutputFeaturesPostMergePolicy[T](config: TypedAggregateGroup[T]): Set[Feature[_]] = { + val containsSparseBinary = config.keysToAggregate + .exists(_.getFeatureType == FeatureType.SPARSE_BINARY) + + if (!containsSparseBinary) config.allOutputFeatures + else aggregateFeaturesPostMerge(new 
FeatureContext(config.allOutputFeatures.toSeq.asJava)) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMultipleAggregateJoin.scala b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMultipleAggregateJoin.scala new file mode 100644 index 000000000..d0aff7e34 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/conversion/SparseBinaryMultipleAggregateJoin.scala @@ -0,0 +1,109 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion + +import com.twitter.bijection.Injection +import com.twitter.ml.api._ +import com.twitter.ml.api.Feature +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding.typed.TypedPipe +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup.sparseFeature +import scala.collection.JavaConverters._ +/** One aggregate data set to join in: its records, the sparse binary feature whose values select matching aggregate records, and the merge policies applied to those matches. */ +case class SparseJoinConfig( + aggregates: DataSetPipe, + sparseKey: Feature.SparseBinary, + mergePolicies: SparseBinaryMergePolicy*) +/** Joins aggregate records keyed on sparse binary features onto source records that share a common key, then lets each merge policy fold the matched aggregates into the source record. */ +object SparseBinaryMultipleAggregateJoin { + type CommonMap = (String, ((Feature.SparseBinary, String), DataRecord)) +/** apply: `source` supplies the input records, `commonKey` is the feature both sides are keyed on (compared via toString), and `joinConfigs` lists the aggregate sets to merge. `isSketchJoin` selects a sketched left join that is given `numSketchJoinReducers`; otherwise `rightJoin` selects the rightJoin-based path instead of the default leftJoin. Returns the merged records together with a feature context extended by each policy's mergeContext. */ + def apply( + source: DataSetPipe, + commonKey: Feature[_], + joinConfigs: Set[SparseJoinConfig], + rightJoin: Boolean = false, + isSketchJoin: Boolean = false, + numSketchJoinReducers: Int = 0 + ): DataSetPipe = { + val emptyPipe: TypedPipe[CommonMap] = TypedPipe.empty + val aggregateMaps: Set[TypedPipe[CommonMap]] = joinConfigs.map { joinConfig => + joinConfig.aggregates.records.map { record => + val sparseKeyValue = + SRichDataRecord(record).getFeatureValue(sparseFeature(joinConfig.sparseKey)).toString + val commonKeyValue = SRichDataRecord(record).getFeatureValue(commonKey).toString + (commonKeyValue, ((joinConfig.sparseKey, sparseKeyValue), record)) + } + } +/* Union every config's pipe, group by common key value, and turn each group into a map from (sparse key feature, sparse key value) to aggregate record. */ + val commonKeyToAggregateMap = aggregateMaps + .foldLeft(emptyPipe) { + case (union: TypedPipe[CommonMap], next: TypedPipe[CommonMap]) => +
union ++ next + } + .group + .toList + .map { + case (commonKeyValue, aggregateTuples) => + (commonKeyValue, aggregateTuples.toMap) + } +/* Key each source record by the string form of its common key value. */ + val commonKeyToRecordMap = source.records + .map { record => + val commonKeyValue = SRichDataRecord(record).getFeatureValue(commonKey).toString + (commonKeyValue, record) + } + + // rightJoin is not supported by Sketched, so rightJoin will be ignored if isSketchJoin is set + implicit val string2Byte = (value: String) => Injection[String, Array[Byte]](value) + val intermediateRecords = if (isSketchJoin) { + commonKeyToRecordMap.group + .sketch(numSketchJoinReducers) + .leftJoin(commonKeyToAggregateMap) + .toTypedPipe + } else if (rightJoin) { + commonKeyToAggregateMap + .rightJoin(commonKeyToRecordMap) + .mapValues(_.swap) + .toTypedPipe + } else { + commonKeyToRecordMap.leftJoin(commonKeyToAggregateMap).toTypedPipe + } +/* For records that found aggregates, gather per config the aggregate record of each sparse key value present on the input record and pass them to every merge policy; records with no aggregates are emitted unchanged. */ + val joinedRecords = intermediateRecords + .map { + case (commonKeyValue, (inputRecord, aggregateTupleMapOpt)) => + aggregateTupleMapOpt.foreach { aggregateTupleMap => + joinConfigs.foreach { joinConfig => + val sparseKeyValues = Option( + SRichDataRecord(inputRecord) + .getFeatureValue(joinConfig.sparseKey) + ).map(_.asScala.toList) + .getOrElse(List.empty[String]) + + val aggregateRecords = sparseKeyValues.flatMap { sparseKeyValue => + aggregateTupleMap.get((joinConfig.sparseKey, sparseKeyValue)) + } + + joinConfig.mergePolicies.foreach { mergePolicy => + mergePolicy.mergeRecord( + inputRecord, + aggregateRecords, + joinConfig.aggregates.featureContext + ) + } + } + } + inputRecord + } +/* Start from the source feature context and let every merge policy of every config extend it via mergeContext. */ + val joinedFeatureContext = joinConfigs + .foldLeft(source.featureContext) { + case (left, joinConfig) => + joinConfig.mergePolicies.foldLeft(left) { + case (soFar, mergePolicy) => + mergePolicy.mergeContext(soFar, joinConfig.aggregates.featureContext) + } + } + + DataSetPipe(joinedRecords, joinedFeatureContext) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/AUTOMATED_COMMIT_FILES
b/timelines/data_processing/ml_util/aggregation_framework/docs/AUTOMATED_COMMIT_FILES new file mode 100644 index 000000000..80aaae8d9 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/AUTOMATED_COMMIT_FILES @@ -0,0 +1,5 @@ +aggregation.rst +batch.rst +index.rst +real-time.rst +troubleshooting.rst diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/aggregation.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/aggregation.rst new file mode 100644 index 000000000..fddd926b4 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/aggregation.rst @@ -0,0 +1,167 @@ +.. _aggregation: + +Core Concepts +============= + +This page provides an overview of the aggregation framework and goes through examples on how to define aggregate features. In general, we can think of an aggregate feature as a grouped set of records, on which we incrementally update the aggregate feature values, crossed by the provided features and conditional on the provided labels. + +AggregateGroup +-------------- + +An `AggregateGroup` defines a single unit of aggregate computation, similar to a SQL query. These are executed by the underlying jobs (internally, a `DataRecordAggregationMonoid `_ is applied to `DataRecords` that contain the features to aggregate). Many of these groups can exist to define different types of aggregate features. + +Let's start with the following examples of an `AggregateGroup` to discuss the meaning of each of its constructor arguments: + +.. 
code-block:: scala + + val UserAggregateStore = "user_aggregates" + val aggregatesToCompute: Set[TypedAggregateGroup[_]] = Set( + AggregateGroup( + inputSource = timelinesDailyRecapSource, + aggregatePrefix = "user_aggregate_v2", + preTransformOpt = Some(RemoveUserIdZero), + keys = Set(USER_ID), + features = Set(HAS_PHOTO), + labels = Set(IS_FAVORITED), + metrics = Set(CountMetric, SumMetric), + halfLives = Set(50.days), + outputStore = OfflineAggregateStore( + name = UserAggregateStore, + startDate = "2016-07-15 00:00", + commonConfig = timelinesDailyAggregateSink, + batchesToKeep = 5 + ) + ) + .flatMap(_.buildTypedAggregateGroups) + ) + +This `AggregateGroup` computes the number of times each user has faved a tweet with a photo. The aggregate count is decayed with a 50 day halflife. + +Naming and preprocessing +------------------------ + +`UserAggregateStore` is a string val that acts as a scope of a "root path" to which this group of aggregate features will be written. The root path is provided separately by the implementing job. + +`inputSource` defines the input source of `DataRecords` that we aggregate on. These records contain the relevant features required for aggregation. + +`aggregatePrefix` tells the framework what prefix to use for the aggregate features it generates. A descriptive naming scheme with versioning makes it easier to maintain features as you add or remove them over the long-term. + +`preTransforms` is a `Seq[com.twitter.ml.api.ITransform] `_ that can be applied to the data records read from the input source before they are fed into the `AggregateGroup` to apply aggregation. These transforms are optional but can be useful for certain preprocessing operations for a group's raw input features. + +.. admonition:: Examples + + You can downsample input data records by providing `preTransforms`. In addition, you could also join different input labels (e.g. 
"is_push_openend" and "is_push_favorited") and transform them into a combined label that is their union ("is_push_engaged") on which aggregate counts will be calculated. + + +Keys +---- + +`keys` is a crucial field in the config. It defines a `Set[com.twitter.ml.api.Feature]` which specifies a set of grouping keys to use for this `AggregateGroup`. + +Keys can only be of 3 supported types currently: `DISCRETE`, `STRING` and `SPARSE_BINARY`. Using a discrete or a string/text feature as a key specifies the unit to group records by before applying counting/aggregation operators. + + +.. admonition:: Examples + + .. cssclass:: shortlist + + #. If the key is `USER_ID`, this tells the framework to group all records by `USER_ID`, and then apply aggregations (sum/count/etc) within each user’s data to generate aggregate features for each user. + + #. If the key is `(USER_ID, AUTHOR_ID)`, then the `AggregateGroup` will output features for each unique user-author pair in the input data. + + #. Finally, using a sparse binary feature as key has special "flattening" or "flatMap" like semantics. For example, consider grouping by `(USER_ID, AUTHOR_INTEREST_IDS)` where `AUTHOR_INTEREST_IDS` is a sparse binary feature which represents a set of topic IDs the author may be tweeting about. This creates one record for each `(user_id, interest_id)` pair - so each record with multiple author interests is flattened before feeding it to the aggregation. + +Features +-------- + +`features` specifies a `Set[com.twitter.ml.api.Feature]` to aggregate within each group (defined by the keys specified earlier). + +We support 2 types of `features`: `BINARY` and `CONTINUOUS`. + +The semantics of how the aggregation works is slightly different based on the type of “feature”, and based on the “metric” (or aggregation operation): + +.. cssclass:: shortlist + +#. 
Binary Feature, Count Metric: Suppose we have a binary feature `HAS_PHOTO` in this set, and are applying the “Count” metric (see below for more details on the metrics), with key `USER_ID`. The semantics is that this computes a feature which measures the count of records with `HAS_PHOTO` set to true for each user. + +#. Binary Feature, Sum Metric - Does not apply. No feature will be computed. + +#. Continuous Feature, Count Metric - The count metric treats all features as binary features ignoring their value. For example, suppose we have a continuous feature `NUM_CHARACTERS_IN_TWEET`, and key `USER_ID`. This measures the count of records that have this feature `NUM_CHARACTERS_IN_TWEET` present. + +#. Continuous Feature, Sum Metric - In the above example, the feature measures the sum of (num_characters_in_tweet) over all of a user’s records. Dividing this sum feature by the count feature would give the average number of characters in all tweets. + +.. admonition:: Unsupported feature types + + `DISCRETE` and `SPARSE` features are not supported by the Sum Metric, because there is no meaning in summing a discrete feature or a sparse feature. You can use them with the CountMetric, but they may not do what you would expect since they will be treated as binary features losing all the information within the feature. The best way to use these is as “keys” and not as “features”. + +.. admonition:: Setting includeAnyFeature + + If constructor argument `includeAnyFeature` is set, the framework will append a feature with scope `any_feature` to the set of all features you define. This additional feature simply measures the total count of records. So if you set your features to be equal to Set.empty, this will measure the count of records for a given `USER_ID`. + +Labels +------ + +`labels` specifies a set of `BINARY` features that you can cross with, prior to applying aggregations on the `features`.
This essentially restricts the aggregate computation to a subset of the records within a particular key. + +We typically use this to represent engagement labels in an ML model, in this case, `IS_FAVORITED`. + +In this example, we are grouping by `USER_ID`, the feature is `HAS_PHOTO`, the label is `IS_FAVORITED`, and we are computing `CountMetric`. The system will output a feature for each user that represents the number of favorites on tweets having photos by this `userId`. + +.. admonition:: Setting includeAnyLabel + + If constructor argument `includeAnyLabel` is set (as it is by default), then similar to `any_feature`, the framework automatically appends a label of type `any_label` to the set of all labels you define, which represents not applying any filter or cross. + +In this example, `any_label` and `any_feature` are set by default and the system would actually output 4 features for each `user_id`: + +.. cssclass:: shortlist + +#. The number of `IS_FAVORITED` (favorites) on tweet impressions having `HAS_PHOTO=true` + +#. The number of `IS_FAVORITED` (favorites) on all tweet impressions (`any_feature` aggregate) + +#. The number of tweet impressions having `HAS_PHOTO=true` (`any_label` aggregate) + +#. The total number of tweet impressions for this user id (`any_feature.any_label` aggregate) + +.. admonition:: Disabling includeAnyLabel + + To disable this automatically generated feature you can use `includeAnyLabel = false` in your config. This will remove some useful features (particularly for counterfactual signal), but it can greatly save on space since it does not store every possible impressed set of keys in the output store. So use this if you are short on space, but not otherwise. + +Metrics +------- + +`metrics` specifies the aggregate operators to apply. The most commonly used are `Count`, `Sum` and `SumSq`. + +As mentioned before, `Count` can be applied to all types of features, but treats every feature as binary and ignores the value of the feature. 
`Sum` and `SumSq` can only be applied to Continuous features - they will ignore all other features you specify. By combining sum and sumsq and count, you can produce powerful “z-score” features or other distributional features using a post-transform. + +It is also possible to add your own aggregate operators (e.g. `LastResetMetric `_) to the framework with some additional work. + +HalfLives +--------- + +`halfLives` specifies how fast aggregate features should be decayed. It is important to note that the framework works on an incremental basis: in the batch implementation, the summingbird-scalding job takes in the most recently computed aggregate features, processed on data until day `N-1`, then reads new data records for day `N` and computes updated values of the aggregate features. Similarly, the decay of real-time aggregate features takes the actual time delta between the current time and the last time the aggregate feature value was updated. + +The halflife `H` specifies how fast to decay old sums/counts to simulate a sliding window of counts. The implementation is such that it will take `H` amount of time to decay an aggregate feature to half its initial value. New observed values of sums/counts are added to the aggregate feature value. + +.. admonition:: Batch and real-time + + In the batch use case where aggregate features are recomputed on a daily basis, we typically take halflives on the order of weeks or longer (in Timelines, 50 days). In the real-time use case, shorter halflives are appropriate (hours) since they are updated as client engagements are received by the summingbird job. + + +SQL Equivalent +-------------- +Conceptually, you can also think of it as: + +.. code-block:: sql + + INSERT INTO <outputStore>.<aggregatePrefix> + SELECT AGG(<features>) /* AGG is <metrics>, which is an exponentially decaying SUM or COUNT etc. based on the halfLives */ + FROM ( + SELECT preTransformOpt(*) FROM <inputSource> + ) + GROUP BY <keys> + WHERE <labels> = True + +any_feature is AGG(*). + +any_label removes the WHERE clause.
\ No newline at end of file diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/batch.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/batch.rst new file mode 100644 index 000000000..f3b6ac9a5 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/batch.rst @@ -0,0 +1,215 @@ +.. _batch: + +Batch aggregate feature jobs +============================ + +In the previous section, we went over the core concepts of the aggregation framework and discussed how you can set up your own `AggregateGroups` to compute aggregate features. + +Given these groups, this section will discuss how you can set up offline batch jobs to produce the corresponding aggregate features, updated daily. To accomplish this, we need to set up a summingbird-scalding job that is pointed to the input data records containing features and labels to be aggregated. + +Input Data +---------- + +In order to generate aggregate features, the relevant input features need to be available offline as a daily scalding source in `DataRecord` format (typically `DailySuffixFeatureSource `_, though `HourlySuffixFeatureSource` could also be usable but we have not tested this). + +.. admonition:: Note + + The input data source should contain the keys, features and labels you want to use in your `AggregateGroups`. + +Aggregation Config +------------------ + +Now that we have a daily data source with input features and labels, we need to set up the `AggregateGroup` config itself. This contains all aggregation groups that you would like to compute and we will go through the implementation step-by-step. + +.. admonition:: Example: Timelines Quality config + + `TimelinesAggregationConfig `_ imports the configured `AggregateGroups` from `TimelinesAggregationConfigDetails `_. The config is then referenced by the implementing summingbird-scalding job which we will set up below.
+ +OfflineAggregateSource +---------------------- + +Each `AggregateGroup` will need to define a (daily) source of input features. We use `OfflineAggregateSource` for this to tell the aggregation framework where the input data set is and the required timestamp feature that the framework uses to decay aggregate feature values: + +.. code-block:: scala + + val timelinesDailyRecapSource = OfflineAggregateSource( + name = "timelines_daily_recap", + timestampFeature = TIMESTAMP, + scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/data_records"), + scaldingSuffixType = Some("daily"), + withValidation = true + ) + +.. admonition:: Note + + .. cssclass:: shortlist + + #. The name is not important as long as it is unique. + + #. `timestampFeature` must be a discrete feature of type `com.twitter.ml.api.Feature[Long]` and represents the “time” of a given training record in milliseconds - for example, the time at which an engagement, push open event, or abuse event took place that you are trying to train on. If you do not already have such a feature in your daily training data, you need to add one. + + #. `scaldingSuffixType` can be “hourly” or “daily” depending on the type of source (`HourlySuffixFeatureSource` vs `DailySuffixFeatureSource`). + + #. Set `withValidation` to true to validate the presence of _SUCCESS file. Context: https://jira.twitter.biz/browse/TQ-10618 + +Output HDFS store +----------------- + +The output HDFS store is where the computed aggregate features are stored. This store contains all computed aggregate feature values and is incrementally updated by the aggregates job every day. + +.. 
code-block:: scala + + val outputHdfsPath = "/user/timelines/processed/aggregates_v2" + val timelinesOfflineAggregateSink = new OfflineStoreCommonConfig { + override def apply(startDate: String) = new OfflineAggregateStoreCommonConfig( + outputHdfsPathPrefix = outputHdfsPath, + dummyAppId = "timelines_aggregates_v2_ro", // unused - can be arbitrary + dummyDatasetPrefix = "timelines_aggregates_v2_ro", // unused - can be arbitrary + startDate = startDate + ) + } + +Note: `dummyAppId` and `dummyDatasetPrefix` are unused so can be set to any arbitrary value. They should be removed on the framework side. + +The `outputHdfsPathPrefix` is the only field that matters, and should be set to the HDFS path where you want to store the aggregate features. Make sure you have a lot of quota available at that path. + +Setting Up Aggregates Job +------------------------- + +Once you have defined a config file with the aggregates you would like to compute, the next step is to create the aggregates scalding job using the config (`example `_). This is very concise and requires only a few lines of code: + +.. code-block:: scala + + object TimelinesAggregationScaldingJob extends AggregatesV2ScaldingJob { + override val aggregatesToCompute = TimelinesAggregationConfig.aggregatesToCompute + } + +Now that the scalding job is implemented with the aggregation config, we need to setup a capesos config similar to https://cgit.twitter.biz/source/tree/science/scalding/mesos/timelines/prod.yml: + +.. code-block:: scala + + # Common configuration shared by all aggregates v2 jobs + __aggregates_v2_common__: &__aggregates_v2_common__ + class: HadoopSummingbirdProducer + bundle: offline_aggregation-deploy.tar.gz + mainjar: offline_aggregation-deploy.jar + pants_target: "bundle timelines/data_processing/ad_hoc/aggregate_interactions/v2/offline_aggregation:bin" + cron_collision_policy: CANCEL_NEW + use_libjar_wild_card: true + +.. 
code-block:: scala + + # Specific job computing user aggregates + user_aggregates_v2: + <<: *__aggregates_v2_common__ + cron_schedule: "25 * * * *" + arguments: --batches 1 --output_stores user_aggregates --job_name timelines_user_aggregates_v2 + +.. admonition:: Important + + Each AggregateGroup in your config should have its own associated offline job which specifies `output_stores` pointing to the output store name you defined in your config. + +Running The Job +--------------- + +When you run the batch job for the first time, you need to add a temporary entry to your capesos yml file that looks like this: + +.. code-block:: scala + + user_aggregates_v2_initial_run: + <<: *__aggregates_v2_common__ + cron_schedule: "25 * * * *" + arguments: --batches 1 --start-time "2017-03-03 00:00:00" --output_stores user_aggregates --job_name timelines_user_aggregates_v2 + +.. admonition:: Start Time + + The additional `--start-time` argument should match the `startDate` in your config for that AggregateGroup, but in the format `yyyy-mm-dd hh:mm:ss`. + +To invoke the initial run via capesos, we would do the following (in the Timelines case): + +.. code-block:: scala + + CAPESOSPY_ENV=prod capesospy-v2 update --build_locally --start_cron user_aggregates_v2_initial_run science/scalding/mesos/timelines/prod.yml + +Once it is running smoothly, you can deschedule the initial run job and delete the temporary entry from your production yml config. + +.. code-block:: scala + + aurora cron deschedule atla/timelines/prod/user_aggregates_v2_initial_run + +Note: deschedule it preemptively to avoid repeatedly overwriting the same initial results. + +Then schedule the production job from jenkins using something like this: + +.. code-block:: scala + + CAPESOSPY_ENV=prod capesospy-v2 update user_aggregates_v2 science/scalding/mesos/timelines/prod.yml + +All future runs (2nd onwards) will use the permanent entry in the capesos yml config that does not have the `start-time` specified. + +..
admonition:: Job name has to match + + It's important that the production run should share the same `--job_name` with the initial_run so that eagleeye/statebird knows how to keep track of it correctly. + +Output Aggregate Features +------------------------- + +This scalding job using the example config from the earlier section would output a VersionedKeyValSource to `/user/timelines/processed/aggregates_v2/user_aggregates` on HDFS. + +Note that `/user/timelines/processed/aggregates_v2` is the explicitly defined root path while `user_aggregates` is the output directory of the example `AggregateGroup` defined earlier. The latter can be different for different `AggregateGroups` defined in your config. + + +The VersionedKeyValSource is difficult to use directly in your jobs/offline trainings, but we provide an adapted source `AggregatesV2FeatureSource` that makes it easy to join and use in your jobs: + +.. code-block:: scala + + import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion._ + + val pipe: DataSetPipe = AggregatesV2FeatureSource( + rootPath = "/user/timelines/processed/aggregates_v2", + storeName = "user_aggregates", + aggregates = TimelinesAggregationConfig.aggregatesToCompute, + trimThreshold = 0 + )(dateRange).read + +Simply replace the `rootPath`, `storeName` and `aggregates` object with whatever you defined. The `trimThreshold` tells the framework to trim all features below a certain cutoff: 0 is a safe default to use to begin with. + +.. admonition:: Usage + + This can now be used like any other `DataSetPipe` in offline ML jobs. You can write out the features to a `DailySuffixFeatureSource`, you can join them with your data offline for trainings, or you can write them to a Manhattan store for serving online. + +Aggregate Features Example +-------------------------- + +Here is a sample of the aggregate features we just computed: + +..
code-block:: scala + + user_aggregate_v2.pair.any_label.any_feature.50.days.count: 100.0 + user_aggregate_v2.pair.any_label.tweetsource.is_quote.50.days.count: 30.0 + user_aggregate_v2.pair.is_favorited.any_feature.50.days.count: 10.0 + user_aggregate_v2.pair.is_favorited.tweetsource.is_quote.50.days.count: 6.0 + meta.user_id: 123456789 + +Aggregate feature names match a `prefix.pair.label.feature.half_life.metric` schema and correspond to what was defined in the aggregation config for each of these fields. + +.. admonition:: Example + + In this example, the above features are capturing that userId 123456789L has: + + .. + A 50-day decayed count of 100 training records with any label or feature (“tweet impressions”) + + A 50-day decayed count of 30 records that are “quote tweets” (tweetsource.is_quote = true) + + A 50-day decayed count of 10 records that are favorites on any type of tweet (is_favorited = true) + + A 50-day decayed count of 6 records that are “favorites” on “quote tweets” (both of the above are true) + +By combining the above, a model might infer that for this specific user, quote tweets comprise 30% of all impressions, have a favorite rate of 6/30 = 20%, compared to a favorite rate of 10/100 = 10% on the total population of tweets. + +Therefore, being a quote tweet makes this specific user `123456789L` approximately twice as likely to favorite the tweet, which is useful for prediction and could result in the ML model giving higher scores to & ranking quote tweets higher in a personalized fashion for this user. + +Tests for Feature Names +-------------------------- +When you change or add AggregateGroup, feature names might change. And the Feature Store provides a testing mechanism to assert that the feature names change as you expect. See `tests for feature names `_. 
diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/conf.py b/timelines/data_processing/ml_util/aggregation_framework/docs/conf.py new file mode 100644 index 000000000..03996dfd7 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/conf.py @@ -0,0 +1,59 @@ +# -*- coding: utf-8 -*- +# +# docbird documentation build configuration file +# Note that not all possible configuration values are present in this +# autogenerated file. +# + +from os.path import abspath, dirname, isfile, join + + +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", + "sphinx.ext.ifconfig", + "sphinx.ext.graphviz", + "twitter.docbird.ext.thriftlexer", + "twitter.docbird.ext.toctree_default_caption", + "sphinxcontrib.httpdomain", +] + + +# Add any paths that contain templates here, relative to this directory. +templates_path = ["_templates"] + +# The suffix of source filenames. +source_suffix = ".rst" + +# The master toctree document. +master_doc = "index" + +# General information about the project. +project = u"""Aggregation Framework""" +description = u"""""" + +# The short X.Y version. +version = u"""1.0""" +# The full version, including alpha/beta/rc tags. 
+release = u"""1.0""" + +exclude_patterns = ["_build"] + +pygments_style = "sphinx" + +html_theme = "default" + +html_static_path = ["_static"] + +html_logo = u"""""" + +# Automagically add project logo, if it exists +# (checks on any build, not just init) +# Scan for some common defaults (png or svg format, +# called "logo" or project name, in docs folder) +if not html_logo: + location = dirname(abspath(__file__)) + for logo_file in ["logo.png", "logo.svg", ("%s.png" % project), ("%s.svg" % project)]: + html_logo = logo_file if isfile(join(location, logo_file)) else html_logo + +graphviz_output_format = "svg" diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/index.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/index.rst new file mode 100644 index 000000000..af703c688 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/index.rst @@ -0,0 +1,11 @@ +.. markdowninclude:: ../README.md + +.. toctree:: + :maxdepth: 2 + :hidden: + + aggregation + batch + real-time + joining + troubleshooting diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/joining.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/joining.rst new file mode 100644 index 000000000..2ecdf7612 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/joining.rst @@ -0,0 +1,72 @@ +.. _joining: + +Joining aggregates features to records +====================================== + +After setting up either offline batch jobs or online real-time summingbird jobs to produce +aggregate features and querying them, we are left with data records containing aggregate features. +This page will go over how to join them with other data records to produce offline training data. 
+ +(To discuss: joining aggregates to records online) + +Joining Aggregates on Discrete/String Keys +------------------------------------------ + +Joining aggregate features keyed on discrete or text features to your training data is very easy - +you can use the built-in methods provided by `DataSetPipe`. For example, suppose you have aggregates +keyed by `(USER_ID, AUTHOR_ID)`: + +.. code-block:: scala + + val userAuthorAggregates: DataSetPipe = AggregatesV2FeatureSource( + rootPath = "/path/to/my/aggregates", + storeName = "user_author_aggregates", + aggregates = MyConfig.aggregatesToCompute, + trimThreshold = 0 + )(dateRange).read + +Offline, you can then join with your training data set as follows: + +.. code-block:: scala + + val myTrainingData: DataSetPipe = ... + val joinedData = myTrainingData.joinWithLarger((USER_ID, AUTHOR_ID), userAuthorAggregates) + +You can read from `AggregatesV2MostRecentFeatureSourceBeforeDate` in order to read the most recent aggregates +before a provided date `beforeDate`. Just note that `beforeDate` must be aligned with the date boundary (so if +you’re passing in a `dateRange`, use `dateRange.end`). + +Joining Aggregates on Sparse Binary Keys +---------------------------------------- + +When joining on sparse binary keys, there can be multiple aggregate records to join to each training record in +your training data set. For example, suppose you have set up an aggregate group that is keyed on `(INTEREST_ID, AUTHOR_ID)` +capturing engagement counts of users interested in a particular `INTEREST_ID` for specific authors provided by `AUTHOR_ID`. + +Suppose now that you have a training data record representing a specific user action. This training data record contains +a sparse binary feature `INTEREST_IDS` representing all the "interests" of that user - e.g. music, sports, and so on. Each `interest_id` +translates to a different set of counting features found in your aggregates data.
Therefore we need a way to merge all of +these different sets of counting features to produce a more compact, fixed-size set of features. + +.. admonition:: Merge policies + + To do this, the aggregate framework provides a trait `SparseBinaryMergePolicy `_. Classes overriding this trait define policies + that state how to merge the individual aggregate features from each sparse binary value (in this case, each `INTEREST_ID` for a user). + Furthermore, we provide `SparseBinaryMultipleAggregateJoin` which executes these policies to merge aggregates. + +A simple policy might simply average all the counts from the individual interests, or just take the max, or +a specific quantile. More advanced policies might use custom criteria to decide which interest is most relevant and choose +features from that interest to represent the user, or use some weighted combination of counts. + +The framework provides two simple in-built policies (`PickTopCtrPolicy `_ +and `CombineCountsPolicy `_, which keeps the topK counts per +record) that you can get started with, though you likely want to implement your own policy based on domain knowledge to get +the best results for your specific problem domain. + +.. admonition:: Offline Code Example + + The scalding job `TrainingDataWithAggV2Generator `_ shows how multiple merge policies are defined and implemented to merge aggregates on sparse binary keys to the TQ's training data records. + +.. admonition:: Online Code Example + + In our (non-FeatureStore enabled) online code path, we merge aggregates on sparse binary keys using the `CombineCountsPolicy `_. diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/real-time.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/real-time.rst new file mode 100644 index 000000000..fc853ba69 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/real-time.rst @@ -0,0 +1,327 @@ +.. 
_real_time: + +Real-Time aggregate features +============================ + +In addition to computing batch aggregate features, the aggregation framework supports real-time aggregates as well. The framework concepts used here are identical to the batch use case; however, the underlying implementation differs and is provided by summingbird-storm jobs. + +RTA Runbook +----------- + +For operational details, please visit http://go/tqrealtimeaggregates. + +Prerequisites +------------- + +In order to start computing real-time aggregate features, the framework requires the following to be provided: + +* A backing memcached store that will hold the computed aggregate features. This is conceptually equivalent to the output HDFS store in the batch compute case. +* Implementation of `StormAggregateSource `_ that creates `DataRecords` with the necessary input features. This serves as the input to the aggregation operations. +* Definition of aggregate features by defining `AggregateGroup` in an implementation of `OnlineAggregationConfigTrait`. This is identical to the batch case. +* Job config file defining the backing memcached for feature storage and retrieval, and job-related parameters. + +We will now go through the details in setting up each required component. + +Memcached store +--------------- + +Real-time aggregates use Memcache as the backing cache to store and update aggregate feature keys. Caches can be provisioned on `go/cacheboard `_. + +.. admonition:: Test and prod caches + + For development, it is sufficient to set up a test cache that your new job can query and write to. At the same time, a production cache request should also be submitted as these generally have significant lead times for provisioning. + +StormAggregateSource +-------------------- + +To enable aggregation of your features, we need to start with defining a `StormAggregateSource` that builds a `Producer[Storm, DataRecord]`.
This summingbird producer generates `DataRecords` that contain the input features and labels that the real-time aggregate job will compute aggregate features on. Conceptually, this is equivalent to the input data set in the offline batch use case. + +.. admonition:: Example + + If you are planning to aggregate on client engagements, you would need to subscribe to the `ClientEvent` kafka stream and then convert each event to a `DataRecord` that contains the key and the engagement on which to aggregate. + +Typically, we would set up a julep filter for the relevant client events that we would like to aggregate on. This gives us a `Producer[Storm, LogEvent]` object which we then convert to `Producer[Storm, DataRecord]` with adapters that we wrote: + +.. code-block:: scala + + lazy val clientEventProducer: Producer[Storm, LogEvent] = + ClientEventSourceScrooge( + appId = AppId(jobConfig.appId), + topic = "julep_client_event_suggests", + resumeAtLastReadOffset = false + ).source.name("timelines_events") + + lazy val clientEventWithCachedFeaturesProducer: Producer[Storm, DataRecord] = clientEventProducer + .flatMap(mkDataRecords) + +Note that this way of composing the storm graph gives us flexibility in how we can hydrate input features. If you would like to join more complex features to `DataRecord`, you can do so here with additional storm components which can implement cache queries. + +.. admonition:: Timelines Quality use case + + In Timelines Quality, we aggregate client engagements on `userId` or `tweetId` and implement + `TimelinesStormAggregateSource `_. We create + `Producer[Storm,LogEvent]` of Timelines engagements to which we apply `ClientLogEventAdapter `_ which converts the event to `DataRecord` containing `userId`, `tweetId`, `timestampFeature` of the engagement and the engagement label itself. + +.. admonition:: MagicRecs use case + + MagicRecs has a very similar setup for real-time aggregate features.
In addition, they also implement a more complex cache query to fetch the user's history in the `StormAggregateSource` for each observed client engagement to hydrate a richer set of input `DataRecords`: + + .. code-block:: scala + + val userHistoryStoreService: Storm#Service[Long, History] = + Storm.service(UserHistoryReadableStore) + + val clientEventDataRecordProducer: Producer[Storm, DataRecord] = + magicRecsClientEventProducer + .flatMap { ... + (userId, logEvent) + }.leftJoin(userHistoryStoreService) + .flatMap { + case (_, (logEvent, history)) => + mkDataRecords(LogEventHistoryPair(logEvent, history)) + } + +.. admonition:: EmailRecs use case + + EmailRecs shares the same cache as MagicRecs. They combine notification scribe data with email history data to identify the particular item a user engaged with in an email: + + .. code-block:: scala + + val emailHistoryStoreService: Storm#Service[Long, History] = + Storm.service(EmailHistoryReadableStore) + + val emailEventDataRecordProducer: Producer[Storm, DataRecord] = + emailEventProducer + .flatMap { ... + (userId, logEvent) + }.leftJoin(emailHistoryStoreService) + .flatMap { + case (_, (scribe, history)) => + mkDataRecords(ScribeHistoryPair(scribe, history)) + } + + +Aggregation config +------------------ + +The real-time aggregation config is extended from `OnlineAggregationConfigTrait `_ and defines the features to aggregate and the backing memcached store to which they will be written. + +Setting up real-time aggregates follows the same rules as in the offline batch use case. The major difference here is that `inputSource` should point to the `StormAggregateSource` implementation that provides the `DataRecord` containing the engagements and core features on which to aggregate. In the offline case, this would have been an `OfflineAggregateSource` pointing to an offline source of daily records. 
+ +Finally, `RealTimeAggregateStore` defines the backing memcache to be used and should be provided here as the `outputStore`. + +.. NOTE:: + + Please make sure to provide an `AggregateGroup` for both staging and production. The main difference should be the `outputStore` where features in either environment are read from and written to. You want to make sure that a staged real-time aggregates summingbird job is reading/writing only to the test memcache store and does not mutate the production store. + +Job config +---------- + +In addition to the aggregation config that defines the features to aggregate, the final piece we need to provide is a `RealTimeAggregatesJobConfig` that specifies job values such as `appId`, `teamName` and counts for the various topology components that define the capacity of the job (`Timelines example `_). + +Once you have the job config, implementing the storm job itself is easy and almost as concise as in the batch use case: + +.. code-block:: scala + + object TimelinesRealTimeAggregatesJob extends RealTimeAggregatesJobBase { + override lazy val statsReceiver = DefaultStatsReceiver.scope("timelines_real_time_aggregates") + override lazy val jobConfigs = TimelinesRealTimeAggregatesJobConfigs + override lazy val aggregatesToCompute = TimelinesOnlineAggregationConfig.AggregatesToCompute + } + +.. NOTE:: + There are some topology settings that are currently hard-coded. In particular, we enable `Config.TOPOLOGY_DROPTUPLES_UPON_BACKPRESSURE` to be true for added robustness. This may be made user-definable in the future. + +Steps to hydrate RTAs +--------------------- +1. Make the changes to RTAs and follow the steps for `Running the topology`. +2. Register the new RTAs to feature store. Sample phab: https://phabricator.twitter.biz/D718120 +3. Wire the features from feature store to TLX. This is usually done with the feature switch set to False. So it's just a code change and will not start hydrating the features yet. Merge the phab.
Sample phab: https://phabricator.twitter.biz/D718424 +4. Now we hydrate the features to TLX gradually by doing it shard wise. For this, first create a PCM and then enable the hydration. Sample PCM: https://jira.twitter.biz/browse/PCM-147814 + +Running the topology +-------------------- +0. For phab that makes change to the topology (such as adding new ML features), before landing the phab, please create a PCM (`example `_) and deploy the change to devel topology first and then prod (atla and pdxa). Once it is confirmed that the prod topology can handle the change, the phab can be landed. +1. Go to https://ci.twitter.biz/job/tq-ci/build +2. In `commands` input + +.. code-block:: bash + + . src/scala/com/twitter/timelines/prediction/common/aggregates/real_time/deploy_local.sh [devel|atla|pdxa] + +One can only deploy either `devel`, `atla` (prod atla), `pdxa` (prod pdxa) at a time. +For example, to deploy both pdxa and atla prod topologies, one needs to build/run the above steps twice, one with `pdxa` and the other with `atla`. + +The status and performance stats of the topology are found at `go/heron-ui `_. Here you can view whether the job is processing tuples, whether it is under any memory or backpressure and provides general observability. + +Finally, since we enable `Config.TOPOLOGY_DROPTUPLES_UPON_BACKPRESSURE` by default in the topology, we also need to monitor and alert on the number of dropped tuples. Since this is a job generating features a small fraction of dropped tuples is tolerable if that enables us to avoid backpressure that would hold up global computation in the entire graph. + +Hydrating Real-Time Aggregate Features +-------------------------------------- + +Once the job is up and running, the aggregate features will be accessible in the backing memcached store. To access these features and hydrate to your online pipeline, we need to build a Memcache client with the right query key. + +.. 
admonition:: Example + + Some care needs to be taken to define the key injection and codec correctly for the memcached store. These types do not change and you can use the Timelines `memcache client builder `_ as an example. + +Aggregate features are written to the store with a `(AggregationKey, BatchID)` key. + +`AggregationKey `_ is an instance of the keys that you previously defined in `AggregateGroup`. If your aggregation key is `USER_ID`, you would need to instantiate `AggregationKey` with the `USER_ID` featureId and the userId value. + +.. admonition:: Returned features + + The `DataRecord` that is returned by the cache now contains all real-time aggregate features for the query `AggregationKey` (similar to the batch use case). If your online hydration flow produces data records, the real-time aggregate features can be joined with your existing records in a straightforward way. + +Adding features from Feature Store to RTA +-------------------------------------------- +To add features from Feature Store to RTA and create real time aggregated features based on them, one needs to follow these steps: + +**Step 1** + +Copy Strato column for features that one wants to explore and add a cache if needed. See details at `Customize any Columns for your Team as Needed `_. As an `example `_, we copy Strato column of recommendationsUserFeaturesProd.User.strato and add a cache for timelines team's usage. + +**Step 2** + +Create a new ReadableStore which uses Feature Store Client to request features from Feature Store. Implement FeaturesAdapter which extends TimelinesAdapterBase and derive new features based on raw features from Feature Store. As an `example `_, we create UserFeaturesReadableStore which reads discrete feature user state, and convert it to a list of boolean user state features. + +**Step 3** + +Join these derived features from Feature Store to timelines storm aggregate source.
Depending on the characteristics of these derived features, the joined key could be tweet id, user id or others. As an `example `_, because user state is per user, the joined key is user id. + +**Step 4** + +Define `AggregateGroup` based on derived features in RTA + +Adding New Aggregate Features from an Existing Dataset +------------------------------------------------------ +To add a new aggregate feature group from an existing dataset for use in home models, use the following steps: + +1. Identify the hypothesis being tested by the addition of the features, in accordance with `go/tpfeatureguide `_. +2. Modify or add a new AggregateGroup to `TimelinesOnlineAggregationConfigBase.scala `_ to define the aggregation key, set of features, labels and metrics. An example phab to add more halflives can be found at `D204415 `_. +3. If the change is expected to be very large, it may be recommended to perform capacity estimation. See :ref:`Capacity Estimation` for more details. +4. Create feature catalog items for the new RTAs. An example phab is `D706348 `_. For approval from a featurestore owner ping #help-ml-features on slack. +5. Add new features to the featurestore. An example phab is `D706112 `_. This change can be rolled out with feature switches or by canarying TLX, depending on the risk. An example PCM for feature switches is: `PCM-148654 `_. An example PCM for canarying is: `PCM-145753 `_. +6. Wait for redeploy and confirm the new features are available. One way is querying in BigQuery from a table like `twitter-bq-timelines-prod.continuous_training_recap_fav`. Another way is to inspect individual records using pcat. The command to be used is like: + +..
code-block:: bash + + java -cp pcat-deploy.jar:$(hadoop classpath) com.twitter.ml.tool.pcat.PredictionCatTool + -path /atla/proc2/user/timelines/processed/suggests/recap/continuous_training_data_records/fav/data/YYYY/MM/DD/01/part-00000.lzo + -fc /atla/proc2/user/timelines/processed/suggests/recap/continuous_training_data_records/fav/data_spec.json + -dates YYYY-MM-DDT01 -record_limit 100 | grep [feature_group] + + +7. Create a phab with the new features and test the performance of a model with them compared to a control model without them. Test offline using `Deepbird for training `_ and `RCE Hypothesis Testing `_ to test. Test online using a DDG. Some helpful instructions are available in `Serving Timelines Models `_ and the `Experiment Cookbook `_ + +Capacity Estimation +-------------------------------- +This section describes how to approximate the capacity required for a new aggregate group. It is not expected to be exact, but should give a rough estimate. + +There are two main components that must be stored for each aggregate group. + +Key space: Each AggregationKey struct consists of two maps, one of which is populated with tuples [Long, Long] representing discrete features. This takes up 4 x 8 bytes or 32 bytes. The cache team estimates an additional 40 bytes of overhead. + +Features: An aggregate feature is represented as a pair (16 bytes) and is produced for each feature x label x metric x halflife combination. + +1. Use bigquery to estimate how many unique values exist for the selected key (key_count). Also collect the number of features, labels, metrics, and half-lives being used. +2. Compute the number of entries to be created, which is num_entries = feature_count * label_count * metric_count * halflife_count +3. Compute the number of bytes per entry, which is num_entry_bytes = 16*num_entries + 32 bytes (key storage) + 40 bytes (overhead) +4.
Compute total space required = num_entry_bytes * key_count + +Debugging New Aggregate Features +-------------------------------- + +To debug problems in the setup of your job, there are several steps you can take. + +First, ensure that data is being received from the input stream and passed through to create data records. This can be achieved by logging results at various places in your code, and especially at the point of data record creation. + +For example, suppose you want to ensure that a data record is being created with +the features you expect. With push and email features, we find that data records +are created in the adaptor, using logic like the following: + +.. code-block:: scala + + val record = new SRichDataRecord(new DataRecord) + ... + record.setFeatureValue(feature, value) + +To see what these feature values look like, we can have our adaptor class extend +Twitter's `Logging` trait, and write each created record to a log file. + +.. code-block:: scala + + class MyEventAdaptor extends TimelinesAdapterBase[MyObject] with Logging { + ... + ... + def mkDataRecord(myFeatures: MyFeatures): DataRecord = { + val record = new SRichDataRecord(new DataRecord) + ... + record.setFeatureValue(feature, value) + logger.info("data record xyz: " + record.getRecord.toString) + } + +This way, every time a data record is sent to the aggregator, it will also be +logged. To inspect these logs, you can push these changes to a staging instance, +ssh into that aurora instance, and grep the `log-files` directory for `xyz`. The +data record objects you find should resemble a map from feature ids to their +values. + +To check that steps in the aggregation are being performed, you can also inspect the job's topology on go/heronui. + +Lastly, to verify that values are being written to your cache you can check the `set` chart in your cache's viz. + +To check particular feature values for a given key, you can spin up a Scala REPL like so: + +.. 
code-block:: bash + + $ ssh -fN -L*:2181:sdzookeeper-read.atla.twitter.com:2181 -D *:50001 nest.atlc.twitter.com + + $ ./pants repl --jvm-repl-scala-options='-DsocksProxyHost=localhost -DsocksProxyPort=50001 -Dcom.twitter.server.resolverZkHosts=localhost:2181' timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/real_time_aggregates_cache + +You will then need to create a connection to the cache, and a key with which to query it. + +.. code-block:: scala + + import com.twitter.conversions.DurationOps._ + import com.twitter.finagle.stats.{DefaultStatsReceiver, StatsReceiver} + import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey + import com.twitter.summingbird.batch.Batcher + import com.twitter.timelinemixer.clients.real_time_aggregates_cache.RealTimeAggregatesMemcacheBuilder + import com.twitter.timelines.clients.memcache_common.StorehausMemcacheConfig + + val userFeature = -1887718638306251279L // feature id corresponding to User feature + val userId = 12L // replace with a user id logged when creating your data record + val key = (AggregationKey(Map(userFeature -> userId), Map.empty), Batcher.unit.currentBatch) + + val dataset = "twemcache_magicrecs_real_time_aggregates_cache_staging" // replace with the appropriate cache name + val dest = s"/srv#/test/local/cache/twemcache_/$dataset" + + val statsReceiver: StatsReceiver = DefaultStatsReceiver + val cache = new RealTimeAggregatesMemcacheBuilder( + config = StorehausMemcacheConfig( + destName = dest, + keyPrefix = "", + requestTimeout = 10.seconds, + numTries = 1, + globalTimeout = 10.seconds, + tcpConnectTimeout = 10.seconds, + connectionAcquisitionTimeout = 10.seconds, + numPendingRequests = 250, + isReadOnly = true + ), + statsReceiver.scope(dataset) + ).build + + val result = cache.get(key) + +Another option is to create a debugger which points to the staging cache and creates a cache connection and key similar to the logic above. 
+ +Run CQL query to find metrics/counters +-------------------------------- +We can also visualize the counters from our job to verify new features. Run CQL query on terminal to find the right path of metrics/counters. For example, in order to check counter mergeNumFeatures, run: + +cql -z atla keys heron/summingbird_timelines_real_time_aggregates Tail-FlatMap | grep mergeNumFeatures + + +Then use the right path to create the viz, example: https://monitoring.twitter.biz/tiny/2552105 diff --git a/timelines/data_processing/ml_util/aggregation_framework/docs/troubleshooting.rst b/timelines/data_processing/ml_util/aggregation_framework/docs/troubleshooting.rst new file mode 100644 index 000000000..d9799f433 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/docs/troubleshooting.rst @@ -0,0 +1,117 @@ +.. _troubleshooting: + +TroubleShooting +================== + + +[Batch] Regenerating a corrupt version +-------------------------------------- + +Symptom +~~~~~~~~~~ +The Summingbird batch job failed due to the following error: + +.. code:: bash + + Caused by: com.twitter.bijection.InversionFailure: ... + +It typically indicates the corrupt records of the aggregate store (not the other side of the DataRecord source). +The following describes the method to re-generate the required (typically the latest) version: + +Solution +~~~~~~~~~~ +1. Copy **the second to last version** of the problematic data to canaries folder. For example, if 11/20's job keeps failing, then copy the 11/19's data. + +.. code:: bash + + $ hadoop --config /etc/hadoop/hadoop-conf-proc2-atla/ \ + distcp -m 1000 \ + /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates/1605744000000 \ + /atla/proc2/user/timelines/canaries/processed/aggregates_v2/user_mention_aggregates/1605744000000 + + +2. Setup canary run for the date of the problem with fallback path pointing to `1605744000000` in the prod/canaries folder. + +3. 
Deschedule the production job and kill the current run: + +For example, + +.. code:: bash + + $ aurora cron deschedule atla/timelines/prod/user_mention_aggregates + $ aurora job killall atla/timelines/prod/user_mention_aggregates + +4. Create backup folder and move the corrupt prod store output there + +.. code:: bash + + $ hdfs dfs -mkdir /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup + $ hdfs dfs -mv /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates/1605830400000 /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup/ + $ hadoop fs -count /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup/1605830400000 + + 1 1001 10829136677614 /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup/1605830400000 + + +5. Copy canary output store to prod folder: + +.. code:: bash + + $ hadoop --config /etc/hadoop/hadoop-conf-proc2-atla/ distcp -m 1000 /atla/proc2/user/timelines/canaries/processed/aggregates_v2/user_mention_aggregates/1605830400000 /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates/1605830400000 + +We can see the slight difference of size: + +.. code:: bash + + $ hadoop fs -count /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup/1605830400000 + 1 1001 10829136677614 /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates_backup/1605830400000 + $ hadoop fs -count /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates/1605830400000 + 1 1001 10829136677844 /atla/proc2/user/timelines/processed/aggregates_v2/user_mention_aggregates/1605830400000 + +6. Deploy prod job again and observe whether it can successfully process the new output for the date of interest. + +7. Verify the new run succeeded and job is unblocked. 
+ +Example +~~~~~~~~ + +There is an example in https://phabricator.twitter.biz/D591174 + + +[Batch] Skipping the offline job ahead +--------------------------------------- + +Symptom +~~~~~~~~~~ +The Summingbird batch job keeps failing and the DataRecord source is no longer available (e.g. due to retention) and there is no way for the job to succeed **OR** + +.. +The job is stuck processing old data (more than one week old) and it will not catch up to the new data on its own if it is left alone + +Solution +~~~~~~~~ + +We will need to skip the job ahead. Unfortunately, this involves manual effort. We also need help from the ADP team (Slack #adp). + +1. Ask the ADP team to manually insert an entry into the store via the #adp Slack channel. You may refer to https://jira.twitter.biz/browse/AIPIPE-7520 and https://jira.twitter.biz/browse/AIPIPE-9300 as references. However, please don't create and assign tickets directly to an ADP team member unless they ask you to. + +2. Copy the latest version of the store to the same HDFS directory but with a different destination name. The name MUST be the same as the above inserted version. + +For example, if the ADP team manually inserted a version on 12/09/2020, then we can see the version by running + +.. code:: bash + + $ dalv2 segment list --name user_original_author_aggregates --role timelines --location-name proc2-atla --location-type hadoop-cluster + ... + None 2020-12-09T00:00:00Z viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1607472000000 Unknown None + +where `1607472000000` is the timestamp of 12/09/2020. +Then you will need to duplicate the latest version of the store to a dir of `1607472000000`. +For example, + +..
code:: bash + + $ hadoop --config /etc/hadoop/hadoop-conf-proc2-atla/ distcp -m 1000 /atla/proc2/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1605052800000 /atla/proc2/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1607472000000 + +3. Go to the EagleEye UI of the job and click on the "Skip Ahead" button to the desired datetime. In our example, it should be `2020-12-09 12am` + +4. Wait for the job to start. Now the job should be running the 2020-12-09 partition. diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/BUILD b/timelines/data_processing/ml_util/aggregation_framework/heron/BUILD new file mode 100644 index 000000000..0cc576e4e --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/BUILD @@ -0,0 +1,74 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + strict_deps = False, + tags = ["bazel-compatible"], + dependencies = [ + ":configs", + "3rdparty/jvm/storm:heron-oss-storm", + "3rdparty/src/jvm/com/twitter/scalding:args", + "3rdparty/src/jvm/com/twitter/summingbird:storm", + "src/java/com/twitter/heron/util", + "src/java/com/twitter/ml", + "src/scala/com/twitter/storehaus_internal/nighthawk_kv", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/scala/com/twitter/summingbird_internal/runner/common", + "src/scala/com/twitter/summingbird_internal/runner/storm", + "src/scala/com/twitter/timelines/prediction/features/common", + "timelines/data_processing/ml_util/aggregation_framework:user_job", + ], +) + +scala_library( + name = "configs", + sources = [ + "NighthawkUnderlyingStoreConfig.scala", + "OnlineAggregationConfigTrait.scala", + "OnlineAggregationStoresTrait.scala", + "RealTimeAggregateStore.scala", + "RealTimeAggregatesJobConfig.scala", + "StormAggregateSource.scala", + ], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + ":base-config", + 
"3rdparty/jvm/storm:heron-oss-storm", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "3rdparty/src/jvm/com/twitter/summingbird:storm", + "finagle/finagle-core/src/main", + "src/java/com/twitter/ml/api:api-base", + "src/scala/com/twitter/storehaus_internal/memcache", + "src/scala/com/twitter/storehaus_internal/memcache/config", + "src/scala/com/twitter/storehaus_internal/nighthawk_kv", + "src/scala/com/twitter/storehaus_internal/nighthawk_kv/config", + "src/scala/com/twitter/storehaus_internal/online", + "src/scala/com/twitter/storehaus_internal/store", + "src/scala/com/twitter/storehaus_internal/util", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + "src/thrift/com/twitter/clientapp/gen:clientapp-java", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:data-scala", + "src/thrift/com/twitter/ml/api:feature_context-java", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/transforms", + "util/util-core:scala", + "util/util-core:util-core-util", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) + +scala_library( + name = "base-config", + sources = [ + "OnlineAggregationConfigTrait.scala", + ], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/ml/api:api-base", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/NighthawkUnderlyingStoreConfig.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/NighthawkUnderlyingStoreConfig.scala new file mode 100644 index 000000000..cf7668a20 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/NighthawkUnderlyingStoreConfig.scala @@ -0,0 +1,31 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +import com.twitter.conversions.DurationOps._ +import 
com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.ssl.OpportunisticTls +import com.twitter.storehaus_internal.nighthawk_kv.CacheClientNighthawkConfig +import com.twitter.storehaus_internal.util.TTL +import com.twitter.storehaus_internal.util.TableName +import com.twitter.summingbird_internal.runner.store_config.OnlineStoreOnlyConfig +import com.twitter.util.Duration + +case class NighthawkUnderlyingStoreConfig( + serversetPath: String = "", + tableName: String = "", + cacheTTL: Duration = 1.day) + extends OnlineStoreOnlyConfig[CacheClientNighthawkConfig] { + + def online: CacheClientNighthawkConfig = online(EmptyServiceIdentifier) + + def online( + serviceIdentifier: ServiceIdentifier = EmptyServiceIdentifier + ): CacheClientNighthawkConfig = + CacheClientNighthawkConfig( + serversetPath, + TableName(tableName), + TTL(cacheTTL), + serviceIdentifier = serviceIdentifier, + opportunisticTlsLevel = OpportunisticTls.Required + ) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationConfigTrait.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationConfigTrait.scala new file mode 100644 index 000000000..aea649128 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationConfigTrait.scala @@ -0,0 +1,28 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import com.twitter.ml.api.Feature + +trait OnlineAggregationConfigTrait { + def ProdAggregates: Set[TypedAggregateGroup[_]] + def StagingAggregates: Set[TypedAggregateGroup[_]] + def ProdCommonAggregates: Set[TypedAggregateGroup[_]] + + /** + * AggregateToCompute: This defines the complete set of aggregates to be + * computed by the aggregation job and to be stored in 
memcache. + */ + def AggregatesToCompute: Set[TypedAggregateGroup[_]] + + /** + * ProdFeatures: This defines the subset of aggregates to be extracted + * and hydrated (or adapted) by callers to the aggregates features cache. + * This should only contain production aggregates and aggregates on + * product specific engagements. + * ProdCommonFeatures: Similar to ProdFeatures but containing user-level + * aggregate features. This is provided to PredictionService just + * once per user. + */ + lazy val ProdFeatures: Set[Feature[_]] = ProdAggregates.flatMap(_.allOutputFeatures) + lazy val ProdCommonFeatures: Set[Feature[_]] = ProdCommonAggregates.flatMap(_.allOutputFeatures) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationStoresTrait.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationStoresTrait.scala new file mode 100644 index 000000000..4f693190e --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/OnlineAggregationStoresTrait.scala @@ -0,0 +1,6 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +trait OnlineAggregationStoresTrait { + def ProductionStore: RealTimeAggregateStore + def StagingStore: RealTimeAggregateStore +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregateStore.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregateStore.scala new file mode 100644 index 000000000..2e75039d3 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregateStore.scala @@ -0,0 +1,50 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.storehaus_internal.memcache.ConnectionConfig 
+import com.twitter.storehaus_internal.memcache.MemcacheConfig +import com.twitter.storehaus_internal.util.KeyPrefix +import com.twitter.storehaus_internal.util.TTL +import com.twitter.storehaus_internal.util.ZkEndPoint +import com.twitter.summingbird_internal.runner.store_config.OnlineStoreOnlyConfig +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore +import com.twitter.util.Duration + +object RealTimeAggregateStore { + val twCacheWilyPrefix = "/srv#" // s2s is only supported for wily path + + def makeEndpoint( + memcacheDataSet: String, + isProd: Boolean, + twCacheWilyPrefix: String = twCacheWilyPrefix + ): String = { + val env = if (isProd) "prod" else "test" + s"$twCacheWilyPrefix/$env/local/cache/$memcacheDataSet" + } +} + +case class RealTimeAggregateStore( + memcacheDataSet: String, + isProd: Boolean = false, + cacheTTL: Duration = 1.day) + extends OnlineStoreOnlyConfig[MemcacheConfig] + with AggregateStore { + import RealTimeAggregateStore._ + + override val name: String = "" + val storeKeyPrefix: KeyPrefix = KeyPrefix(name) + val memcacheZkEndPoint: String = makeEndpoint(memcacheDataSet, isProd) + + def online: MemcacheConfig = online(serviceIdentifier = EmptyServiceIdentifier) + + def online(serviceIdentifier: ServiceIdentifier = EmptyServiceIdentifier): MemcacheConfig = + new MemcacheConfig { + val endpoint = ZkEndPoint(memcacheZkEndPoint) + override val connectionConfig = + ConnectionConfig(endpoint, serviceIdentifier = serviceIdentifier) + override val keyPrefix = storeKeyPrefix + override val ttl = TTL(Duration.fromMilliseconds(cacheTTL.inMillis)) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobBase.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobBase.scala new file mode 100644 index 000000000..906f7c1be --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobBase.scala 
@@ -0,0 +1,301 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +import com.twitter.algebird.Monoid +import com.twitter.bijection.Injection +import com.twitter.bijection.thrift.CompactThriftCodec +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.heron.util.CommonMetric +import com.twitter.ml.api.DataRecord +import com.twitter.scalding.Args +import com.twitter.storehaus.algebra.MergeableStore +import com.twitter.storehaus.algebra.StoreAlgebra._ +import com.twitter.storehaus_internal.memcache.Memcache +import com.twitter.storehaus_internal.store.CombinedStore +import com.twitter.storehaus_internal.store.ReplicatingWritableStore +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird.batch.Batcher +import com.twitter.summingbird.online.MergeableStoreFactory +import com.twitter.summingbird.online.option._ +import com.twitter.summingbird.option.CacheSize +import com.twitter.summingbird.option.JobId +import com.twitter.summingbird.storm.option.FlatMapStormMetrics +import com.twitter.summingbird.storm.option.SummerStormMetrics +import com.twitter.summingbird.storm.Storm +import com.twitter.summingbird.storm.StormMetric +import com.twitter.summingbird.Options +import com.twitter.summingbird._ +import com.twitter.summingbird_internal.runner.common.CapTicket +import com.twitter.summingbird_internal.runner.common.JobName +import com.twitter.summingbird_internal.runner.common.TeamEmail +import com.twitter.summingbird_internal.runner.common.TeamName +import com.twitter.summingbird_internal.runner.storm.ProductionStormConfig +import com.twitter.timelines.data_processing.ml_util.aggregation_framework._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.job.AggregatesV2Job +import 
com.twitter.timelines.data_processing.ml_util.aggregation_framework.job.AggregatesV2Job +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.job.DataRecordFeatureCounter +import org.apache.heron.api.{Config => HeronConfig} +import org.apache.heron.common.basics.ByteAmount +import org.apache.storm.Config +import scala.collection.JavaConverters._ + +object RealTimeAggregatesJobBase { + lazy val commonMetric: StormMetric[CommonMetric] = + StormMetric(new CommonMetric(), CommonMetric.NAME, CommonMetric.POLL_INTERVAL) + lazy val flatMapMetrics: FlatMapStormMetrics = FlatMapStormMetrics(Iterable(commonMetric)) + lazy val summerMetrics: SummerStormMetrics = SummerStormMetrics(Iterable(commonMetric)) +} + +trait RealTimeAggregatesJobBase extends Serializable { + import RealTimeAggregatesJobBase._ + import com.twitter.summingbird_internal.bijection.BatchPairImplicits._ + + def statsReceiver: StatsReceiver + + def aggregatesToCompute: Set[TypedAggregateGroup[_]] + + def jobConfigs: RealTimeAggregatesJobConfigs + + implicit lazy val dataRecordCodec: Injection[DataRecord, Array[Byte]] = + CompactThriftCodec[DataRecord] + implicit lazy val monoid: Monoid[DataRecord] = DataRecordAggregationMonoid(aggregatesToCompute) + implicit lazy val aggregationKeyInjection: Injection[AggregationKey, Array[Byte]] = + AggregationKeyInjection + + val clusters: Set[String] = Set("atla", "pdxa") + + def buildAggregateStoreToStorm( + isProd: Boolean, + serviceIdentifier: ServiceIdentifier, + jobConfig: RealTimeAggregatesJobConfig + ): (AggregateStore => Option[Storm#Store[AggregationKey, DataRecord]]) = { + (store: AggregateStore) => + store match { + case rtaStore: RealTimeAggregateStore if rtaStore.isProd == isProd => { + lazy val primaryStore: MergeableStore[(AggregationKey, BatchID), DataRecord] = + Memcache.getMemcacheStore[(AggregationKey, BatchID), DataRecord]( + rtaStore.online(serviceIdentifier)) + + lazy val mergeableStore: MergeableStore[(AggregationKey, BatchID), 
DataRecord] = + if (jobConfig.enableUserReindexingNighthawkBtreeStore + || jobConfig.enableUserReindexingNighthawkHashStore) { + val reindexingNighthawkBtreeWritableDataRecordStoreList = + if (jobConfig.enableUserReindexingNighthawkBtreeStore) { + lazy val cacheClientNighthawkConfig = + jobConfig.userReindexingNighthawkBtreeStoreConfig.online(serviceIdentifier) + List( + UserReindexingNighthawkWritableDataRecordStore.getBtreeStore( + nighthawkCacheConfig = cacheClientNighthawkConfig, + // Choose a reasonably large target size as this will be equivalent to the number of unique (user, timestamp) + // keys that are returned on read on the pKey, and we may have duplicate authors and associated records. + targetSize = 512, + statsReceiver = statsReceiver, + // Assuming trims are relatively expensive, choose a trimRate that's not as aggressive. In this case we trim on + // 10% of all writes. + trimRate = 0.1 + )) + } else { Nil } + val reindexingNighthawkHashWritableDataRecordStoreList = + if (jobConfig.enableUserReindexingNighthawkHashStore) { + lazy val cacheClientNighthawkConfig = + jobConfig.userReindexingNighthawkHashStoreConfig.online(serviceIdentifier) + List( + UserReindexingNighthawkWritableDataRecordStore.getHashStore( + nighthawkCacheConfig = cacheClientNighthawkConfig, + // Choose a reasonably large target size as this will be equivalent to the number of unique (user, timestamp) + // keys that are returned on read on the pKey, and we may have duplicate authors and associated records. + targetSize = 512, + statsReceiver = statsReceiver, + // Assuming trims are relatively expensive, choose a trimRate that's not as aggressive. In this case we trim on + // 10% of all writes. 
+ trimRate = 0.1 + )) + } else { Nil } + + lazy val replicatingWritableStore = new ReplicatingWritableStore( + stores = List(primaryStore) ++ reindexingNighthawkBtreeWritableDataRecordStoreList + ++ reindexingNighthawkHashWritableDataRecordStoreList + ) + + lazy val combinedStoreWithReindexing = new CombinedStore( + read = primaryStore, + write = replicatingWritableStore + ) + + combinedStoreWithReindexing.toMergeable + } else { + primaryStore + } + + lazy val storeFactory: MergeableStoreFactory[(AggregationKey, BatchID), DataRecord] = + Storm.store(mergeableStore)(Batcher.unit) + Some(storeFactory) + } + case _ => None + } + } + + def buildDataRecordSourceToStorm( + jobConfig: RealTimeAggregatesJobConfig + ): (AggregateSource => Option[Producer[Storm, DataRecord]]) = { (source: AggregateSource) => + { + source match { + case stormAggregateSource: StormAggregateSource => + Some(stormAggregateSource.build(statsReceiver, jobConfig)) + case _ => None + } + } + } + + def apply(args: Args): ProductionStormConfig = { + lazy val isProd = args.boolean("production") + lazy val cluster = args.getOrElse("cluster", "") + lazy val isDebug = args.boolean("debug") + lazy val role = args.getOrElse("role", "") + lazy val service = + args.getOrElse( + "service_name", + "" + ) // don't use the argument service, which is a reserved heron argument + lazy val environment = if (isProd) "prod" else "devel" + lazy val s2sEnabled = args.boolean("s2s") + lazy val keyedByUserEnabled = args.boolean("keyed_by_user") + lazy val keyedByAuthorEnabled = args.boolean("keyed_by_author") + + require(clusters.contains(cluster)) + if (s2sEnabled) { + require(role.length() > 0) + require(service.length() > 0) + } + + lazy val serviceIdentifier = if (s2sEnabled) { + ServiceIdentifier( + role = role, + service = service, + environment = environment, + zone = cluster + ) + } else EmptyServiceIdentifier + + lazy val jobConfig = { + val jobConfig = if (isProd) jobConfigs.Prod else jobConfigs.Devel + 
jobConfig.copy( + serviceIdentifier = serviceIdentifier, + keyedByUserEnabled = keyedByUserEnabled, + keyedByAuthorEnabled = keyedByAuthorEnabled) + } + + lazy val dataRecordSourceToStorm = buildDataRecordSourceToStorm(jobConfig) + lazy val aggregateStoreToStorm = + buildAggregateStoreToStorm(isProd, serviceIdentifier, jobConfig) + + lazy val JaasConfigFlag = "-Djava.security.auth.login.config=resources/jaas.conf" + lazy val JaasDebugFlag = "-Dsun.security.krb5.debug=true" + lazy val JaasConfigString = + if (isDebug) { "%s %s".format(JaasConfigFlag, JaasDebugFlag) } + else JaasConfigFlag + + new ProductionStormConfig { + implicit val jobId: JobId = JobId(jobConfig.name) + override val jobName = JobName(jobConfig.name) + override val teamName = TeamName(jobConfig.teamName) + override val teamEmail = TeamEmail(jobConfig.teamEmail) + override val capTicket = CapTicket("n/a") + + val configureHeronJvmSettings = { + val heronJvmOptions = new java.util.HashMap[String, AnyRef]() + jobConfig.componentToRamGigaBytesMap.foreach { + case (component, gigabytes) => + HeronConfig.setComponentRam( + heronJvmOptions, + component, + ByteAmount.fromGigabytes(gigabytes)) + } + + HeronConfig.setContainerRamRequested( + heronJvmOptions, + ByteAmount.fromGigabytes(jobConfig.containerRamGigaBytes) + ) + + jobConfig.componentsToKerberize.foreach { component => + HeronConfig.setComponentJvmOptions( + heronJvmOptions, + component, + JaasConfigString + ) + } + + jobConfig.componentToMetaSpaceSizeMap.foreach { + case (component, metaspaceSize) => + HeronConfig.setComponentJvmOptions( + heronJvmOptions, + component, + metaspaceSize + ) + } + + heronJvmOptions.asScala.toMap ++ AggregatesV2Job + .aggregateNames(aggregatesToCompute).map { + case (prefix, aggNames) => (s"extras.aggregateNames.${prefix}", aggNames) + } + } + + override def transformConfig(m: Map[String, AnyRef]): Map[String, AnyRef] = { + super.transformConfig(m) ++ List( + /** + * Disable acking by setting acker executors to 0. 
Tuples that come off the + * spout will be immediately acked which effectively disables retries on tuple + * failures. This should help topology throughput/availability by relaxing consistency. + */ + Config.TOPOLOGY_ACKER_EXECUTORS -> int2Integer(0), + Config.TOPOLOGY_WORKERS -> int2Integer(jobConfig.topologyWorkers), + HeronConfig.TOPOLOGY_CONTAINER_CPU_REQUESTED -> int2Integer(8), + HeronConfig.TOPOLOGY_DROPTUPLES_UPON_BACKPRESSURE -> java.lang.Boolean.valueOf(true), + HeronConfig.TOPOLOGY_WORKER_CHILDOPTS -> List( + JaasConfigString, + s"-Dcom.twitter.eventbus.client.zoneName=${cluster}", + "-Dcom.twitter.eventbus.client.EnableKafkaSaslTls=true" + ).mkString(" "), + "storm.job.uniqueId" -> jobId.get + ) ++ configureHeronJvmSettings + + } + + override lazy val getNamedOptions: Map[String, Options] = jobConfig.topologyNamedOptions ++ + Map( + "DEFAULT" -> Options() + .set(flatMapMetrics) + .set(summerMetrics) + .set(MaxWaitingFutures(1000)) + .set(FlushFrequency(30.seconds)) + .set(UseAsyncCache(true)) + .set(AsyncPoolSize(4)) + .set(SourceParallelism(jobConfig.sourceCount)) + .set(SummerBatchMultiplier(1000)), + "FLATMAP" -> Options() + .set(FlatMapParallelism(jobConfig.flatMapCount)) + .set(CacheSize(0)), + "SUMMER" -> Options() + .set(SummerParallelism(jobConfig.summerCount)) + /** + * Sets number of tuples a Summer awaits before aggregation. Set higher + * if you need to lower qps to memcache at the expense of introducing + * some (stable) latency. 
+             */
+            .set(CacheSize(jobConfig.cacheSize))
+        )
+
+      val featureCounters: Seq[DataRecordFeatureCounter] =
+        Seq(DataRecordFeatureCounter.any(Counter(Group("feature_counter"), Name("num_records"))))
+
+      override def graph: TailProducer[Storm, Any] = AggregatesV2Job.generateJobGraph[Storm](
+        aggregateSet = aggregatesToCompute,
+        aggregateSourceToSummingbird = dataRecordSourceToStorm,
+        aggregateStoreToSummingbird = aggregateStoreToStorm,
+        featureCounters = featureCounters
+      )
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobConfig.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobConfig.scala
new file mode 100644
index 000000000..8bed26264
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/heron/RealTimeAggregatesJobConfig.scala
@@ -0,0 +1,79 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron
+
+import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier
+import com.twitter.finagle.mtls.authentication.ServiceIdentifier
+import com.twitter.ml.api.DataRecord
+import com.twitter.summingbird.Options
+import com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform
+
+/**
+ * Settings for one real-time aggregates topology job.
+ *
+ * @param appId  application id for topology job
+ * @param topologyWorkers  number of workers/containers of topology
+ * @param sourceCount  number of parallel spouts of topology
+ * @param summerCount  number of parallel Summers of topology
+ * @param cacheSize  number of tuples a Summer awaits before aggregation.
+ * @param flatMapCount  number of parallel FlatMaps of topology
+ * @param containerRamGigaBytes  total RAM, in gigabytes, that each worker/container has
+ * @param name  name of topology job
+ * @param teamName  name of team who owns topology job
+ * @param teamEmail  email of team who owns topology job
+ * @param componentsToKerberize  components of topology job (eg. Tail-FlatMap-Source) for which kerberization is enabled
+ * @param componentToMetaSpaceSizeMap  MetaSpaceSize settings for components of topology job; each value is handed
+ *                                     to the component as a JVM option string
+ * @param componentToRamGigaBytesMap  RAM, in gigabytes, requested per named topology component
+ *                                    (defaults to 4 for "Tail")
+ * @param topologyNamedOptions  Sets spout allocations for named topology components. The job base appends its own
+ *                              "DEFAULT", "FLATMAP" and "SUMMER" entries after these, so entries under those three
+ *                              names are replaced.
+ * @param serviceIdentifier  represents the identifier used for Service to Service Authentication
+ * @param onlinePreTransforms sequential data record transforms applied to Producer of DataRecord before creating AggregateGroup.
+ *                            While preTransforms defined at AggregateGroup are applied to each aggregate group, onlinePreTransforms are applied to the whole producer source.
+ * @param keyedByUserEnabled boolean value to enable/disable merging user-level features from Feature Store
+ * @param keyedByAuthorEnabled  boolean value to enable/disable merging author-level features from Feature Store
+ * @param keyedByTweetEnabled  presumably the tweet-level counterpart of the two flags above; it is not read by the
+ *                             job base - TODO confirm against the sources that consume it
+ * @param enableUserReindexingNighthawkBtreeStore boolean value to enable reindexing RTAs on user id with btree backed nighthawk
+ * @param enableUserReindexingNighthawkHashStore boolean value to enable reindexing RTAs on user id with hash backed nighthawk
+ * @param userReindexingNighthawkBtreeStoreConfig NH btree store config used in reindexing user RTAs
+ * @param userReindexingNighthawkHashStoreConfig NH hash store config used in reindexing user RTAs
+ */
+case class RealTimeAggregatesJobConfig(
+  appId: String,
+  topologyWorkers: Int,
+  sourceCount: Int,
+  summerCount: Int,
+  cacheSize: Int,
+  flatMapCount: Int,
+  containerRamGigaBytes: Int,
+  name: String,
+  teamName: String,
+  teamEmail: String,
+  componentsToKerberize: Seq[String] = Seq.empty,
+  componentToMetaSpaceSizeMap: Map[String, String] = Map.empty,
+  componentToRamGigaBytesMap: Map[String, Int] = Map("Tail" -> 4),
+  topologyNamedOptions: Map[String, Options] = Map.empty,
+  serviceIdentifier: ServiceIdentifier = EmptyServiceIdentifier,
+  onlinePreTransforms: Seq[OneToSomeTransform] = Seq.empty,
+  keyedByUserEnabled: Boolean = false,
+  keyedByAuthorEnabled: Boolean = false,
+  keyedByTweetEnabled: Boolean = false,
+  enableUserReindexingNighthawkBtreeStore: Boolean = false,
+  enableUserReindexingNighthawkHashStore: Boolean = false,
+  userReindexingNighthawkBtreeStoreConfig: NighthawkUnderlyingStoreConfig =
+    NighthawkUnderlyingStoreConfig(),
+  userReindexingNighthawkHashStoreConfig: NighthawkUnderlyingStoreConfig =
+    NighthawkUnderlyingStoreConfig()) {
+
+  /**
+   * Apply transforms sequentially. If any transform results in a dropped (None)
+   * DataRecord, then entire transform sequence will result in a dropped DataRecord.
+   * Note that transforms are order-dependent.
+   *
+   * The fold starts from `new DataRecord(dataRecord)` rather than from the argument itself.
+   *
+   * @param dataRecord record to run through `onlinePreTransforms`
+   * @return the output of the last transform, or None as soon as any transform returns None
+   */
+  def sequentiallyTransform(dataRecord: DataRecord): Option[DataRecord] = {
+    val recordOpt = Option(new DataRecord(dataRecord))
+    onlinePreTransforms.foldLeft(recordOpt) {
+      case (Some(previousRecord), preTransform) =>
+        preTransform(previousRecord)
+      // once a transform has dropped the record, every later step stays None
+      case _ => Option.empty[DataRecord]
+    }
+  }
+}
+
+// Supplies the production and development variants of the job configuration.
+trait RealTimeAggregatesJobConfigs {
+  def Prod: RealTimeAggregatesJobConfig
+  def Devel: RealTimeAggregatesJobConfig
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/StormAggregateSource.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/StormAggregateSource.scala
new file mode 100644
index 000000000..a252cf197
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/heron/StormAggregateSource.scala
@@ -0,0 +1,27 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.ml.api.DataRecord
+import com.twitter.ml.api.Feature
+import com.twitter.summingbird._
+import com.twitter.summingbird.storm.Storm
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateSource
+import java.lang.{Long => JLong}
+
+/**
+ * Use this trait to implement online summingbird producer that subscribes to
+ * spouts and generates a data record.
+ */ +trait StormAggregateSource extends AggregateSource { + def name: String + + def timestampFeature: Feature[JLong] + + /** + * Constructs the storm Producer with the implemented topology at runtime. + */ + def build( + statsReceiver: StatsReceiver, + jobConfig: RealTimeAggregatesJobConfig + ): Producer[Storm, DataRecord] +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/UserReindexingNighthawkStore.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/UserReindexingNighthawkStore.scala new file mode 100644 index 000000000..a4d2adeac --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/heron/UserReindexingNighthawkStore.scala @@ -0,0 +1,309 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron + +import com.twitter.bijection.Injection +import com.twitter.bijection.thrift.CompactThriftCodec +import com.twitter.cache.client._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.constant.SharedFeatures +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.storehaus.WritableStore +import com.twitter.storehaus_internal.nighthawk_kv.CacheClientNighthawkConfig +import com.twitter.storehaus_internal.nighthawk_kv.NighthawkStore +import com.twitter.summingbird.batch.BatchID +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.UserReindexingNighthawkWritableDataRecordStore._ +import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures +import com.twitter.util.Future +import com.twitter.util.Time +import com.twitter.util.Try +import com.twitter.util.logging.Logger +import java.nio.ByteBuffer +import java.util +import scala.util.Random + +object 
UserReindexingNighthawkWritableDataRecordStore {
+  // Shared codecs and helpers for the user re-indexing Nighthawk (NH) stores defined in this file.
+  implicit val longInjection = Injection.long2BigEndian
+  implicit val dataRecordInjection: Injection[DataRecord, Array[Byte]] =
+    CompactThriftCodec[DataRecord]
+  val arrayToByteBuffer = Injection.connect[Array[Byte], ByteBuffer]
+  val longToByteBuffer = longInjection.andThen(arrayToByteBuffer)
+  val dataRecordToByteBuffer = dataRecordInjection.andThen(arrayToByteBuffer)
+
+  /**
+   * Builds the btree-backed re-indexing store (userId pkey, timestamp lkey).
+   *
+   * @param nighthawkCacheConfig NH client config; its `table` is also used as the trim table name
+   * @param targetSize size handed to the store for trimming
+   * @param statsReceiver receiver for the store's counters
+   * @param trimRate fraction of puts that trigger a trim
+   */
+  def getBtreeStore(
+    nighthawkCacheConfig: CacheClientNighthawkConfig,
+    targetSize: Int,
+    statsReceiver: StatsReceiver,
+    trimRate: Double
+  ): UserReindexingNighthawkBtreeWritableDataRecordStore =
+    new UserReindexingNighthawkBtreeWritableDataRecordStore(
+      nighthawkStore = NighthawkStore[UserId, TimestampMs, DataRecord](nighthawkCacheConfig)
+        .asInstanceOf[NighthawkStore[UserId, TimestampMs, DataRecord]],
+      tableName = nighthawkCacheConfig.table.toString,
+      targetSize = targetSize,
+      statsReceiver = statsReceiver,
+      trimRate = trimRate
+    )
+
+  /**
+   * Builds the hash-backed re-indexing store (userId pkey, authorId lkey).
+   *
+   * @param nighthawkCacheConfig NH client config; its `table` is also used as the scan/remove table name
+   * @param targetSize size handed to the store for trimming
+   * @param statsReceiver receiver for the store's counters
+   * @param trimRate fraction of puts that trigger a scan-and-trim
+   */
+  def getHashStore(
+    nighthawkCacheConfig: CacheClientNighthawkConfig,
+    targetSize: Int,
+    statsReceiver: StatsReceiver,
+    trimRate: Double
+  ): UserReindexingNighthawkHashWritableDataRecordStore =
+    new UserReindexingNighthawkHashWritableDataRecordStore(
+      nighthawkStore = NighthawkStore[UserId, AuthorId, DataRecord](nighthawkCacheConfig)
+        .asInstanceOf[NighthawkStore[UserId, AuthorId, DataRecord]],
+      tableName = nighthawkCacheConfig.table.toString,
+      targetSize = targetSize,
+      statsReceiver = statsReceiver,
+      trimRate = trimRate
+    )
+
+  /**
+   * Prepends `timestamp` (8 bytes, in the buffer's default big-endian order) to the readable
+   * bytes of `bb`.
+   *
+   * The result is sized exactly (`Long.BYTES` + payload; `Long.SIZE` is a bit count, not a byte
+   * count) and flipped back to position 0, so relative readers that consume `remaining()` bytes
+   * and absolute readers such as `getLong(0)` both see the timestamp followed by the payload.
+   *
+   * @param timestamp value written in front of the payload
+   * @param bb payload; read through a duplicate, so its position and limit are not modified
+   * @return a new buffer positioned at 0 containing timestamp + payload
+   */
+  def buildTimestampedByteBuffer(timestamp: Long, bb: ByteBuffer): ByteBuffer = {
+    val payload = bb.duplicate()
+    val timestampedBb = ByteBuffer.allocate(java.lang.Long.BYTES + payload.remaining())
+    timestampedBb.putLong(timestamp)
+    timestampedBb.put(payload)
+    // Without the flip the position stays at the end and readers would see no content.
+    timestampedBb.flip()
+    timestampedBb
+  }
+
+  /** Reads the timestamp stored in the first 8 bytes (absolute index 0) of a timestamped buffer. */
+  def extractTimestampFromTimestampedByteBuffer(bb: ByteBuffer): Long = {
+    bb.getLong(0)
+  }
+
+  /**
+   * Copies out the payload that follows the 8-byte timestamp prefix, i.e. the bytes from absolute
+   * index `Long.BYTES` up to the buffer's limit. Uses the same absolute indexing as
+   * `extractTimestampFromTimestampedByteBuffer`.
+   *
+   * @param bb timestamped buffer; read through a duplicate, so its position and limit are not modified
+   * @return a new array-backed buffer holding only the payload bytes
+   */
+  def extractValueFromTimestampedByteBuffer(bb: ByteBuffer): ByteBuffer = {
+    val view = bb.duplicate()
+    view.position(java.lang.Long.BYTES)
+    val bytes = new Array[Byte](view.remaining())
+    view.get(bytes)
+    ByteBuffer.wrap(bytes)
+  }
+
+  /**
+   * Builds the NH `KeyValue` for one user: userId is the pkey, each authorId is an lkey and each
+   * value is the serialized DataRecord prefixed with the current time in milliseconds.
+   */
+  def transformAndBuildKeyValueMapping(
+    table: String,
+    userId: UserId,
+    authorIdsAndDataRecords: Seq[(AuthorId, DataRecord)]
+  ): KeyValue = {
+    val timestamp = Time.now.inMillis
+    val pkey = longToByteBuffer(userId)
+    val lkeysAndTimestampedValues = authorIdsAndDataRecords.map {
+      case (authorId, dataRecord) =>
+        val lkey = longToByteBuffer(authorId)
+        // Create a byte buffer with a prepended timestamp to reduce deserialization cost
+        // when parsing values. We only have to extract and deserialize the timestamp in the
+        // ByteBuffer in order to sort the value, as opposed to deserializing the DataRecord
+        // and having to get a timestamp feature value from the DataRecord.
+        val dataRecordBb = dataRecordToByteBuffer(dataRecord)
+        val timestampedValue = buildTimestampedByteBuffer(timestamp, dataRecordBb)
+        (lkey, timestampedValue)
+    }
+    buildKeyValueMapping(table, pkey, lkeysAndTimestampedValues)
+  }
+
+  /** Splits the (lkey, value) pairs into the parallel lkey and value sequences that `KeyValue` expects. */
+  def buildKeyValueMapping(
+    table: String,
+    pkey: ByteBuffer,
+    lkeysAndTimestampedValues: Seq[(ByteBuffer, ByteBuffer)]
+  ): KeyValue = {
+    val (lkeys, timestampedValues) = lkeysAndTimestampedValues.unzip
+    KeyValue(
+      key = Key(table = table, pkey = pkey, lkeys = lkeys),
+      value = Value(timestampedValues)
+    )
+  }
+}
+
+/**
+ * Implements a NH store that stores aggregate feature DataRecords using userId as the primary key.
+ *
+ * This store re-indexes user-author keyed real-time aggregate (RTA) features on userId by
+ * writing to a userId primary key (pkey) and timestamp secondary key (lkey).
To fetch user-author
+ * RTAs for a given user from cache, the caller just needs to make a single RPC for the userId pkey.
+ * The downside of a re-indexing store is that we cannot store arbitrarily many secondary keys
+ * under the primary key. This specific implementation using the NH btree backend also
+ * mandates an ordering of secondary keys - we therefore use timestamp as the secondary key
+ * as opposed to say authorId.
+ *
+ * Note that a caller of the btree backed NH re-indexing store receives back a response where the
+ * secondary key is a timestamp. The associated value is a DataRecord containing user-author related
+ * aggregate features which was last updated at the timestamp. The caller therefore needs to handle
+ * the response and dedupe on unique, most recent user-author pairs.
+ *
+ * For a discussion on this and other implementations, please see:
+ * https://docs.google.com/document/d/1yVzAbQ_ikLqwSf230URxCJmSKj5yZr5dYv6TwBlQw18/edit
+ *
+ * @param nighthawkStore NH store keyed by (userId, timestampMs) that receives the re-indexed writes
+ * @param tableName NH table that trim requests are issued against
+ * @param targetSize target size passed to NH `trim` for the user's pkey
+ * @param statsReceiver receives the `failures` counter, scoped by this class's simple name
+ * @param trimRate fraction of puts that issue a trim before the write
+ */
+class UserReindexingNighthawkBtreeWritableDataRecordStore(
+  nighthawkStore: NighthawkStore[UserId, TimestampMs, DataRecord],
+  tableName: String,
+  targetSize: Int,
+  statsReceiver: StatsReceiver,
+  trimRate: Double = 0.1 // by default, trim on 10% of puts
+) extends WritableStore[(AggregationKey, BatchID), Option[DataRecord]] {
+
+  private val scope = getClass.getSimpleName
+  private val failures = statsReceiver.counter(scope, "failures")
+  private val log = Logger.getLogger(getClass)
+  private val random: Random = new Random(1729L)
+
+  /**
+   * Re-indexes one aggregate write under (userId, record timestamp).
+   *
+   * Nothing is written when the aggregation key carries no USER_ID, when the value is None, or
+   * when the record has no timestamp feature. The returned Future is always `Future.Unit`;
+   * failures of the underlying trim/put are only counted and logged.
+   */
+  override def put(kv: ((AggregationKey, BatchID), Option[DataRecord])): Future[Unit] = {
+    val ((aggregationKey, _), dataRecordOpt) = kv
+    // Fire-and-forget below because the store itself should just be a side effect
+    // as it's just making re-indexed writes based on the writes to the primary store.
+    for {
+      userId <- aggregationKey.discreteFeaturesById.get(SharedFeatures.USER_ID.getFeatureId)
+      dataRecord <- dataRecordOpt
+    } yield {
+      SRichDataRecord(dataRecord)
+        .getFeatureValueOpt(TypedAggregateGroup.timestampFeature)
+        .map(_.toLong) // convert to Scala Long
+        .map { timestamp =>
+          val trim: Future[Unit] = if (random.nextDouble <= trimRate) {
+            val trimKey = TrimKey(
+              table = tableName,
+              pkey = longToByteBuffer(userId),
+              targetSize = targetSize,
+              ascending = true
+            )
+            nighthawkStore.client.trim(Seq(trimKey)).unit
+          } else {
+            Future.Unit
+          }
+          // We should wait for trim to complete above
+          val fireAndForget = trim.before {
+            val kvTuple = ((userId, timestamp), Some(dataRecord))
+            nighthawkStore.put(kvTuple)
+          }
+
+          fireAndForget.onFailure {
+            case e =>
+              failures.incr()
+              // Name this (btree) store so failures are not attributed to the hash store.
+              log.error("Failure in UserReindexingNighthawkBtreeWritableDataRecordStore", e)
+          }
+        }
+    }
+    // Ignore fire-and-forget result above and simply return
+    Future.Unit
+  }
+}
+
+/**
+ * Implements a NH store that stores aggregate feature DataRecords using userId as the primary key.
+ *
+ * This store re-indexes user-author keyed real-time aggregate (RTA) features on userId by
+ * writing to a userId primary key (pkey) and authorId secondary key (lkey). To fetch user-author
+ * RTAs for a given user from cache, the caller just needs to make a single RPC for the userId pkey.
+ * The downside of a re-indexing store is that we cannot store arbitrarily
+ * many secondary keys under the primary key. We have to limit them in some way;
+ * here, we do so by randomly (based on trimRate) issuing an HGETALL command (via scan) to
+ * retrieve the whole hash, sort by oldest timestamp, and then remove the oldest authors to keep
+ * only targetSize authors (aka trim), where targetSize is configurable.
+ *
+ * @note The full hash returned from scan could be as large (or even larger) than targetSize,
+ * which could mean many DataRecords to deserialize, especially at high write qps.
+ * To reduce deserialization cost post-scan, we use timestamped values with a prepended timestamp
+ * in the value ByteBuffer; this allows us to only deserialize the timestamp and not the full
+ * DataRecord when sorting. This is necessary in order to identify the oldest values to trim.
+ * When we do a put for a new (user, author) pair, we also write out timestamped values.
+ *
+ * For a discussion on this and other implementations, please see:
+ * https://docs.google.com/document/d/1yVzAbQ_ikLqwSf230URxCJmSKj5yZr5dYv6TwBlQw18/edit
+ *
+ * @param nighthawkStore NH store whose client performs the scan, remove and put calls
+ * @param tableName NH table used for the scan, remove and put keys
+ * @param targetSize number of authors kept per user when a trim runs
+ * @param statsReceiver receives the `failures` and `scanMismatchErrors` counters
+ * @param trimRate fraction of puts that scan and trim before the write
+ */
+class UserReindexingNighthawkHashWritableDataRecordStore(
+  nighthawkStore: NighthawkStore[UserId, AuthorId, DataRecord],
+  tableName: String,
+  targetSize: Int,
+  statsReceiver: StatsReceiver,
+  trimRate: Double = 0.1 // by default, trim on 10% of puts
+) extends WritableStore[(AggregationKey, BatchID), Option[DataRecord]] {
+
+  private val scope = getClass.getSimpleName
+  private val scanMismatchErrors = statsReceiver.counter(scope, "scanMismatchErrors")
+  private val failures = statsReceiver.counter(scope, "failures")
+  private val log = Logger.getLogger(getClass)
+  private val random: Random = new Random(1729L)
+  private val arrayToByteBuffer = Injection.connect[Array[Byte], ByteBuffer]
+  private val longToByteBuffer = Injection.long2BigEndian.andThen(arrayToByteBuffer)
+
+  /**
+   * Re-indexes one aggregate write under (userId, authorId).
+   *
+   * Nothing is written when the aggregation key lacks USER_ID or SOURCE_AUTHOR_ID, or when the
+   * value is None. The returned Future is always `Future.Unit`; failures of the underlying
+   * scan/remove/put are only counted and logged.
+   */
+  override def put(kv: ((AggregationKey, BatchID), Option[DataRecord])): Future[Unit] = {
+    val ((aggregationKey, _), dataRecordOpt) = kv
+    // Fire-and-forget below because the store itself should just be a side effect
+    // as it's just making re-indexed writes based on the writes to the primary store.
+    for {
+      userId <- aggregationKey.discreteFeaturesById.get(SharedFeatures.USER_ID.getFeatureId)
+      authorId <- aggregationKey.discreteFeaturesById.get(
+        TimelinesSharedFeatures.SOURCE_AUTHOR_ID.getFeatureId)
+      dataRecord <- dataRecordOpt
+    } yield {
+      val scanAndTrim: Future[Unit] = if (random.nextDouble <= trimRate) {
+        val scanKey = ScanKey(
+          table = tableName,
+          pkey = longToByteBuffer(userId)
+        )
+        nighthawkStore.client.scan(Seq(scanKey)).flatMap { scanResults: Seq[Try[KeyValue]] =>
+          scanResults.headOption
+            .flatMap(_.toOption).map { keyValue: KeyValue =>
+              val lkeys: Seq[ByteBuffer] = keyValue.key.lkeys
+              // these are timestamped bytebuffers
+              val timestampedValues: Seq[ByteBuffer] = keyValue.value.values
+              // this should fail loudly if this is not true. it would indicate
+              // there is a mistake in the scan.
+              if (lkeys.size != timestampedValues.size) scanMismatchErrors.incr()
+              assert(lkeys.size == timestampedValues.size)
+              if (lkeys.size > targetSize) {
+                // Surplus over the target. This must be size - target: the reverse is negative
+                // in this branch and `take` of a negative count selects nothing.
+                val numToRemove = lkeys.size - targetSize
+                // sort by oldest and take top k oldest and remove - this is equivalent to a trim
+                val oldestKeys: Seq[ByteBuffer] = lkeys
+                  .zip(timestampedValues)
+                  .map {
+                    case (lkey, timestampedValue) =>
+                      val timestamp = extractTimestampFromTimestampedByteBuffer(timestampedValue)
+                      (timestamp, lkey)
+                  }
+                  .sortBy { case (timestamp, _) => timestamp }
+                  .take(numToRemove)
+                  .map { case (_, k) => k }
+                val pkey = longToByteBuffer(userId)
+                val key = Key(table = tableName, pkey = pkey, lkeys = oldestKeys)
+                // NOTE: `remove` is a batch API, and we group all lkeys into a single batch (batch
+                // size = single group of lkeys = 1). Instead, we could separate lkeys into smaller
+                // groups and have batch size = number of groups, but this is more complex.
+                // Performance implications of batching vs non-batching need to be assessed.
+                nighthawkStore.client
+                  .remove(Seq(key))
+                  .map { responses =>
+                    responses.map(resp => nighthawkStore.processValue(resp))
+                  }.unit
+              } else {
+                Future.Unit
+              }
+            }.getOrElse(Future.Unit)
+        }
+      } else {
+        Future.Unit
+      }
+      // We should wait for scan and trim to complete above
+      val fireAndForget = scanAndTrim.before {
+        val kv = transformAndBuildKeyValueMapping(tableName, userId, Seq((authorId, dataRecord)))
+        nighthawkStore.client
+          .put(Seq(kv))
+          .map { responses =>
+            responses.map(resp => nighthawkStore.processValue(resp))
+          }.unit
+      }
+      fireAndForget.onFailure {
+        case e =>
+          failures.incr()
+          log.error("Failure in UserReindexingNighthawkHashWritableDataRecordStore", e)
+      }
+    }
+    // Ignore fire-and-forget result above and simply return
+    Future.Unit
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/heron/package.scala b/timelines/data_processing/ml_util/aggregation_framework/heron/package.scala
new file mode 100644
index 000000000..e995cf202
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/heron/package.scala
@@ -0,0 +1,8 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework
+
+package object heron {
+  // NOTE: please sort alphabetically
+  type AuthorId = Long
+  type TimestampMs = Long
+  type UserId = Long
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/job/AggregatesV2Job.scala b/timelines/data_processing/ml_util/aggregation_framework/job/AggregatesV2Job.scala
new file mode 100644
index 000000000..7d9e1946e
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/job/AggregatesV2Job.scala
@@ -0,0 +1,163 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.job
+
+import com.twitter.algebird.Semigroup
+import com.twitter.ml.api.DataRecord
+import com.twitter.ml.api.DataRecordMerger
+import com.twitter.summingbird.Platform
+import com.twitter.summingbird.Producer
+import com.twitter.summingbird.TailProducer
object AggregatesV2Job {
  private lazy val merger = new DataRecordMerger

  /**
   * Merges all "incremental" records with the same aggregation key
   * into a single record.
   *
   * @param recordsPerKey A set of (AggregationKey, DataRecord) tuples
   *   known to share the same AggregationKey
   * @return A single merged datarecord (a freshly allocated record that every
   *   element has been merged into)
   */
  def mergeRecords(recordsPerKey: Set[(AggregationKey, DataRecord)]): DataRecord =
    recordsPerKey.foldLeft(new DataRecord) {
      case (merged, (_, elem)) =>
        // DataRecordMerger mutates its first argument in place.
        merger.merge(merged, elem)
        merged
    }

  /**
   * Given a set of aggregates to compute and a datarecord, extract key-value
   * pairs to output to the summingbird store.
   *
   * @param dataRecord input data record
   * @param aggregates set of aggregates to compute
   * @param featureCounters counters to apply to each merged output record
   * @return computed aggregates, one merged record per aggregation key
   */
  def computeAggregates(
    dataRecord: DataRecord,
    aggregates: Set[TypedAggregateGroup[_]],
    featureCounters: Seq[DataRecordFeatureCounter]
  ): Map[AggregationKey, DataRecord] = {
    // Build a strict map here. `mapValues` returns a lazy view, which would
    // re-run `mergeRecords` on every traversal: once per feature counter below
    // and once more when the caller consumes the result.
    val computedAggregates: Map[AggregationKey, DataRecord] = aggregates
      .flatMap(_.computeAggregateKVPairs(dataRecord))
      .groupBy { case (aggregationKey: AggregationKey, _) => aggregationKey }
      .map { case (aggregationKey, records) => aggregationKey -> mergeRecords(records) }

    // Counters are pure side effects, so iterate with foreach semantics.
    for {
      counter <- featureCounters
      record <- computedAggregates.values
    } DataRecordFeatureCounter(counter, record)

    computedAggregates
  }

  /**
   * Util method to apply a filter on containment in an optional set.
   *
   * @param setOptional Optional set of items to check containment in.
   * @param toCheck Item to check if contained in set.
   * @return If the optional set is None, returns true; otherwise whether the
   *   set contains ''toCheck''.
   */
  def setFilter[T](setOptional: Option[Set[T]], toCheck: T): Boolean =
    setOptional.forall(_.contains(toCheck))

  /**
   * Util for filtering a collection of `TypedAggregateGroup`
   *
   * @param aggregates a set of aggregates
   * @param sourceNames Optional filter on which AggregateGroups to process
   *   based on the name of the input source.
   * @param storeNames Optional filter on which AggregateGroups to process
   *   based on the name of the output store.
   * @return filtered aggregates
   */
  def filterAggregates(
    aggregates: Set[TypedAggregateGroup[_]],
    sourceNames: Option[Set[String]],
    storeNames: Option[Set[String]]
  ): Set[TypedAggregateGroup[_]] =
    aggregates
      .filter { aggregateGroup =>
        val sourceName = aggregateGroup.inputSource.name
        val storeName = aggregateGroup.outputStore.name
        val containsSource = setFilter(sourceNames, sourceName)
        val containsStore = setFilter(storeNames, storeName)
        containsSource && containsStore
      }

  /**
   * The core summingbird job code.
   *
   * For each aggregate in the set passed in, the job
   * processes all datarecords in the input producer
   * stream to generate "incremental" contributions to
   * these aggregates, and emits them grouped by
   * aggregation key so that summingbird can aggregate them.
   *
   * It is important that after applying the sourceNameFilter and storeNameFilter,
   * all the result AggregateGroups share the same startDate, otherwise the job
   * will fail or give invalid results.
   *
   * Aggregates are grouped by (input source, output store); a group is dropped
   * when either mapping function returns None for it. If every group is dropped
   * (or ''aggregateSet'' is empty) the final `reduceLeft` throws, because there
   * is no tail producer to return.
   *
   * @param aggregateSet A set of aggregates to compute. All aggregates
   *   in this set that pass the sourceNameFilter and storeNameFilter
   *   defined below, if any, will be computed.
   * @param aggregateSourceToSummingbird Function that maps from our logical
   *   AggregateSource abstraction to the underlying physical summingbird
   *   producer of data records to aggregate (e.g. scalding/eventbus source)
   * @param aggregateStoreToSummingbird Function that maps from our logical
   *   AggregateStore abstraction to the underlying physical summingbird
   *   store to write output aggregate records to (e.g. Manhattan for scalding,
   *   or memcache for heron)
   * @param featureCounters counters to use with each input DataRecord
   * @return summingbird tail producer
   */
  def generateJobGraph[P <: Platform[P]](
    aggregateSet: Set[TypedAggregateGroup[_]],
    aggregateSourceToSummingbird: AggregateSource => Option[Producer[P, DataRecord]],
    aggregateStoreToSummingbird: AggregateStore => Option[P#Store[AggregationKey, DataRecord]],
    featureCounters: Seq[DataRecordFeatureCounter] = Seq.empty
  )(
    implicit semigroup: Semigroup[DataRecord]
  ): TailProducer[P, Any] = {
    val tailProducerList: List[TailProducer[P, Any]] = aggregateSet
      .groupBy { aggregate => (aggregate.inputSource, aggregate.outputStore) }
      .flatMap {
        case (
              (inputSource: AggregateSource, outputStore: AggregateStore),
              aggregatesInThisStore
            ) => {
          val producerOpt = aggregateSourceToSummingbird(inputSource)
          val storeOpt = aggregateStoreToSummingbird(outputStore)

          (producerOpt, storeOpt) match {
            case (Some(producer), Some(store)) =>
              Some(
                producer
                  .flatMap(computeAggregates(_, aggregatesInThisStore, featureCounters))
                  .name("FLATMAP")
                  .sumByKey(store)
                  .name("SUMMER")
              )
            case _ => None
          }
        }
      }
      .toList

    // Chain all per-(source, store) tails into a single job graph.
    tailProducerList.reduceLeft { (left, right) => left.also(right) }
  }

  /**
   * Maps each aggregate group's prefix to a comma-separated list of the names
   * of all output features produced by that group's aggregate descriptors.
   *
   * @param aggregateSet aggregate groups to describe
   * @return map from aggregate prefix to joined output feature names
   */
  def aggregateNames(aggregateSet: Set[TypedAggregateGroup[_]]) = {
    aggregateSet
      .map(typedGroup =>
        (
          typedGroup.aggregatePrefix,
          typedGroup.individualAggregateDescriptors
            .flatMap(_.outputFeatures.map(_.getFeatureName)).mkString(",")))
  }.toMap
}
/**
 * Couples a summingbird Counter with a predicate over
 * [[com.twitter.ml.api.DataRecord]] instances; the counter is only meant to be
 * bumped for records the predicate accepts.
 *
 * Example: for records representing Tweets, the predicate could test for a
 * binary "has image" feature, so the counter tracks the number of Tweets with
 * images that were processed.
 *
 * @param predicate a predicate which gates the counter
 * @param counter a summingbird Counter instance
 */
case class DataRecordFeatureCounter(predicate: DataRecord => Boolean, counter: Counter)

object DataRecordFeatureCounter {

  /**
   * Applies the gated counter to one record: increments when the predicate
   * holds for the record, does nothing otherwise.
   *
   * @param recordCounter a data record counter
   * @param record a data record
   */
  def apply(recordCounter: DataRecordFeatureCounter, record: DataRecord): Unit = {
    val accepted = recordCounter.predicate(record)
    if (accepted) {
      recordCounter.counter.incr()
    }
  }

  /**
   * Builds a feature counter whose predicate accepts every record.
   *
   * @param counter a summingbird Counter instance
   * @return a data record counter
   */
  def any(counter: Counter): DataRecordFeatureCounter = {
    val acceptAll: DataRecord => Boolean = _ => true
    new DataRecordFeatureCounter(acceptAll, counter)
  }
}
/**
 * Shared argument for getAggregateValue() and setAggregateValue() in
 * AggregationMetric, describing one (feature, label, half life) aggregate.
 *
 * @param aggregatePrefix Prefix for aggregate feature name
 * @param feature Simple (non-aggregate) feature being aggregated. Optional;
 *   if None, the label is aggregated on its own.
 * @param label Label being paired with. Optional; if None, the feature is
 *   aggregated on its own.
 * @param halfLife Half life being used for aggregation
 */
case class AggregateFeature[T](
  aggregatePrefix: String,
  feature: Option[Feature[T]],
  label: Option[Feature[JBoolean]],
  halfLife: Duration) {
  val aggregateType = "pair"
  val labelName: String = label.fold("any_label")(_.getDenseFeatureName())
  val featureName: String = feature.fold("any_feature")(_.getDenseFeatureName())

  /*
   * Dot-joined prefix of every output feature name for this aggregate,
   * computed once up front because string building turned out to be a
   * significant bottleneck.
   */
  val featurePrefix: String =
    Seq(aggregatePrefix, aggregateType, labelName, featureName, halfLife.toString)
      .mkString(".")
}

/* Companion object with util methods. */
object AggregateFeature {

  /**
   * Recovers the half life from an aggregate feature's dense name by joining
   * the third-from-last and second-from-last dot-separated components and
   * handing the result to Duration.parse.
   */
  def parseHalfLife(aggregateFeature: Feature[_]): Duration = {
    val parts = aggregateFeature.getDenseFeatureName().split("\\.")
    val count = parts.length
    val magnitude = parts(count - 3)
    val unit = parts(count - 2)
    Duration.parse(magnitude + "." + unit)
  }
}
/**
 * An aggregation operator (e.g. count or mean). Implement the abstract members
 * of this trait to define a new metric.
 *
 * @tparam T type of the input feature being aggregated
 * @tparam A value type carried by the TimedValue this metric aggregates into
 */
trait AggregationMetric[T, A] extends FeatureCache[T] {

  /**
   * Combines two timed aggregate values into one.
   *
   * @param left Left timed value
   * @param right Right timed value
   * @param halfLife Half life to use for adding timed values
   * @return Result timed value
   */
  def plus(left: TimedValue[A], right: TimedValue[A], halfLife: Duration): TimedValue[A]

  /**
   * Computes the incremental contribution of one datarecord to the aggregate
   * of ''feature''.
   *
   * For a count metric the contribution is always a TimedValue (1.0, time);
   * for a mean over a continuous feature it looks like (value, 1.0, time).
   *
   * @param dataRecord record to get increment value from
   * @param feature Feature to get increment value for. If None, the semantics
   *   is to just aggregate the label.
   * @param timestampFeature Feature to use as millisecond timestamp for
   *   decayed value aggregation
   * @return The incremental contribution of ''dataRecord''
   */
  def getIncrementValue(
    dataRecord: DataRecord,
    feature: Option[Feature[T]],
    timestampFeature: Feature[JLong]
  ): TimedValue[A]

  /**
   * The "zero" value for aggregation, e.g. 0 for the count operator.
   *
   * @param timeOpt optional timestamp for the zero value
   */
  def zero(timeOpt: Option[Long] = None): TimedValue[A]

  /**
   * Reads this metric's aggregate out of a datarecord. Operators store their
   * state differently (count keeps a single count; mean keeps a sum and a
   * count), and the features holding that state are called "output" features.
   *
   * @param record Record to get value from
   * @param query AggregateFeature specifying details of aggregate
   * @param aggregateOutputs Optional precomputed "output" feature hashes for
   *   this (query, metric) pair. Derivable from ''query'', but passing them in
   *   is significantly (approximately 4x) faster. If absent, the operator
   *   should reconstruct them from scratch.
   * @return The aggregate value if found in ''record'', else the appropriate
   *   "zero" for this type of aggregation
   */
  def getAggregateValue(
    record: DataRecord,
    query: AggregateFeature[T],
    aggregateOutputs: Option[List[JLong]] = None
  ): TimedValue[A]

  /**
   * Writes this metric's aggregate into a datarecord, using the operator's own
   * "output" feature representation.
   *
   * @param record Record to set value in
   * @param query AggregateFeature specifying details of aggregate
   * @param aggregateOutputs Optional precomputed "output" features for this
   *   (query, metric) pair. Derivable from ''query'', but passing them in is
   *   significantly (approximately 4x) faster. If absent, the operator should
   *   reconstruct them from scratch.
   * @param value Value to set for the aggregate described by ''query''
   */
  def setAggregateValue(
    record: DataRecord,
    query: AggregateFeature[T],
    aggregateOutputs: Option[List[JLong]] = None,
    value: TimedValue[A]
  ): Unit

  /**
   * Lists the features this metric uses to store its output representation in
   * partially aggregated data records. "count" has a single result feature;
   * "mean" has three: count, sum and mean.
   *
   * @param query AggregateFeature specifying details of aggregate
   * @return the "output" features used by this metric
   */
  def getOutputFeatures(query: AggregateFeature[T]): List[Feature[_]]

  /**
   * Same as [[getOutputFeatures]] but returns each output feature's dense
   * feature id instead of the feature itself.
   *
   * @param query AggregateFeature specifying details of aggregate
   * @return the "output" feature hashes used by this metric
   */
  def getOutputFeatureIds(query: AggregateFeature[T]): List[JLong] =
    for (outputFeature <- getOutputFeatures(query))
      yield outputFeature.getDenseFeatureId().asInstanceOf[JLong]

  /**
   * Adds the aggregate described by ''query'' from two datarecords and stores
   * the sum in a third.
   * WARNING: this method has side-effects; it modifies ''combined''.
   *
   * @param combined Result datarecord to mutate and store addition result in
   * @param left Left datarecord to add
   * @param right Right datarecord to add
   * @param query Details of aggregate to add
   * @param aggregateOutputs Optional precomputed "output" feature hashes for
   *   this (query, metric) pair; see [[getAggregateValue]]
   */
  def mutatePlus(
    combined: DataRecord,
    left: DataRecord,
    right: DataRecord,
    query: AggregateFeature[T],
    aggregateOutputs: Option[List[JLong]] = None
  ): Unit =
    setAggregateValue(
      combined,
      query,
      aggregateOutputs,
      plus(
        getAggregateValue(left, query, aggregateOutputs),
        getAggregateValue(right, query, aggregateOutputs),
        query.halfLife
      )
    )

  /**
   * Computes the increment contributed by ''input'' and writes it to ''output''.
   * Nothing is written when the query has a label that ''input'' lacks.
   *
   * @param output Datarecord to output increment to (will be mutated by this method)
   * @param input Datarecord to get increment from
   * @param query Details of aggregation
   * @param timestampFeature Feature holding the record timestamp; defaults to
   *   SharedFeatures.TIMESTAMP
   * @param aggregateOutputs Optional precomputed "output" feature hashes for
   *   this (query, metric) pair; see [[getAggregateValue]]
   * @return True if an increment was set in the output record, else false
   */
  def setIncrement(
    output: DataRecord,
    input: DataRecord,
    query: AggregateFeature[T],
    timestampFeature: Feature[JLong] = SharedFeatures.TIMESTAMP,
    aggregateOutputs: Option[List[JLong]] = None
  ): Boolean = {
    // No label means "always aggregate"; otherwise the input must carry the label.
    val labelSatisfied =
      query.label.forall(label => SRichDataRecord(input).hasFeature(label))

    if (!labelSatisfied) {
      false
    } else {
      val incrementValue: TimedValue[A] =
        getIncrementValue(input, query.feature, timestampFeature)
      setAggregateValue(output, query, aggregateOutputs, incrementValue)
      true
    }
  }
}
/*
 * Collects the personal data types of every defined input feature into one set.
 * Features that are None, or whose PDT optional is not present, contribute nothing.
 * Returns null (not an empty set) when nothing was collected.
 */
def derivePersonalDataTypes(features: Option[Feature[_]]*): JSet[PersonalDataType] = {
  val collected = new JHashSet[PersonalDataType]()
  features.foreach { featureOpt =>
    featureOpt.foreach { feature =>
      val pdtSetOptional = feature.getPersonalDataTypes
      if (pdtSetOptional.isPresent) {
        collected.addAll(pdtSetOptional.get)
      }
    }
  }
  if (collected.isEmpty) null else collected
}
/**
 * Count operator: each record contributes 1.0 when the feature is present in
 * the record (or when no feature is given), and 0.0 otherwise.
 */
case class TypedCountMetric[T](
) extends TypedSumLikeMetric[T] {
  import AggregationMetricCommon._
  import ConversionUtils._
  override val operatorName = "count"

  /**
   * @param record record to inspect
   * @param feature feature whose presence is counted; None counts every record
   * @param timestampFeature feature holding the record's millisecond timestamp
   * @return 1.0 or 0.0, stamped with the record's timestamp
   */
  override def getIncrementValue(
    record: DataRecord,
    feature: Option[Feature[T]],
    timestampFeature: Feature[JLong]
  ): TimedValue[Double] = {
    // An absent feature means "count everything".
    val shouldCount: Boolean =
      feature.forall(f => SRichDataRecord(record).hasFeature(f))
    val increment = booleanToDouble(shouldCount)
    val recordTime = Time.fromMilliseconds(getTimestamp(record, timestampFeature))

    TimedValue[Double](value = increment, timestamp = recordTime)
  }
}
/**
 * A "human-readable" metric that can be applied to features of multiple
 * different types. Wrapper around AggregationMetric used as syntactic sugar
 * for easier config.
 */
trait EasyMetric extends Serializable {
  /*
   * Given a feature type, fetches the correct underlying AggregationMetric
   * to perform this operation over the given feature type, if any. If no such
   * metric is available, returns None. For example, MEAN cannot be applied
   * to FeatureType.String and would return None.
   *
   * @param featureType Type of feature to fetch metric for
   * @return Strongly typed aggregation metric to use for this feature type,
   *         or None if this metric does not support the feature type
   *
   * For example, if the EasyMetric is MEAN and the featureType is
   * FeatureType.Continuous, the underlying AggregationMetric should be a
   * scalar mean. If the EasyMetric is MEAN and the featureType is
   * FeatureType.SparseContinuous, the AggregationMetric returned could be a
   * "vector" mean that averages sparse maps. Using the single logical name
   * MEAN for both is nice syntactic sugar making for an easier to read top
   * level config, though different underlying operators are used underneath
   * for the actual implementation.
   */
  def forFeatureType[T](
    featureType: FeatureType,
  ): Option[AggregationMetric[T, _]]
}
/**
 * Returns the precomputed output feature for (aggregate query, output name),
 * building and caching it on first use. On a miss the fully qualified name is
 * taken from featureNameCache (itself filled via uncachedFullFeatureName()),
 * and a Feature of the requested type is constructed with the personal data
 * types derived from the query's feature and label. Should only be called at
 * pre-computation time.
 *
 * @param query Details of aggregate feature
 * @param name Name of "output" feature we want to precompute
 * @param aggregateFeatureType type of "output" feature we want to precompute
 */
def cachedFullFeature(
  query: AggregateFeature[T],
  name: String,
  aggregateFeatureType: FeatureType
): Feature[_] = {
  val cacheKey = (query, name)

  // Only evaluated when a branch below actually builds a feature.
  def fullName: String =
    featureNameCache.getOrElseUpdate(cacheKey, uncachedFullFeatureName(query, name))

  def buildFeature(): Feature[_] = {
    val pdts = AggregationMetricCommon.derivePersonalDataTypes(query.feature, query.label)

    aggregateFeatureType match {
      case FeatureType.BINARY => new Feature.Binary(fullName, pdts)
      case FeatureType.DISCRETE => new Feature.Discrete(fullName, pdts)
      case FeatureType.STRING => new Feature.Text(fullName, pdts)
      case FeatureType.CONTINUOUS => new Feature.Continuous(fullName, pdts)
      case FeatureType.SPARSE_BINARY => new Feature.SparseBinary(fullName, pdts)
      case FeatureType.SPARSE_CONTINUOUS => new Feature.SparseContinuous(fullName, pdts)
    }
  }

  // The second argument is by-name, so buildFeature() only runs on a cache miss.
  featureCache.getOrElseUpdate(cacheKey, buildFeature())
}
/**
 * This metric measures how recently an action has taken place. A value of 1.0
 * indicates the action happened just now. This value decays with time if the
 * action has not taken place and is reset to 1 when the action happens. So lower
 * value indicates a stale or older action.
 *
 * For example consider an action of "user liking a video". The last reset metric
 * value changes as follows for a half life of 1 day.
 *
 * ----------------------------------------------------------------------------
 *  day  |         action           |  feature value |      Description
 * ----------------------------------------------------------------------------
 *   1   | user likes the video     |      1.0       |    Set the value to 1
 *   2   | user does not like video |      0.5       |    Decay the value
 *   3   | user does not like video |      0.25      |    Decay the value
 *   4   | user likes the video     |      1.0       |    Reset the value to 1
 * -----------------------------------------------------------------------------
 *
 * @tparam T type of the feature whose presence marks the action
 */
case class TypedLastResetMetric[T]() extends TimedValueAggregationMetric[T] {
  import AggregationMetricCommon._

  override val operatorName = "last_reset"

  /**
   * @param record record to inspect
   * @param feature feature marking the action; None treats every record as an action
   * @param timestampFeature feature holding the record's millisecond timestamp
   * @return 1.0 if the action is present in the record, else 0.0, stamped with
   *         the record's timestamp
   */
  override def getIncrementValue(
    record: DataRecord,
    feature: Option[Feature[T]],
    timestampFeature: Feature[JLong]
  ): TimedValue[Double] = {
    val featureExists: Boolean = feature match {
      case Some(f) => SRichDataRecord(record).hasFeature(f)
      case None => true
    }

    TimedValue[Double](
      value = booleanToDouble(featureExists),
      timestamp = Time.fromMilliseconds(getTimestamp(record, timestampFeature))
    )
  }

  /**
   * Exponentially decays ''olderTimedValue'' forward to ''newerTimestamp''.
   *
   * @return the decayed value, or 0.0 when the half life is zero or the result
   *         is not above AggregationMetricCommon.Epsilon
   */
  private def getDecayedValue(
    olderTimedValue: TimedValue[Double],
    newerTimestamp: Time,
    halfLife: Duration
  ): Double = {
    if (halfLife.inMilliseconds == 0L) {
      0.0
    } else {
      val timeDelta = newerTimestamp.inMilliseconds - olderTimedValue.timestamp.inMilliseconds
      // Both operands are Long: divide as Double so fractions of a half life
      // decay too. Integer division truncated the exponent, so e.g. a gap of
      // 0.9 half lives produced no decay at all.
      val elapsedHalfLives = timeDelta.toDouble / halfLife.inMillis
      val resultValue = olderTimedValue.value / math.pow(2.0, elapsedHalfLives)
      if (resultValue > AggregationMetricCommon.Epsilon) resultValue else 0.0
    }
  }

  /**
   * Keeps the larger of the newer value and the (optionally decayed) older
   * value, stamped with the newer timestamp. On equal timestamps ''right'' is
   * treated as the newer value.
   *
   * @param left Left timed value
   * @param right Right timed value
   * @param halfLife decay half life; Duration.Top disables decay
   */
  override def plus(
    left: TimedValue[Double],
    right: TimedValue[Double],
    halfLife: Duration
  ): TimedValue[Double] = {

    val (newerTimedValue, olderTimedValue) = if (left.timestamp > right.timestamp) {
      (left, right)
    } else {
      (right, left)
    }

    val optionallyDecayedOlderValue = if (halfLife == Duration.Top) {
      // Since we don't want to decay, older value is not changed
      olderTimedValue.value
    } else {
      // Decay older value
      getDecayedValue(olderTimedValue, newerTimedValue.timestamp, halfLife)
    }

    TimedValue[Double](
      value = max(newerTimedValue.value, optionallyDecayedOlderValue),
      timestamp = newerTimedValue.timestamp
    )
  }

  /** Zero is 0.0 at epoch; ''timeOpt'' is not used by this metric. */
  override def zero(timeOpt: Option[Long]): TimedValue[Double] = TimedValue[Double](
    value = 0.0,
    timestamp = Time.fromMilliseconds(0)
  )
}
+ */
+object LastResetMetric extends EasyMetric {
+  // `featureType` is not inspected: a TypedLastResetMetric is returned for every type.
+  override def forFeatureType[T](
+    featureType: FeatureType
+  ): Option[AggregationMetric[T, _]] = {
+    val metric = TypedLastResetMetric[T]()
+    Some(metric)
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/LatestMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/LatestMetric.scala
new file mode 100644
index 000000000..08bd6483a
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/LatestMetric.scala
@@ -0,0 +1,69 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.ml.api.util.SRichDataRecord
+import com.twitter.ml.api.DataRecord
+import com.twitter.ml.api.Feature
+import com.twitter.ml.api.FeatureType
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon.getTimestamp
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetric
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.EasyMetric
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import java.lang.{Double => JDouble}
+import java.lang.{Long => JLong}
+import java.lang.{Number => JNumber}
+
+/**
+ * Aggregation metric that keeps the value carried by the operand with the newest timestamp.
+ *
+ * @param defaultValue value used for a record in which the feature has no value
+ * @tparam T numeric type of the underlying feature
+ */
+case class TypedLatestMetric[T <: JNumber](defaultValue: Double = 0.0)
+    extends TimedValueAggregationMetric[T] {
+  override val operatorName = "latest"
+
+  /**
+   * Returns `left` when its timestamp is strictly newer, otherwise `right`
+   * (so `right` wins a tie). `halfLife` must be `Duration.Top`.
+   */
+  override def plus(
+    left: TimedValue[Double],
+    right: TimedValue[Double],
+    halfLife: Duration
+  ): TimedValue[Double] = {
+    // NOTE(review): this compares the string form, whereas TypedLastResetMetric and
+    // TypedSumLikeMetric test `halfLife == Duration.Top` directly -- confirm whether
+    // the string comparison is deliberate before unifying.
+    assert(
+      halfLife.toString == "Duration.Top",
+      s"halfLife must be Duration.Top when using latest metric, but ${halfLife.toString} is used"
+    )
+
+    if (left.timestamp > right.timestamp) left else right
+  }
+
+  /**
+   * Reads the feature value from `dataRecord` as a Double, falling back to
+   * `defaultValue` when no feature is given or the record has no value for it.
+   */
+  override def getIncrementValue(
+    dataRecord: DataRecord,
+    feature: Option[Feature[T]],
+    timestampFeature: Feature[JLong]
+  ): TimedValue[Double] = {
+    val observed = feature.flatMap { f =>
+      SRichDataRecord(dataRecord).getFeatureValueOpt(f)
+    }
+    val latestValue = observed match {
+      case Some(number) => number.doubleValue()
+      case None => defaultValue
+    }
+    val recordTime = Time.fromMilliseconds(getTimestamp(dataRecord, timestampFeature))
+    TimedValue[Double](value = latestValue, timestamp = recordTime)
+  }
+
+  /** Neutral element: value 0.0 (not `defaultValue`) stamped at the epoch. */
+  override def zero(timeOpt: Option[Long]): TimedValue[Double] = {
+    val epoch = Time.fromMilliseconds(0)
+    TimedValue[Double](value = 0.0, timestamp = epoch)
+  }
+}
+
+object LatestMetric extends EasyMetric {
+  /** Supports CONTINUOUS and DISCRETE features; every other type yields `None`. */
+  override def forFeatureType[T](
+    featureType: FeatureType
+  ): Option[AggregationMetric[T, _]] = {
+    if (featureType == FeatureType.CONTINUOUS) {
+      Some(TypedLatestMetric[JDouble]().asInstanceOf[AggregationMetric[T, Double]])
+    } else if (featureType == FeatureType.DISCRETE) {
+      Some(TypedLatestMetric[JLong]().asInstanceOf[AggregationMetric[T, Double]])
+    } else {
+      None
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/MaxMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/MaxMetric.scala
new file mode 100644
index 000000000..b9e9176bb
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/MaxMetric.scala
@@ -0,0 +1,64 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.ml.api.util.SRichDataRecord
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon.getTimestamp
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import java.lang.{Long => JLong}
+import java.lang.{Number => JNumber}
+import java.lang.{Double => JDouble}
+import scala.math.max
+
+/**
+ * Aggregation metric that keeps the largest value seen.
+ *
+ * @param defaultValue value used for a record in which the feature has no value
+ * @tparam T numeric type of the underlying feature
+ */
+case class TypedMaxMetric[T <: JNumber](defaultValue: Double = 0.0)
+    extends TimedValueAggregationMetric[T] {
+  override val operatorName = "max"
+
+  /**
+   * Reads the feature value from `dataRecord` as a Double, falling back to
+   * `defaultValue` when no feature is given or the record has no value for it.
+   */
+  override def getIncrementValue(
+    dataRecord: DataRecord,
+    feature: Option[Feature[T]],
+    timestampFeature: Feature[JLong]
+  ): TimedValue[Double] = {
+    val observed = feature.flatMap { f =>
+      SRichDataRecord(dataRecord).getFeatureValueOpt(f)
+    }
+    val candidate = observed match {
+      case Some(number) => number.doubleValue()
+      case None => defaultValue
+    }
+    val recordTime = Time.fromMilliseconds(getTimestamp(dataRecord, timestampFeature))
+    TimedValue[Double](value = candidate, timestamp = recordTime)
+  }
+
+  /**
+   * Returns the larger of the two values, stamped with the later of the two
+   * timestamps. `halfLife` must be `Duration.Top`.
+   */
+  override def plus(
+    left: TimedValue[Double],
+    right: TimedValue[Double],
+    halfLife: Duration
+  ): TimedValue[Double] = {
+    // NOTE(review): string comparison, unlike the `halfLife == Duration.Top` test used
+    // by TypedLastResetMetric and TypedSumLikeMetric -- confirm whether deliberate.
+    assert(
+      halfLife.toString == "Duration.Top",
+      s"halfLife must be Duration.Top when using max metric, but ${halfLife.toString} is used"
+    )
+
+    val largest = max(left.value, right.value)
+    val newest = left.timestamp.max(right.timestamp)
+    TimedValue[Double](value = largest, timestamp = newest)
+  }
+
+  /** Neutral element: value 0.0 stamped at the epoch; `timeOpt` is not used. */
+  override def zero(timeOpt: Option[Long]): TimedValue[Double] = {
+    val epoch = Time.fromMilliseconds(0)
+    TimedValue[Double](value = 0.0, timestamp = epoch)
+  }
+}
+
+object MaxMetric extends EasyMetric {
+  /** Supports CONTINUOUS and DISCRETE features; every other type yields `None`. */
+  def forFeatureType[T](
+    featureType: FeatureType,
+  ): Option[AggregationMetric[T, _]] = {
+    if (featureType == FeatureType.CONTINUOUS) {
+      Some(TypedMaxMetric[JDouble]().asInstanceOf[AggregationMetric[T, Double]])
+    } else if (featureType == FeatureType.DISCRETE) {
+      Some(TypedMaxMetric[JLong]().asInstanceOf[AggregationMetric[T, Double]])
+    } else {
+      None
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/SumLikeMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumLikeMetric.scala
new file mode 100644
index 000000000..1f7aeb58a
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumLikeMetric.scala
@@ -0,0 +1,66 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import java.lang.{Double => JDouble}
+import java.lang.{Long => JLong}
+import java.util.{Map => JMap}
+
+/*
+ * TypedSumLikeMetric aggregates a sum over any feature transform.
+ * TypedCountMetric, TypedSumMetric, TypedSumSqMetric are examples
+ * of metrics that are inherited from this trait. To implement a new
+ * "sum like" metric, override the getIncrementValue() and operatorName
+ * members of this trait.
+ *
+ * getIncrementValue() is inherited from the
+ * parent trait AggregationMetric, but not overridden in this trait, so
+ * it needs to be implemented by any metric that extends TypedSumLikeMetric.
+ *
+ * operatorName is a string used for naming the resultant aggregate feature
+ * (e.g. "count" if it's a count feature, or "sum" if a sum feature).
+ */
+trait TypedSumLikeMetric[T] extends TimedValueAggregationMetric[T] {
+  import AggregationMetricCommon._
+
+  /* When true, zero() is stamped with the caller-supplied time instead of the epoch. */
+  def useFixedDecay = true
+
+  /*
+   * Sums two timed values and stamps the result with the later timestamp.
+   * With halfLife == Duration.Top the values are added directly; otherwise both
+   * are converted with toDecayedValue and combined through decayedValueMonoid.
+   */
+  override def plus(
+    left: TimedValue[Double],
+    right: TimedValue[Double],
+    halfLife: Duration
+  ): TimedValue[Double] = {
+    val summed =
+      if (halfLife != Duration.Top) {
+        val decayedLeft = toDecayedValue(left, halfLife)
+        val decayedRight = toDecayedValue(right, halfLife)
+        decayedValueMonoid.plus(decayedLeft, decayedRight).value
+      } else {
+        /* We could use decayedValueMonoid here, but
+         * a simple addition is slightly more accurate */
+        left.value + right.value
+      }
+
+    TimedValue[Double](summed, left.timestamp.max(right.timestamp))
+  }
+
+  /*
+   * Neutral element with value 0.0. Its timestamp is timeOpt (or 0 when absent)
+   * if useFixedDecay is set, and 0 otherwise.
+   */
+  override def zero(timeOpt: Option[Long]): TimedValue[Double] = {
+    /*
+     * Please see TQ-11279 for documentation for this fix to the decay logic.
+     */
+    val zeroMillis = if (useFixedDecay) timeOpt.getOrElse(0L) else 0L
+
+    TimedValue[Double](
+      value = 0.0,
+      timestamp = Time.fromMilliseconds(zeroMillis)
+    )
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/SumMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumMetric.scala
new file mode 100644
index 000000000..bd93d5bae
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumMetric.scala
@@ -0,0 +1,52 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.ml.api.util.SRichDataRecord
+import com.twitter.util.Time
+import java.lang.{Double => JDouble}
+import java.lang.{Long => JLong}
+
+case class TypedSumMetric() extends TypedSumLikeMetric[JDouble] {
+  import AggregationMetricCommon._
+
+  override val operatorName = "sum"
+
+  /*
+   * Transform feature -> its value in the given record,
+   * or 0 when feature = None (sum has no meaning in this case).
+   * A feature that has no value in the record also contributes 0.
+   */
+  override def getIncrementValue(
+    record: DataRecord,
+    feature: Option[Feature[JDouble]],
+    timestampFeature: Feature[JLong]
+  ): TimedValue[Double] = {
+    val increment = feature match {
+      case Some(f) =>
+        Option(SRichDataRecord(record).getFeatureValue(f)).map(_.toDouble).getOrElse(0.0)
+      case None => 0.0
+    }
+
+    TimedValue[Double](
+      value = increment,
+      timestamp = Time.fromMilliseconds(getTimestamp(record, timestampFeature))
+    )
+  }
+}
+
+/**
+ * Syntactic sugar for the sum metric that works with continuous features.
+ * See EasyMetric.scala for more details on why this is useful.
+ */
+object SumMetric extends EasyMetric {
+  // Only CONTINUOUS features are supported; every other type yields None.
+  override def forFeatureType[T](
+    featureType: FeatureType
+  ): Option[AggregationMetric[T, _]] = {
+    if (featureType == FeatureType.CONTINUOUS) {
+      Some(TypedSumMetric().asInstanceOf[AggregationMetric[T, Double]])
+    } else {
+      None
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/SumSqMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumSqMetric.scala
new file mode 100644
index 000000000..b24b16377
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/SumSqMetric.scala
@@ -0,0 +1,53 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.ml.api.util.SRichDataRecord
+import com.twitter.util.Time
+import java.lang.{Double => JDouble}
+import java.lang.{Long => JLong}
+
+case class TypedSumSqMetric() extends TypedSumLikeMetric[JDouble] {
+  import AggregationMetricCommon._
+
+  override val operatorName = "sumsq"
+
+  /*
+   * Transform feature -> its squared value in the given record
+   * or 0 when feature = None (sumsq has no meaning in this case).
+   * A feature that has no value in the record also contributes 0.
+   */
+  override def getIncrementValue(
+    record: DataRecord,
+    feature: Option[Feature[JDouble]],
+    timestampFeature: Feature[JLong]
+  ): TimedValue[Double] = {
+    val featureVal = feature match {
+      case Some(f) =>
+        Option(SRichDataRecord(record).getFeatureValue(f)).map(_.toDouble).getOrElse(0.0)
+      case None => 0.0
+    }
+
+    TimedValue[Double](
+      value = featureVal * featureVal,
+      timestamp = Time.fromMilliseconds(getTimestamp(record, timestampFeature))
+    )
+  }
+}
+
+/**
+ * Syntactic sugar for the sum of squares metric that works with continuous features.
+ * See EasyMetric.scala for more details on why this is useful.
+ */
+object SumSqMetric extends EasyMetric {
+  // Only CONTINUOUS features are supported; every other type yields None.
+  override def forFeatureType[T](
+    featureType: FeatureType
+  ): Option[AggregationMetric[T, _]] = {
+    if (featureType == FeatureType.CONTINUOUS) {
+      Some(TypedSumSqMetric().asInstanceOf[AggregationMetric[T, Double]])
+    } else {
+      None
+    }
+  }
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValue.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValue.scala
new file mode 100644
index 000000000..7f9fb5090
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValue.scala
@@ -0,0 +1,14 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.util.Time
+
+/**
+ * Case class wrapping a (value, timestamp) tuple.
+ * All aggregate metrics must operate over this class
+ * to ensure we can implement decay and half lives for them.
+ * This is translated to an algebird DecayedValue under the hood.
+ *
+ * @param value Value being wrapped
+ * @param timestamp Time after epoch at which value is being measured
+ */
+case class TimedValue[T](value: T, timestamp: Time)
diff --git a/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValueAggregationMetric.scala b/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValueAggregationMetric.scala
new file mode 100644
index 000000000..f31152a23
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/metrics/TimedValueAggregationMetric.scala
@@ -0,0 +1,90 @@
+package com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics
+
+import com.twitter.ml.api._
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregateFeature
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.TimedValue
+import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetric
+import com.twitter.util.Duration
+import com.twitter.util.Time
+import java.lang.{Double => JDouble}
+import java.lang.{Long => JLong}
+import java.util.{Map => JMap}
+
+/*
+ * TimedValueAggregationMetric overrides the AggregationMetric methods dealing
+ * with reading and writing continuous values from a data record.
+ *
+ * operatorName is a string used for naming the resultant aggregate feature
+ * (e.g. "count" if it's a count feature, or "sum" if a sum feature).
+ */
+trait TimedValueAggregationMetric[T] extends AggregationMetric[T, Double] {
+  import AggregationMetricCommon._
+
+  val operatorName: String
+
+  /*
+   * Reads the aggregate for `query` from `record`. Returns the stored continuous
+   * value paired with the record's timestamp when both are present, and
+   * zero(timestamp option) otherwise.
+   */
+  override def getAggregateValue(
+    record: DataRecord,
+    query: AggregateFeature[T],
+    aggregateOutputs: Option[List[JLong]] = None
+  ): TimedValue[Double] = {
+    /*
+     * We know aggregateOutputs(0) will have the continuous feature,
+     * since we put it there in getOutputFeatureIds() - see code below.
+     * This helps us get a 4x speedup. Using any structure more complex
+     * than a list was also a performance bottleneck.
+     */
+    val featureHash: JLong = aggregateOutputs.getOrElse(getOutputFeatureIds(query)).head
+
+    // Either feature map may be null on the record, hence the Option wrappers.
+    val continuousValueOption: Option[Double] =
+      for {
+        continuousMap <- Option(record.continuousFeatures)
+        boxedValue <- Option(continuousMap.get(featureHash))
+      } yield boxedValue.toDouble
+
+    val timeOption: Option[Long] =
+      for {
+        discreteMap <- Option(record.discreteFeatures)
+        boxedTime <- Option(discreteMap.get(TimestampHash))
+      } yield boxedTime.toLong
+
+    val timedValueOption: Option[TimedValue[Double]] =
+      for {
+        featureValue <- continuousValueOption
+        timestamp <- timeOption
+      } yield TimedValue[Double](featureValue, Time.fromMilliseconds(timestamp))
+
+    timedValueOption.getOrElse(zero(timeOption))
+  }
+
+  /*
+   * Writes value.value into the continuous features of `record` under the first
+   * output feature id of `query`. Zero values and the timestamp are not written.
+   */
+  override def setAggregateValue(
+    record: DataRecord,
+    query: AggregateFeature[T],
+    aggregateOutputs: Option[List[JLong]] = None,
+    value: TimedValue[Double]
+  ): Unit = {
+    /*
+     * We know aggregateOutputs(0) will have the continuous feature,
+     * since we put it there in getOutputFeatureIds() - see code below.
+     * This helps us get a 4x speedup. Using any structure more complex
+     * than a list was also a performance bottleneck.
+     */
+    val outputIds = aggregateOutputs.getOrElse(getOutputFeatureIds(query))
+    val featureHash: JLong = outputIds.head
+
+    /* Only set value if non-zero to save space */
+    val aggregate = value.value
+    if (aggregate != 0.0) {
+      record.putToContinuousFeatures(featureHash, aggregate)
+    }
+
+    /*
+     * We do not set timestamp since that might affect correctness of
+     * future aggregations due to the decay semantics.
+     */
+  }
+
+  /* Only one feature stored in the aggregated datarecord: the result continuous value */
+  override def getOutputFeatures(query: AggregateFeature[T]): List[Feature[_]] =
+    List(cachedFullFeature(query, operatorName, FeatureType.CONTINUOUS))
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/package.scala b/timelines/data_processing/ml_util/aggregation_framework/package.scala
new file mode 100644
index 000000000..824398a7f
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/package.scala
@@ -0,0 +1,19 @@
+package com.twitter.timelines.data_processing.ml_util
+
+import com.twitter.ml.api.DataRecord
+
+package object aggregation_framework {
+  object AggregateType extends Enumeration {
+    type AggregateType = Value
+    val User, UserAuthor, UserEngager, UserMention, UserRequestHour, UserRequestDow,
+      UserOriginalAuthor, UserList, UserTopic, UserInferredTopic, UserMediaUnderstandingAnnotation =
+      Value
+  }
+
+  type AggregateUserEntityKey = (Long, AggregateType.Value, Option[Long])
+
+  case class MergedRecordsDescriptor(
+    userId: Long,
+    keyedRecords: Map[AggregateType.Value, Option[KeyedRecord]],
+    keyedRecordMaps: Map[AggregateType.Value, Option[KeyedRecordMap]])
+}
diff --git a/timelines/data_processing/ml_util/aggregation_framework/query/BUILD b/timelines/data_processing/ml_util/aggregation_framework/query/BUILD
new file mode 100644
index 000000000..97e6d1ea7
--- /dev/null
+++ b/timelines/data_processing/ml_util/aggregation_framework/query/BUILD
@@ -0,0 +1,12 @@
+scala_library(
+    sources = ["*.scala"],
+    platform = "java8",
+    tags = ["bazel-compatible"],
+    dependencies = [
+        "finagle/finagle-stats",
+        "src/java/com/twitter/ml/api:api-base",
+        "src/thrift/com/twitter/ml/api:data-scala",
+        "src/thrift/com/twitter/ml/api:interpretable-model-java",
+        "timelines/data_processing/ml_util/aggregation_framework/metrics",
+    ],
+)
diff --git
a/timelines/data_processing/ml_util/aggregation_framework/query/ScopedAggregateBuilder.scala b/timelines/data_processing/ml_util/aggregation_framework/query/ScopedAggregateBuilder.scala new file mode 100644 index 000000000..2fcce3312 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/query/ScopedAggregateBuilder.scala @@ -0,0 +1,159 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.query + +import com.twitter.dal.personal_data.thriftjava.PersonalDataType +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureBuilder +import com.twitter.ml.api.FeatureContext +import com.twitter.ml.api.thriftscala.{DataRecord => ScalaDataRecord} +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon +import java.lang.{Double => JDouble} +import java.lang.{Long => JLong} +import scala.collection.JavaConverters._ + +/** + * Provides methods to build "scoped" aggregates, where base features generated by aggregates + * V2 are scoped with a specific key. + * + * The class provides methods that take a Map of T -> DataRecord, where T is a key type, and + * the DataRecord contains features produced by the aggregation_framework. The methods then + * generate a _new_ DataRecord, containing "scoped" aggregate features, where each scoped + * feature has the value of the scope key in the feature name, and the value of the feature + * is the value of the original aggregate feature in the corresponding value from the original + * Map. + * + * For efficiency reasons, the builder is initialized with the set of features that should be + * scoped and the set of keys for which scoping should be supported. 
+ * + * To understand how scope feature names are constructed, consider the following: + * + * {{{ + * val features = Set( + * new Feature.Continuous("user_injection_aggregate.pair.any_label.any_feature.5.days.count"), + * new Feature.Continuous("user_injection_aggregate.pair.any_label.any_feature.10.days.count") + * ) + * val scopes = Set(SuggestType.Recap, SuggestType.WhoToFollow) + * val scopeName = "InjectionType" + * val scopedAggregateBuilder = ScopedAggregateBuilder(features, scopes, scopeName) + * + * }}} + * + * Then, generated scoped features would be among the following: + * - user_injection_aggregate.scoped.pair.any_label.any_feature.5.days.count/scope_name=InjectionType/scope=Recap + * - user_injection_aggregate.scoped.pair.any_label.any_feature.5.days.count/scope_name=InjectionType/scope=WhoToFollow + * - user_injection_aggregate.scoped.pair.any_label.any_feature.10.days.count/scope_name=InjectionType/scope=Recap + * - user_injection_aggregate.scoped.pair.any_label.any_feature.10.days.count/scope_name=InjectionType/scope=WhoToFollow + * + * @param featuresToScope the set of features for which one should generate scoped versions + * @param scopeKeys the set of scope keys to generate scopes with + * @param scopeName a string indicating what the scopes represent. 
This is also added to the scoped feature + * @tparam K the type of scope key + */ +class ScopedAggregateBuilder[K]( + featuresToScope: Set[Feature[JDouble]], + scopeKeys: Set[K], + scopeName: String) { + + private[this] def buildScopedAggregateFeature( + baseName: String, + scopeValue: String, + personalDataTypes: java.util.Set[PersonalDataType] + ): Feature[JDouble] = { + val components = baseName.split("\\.").toList + + val newName = (components.head :: "scoped" :: components.tail).mkString(".") + + new FeatureBuilder.Continuous() + .addExtensionDimensions("scope_name", "scope") + .setBaseName(newName) + .setPersonalDataTypes(personalDataTypes) + .extensionBuilder() + .addExtension("scope_name", scopeName) + .addExtension("scope", scopeValue) + .build() + } + + /** + * Index of (base aggregate feature name, key) -> key scoped count feature. + */ + private[this] val keyScopedAggregateMap: Map[(String, K), Feature[JDouble]] = { + featuresToScope.flatMap { feat => + scopeKeys.map { key => + (feat.getFeatureName, key) -> + buildScopedAggregateFeature( + feat.getFeatureName, + key.toString, + AggregationMetricCommon.derivePersonalDataTypes(Some(feat)) + ) + } + }.toMap + } + + type ContinuousFeaturesMap = Map[JLong, JDouble] + + /** + * Create key-scoped features for raw aggregate feature ID to value maps, partitioned by key. + */ + private[this] def buildAggregates(featureMapsByKey: Map[K, ContinuousFeaturesMap]): DataRecord = { + val continuousFeatures = featureMapsByKey + .flatMap { + case (key, featureMap) => + featuresToScope.flatMap { feature => + val newFeatureOpt = keyScopedAggregateMap.get((feature.getFeatureName, key)) + newFeatureOpt.flatMap { newFeature => + featureMap.get(feature.getFeatureId).map(new JLong(newFeature.getFeatureId) -> _) + } + }.toMap + } + + new DataRecord().setContinuousFeatures(continuousFeatures.asJava) + } + + /** + * Create key-scoped features for Java [[DataRecord]] aggregate records partitioned by key. 
+   *
+   * As an example, if the provided Map includes the key `SuggestType.Recap`, and [[scopeKeys]]
+   * includes this key, then for a feature "xyz.pair.any_label.any_feature.5.days.count", the method
+   * will generate the scoped feature "xyz.scoped.pair.any_label.any_feature.5.days.count/scope_name=InjectionType/scope=Recap",
+   * with the value being the value of the original feature from the Map.
+   *
+   * A record whose continuous feature map is unset (null) contributes no scoped features.
+   *
+   * @param aggregatesByKey a map from scope key to the Java [[DataRecord]] holding that key's
+   *                        aggregate features; only its continuous features are read
+   * @return a Java [[DataRecord]] containing key-scoped features
+   */
+  def buildAggregatesJava(aggregatesByKey: Map[K, DataRecord]): DataRecord = {
+    val featureMapsByKey = aggregatesByKey.mapValues { record =>
+      // `continuousFeatures` may be null on a record (getAggregateValue guards it the
+      // same way); calling `.asScala.toMap` on it directly would throw.
+      Option(record.continuousFeatures)
+        .map(_.asScala.toMap)
+        .getOrElse(Map.empty[JLong, JDouble])
+    }
+    buildAggregates(featureMapsByKey)
+  }
+
+  /**
+   * Create key-scoped features for Scala [[DataRecord]] aggregate records partitioned by key.
+   *
+   * As an example, if the provided Map includes the key `SuggestType.Recap`, and [[scopeKeys]]
+   * includes this key, then for a feature "xyz.pair.any_label.any_feature.5.days.count", the method
+   * will generate the scoped feature "xyz.scoped.pair.any_label.any_feature.5.days.count/scope_name=InjectionType/scope=Recap",
+   * with the value being the value of the original feature from the Map.
+   *
+   * This is a convenience method for some use cases where aggregates are read from Scala
+   * thrift objects. Note that this still returns a Java [[DataRecord]], since most ML API
+   * use the Java version.
+   *
+   * @param aggregatesByKey a map from key to a continuous feature map (ie.
feature ID -> Double) + * @return a Java [[DataRecord]] containing key-scoped features + */ + def buildAggregatesScala(aggregatesByKey: Map[K, ScalaDataRecord]): DataRecord = { + val featureMapsByKey = + aggregatesByKey + .mapValues { record => + val featureMap = record.continuousFeatures.getOrElse(Map[Long, Double]()).toMap + featureMap.map { case (k, v) => new JLong(k) -> new JDouble(v) } + } + buildAggregates(featureMapsByKey) + } + + /** + * Returns a [[FeatureContext]] including all possible scoped features generated using this builder. + * + * @return a [[FeatureContext]] containing all scoped features. + */ + def scopedFeatureContext: FeatureContext = new FeatureContext(keyScopedAggregateMap.values.asJava) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregateFeaturesMerger.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregateFeaturesMerger.scala new file mode 100644 index 000000000..156168a9d --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregateFeaturesMerger.scala @@ -0,0 +1,213 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.ml.api._ +import com.twitter.ml.api.constant.SharedFeatures._ +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding.Stat +import com.twitter.scalding.typed.TypedPipe +import com.twitter.timelines.data_processing.ml_util.aggregation_framework._ +import com.twitter.timelines.data_processing.ml_util.sampling.SamplingUtils + +trait AggregateFeaturesMergerBase { + import Utils._ + + def samplingRateOpt: Option[Double] + def numReducers: Int = 2000 + def numReducersMerge: Int = 20000 + + def aggregationConfig: AggregationConfig + def storeRegister: StoreRegister + def storeMerger: StoreMerger + + def getAggregatePipe(storeName: String): DataSetPipe + def applyMaxSizeByTypeOpt(aggregateType: AggregateType.Value): Option[Int] = Option.empty[Int] + + def 
usersActiveSourcePipe: TypedPipe[Long] + def numRecords: Stat + def numFilteredRecords: Stat + + /* + * This method should only be called with a storeName that corresponds + * to a user aggregate store. + */ + def extractUserFeaturesMap(storeName: String): TypedPipe[(Long, KeyedRecord)] = { + val aggregateKey = storeRegister.storeNameToTypeMap(storeName) + samplingRateOpt + .map(rate => SamplingUtils.userBasedSample(getAggregatePipe(storeName), rate)) + .getOrElse(getAggregatePipe(storeName)) // must return store with only user aggregates + .records + .map { r: DataRecord => + val record = SRichDataRecord(r) + val userId = record.getFeatureValue(USER_ID).longValue + record.clearFeature(USER_ID) + (userId, KeyedRecord(aggregateKey, r)) + } + } + + /* + * When the secondaryKey being used is a String, then the shouldHash function should be set to true. + * Refactor such that the shouldHash parameter is removed and the behavior + * is defaulted to true. + * + * This method should only be called with a storeName that contains records with the + * desired secondaryKey. We provide secondaryKeyFilterPipeOpt against which secondary + * keys can be filtered to help prune the final merged MH dataset. 
+ */ + def extractSecondaryTuples[T]( + storeName: String, + secondaryKey: Feature[T], + shouldHash: Boolean = false, + maxSizeOpt: Option[Int] = None, + secondaryKeyFilterPipeOpt: Option[TypedPipe[Long]] = None + ): TypedPipe[(Long, KeyedRecordMap)] = { + val aggregateKey = storeRegister.storeNameToTypeMap(storeName) + + val extractedRecordsBySecondaryKey = + samplingRateOpt + .map(rate => SamplingUtils.userBasedSample(getAggregatePipe(storeName), rate)) + .getOrElse(getAggregatePipe(storeName)) + .records + .map { r: DataRecord => + val record = SRichDataRecord(r) + val userId = keyFromLong(r, USER_ID) + val secondaryId = extractSecondary(r, secondaryKey, shouldHash) + record.clearFeature(USER_ID) + record.clearFeature(secondaryKey) + + numRecords.inc() + (userId, secondaryId -> r) + } + + val grouped = + (secondaryKeyFilterPipeOpt match { + case Some(secondaryKeyFilterPipe: TypedPipe[Long]) => + extractedRecordsBySecondaryKey + .map { + // In this step, we swap `userId` with `secondaryId` to join on the `secondaryId` + // It is important to swap them back after the join, otherwise the job will fail. 
+ case (userId, (secondaryId, r)) => + (secondaryId, (userId, r)) + } + .join(secondaryKeyFilterPipe.groupBy(identity)) + .map { + case (secondaryId, ((userId, r), _)) => + numFilteredRecords.inc() + (userId, secondaryId -> r) + } + case _ => extractedRecordsBySecondaryKey + }).group + .withReducers(numReducers) + + maxSizeOpt match { + case Some(maxSize) => + grouped + .take(maxSize) + .mapValueStream(recordsIter => Iterator(KeyedRecordMap(aggregateKey, recordsIter.toMap))) + .toTypedPipe + case None => + grouped + .mapValueStream(recordsIter => Iterator(KeyedRecordMap(aggregateKey, recordsIter.toMap))) + .toTypedPipe + } + } + + def userPipes: Seq[TypedPipe[(Long, KeyedRecord)]] = + storeRegister.allStores.flatMap { storeConfig => + val StoreConfig(storeNames, aggregateType, _) = storeConfig + require(storeMerger.isValidToMerge(storeNames)) + + if (aggregateType == AggregateType.User) { + storeNames.map(extractUserFeaturesMap) + } else None + }.toSeq + + private def getSecondaryKeyFilterPipeOpt( + aggregateType: AggregateType.Value + ): Option[TypedPipe[Long]] = { + if (aggregateType == AggregateType.UserAuthor) { + Some(usersActiveSourcePipe) + } else None + } + + def userSecondaryKeyPipes: Seq[TypedPipe[(Long, KeyedRecordMap)]] = { + storeRegister.allStores.flatMap { storeConfig => + val StoreConfig(storeNames, aggregateType, shouldHash) = storeConfig + require(storeMerger.isValidToMerge(storeNames)) + + if (aggregateType != AggregateType.User) { + storeNames.flatMap { storeName => + storeConfig.secondaryKeyFeatureOpt + .map { secondaryFeature => + extractSecondaryTuples( + storeName, + secondaryFeature, + shouldHash, + applyMaxSizeByTypeOpt(aggregateType), + getSecondaryKeyFilterPipeOpt(aggregateType) + ) + } + } + } else None + }.toSeq + } + + def joinedAggregates: TypedPipe[(Long, MergedRecordsDescriptor)] = { + (userPipes ++ userSecondaryKeyPipes) + .reduce(_ ++ _) + .group + .withReducers(numReducersMerge) + .mapGroup { + case (uid, keyedRecordsAndMaps) => 
+ /* + * For every user, partition their records by aggregate type. + * AggregateType.User should only contain KeyedRecord whereas + * other aggregate types (with secondary keys) contain KeyedRecordMap. + */ + val (userRecords, userSecondaryKeyRecords) = keyedRecordsAndMaps.toList + .map { record => + record match { + case record: KeyedRecord => (record.aggregateType, record) + case record: KeyedRecordMap => (record.aggregateType, record) + } + } + .groupBy(_._1) + .mapValues(_.map(_._2)) + .partition(_._1 == AggregateType.User) + + val userAggregateRecordMap: Map[AggregateType.Value, Option[KeyedRecord]] = + userRecords + .asInstanceOf[Map[AggregateType.Value, List[KeyedRecord]]] + .map { + case (aggregateType, keyedRecords) => + val mergedKeyedRecordOpt = mergeKeyedRecordOpts(keyedRecords.map(Some(_)): _*) + (aggregateType, mergedKeyedRecordOpt) + } + + val userSecondaryKeyAggregateRecordOpt: Map[AggregateType.Value, Option[KeyedRecordMap]] = + userSecondaryKeyRecords + .asInstanceOf[Map[AggregateType.Value, List[KeyedRecordMap]]] + .map { + case (aggregateType, keyedRecordMaps) => + val keyedRecordMapOpt = + keyedRecordMaps.foldLeft(Option.empty[KeyedRecordMap]) { + (mergedRecOpt, nextRec) => + applyMaxSizeByTypeOpt(aggregateType) + .map { maxSize => + mergeKeyedRecordMapOpts(mergedRecOpt, Some(nextRec), maxSize) + }.getOrElse { + mergeKeyedRecordMapOpts(mergedRecOpt, Some(nextRec)) + } + } + (aggregateType, keyedRecordMapOpt) + } + + Iterator( + MergedRecordsDescriptor( + userId = uid, + keyedRecords = userAggregateRecordMap, + keyedRecordMaps = userSecondaryKeyAggregateRecordOpt + ) + ) + }.toTypedPipe + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesStoreComparisonJob.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesStoreComparisonJob.scala new file mode 100644 index 000000000..054d5d428 --- /dev/null +++ 
b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesStoreComparisonJob.scala @@ -0,0 +1,200 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.algebird.ScMapMonoid +import com.twitter.bijection.Injection +import com.twitter.bijection.thrift.CompactThriftCodec +import com.twitter.ml.api.util.CompactDataRecordConverter +import com.twitter.ml.api.CompactDataRecord +import com.twitter.ml.api.DataRecord +import com.twitter.scalding.commons.source.VersionedKeyValSource +import com.twitter.scalding.Args +import com.twitter.scalding.Days +import com.twitter.scalding.Duration +import com.twitter.scalding.RichDate +import com.twitter.scalding.TypedPipe +import com.twitter.scalding.TypedTsv +import com.twitter.scalding_internal.job.HasDateRange +import com.twitter.scalding_internal.job.analytics_batch.AnalyticsBatchJob +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird_internal.bijection.BatchPairImplicits +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKeyInjection +import java.lang.{Double => JDouble} +import java.lang.{Long => JLong} +import scala.collection.JavaConverters._ + +/** + * The job takes four inputs: + * - The path to a AggregateStore using the DataRecord format. + * - The path to a AggregateStore using the CompactDataRecord format. + * - A version that must be present in both sources. + * - A sink to write the comparison statistics. + * + * The job reads in the two stores, converts the second one to DataRecords and + * then compared each key to see if the two stores have identical DataRecords, + * modulo the loss in precision on converting the Double to Float. 
+ */ +class AggregatesStoreComparisonJob(args: Args) + extends AnalyticsBatchJob(args) + with BatchPairImplicits + with HasDateRange { + + import AggregatesStoreComparisonJob._ + override def batchIncrement: Duration = Days(1) + override def firstTime: RichDate = RichDate(args("firstTime")) + + private val dataRecordSourcePath = args("dataRecordSource") + private val compactDataRecordSourcePath = args("compactDataRecordSource") + + private val version = args.long("version") + + private val statsSink = args("sink") + + require(dataRecordSourcePath != compactDataRecordSourcePath) + + private val dataRecordSource = + VersionedKeyValSource[AggregationKey, (BatchID, DataRecord)]( + path = dataRecordSourcePath, + sourceVersion = Some(version) + ) + private val compactDataRecordSource = + VersionedKeyValSource[AggregationKey, (BatchID, CompactDataRecord)]( + path = compactDataRecordSourcePath, + sourceVersion = Some(version) + ) + + private val dataRecordPipe: TypedPipe[((AggregationKey, BatchID), DataRecord)] = TypedPipe + .from(dataRecordSource) + .map { case (key, (batchId, record)) => ((key, batchId), record) } + + private val compactDataRecordPipe: TypedPipe[((AggregationKey, BatchID), DataRecord)] = TypedPipe + .from(compactDataRecordSource) + .map { + case (key, (batchId, compactRecord)) => + val record = compactConverter.compactDataRecordToDataRecord(compactRecord) + ((key, batchId), record) + } + + dataRecordPipe + .outerJoin(compactDataRecordPipe) + .mapValues { case (leftOpt, rightOpt) => compareDataRecords(leftOpt, rightOpt) } + .values + .sum(mapMonoid) + .flatMap(_.toList) + .write(TypedTsv(statsSink)) +} + +object AggregatesStoreComparisonJob { + + val mapMonoid: ScMapMonoid[String, Long] = new ScMapMonoid[String, Long]() + + implicit private val aggregationKeyInjection: Injection[AggregationKey, Array[Byte]] = + AggregationKeyInjection + implicit private val aggregationKeyOrdering: Ordering[AggregationKey] = AggregationKeyOrdering + implicit private val 
dataRecordCodec: Injection[DataRecord, Array[Byte]] = + CompactThriftCodec[DataRecord] + implicit private val compactDataRecordCodec: Injection[CompactDataRecord, Array[Byte]] = + CompactThriftCodec[CompactDataRecord] + + private val compactConverter = new CompactDataRecordConverter + + val missingRecordFromLeft = "missingRecordFromLeft" + val missingRecordFromRight = "missingRecordFromRight" + val nonContinuousFeaturesDidNotMatch = "nonContinuousFeaturesDidNotMatch" + val missingFeaturesFromLeft = "missingFeaturesFromLeft" + val missingFeaturesFromRight = "missingFeaturesFromRight" + val recordsWithUnmatchedKeys = "recordsWithUnmatchedKeys" + val featureValuesMatched = "featureValuesMatched" + val featureValuesThatDidNotMatch = "featureValuesThatDidNotMatch" + val equalRecords = "equalRecords" + val keyCount = "keyCount" + + def compareDataRecords( + leftOpt: Option[DataRecord], + rightOpt: Option[DataRecord] + ): collection.Map[String, Long] = { + val stats = collection.Map((keyCount, 1L)) + (leftOpt, rightOpt) match { + case (Some(left), Some(right)) => + if (isIdenticalNonContinuousFeatureSet(left, right)) { + getContinuousFeaturesStats(left, right).foldLeft(stats)(mapMonoid.add) + } else { + mapMonoid.add(stats, (nonContinuousFeaturesDidNotMatch, 1L)) + } + case (Some(_), None) => mapMonoid.add(stats, (missingRecordFromRight, 1L)) + case (None, Some(_)) => mapMonoid.add(stats, (missingRecordFromLeft, 1L)) + case (None, None) => throw new IllegalArgumentException("Should never be possible") + } + } + + /** + * For Continuous features. 
+ */ + private def getContinuousFeaturesStats( + left: DataRecord, + right: DataRecord + ): Seq[(String, Long)] = { + val leftFeatures = Option(left.getContinuousFeatures) + .map(_.asScala.toMap) + .getOrElse(Map.empty[JLong, JDouble]) + + val rightFeatures = Option(right.getContinuousFeatures) + .map(_.asScala.toMap) + .getOrElse(Map.empty[JLong, JDouble]) + + val numMissingFeaturesLeft = (rightFeatures.keySet diff leftFeatures.keySet).size + val numMissingFeaturesRight = (leftFeatures.keySet diff rightFeatures.keySet).size + + if (numMissingFeaturesLeft == 0 && numMissingFeaturesRight == 0) { + val Epsilon = 1e-5 + val numUnmatchedValues = leftFeatures.map { + case (id, lValue) => + val rValue = rightFeatures(id) + // The approximate match is to account for the precision loss due to + // the Double -> Float -> Double conversion. + if (math.abs(lValue - rValue) <= Epsilon) 0L else 1L + }.sum + + if (numUnmatchedValues == 0) { + Seq( + (equalRecords, 1L), + (featureValuesMatched, leftFeatures.size.toLong) + ) + } else { + Seq( + (featureValuesThatDidNotMatch, numUnmatchedValues), + ( + featureValuesMatched, + math.max(leftFeatures.size, rightFeatures.size) - numUnmatchedValues) + ) + } + } else { + Seq( + (recordsWithUnmatchedKeys, 1L), + (missingFeaturesFromLeft, numMissingFeaturesLeft.toLong), + (missingFeaturesFromRight, numMissingFeaturesRight.toLong) + ) + } + } + + /** + * For feature types that are not Feature.Continuous. We expect these to match exactly in the two stores. 
+ * Mutable change + */ + private def isIdenticalNonContinuousFeatureSet(left: DataRecord, right: DataRecord): Boolean = { + val booleanMatched = safeEquals(left.binaryFeatures, right.binaryFeatures) + val discreteMatched = safeEquals(left.discreteFeatures, right.discreteFeatures) + val stringMatched = safeEquals(left.stringFeatures, right.stringFeatures) + val sparseBinaryMatched = safeEquals(left.sparseBinaryFeatures, right.sparseBinaryFeatures) + val sparseContinuousMatched = + safeEquals(left.sparseContinuousFeatures, right.sparseContinuousFeatures) + val blobMatched = safeEquals(left.blobFeatures, right.blobFeatures) + val tensorsMatched = safeEquals(left.tensors, right.tensors) + val sparseTensorsMatched = safeEquals(left.sparseTensors, right.sparseTensors) + + booleanMatched && discreteMatched && stringMatched && sparseBinaryMatched && + sparseContinuousMatched && blobMatched && tensorsMatched && sparseTensorsMatched + } + + def safeEquals[T](l: T, r: T): Boolean = Option(l).equals(Option(r)) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesV2ScaldingJob.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesV2ScaldingJob.scala new file mode 100644 index 000000000..aa8ae3612 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregatesV2ScaldingJob.scala @@ -0,0 +1,216 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.bijection.thrift.CompactThriftCodec +import com.twitter.bijection.Codec +import com.twitter.bijection.Injection +import com.twitter.ml.api._ +import com.twitter.ml.api.constant.SharedFeatures.TIMESTAMP +import com.twitter.ml.api.util.CompactDataRecordConverter +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding.Args +import com.twitter.scalding_internal.dalv2.DALWrite.D +import com.twitter.storehaus_internal.manhattan.ManhattanROConfig +import 
com.twitter.summingbird.batch.option.Reducers +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird.batch.Batcher +import com.twitter.summingbird.batch.Timestamp +import com.twitter.summingbird.option._ +import com.twitter.summingbird.scalding.Scalding +import com.twitter.summingbird.scalding.batch.{BatchedStore => ScaldingBatchedStore} +import com.twitter.summingbird.Options +import com.twitter.summingbird.Producer +import com.twitter.summingbird_internal.bijection.BatchPairImplicits._ +import com.twitter.summingbird_internal.runner.common.JobName +import com.twitter.summingbird_internal.runner.scalding.GenericRunner +import com.twitter.summingbird_internal.runner.scalding.ScaldingConfig +import com.twitter.summingbird_internal.runner.scalding.StatebirdState +import com.twitter.summingbird_internal.dalv2.DAL +import com.twitter.summingbird_internal.runner.store_config._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework._ +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding.sources._ +import job.AggregatesV2Job +import org.apache.hadoop.conf.Configuration +/* + * Offline scalding version of summingbird job to compute aggregates v2. + * This is loosely based on the template created by sb-gen. + * Extend this trait in your own scalding job, and override the val + * "aggregatesToCompute" with your own desired set of aggregates. 
+ */ +trait AggregatesV2ScaldingJob { + val aggregatesToCompute: Set[TypedAggregateGroup[_]] + + implicit val aggregationKeyInjection: Injection[AggregationKey, Array[Byte]] = + AggregationKeyInjection + + implicit val aggregationKeyOrdering: AggregationKeyOrdering.type = AggregationKeyOrdering + + implicit val dataRecordCodec: Injection[DataRecord, Array[Byte]] = CompactThriftCodec[DataRecord] + + private implicit val compactDataRecordCodec: Injection[CompactDataRecord, Array[Byte]] = + CompactThriftCodec[CompactDataRecord] + + private val compactDataRecordConverter = new CompactDataRecordConverter() + + def numReducers: Int = -1 + + /** + * Function that maps from a logical ''AggregateSource'' + * to an underlying physical source. The physical source + * for the scalding platform is a ScaldingAggregateSource. + */ + def dataRecordSourceToScalding( + source: AggregateSource + ): Option[Producer[Scalding, DataRecord]] = { + source match { + case offlineSource: OfflineAggregateSource => + Some(ScaldingAggregateSource(offlineSource).source) + case _ => None + } + } + + /** + * Creates and returns a versioned store using the config parameters + * with a specific number of versions to keep, and which can read from + * the most recent available version on HDFS rather than a specific + * version number. The store applies a timestamp correction based on the + * number of days of aggregate data skipped over at read time to ensure + * that skipping data plays nicely with halfLife decay. 
+ * + * @param config specifying the Manhattan store parameters + * @param versionsToKeep number of old versions to keep + */ + def getMostRecentLagCorrectingVersionedStoreWithRetention[ + Key: Codec: Ordering, + ValInStore: Codec, + ValInMemory + ]( + config: OfflineStoreOnlyConfig[ManhattanROConfig], + versionsToKeep: Int, + lagCorrector: (ValInMemory, Long) => ValInMemory, + packer: ValInMemory => ValInStore, + unpacker: ValInStore => ValInMemory + ): ScaldingBatchedStore[Key, ValInMemory] = { + MostRecentLagCorrectingVersionedStore[Key, ValInStore, ValInMemory]( + config.offline.hdfsPath.toString, + packer = packer, + unpacker = unpacker, + versionsToKeep = versionsToKeep)( + Injection.connect[(Key, (BatchID, ValInStore)), (Array[Byte], Array[Byte])], + config.batcher, + implicitly[Ordering[Key]], + lagCorrector + ).withInitialBatch(config.batcher.batchOf(config.startTime.value)) + } + + def mutablyCorrectDataRecordTimestamp( + record: DataRecord, + lagToCorrectMillis: Long + ): DataRecord = { + val richRecord = SRichDataRecord(record) + if (richRecord.hasFeature(TIMESTAMP)) { + val timestamp = richRecord.getFeatureValue(TIMESTAMP).toLong + richRecord.setFeatureValue(TIMESTAMP, timestamp + lagToCorrectMillis) + } + record + } + + /** + * Function that maps from a logical ''AggregateStore'' + * to an underlying physical store. The physical store for + * scalding is a HDFS VersionedKeyValSource dataset. 
+ */ + def aggregateStoreToScalding( + store: AggregateStore + ): Option[Scalding#Store[AggregationKey, DataRecord]] = { + store match { + case offlineStore: OfflineAggregateDataRecordStore => + Some( + getMostRecentLagCorrectingVersionedStoreWithRetention[ + AggregationKey, + DataRecord, + DataRecord]( + offlineStore, + versionsToKeep = offlineStore.batchesToKeep, + lagCorrector = mutablyCorrectDataRecordTimestamp, + packer = Injection.identity[DataRecord], + unpacker = Injection.identity[DataRecord] + ) + ) + case offlineStore: OfflineAggregateDataRecordStoreWithDAL => + Some( + DAL.versionedKeyValStore[AggregationKey, DataRecord]( + dataset = offlineStore.dalDataset, + pathLayout = D.Suffix(offlineStore.offline.hdfsPath.toString), + batcher = offlineStore.batcher, + maybeStartTime = Some(offlineStore.startTime), + maxErrors = offlineStore.maxKvSourceFailures + )) + case _ => None + } + } + + def generate(args: Args): ScaldingConfig = new ScaldingConfig { + val jobName = JobName(args("job_name")) + + /* + * Add registrars for chill serialization for user-defined types. + * We use the default: an empty List(). + */ + override def registrars = List() + + /* Use transformConfig to set Hadoop options. 
*/ + override def transformConfig(config: Map[String, AnyRef]): Map[String, AnyRef] = + super.transformConfig(config) ++ Map( + "mapreduce.output.fileoutputformat.compress" -> "true", + "mapreduce.output.fileoutputformat.compress.codec" -> "com.hadoop.compression.lzo.LzoCodec", + "mapreduce.output.fileoutputformat.compress.type" -> "BLOCK" + ) + + /* + * Use getNamedOptions to set Summingbird runtime options + * The options we set are: + * 1) Set monoid to non-commutative to disable map-side + * aggregation and force all aggregation to reducers (provides a 20% speedup) + */ + override def getNamedOptions: Map[String, Options] = Map( + "DEFAULT" -> Options() + .set(MonoidIsCommutative(false)) + .set(Reducers(numReducers)) + ) + + implicit val batcher: Batcher = Batcher.ofHours(24) + + /* State implementation that uses Statebird (go/statebird) to track the batches processed. */ + def getWaitingState(hadoopConfig: Configuration, startDate: Option[Timestamp], batches: Int) = + StatebirdState( + jobName, + startDate, + batches, + args.optional("statebird_service_destination"), + args.optional("statebird_client_id_name") + )(batcher) + + val sourceNameFilter: Option[Set[String]] = + args.optional("input_sources").map(_.split(",").toSet) + val storeNameFilter: Option[Set[String]] = + args.optional("output_stores").map(_.split(",").toSet) + + val filteredAggregates = + AggregatesV2Job.filterAggregates( + aggregates = aggregatesToCompute, + sourceNames = sourceNameFilter, + storeNames = storeNameFilter + ) + + override val graph = + AggregatesV2Job.generateJobGraph[Scalding]( + filteredAggregates, + dataRecordSourceToScalding, + aggregateStoreToScalding + )(DataRecordAggregationMonoid(filteredAggregates)) + } + def main(args: Array[String]): Unit = { + GenericRunner(args, generate(_)) + + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregationKeyOrdering.scala 
b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregationKeyOrdering.scala new file mode 100644 index 000000000..af6f14ff2 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/AggregationKeyOrdering.scala @@ -0,0 +1,17 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.scalding_internal.job.RequiredBinaryComparators.ordSer +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.scalding.serialization.macros.impl.ordered_serialization.runtime_helpers.MacroEqualityOrderedSerialization + +object AggregationKeyOrdering extends Ordering[AggregationKey] { + implicit val featureMapsOrdering: MacroEqualityOrderedSerialization[ + (Map[Long, Long], Map[Long, String]) + ] = ordSer[(Map[Long, Long], Map[Long, String])] + + override def compare(left: AggregationKey, right: AggregationKey): Int = + featureMapsOrdering.compare( + AggregationKey.unapply(left).get, + AggregationKey.unapply(right).get + ) +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/BUILD b/timelines/data_processing/ml_util/aggregation_framework/scalding/BUILD new file mode 100644 index 000000000..d03766619 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/BUILD @@ -0,0 +1,72 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:json", + "3rdparty/jvm/com/twitter/bijection:netty", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/com/twitter/bijection:util", + "3rdparty/jvm/com/twitter/chill:bijection", + "3rdparty/jvm/com/twitter/storehaus:algebra", + "3rdparty/jvm/com/twitter/storehaus:core", + "3rdparty/jvm/org/apache/hadoop:hadoop-client-default", + 
"3rdparty/src/jvm/com/twitter/scalding:args", + "3rdparty/src/jvm/com/twitter/scalding:commons", + "3rdparty/src/jvm/com/twitter/scalding:core", + "3rdparty/src/jvm/com/twitter/summingbird:batch", + "3rdparty/src/jvm/com/twitter/summingbird:batch-hadoop", + "3rdparty/src/jvm/com/twitter/summingbird:chill", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "3rdparty/src/jvm/com/twitter/summingbird:scalding", + "finagle/finagle-core/src/main", + "gizmoduck/snapshot/src/main/scala/com/twitter/gizmoduck/snapshot:deleted_user-scala", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/api/constant", + "src/scala/com/twitter/ml/api/util", + "src/scala/com/twitter/scalding_internal/dalv2", + "src/scala/com/twitter/scalding_internal/job/analytics_batch", + "src/scala/com/twitter/scalding_internal/util", + "src/scala/com/twitter/storehaus_internal/manhattan/config", + "src/scala/com/twitter/storehaus_internal/offline", + "src/scala/com/twitter/storehaus_internal/util", + "src/scala/com/twitter/summingbird_internal/bijection", + "src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits", + "src/scala/com/twitter/summingbird_internal/dalv2", + "src/scala/com/twitter/summingbird_internal/runner/common", + "src/scala/com/twitter/summingbird_internal/runner/scalding", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + "src/scala/com/twitter/summingbird_internal/runner/store_config/versioned_store", + "src/scala/com/twitter/summingbird_internal/sources/common", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:interpretable-model-java", + "src/thrift/com/twitter/statebird:compiled-v2-java", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + "timelines/data_processing/ml_util/aggregation_framework:user_job", + "timelines/data_processing/ml_util/aggregation_framework/scalding/sources", + "timelines/data_processing/ml_util/sampling:sampling_utils", + ], + exports = [ + 
"3rdparty/src/jvm/com/twitter/summingbird:scalding", + "src/scala/com/twitter/storehaus_internal/manhattan/config", + "src/scala/com/twitter/summingbird_internal/runner/store_config", + ], +) + +hadoop_binary( + name = "bin", + basename = "aggregation_framework_scalding-deploy", + main = "com.twitter.scalding.Tool", + platform = "java8", + runtime_platform = "java8", + tags = [ + "bazel-compatible", + "bazel-compatible:migrated", + "bazel-only", + ], + dependencies = [ + ":scalding", + ], +) diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/DeletedUserPruner.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/DeletedUserPruner.scala new file mode 100644 index 000000000..7e2f7a95c --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/DeletedUserPruner.scala @@ -0,0 +1,97 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.gizmoduck.snapshot.DeletedUserScalaDataset +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.scalding.typed.TypedPipe +import com.twitter.scalding.DateOps +import com.twitter.scalding.DateRange +import com.twitter.scalding.Days +import com.twitter.scalding.RichDate +import com.twitter.scalding_internal.dalv2.DAL +import com.twitter.scalding_internal.dalv2.remote_access.AllowCrossClusterSameDC +import com.twitter.scalding_internal.job.RequiredBinaryComparators.ordSer +import com.twitter.scalding_internal.pruner.Pruner +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup +import com.twitter.scalding.serialization.macros.impl.ordered_serialization.runtime_helpers.MacroEqualityOrderedSerialization +import java.{util => ju} + +object DeletedUserSeqPruner extends Pruner[Seq[Long]] { + implicit val tz: ju.TimeZone = DateOps.UTC + implicit val 
userIdSequenceOrdering: MacroEqualityOrderedSerialization[Seq[Long]] = + ordSer[Seq[Long]] + + private[scalding] def pruneDeletedUsers[T]( + input: TypedPipe[T], + extractor: T => Seq[Long], + deletedUsers: TypedPipe[Long] + ): TypedPipe[T] = { + val userIdsAndValues = input.map { t: T => + val userIds: Seq[Long] = extractor(t) + (userIds, t) + } + + // Find all valid sequences of userids in the input pipe + // that contain at least one deleted user. This is efficient + // as long as the number of deleted users is small. + val userSequencesWithDeletedUsers = userIdsAndValues + .flatMap { case (userIds, _) => userIds.map((_, userIds)) } + .leftJoin(deletedUsers.asKeys) + .collect { case (_, (userIds, Some(_))) => userIds } + .distinct + + userIdsAndValues + .leftJoin(userSequencesWithDeletedUsers.asKeys) + .collect { case (_, (t, None)) => t } + } + + override def prune[T]( + input: TypedPipe[T], + put: (T, Seq[Long]) => Option[T], + get: T => Seq[Long], + writeTime: RichDate + ): TypedPipe[T] = { + lazy val deletedUsers = DAL + .readMostRecentSnapshot(DeletedUserScalaDataset, DateRange(writeTime - Days(7), writeTime)) + .withRemoteReadPolicy(AllowCrossClusterSameDC) + .toTypedPipe + .map(_.userId) + + pruneDeletedUsers(input, get, deletedUsers) + } +} + +object AggregationKeyPruner { + + /** + * Makes a pruner that prunes aggregate records where any of the + * "userIdFeatures" set in the aggregation key correspond to a + * user who has deleted their account. Here, "userIdFeatures" is + * intended as a catch-all term for all features corresponding to + * a Twitter user in the input data record -- the feature itself + * could represent an authorId, retweeterId, engagerId, etc. 
+ */ + def mkDeletedUsersPruner( + userIdFeatures: Seq[Feature[_]] + ): Pruner[(AggregationKey, DataRecord)] = { + val userIdFeatureIds = userIdFeatures.map(TypedAggregateGroup.getDenseFeatureId) + + def getter(tupled: (AggregationKey, DataRecord)): Seq[Long] = { + tupled match { + case (aggregationKey, _) => + userIdFeatureIds.flatMap { id => + aggregationKey.discreteFeaturesById + .get(id) + .orElse(aggregationKey.textFeaturesById.get(id).map(_.toLong)) + } + } + } + + // Setting putter to always return None here. The put function is not used within pruneDeletedUsers, this function is just needed for xmap api. + def putter: ((AggregationKey, DataRecord), Seq[Long]) => Option[(AggregationKey, DataRecord)] = + (t, seq) => None + + DeletedUserSeqPruner.xmap(putter, getter) + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/MostRecentVersionedStore.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/MostRecentVersionedStore.scala new file mode 100644 index 000000000..d60e67716 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/MostRecentVersionedStore.scala @@ -0,0 +1,100 @@ +package com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding + +import com.twitter.bijection.Injection +import com.twitter.scalding.commons.source.VersionedKeyValSource +import com.twitter.scalding.TypedPipe +import com.twitter.scalding.{Hdfs => HdfsMode} +import com.twitter.summingbird.batch.store.HDFSMetadata +import com.twitter.summingbird.batch.BatchID +import com.twitter.summingbird.batch.Batcher +import com.twitter.summingbird.batch.OrderedFromOrderingExt +import com.twitter.summingbird.batch.PrunedSpace +import com.twitter.summingbird.scalding._ +import com.twitter.summingbird.scalding.store.VersionedBatchStore +import org.slf4j.LoggerFactory + +object MostRecentLagCorrectingVersionedStore { + def apply[Key, ValInStore, ValInMemory]( + rootPath: String, + packer: ValInMemory => 
ValInStore, + unpacker: ValInStore => ValInMemory, + versionsToKeep: Int = VersionedKeyValSource.defaultVersionsToKeep, + prunedSpace: PrunedSpace[(Key, ValInMemory)] = PrunedSpace.neverPruned + )( + implicit injection: Injection[(Key, (BatchID, ValInStore)), (Array[Byte], Array[Byte])], + batcher: Batcher, + ord: Ordering[Key], + lagCorrector: (ValInMemory, Long) => ValInMemory + ): MostRecentLagCorrectingVersionedBatchStore[Key, ValInMemory, Key, (BatchID, ValInStore)] = { + new MostRecentLagCorrectingVersionedBatchStore[Key, ValInMemory, Key, (BatchID, ValInStore)]( + rootPath, + versionsToKeep, + batcher + )(lagCorrector)({ case (batchID, (k, v)) => (k, (batchID.next, packer(v))) })({ + case (k, (_, v)) => (k, unpacker(v)) + }) { + override def select(b: List[BatchID]) = List(b.last) + override def pruning: PrunedSpace[(Key, ValInMemory)] = prunedSpace + } + } +} + +/** + * @param lagCorrector lagCorrector allows one to take data from one batch and pretend as if it + * came from a different batch. + * @param pack Converts the in-memory tuples to the type used by the underlying key-val store. + * @param unpack Converts the key-val tuples from the store in the form used by the calling object. 
+ */ +class MostRecentLagCorrectingVersionedBatchStore[KeyInMemory, ValInMemory, KeyInStore, ValInStore]( + rootPath: String, + versionsToKeep: Int, + override val batcher: Batcher +)( + lagCorrector: (ValInMemory, Long) => ValInMemory +)( + pack: (BatchID, (KeyInMemory, ValInMemory)) => (KeyInStore, ValInStore) +)( + unpack: ((KeyInStore, ValInStore)) => (KeyInMemory, ValInMemory) +)( + implicit @transient injection: Injection[(KeyInStore, ValInStore), (Array[Byte], Array[Byte])], + override val ordering: Ordering[KeyInMemory]) + extends VersionedBatchStore[KeyInMemory, ValInMemory, KeyInStore, ValInStore]( + rootPath, + versionsToKeep, + batcher)(pack)(unpack)(injection, ordering) { + + import OrderedFromOrderingExt._ + + @transient private val logger = + LoggerFactory.getLogger(classOf[MostRecentLagCorrectingVersionedBatchStore[_, _, _, _]]) + + override protected def lastBatch( + exclusiveUB: BatchID, + mode: HdfsMode + ): Option[(BatchID, FlowProducer[TypedPipe[(KeyInMemory, ValInMemory)]])] = { + val batchToPretendAs = exclusiveUB.prev + val versionToPretendAs = batchIDToVersion(batchToPretendAs) + logger.info( + s"Most recent lag correcting versioned batched store at $rootPath entering lastBatch method versionToPretendAs = $versionToPretendAs") + val meta = new HDFSMetadata(mode.conf, rootPath) + meta.versions + .map { ver => (versionToBatchID(ver), readVersion(ver)) } + .filter { _._1 < exclusiveUB } + .reduceOption { (a, b) => if (a._1 > b._1) a else b } + .map { + case ( + lastBatchID: BatchID, + flowProducer: FlowProducer[TypedPipe[(KeyInMemory, ValInMemory)]]) => + val lastVersion = batchIDToVersion(lastBatchID) + val lagToCorrectMillis: Long = + batchIDToVersion(batchToPretendAs) - batchIDToVersion(lastBatchID) + logger.info( + s"Most recent available version is $lastVersion, so lagToCorrectMillis is $lagToCorrectMillis") + val lagCorrectedFlowProducer = flowProducer.map { + pipe: TypedPipe[(KeyInMemory, ValInMemory)] => + pipe.map { case (k, v) => (k, 
lagCorrector(v, lagToCorrectMillis)) } + } + (batchToPretendAs, lagCorrectedFlowProducer) + } + } +} diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/BUILD b/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/BUILD new file mode 100644 index 000000000..ba065ecd7 --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/BUILD @@ -0,0 +1,26 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/storehaus:algebra", + "3rdparty/src/jvm/com/twitter/scalding:commons", + "3rdparty/src/jvm/com/twitter/scalding:core", + "3rdparty/src/jvm/com/twitter/scalding:date", + "3rdparty/src/jvm/com/twitter/summingbird:batch", + "3rdparty/src/jvm/com/twitter/summingbird:batch-hadoop", + "3rdparty/src/jvm/com/twitter/summingbird:chill", + "3rdparty/src/jvm/com/twitter/summingbird:core", + "3rdparty/src/jvm/com/twitter/summingbird:scalding", + "src/java/com/twitter/ml/api:api-base", + "src/scala/com/twitter/ml/api:api-base", + "src/scala/com/twitter/ml/api/internal", + "src/scala/com/twitter/ml/api/util", + "src/scala/com/twitter/scalding_internal/dalv2", + "src/scala/com/twitter/scalding_internal/dalv2/remote_access", + "src/scala/com/twitter/summingbird_internal/sources/common", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:interpretable-model-java", + "timelines/data_processing/ml_util/aggregation_framework:common_types", + ], +) diff --git a/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/ScaldingAggregateSource.scala b/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/ScaldingAggregateSource.scala new file mode 100644 index 000000000..d1820b4fc --- /dev/null +++ b/timelines/data_processing/ml_util/aggregation_framework/scalding/sources/ScaldingAggregateSource.scala @@ -0,0 +1,77 @@ +package 
com.twitter.timelines.data_processing.ml_util.aggregation_framework.scalding.sources + +import com.twitter.ml.api.DailySuffixFeatureSource +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.FixedPathFeatureSource +import com.twitter.ml.api.HourlySuffixFeatureSource +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.scalding._ +import com.twitter.scalding_internal.dalv2.DAL +import com.twitter.scalding_internal.dalv2.remote_access.AllowCrossClusterSameDC +import com.twitter.statebird.v2.thriftscala.Environment +import com.twitter.summingbird._ +import com.twitter.summingbird.scalding.Scalding.pipeFactoryExact +import com.twitter.summingbird.scalding._ +import com.twitter.summingbird_internal.sources.SourceFactory +import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateSource +import java.lang.{Long => JLong} + +/* + * Summingbird offline HDFS source that reads from data records on HDFS. + * + * @param offlineSource Underlying offline source that contains + * all the config info to build this platform-specific (scalding) source. 
+ */ +case class ScaldingAggregateSource(offlineSource: OfflineAggregateSource) + extends SourceFactory[Scalding, DataRecord] { + + val hdfsPath: String = offlineSource.scaldingHdfsPath.getOrElse("") + val suffixType: String = offlineSource.scaldingSuffixType.getOrElse("daily") + val withValidation: Boolean = offlineSource.withValidation + def name: String = offlineSource.name + def description: String = + "Summingbird offline source that reads from data records at: " + hdfsPath + + implicit val timeExtractor: TimeExtractor[DataRecord] = TimeExtractor((record: DataRecord) => + SRichDataRecord(record).getFeatureValue[JLong, JLong](offlineSource.timestampFeature)) + + def getSourceForDateRange(dateRange: DateRange) = { + suffixType match { + case "daily" => DailySuffixFeatureSource(hdfsPath)(dateRange).source + case "hourly" => HourlySuffixFeatureSource(hdfsPath)(dateRange).source + case "fixed_path" => FixedPathFeatureSource(hdfsPath).source + case "dal" => + offlineSource.dalDataSet match { + case Some(dataset) => + DAL + .read(dataset, dateRange) + .withRemoteReadPolicy(AllowCrossClusterSameDC) + .withEnvironment(Environment.Prod) + .toTypedSource + case _ => + throw new IllegalArgumentException( + "cannot provide an empty dataset when defining DAL as the suffix type" + ) + } + } + } + + /** + * This method is similar to [[Scalding.sourceFromMappable]] except that this uses [[pipeFactoryExact]] + * instead of [[pipeFactory]]. [[pipeFactoryExact]] also invokes [[FileSource.validateTaps]] on the source. + * The validation ensures the presence of _SUCCESS file before processing. 
For more details, please refer to + * https://jira.twitter.biz/browse/TQ-10618 + */ + def sourceFromMappableWithValidation[T: TimeExtractor: Manifest]( + factory: (DateRange) => Mappable[T] + ): Producer[Scalding, T] = { + Producer.source[Scalding, T](pipeFactoryExact(factory)) + } + + def source: Producer[Scalding, DataRecord] = { + if (withValidation) + sourceFromMappableWithValidation(getSourceForDateRange) + else + Scalding.sourceFromMappable(getSourceForDateRange) + } +} From 43cdcf2ed620721911b696223a482c5d2934aa82 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Tue, 18 Apr 2023 13:04:42 -0700 Subject: [PATCH 06/11] Open-sourcing Representation Manager Representation Manager (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as facade of the underlying storage or services. --- representation-manager/BUILD.bazel | 1 + representation-manager/README.md | 4 + representation-manager/bin/deploy.sh | 4 + .../com/twitter/representation_manager/BUILD | 17 + .../representation_manager/StoreBuilder.scala | 208 +++++ .../representation_manager/config/BUILD | 12 + .../config/ClientConfig.scala | 25 + .../config/InMemoryCacheConfig.scala | 53 ++ representation-manager/server/BUILD | 21 + .../server/src/main/resources/BUILD | 7 + .../src/main/resources/config/decider.yml | 219 +++++ .../server/src/main/resources/logback.xml | 165 ++++ .../com/twitter/representation_manager/BUILD | 13 + .../RepresentationManagerFedServer.scala | 40 + .../representation_manager/columns/BUILD | 9 + .../columns/ColumnConfigBase.scala | 26 + .../columns/topic/BUILD | 14 + ...ocaleEntityIdSimClustersEmbeddingCol.scala | 77 ++ .../TopicIdSimClustersEmbeddingCol.scala | 74 ++ .../columns/tweet/BUILD | 14 + .../tweet/TweetSimClustersEmbeddingCol.scala | 73 ++ .../representation_manager/columns/user/BUILD | 14 + .../user/UserSimClustersEmbeddingCol.scala | 73 ++ .../representation_manager/common/BUILD | 13 + .../common/MemCacheConfig.scala | 153 
++++ .../common/RepresentationManagerDecider.scala | 25 + .../representation_manager/migration/BUILD | 25 + .../migration/LegacyRMS.scala | 846 ++++++++++++++++++ .../representation_manager/modules/BUILD | 18 + .../modules/CacheModule.scala | 34 + .../modules/InterestsThriftClientModule.scala | 40 + .../modules/LegacyRMSConfigModule.scala | 18 + .../modules/StoreModule.scala | 24 + .../modules/TimerModule.scala | 13 + .../modules/UttClientModule.scala | 39 + .../representation_manager/store/BUILD | 16 + .../store/DeciderConstants.scala | 39 + .../TopicSimClustersEmbeddingStore.scala | 198 ++++ .../TweetSimClustersEmbeddingStore.scala | 141 +++ .../store/UserSimClustersEmbeddingStore.scala | 602 +++++++++++++ .../server/src/main/thrift/BUILD | 18 + .../representation_manager/service.thrift | 14 + 42 files changed, 3439 insertions(+) create mode 100644 representation-manager/BUILD.bazel create mode 100644 representation-manager/README.md create mode 100755 representation-manager/bin/deploy.sh create mode 100644 representation-manager/client/src/main/scala/com/twitter/representation_manager/BUILD create mode 100644 representation-manager/client/src/main/scala/com/twitter/representation_manager/StoreBuilder.scala create mode 100644 representation-manager/client/src/main/scala/com/twitter/representation_manager/config/BUILD create mode 100644 representation-manager/client/src/main/scala/com/twitter/representation_manager/config/ClientConfig.scala create mode 100644 representation-manager/client/src/main/scala/com/twitter/representation_manager/config/InMemoryCacheConfig.scala create mode 100644 representation-manager/server/BUILD create mode 100644 representation-manager/server/src/main/resources/BUILD create mode 100644 representation-manager/server/src/main/resources/config/decider.yml create mode 100644 representation-manager/server/src/main/resources/logback.xml create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/BUILD 
create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/RepresentationManagerFedServer.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/ColumnConfigBase.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/LocaleEntityIdSimClustersEmbeddingCol.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/TopicIdSimClustersEmbeddingCol.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/TweetSimClustersEmbeddingCol.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/UserSimClustersEmbeddingCol.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/common/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/common/MemCacheConfig.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/common/RepresentationManagerDecider.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/LegacyRMS.scala create mode 100644 
representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/CacheModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/InterestsThriftClientModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/LegacyRMSConfigModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/StoreModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/TimerModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/UttClientModule.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/store/BUILD create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/store/DeciderConstants.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TopicSimClustersEmbeddingStore.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TweetSimClustersEmbeddingStore.scala create mode 100644 representation-manager/server/src/main/scala/com/twitter/representation_manager/store/UserSimClustersEmbeddingStore.scala create mode 100644 representation-manager/server/src/main/thrift/BUILD create mode 100644 representation-manager/server/src/main/thrift/com/twitter/representation_manager/service.thrift diff --git a/representation-manager/BUILD.bazel b/representation-manager/BUILD.bazel new file mode 100644 index 000000000..1624a57d4 --- /dev/null +++ b/representation-manager/BUILD.bazel @@ -0,0 +1 @@ +# This prevents SQ query from grabbing //:all since it traverses up once 
to find a BUILD diff --git a/representation-manager/README.md b/representation-manager/README.md new file mode 100644 index 000000000..44cd25ee7 --- /dev/null +++ b/representation-manager/README.md @@ -0,0 +1,4 @@ +# Representation Manager # + +**Representation Manager** (RMS) serves as a centralized embedding management system, providing SimClusters or other embeddings as a facade over the underlying storage or services. + diff --git a/representation-manager/bin/deploy.sh b/representation-manager/bin/deploy.sh new file mode 100755 index 000000000..5729d9903 --- /dev/null +++ b/representation-manager/bin/deploy.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +JOB=representation-manager bazel run --ui_event_filters=-info,-stdout,-stderr --noshow_progress \ + //relevance-platform/src/main/python/deploy -- "$@" diff --git a/representation-manager/client/src/main/scala/com/twitter/representation_manager/BUILD b/representation-manager/client/src/main/scala/com/twitter/representation_manager/BUILD new file mode 100644 index 000000000..1f69a2176 --- /dev/null +++ b/representation-manager/client/src/main/scala/com/twitter/representation_manager/BUILD @@ -0,0 +1,17 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-thrift-client", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore", + "representation-manager/client/src/main/scala/com/twitter/representation_manager/config", + "representation-manager/server/src/main/thrift:thrift-scala", + "src/scala/com/twitter/simclusters_v2/common", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "stitch/stitch-storehaus", + "strato/src/main/scala/com/twitter/strato/client", + ], +) diff --git
a/representation-manager/client/src/main/scala/com/twitter/representation_manager/StoreBuilder.scala b/representation-manager/client/src/main/scala/com/twitter/representation_manager/StoreBuilder.scala new file mode 100644 index 000000000..2314a8254 --- /dev/null +++ b/representation-manager/client/src/main/scala/com/twitter/representation_manager/StoreBuilder.scala @@ -0,0 +1,208 @@ +package com.twitter.representation_manager + +import com.twitter.finagle.memcached.{Client => MemcachedClient} +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.store.strato.StratoFetchableStore +import com.twitter.hermit.store.common.ObservedCachedReadableStore +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.representation_manager.config.ClientConfig +import com.twitter.representation_manager.config.DisabledInMemoryCacheParams +import com.twitter.representation_manager.config.EnabledInMemoryCacheParams +import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.LocaleEntityId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.TopicId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.strato.thrift.ScroogeConvImplicits._ + +/** + * This is the class that offers features to build readable stores for a given + * SimClustersEmbeddingView (i.e. embeddingType and modelVersion). It applies ClientConfig + * for a particular service and builds ReadableStores which implement that config.
+ */ +class StoreBuilder( + clientConfig: ClientConfig, + stratoClient: StratoClient, + memCachedClient: MemcachedClient, + globalStats: StatsReceiver, +) { + private val stats = + globalStats.scope("representation_manager_client").scope(this.getClass.getSimpleName) + + // Column consts + private val ColPathPrefix = "recommendations/representation_manager/" + private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet" + private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User" + private val SimclustersTopicIdColPath = ColPathPrefix + "simClustersEmbedding.TopicId" + private val SimclustersLocaleEntityIdColPath = + ColPathPrefix + "simClustersEmbedding.LocaleEntityId" + + def buildSimclustersTweetEmbeddingStore( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[Long, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersTweetColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + + addCacheLayer(rawStore, embeddingColumnView) + } + + def buildSimclustersUserEmbeddingStore( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[Long, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersUserColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + + addCacheLayer(rawStore, embeddingColumnView) + } + + def buildSimclustersTopicIdEmbeddingStore( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[TopicId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersTopicIdColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + + addCacheLayer(rawStore, embeddingColumnView) + } + + def 
buildSimclustersLocaleEntityIdEmbeddingStore( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[LocaleEntityId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[LocaleEntityId, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersLocaleEntityIdColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + + addCacheLayer(rawStore, embeddingColumnView) + } + + def buildSimclustersTweetEmbeddingStoreWithEmbeddingIdAsKey( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersTweetColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + val embeddingIdAsKeyStore = rawStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) => + tweetId + } + + addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView) + } + + def buildSimclustersUserEmbeddingStoreWithEmbeddingIdAsKey( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersUserColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + val embeddingIdAsKeyStore = rawStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => + userId + } + + addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView) + } + + def buildSimclustersTopicEmbeddingStoreWithEmbeddingIdAsKey( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + 
stratoClient, + SimclustersTopicIdColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + val embeddingIdAsKeyStore = rawStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) => + topicId + } + + addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView) + } + + def buildSimclustersTopicIdEmbeddingStoreWithEmbeddingIdAsKey( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[TopicId, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersTopicIdColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + val embeddingIdAsKeyStore = rawStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) => + topicId + } + + addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView) + } + + def buildSimclustersLocaleEntityIdEmbeddingStoreWithEmbeddingIdAsKey( + embeddingColumnView: SimClustersEmbeddingView + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val rawStore = StratoFetchableStore + .withView[LocaleEntityId, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( + stratoClient, + SimclustersLocaleEntityIdColPath, + embeddingColumnView) + .mapValues(SimClustersEmbedding(_)) + val embeddingIdAsKeyStore = rawStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) => + localeEntityId + } + + addCacheLayer(embeddingIdAsKeyStore, embeddingColumnView) + } + + private def addCacheLayer[K]( + rawStore: ReadableStore[K, SimClustersEmbedding], + embeddingColumnView: SimClustersEmbeddingView, + ): ReadableStore[K, SimClustersEmbedding] = { + // Add in-memory caching based on ClientConfig + val inMemCacheParams = clientConfig.inMemoryCacheConfig + .getCacheSetup(embeddingColumnView.embeddingType, 
embeddingColumnView.modelVersion) + + val statsPerStore = stats + .scope(embeddingColumnView.embeddingType.name).scope(embeddingColumnView.modelVersion.name) + + inMemCacheParams match { + case DisabledInMemoryCacheParams => + ObservedReadableStore( + store = rawStore + )(statsPerStore) + case EnabledInMemoryCacheParams(ttl, maxKeys, cacheName) => + ObservedCachedReadableStore.from[K, SimClustersEmbedding]( + rawStore, + ttl = ttl, + maxKeys = maxKeys, + cacheName = cacheName, + windowSize = 10000L + )(statsPerStore) + } + } + +} diff --git a/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/BUILD b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/BUILD new file mode 100644 index 000000000..8418563d5 --- /dev/null +++ b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/BUILD @@ -0,0 +1,12 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-thrift-client", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/common", + "representation-manager/server/src/main/thrift:thrift-scala", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "strato/src/main/scala/com/twitter/strato/client", + ], +) diff --git a/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/ClientConfig.scala b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/ClientConfig.scala new file mode 100644 index 000000000..9ae0c49e7 --- /dev/null +++ b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/ClientConfig.scala @@ -0,0 +1,25 @@ +package com.twitter.representation_manager.config + +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.ModelVersion + +/* + * This is RMS client 
config class. + * We only support setting up in memory cache params for now, but we expect to enable other + * customisations in the near future e.g. request timeout + * + * -------------------------------------------- + * PLEASE NOTE: + * Having in-memory cache is not necessarily a free performance win, anyone considering it should + * investigate rather than blindly enabling it + * */ +class ClientConfig(inMemCacheParamsOverrides: Map[ + (EmbeddingType, ModelVersion), + InMemoryCacheParams +] = Map.empty) { + // In memory cache config per embedding + val inMemCacheParams = DefaultInMemoryCacheConfig.cacheParamsMap ++ inMemCacheParamsOverrides + val inMemoryCacheConfig = new InMemoryCacheConfig(inMemCacheParams) +} + +object DefaultClientConfig extends ClientConfig diff --git a/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/InMemoryCacheConfig.scala b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/InMemoryCacheConfig.scala new file mode 100644 index 000000000..eab569b51 --- /dev/null +++ b/representation-manager/client/src/main/scala/com/twitter/representation_manager/config/InMemoryCacheConfig.scala @@ -0,0 +1,53 @@ +package com.twitter.representation_manager.config + +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.util.Duration + +/* + * -------------------------------------------- + * PLEASE NOTE: + * Having in-memory cache is not necessarily a free performance win, anyone considering it should + * investigate rather than blindly enabling it + * -------------------------------------------- + * */ + +sealed trait InMemoryCacheParams + +/* + * This holds the params that are required to set up an in-mem cache for a single embedding store + */ +case class EnabledInMemoryCacheParams( + ttl: Duration, + maxKeys: Int, + cacheName: String) + extends InMemoryCacheParams +object DisabledInMemoryCacheParams
extends InMemoryCacheParams + +/* + * This is the class for the in-memory cache config. Clients can pass in their own cacheParamsMap to + * create a new InMemoryCacheConfig instead of using the DefaultInMemoryCacheConfig object below + * */ +class InMemoryCacheConfig( + cacheParamsMap: Map[ + (EmbeddingType, ModelVersion), + InMemoryCacheParams + ] = Map.empty) { + + def getCacheSetup( + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): InMemoryCacheParams = { + // No entry for (embeddingType, modelVersion): fall back to DisabledInMemoryCacheParams + cacheParamsMap.getOrElse((embeddingType, modelVersion), DisabledInMemoryCacheParams) + } +} + +/* + * Default config for the in-memory cache + * Clients can directly import and use this one if they don't want to set up a customised config + * */ +object DefaultInMemoryCacheConfig extends InMemoryCacheConfig { + // set default to no in-memory caching + val cacheParamsMap = Map.empty +} diff --git a/representation-manager/server/BUILD b/representation-manager/server/BUILD new file mode 100644 index 000000000..427fc1d3b --- /dev/null +++ b/representation-manager/server/BUILD @@ -0,0 +1,21 @@ +jvm_binary( + name = "bin", + basename = "representation-manager", + main = "com.twitter.representation_manager.RepresentationManagerFedServerMain", + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-logback/src/main/scala", + "loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback", + "representation-manager/server/src/main/resources", + "representation-manager/server/src/main/scala/com/twitter/representation_manager", + "twitter-server/logback-classic/src/main/scala", + ], +) + +# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app +jvm_app( + name = "representation-manager-app", + archive = "zip", + binary = ":bin", +) diff --git a/representation-manager/server/src/main/resources/BUILD
b/representation-manager/server/src/main/resources/BUILD new file mode 100644 index 000000000..b3a752276 --- /dev/null +++ b/representation-manager/server/src/main/resources/BUILD @@ -0,0 +1,7 @@ +resources( + sources = [ + "*.xml", + "config/*.yml", + ], + tags = ["bazel-compatible"], +) diff --git a/representation-manager/server/src/main/resources/config/decider.yml b/representation-manager/server/src/main/resources/config/decider.yml new file mode 100644 index 000000000..e75ebf89d --- /dev/null +++ b/representation-manager/server/src/main/resources/config/decider.yml @@ -0,0 +1,219 @@ +# ---------- traffic percentage by embedding type and model version ---------- +# Decider keys are built dynamically following the rule below +# i.e. s"enable_${embeddingType.name}_${modelVersion.name}" +# Hence this should be updated accordingly if usage is changed in the embedding stores + +# Tweet embeddings +"enable_LogFavBasedTweet_Model20m145k2020": + comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145k2020. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedTweet_Model20m145kUpdated": + comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavBasedTweet - Model20m145kUpdated. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavLongestL2EmbeddingTweet_Model20m145k2020": + comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145k2020. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavLongestL2EmbeddingTweet_Model20m145kUpdated": + comment: "Enable x% read traffic (0<=x<=10000, e.g. 1000=10%) for LogFavLongestL2EmbeddingTweet - Model20m145kUpdated. 0 means return EMPTY for all requests."
+ default_availability: 10000 + +# Topic embeddings +"enable_FavTfgTopic_Model20m145k2020": + comment: "Enable the read traffic to FavTfgTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedKgoApeTopic_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedKgoApeTopic - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +# User embeddings - KnownFor +"enable_FavBasedProducer_Model20m145kUpdated": + comment: "Enable the read traffic to FavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FavBasedProducer_Model20m145k2020": + comment: "Enable the read traffic to FavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FollowBasedProducer_Model20m145k2020": + comment: "Enable the read traffic to FollowBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_AggregatableFavBasedProducer_Model20m145kUpdated": + comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_AggregatableFavBasedProducer_Model20m145k2020": + comment: "Enable the read traffic to AggregatableFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_AggregatableLogFavBasedProducer_Model20m145kUpdated": + comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." 
+ default_availability: 10000 + +"enable_AggregatableLogFavBasedProducer_Model20m145k2020": + comment: "Enable the read traffic to AggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +enable_RelaxedAggregatableLogFavBasedProducer_Model20m145kUpdated: + comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +enable_RelaxedAggregatableLogFavBasedProducer_Model20m145k2020: + comment: "Enable the read traffic to RelaxedAggregatableLogFavBasedProducer - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +# User embeddings - InterestedIn +"enable_LogFavBasedUserInterestedInFromAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FollowBasedUserInterestedInFromAPE_Model20m145k2020": + comment: "Enable the read traffic to FollowBasedUserInterestedInFromAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FavBasedUserInterestedIn_Model20m145kUpdated": + comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FavBasedUserInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to FavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FollowBasedUserInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to FollowBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." 
+ default_availability: 10000 + +"enable_LogFavBasedUserInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FavBasedUserInterestedInFromPE_Model20m145kUpdated": + comment: "Enable the read traffic to FavBasedUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FilteredUserInterestedIn_Model20m145kUpdated": + comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FilteredUserInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to FilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_FilteredUserInterestedInFromPE_Model20m145kUpdated": + comment: "Enable the read traffic to FilteredUserInterestedInFromPE - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_UnfilteredUserInterestedIn_Model20m145kUpdated": + comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145kUpdated from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_UnfilteredUserInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to UnfilteredUserInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_UserNextInterestedIn_Model20m145k2020": + comment: "Enable the read traffic to UserNextInterestedIn - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." 
+ default_availability: 10000 + +"enable_LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedUserInterestedAverageAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedAverageAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests." + default_availability: 10000 + +"enable_LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE_Model20m145k2020": + comment: "Enable the read traffic to LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE - Model20m145k2020 from 0% to 100%. 0 means return EMPTY for all requests."
+ default_availability: 10000 + +# ---------- load shedding by caller id ---------- +# To create a new decider, add here with the same format and caller's details : +# "representation-manager_load_shed_by_caller_id_twtr:{{role}}:{{name}}:{{environment}}:{{cluster}}" +# All the deciders below are generated by this script: +# ./strato/bin/fed deciders representation-manager --service-role=representation-manager --service-name=representation-manager +# If you need to run the script and paste the output, add ONLY the prod deciders here. +"representation-manager_load_shed_by_caller_id_all": + comment: "Reject all traffic from caller id: all" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:cr-mixer:cr-mixer:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:cr-mixer:cr-mixer:prod:pdxa" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-1:prod:pdxa" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-3:prod:pdxa" + default_availability: 0 
+ +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-4:prod:pdxa" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann-experimental:prod:pdxa" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:simclusters-ann:simclusters-ann:prod:pdxa" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoapi:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoapi:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:atla" + default_availability: 0 + +"representation-manager_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:pdxa": + comment: "Reject all traffic from caller id: 
twtr:svc:stratostore:stratoserver:prod:pdxa" + default_availability: 0 + +# ---------- Dark Traffic Proxy ---------- +representation-manager_forward_dark_traffic: + comment: "Defines the percentage of traffic to forward to diffy-proxy. Set to 0 to disable dark traffic forwarding" + default_availability: 0 diff --git a/representation-manager/server/src/main/resources/logback.xml b/representation-manager/server/src/main/resources/logback.xml new file mode 100644 index 000000000..47b3ed16d --- /dev/null +++ b/representation-manager/server/src/main/resources/logback.xml @@ -0,0 +1,165 @@ + + + + + + + + + + + + + + + + + true + + + + + + + + + + + ${log.service.output} + + + ${log.service.output}.%d.gz + + 3GB + + 21 + true + + + %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n + + + + + + ${log.access.output} + + + ${log.access.output}.%d.gz + + 100MB + + 7 + true + + + ${DEFAULT_ACCESS_PATTERN}%n + + + + + + true + ${log.lens.category} + ${log.lens.index} + ${log.lens.tag}/service + + %msg + + + + + + true + ${log.lens.category} + ${log.lens.index} + ${log.lens.tag}/access + + %msg + + + + + + allow_listed_pipeline_executions.log + + + allow_listed_pipeline_executions.log.%d.gz + + 100MB + + 7 + true + + + %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n + + + + + + + + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/BUILD new file mode 100644 index 000000000..d8ca301f6 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/BUILD @@ -0,0 +1,13 @@ +scala_library( + 
compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-thrift-client", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/RepresentationManagerFedServer.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/RepresentationManagerFedServer.scala new file mode 100644 index 000000000..5bc820bb4 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/RepresentationManagerFedServer.scala @@ -0,0 +1,40 @@ +package com.twitter.representation_manager + +import com.google.inject.Module +import com.twitter.inject.thrift.modules.ThriftClientIdModule +import com.twitter.representation_manager.columns.topic.LocaleEntityIdSimClustersEmbeddingCol +import com.twitter.representation_manager.columns.topic.TopicIdSimClustersEmbeddingCol +import com.twitter.representation_manager.columns.tweet.TweetSimClustersEmbeddingCol +import com.twitter.representation_manager.columns.user.UserSimClustersEmbeddingCol +import com.twitter.representation_manager.modules.CacheModule +import com.twitter.representation_manager.modules.InterestsThriftClientModule +import com.twitter.representation_manager.modules.LegacyRMSConfigModule +import com.twitter.representation_manager.modules.StoreModule +import com.twitter.representation_manager.modules.TimerModule +import com.twitter.representation_manager.modules.UttClientModule +import com.twitter.strato.fed._ +import com.twitter.strato.fed.server._ + 
+object RepresentationManagerFedServerMain extends RepresentationManagerFedServer + +trait RepresentationManagerFedServer extends StratoFedServer { + override def dest: String = "/s/representation-manager/representation-manager" + override val modules: Seq[Module] = + Seq( + CacheModule, + InterestsThriftClientModule, + LegacyRMSConfigModule, + StoreModule, + ThriftClientIdModule, + TimerModule, + UttClientModule + ) + + override def columns: Seq[Class[_ <: StratoFed.Column]] = + Seq( + classOf[TweetSimClustersEmbeddingCol], + classOf[UserSimClustersEmbeddingCol], + classOf[TopicIdSimClustersEmbeddingCol], + classOf[LocaleEntityIdSimClustersEmbeddingCol] + ) +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/BUILD new file mode 100644 index 000000000..6ebd77ef8 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/BUILD @@ -0,0 +1,9 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/ColumnConfigBase.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/ColumnConfigBase.scala new file mode 100644 index 000000000..143ccdc4c --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/ColumnConfigBase.scala @@ -0,0 +1,26 @@ +package com.twitter.representation_manager.columns + +import com.twitter.strato.access.Access.LdapGroup +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Has +import 
com.twitter.strato.config.Prefix +import com.twitter.strato.config.ServiceIdentifierPattern + +object ColumnConfigBase { + + /****************** Internal permissions *******************/ + val recosPermissions: Seq[com.twitter.strato.config.Policy] = Seq() + + /****************** External permissions *******************/ + // This is used to grant limited access to members outside of RP team. + val externalPermissions: Seq[com.twitter.strato.config.Policy] = Seq() + + val contactInfo: ContactInfo = ContactInfo( + description = "Please contact Relevance Platform for more details", + contactEmail = "no-reply@twitter.com", + ldapGroup = "ldap", + jiraProject = "JIRA", + links = Seq("http://go/rms-runbook") + ) +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/BUILD new file mode 100644 index 000000000..26022ebe5 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/BUILD @@ -0,0 +1,14 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-core/src/main/scala", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/modules", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/store", + "representation-manager/server/src/main/thrift:thrift-scala", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/LocaleEntityIdSimClustersEmbeddingCol.scala 
b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/LocaleEntityIdSimClustersEmbeddingCol.scala new file mode 100644 index 000000000..7b7952300 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/LocaleEntityIdSimClustersEmbeddingCol.scala @@ -0,0 +1,77 @@ +package com.twitter.representation_manager.columns.topic + +import com.twitter.representation_manager.columns.ColumnConfigBase +import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore +import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.LocaleEntityId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.stitch.storehaus.StitchOfReadableStore +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Policy +import com.twitter.strato.config.Prefix +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class LocaleEntityIdSimClustersEmbeddingCol @Inject() ( + embeddingStore: TopicSimClustersEmbeddingStore) + extends StratoFed.Column( + "recommendations/representation_manager/simClustersEmbedding.LocaleEntityId") + with StratoFed.Fetch.Stitch { + + private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = + StitchOfReadableStore(embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)) + + val colPermissions: 
Seq[com.twitter.strato.config.Policy] = + ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions :+ FromColumns( + Set( + Prefix("ml/featureStore/simClusters"), + )) + + override val policy: Policy = AnyOf({ + colPermissions + }) + + override type Key = LocaleEntityId + override type View = SimClustersEmbeddingView + override type Value = SimClustersEmbedding + + override val keyConv: Conv[Key] = ScroogeConv.fromStruct[LocaleEntityId] + override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding] + + override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText( + "The Topic SimClusters Embedding Endpoint in Representation Management Service with LocaleEntityId." + + " TDD: http://go/rms-tdd")) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + val embeddingId = SimClustersEmbeddingId( + view.embeddingType, + view.modelVersion, + InternalId.LocaleEntityId(key) + ) + + storeStitch(embeddingId) + .map(embedding => found(embedding)) + .handle { + case stitch.NotFound => missing + } + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/TopicIdSimClustersEmbeddingCol.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/TopicIdSimClustersEmbeddingCol.scala new file mode 100644 index 000000000..4afddbb4c --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/topic/TopicIdSimClustersEmbeddingCol.scala @@ -0,0 +1,74 @@ +package com.twitter.representation_manager.columns.topic + +import com.twitter.representation_manager.columns.ColumnConfigBase +import com.twitter.representation_manager.store.TopicSimClustersEmbeddingStore 
+import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.TopicId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.stitch.storehaus.StitchOfReadableStore +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Policy +import com.twitter.strato.config.Prefix +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class TopicIdSimClustersEmbeddingCol @Inject() (embeddingStore: TopicSimClustersEmbeddingStore) + extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.TopicId") + with StratoFed.Fetch.Stitch { + + private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = + StitchOfReadableStore(embeddingStore.topicSimClustersEmbeddingStore.mapValues(_.toThrift)) + + val colPermissions: Seq[com.twitter.strato.config.Policy] = + ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions :+ FromColumns( + Set( + Prefix("ml/featureStore/simClusters"), + )) + + override val policy: Policy = AnyOf({ + colPermissions + }) + + override type Key = TopicId + override type View = SimClustersEmbeddingView + override type Value = SimClustersEmbedding + + override val keyConv: Conv[Key] = ScroogeConv.fromStruct[TopicId] + override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding] 
+ + override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some(PlainText( + "The Topic SimClusters Embedding Endpoint in Representation Management Service with TopicId." + + " TDD: http://go/rms-tdd")) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + val embeddingId = SimClustersEmbeddingId( + view.embeddingType, + view.modelVersion, + InternalId.TopicId(key) + ) + + storeStitch(embeddingId) + .map(embedding => found(embedding)) + .handle { + case stitch.NotFound => missing + } + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/BUILD new file mode 100644 index 000000000..26022ebe5 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/BUILD @@ -0,0 +1,14 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-core/src/main/scala", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/modules", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/store", + "representation-manager/server/src/main/thrift:thrift-scala", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/TweetSimClustersEmbeddingCol.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/TweetSimClustersEmbeddingCol.scala new file mode 100644 index 
000000000..15cd4247c --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/tweet/TweetSimClustersEmbeddingCol.scala @@ -0,0 +1,73 @@ +package com.twitter.representation_manager.columns.tweet + +import com.twitter.representation_manager.columns.ColumnConfigBase +import com.twitter.representation_manager.store.TweetSimClustersEmbeddingStore +import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.stitch.storehaus.StitchOfReadableStore +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Policy +import com.twitter.strato.config.Prefix +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class TweetSimClustersEmbeddingCol @Inject() (embeddingStore: TweetSimClustersEmbeddingStore) + extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.Tweet") + with StratoFed.Fetch.Stitch { + + private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = + StitchOfReadableStore(embeddingStore.tweetSimClustersEmbeddingStore.mapValues(_.toThrift)) + + val colPermissions: Seq[com.twitter.strato.config.Policy] = + ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions :+ FromColumns( + Set( + Prefix("ml/featureStore/simClusters"), + )) + + override val policy: Policy = AnyOf({ + colPermissions + }) + + override type Key = Long // 
TweetId + override type View = SimClustersEmbeddingView + override type Value = SimClustersEmbedding + + override val keyConv: Conv[Key] = Conv.long + override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding] + + override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText("The Tweet SimClusters Embedding Endpoint in Representation Management Service." + + " TDD: http://go/rms-tdd")) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + val embeddingId = SimClustersEmbeddingId( + view.embeddingType, + view.modelVersion, + InternalId.TweetId(key) + ) + + storeStitch(embeddingId) + .map(embedding => found(embedding)) + .handle { + case stitch.NotFound => missing + } + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/BUILD new file mode 100644 index 000000000..26022ebe5 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/BUILD @@ -0,0 +1,14 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-core/src/main/scala", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/columns", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/modules", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/store", + "representation-manager/server/src/main/thrift:thrift-scala", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git 
a/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/UserSimClustersEmbeddingCol.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/UserSimClustersEmbeddingCol.scala new file mode 100644 index 000000000..ebcf22a1d --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/columns/user/UserSimClustersEmbeddingCol.scala @@ -0,0 +1,73 @@ +package com.twitter.representation_manager.columns.user + +import com.twitter.representation_manager.columns.ColumnConfigBase +import com.twitter.representation_manager.store.UserSimClustersEmbeddingStore +import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.stitch.storehaus.StitchOfReadableStore +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Policy +import com.twitter.strato.config.Prefix +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class UserSimClustersEmbeddingCol @Inject() (embeddingStore: UserSimClustersEmbeddingStore) + extends StratoFed.Column("recommendations/representation_manager/simClustersEmbedding.User") + with StratoFed.Fetch.Stitch { + + private val storeStitch: SimClustersEmbeddingId => Stitch[SimClustersEmbedding] = + StitchOfReadableStore(embeddingStore.userSimClustersEmbeddingStore.mapValues(_.toThrift)) + + val colPermissions: 
Seq[com.twitter.strato.config.Policy] = + ColumnConfigBase.recosPermissions ++ ColumnConfigBase.externalPermissions :+ FromColumns( + Set( + Prefix("ml/featureStore/simClusters"), + )) + + override val policy: Policy = AnyOf({ + colPermissions + }) + + override type Key = Long // UserId + override type View = SimClustersEmbeddingView + override type Value = SimClustersEmbedding + + override val keyConv: Conv[Key] = Conv.long + override val viewConv: Conv[View] = ScroogeConv.fromStruct[SimClustersEmbeddingView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[SimClustersEmbedding] + + override val contactInfo: ContactInfo = ColumnConfigBase.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText("The User SimClusters Embedding Endpoint in Representation Management Service." + + " TDD: http://go/rms-tdd")) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + val embeddingId = SimClustersEmbeddingId( + view.embeddingType, + view.modelVersion, + InternalId.UserId(key) + ) + + storeStitch(embeddingId) + .map(embedding => found(embedding)) + .handle { + case stitch.NotFound => missing + } + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/BUILD new file mode 100644 index 000000000..62b8f5dd2 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/BUILD @@ -0,0 +1,13 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "decider/src/main/scala", + "finagle/finagle-memcached", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection", +
"src/scala/com/twitter/simclusters_v2/common", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/MemCacheConfig.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/MemCacheConfig.scala new file mode 100644 index 000000000..4741edb2d --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/MemCacheConfig.scala @@ -0,0 +1,153 @@ +package com.twitter.representation_manager.common + +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.hashing.KeyHasher +import com.twitter.hermit.store.common.ObservedMemcachedReadableStore +import com.twitter.relevance_platform.common.injection.LZ4Injection +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Duration + +/* + * NOTE - ALL the cache configs here are just placeholders, NONE of them is used anywhere in RMS yet + * */ +sealed trait MemCacheParams +sealed trait MemCacheConfig + +/* + * This holds params that are required to set up a memcache cache for a single embedding store + * */ +case class EnabledMemCacheParams(ttl: Duration) extends MemCacheParams +object DisabledMemCacheParams
extends MemCacheParams + +/* + * We use this MemcacheConfig as the single source to set up the memcache for all RMS use cases + * NO OVERRIDE FROM CLIENT + * */ +object MemCacheConfig { + val keyHasher: KeyHasher = KeyHasher.FNV1A_64 + val hashKeyPrefix: String = "RMS" + val simclustersEmbeddingCacheKeyBuilder = + SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, hashKeyPrefix) + + val cacheParamsMap: Map[ + (EmbeddingType, ModelVersion), + MemCacheParams + ] = Map( + // Tweet Embeddings + (LogFavBasedTweet, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 10.minutes), + (LogFavBasedTweet, Model20m145k2020) -> EnabledMemCacheParams(ttl = 10.minutes), + (LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 10.minutes), + (LogFavLongestL2EmbeddingTweet, Model20m145k2020) -> EnabledMemCacheParams(ttl = 10.minutes), + // User - KnownFor Embeddings + (FavBasedProducer, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (FavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FollowBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (AggregatableLogFavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = + 12.hours), + (RelaxedAggregatableLogFavBasedProducer, Model20m145k2020) -> EnabledMemCacheParams(ttl = + 12.hours), + // User - InterestedIn Embeddings + (LogFavBasedUserInterestedInFromAPE, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FollowBasedUserInterestedInFromAPE, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FavBasedUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (FavBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FollowBasedUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (LogFavBasedUserInterestedIn, 
Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FavBasedUserInterestedInFromPE, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (FilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (FilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (UnfilteredUserInterestedIn, Model20m145kUpdated) -> EnabledMemCacheParams(ttl = 12.hours), + (UnfilteredUserInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (UserNextInterestedIn, Model20m145k2020) -> EnabledMemCacheParams(ttl = + 30.minutes), //embedding is updated every 2 hours, keeping it lower to avoid staleness + ( + LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ( + LogFavBasedUserInterestedAverageAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ( + LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ( + LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ( + LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ( + LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + // Topic Embeddings + (FavTfgTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + (LogFavBasedKgoApeTopic, Model20m145k2020) -> EnabledMemCacheParams(ttl = 12.hours), + ) + + def getCacheSetup( + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): MemCacheParams = { + // When requested (embeddingType, modelVersion) doesn't exist, we return DisabledMemCacheParams + 
cacheParamsMap.getOrElse((embeddingType, modelVersion), DisabledMemCacheParams) + } + + def getCacheKeyPrefix(embeddingType: EmbeddingType, modelVersion: ModelVersion) = + s"${embeddingType.value}_${modelVersion.value}_" + + def getStatsName(embeddingType: EmbeddingType, modelVersion: ModelVersion) = + s"${embeddingType.name}_${modelVersion.name}_mem_cache" + + /** + * Build a ReadableStore based on MemCacheConfig. + * + * If memcache is disabled, it will return a normal readable store wrapper of the rawStore, + * with SimClustersEmbedding as value; + * If memcache is enabled, it will return an ObservedMemcachedReadableStore wrapper of the rawStore, + * with memcache set up according to the EnabledMemCacheParams + * */ + def buildMemCacheStoreForSimClustersEmbedding( + rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding], + cacheClient: Client, + embeddingType: EmbeddingType, + modelVersion: ModelVersion, + stats: StatsReceiver + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val cacheParams = getCacheSetup(embeddingType, modelVersion) + val store = cacheParams match { + case DisabledMemCacheParams => rawStore + case EnabledMemCacheParams(ttl) => + val memCacheKeyPrefix = MemCacheConfig.getCacheKeyPrefix( + embeddingType, + modelVersion + ) + val statsName = MemCacheConfig.getStatsName( + embeddingType, + modelVersion + ) + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = rawStore, + cacheClient = cacheClient, + ttl = ttl + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = stats.scope(statsName), + keyToString = { k => memCacheKeyPrefix + k.toString } + ) + } + store.mapValues(SimClustersEmbedding(_)) + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/RepresentationManagerDecider.scala 
b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/RepresentationManagerDecider.scala new file mode 100644 index 000000000..97179e25f --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/common/RepresentationManagerDecider.scala @@ -0,0 +1,25 @@ +package com.twitter.representation_manager.common + +import com.twitter.decider.Decider +import com.twitter.decider.RandomRecipient +import com.twitter.decider.Recipient +import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing +import javax.inject.Inject + +case class RepresentationManagerDecider @Inject() (decider: Decider) { + + val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider) + + def isAvailable(feature: String, recipient: Option[Recipient]): Boolean = { + decider.isAvailable(feature, recipient) + } + + /** + * When useRandomRecipient is set to false, the decider is either completely on or off. + * When useRandomRecipient is set to true, the decider is on for the specified % of traffic. 
+ */ + def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = { + if (useRandomRecipient) isAvailable(feature, Some(RandomRecipient)) + else isAvailable(feature, None) + } +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/BUILD new file mode 100644 index 000000000..d8bf04fc0 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/BUILD @@ -0,0 +1,25 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection", + "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/readablestore", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/common", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/store", + "src/scala/com/twitter/ml/api/embedding", + "src/scala/com/twitter/simclusters_v2/common", + "src/scala/com/twitter/simclusters_v2/score", + "src/scala/com/twitter/simclusters_v2/summingbird/stores", + "src/scala/com/twitter/storehaus_internal/manhattan", + "src/scala/com/twitter/storehaus_internal/util", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "storage/clients/manhattan/client/src/main/scala", + "tweetypie/src/scala/com/twitter/tweetypie/util", + ], +) diff --git 
a/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/LegacyRMS.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/LegacyRMS.scala new file mode 100644 index 000000000..378f33594 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/migration/LegacyRMS.scala @@ -0,0 +1,846 @@ +package com.twitter.representation_manager.migration + +import com.twitter.bijection.Injection +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore +import com.twitter.contentrecommender.store.InterestsOptOutStore +import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore +import com.twitter.contentrecommender.twistly +import com.twitter.conversions.DurationOps._ +import com.twitter.decider.Decider +import com.twitter.escherbird.util.uttclient.CacheConfigV2 +import com.twitter.escherbird.util.uttclient.CachedUttClientV2 +import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2 +import com.twitter.escherbird.utt.strato.thriftscala.Environment +import com.twitter.finagle.ThriftMux +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.service.ReqRep +import com.twitter.finagle.service.ResponseClass +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.frigate.common.store.strato.StratoFetchableStore +import com.twitter.frigate.common.util.SeqLongInjection +import com.twitter.hashing.KeyHasher +import com.twitter.hermit.store.common.DeciderableReadableStore +import com.twitter.hermit.store.common.ObservedCachedReadableStore +import 
com.twitter.hermit.store.common.ObservedMemcachedReadableStore +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.interests.thriftscala.InterestsThriftService +import com.twitter.relevance_platform.common.injection.LZ4Injection +import com.twitter.relevance_platform.common.readablestore.ReadableStoreWithTimeout +import com.twitter.representation_manager.common.RepresentationManagerDecider +import com.twitter.representation_manager.store.DeciderConstants +import com.twitter.representation_manager.store.DeciderKey +import com.twitter.simclusters_v2.common.ModelVersions +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.common.SimClustersEmbeddingIdCacheKeyBuilder +import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore +import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore +import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore +import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145k2020 +import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145kUpdated +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbedding +import com.twitter.simclusters_v2.thriftscala.SimClustersMultiEmbeddingId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storehaus.ReadableStore +import 
com.twitter.storehaus_internal.manhattan.Athena +import com.twitter.storehaus_internal.manhattan.ManhattanRO +import com.twitter.storehaus_internal.manhattan.ManhattanROConfig +import com.twitter.storehaus_internal.util.ApplicationID +import com.twitter.storehaus_internal.util.DatasetName +import com.twitter.storehaus_internal.util.HDFSPath +import com.twitter.strato.client.Strato +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.tweetypie.util.UserId +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Throw +import com.twitter.util.Timer +import javax.inject.Inject +import javax.inject.Named +import scala.reflect.ClassTag + +class LegacyRMS @Inject() ( + serviceIdentifier: ServiceIdentifier, + cacheClient: Client, + stats: StatsReceiver, + decider: Decider, + clientId: ClientId, + timer: Timer, + @Named("cacheHashKeyPrefix") val cacheHashKeyPrefix: String = "RMS", + @Named("useContentRecommenderConfiguration") val useContentRecommenderConfiguration: Boolean = + false) { + + private val mhMtlsParams: ManhattanKVClientMtlsParams = ManhattanKVClientMtlsParams( + serviceIdentifier) + private val rmsDecider = RepresentationManagerDecider(decider) + val keyHasher: KeyHasher = KeyHasher.FNV1A_64 + + private val embeddingCacheKeyBuilder = + SimClustersEmbeddingIdCacheKeyBuilder(keyHasher.hashKey, cacheHashKeyPrefix) + private val statsReceiver = stats.scope("representation_management") + + // Strato client, default timeout = 280ms + val stratoClient: StratoClient = + Strato.client + .withMutualTls(serviceIdentifier) + .build() + + // Builds ThriftMux client builder for Content-Recommender service + private def makeThriftClientBuilder( + requestTimeout: Duration + ): ThriftMux.Client = { + ThriftMux.client + .withClientId(clientId) + .withMutualTls(serviceIdentifier) + .withRequestTimeout(requestTimeout) + 
.withStatsReceiver(statsReceiver.scope("clnt")) + .withResponseClassifier { + case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable + } + } + + private def makeThriftClient[ThriftServiceType: ClassTag]( + dest: String, + label: String, + requestTimeout: Duration = 450.milliseconds + ): ThriftServiceType = { + makeThriftClientBuilder(requestTimeout) + .build[ThriftServiceType](dest, label) + } + + /** *** SimCluster Embedding Stores ******/ + implicit val simClustersEmbeddingIdInjection: Injection[SimClustersEmbeddingId, Array[Byte]] = + BinaryScalaCodec(SimClustersEmbeddingId) + implicit val simClustersEmbeddingInjection: Injection[ThriftSimClustersEmbedding, Array[Byte]] = + BinaryScalaCodec(ThriftSimClustersEmbedding) + implicit val simClustersMultiEmbeddingInjection: Injection[SimClustersMultiEmbedding, Array[ + Byte + ]] = + BinaryScalaCodec(SimClustersMultiEmbedding) + implicit val simClustersMultiEmbeddingIdInjection: Injection[SimClustersMultiEmbeddingId, Array[ + Byte + ]] = + BinaryScalaCodec(SimClustersMultiEmbeddingId) + + def getEmbeddingsDataset( + mhMtlsParams: ManhattanKVClientMtlsParams, + datasetName: String + ): ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = { + ManhattanRO.getReadableStoreWithMtls[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + ManhattanROConfig( + HDFSPath(""), // not needed + ApplicationID("content_recommender_athena"), + DatasetName(datasetName), // this should be correct + Athena + ), + mhMtlsParams + ) + } + + lazy val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .longestL2NormTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset, + statsReceiver, + maxLength = 10, + ).mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = 
rawStore, + cacheClient = cacheClient, + ttl = 15.minutes + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = + statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_mem_cache"), + keyToString = { k => + s"scez_l2:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$k" + } + ) + + val inMemoryCacheStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + memcachedStore + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + LogFavLongestL2EmbeddingTweet, + Model20m145k2020, + InternalId.TweetId(tweetId)) => + tweetId + } + .mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + inMemoryCacheStore, + ttl = 12.minute, + maxKeys = 1048575, + cacheName = "log_fav_based_longest_l2_tweet_embedding_20m145k2020_cache", + windowSize = 10000L + )(statsReceiver.scope("log_fav_based_longest_l2_tweet_embedding_20m145k2020_store")) + } + + lazy val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .mostRecentTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset, + statsReceiver + ).mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = rawStore, + cacheClient = cacheClient, + ttl = 10.minutes + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_mem_cache"), + keyToString = { k => + // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId + s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145KUpdated}_$k" + } + ) + + val inMemoryCacheStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + memcachedStore + 
.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + LogFavBasedTweet, + Model20m145kUpdated, + InternalId.TweetId(tweetId)) => + tweetId + } + .mapValues(SimClustersEmbedding(_)) + } + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + inMemoryCacheStore, + ttl = 5.minute, + maxKeys = 1048575, // 200MB + cacheName = "log_fav_based_tweet_embedding_cache", + windowSize = 10000L + )(statsReceiver.scope("log_fav_based_tweet_embedding_store")) + } + + lazy val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .mostRecentTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset, + statsReceiver, + maxLength = 10, + ).mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = rawStore, + cacheClient = cacheClient, + ttl = 15.minutes + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_mem_cache"), + keyToString = { k => + // SimClusters_embedding_LZ4/embeddingType_modelVersion_tweetId + s"scez:${LogFavBasedTweet}_${ModelVersions.Model20M145K2020}_$k" + } + ) + + val inMemoryCacheStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + memcachedStore + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + LogFavBasedTweet, + Model20m145k2020, + InternalId.TweetId(tweetId)) => + tweetId + } + .mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + inMemoryCacheStore, + ttl = 12.minute, + maxKeys = 16777215, + cacheName = "log_fav_based_tweet_embedding_20m145k2020_cache", + windowSize = 10000L + )(statsReceiver.scope("log_fav_based_tweet_embedding_20m145k2020_store")) + } + + lazy 
val favBasedTfgTopicEmbedding2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val stratoStore = + StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020") + + val truncatedStore = stratoStore.mapValues { embedding => + SimClustersEmbedding(embedding, truncate = 50) + } + + ObservedCachedReadableStore.from( + ObservedReadableStore(truncatedStore)( + statsReceiver.scope("fav_tfg_topic_embedding_2020_cache_backing_store")), + ttl = 12.hours, + maxKeys = 262143, // 200MB + cacheName = "fav_tfg_topic_embedding_2020_cache", + windowSize = 10000L + )(statsReceiver.scope("fav_tfg_topic_embedding_2020_cache")) + } + + lazy val logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + ObservedReadableStore( + StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020") + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + AggregatableLogFavBasedProducer, + Model20m145k2020, + internalId) => + SimClustersEmbeddingId(AggregatableLogFavBasedProducer, Model20m145k2020, internalId) + } + .mapValues(embedding => SimClustersEmbedding(embedding, 50)) + )(statsReceiver.scope("aggregatable_producer_embeddings_by_logfav_score_2020")) + } + + val interestService: InterestsThriftService.MethodPerEndpoint = + makeThriftClient[InterestsThriftService.MethodPerEndpoint]( + "/s/interests-thrift-service/interests-thrift-service", + "interests_thrift_service" + ) + + val interestsOptOutStore: InterestsOptOutStore = InterestsOptOutStore(interestService) + + // Save 2 ^ 18 UTTs. 
Promising 100% cache rate + lazy val defaultCacheConfigV2: CacheConfigV2 = CacheConfigV2(262143) + lazy val uttClientCacheConfigsV2: UttClientCacheConfigsV2 = UttClientCacheConfigsV2( + getTaxonomyConfig = defaultCacheConfigV2, + getUttTaxonomyConfig = defaultCacheConfigV2, + getLeafIds = defaultCacheConfigV2, + getLeafUttEntities = defaultCacheConfigV2 + ) + + // CachedUttClient to use StratoClient + lazy val cachedUttClientV2: CachedUttClientV2 = new CachedUttClientV2( + stratoClient = stratoClient, + env = Environment.Prod, + cacheConfigs = uttClientCacheConfigsV2, + statsReceiver = statsReceiver.scope("cached_utt_client") + ) + + lazy val semanticCoreTopicSeedStore: ReadableStore[ + SemanticCoreTopicSeedStore.Key, + Seq[UserId] + ] = { + /* + Up to 1000 Long seeds per topic/language = 62.5kb per topic/language (worst case) + Assume ~10k active topic/languages ~= 650MB (worst case) + */ + val underlying = new SemanticCoreTopicSeedStore(cachedUttClientV2, interestsOptOutStore)( + statsReceiver.scope("semantic_core_topic_seed_store")) + + val memcacheStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = underlying, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = SeqLongInjection, + statsReceiver = statsReceiver.scope("topic_producer_seed_store_mem_cache"), + keyToString = { k => s"tpss:${k.entityId}_${k.languageCode}" } + ) + + ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]]( + store = memcacheStore, + ttl = 6.hours, + maxKeys = 20e3.toInt, + cacheName = "topic_producer_seed_store_cache", + windowSize = 5000 + )(statsReceiver.scope("topic_producer_seed_store_cache")) + } + + lazy val logFavBasedApeEntity20M145K2020EmbeddingStore: ApeEntityEmbeddingStore = { + val apeStore = logFavBasedApe20M145K2020EmbeddingStore.composeKeyMapping[UserId]({ id => + SimClustersEmbeddingId( + AggregatableLogFavBasedProducer, + Model20m145k2020, + InternalId.UserId(id)) + }) + + new ApeEntityEmbeddingStore( + 
semanticCoreSeedStore = semanticCoreTopicSeedStore, + aggregatableProducerEmbeddingStore = apeStore, + statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_store")) + } + + lazy val logFavBasedApeEntity20M145K2020EmbeddingCachedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val truncatedStore = + logFavBasedApeEntity20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = truncatedStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + val inMemoryCachedStore = + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "log_fav_based_ape_entity_2020_embedding_cache", + windowSize = 10000L + )(statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_cached_store")) + + DeciderableReadableStore( + inMemoryCachedStore, + rmsDecider.deciderGateBuilder.idGateWithHashing[SimClustersEmbeddingId]( + DeciderKey.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore), + statsReceiver.scope("log_fav_based_ape_entity_2020_embedding_deciderable_store") + ) + } + + lazy val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + ObservedReadableStore( + StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020") + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + RelaxedAggregatableLogFavBasedProducer, + 
Model20m145k2020, + internalId) => + SimClustersEmbeddingId( + RelaxedAggregatableLogFavBasedProducer, + Model20m145k2020, + internalId) + } + .mapValues(embedding => SimClustersEmbedding(embedding).truncate(50)) + )(statsReceiver.scope( + "aggregatable_producer_embeddings_by_logfav_score_relaxed_fav_engagement_threshold_2020")) + } + + lazy val relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val truncatedStore = + relaxedLogFavBasedApe20M145K2020EmbeddingStore.mapValues(_.truncate(50).toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = truncatedStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = + statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_mem_cache"), + keyToString = { k: SimClustersEmbeddingId => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "relaxed_log_fav_based_ape_entity_2020_embedding_cache", + windowSize = 10000L + )(statsReceiver.scope("relaxed_log_fav_based_ape_entity_2020_embedding_cache_store")) + } + + lazy val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore = ProducerClusterEmbeddingReadableStores + .getProducerTopKSimClusters2020EmbeddingsStore( + mhMtlsParams + ).composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + FavBasedProducer, + Model20m145k2020, + InternalId.UserId(userId)) => + userId + }.mapValues { topSimClustersWithScore => + ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters.take(10)) + } + + // same memcache config as for favBasedUserInterestedIn20M145K2020Store + val 
memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 24.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 12.hours, + maxKeys = 16777215, + cacheName = "fav_based_producer_embedding_20M_145K_2020_embedding_cache", + windowSize = 10000L + )(statsReceiver.scope("fav_based_producer_embedding_20M_145K_2020_embedding_store")) + } + + // Production + lazy val interestedIn20M145KUpdatedStore: ReadableStore[UserId, ClustersUserIsInterestedIn] = { + UserInterestedInReadableStore.defaultStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145KUpdated + ) + } + + // Production + lazy val interestedIn20M145K2020Store: ReadableStore[UserId, ClustersUserIsInterestedIn] = { + UserInterestedInReadableStore.defaultStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145K2020 + ) + } + + // Production + lazy val InterestedInFromPE20M145KUpdatedStore: ReadableStore[ + UserId, + ClustersUserIsInterestedIn + ] = { + UserInterestedInReadableStore.defaultIIPEStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145KUpdated) + } + + lazy val simClustersInterestedInStore: ReadableStore[ + (UserId, ModelVersion), + ClustersUserIsInterestedIn + ] = { + new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] { + override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = { + k match { + case (userId, Model20m145kUpdated) => + interestedIn20M145KUpdatedStore.get(userId) + case (userId, Model20m145k2020) => + interestedIn20M145K2020Store.get(userId) + case _ => + 
Future.None + } + } + } + } + + lazy val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[ + (UserId, ModelVersion), + ClustersUserIsInterestedIn + ] = { + new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] { + override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = { + k match { + case (userId, ModelVersion.Model20m145kUpdated) => + InterestedInFromPE20M145KUpdatedStore.get(userId) + case _ => + Future.None + } + } + } + } + + lazy val userInterestedInStore = + new twistly.interestedin.EmbeddingStore( + interestedInStore = simClustersInterestedInStore, + interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore, + statsReceiver = statsReceiver + ) + + // Production + lazy val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore = + UserInterestedInReadableStore + .defaultSimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.FavBasedUserInterestedIn, + ModelVersion.Model20m145kUpdated) + .mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("fav_based_user_interested_in_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "fav_based_user_interested_in_cache", + windowSize = 10000L + )(statsReceiver.scope("fav_based_user_interested_in_store")) + } + + // Production + lazy val LogFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val 
underlyingStore = + UserInterestedInReadableStore + .defaultIIAPESimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.LogFavBasedUserInterestedInFromAPE, + ModelVersion.Model20m145k2020) + .mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_from_ape_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "log_fav_based_user_interested_in_from_ape_cache", + windowSize = 10000L + )(statsReceiver.scope("log_fav_based_user_interested_in_from_ape_store")) + } + + // Production + lazy val FollowBasedInterestedInFromAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore = + UserInterestedInReadableStore + .defaultIIAPESimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.FollowBasedUserInterestedInFromAPE, + ModelVersion.Model20m145k2020) + .mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("follow_based_user_interested_in_from_ape_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "follow_based_user_interested_in_from_ape_cache", + 
windowSize = 10000L + )(statsReceiver.scope("follow_based_user_interested_in_from_ape_store")) + } + + // production + lazy val favBasedUserInterestedIn20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding] = + UserInterestedInReadableStore + .defaultSimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.FavBasedUserInterestedIn, + ModelVersion.Model20m145k2020).mapValues(_.toThrift) + + ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("fav_based_user_interested_in_2020_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + } + + // Production + lazy val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore = + UserInterestedInReadableStore + .defaultSimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.LogFavBasedUserInterestedIn, + ModelVersion.Model20m145k2020) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore.mapValues(_.toThrift), + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("log_fav_based_user_interested_in_2020_store"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "log_fav_based_user_interested_in_2020_cache", + windowSize = 10000L + 
)(statsReceiver.scope("log_fav_based_user_interested_in_2020_store")) + } + + // Production + lazy val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val underlyingStore = + UserInterestedInReadableStore + .defaultIIPESimClustersEmbeddingStoreWithMtls( + mhMtlsParams, + EmbeddingType.FavBasedUserInterestedInFromPE, + ModelVersion.Model20m145kUpdated) + .mapValues(_.toThrift) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = cacheClient, + ttl = 12.hours + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(ThriftSimClustersEmbedding)), + statsReceiver = statsReceiver.scope("fav_based_user_interested_in_from_pe_mem_cache"), + keyToString = { k => embeddingCacheKeyBuilder.apply(k) } + ).mapValues(SimClustersEmbedding(_)) + + ObservedCachedReadableStore.from[SimClustersEmbeddingId, SimClustersEmbedding]( + memcachedStore, + ttl = 6.hours, + maxKeys = 262143, + cacheName = "fav_based_user_interested_in_from_pe_cache", + windowSize = 10000L + )(statsReceiver.scope("fav_based_user_interested_in_from_pe_cache")) + } + + private val underlyingStores: Map[ + (EmbeddingType, ModelVersion), + ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] + ] = Map( + // Tweet Embeddings + (LogFavBasedTweet, Model20m145kUpdated) -> logFavBased20M145KUpdatedTweetEmbeddingStore, + (LogFavBasedTweet, Model20m145k2020) -> logFavBased20M145K2020TweetEmbeddingStore, + ( + LogFavLongestL2EmbeddingTweet, + Model20m145k2020) -> logFavBasedLongestL2Tweet20M145K2020EmbeddingStore, + // Entity Embeddings + (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding2020Store, + ( + LogFavBasedKgoApeTopic, + Model20m145k2020) -> logFavBasedApeEntity20M145K2020EmbeddingCachedStore, + // KnownFor Embeddings + (FavBasedProducer, Model20m145k2020) -> favBasedProducer20M145K2020EmbeddingStore, + ( + RelaxedAggregatableLogFavBasedProducer, 
+ Model20m145k2020) -> relaxedLogFavBasedApe20M145K2020EmbeddingCachedStore, + // InterestedIn Embeddings + ( + LogFavBasedUserInterestedInFromAPE, + Model20m145k2020) -> LogFavBasedInterestedInFromAPE20M145K2020Store, + ( + FollowBasedUserInterestedInFromAPE, + Model20m145k2020) -> FollowBasedInterestedInFromAPE20M145K2020Store, + (FavBasedUserInterestedIn, Model20m145kUpdated) -> favBasedUserInterestedIn20M145KUpdatedStore, + (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store, + (LogFavBasedUserInterestedIn, Model20m145k2020) -> logFavBasedUserInterestedIn20M145K2020Store, + ( + FavBasedUserInterestedInFromPE, + Model20m145kUpdated) -> favBasedUserInterestedInFromPE20M145KUpdatedStore, + (FilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore, + (FilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore, + (FilteredUserInterestedInFromPE, Model20m145kUpdated) -> userInterestedInStore, + (UnfilteredUserInterestedIn, Model20m145kUpdated) -> userInterestedInStore, + (UnfilteredUserInterestedIn, Model20m145k2020) -> userInterestedInStore, + ) + + val simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val underlying: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + SimClustersEmbeddingStore.buildWithDecider( + underlyingStores = underlyingStores, + decider = rmsDecider.decider, + statsReceiver = statsReceiver.scope("simClusters_embeddings_store_deciderable") + ) + + val underlyingWithTimeout: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + new ReadableStoreWithTimeout( + rs = underlying, + decider = rmsDecider.decider, + enableTimeoutDeciderKey = DeciderConstants.enableSimClustersEmbeddingStoreTimeouts, + timeoutValueKey = DeciderConstants.simClustersEmbeddingStoreTimeoutValueMillis, + timer = timer, + statsReceiver = statsReceiver.scope("simClusters_embedding_store_timeouts") + ) + + ObservedReadableStore( + store = 
underlyingWithTimeout + )(statsReceiver.scope("simClusters_embeddings_store")) + } +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/BUILD new file mode 100644 index 000000000..ab19a1dd7 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/BUILD @@ -0,0 +1,18 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-stats", + "finatra/inject/inject-core/src/main/scala", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", + "interests-service/thrift/src/main/thrift:thrift-scala", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/common", + "servo/util", + "src/scala/com/twitter/storehaus_internal/manhattan", + "src/scala/com/twitter/storehaus_internal/memcache", + "src/scala/com/twitter/storehaus_internal/util", + "strato/src/main/scala/com/twitter/strato/client", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/CacheModule.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/CacheModule.scala new file mode 100644 index 000000000..a042225fa --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/CacheModule.scala @@ -0,0 +1,34 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import com.twitter.finagle.memcached.Client +import javax.inject.Singleton +import com.twitter.conversions.DurationOps._ +import com.twitter.inject.TwitterModule +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import 
com.twitter.finagle.stats.StatsReceiver +import com.twitter.storehaus_internal.memcache.MemcacheStore +import com.twitter.storehaus_internal.util.ClientName +import com.twitter.storehaus_internal.util.ZkEndPoint +/** Guice module that provides the singleton memcached [[Client]] (client name `memcache_representation_manager`). */ +object CacheModule extends TwitterModule { + /* Flags are declared with a name and help text only; no default value is passed here. */ + private val cacheDest = flag[String]("cache_module.dest", "Path to memcache service") + private val timeout = flag[Int]("memcache.timeout", "Memcache client timeout") + private val retries = flag[Int]("memcache.retries", "Memcache timeout retries") + /** Builds the client from the flags above: `cache_module.dest` is wrapped in a [[ZkEndPoint]], `memcache.timeout` is read as milliseconds, and stats go under the `cache_client` scope. */ + @Singleton + @Provides + def providesCache( + serviceIdentifier: ServiceIdentifier, + stats: StatsReceiver + ): Client = + MemcacheStore.memcachedClient( + name = ClientName("memcache_representation_manager"), + dest = ZkEndPoint(cacheDest()), + timeout = timeout().milliseconds, + retries = retries(), + statsReceiver = stats.scope("cache_client"), + serviceIdentifier = serviceIdentifier + ) +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/InterestsThriftClientModule.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/InterestsThriftClientModule.scala new file mode 100644 index 000000000..82a5a5004 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/InterestsThriftClientModule.scala @@ -0,0 +1,40 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.ThriftMux +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.service.ReqRep +import com.twitter.finagle.service.ResponseClass +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.inject.TwitterModule +import 
com.twitter.interests.thriftscala.InterestsThriftService +import com.twitter.util.Throw +import javax.inject.Singleton +/** Guice module that provides the ThriftMux client for the Interests thrift service. */ +object InterestsThriftClientModule extends TwitterModule { + /** Builds a mutual-TLS ThriftMux client with a 450 ms request timeout; responses failing with [[ClientDiscardedRequestException]] are classified as `Ignorable`. */ + @Singleton + @Provides + def providesInterestsThriftClient( + clientId: ClientId, + serviceIdentifier: ServiceIdentifier, + statsReceiver: StatsReceiver + ): InterestsThriftService.MethodPerEndpoint = { + ThriftMux.client + .withClientId(clientId) + .withMutualTls(serviceIdentifier) + .withRequestTimeout(450.milliseconds) + .withStatsReceiver(statsReceiver.scope("InterestsThriftClient")) + .withResponseClassifier { + case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable + } + .build[InterestsThriftService.MethodPerEndpoint]( + dest = "/s/interests-thrift-service/interests-thrift-service", + label = "interests_thrift_service" + ) + } +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/LegacyRMSConfigModule.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/LegacyRMSConfigModule.scala new file mode 100644 index 000000000..0a06dffe6 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/LegacyRMSConfigModule.scala @@ -0,0 +1,18 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import com.twitter.inject.TwitterModule +import javax.inject.Named +import javax.inject.Singleton +/** Guice module exposing two `@Named` constants: `cacheHashKeyPrefix` (the string "RMS") and `useContentRecommenderConfiguration` (false). */ +object LegacyRMSConfigModule extends TwitterModule { + @Singleton + @Provides + @Named("cacheHashKeyPrefix") + def providesCacheHashKeyPrefix: String = "RMS" + /* NOTE(review): the consumers of this named Boolean are not visible in this file. */ + @Singleton + @Provides + @Named("useContentRecommenderConfiguration") + def providesUseContentRecommenderConfiguration: Boolean = false +} diff --git 
b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/StoreModule.scala new file mode 100644 index 000000000..a2efe5925 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/StoreModule.scala @@ -0,0 +1,24 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import javax.inject.Singleton +import com.twitter.inject.TwitterModule +import com.twitter.decider.Decider +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.representation_manager.common.RepresentationManagerDecider +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +/** Guice module providing the Manhattan mTLS parameters (built from the service identifier) and the [[RepresentationManagerDecider]]. */ +object StoreModule extends TwitterModule { + @Singleton + @Provides + def providesMhMtlsParams( + serviceIdentifier: ServiceIdentifier + ): ManhattanKVClientMtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier) + /** Wraps the injected [[Decider]] in a [[RepresentationManagerDecider]]. */ + @Singleton + @Provides + def providesRmsDecider( + decider: Decider + ): RepresentationManagerDecider = RepresentationManagerDecider(decider) + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/TimerModule.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/TimerModule.scala new file mode 100644 index 000000000..fe7fddb45 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/TimerModule.scala @@ -0,0 +1,13 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import com.twitter.finagle.util.DefaultTimer +import com.twitter.inject.TwitterModule +import com.twitter.util.Timer +import javax.inject.Singleton +/** Guice module binding [[Timer]] to Finagle's [[DefaultTimer]] as a singleton. */ +object TimerModule extends TwitterModule { + @Singleton + @Provides + def providesTimer: Timer = DefaultTimer +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/UttClientModule.scala 
b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/UttClientModule.scala new file mode 100644 index 000000000..cc2100c1c --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/modules/UttClientModule.scala @@ -0,0 +1,39 @@ +package com.twitter.representation_manager.modules + +import com.google.inject.Provides +import com.twitter.escherbird.util.uttclient.CacheConfigV2 +import com.twitter.escherbird.util.uttclient.CachedUttClientV2 +import com.twitter.escherbird.util.uttclient.UttClientCacheConfigsV2 +import com.twitter.escherbird.utt.strato.thriftscala.Environment +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.inject.TwitterModule +import com.twitter.strato.client.{Client => StratoClient} +import javax.inject.Singleton +/** Guice module that provides a singleton [[CachedUttClientV2]] built on the injected Strato client. */ +object UttClientModule extends TwitterModule { + /** Builds the client for `Environment.Prod`, applying the same cache config to all four UTT lookups. */ + @Singleton + @Provides + def providesUttClient( + stratoClient: StratoClient, + statsReceiver: StatsReceiver + ): CachedUttClientV2 = { + // Save 2^18 - 1 (262143) UTTs. 
Promising 100% cache rate + val defaultCacheConfigV2: CacheConfigV2 = CacheConfigV2(262143) + + val uttClientCacheConfigsV2: UttClientCacheConfigsV2 = UttClientCacheConfigsV2( + getTaxonomyConfig = defaultCacheConfigV2, + getUttTaxonomyConfig = defaultCacheConfigV2, + getLeafIds = defaultCacheConfigV2, + getLeafUttEntities = defaultCacheConfigV2 + ) + + // CachedUttClient backed by the StratoClient, pointed at the Prod environment + new CachedUttClientV2( + stratoClient = stratoClient, + env = Environment.Prod, + cacheConfigs = uttClientCacheConfigsV2, + statsReceiver = statsReceiver.scope("cached_utt_client") + ) + } +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/BUILD b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/BUILD new file mode 100644 index 000000000..1731a2649 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/BUILD @@ -0,0 +1,16 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "content-recommender/server/src/main/scala/com/twitter/contentrecommender:representation-manager-deps", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/common", + "src/scala/com/twitter/simclusters_v2/stores", + "src/scala/com/twitter/simclusters_v2/summingbird/stores", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "storage/clients/manhattan/client/src/main/scala", + "tweetypie/src/scala/com/twitter/tweetypie/util", + ], +) diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/DeciderConstants.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/DeciderConstants.scala new file mode 100644 index 
000000000..dd00ea126 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/DeciderConstants.scala @@ -0,0 +1,39 @@ +package com.twitter.representation_manager.store + +import com.twitter.servo.decider.DeciderKeyEnum +/** Names of the decider keys, kept as plain strings. */ +object DeciderConstants { + // Deciders inherited from CR and RSX and only used in LegacyRMS + // Their values are controlled through CR's and RSX's yml files and their decider dashboards + // We will remove them once the migration is complete + val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore = + "enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore" + + val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore = + "enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore" + // Timeout deciders come in pairs: an enable switch and a timeout value in milliseconds + val enablelogFavBased20M145K2020TweetEmbeddingStoreTimeouts = + "enable_log_fav_based_tweet_embedding_20m145k2020_timeouts" + val logFavBased20M145K2020TweetEmbeddingStoreTimeoutValueMillis = + "log_fav_based_tweet_embedding_20m145k2020_timeout_value_millis" + + val enablelogFavBased20M145KUpdatedTweetEmbeddingStoreTimeouts = + "enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts" + val logFavBased20M145KUpdatedTweetEmbeddingStoreTimeoutValueMillis = + "log_fav_based_tweet_embedding_20m145kUpdated_timeout_value_millis" + + val enableSimClustersEmbeddingStoreTimeouts = "enable_sim_clusters_embedding_store_timeouts" + val simClustersEmbeddingStoreTimeoutValueMillis = + "sim_clusters_embedding_store_timeout_value_millis" +} + +// DeciderKeyEnum values for the two cached-store deciders above; necessary for using servo Gates +object DeciderKey extends DeciderKeyEnum { + val enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: Value = Value( + DeciderConstants.enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore + ) + + val enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: Value = Value( + DeciderConstants.enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore + ) +} diff --git 
a/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TopicSimClustersEmbeddingStore.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TopicSimClustersEmbeddingStore.scala new file mode 100644 index 000000000..cc6485b79 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TopicSimClustersEmbeddingStore.scala @@ -0,0 +1,198 @@ +package com.twitter.representation_manager.store + +import com.twitter.contentrecommender.store.ApeEntityEmbeddingStore +import com.twitter.contentrecommender.store.InterestsOptOutStore +import com.twitter.contentrecommender.store.SemanticCoreTopicSeedStore +import com.twitter.conversions.DurationOps._ +import com.twitter.escherbird.util.uttclient.CachedUttClientV2 +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.store.strato.StratoFetchableStore +import com.twitter.frigate.common.util.SeqLongInjection +import com.twitter.hermit.store.common.ObservedCachedReadableStore +import com.twitter.hermit.store.common.ObservedMemcachedReadableStore +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.interests.thriftscala.InterestsThriftService +import com.twitter.representation_manager.common.MemCacheConfig +import com.twitter.representation_manager.common.RepresentationManagerDecider +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import 
com.twitter.simclusters_v2.thriftscala.TopicId +import com.twitter.simclusters_v2.thriftscala.LocaleEntityId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.util.UserId +import javax.inject.Inject +/** Builds the topic SimClusters embedding stores and exposes them, decider-gated, as `topicSimClustersEmbeddingStore`. */ +class TopicSimClustersEmbeddingStore @Inject() ( + stratoClient: StratoClient, + cacheClient: Client, + globalStats: StatsReceiver, + mhMtlsParams: ManhattanKVClientMtlsParams, + rmsDecider: RepresentationManagerDecider, + interestService: InterestsThriftService.MethodPerEndpoint, + uttClient: CachedUttClientV2) { + /* All stats in this class are scoped under the class's simple name. */ + private val stats = globalStats.scope(this.getClass.getSimpleName) + private val interestsOptOutStore = InterestsOptOutStore(interestService) + + /** + * Note this is NOT an embedding store. It is a list of author account ids we use to represent + * topics. Reads go through an in-process cache (6 hour TTL), then memcache (12 hour TTL), then the underlying store. + */ + private val semanticCoreTopicSeedStore: ReadableStore[ + SemanticCoreTopicSeedStore.Key, + Seq[UserId] + ] = { + /* + Up to 1000 Long seeds per topic/language = 62.5 kilobits, i.e. about 8 KB, per topic/language (worst case) + Assume ~10k active topic/languages ~= 80 MB of raw ids (worst case). NOTE(review): this figure previously read 650MB, which matches the total in megabits rather than megabytes; confirm + */ + val underlying = new SemanticCoreTopicSeedStore(uttClient, interestsOptOutStore)( + stats.scope("semantic_core_topic_seed_store")) + + val memcacheStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = underlying, + cacheClient = cacheClient, + ttl = 12.hours)( + valueInjection = SeqLongInjection, + statsReceiver = stats.scope("topic_producer_seed_store_mem_cache"), + keyToString = { k => s"tpss:${k.entityId}_${k.languageCode}" } + ) + + ObservedCachedReadableStore.from[SemanticCoreTopicSeedStore.Key, Seq[UserId]]( + store = memcacheStore, + ttl = 6.hours, + maxKeys = 20e3.toInt, + cacheName = "topic_producer_seed_store_cache", + windowSize = 5000 + 
)(stats.scope("topic_producer_seed_store_cache")) + } + /** FavTfgTopic / Model20m145k2020 embeddings fetched from Strato, truncated with `truncate = 50`, keyed by [[LocaleEntityId]] and passed through `buildLocaleEntityIdMemCacheStore`. */ + private val favBasedTfgTopicEmbedding20m145k2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/favBasedTFGTopic20M145K2020").mapValues( + embedding => SimClustersEmbedding(embedding, truncate = 50).toThrift) + .composeKeyMapping[LocaleEntityId] { localeEntityId => + SimClustersEmbeddingId( + FavTfgTopic, + Model20m145k2020, + InternalId.LocaleEntityId(localeEntityId)) + } + + buildLocaleEntityIdMemCacheStore(rawStore, FavTfgTopic, Model20m145k2020) + } + /** LogFavBasedKgoApeTopic / Model20m145k2020 embeddings produced by [[ApeEntityEmbeddingStore]] from the topic seed store and the Strato producer-embedding column; `truncate = 50` is applied to both the producer embeddings and the result. */ + private val logFavBasedApeEntity20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val apeStore = StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020") + .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50)) + .composeKeyMapping[UserId]({ id => + SimClustersEmbeddingId( + AggregatableLogFavBasedProducer, + Model20m145k2020, + InternalId.UserId(id)) + }) + val rawStore = new ApeEntityEmbeddingStore( + semanticCoreSeedStore = semanticCoreTopicSeedStore, + aggregatableProducerEmbeddingStore = apeStore, + statsReceiver = stats.scope("log_fav_based_ape_entity_2020_embedding_store")) + .mapValues(embedding => SimClustersEmbedding(embedding.toThrift, truncate = 50).toThrift) + .composeKeyMapping[TopicId] { topicId => + SimClustersEmbeddingId( + LogFavBasedKgoApeTopic, + Model20m145k2020, + InternalId.TopicId(topicId)) + } + + buildTopicIdMemCacheStore(rawStore, LogFavBasedKgoApeTopic, Model20m145k2020) + } + /** Observes `rawStore` under the embedding-type and model-version stat scopes, re-keys it by [[SimClustersEmbeddingId]] and hands it to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`. NOTE(review): the key pattern only matches `InternalId.TopicId`; presumably other id kinds raise a MatchError, so confirm callers never pass them. */ + private def buildTopicIdMemCacheStore( + rawStore: ReadableStore[TopicId, ThriftSimClustersEmbedding], + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): 
ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val observedStore: ObservedReadableStore[TopicId, ThriftSimClustersEmbedding] = + ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + val storeWithKeyMapping = observedStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.TopicId(topicId)) => + topicId + } + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + storeWithKeyMapping, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + /** Same wrapping as `buildTopicIdMemCacheStore`, for raw stores keyed by [[LocaleEntityId]]; the key pattern only matches `InternalId.LocaleEntityId`. */ + private def buildLocaleEntityIdMemCacheStore( + rawStore: ReadableStore[LocaleEntityId, ThriftSimClustersEmbedding], + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val observedStore: ObservedReadableStore[LocaleEntityId, ThriftSimClustersEmbedding] = + ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + val storeWithKeyMapping = observedStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.LocaleEntityId(localeEntityId)) => + localeEntityId + } + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + storeWithKeyMapping, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + /** Routing table from (embedding type, model version) to the store that serves it. */ + private val underlyingStores: Map[ + (EmbeddingType, ModelVersion), + ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] + ] = Map( + // Topic Embeddings + (FavTfgTopic, Model20m145k2020) -> favBasedTfgTopicEmbedding20m145k2020Store, + (LogFavBasedKgoApeTopic, Model20m145k2020) -> logFavBasedApeEntity20M145K2020EmbeddingStore, + ) + /** Public entry point: the routing table above combined via `SimClustersEmbeddingStore.buildWithDecider`. */ + val topicSimClustersEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + SimClustersEmbeddingStore.buildWithDecider( + underlyingStores = underlyingStores, + decider = rmsDecider.decider, + statsReceiver = stats + ) + } + +} diff --git 
a/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TweetSimClustersEmbeddingStore.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TweetSimClustersEmbeddingStore.scala new file mode 100644 index 000000000..857e38649 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/TweetSimClustersEmbeddingStore.scala @@ -0,0 +1,141 @@ +package com.twitter.representation_manager.store + +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.representation_manager.common.MemCacheConfig +import com.twitter.representation_manager.common.RepresentationManagerDecider +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.common.TweetId +import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore +import com.twitter.simclusters_v2.summingbird.stores.PersistentTweetEmbeddingStore +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storehaus.ReadableStore +import javax.inject.Inject +/** Builds the tweet SimClusters embedding stores and exposes them, decider-gated, as `tweetSimClustersEmbeddingStore`. */ +class TweetSimClustersEmbeddingStore @Inject() ( + cacheClient: Client, + globalStats: StatsReceiver, + mhMtlsParams: ManhattanKVClientMtlsParams, + rmsDecider: RepresentationManagerDecider) { + /* All stats in this class are scoped under the class's simple name. */ + private val stats = globalStats.scope(this.getClass.getSimpleName) + /** Longest-L2-norm tweet embeddings read from the `LogFavBased20m145kUpdatedDataset` Manhattan dataset. */ + val 
logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .longestL2NormTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset, + stats + ).mapValues(_.toThrift) + + buildMemCacheStore(rawStore, LogFavLongestL2EmbeddingTweet, Model20m145kUpdated) + } + /** Longest-L2-norm tweet embeddings read from the `LogFavBased20m145k2020Dataset` Manhattan dataset. */ + val logFavBasedLongestL2Tweet20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .longestL2NormTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset, + stats + ).mapValues(_.toThrift) + + buildMemCacheStore(rawStore, LogFavLongestL2EmbeddingTweet, Model20m145k2020) + } + /** Most-recent tweet embeddings read from the `LogFavBased20m145kUpdatedDataset` Manhattan dataset. */ + val logFavBased20M145KUpdatedTweetEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .mostRecentTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145kUpdatedDataset, + stats + ).mapValues(_.toThrift) + + buildMemCacheStore(rawStore, LogFavBasedTweet, Model20m145kUpdated) + } + /** Most-recent tweet embeddings read from the `LogFavBased20m145k2020Dataset` Manhattan dataset. */ + val logFavBased20M145K2020TweetEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + PersistentTweetEmbeddingStore + .mostRecentTweetEmbeddingStoreManhattan( + mhMtlsParams, + PersistentTweetEmbeddingStore.LogFavBased20m145k2020Dataset, + stats + ).mapValues(_.toThrift) + + buildMemCacheStore(rawStore, LogFavBasedTweet, Model20m145k2020) + } + /** Observes `rawStore` under the embedding-type and model-version stat scopes, re-keys it by [[SimClustersEmbeddingId]] and hands it to `MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding`. NOTE(review): the key pattern only matches `InternalId.TweetId`; presumably other id kinds raise a MatchError, so confirm callers never pass them. */ + private def buildMemCacheStore( + rawStore: ReadableStore[TweetId, ThriftSimClustersEmbedding], + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val observedStore: ObservedReadableStore[TweetId, ThriftSimClustersEmbedding] = + 
ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + val storeWithKeyMapping = observedStore.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.TweetId(tweetId)) => + tweetId + } + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + storeWithKeyMapping, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + /** Routing table from (embedding type, model version) to the store that serves it. */ + private val underlyingStores: Map[ + (EmbeddingType, ModelVersion), + ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] + ] = Map( + // Tweet Embeddings + (LogFavBasedTweet, Model20m145kUpdated) -> logFavBased20M145KUpdatedTweetEmbeddingStore, + (LogFavBasedTweet, Model20m145k2020) -> logFavBased20M145K2020TweetEmbeddingStore, + ( + LogFavLongestL2EmbeddingTweet, + Model20m145kUpdated) -> logFavBasedLongestL2Tweet20M145KUpdatedEmbeddingStore, + ( + LogFavLongestL2EmbeddingTweet, + Model20m145k2020) -> logFavBasedLongestL2Tweet20M145K2020EmbeddingStore, + ) + /** Public entry point: the routing table above combined via `SimClustersEmbeddingStore.buildWithDecider`. */ + val tweetSimClustersEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + SimClustersEmbeddingStore.buildWithDecider( + underlyingStores = underlyingStores, + decider = rmsDecider.decider, + statsReceiver = stats + ) + } + +} diff --git a/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/UserSimClustersEmbeddingStore.scala b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/UserSimClustersEmbeddingStore.scala new file mode 100644 index 000000000..b416d9b17 --- /dev/null +++ b/representation-manager/server/src/main/scala/com/twitter/representation_manager/store/UserSimClustersEmbeddingStore.scala @@ -0,0 +1,602 @@ +package com.twitter.representation_manager.store + +import com.twitter.contentrecommender.twistly +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.store.strato.StratoFetchableStore 
+import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.representation_manager.common.MemCacheConfig +import com.twitter.representation_manager.common.RepresentationManagerDecider +import com.twitter.simclusters_v2.common.ModelVersions +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore +import com.twitter.simclusters_v2.summingbird.stores.ProducerClusterEmbeddingReadableStores +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.getStore +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.modelVersionToDatasetMap +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.knownModelVersions +import com.twitter.simclusters_v2.summingbird.stores.UserInterestedInReadableStore.toSimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storehaus.ReadableStore +import com.twitter.storehaus_internal.manhattan.Apollo +import com.twitter.storehaus_internal.manhattan.ManhattanCluster +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.tweetypie.util.UserId +import com.twitter.util.Future +import javax.inject.Inject + +class 
UserSimClustersEmbeddingStore @Inject() ( + stratoClient: StratoClient, + cacheClient: Client, + globalStats: StatsReceiver, + mhMtlsParams: ManhattanKVClientMtlsParams, + rmsDecider: RepresentationManagerDecider) { + + private val stats = globalStats.scope(this.getClass.getSimpleName) + + private val favBasedProducer20M145KUpdatedEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = ProducerClusterEmbeddingReadableStores + .getProducerTopKSimClustersEmbeddingsStore( + mhMtlsParams + ).mapValues { topSimClustersWithScore => + ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters) + }.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => + userId + } + + buildMemCacheStore(rawStore, FavBasedProducer, Model20m145kUpdated) + } + + private val favBasedProducer20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = ProducerClusterEmbeddingReadableStores + .getProducerTopKSimClusters2020EmbeddingsStore( + mhMtlsParams + ).mapValues { topSimClustersWithScore => + ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters) + }.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => + userId + } + + buildMemCacheStore(rawStore, FavBasedProducer, Model20m145k2020) + } + + private val followBasedProducer20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = ProducerClusterEmbeddingReadableStores + .getProducerTopKSimClustersEmbeddingsByFollowStore( + mhMtlsParams + ).mapValues { topSimClustersWithScore => + ThriftSimClustersEmbedding(topSimClustersWithScore.topClusters) + }.composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(_, _, InternalId.UserId(userId)) => + userId + } + + buildMemCacheStore(rawStore, FollowBasedProducer, Model20m145k2020) + } + + private val 
logFavBasedApe20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/logFavBasedAPE20M145K2020") + .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50).toThrift) + + buildMemCacheStore(rawStore, AggregatableLogFavBasedProducer, Model20m145k2020) + } + + private val rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + ThriftSimClustersEmbedding + ] = { + StratoFetchableStore + .withUnitView[SimClustersEmbeddingId, ThriftSimClustersEmbedding]( + stratoClient, + "recommendations/simclusters_v2/embeddings/logFavBasedAPERelaxedFavEngagementThreshold20M145K2020") + .mapValues(embedding => SimClustersEmbedding(embedding, truncate = 50).toThrift) + } + + private val relaxedLogFavBasedApe20M145K2020EmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore( + rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore, + RelaxedAggregatableLogFavBasedProducer, + Model20m145k2020) + } + + private val relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = rawRelaxedLogFavBasedApe20M145K2020EmbeddingStore + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId( + RelaxedAggregatableLogFavBasedProducer, + Model20m145kUpdated, + internalId) => + SimClustersEmbeddingId( + RelaxedAggregatableLogFavBasedProducer, + Model20m145k2020, + internalId) + } + + buildMemCacheStore(rawStore, RelaxedAggregatableLogFavBasedProducer, Model20m145kUpdated) + } + + private val logFavBasedInterestedInFromAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + 
UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedInFromAPE, + Model20m145k2020) + } + + private val followBasedInterestedInFromAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultIIAPESimClustersEmbeddingStoreWithMtls, + FollowBasedUserInterestedInFromAPE, + Model20m145k2020) + } + + private val favBasedUserInterestedIn20M145KUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls, + FavBasedUserInterestedIn, + Model20m145kUpdated) + } + + private val favBasedUserInterestedIn20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls, + FavBasedUserInterestedIn, + Model20m145k2020) + } + + private val followBasedUserInterestedIn20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls, + FollowBasedUserInterestedIn, + Model20m145k2020) + } + + private val logFavBasedUserInterestedIn20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultSimClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedIn, + Model20m145k2020) + } + + private val favBasedUserInterestedInFromPE20M145KUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + UserInterestedInReadableStore.defaultIIPESimClustersEmbeddingStoreWithMtls, + FavBasedUserInterestedInFromPE, + Model20m145kUpdated) + } + + private val twistlyUserInterestedInStore: ReadableStore[ + 
SimClustersEmbeddingId, + ThriftSimClustersEmbedding + ] = { + val interestedIn20M145KUpdatedStore = { + UserInterestedInReadableStore.defaultStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145KUpdated + ) + } + val interestedIn20M145K2020Store = { + UserInterestedInReadableStore.defaultStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145K2020 + ) + } + val interestedInFromPE20M145KUpdatedStore = { + UserInterestedInReadableStore.defaultIIPEStoreWithMtls( + mhMtlsParams, + modelVersion = ModelVersions.Model20M145KUpdated) + } + val simClustersInterestedInStore: ReadableStore[ + (UserId, ModelVersion), + ClustersUserIsInterestedIn + ] = { + new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] { + override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = { + k match { + case (userId, Model20m145kUpdated) => + interestedIn20M145KUpdatedStore.get(userId) + case (userId, Model20m145k2020) => + interestedIn20M145K2020Store.get(userId) + case _ => + Future.None + } + } + } + } + val simClustersInterestedInFromProducerEmbeddingsStore: ReadableStore[ + (UserId, ModelVersion), + ClustersUserIsInterestedIn + ] = { + new ReadableStore[(UserId, ModelVersion), ClustersUserIsInterestedIn] { + override def get(k: (UserId, ModelVersion)): Future[Option[ClustersUserIsInterestedIn]] = { + k match { + case (userId, ModelVersion.Model20m145kUpdated) => + interestedInFromPE20M145KUpdatedStore.get(userId) + case _ => + Future.None + } + } + } + } + new twistly.interestedin.EmbeddingStore( + interestedInStore = simClustersInterestedInStore, + interestedInFromProducerEmbeddingStore = simClustersInterestedInFromProducerEmbeddingsStore, + statsReceiver = stats + ).mapValues(_.toThrift) + } + + private val userNextInterestedIn20m145k2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildUserInterestedInStore( + 
UserInterestedInReadableStore.defaultNextInterestedInStoreWithMtls, + UserNextInterestedIn, + Model20m145k2020) + } + + private val filteredUserInterestedIn20m145kUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145kUpdated) + } + + private val filteredUserInterestedIn20m145k2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore(twistlyUserInterestedInStore, FilteredUserInterestedIn, Model20m145k2020) + } + + private val filteredUserInterestedInFromPE20m145kUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore( + twistlyUserInterestedInStore, + FilteredUserInterestedInFromPE, + Model20m145kUpdated) + } + + private val unfilteredUserInterestedIn20m145kUpdatedStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore( + twistlyUserInterestedInStore, + UnfilteredUserInterestedIn, + Model20m145kUpdated) + } + + private val unfilteredUserInterestedIn20m145k2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + buildMemCacheStore(twistlyUserInterestedInStore, UnfilteredUserInterestedIn, Model20m145k2020) + } + + // [Experimental] User InterestedIn, generated by aggregating IIAPE embedding from AddressBook + + private val logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_maxpooling" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + private val logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store: 
ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_average" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedAverageAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + private val logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_booktype_maxpooling" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + private val logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_largestdim_maxpooling" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + private val logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_louvain_maxpooling" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + private val 
logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val datasetName = "addressbook_sims_embedding_iiape_connected_maxpooling" + val appId = "wtf_embedding_apollo" + buildUserInterestedInStoreGeneric( + simClustersEmbeddingStoreWithMtls, + LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020, + datasetName = datasetName, + appId = appId, + manhattanCluster = Apollo + ) + } + + /** + * Helper func to build a readable store for some UserInterestedIn embeddings with + * 1. A storeFunc from UserInterestedInReadableStore + * 2. EmbeddingType + * 3. ModelVersion + * 4. MemCacheConfig + * */ + private def buildUserInterestedInStore( + storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion) => ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ], + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = storeFunc(mhMtlsParams, embeddingType, modelVersion) + .mapValues(_.toThrift) + val observedStore = ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + observedStore, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + + private def buildUserInterestedInStoreGeneric( + storeFunc: (ManhattanKVClientMtlsParams, EmbeddingType, ModelVersion, String, String, + ManhattanCluster) => ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ], + embeddingType: EmbeddingType, + modelVersion: ModelVersion, + datasetName: String, + appId: String, + manhattanCluster: ManhattanCluster + ): ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + val rawStore = + storeFunc(mhMtlsParams, embeddingType, modelVersion, datasetName, appId, manhattanCluster) + .mapValues(_.toThrift) + 
val observedStore = ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + observedStore, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + + private def simClustersEmbeddingStoreWithMtls( + mhMtlsParams: ManhattanKVClientMtlsParams, + embeddingType: EmbeddingType, + modelVersion: ModelVersion, + datasetName: String, + appId: String, + manhattanCluster: ManhattanCluster + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + + if (!modelVersionToDatasetMap.contains(ModelVersions.toKnownForModelVersion(modelVersion))) { + throw new IllegalArgumentException( + "Unknown model version: " + modelVersion + ". Known model versions: " + knownModelVersions) + } + getStore(appId, mhMtlsParams, datasetName, manhattanCluster) + .composeKeyMapping[SimClustersEmbeddingId] { + case SimClustersEmbeddingId(theEmbeddingType, theModelVersion, InternalId.UserId(userId)) + if theEmbeddingType == embeddingType && theModelVersion == modelVersion => + userId + }.mapValues(toSimClustersEmbedding(_, embeddingType)) + } + + private def buildMemCacheStore( + rawStore: ReadableStore[SimClustersEmbeddingId, ThriftSimClustersEmbedding], + embeddingType: EmbeddingType, + modelVersion: ModelVersion + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val observedStore = ObservedReadableStore( + store = rawStore + )(stats.scope(embeddingType.name).scope(modelVersion.name)) + + MemCacheConfig.buildMemCacheStoreForSimClustersEmbedding( + observedStore, + cacheClient, + embeddingType, + modelVersion, + stats + ) + } + + private val underlyingStores: Map[ + (EmbeddingType, ModelVersion), + ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] + ] = Map( + // KnownFor Embeddings + (FavBasedProducer, Model20m145kUpdated) -> favBasedProducer20M145KUpdatedEmbeddingStore, + (FavBasedProducer, Model20m145k2020) -> 
favBasedProducer20M145K2020EmbeddingStore, + (FollowBasedProducer, Model20m145k2020) -> followBasedProducer20M145K2020EmbeddingStore, + (AggregatableLogFavBasedProducer, Model20m145k2020) -> logFavBasedApe20M145K2020EmbeddingStore, + ( + RelaxedAggregatableLogFavBasedProducer, + Model20m145kUpdated) -> relaxedLogFavBasedApe20m145kUpdatedEmbeddingStore, + ( + RelaxedAggregatableLogFavBasedProducer, + Model20m145k2020) -> relaxedLogFavBasedApe20M145K2020EmbeddingStore, + // InterestedIn Embeddings + ( + LogFavBasedUserInterestedInFromAPE, + Model20m145k2020) -> logFavBasedInterestedInFromAPE20M145K2020Store, + ( + FollowBasedUserInterestedInFromAPE, + Model20m145k2020) -> followBasedInterestedInFromAPE20M145K2020Store, + (FavBasedUserInterestedIn, Model20m145kUpdated) -> favBasedUserInterestedIn20M145KUpdatedStore, + (FavBasedUserInterestedIn, Model20m145k2020) -> favBasedUserInterestedIn20M145K2020Store, + (FollowBasedUserInterestedIn, Model20m145k2020) -> followBasedUserInterestedIn20M145K2020Store, + (LogFavBasedUserInterestedIn, Model20m145k2020) -> logFavBasedUserInterestedIn20M145K2020Store, + ( + FavBasedUserInterestedInFromPE, + Model20m145kUpdated) -> favBasedUserInterestedInFromPE20M145KUpdatedStore, + (FilteredUserInterestedIn, Model20m145kUpdated) -> filteredUserInterestedIn20m145kUpdatedStore, + (FilteredUserInterestedIn, Model20m145k2020) -> filteredUserInterestedIn20m145k2020Store, + ( + FilteredUserInterestedInFromPE, + Model20m145kUpdated) -> filteredUserInterestedInFromPE20m145kUpdatedStore, + ( + UnfilteredUserInterestedIn, + Model20m145kUpdated) -> unfilteredUserInterestedIn20m145kUpdatedStore, + (UnfilteredUserInterestedIn, Model20m145k2020) -> unfilteredUserInterestedIn20m145k2020Store, + (UserNextInterestedIn, Model20m145k2020) -> userNextInterestedIn20m145k2020Store, + ( + LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedInterestedMaxpoolingAddressBookFromIIAPE20M145K2020Store, + ( + 
LogFavBasedUserInterestedAverageAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedInterestedAverageAddressBookFromIIAPE20M145K2020Store, + ( + LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE20M145K2020Store, + ( + LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE20M145K2020Store, + ( + LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE20M145K2020Store, + ( + LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE, + Model20m145k2020) -> logFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE20M145K2020Store, + ) + + val userSimClustersEmbeddingStore: ReadableStore[ + SimClustersEmbeddingId, + SimClustersEmbedding + ] = { + SimClustersEmbeddingStore.buildWithDecider( + underlyingStores = underlyingStores, + decider = rmsDecider.decider, + statsReceiver = stats + ) + } + +} diff --git a/representation-manager/server/src/main/thrift/BUILD b/representation-manager/server/src/main/thrift/BUILD new file mode 100644 index 000000000..f4edb5dcb --- /dev/null +++ b/representation-manager/server/src/main/thrift/BUILD @@ -0,0 +1,18 @@ +create_thrift_libraries( + base_name = "thrift", + sources = [ + "com/twitter/representation_manager/service.thrift", + ], + platform = "java8", + tags = [ + "bazel-compatible", + ], + dependency_roots = [ + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], +) diff --git a/representation-manager/server/src/main/thrift/com/twitter/representation_manager/service.thrift b/representation-manager/server/src/main/thrift/com/twitter/representation_manager/service.thrift new file mode 100644 index 000000000..4eb36e999 --- /dev/null +++ 
b/representation-manager/server/src/main/thrift/com/twitter/representation_manager/service.thrift @@ -0,0 +1,14 @@ +namespace java com.twitter.representation_manager.thriftjava +#@namespace scala com.twitter.representation_manager.thriftscala +#@namespace strato com.twitter.representation_manager + +include "com/twitter/simclusters_v2/online_store.thrift" +include "com/twitter/simclusters_v2/identifier.thrift" + +/** + * A uniform column view for all kinds of SimClusters based embeddings. + **/ +struct SimClustersEmbeddingView { + 1: required identifier.EmbeddingType embeddingType + 2: required online_store.ModelVersion modelVersion +}(persisted = 'false', hasPersonalData = 'false') From 5edbbeedb3be4a68bf9bec39e32ba2f5e36f3808 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Tue, 18 Apr 2023 15:30:22 -0700 Subject: [PATCH 07/11] Open-sourcing Representation Scorer Representation Scorer (RSX) serves as a centralized scoring system, offering SimClusters or other embedding-based scoring solutions as machine learning features. 
--- representation-scorer/BUILD.bazel | 1 + representation-scorer/README.md | 5 + representation-scorer/bin/canary-check.sh | 8 + representation-scorer/bin/deploy.sh | 4 + .../bin/remote-debug-tunnel.sh | 66 +++ representation-scorer/docs/index.rst | 39 ++ representation-scorer/server/BUILD | 22 + .../server/src/main/resources/BUILD | 9 + .../main/resources/com/twitter/slo/slo.json | 55 ++ .../src/main/resources/config/decider.yml | 155 ++++++ .../server/src/main/resources/logback.xml | 165 ++++++ .../com/twitter/representationscorer/BUILD | 13 + .../RepresentationScorerFedServer.scala | 38 ++ .../representationscorer/columns/BUILD | 16 + .../representationscorer/columns/Info.scala | 13 + .../columns/ListScoreColumn.scala | 116 +++++ .../columns/ScoreColumn.scala | 48 ++ ...tersRecentEngagementSimilarityColumn.scala | 52 ++ ...agementSimilarityUserTweetEdgeColumn.scala | 52 ++ .../twitter/representationscorer/common/BUILD | 9 + .../common/DeciderConstants.scala | 7 + .../common/RepresentationScorerDecider.scala | 27 + .../representationscorer/common/package.scala | 6 + .../representationscorer/modules/BUILD | 19 + .../modules/CacheModule.scala | 34 ++ .../modules/EmbeddingStoreModule.scala | 100 ++++ .../modules/RMSConfigModule.scala | 63 +++ .../modules/TimerModule.scala | 13 + .../representationscorer/scorestore/BUILD | 19 + .../scorestore/ScoreStore.scala | 168 +++++++ .../TopicTweetCertoScoreStore.scala | 106 ++++ .../TopicTweetRankingScoreStore.scala | 48 ++ ...TweetsCosineSimilarityAggregateStore.scala | 148 ++++++ .../twistlyfeatures/BUILD | 20 + .../twistlyfeatures/Engagements.scala | 65 +++ .../twistlyfeatures/ScoreResult.scala | 3 + .../twistlyfeatures/Scorer.scala | 474 ++++++++++++++++++ ...SignalServiceRecentEngagementsClient.scala | 155 ++++++ ...ServiceRecentEngagementsClientModule.scala | 57 +++ .../server/src/main/thrift/BUILD | 20 + .../representationscorer/service.thrift | 106 ++++ 41 files changed, 2544 insertions(+) create mode 100644 
representation-scorer/BUILD.bazel create mode 100644 representation-scorer/README.md create mode 100755 representation-scorer/bin/canary-check.sh create mode 100755 representation-scorer/bin/deploy.sh create mode 100755 representation-scorer/bin/remote-debug-tunnel.sh create mode 100644 representation-scorer/docs/index.rst create mode 100644 representation-scorer/server/BUILD create mode 100644 representation-scorer/server/src/main/resources/BUILD create mode 100644 representation-scorer/server/src/main/resources/com/twitter/slo/slo.json create mode 100644 representation-scorer/server/src/main/resources/config/decider.yml create mode 100644 representation-scorer/server/src/main/resources/logback.xml create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/RepresentationScorerFedServer.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/Info.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ListScoreColumn.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ScoreColumn.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityColumn.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityUserTweetEdgeColumn.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/DeciderConstants.scala create mode 100644 
representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/RepresentationScorerDecider.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/package.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/CacheModule.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/EmbeddingStoreModule.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/RMSConfigModule.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/TimerModule.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/ScoreStore.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetCertoScoreStore.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetRankingScoreStore.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetsCosineSimilarityAggregateStore.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/BUILD create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Engagements.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/ScoreResult.scala create mode 100644 
representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Scorer.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClient.scala create mode 100644 representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClientModule.scala create mode 100644 representation-scorer/server/src/main/thrift/BUILD create mode 100644 representation-scorer/server/src/main/thrift/com/twitter/representationscorer/service.thrift diff --git a/representation-scorer/BUILD.bazel b/representation-scorer/BUILD.bazel new file mode 100644 index 000000000..1624a57d4 --- /dev/null +++ b/representation-scorer/BUILD.bazel @@ -0,0 +1 @@ +# This prevents SQ query from grabbing //:all since it traverses up once to find a BUILD diff --git a/representation-scorer/README.md b/representation-scorer/README.md new file mode 100644 index 000000000..b74e3472f --- /dev/null +++ b/representation-scorer/README.md @@ -0,0 +1,5 @@ +# Representation Scorer # + +**Representation Scorer** (RSX) serves as a centralized scoring system, offering SimClusters or other embedding-based scoring solutions as machine learning features. + +The Representation Scorer acquires user behavior data from the User Signal Service (USS) and extracts embeddings from the Representation Manager (RMS). It then calculates both pairwise and listwise features. These features are used at various stages, including candidate retrieval and ranking. 
\ No newline at end of file diff --git a/representation-scorer/bin/canary-check.sh b/representation-scorer/bin/canary-check.sh new file mode 100755 index 000000000..cbb31f9ad --- /dev/null +++ b/representation-scorer/bin/canary-check.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +export CANARY_CHECK_ROLE="representation-scorer" +export CANARY_CHECK_NAME="representation-scorer" +export CANARY_CHECK_INSTANCES="0-19" + +python3 relevance-platform/tools/canary_check.py "$@" + diff --git a/representation-scorer/bin/deploy.sh b/representation-scorer/bin/deploy.sh new file mode 100755 index 000000000..2f1ab8a69 --- /dev/null +++ b/representation-scorer/bin/deploy.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +JOB=representation-scorer bazel run --ui_event_filters=-info,-stdout,-stderr --noshow_progress \ + //relevance-platform/src/main/python/deploy -- "$@" diff --git a/representation-scorer/bin/remote-debug-tunnel.sh b/representation-scorer/bin/remote-debug-tunnel.sh new file mode 100755 index 000000000..2a6e71511 --- /dev/null +++ b/representation-scorer/bin/remote-debug-tunnel.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +set -o nounset +set -eu + +DC="atla" +ROLE="$USER" +SERVICE="representation-scorer" +INSTANCE="0" +KEY="$DC/$ROLE/devel/$SERVICE/$INSTANCE" + +while test $# -gt 0; do + case "$1" in + -h|--help) + echo "$0 Set up an ssh tunnel for $SERVICE remote debugging and disable aurora health checks" + echo " " + echo "See representation-scorer/README.md for details of how to use this script, and go/remote-debug for" + echo "general information about remote debugging in Aurora" + echo " " + echo "Default instance if called with no args:" + echo " $KEY" + echo " " + echo "Positional args:" + echo " $0 [datacentre] [role] [service_name] [instance]" + echo " " + echo "Options:" + echo " -h, --help show brief help" + exit 0 + ;; + *) + break + ;; + esac +done + +if [ -n "${1-}" ]; then + DC="$1" +fi + +if [ -n "${2-}" ]; then + ROLE="$2" +fi + +if [ -n "${3-}" ]; then + SERVICE="$3" +fi + 
+if [ -n "${4-}" ]; then + INSTANCE="$4" +fi + +KEY="$DC/$ROLE/devel/$SERVICE/$INSTANCE" +read -p "Set up remote debugger tunnel for $KEY? (y/n) " -r CONFIRM +if [[ ! $CONFIRM =~ ^[Yy]$ ]]; then + echo "Exiting, tunnel not created" + exit 1 +fi + +echo "Disabling health check and opening tunnel. Exit with control-c when you're finished" +CMD="aurora task ssh $KEY -c 'touch .healthchecksnooze' && aurora task ssh $KEY -L '5005:debug' --ssh-options '-N -S none -v '" + +echo "Running $CMD" +eval "$CMD" + + + diff --git a/representation-scorer/docs/index.rst b/representation-scorer/docs/index.rst new file mode 100644 index 000000000..c4fd8966d --- /dev/null +++ b/representation-scorer/docs/index.rst @@ -0,0 +1,39 @@ +Representation Scorer (RSX) +########################### + +Overview +======== + +Representation Scorer (RSX) is a StratoFed service which serves scores for pairs of entities (User, Tweet, Topic...) based on some representation of those entities. For example, it serves User-Tweet scores based on the cosine similarity of SimClusters embeddings for each of these. It aims to provide these with low latency and at high scale, to support applications such as scoring for ANN candidate generation and feature hydration via feature store. 
+ + +Current use cases +----------------- + +RSX currently serves traffic for the following use cases: + +- User-Tweet similarity scores for Home ranking, using SimClusters embedding dot product +- Topic-Tweet similarity scores for topical tweet candidate generation and topic social proof, using SimClusters embedding cosine similarity and CERTO scores +- Tweet-Tweet and User-Tweet similarity scores for ANN candidate generation, using SimClusters embedding cosine similarity +- (in development) User-Tweet similarity scores for Home ranking, based on various aggregations of similarities with recent faves, retweets and follows performed by the user + +Getting Started +=============== + +Fetching scores +--------------- + +Scores are served from the recommendations/representation_scorer/score column. + +Using RSX for your application +------------------------------ + +RSX may be a good fit for your application if you need scores based on combinations of SimClusters embeddings for core nouns. We also plan to support other embeddings and scoring approaches in the future. + +.. 
toctree:: + :maxdepth: 2 + :hidden: + + index + + diff --git a/representation-scorer/server/BUILD b/representation-scorer/server/BUILD new file mode 100644 index 000000000..cc7325192 --- /dev/null +++ b/representation-scorer/server/BUILD @@ -0,0 +1,22 @@ +jvm_binary( + name = "bin", + basename = "representation-scorer", + main = "com.twitter.representationscorer.RepresentationScorerFedServerMain", + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finatra/inject/inject-logback/src/main/scala", + "loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback", + "representation-scorer/server/src/main/resources", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer", + "twitter-server/logback-classic/src/main/scala", + ], +) + +# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app +jvm_app( + name = "representation-scorer-app", + archive = "zip", + binary = ":bin", + tags = ["bazel-compatible"], +) diff --git a/representation-scorer/server/src/main/resources/BUILD b/representation-scorer/server/src/main/resources/BUILD new file mode 100644 index 000000000..150a224ff --- /dev/null +++ b/representation-scorer/server/src/main/resources/BUILD @@ -0,0 +1,9 @@ +resources( + sources = [ + "*.xml", + "*.yml", + "com/twitter/slo/slo.json", + "config/*.yml", + ], + tags = ["bazel-compatible"], +) diff --git a/representation-scorer/server/src/main/resources/com/twitter/slo/slo.json b/representation-scorer/server/src/main/resources/com/twitter/slo/slo.json new file mode 100644 index 000000000..836b44058 --- /dev/null +++ b/representation-scorer/server/src/main/resources/com/twitter/slo/slo.json @@ -0,0 +1,55 @@ +{ + "servers": [ + { + "name": "strato", + "indicators": [ + { + "id": "success_rate_3m", + "indicator_type": "SuccessRateIndicator", + "duration": 3, + "duration_unit": "MINUTES" + }, { + "id": "latency_3m_p99", + "indicator_type": "LatencyIndicator", + "duration": 3, + 
"duration_unit": "MINUTES", + "percentile": 0.99 + } + ], + "objectives": [ + { + "indicator": "success_rate_3m", + "objective_type": "SuccessRateObjective", + "operator": ">=", + "threshold": 0.995 + }, + { + "indicator": "latency_3m_p99", + "objective_type": "LatencyObjective", + "operator": "<=", + "threshold": 50 + } + ], + "long_term_objectives": [ + { + "id": "success_rate_28_days", + "objective_type": "SuccessRateObjective", + "operator": ">=", + "threshold": 0.993, + "duration": 28, + "duration_unit": "DAYS" + }, + { + "id": "latency_p99_28_days", + "objective_type": "LatencyObjective", + "operator": "<=", + "threshold": 60, + "duration": 28, + "duration_unit": "DAYS", + "percentile": 0.99 + } + ] + } + ], + "@version": 1 +} diff --git a/representation-scorer/server/src/main/resources/config/decider.yml b/representation-scorer/server/src/main/resources/config/decider.yml new file mode 100644 index 000000000..56ae90418 --- /dev/null +++ b/representation-scorer/server/src/main/resources/config/decider.yml @@ -0,0 +1,155 @@ +enableLogFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore: + comment: "Enable to use the non-empty store for logFavBasedApeEntity20M145KUpdatedEmbeddingCachedStore (from 0% to 100%). 0 means use EMPTY readable store for all requests." + default_availability: 0 + +enableLogFavBasedApeEntity20M145K2020EmbeddingCachedStore: + comment: "Enable to use the non-empty store for logFavBasedApeEntity20M145K2020EmbeddingCachedStore (from 0% to 100%). 0 means use EMPTY readable store for all requests." + default_availability: 0 + +representation-scorer_forward_dark_traffic: + comment: "Defines the percentage of traffic to forward to diffy-proxy. 
Set to 0 to disable dark traffic forwarding" + default_availability: 0 + +"representation-scorer_load_shed_non_prod_callers": + comment: "Discard traffic from all non-prod callers" + default_availability: 0 + +enable_log_fav_based_tweet_embedding_20m145k2020_timeouts: + comment: "If enabled, set a timeout on calls to the logFavBased20M145K2020TweetEmbeddingStore" + default_availability: 0 + +log_fav_based_tweet_embedding_20m145k2020_timeout_value_millis: + comment: "The value of this decider defines the timeout (in milliseconds) to use on calls to the logFavBased20M145K2020TweetEmbeddingStore, i.e. 1.50% is 150ms. Only applied if enable_log_fav_based_tweet_embedding_20m145k2020_timeouts is true" + default_availability: 2000 + +enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts: + comment: "If enabled, set a timeout on calls to the logFavBased20M145KUpdatedTweetEmbeddingStore" + default_availability: 0 + +log_fav_based_tweet_embedding_20m145kUpdated_timeout_value_millis: + comment: "The value of this decider defines the timeout (in milliseconds) to use on calls to the logFavBased20M145KUpdatedTweetEmbeddingStore, i.e. 1.50% is 150ms. Only applied if enable_log_fav_based_tweet_embedding_20m145kUpdated_timeouts is true" + default_availability: 2000 + +enable_cluster_tweet_index_store_timeouts: + comment: "If enabled, set a timeout on calls to the ClusterTweetIndexStore" + default_availability: 0 + +cluster_tweet_index_store_timeout_value_millis: + comment: "The value of this decider defines the timeout (in milliseconds) to use on calls to the ClusterTweetIndexStore, i.e. 1.50% is 150ms. 
Only applied if enable_cluster_tweet_index_store_timeouts is true" + default_availability: 2000 + +representation_scorer_fetch_signal_share: + comment: "If enabled, fetches share signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_reply: + comment: "If enabled, fetches reply signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_original_tweet: + comment: "If enabled, fetches original tweet signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_video_playback: + comment: "If enabled, fetches video playback signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_block: + comment: "If enabled, fetches account block signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_mute: + comment: "If enabled, fetches account mute signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_report: + comment: "If enabled, fetches tweet report signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_dont_like: + comment: "If enabled, fetches tweet don't like signals from USS" + default_availability: 0 + +representation_scorer_fetch_signal_see_fewer: + comment: "If enabled, fetches tweet see fewer signals from USS" + default_availability: 0 + +# To create a new decider, add it here in the same format, using the caller's details: "representation-scorer_load_shed_by_caller_id_twtr:{{role}}:{{name}}:{{environment}}:{{cluster}}" +# All the deciders below are generated by this script: ./strato/bin/fed deciders ./ --service-role=representation-scorer --service-name=representation-scorer +# If you need to run the script and paste the output, add only the prod deciders here. 
Non-prod ones are being taken care of by representation-scorer_load_shed_non_prod_callers + +"representation-scorer_load_shed_by_caller_id_all": + comment: "Reject all traffic from caller id: all" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice-canary:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice-canary:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice-canary:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice-canary:prod:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice-send:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice-send:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice:prod:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice:staging:atla": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice:staging:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:frigate:frigate-pushservice:staging:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:frigate:frigate-pushservice:staging:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:home-scorer:home-scorer:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:home-scorer:home-scorer:prod:atla" + 
default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:home-scorer:home-scorer:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:home-scorer:home-scorer:prod:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:stratostore:stratoapi:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoapi:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:stratostore:stratoserver:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:stratostore:stratoserver:prod:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:timelinescorer:timelinescorer:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:timelinescorer:timelinescorer:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:timelinescorer:timelinescorer:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:timelinescorer:timelinescorer:prod:pdxa" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:topic-social-proof:topic-social-proof:prod:atla": + comment: "Reject all traffic from caller id: twtr:svc:topic-social-proof:topic-social-proof:prod:atla" + default_availability: 0 + +"representation-scorer_load_shed_by_caller_id_twtr:svc:topic-social-proof:topic-social-proof:prod:pdxa": + comment: "Reject all traffic from caller id: twtr:svc:topic-social-proof:topic-social-proof:prod:pdxa" + default_availability: 0 + +"enable_sim_clusters_embedding_store_timeouts": + comment: "If enabled, set a timeout on calls to the SimClustersEmbeddingStore" + default_availability: 10000 + 
+sim_clusters_embedding_store_timeout_value_millis: + comment: "The value of this decider defines the timeout (in milliseconds) to use on calls to the SimClustersEmbeddingStore, i.e. 1.50% is 150ms. Only applied if enable_sim_clusters_embedding_store_timeouts is true" + default_availability: 2000 diff --git a/representation-scorer/server/src/main/resources/logback.xml b/representation-scorer/server/src/main/resources/logback.xml new file mode 100644 index 000000000..cf7028151 --- /dev/null +++ b/representation-scorer/server/src/main/resources/logback.xml @@ -0,0 +1,165 @@ + + + + + + + + + + + + + + + + + true + + + + + + + + + + + ${log.service.output} + + + ${log.service.output}.%d.gz + + 3GB + + 21 + true + + + %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n + + + + + + ${log.access.output} + + + ${log.access.output}.%d.gz + + 100MB + + 7 + true + + + ${DEFAULT_ACCESS_PATTERN}%n + + + + + + true + ${log.lens.category} + ${log.lens.index} + ${log.lens.tag}/service + + %msg + + + + + + true + ${log.lens.category} + ${log.lens.index} + ${log.lens.tag}/access + + %msg + + + + + + allow_listed_pipeline_executions.log + + + allow_listed_pipeline_executions.log.%d.gz + + 100MB + + 7 + true + + + %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n + + + + + + + + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + ${async_queue_size} + ${async_max_flush_time} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/BUILD new file mode 100644 index 000000000..fdb60da54 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/BUILD @@ -0,0 +1,13 @@ +scala_library( + compiler_option_sets = 
["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/slo/src/main/scala/com/twitter/finagle/slo", + "finatra/inject/inject-thrift-client", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + "twitter-server-internal/src/main/scala", + ], +) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/RepresentationScorerFedServer.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/RepresentationScorerFedServer.scala new file mode 100644 index 000000000..a0a203311 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/RepresentationScorerFedServer.scala @@ -0,0 +1,38 @@ +package com.twitter.representationscorer + +import com.google.inject.Module +import com.twitter.inject.thrift.modules.ThriftClientIdModule +import com.twitter.representationscorer.columns.ListScoreColumn +import com.twitter.representationscorer.columns.ScoreColumn +import com.twitter.representationscorer.columns.SimClustersRecentEngagementSimilarityColumn +import com.twitter.representationscorer.columns.SimClustersRecentEngagementSimilarityUserTweetEdgeColumn +import com.twitter.representationscorer.modules.CacheModule +import com.twitter.representationscorer.modules.EmbeddingStoreModule +import com.twitter.representationscorer.modules.RMSConfigModule +import com.twitter.representationscorer.modules.TimerModule +import com.twitter.representationscorer.twistlyfeatures.UserSignalServiceRecentEngagementsClientModule +import com.twitter.strato.fed._ +import com.twitter.strato.fed.server._ + +object RepresentationScorerFedServerMain extends RepresentationScorerFedServer + +trait RepresentationScorerFedServer extends StratoFedServer { + override def dest: String = 
"/s/representation-scorer/representation-scorer" + override val modules: Seq[Module] = + Seq( + CacheModule, + ThriftClientIdModule, + UserSignalServiceRecentEngagementsClientModule, + TimerModule, + RMSConfigModule, + EmbeddingStoreModule + ) + + override def columns: Seq[Class[_ <: StratoFed.Column]] = + Seq( + classOf[ListScoreColumn], + classOf[ScoreColumn], + classOf[SimClustersRecentEngagementSimilarityUserTweetEdgeColumn], + classOf[SimClustersRecentEngagementSimilarityColumn] + ) +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/BUILD new file mode 100644 index 000000000..3352a51b9 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/BUILD @@ -0,0 +1,16 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "content-recommender/thrift/src/main/thrift:thrift-scala", + "finatra/inject/inject-core/src/main/scala", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/common", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures", + "representation-scorer/server/src/main/thrift:thrift-scala", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/fed/server", + ], +) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/Info.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/Info.scala new file mode 100644 index 000000000..3b14a491f --- /dev/null +++ 
b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/Info.scala @@ -0,0 +1,13 @@ +package com.twitter.representationscorer.columns + +import com.twitter.strato.config.{ContactInfo => StratoContactInfo} + +object Info { + val contactInfo: StratoContactInfo = StratoContactInfo( + description = "Please contact Relevance Platform team for more details", + contactEmail = "no-reply@twitter.com", + ldapGroup = "representation-scorer-admins", + jiraProject = "JIRA", + links = Seq("http://go.twitter.biz/rsx-runbook") + ) +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ListScoreColumn.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ListScoreColumn.scala new file mode 100644 index 000000000..04d8b8cb1 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ListScoreColumn.scala @@ -0,0 +1,116 @@ +package com.twitter.representationscorer.columns + +import com.twitter.representationscorer.thriftscala.ListScoreId +import com.twitter.representationscorer.thriftscala.ListScoreResponse +import com.twitter.representationscorer.scorestore.ScoreStore +import com.twitter.representationscorer.thriftscala.ScoreResult +import com.twitter.simclusters_v2.common.SimClustersEmbeddingId.LongInternalId +import com.twitter.simclusters_v2.common.SimClustersEmbeddingId.LongSimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.Score +import com.twitter.simclusters_v2.thriftscala.ScoreId +import com.twitter.simclusters_v2.thriftscala.ScoreInternalId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingPairScoreId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import 
com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import javax.inject.Inject + +class ListScoreColumn @Inject() (scoreStore: ScoreStore) + extends StratoFed.Column("recommendations/representation_scorer/listScore") + with StratoFed.Fetch.Stitch { + + override val policy: Policy = Common.rsxReadPolicy + + override type Key = ListScoreId + override type View = Unit + override type Value = ListScoreResponse + + override val keyConv: Conv[Key] = ScroogeConv.fromStruct[ListScoreId] + override val viewConv: Conv[View] = Conv.ofType + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[ListScoreResponse] + + override val contactInfo: ContactInfo = Info.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText( + "Scoring for multiple candidate entities against a single target entity" + )) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + + val target = SimClustersEmbeddingId( + embeddingType = key.targetEmbeddingType, + modelVersion = key.modelVersion, + internalId = key.targetId + ) + val scoreIds = key.candidateIds.map { candidateId => + val candidate = SimClustersEmbeddingId( + embeddingType = key.candidateEmbeddingType, + modelVersion = key.modelVersion, + internalId = candidateId + ) + ScoreId( + algorithm = key.algorithm, + internalId = ScoreInternalId.SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingPairScoreId(target, candidate) + ) + ) + } + + Stitch + .callFuture { + val (keys: Iterable[ScoreId], vals: Iterable[Future[Option[Score]]]) = + scoreStore.uniformScoringStore.multiGet(scoreIds.toSet).unzip + val results: Future[Iterable[Option[Score]]] = Future.collectToTry(vals.toSeq) map { + 
tryOptVals => + tryOptVals map { + case Return(Some(v)) => Some(v) + case Return(None) => None + case Throw(_) => None + } + } + val scoreMap: Future[Map[Long, Double]] = results.map { scores => + keys + .zip(scores).collect { + case ( + ScoreId( + _, + ScoreInternalId.SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingPairScoreId( + _, + LongSimClustersEmbeddingId(candidateId)))), + Some(score)) => + (candidateId, score.score) + }.toMap + } + scoreMap + } + .map { (scores: Map[Long, Double]) => + val orderedScores = key.candidateIds.collect { + case LongInternalId(id) => ScoreResult(scores.get(id)) + case _ => + // This will return None scores for candidates which don't have Long ids, but that's fine: + // at the moment we're only scoring for Tweets + ScoreResult(None) + } + found(ListScoreResponse(orderedScores)) + } + .handle { + case stitch.NotFound => missing + } + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ScoreColumn.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ScoreColumn.scala new file mode 100644 index 000000000..6b565288b --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/ScoreColumn.scala @@ -0,0 +1,48 @@ +package com.twitter.representationscorer.columns + +import com.twitter.contentrecommender.thriftscala.ScoringResponse +import com.twitter.representationscorer.scorestore.ScoreStore +import com.twitter.simclusters_v2.thriftscala.ScoreId +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class ScoreColumn 
@Inject() (scoreStore: ScoreStore) + extends StratoFed.Column("recommendations/representation_scorer/score") + with StratoFed.Fetch.Stitch { + + override val policy: Policy = Common.rsxReadPolicy + + override type Key = ScoreId + override type View = Unit + override type Value = ScoringResponse + + override val keyConv: Conv[Key] = ScroogeConv.fromStruct[ScoreId] + override val viewConv: Conv[View] = Conv.ofType + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[ScoringResponse] + + override val contactInfo: ContactInfo = Info.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some(PlainText( + "The Uniform Scoring Endpoint in Representation Scorer for the Content-Recommender." + + " TDD: http://go/representation-scorer-tdd Guideline: http://go/uniform-scoring-guideline")) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = + scoreStore + .uniformScoringStoreStitch(key) + .map(score => found(ScoringResponse(Some(score)))) + .handle { + case stitch.NotFound => missing + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityColumn.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityColumn.scala new file mode 100644 index 000000000..e14a67eae --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityColumn.scala @@ -0,0 +1,52 @@ +package com.twitter.representationscorer.columns + +import com.twitter.representationscorer.common.TweetId +import com.twitter.representationscorer.common.UserId +import com.twitter.representationscorer.thriftscala.RecentEngagementSimilaritiesResponse +import com.twitter.representationscorer.twistlyfeatures.Scorer +import com.twitter.stitch +import com.twitter.stitch.Stitch +import 
com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class SimClustersRecentEngagementSimilarityColumn @Inject() (scorer: Scorer) + extends StratoFed.Column( + "recommendations/representation_scorer/simClustersRecentEngagementSimilarity") + with StratoFed.Fetch.Stitch { + + override val policy: Policy = Common.rsxReadPolicy + + override type Key = (UserId, Seq[TweetId]) + override type View = Unit + override type Value = RecentEngagementSimilaritiesResponse + + override val keyConv: Conv[Key] = Conv.ofType[(Long, Seq[Long])] + override val viewConv: Conv[View] = Conv.ofType + override val valueConv: Conv[Value] = + ScroogeConv.fromStruct[RecentEngagementSimilaritiesResponse] + + override val contactInfo: ContactInfo = Info.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText( + "User-Tweet scores based on the user's recent engagements for multiple tweets." 
+ )) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = + scorer + .get(key._1, key._2) + .map(results => found(RecentEngagementSimilaritiesResponse(results))) + .handle { + case stitch.NotFound => missing + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityUserTweetEdgeColumn.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityUserTweetEdgeColumn.scala new file mode 100644 index 000000000..e54d3a71b --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/columns/SimClustersRecentEngagementSimilarityUserTweetEdgeColumn.scala @@ -0,0 +1,52 @@ +package com.twitter.representationscorer.columns + +import com.twitter.representationscorer.common.TweetId +import com.twitter.representationscorer.common.UserId +import com.twitter.representationscorer.thriftscala.SimClustersRecentEngagementSimilarities +import com.twitter.representationscorer.twistlyfeatures.Scorer +import com.twitter.stitch +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle +import com.twitter.strato.fed._ +import com.twitter.strato.thrift.ScroogeConv +import javax.inject.Inject + +class SimClustersRecentEngagementSimilarityUserTweetEdgeColumn @Inject() (scorer: Scorer) + extends StratoFed.Column( + "recommendations/representation_scorer/simClustersRecentEngagementSimilarity.UserTweetEdge") + with StratoFed.Fetch.Stitch { + + override val policy: Policy = Common.rsxReadPolicy + + override type Key = (UserId, TweetId) + override type View = Unit + override type Value = SimClustersRecentEngagementSimilarities + + override val keyConv: 
Conv[Key] = Conv.ofType[(Long, Long)] + override val viewConv: Conv[View] = Conv.ofType + override val valueConv: Conv[Value] = + ScroogeConv.fromStruct[SimClustersRecentEngagementSimilarities] + + override val contactInfo: ContactInfo = Info.contactInfo + + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Lifecycle.Production), + description = Some( + PlainText( + "User-Tweet scores based on the user's recent engagements" + )) + ) + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = + scorer + .get(key._1, key._2) + .map(found(_)) + .handle { + case stitch.NotFound => missing + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/BUILD new file mode 100644 index 000000000..018cef9eb --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/BUILD @@ -0,0 +1,9 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "decider/src/main/scala", + "src/scala/com/twitter/simclusters_v2/common", + ], +) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/DeciderConstants.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/DeciderConstants.scala new file mode 100644 index 000000000..838835616 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/DeciderConstants.scala @@ -0,0 +1,7 @@ +package com.twitter.representationscorer + +object DeciderConstants { + val enableSimClustersEmbeddingStoreTimeouts = "enable_sim_clusters_embedding_store_timeouts" + val simClustersEmbeddingStoreTimeoutValueMillis = + "sim_clusters_embedding_store_timeout_value_millis" +} diff --git 
a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/RepresentationScorerDecider.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/RepresentationScorerDecider.scala new file mode 100644 index 000000000..5aa4b4f2c --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/RepresentationScorerDecider.scala @@ -0,0 +1,27 @@ +package com.twitter.representationscorer.common + +import com.twitter.decider.Decider +import com.twitter.decider.RandomRecipient +import com.twitter.decider.Recipient +import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing +import javax.inject.Inject +import javax.inject.Singleton + +@Singleton +case class RepresentationScorerDecider @Inject() (decider: Decider) { + + val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider) + + def isAvailable(feature: String, recipient: Option[Recipient]): Boolean = { + decider.isAvailable(feature, recipient) + } + + /** + * When useRandomRecipient is set to false, the decider is either completely on or off. + * When useRandomRecipient is set to true, the decider is on for the specified % of traffic. 
+ */ + def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = { + if (useRandomRecipient) isAvailable(feature, Some(RandomRecipient)) + else isAvailable(feature, None) + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/package.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/package.scala new file mode 100644 index 000000000..c5bf9c60a --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/common/package.scala @@ -0,0 +1,6 @@ +package com.twitter.representationscorer + +package object common { + type UserId = Long + type TweetId = Long +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/BUILD new file mode 100644 index 000000000..c73f2a68e --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/BUILD @@ -0,0 +1,19 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-stats", + "finatra/inject/inject-core/src/main/scala", + "representation-manager/client/src/main/scala/com/twitter/representation_manager", + "representation-manager/client/src/main/scala/com/twitter/representation_manager/config", + "representation-manager/server/src/main/scala/com/twitter/representation_manager/migration", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/common", + "servo/util", + "src/scala/com/twitter/simclusters_v2/stores", + "src/scala/com/twitter/storehaus_internal/memcache", + "src/scala/com/twitter/storehaus_internal/util", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + ], +) diff --git 
a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/CacheModule.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/CacheModule.scala new file mode 100644 index 000000000..b8b815872 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/CacheModule.scala @@ -0,0 +1,34 @@ +package com.twitter.representationscorer.modules + +import com.google.inject.Provides +import com.twitter.finagle.memcached.Client +import javax.inject.Singleton +import com.twitter.conversions.DurationOps._ +import com.twitter.inject.TwitterModule +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.storehaus_internal.memcache.MemcacheStore +import com.twitter.storehaus_internal.util.ClientName +import com.twitter.storehaus_internal.util.ZkEndPoint + +object CacheModule extends TwitterModule { + + private val cacheDest = flag[String]("cache_module.dest", "Path to memcache service") + private val timeout = flag[Int]("memcache.timeout", "Memcache client timeout") + private val retries = flag[Int]("memcache.retries", "Memcache timeout retries") + + @Singleton + @Provides + def providesCache( + serviceIdentifier: ServiceIdentifier, + stats: StatsReceiver + ): Client = + MemcacheStore.memcachedClient( + name = ClientName("memcache_representation_manager"), + dest = ZkEndPoint(cacheDest()), + timeout = timeout().milliseconds, + retries = retries(), + statsReceiver = stats.scope("cache_client"), + serviceIdentifier = serviceIdentifier + ) +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/EmbeddingStoreModule.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/EmbeddingStoreModule.scala new file mode 100644 index 000000000..bff5d491c --- /dev/null +++ 
b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/EmbeddingStoreModule.scala @@ -0,0 +1,100 @@ +package com.twitter.representationscorer.modules + +import com.google.inject.Provides +import com.twitter.decider.Decider +import com.twitter.finagle.memcached.{Client => MemcachedClient} +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.inject.TwitterModule +import com.twitter.relevance_platform.common.readablestore.ReadableStoreWithTimeout +import com.twitter.representation_manager.migration.LegacyRMS +import com.twitter.representationscorer.DeciderConstants +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.stores.SimClustersEmbeddingStore +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Timer +import javax.inject.Singleton + +object EmbeddingStoreModule extends TwitterModule { + @Singleton + @Provides + def providesEmbeddingStore( + memCachedClient: MemcachedClient, + serviceIdentifier: ServiceIdentifier, + clientId: ClientId, + timer: Timer, + decider: Decider, + stats: StatsReceiver + ): ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val cacheHashKeyPrefix: String = "RMS" + val embeddingStoreClient = new LegacyRMS( + serviceIdentifier, + memCachedClient, + stats, + decider, + clientId, + timer, + cacheHashKeyPrefix + ) + + val underlyingStores: Map[ + (EmbeddingType, ModelVersion), + ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] + ] = 
Map( + // Tweet Embeddings + ( + LogFavBasedTweet, + Model20m145k2020) -> embeddingStoreClient.logFavBased20M145K2020TweetEmbeddingStore, + ( + LogFavLongestL2EmbeddingTweet, + Model20m145k2020) -> embeddingStoreClient.logFavBasedLongestL2Tweet20M145K2020EmbeddingStore, + // InterestedIn Embeddings + ( + LogFavBasedUserInterestedInFromAPE, + Model20m145k2020) -> embeddingStoreClient.LogFavBasedInterestedInFromAPE20M145K2020Store, + ( + FavBasedUserInterestedIn, + Model20m145k2020) -> embeddingStoreClient.favBasedUserInterestedIn20M145K2020Store, + // Author Embeddings + ( + FavBasedProducer, + Model20m145k2020) -> embeddingStoreClient.favBasedProducer20M145K2020EmbeddingStore, + // Entity Embeddings + ( + LogFavBasedKgoApeTopic, + Model20m145k2020) -> embeddingStoreClient.logFavBasedApeEntity20M145K2020EmbeddingCachedStore, + (FavTfgTopic, Model20m145k2020) -> embeddingStoreClient.favBasedTfgTopicEmbedding2020Store, + ) + + val simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = { + val underlying: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + SimClustersEmbeddingStore.buildWithDecider( + underlyingStores = underlyingStores, + decider = decider, + statsReceiver = stats.scope("simClusters_embeddings_store_deciderable") + ) + + val underlyingWithTimeout: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding] = + new ReadableStoreWithTimeout( + rs = underlying, + decider = decider, + enableTimeoutDeciderKey = DeciderConstants.enableSimClustersEmbeddingStoreTimeouts, + timeoutValueKey = DeciderConstants.simClustersEmbeddingStoreTimeoutValueMillis, + timer = timer, + statsReceiver = stats.scope("simClusters_embedding_store_timeouts") + ) + + ObservedReadableStore( + store = underlyingWithTimeout + )(stats.scope("simClusters_embeddings_store")) + } + simClustersEmbeddingStore + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/RMSConfigModule.scala 
b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/RMSConfigModule.scala new file mode 100644 index 000000000..08ac0cb93 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/RMSConfigModule.scala @@ -0,0 +1,63 @@ +package com.twitter.representationscorer.modules + +import com.google.inject.Provides +import com.twitter.conversions.DurationOps._ +import com.twitter.inject.TwitterModule +import com.twitter.representation_manager.config.ClientConfig +import com.twitter.representation_manager.config.EnabledInMemoryCacheParams +import com.twitter.representation_manager.config.InMemoryCacheParams +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.EmbeddingType._ +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ModelVersion._ +import javax.inject.Singleton + +object RMSConfigModule extends TwitterModule { + def getCacheName(embedingType: EmbeddingType, modelVersion: ModelVersion): String = + s"${embedingType.name}_${modelVersion.name}_in_mem_cache" + + @Singleton + @Provides + def providesRMSClientConfig: ClientConfig = { + val cacheParamsMap: Map[ + (EmbeddingType, ModelVersion), + InMemoryCacheParams + ] = Map( + // Tweet Embeddings + (LogFavBasedTweet, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 10.minutes, + maxKeys = 1048575, // 800MB + cacheName = getCacheName(LogFavBasedTweet, Model20m145k2020)), + (LogFavLongestL2EmbeddingTweet, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 5.minute, + maxKeys = 1048575, // 800MB + cacheName = getCacheName(LogFavLongestL2EmbeddingTweet, Model20m145k2020)), + // User - KnownFor Embeddings + (FavBasedProducer, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 1.day, + maxKeys = 500000, // 400MB + cacheName = getCacheName(FavBasedProducer, Model20m145k2020)), + // User - InterestedIn Embeddings + 
(LogFavBasedUserInterestedInFromAPE, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 6.hours, + maxKeys = 262143, + cacheName = getCacheName(LogFavBasedUserInterestedInFromAPE, Model20m145k2020)), + (FavBasedUserInterestedIn, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 6.hours, + maxKeys = 262143, + cacheName = getCacheName(FavBasedUserInterestedIn, Model20m145k2020)), + // Topic Embeddings + (FavTfgTopic, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 12.hours, + maxKeys = 262143, // 200MB + cacheName = getCacheName(FavTfgTopic, Model20m145k2020)), + (LogFavBasedKgoApeTopic, Model20m145k2020) -> EnabledInMemoryCacheParams( + ttl = 6.hours, + maxKeys = 262143, + cacheName = getCacheName(LogFavBasedKgoApeTopic, Model20m145k2020)), + ) + + new ClientConfig(inMemCacheParamsOverrides = cacheParamsMap) + } + +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/TimerModule.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/TimerModule.scala new file mode 100644 index 000000000..b425d516a --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/modules/TimerModule.scala @@ -0,0 +1,13 @@ +package com.twitter.representationscorer.modules + +import com.google.inject.Provides +import com.twitter.finagle.util.DefaultTimer +import com.twitter.inject.TwitterModule +import com.twitter.util.Timer +import javax.inject.Singleton + +object TimerModule extends TwitterModule { + @Singleton + @Provides + def providesTimer: Timer = DefaultTimer +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/BUILD new file mode 100644 index 000000000..3c259cfc4 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/BUILD @@ -0,0 +1,19 @@ 
+scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + dependencies = [ + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection", + "representation-manager/client/src/main/scala/com/twitter/representation_manager", + "representation-manager/client/src/main/scala/com/twitter/representation_manager/config", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/common", + "src/scala/com/twitter/simclusters_v2/score", + "src/scala/com/twitter/topic_recos/common", + "src/scala/com/twitter/topic_recos/stores", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala", + "stitch/stitch-storehaus", + ], +) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/ScoreStore.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/ScoreStore.scala new file mode 100644 index 000000000..db7cbefa9 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/ScoreStore.scala @@ -0,0 +1,168 @@ +package com.twitter.representationscorer.scorestore + +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.hashing.KeyHasher +import com.twitter.hermit.store.common.ObservedCachedReadableStore +import com.twitter.hermit.store.common.ObservedMemcachedReadableStore +import com.twitter.hermit.store.common.ObservedReadableStore +import com.twitter.relevance_platform.common.injection.LZ4Injection +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import 
com.twitter.simclusters_v2.score.ScoreFacadeStore +import com.twitter.simclusters_v2.score.SimClustersEmbeddingPairScoreStore +import com.twitter.simclusters_v2.thriftscala.EmbeddingType.FavTfgTopic +import com.twitter.simclusters_v2.thriftscala.EmbeddingType.LogFavBasedKgoApeTopic +import com.twitter.simclusters_v2.thriftscala.EmbeddingType.LogFavBasedTweet +import com.twitter.simclusters_v2.thriftscala.ModelVersion.Model20m145kUpdated +import com.twitter.simclusters_v2.thriftscala.Score +import com.twitter.simclusters_v2.thriftscala.ScoreId +import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.stitch.storehaus.StitchOfReadableStore +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.topic_recos.stores.CertoTweetTopicScoresStore +import javax.inject.Inject +import javax.inject.Singleton + +@Singleton() +class ScoreStore @Inject() ( + simClustersEmbeddingStore: ReadableStore[SimClustersEmbeddingId, SimClustersEmbedding], + stratoClient: StratoClient, + representationScorerCacheClient: Client, + stats: StatsReceiver) { + + private val keyHasher = KeyHasher.FNV1A_64 + private val statsReceiver = stats.scope("score_store") + + /** ** Score Store *****/ + private val simClustersEmbeddingCosineSimilarityScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildCosineSimilarityStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_cosine_similarity_score_store")) + + private val simClustersEmbeddingDotProductScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildDotProductStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_dot_product_score_store")) + + private val simClustersEmbeddingJaccardSimilarityScoreStore = + ObservedReadableStore( + 
SimClustersEmbeddingPairScoreStore + .buildJaccardSimilarityStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_jaccard_similarity_score_store")) + + private val simClustersEmbeddingEuclideanDistanceScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildEuclideanDistanceStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_euclidean_distance_score_store")) + + private val simClustersEmbeddingManhattanDistanceScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildManhattanDistanceStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_manhattan_distance_score_store")) + + private val simClustersEmbeddingLogCosineSimilarityScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildLogCosineSimilarityStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_log_cosine_similarity_score_store")) + + private val simClustersEmbeddingExpScaledCosineSimilarityScoreStore = + ObservedReadableStore( + SimClustersEmbeddingPairScoreStore + .buildExpScaledCosineSimilarityStore(simClustersEmbeddingStore) + .toThriftStore + )(statsReceiver.scope("simClusters_embedding_exp_scaled_cosine_similarity_score_store")) + + // Use the default setting + private val topicTweetRankingScoreStore = + TopicTweetRankingScoreStore.buildTopicTweetRankingStore( + FavTfgTopic, + LogFavBasedKgoApeTopic, + LogFavBasedTweet, + Model20m145kUpdated, + consumerEmbeddingMultiplier = 1.0, + producerEmbeddingMultiplier = 1.0 + ) + + private val topicTweetsCortexThresholdStore = TopicTweetsCosineSimilarityAggregateStore( + TopicTweetsCosineSimilarityAggregateStore.DefaultScoreKeys, + statsReceiver.scope("topic_tweets_cortex_threshold_store") + ) + + val topicTweetCertoScoreStore: ObservedCachedReadableStore[ScoreId, Score] = { + val underlyingStore = 
ObservedReadableStore( + TopicTweetCertoScoreStore(CertoTweetTopicScoresStore.prodStore(stratoClient)) + )(statsReceiver.scope("topic_tweet_certo_score_store")) + + val memcachedStore = ObservedMemcachedReadableStore + .fromCacheClient( + backingStore = underlyingStore, + cacheClient = representationScorerCacheClient, + ttl = 10.minutes + )( + valueInjection = LZ4Injection.compose(BinaryScalaCodec(Score)), + statsReceiver = statsReceiver.scope("topic_tweet_certo_store_memcache"), + keyToString = { k: ScoreId => + s"certocs:${keyHasher.hashKey(k.toString.getBytes)}" + } + ) + + ObservedCachedReadableStore.from[ScoreId, Score]( + memcachedStore, + ttl = 5.minutes, + maxKeys = 1000000, + cacheName = "topic_tweet_certo_store_cache", + windowSize = 10000L + )(statsReceiver.scope("topic_tweet_certo_store_cache")) + } + + val uniformScoringStore: ReadableStore[ScoreId, Score] = + ScoreFacadeStore.buildWithMetrics( + readableStores = Map( + ScoringAlgorithm.PairEmbeddingCosineSimilarity -> + simClustersEmbeddingCosineSimilarityScoreStore, + ScoringAlgorithm.PairEmbeddingDotProduct -> + simClustersEmbeddingDotProductScoreStore, + ScoringAlgorithm.PairEmbeddingJaccardSimilarity -> + simClustersEmbeddingJaccardSimilarityScoreStore, + ScoringAlgorithm.PairEmbeddingEuclideanDistance -> + simClustersEmbeddingEuclideanDistanceScoreStore, + ScoringAlgorithm.PairEmbeddingManhattanDistance -> + simClustersEmbeddingManhattanDistanceScoreStore, + ScoringAlgorithm.PairEmbeddingLogCosineSimilarity -> + simClustersEmbeddingLogCosineSimilarityScoreStore, + ScoringAlgorithm.PairEmbeddingExpScaledCosineSimilarity -> + simClustersEmbeddingExpScaledCosineSimilarityScoreStore, + // Certo normalized cosine score between topic-tweet pairs + ScoringAlgorithm.CertoNormalizedCosineScore + -> topicTweetCertoScoreStore, + // Certo normalized dot-product score between topic-tweet pairs + ScoringAlgorithm.CertoNormalizedDotProductScore + -> topicTweetCertoScoreStore + ), + aggregatedStores = Map( + 
ScoringAlgorithm.WeightedSumTopicTweetRanking -> + topicTweetRankingScoreStore, + ScoringAlgorithm.CortexTopicTweetLabel -> + topicTweetsCortexThresholdStore, + ), + statsReceiver = stats + ) + + val uniformScoringStoreStitch: ScoreId => com.twitter.stitch.Stitch[Score] = + StitchOfReadableStore(uniformScoringStore) +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetCertoScoreStore.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetCertoScoreStore.scala new file mode 100644 index 000000000..b6216985f --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetCertoScoreStore.scala @@ -0,0 +1,106 @@ +package com.twitter.representationscorer.scorestore + +import com.twitter.simclusters_v2.common.TweetId +import com.twitter.simclusters_v2.thriftscala.ScoreInternalId.GenericPairScoreId +import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm.CertoNormalizedDotProductScore +import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm.CertoNormalizedCosineScore +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.TopicId +import com.twitter.simclusters_v2.thriftscala.{Score => ThriftScore} +import com.twitter.simclusters_v2.thriftscala.{ScoreId => ThriftScoreId} +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.topic_recos.thriftscala.Scores +import com.twitter.topic_recos.thriftscala.TopicToScores +import com.twitter.util.Future + +/** + * Score store to get Certo scores. + * Currently, the store supports two Scoring Algorithms (i.e., two types of Certo scores): + * 1. NormalizedDotProduct + * 2. NormalizedCosine + * Querying with corresponding scoring algorithms results in different Certo scores. 
+ */ +case class TopicTweetCertoScoreStore(certoStratoStore: ReadableStore[TweetId, TopicToScores]) + extends ReadableStore[ThriftScoreId, ThriftScore] { + + override def multiGet[K1 <: ThriftScoreId](ks: Set[K1]): Map[K1, Future[Option[ThriftScore]]] = { + val tweetIds = + ks.map(_.internalId).collect { + case GenericPairScoreId(scoreId) => + ((scoreId.id1, scoreId.id2): @annotation.nowarn( + "msg=may not be exhaustive|max recursion depth")) match { + case (InternalId.TweetId(tweetId), _) => tweetId + case (_, InternalId.TweetId(tweetId)) => tweetId + } + } + + val result = for { + certoScores <- Future.collect(certoStratoStore.multiGet(tweetIds)) + } yield { + ks.map { k => + (k.algorithm, k.internalId) match { + case (CertoNormalizedDotProductScore, GenericPairScoreId(scoreId)) => + (scoreId.id1, scoreId.id2) match { + case (InternalId.TweetId(tweetId), InternalId.TopicId(topicId)) => + ( + k, + extractScore( + tweetId, + topicId, + certoScores, + _.followerL2NormalizedDotProduct8HrHalfLife)) + case (InternalId.TopicId(topicId), InternalId.TweetId(tweetId)) => + ( + k, + extractScore( + tweetId, + topicId, + certoScores, + _.followerL2NormalizedDotProduct8HrHalfLife)) + case _ => (k, None) + } + case (CertoNormalizedCosineScore, GenericPairScoreId(scoreId)) => + (scoreId.id1, scoreId.id2) match { + case (InternalId.TweetId(tweetId), InternalId.TopicId(topicId)) => + ( + k, + extractScore( + tweetId, + topicId, + certoScores, + _.followerL2NormalizedCosineSimilarity8HrHalfLife)) + case (InternalId.TopicId(topicId), InternalId.TweetId(tweetId)) => + ( + k, + extractScore( + tweetId, + topicId, + certoScores, + _.followerL2NormalizedCosineSimilarity8HrHalfLife)) + case _ => (k, None) + } + case _ => (k, None) + } + }.toMap + } + FutureOps.liftValues(ks, result) + } + + /** + * Given tweetToCertoScores, extract certain Certo score between the given tweetId and topicId. + * The Certo score of interest is specified using scoreExtractor. 
+ */ + def extractScore( + tweetId: TweetId, + topicId: TopicId, + tweetToCertoScores: Map[TweetId, Option[TopicToScores]], + scoreExtractor: Scores => Double + ): Option[ThriftScore] = { + tweetToCertoScores.get(tweetId).flatMap { + case Some(topicToScores) => + topicToScores.topicToScores.flatMap(_.get(topicId).map(scoreExtractor).map(ThriftScore(_))) + case _ => Some(ThriftScore(0.0)) + } + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetRankingScoreStore.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetRankingScoreStore.scala new file mode 100644 index 000000000..9ff502fd6 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetRankingScoreStore.scala @@ -0,0 +1,48 @@ +package com.twitter.representationscorer.scorestore + +import com.twitter.simclusters_v2.score.WeightedSumAggregatedScoreStore +import com.twitter.simclusters_v2.score.WeightedSumAggregatedScoreStore.WeightedSumAggregatedScoreParameter +import com.twitter.simclusters_v2.thriftscala.{EmbeddingType, ModelVersion, ScoringAlgorithm} + +object TopicTweetRankingScoreStore { + val producerEmbeddingScoreMultiplier = 1.0 + val consumerEmbeddingScoreMultiplier = 1.0 + + /** + * Build the scoring store for TopicTweet Ranking based on Default Multipliers. + * If you want to compare the ranking between different multipliers, register a new + * ScoringAlgorithm and let upstream callers select the scoring algorithm via params.
+ */ + def buildTopicTweetRankingStore( + consumerEmbeddingType: EmbeddingType, + producerEmbeddingType: EmbeddingType, + tweetEmbeddingType: EmbeddingType, + modelVersion: ModelVersion, + consumerEmbeddingMultiplier: Double = consumerEmbeddingScoreMultiplier, + producerEmbeddingMultiplier: Double = producerEmbeddingScoreMultiplier + ): WeightedSumAggregatedScoreStore = { + WeightedSumAggregatedScoreStore( + List( + WeightedSumAggregatedScoreParameter( + ScoringAlgorithm.PairEmbeddingCosineSimilarity, + consumerEmbeddingMultiplier, + WeightedSumAggregatedScoreStore.genericPairScoreIdToSimClustersEmbeddingPairScoreId( + consumerEmbeddingType, + tweetEmbeddingType, + modelVersion + ) + ), + WeightedSumAggregatedScoreParameter( + ScoringAlgorithm.PairEmbeddingCosineSimilarity, + producerEmbeddingMultiplier, + WeightedSumAggregatedScoreStore.genericPairScoreIdToSimClustersEmbeddingPairScoreId( + producerEmbeddingType, + tweetEmbeddingType, + modelVersion + ) + ) + ) + ) + } + +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetsCosineSimilarityAggregateStore.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetsCosineSimilarityAggregateStore.scala new file mode 100644 index 000000000..f835158b8 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore/TopicTweetsCosineSimilarityAggregateStore.scala @@ -0,0 +1,148 @@ +package com.twitter.representationscorer.scorestore + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.StatsUtil +import com.twitter.representationscorer.scorestore.TopicTweetsCosineSimilarityAggregateStore.ScoreKey +import com.twitter.simclusters_v2.common.TweetId +import com.twitter.simclusters_v2.score.AggregatedScoreStore +import com.twitter.simclusters_v2.thriftscala.ScoreInternalId.GenericPairScoreId +import 
com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm.CortexTopicTweetLabel +import com.twitter.simclusters_v2.thriftscala.{ + EmbeddingType, + InternalId, + ModelVersion, + ScoreInternalId, + ScoringAlgorithm, + SimClustersEmbeddingId, + TopicId, + Score => ThriftScore, + ScoreId => ThriftScoreId, + SimClustersEmbeddingPairScoreId => ThriftSimClustersEmbeddingPairScoreId +} +import com.twitter.storehaus.ReadableStore +import com.twitter.topic_recos.common.Configs.{DefaultModelVersion, MinCosineSimilarityScore} +import com.twitter.topic_recos.common._ +import com.twitter.util.Future + +/** + * Calculates the cosine similarity scores of arbitrary combinations of TopicEmbeddings and + * TweetEmbeddings. + * The class has 2 uses: + * 1. For internal uses. TSP will call this store to fetch the raw scores for (topic, tweet) with + * all available embedding types. We calculate all the scores here, so the caller can do filtering + * & score caching on their side. This will make it possible to DDG different embedding scores. + * + * 2. For external calls from Cortex. We return true (or 1.0) for any given (topic, tweet) if their + * cosine similarity passes the threshold for any of the embedding types. + * The expected input type is + * ScoreId( + * CortexTopicTweetLabel, + * GenericPairScoreId(TopicId, TweetId) + * ) + */ +case class TopicTweetsCosineSimilarityAggregateStore( + scoreKeys: Seq[ScoreKey], + statsReceiver: StatsReceiver) + extends AggregatedScoreStore { + + def toCortexScore(scoresMap: Map[ScoreKey, Double]): Double = { + val passThreshold = scoresMap.exists { + case (_, score) => score >= MinCosineSimilarityScore + } + if (passThreshold) 1.0 else 0.0 + } + + /** + * To be called by Cortex through Unified Score API ONLY. Calculates all possible (topic, tweet), + * return 1.0 if any of the embedding scores passes the minimum threshold.
+ * + * Expects a ScoreId(CortexTopicTweetLabel, GenericPairScoreId(TopicId, TweetId)) as input + */ + override def get(k: ThriftScoreId): Future[Option[ThriftScore]] = { + StatsUtil.trackOptionStats(statsReceiver) { + (k.algorithm, k.internalId) match { + case (CortexTopicTweetLabel, GenericPairScoreId(genericPairScoreId)) => + (genericPairScoreId.id1, genericPairScoreId.id2) match { + case (InternalId.TopicId(topicId), InternalId.TweetId(tweetId)) => + TopicTweetsCosineSimilarityAggregateStore + .getRawScoresMap(topicId, tweetId, scoreKeys, scoreFacadeStore) + .map { scoresMap => Some(ThriftScore(toCortexScore(scoresMap))) } + case (InternalId.TweetId(tweetId), InternalId.TopicId(topicId)) => + TopicTweetsCosineSimilarityAggregateStore + .getRawScoresMap(topicId, tweetId, scoreKeys, scoreFacadeStore) + .map { scoresMap => Some(ThriftScore(toCortexScore(scoresMap))) } + case _ => + Future.None + // Do not accept other InternalId combinations + } + case _ => + // Do not accept other Id types for now + Future.None + } + } + } +} + +object TopicTweetsCosineSimilarityAggregateStore { + + val TopicEmbeddingTypes: Seq[EmbeddingType] = + Seq( + EmbeddingType.FavTfgTopic, + EmbeddingType.LogFavBasedKgoApeTopic + ) + + // Add new embedding types here if you want to test the performance of a new Tweet embedding.
+ val TweetEmbeddingTypes: Seq[EmbeddingType] = Seq(EmbeddingType.LogFavBasedTweet) + + val ModelVersions: Seq[ModelVersion] = + Seq(DefaultModelVersion) + + val DefaultScoreKeys: Seq[ScoreKey] = { + for { + modelVersion <- ModelVersions + topicEmbeddingType <- TopicEmbeddingTypes + tweetEmbeddingType <- TweetEmbeddingTypes + } yield { + ScoreKey( + topicEmbeddingType = topicEmbeddingType, + tweetEmbeddingType = tweetEmbeddingType, + modelVersion = modelVersion + ) + } + } + case class ScoreKey( + topicEmbeddingType: EmbeddingType, + tweetEmbeddingType: EmbeddingType, + modelVersion: ModelVersion) + + def getRawScoresMap( + topicId: TopicId, + tweetId: TweetId, + scoreKeys: Seq[ScoreKey], + uniformScoringStore: ReadableStore[ThriftScoreId, ThriftScore] + ): Future[Map[ScoreKey, Double]] = { + val scoresMapFut = scoreKeys.map { key => + val scoreInternalId = ScoreInternalId.SimClustersEmbeddingPairScoreId( + ThriftSimClustersEmbeddingPairScoreId( + buildTopicEmbedding(topicId, key.topicEmbeddingType, key.modelVersion), + SimClustersEmbeddingId( + key.tweetEmbeddingType, + key.modelVersion, + InternalId.TweetId(tweetId)) + )) + val scoreFut = uniformScoringStore + .get( + ThriftScoreId( + algorithm = ScoringAlgorithm.PairEmbeddingCosineSimilarity, // Hard code as cosine sim + internalId = scoreInternalId + )) + key -> scoreFut + }.toMap + + Future + .collect(scoresMapFut).map(_.collect { + case (key, Some(ThriftScore(score))) => + (key, score) + }) + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/BUILD b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/BUILD new file mode 100644 index 000000000..1c617e9a0 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/BUILD @@ -0,0 +1,20 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + tags = ["bazel-compatible"], + 
dependencies = [ + "3rdparty/jvm/com/github/ben-manes/caffeine", + "finatra/inject/inject-core/src/main/scala", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/common", + "representation-scorer/server/src/main/scala/com/twitter/representationscorer/scorestore", + "representation-scorer/server/src/main/thrift:thrift-scala", + "src/thrift/com/twitter/twistly:twistly-scala", + "stitch/stitch-core", + "stitch/stitch-core:cache", + "strato/config/columns/recommendations/twistly:twistly-strato-client", + "strato/config/columns/recommendations/user-signal-service:user-signal-service-strato-client", + "strato/src/main/scala/com/twitter/strato/client", + "user-signal-service/thrift/src/main/thrift:thrift-scala", + "util/util-core", + ], +) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Engagements.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Engagements.scala new file mode 100644 index 000000000..2da828ce6 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Engagements.scala @@ -0,0 +1,65 @@ +package com.twitter.representationscorer.twistlyfeatures + +import com.twitter.conversions.DurationOps._ +import com.twitter.util.Duration +import com.twitter.util.Time + +case class Engagements( + favs7d: Seq[UserSignal] = Nil, + retweets7d: Seq[UserSignal] = Nil, + follows30d: Seq[UserSignal] = Nil, + shares7d: Seq[UserSignal] = Nil, + replies7d: Seq[UserSignal] = Nil, + originalTweets7d: Seq[UserSignal] = Nil, + videoPlaybacks7d: Seq[UserSignal] = Nil, + block30d: Seq[UserSignal] = Nil, + mute30d: Seq[UserSignal] = Nil, + report30d: Seq[UserSignal] = Nil, + dontlike30d: Seq[UserSignal] = Nil, + seeFewer30d: Seq[UserSignal] = Nil) { + + import Engagements._ + + private val now = Time.now + private val oneDayAgo = (now - OneDaySpan).inMillis + private val sevenDaysAgo = (now - 
SevenDaysSpan).inMillis + + // All ids from the signals grouped by type (tweetIds, userIds, etc) + val tweetIds: Seq[Long] = + (favs7d ++ retweets7d ++ shares7d + ++ replies7d ++ originalTweets7d ++ videoPlaybacks7d + ++ report30d ++ dontlike30d ++ seeFewer30d) + .map(_.targetId) + val authorIds: Seq[Long] = (follows30d ++ block30d ++ mute30d).map(_.targetId) + + // Tweet signals + val dontlike7d: Seq[UserSignal] = dontlike30d.filter(_.timestamp > sevenDaysAgo) + val seeFewer7d: Seq[UserSignal] = seeFewer30d.filter(_.timestamp > sevenDaysAgo) + + val favs1d: Seq[UserSignal] = favs7d.filter(_.timestamp > oneDayAgo) + val retweets1d: Seq[UserSignal] = retweets7d.filter(_.timestamp > oneDayAgo) + val shares1d: Seq[UserSignal] = shares7d.filter(_.timestamp > oneDayAgo) + val replies1d: Seq[UserSignal] = replies7d.filter(_.timestamp > oneDayAgo) + val originalTweets1d: Seq[UserSignal] = originalTweets7d.filter(_.timestamp > oneDayAgo) + val videoPlaybacks1d: Seq[UserSignal] = videoPlaybacks7d.filter(_.timestamp > oneDayAgo) + val dontlike1d: Seq[UserSignal] = dontlike7d.filter(_.timestamp > oneDayAgo) + val seeFewer1d: Seq[UserSignal] = seeFewer7d.filter(_.timestamp > oneDayAgo) + + // User signals + val follows7d: Seq[UserSignal] = follows30d.filter(_.timestamp > sevenDaysAgo) + val block7d: Seq[UserSignal] = block30d.filter(_.timestamp > sevenDaysAgo) + val mute7d: Seq[UserSignal] = mute30d.filter(_.timestamp > sevenDaysAgo) + val report7d: Seq[UserSignal] = report30d.filter(_.timestamp > sevenDaysAgo) + + val block1d: Seq[UserSignal] = block7d.filter(_.timestamp > oneDayAgo) + val mute1d: Seq[UserSignal] = mute7d.filter(_.timestamp > oneDayAgo) + val report1d: Seq[UserSignal] = report7d.filter(_.timestamp > oneDayAgo) +} + +object Engagements { + val OneDaySpan: Duration = 1.days + val SevenDaysSpan: Duration = 7.days + val ThirtyDaysSpan: Duration = 30.days +} + +case class UserSignal(targetId: Long, timestamp: Long) diff --git 
a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/ScoreResult.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/ScoreResult.scala new file mode 100644 index 000000000..71df34a19 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/ScoreResult.scala @@ -0,0 +1,3 @@ +package com.twitter.representationscorer.twistlyfeatures + +case class ScoreResult(id: Long, score: Option[Double]) diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Scorer.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Scorer.scala new file mode 100644 index 000000000..731412d0a --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Scorer.scala @@ -0,0 +1,474 @@ +package com.twitter.representationscorer.twistlyfeatures + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.representationscorer.common.TweetId +import com.twitter.representationscorer.common.UserId +import com.twitter.representationscorer.scorestore.ScoreStore +import com.twitter.representationscorer.thriftscala.SimClustersRecentEngagementSimilarities +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.InternalId +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.ScoreId +import com.twitter.simclusters_v2.thriftscala.ScoreInternalId +import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingPairScoreId +import com.twitter.stitch.Stitch +import javax.inject.Inject + +class Scorer @Inject() ( + 
fetchEngagementsFromUSS: Long => Stitch[Engagements], + scoreStore: ScoreStore, + stats: StatsReceiver) { + + import Scorer._ + + private val scoreStats = stats.scope("score") + private val scoreCalculationStats = scoreStats.scope("calculation") + private val scoreResultStats = scoreStats.scope("result") + + private val scoresNonEmptyCounter = scoreResultStats.scope("all").counter("nonEmpty") + private val scoresNonZeroCounter = scoreResultStats.scope("all").counter("nonZero") + + private val tweetScoreStats = scoreCalculationStats.scope("tweetScore").stat("latency") + private val userScoreStats = scoreCalculationStats.scope("userScore").stat("latency") + + private val favNonZero = scoreResultStats.scope("favs").counter("nonZero") + private val favNonEmpty = scoreResultStats.scope("favs").counter("nonEmpty") + + private val retweetsNonZero = scoreResultStats.scope("retweets").counter("nonZero") + private val retweetsNonEmpty = scoreResultStats.scope("retweets").counter("nonEmpty") + + private val followsNonZero = scoreResultStats.scope("follows").counter("nonZero") + private val followsNonEmpty = scoreResultStats.scope("follows").counter("nonEmpty") + + private val sharesNonZero = scoreResultStats.scope("shares").counter("nonZero") + private val sharesNonEmpty = scoreResultStats.scope("shares").counter("nonEmpty") + + private val repliesNonZero = scoreResultStats.scope("replies").counter("nonZero") + private val repliesNonEmpty = scoreResultStats.scope("replies").counter("nonEmpty") + + private val originalTweetsNonZero = scoreResultStats.scope("originalTweets").counter("nonZero") + private val originalTweetsNonEmpty = scoreResultStats.scope("originalTweets").counter("nonEmpty") + + private val videoViewsNonZero = scoreResultStats.scope("videoViews").counter("nonZero") + private val videoViewsNonEmpty = scoreResultStats.scope("videoViews").counter("nonEmpty") + + private val blockNonZero = scoreResultStats.scope("block").counter("nonZero") + private val 
blockNonEmpty = scoreResultStats.scope("block").counter("nonEmpty") + + private val muteNonZero = scoreResultStats.scope("mute").counter("nonZero") + private val muteNonEmpty = scoreResultStats.scope("mute").counter("nonEmpty") + + private val reportNonZero = scoreResultStats.scope("report").counter("nonZero") + private val reportNonEmpty = scoreResultStats.scope("report").counter("nonEmpty") + + private val dontlikeNonZero = scoreResultStats.scope("dontlike").counter("nonZero") + private val dontlikeNonEmpty = scoreResultStats.scope("dontlike").counter("nonEmpty") + + private val seeFewerNonZero = scoreResultStats.scope("seeFewer").counter("nonZero") + private val seeFewerNonEmpty = scoreResultStats.scope("seeFewer").counter("nonEmpty") + + private def getTweetScores( + candidateTweetId: TweetId, + sourceTweetIds: Seq[TweetId] + ): Stitch[Seq[ScoreResult]] = { + val getScoresStitch = Stitch.traverse(sourceTweetIds) { sourceTweetId => + scoreStore + .uniformScoringStoreStitch(getTweetScoreId(sourceTweetId, candidateTweetId)) + .liftNotFoundToOption + .map(score => ScoreResult(sourceTweetId, score.map(_.score))) + } + + Stitch.time(getScoresStitch).flatMap { + case (tryResult, duration) => + tweetScoreStats.add(duration.inMillis) + Stitch.const(tryResult) + } + } + + private def getUserScores( + tweetId: TweetId, + authorIds: Seq[UserId] + ): Stitch[Seq[ScoreResult]] = { + val getScoresStitch = Stitch.traverse(authorIds) { authorId => + scoreStore + .uniformScoringStoreStitch(getAuthorScoreId(authorId, tweetId)) + .liftNotFoundToOption + .map(score => ScoreResult(authorId, score.map(_.score))) + } + + Stitch.time(getScoresStitch).flatMap { + case (tryResult, duration) => + userScoreStats.add(duration.inMillis) + Stitch.const(tryResult) + } + } + + /** + * Get the [[SimClustersRecentEngagementSimilarities]] result containing the similarity + * features for the given userId-TweetId. 
+ */ + def get( + userId: UserId, + tweetId: TweetId + ): Stitch[SimClustersRecentEngagementSimilarities] = { + get(userId, Seq(tweetId)).map(x => x.head) + } + + /** + * Get a list of [[SimClustersRecentEngagementSimilarities]] results containing the similarity + * features for the given tweets of the user Id. + * Guaranteed to be the same number/order as requested. + */ + def get( + userId: UserId, + tweetIds: Seq[TweetId] + ): Stitch[Seq[SimClustersRecentEngagementSimilarities]] = { + fetchEngagementsFromUSS(userId) + .flatMap(engagements => { + // For each tweet received in the request, compute the similarity scores between them + // and the user signals fetched from USS. + Stitch + .join( + Stitch.traverse(tweetIds)(id => getTweetScores(id, engagements.tweetIds)), + Stitch.traverse(tweetIds)(id => getUserScores(id, engagements.authorIds)), + ) + .map { + case (tweetScoresSeq, userScoreSeq) => + // All seq have = size because when scores don't exist, they are returned as Option + (tweetScoresSeq, userScoreSeq).zipped.map { (tweetScores, userScores) => + computeSimilarityScoresPerTweet( + engagements, + tweetScores.groupBy(_.id), + userScores.groupBy(_.id)) + } + } + }) + } + + /** + * + * Computes the [[SimClustersRecentEngagementSimilarities]] + * using the given tweet-tweet and user-tweet scores in TweetScoresMap + * and the user signals in [[Engagements]]. 
+ */ + private def computeSimilarityScoresPerTweet( + engagements: Engagements, + tweetScores: Map[TweetId, Seq[ScoreResult]], + authorScores: Map[UserId, Seq[ScoreResult]] + ): SimClustersRecentEngagementSimilarities = { + val favs7d = engagements.favs7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val favs1d = engagements.favs1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val retweets7d = engagements.retweets7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val retweets1d = engagements.retweets1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val follows30d = engagements.follows30d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val follows7d = engagements.follows7d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val shares7d = engagements.shares7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val shares1d = engagements.shares1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val replies7d = engagements.replies7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val replies1d = engagements.replies1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val originalTweets7d = engagements.originalTweets7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val originalTweets1d = engagements.originalTweets1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val videoViews7d = engagements.videoPlaybacks7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val videoViews1d = engagements.videoPlaybacks1d.view + .flatMap(s => 
tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val block30d = engagements.block30d.view // block/mute signals target accounts (Engagements.authorIds), so their scores are keyed in authorScores + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val block7d = engagements.block7d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val block1d = engagements.block1d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val mute30d = engagements.mute30d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val mute7d = engagements.mute7d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val mute1d = engagements.mute1d.view + .flatMap(s => authorScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val report30d = engagements.report30d.view // report/dontlike/seeFewer signals target tweets (Engagements.tweetIds) + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val report7d = engagements.report7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val report1d = engagements.report1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val dontlike30d = engagements.dontlike30d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val dontlike7d = engagements.dontlike7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val dontlike1d = engagements.dontlike1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val seeFewer30d = engagements.seeFewer30d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val seeFewer7d = engagements.seeFewer7d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val seeFewer1d = engagements.seeFewer1d.view + .flatMap(s => tweetScores.get(s.targetId)) + .flatten.flatMap(_.score) + .force + + val 
result = SimClustersRecentEngagementSimilarities( + fav1dLast10Max = max(favs1d), + fav1dLast10Avg = avg(favs1d), + fav7dLast10Max = max(favs7d), + fav7dLast10Avg = avg(favs7d), + retweet1dLast10Max = max(retweets1d), + retweet1dLast10Avg = avg(retweets1d), + retweet7dLast10Max = max(retweets7d), + retweet7dLast10Avg = avg(retweets7d), + follow7dLast10Max = max(follows7d), + follow7dLast10Avg = avg(follows7d), + follow30dLast10Max = max(follows30d), + follow30dLast10Avg = avg(follows30d), + share1dLast10Max = max(shares1d), + share1dLast10Avg = avg(shares1d), + share7dLast10Max = max(shares7d), + share7dLast10Avg = avg(shares7d), + reply1dLast10Max = max(replies1d), + reply1dLast10Avg = avg(replies1d), + reply7dLast10Max = max(replies7d), + reply7dLast10Avg = avg(replies7d), + originalTweet1dLast10Max = max(originalTweets1d), + originalTweet1dLast10Avg = avg(originalTweets1d), + originalTweet7dLast10Max = max(originalTweets7d), + originalTweet7dLast10Avg = avg(originalTweets7d), + videoPlayback1dLast10Max = max(videoViews1d), + videoPlayback1dLast10Avg = avg(videoViews1d), + videoPlayback7dLast10Max = max(videoViews7d), + videoPlayback7dLast10Avg = avg(videoViews7d), + block1dLast10Max = max(block1d), + block1dLast10Avg = avg(block1d), + block7dLast10Max = max(block7d), + block7dLast10Avg = avg(block7d), + block30dLast10Max = max(block30d), + block30dLast10Avg = avg(block30d), + mute1dLast10Max = max(mute1d), + mute1dLast10Avg = avg(mute1d), + mute7dLast10Max = max(mute7d), + mute7dLast10Avg = avg(mute7d), + mute30dLast10Max = max(mute30d), + mute30dLast10Avg = avg(mute30d), + report1dLast10Max = max(report1d), + report1dLast10Avg = avg(report1d), + report7dLast10Max = max(report7d), + report7dLast10Avg = avg(report7d), + report30dLast10Max = max(report30d), + report30dLast10Avg = avg(report30d), + dontlike1dLast10Max = max(dontlike1d), + dontlike1dLast10Avg = avg(dontlike1d), + dontlike7dLast10Max = max(dontlike7d), + dontlike7dLast10Avg = avg(dontlike7d), + 
dontlike30dLast10Max = max(dontlike30d), + dontlike30dLast10Avg = avg(dontlike30d), + seeFewer1dLast10Max = max(seeFewer1d), + seeFewer1dLast10Avg = avg(seeFewer1d), + seeFewer7dLast10Max = max(seeFewer7d), + seeFewer7dLast10Avg = avg(seeFewer7d), + seeFewer30dLast10Max = max(seeFewer30d), + seeFewer30dLast10Avg = avg(seeFewer30d), + ) + trackStats(result) + result + } + + private def trackStats(result: SimClustersRecentEngagementSimilarities): Unit = { + val scores = Seq( + result.fav7dLast10Max, + result.retweet7dLast10Max, + result.follow30dLast10Max, + result.share1dLast10Max, + result.share7dLast10Max, + result.reply7dLast10Max, + result.originalTweet7dLast10Max, + result.videoPlayback7dLast10Max, + result.block30dLast10Max, + result.mute30dLast10Max, + result.report30dLast10Max, + result.dontlike30dLast10Max, + result.seeFewer30dLast10Max + ) + + val nonEmpty = scores.exists(_.isDefined) + val nonZero = scores.exists { case Some(score) if score > 0 => true; case _ => false } + + if (nonEmpty) { + scoresNonEmptyCounter.incr() + } + + if (nonZero) { + scoresNonZeroCounter.incr() + } + + // We use the largest window of a given type of score, + // because the largest window is inclusive of smaller windows. 
+ trackSignalStats(favNonEmpty, favNonZero, result.fav7dLast10Avg) + trackSignalStats(retweetsNonEmpty, retweetsNonZero, result.retweet7dLast10Avg) + trackSignalStats(followsNonEmpty, followsNonZero, result.follow30dLast10Avg) + trackSignalStats(sharesNonEmpty, sharesNonZero, result.share7dLast10Avg) + trackSignalStats(repliesNonEmpty, repliesNonZero, result.reply7dLast10Avg) + trackSignalStats(originalTweetsNonEmpty, originalTweetsNonZero, result.originalTweet7dLast10Avg) + trackSignalStats(videoViewsNonEmpty, videoViewsNonZero, result.videoPlayback7dLast10Avg) + trackSignalStats(blockNonEmpty, blockNonZero, result.block30dLast10Avg) + trackSignalStats(muteNonEmpty, muteNonZero, result.mute30dLast10Avg) + trackSignalStats(reportNonEmpty, reportNonZero, result.report30dLast10Avg) + trackSignalStats(dontlikeNonEmpty, dontlikeNonZero, result.dontlike30dLast10Avg) + trackSignalStats(seeFewerNonEmpty, seeFewerNonZero, result.seeFewer30dLast10Avg) + } + + private def trackSignalStats(nonEmpty: Counter, nonZero: Counter, score: Option[Double]): Unit = { + if (score.nonEmpty) { + nonEmpty.incr() + + if (score.get > 0) + nonZero.incr() + } + } +} + +object Scorer { + def avg(s: Traversable[Double]): Option[Double] = // arithmetic mean, None when there are no scores + if (s.isEmpty) None else Some(s.sum / s.size) + def max(s: Traversable[Double]): Option[Double] = // largest score, None when there are no scores; reduces over the elements so the result is not floored at 0.0 + if (s.isEmpty) None else Some(s.reduceLeft((best, score) => math.max(best, score))) + + private def getAuthorScoreId( + userId: UserId, + tweetId: TweetId + ) = { + ScoreId( + algorithm = ScoringAlgorithm.PairEmbeddingCosineSimilarity, + internalId = ScoreInternalId.SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingId( + internalId = InternalId.UserId(userId), + modelVersion = ModelVersion.Model20m145k2020, + embeddingType = EmbeddingType.FavBasedProducer + ), + SimClustersEmbeddingId( + internalId = InternalId.TweetId(tweetId), + modelVersion = ModelVersion.Model20m145k2020, + embeddingType = 
EmbeddingType.LogFavBasedTweet + ) + )) + ) + } + + private def getTweetScoreId( + sourceTweetId: TweetId, + candidateTweetId: TweetId + ) = { + ScoreId( + algorithm = ScoringAlgorithm.PairEmbeddingCosineSimilarity, + internalId = ScoreInternalId.SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingPairScoreId( + SimClustersEmbeddingId( + internalId = InternalId.TweetId(sourceTweetId), + modelVersion = ModelVersion.Model20m145k2020, + embeddingType = EmbeddingType.LogFavLongestL2EmbeddingTweet + ), + SimClustersEmbeddingId( + internalId = InternalId.TweetId(candidateTweetId), + modelVersion = ModelVersion.Model20m145k2020, + embeddingType = EmbeddingType.LogFavBasedTweet + ) + )) + ) + } +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClient.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClient.scala new file mode 100644 index 000000000..fb09c1e57 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClient.scala @@ -0,0 +1,155 @@ +package com.twitter.representationscorer.twistlyfeatures + +import com.twitter.decider.SimpleRecipient +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.representationscorer.common._ +import com.twitter.representationscorer.twistlyfeatures.Engagements._ +import com.twitter.simclusters_v2.common.SimClustersEmbeddingId.LongInternalId +import com.twitter.stitch.Stitch +import com.twitter.strato.generated.client.recommendations.user_signal_service.SignalsClientColumn +import com.twitter.strato.generated.client.recommendations.user_signal_service.SignalsClientColumn.Value +import com.twitter.usersignalservice.thriftscala.BatchSignalRequest +import com.twitter.usersignalservice.thriftscala.SignalRequest +import 
com.twitter.usersignalservice.thriftscala.SignalType +import com.twitter.util.Time +import scala.collection.mutable.ArrayBuffer +import com.twitter.usersignalservice.thriftscala.ClientIdentifier + +class UserSignalServiceRecentEngagementsClient( + stratoClient: SignalsClientColumn, + decider: RepresentationScorerDecider, + stats: StatsReceiver) { + + import UserSignalServiceRecentEngagementsClient._ + + private val signalStats = stats.scope("user-signal-service", "signal") + private val signalTypeStats: Map[SignalType, Stat] = + SignalType.list.map(s => (s, signalStats.scope(s.name).stat("size"))).toMap + + def get(userId: UserId): Stitch[Engagements] = { + val request = buildRequest(userId) + stratoClient.fetcher.fetch(request).map(_.v).lowerFromOption().map { response => + val now = Time.now + val sevenDaysAgo = now - SevenDaysSpan + val thirtyDaysAgo = now - ThirtyDaysSpan + + Engagements( + favs7d = getUserSignals(response, SignalType.TweetFavorite, sevenDaysAgo), + retweets7d = getUserSignals(response, SignalType.Retweet, sevenDaysAgo), + follows30d = getUserSignals(response, SignalType.AccountFollowWithDelay, thirtyDaysAgo), + shares7d = getUserSignals(response, SignalType.TweetShareV1, sevenDaysAgo), + replies7d = getUserSignals(response, SignalType.Reply, sevenDaysAgo), + originalTweets7d = getUserSignals(response, SignalType.OriginalTweet, sevenDaysAgo), + videoPlaybacks7d = + getUserSignals(response, SignalType.VideoView90dPlayback50V1, sevenDaysAgo), + block30d = getUserSignals(response, SignalType.AccountBlock, thirtyDaysAgo), + mute30d = getUserSignals(response, SignalType.AccountMute, thirtyDaysAgo), + report30d = getUserSignals(response, SignalType.TweetReport, thirtyDaysAgo), + dontlike30d = getUserSignals(response, SignalType.TweetDontLike, thirtyDaysAgo), + seeFewer30d = getUserSignals(response, SignalType.TweetSeeFewer, thirtyDaysAgo), + ) + } + } + + private def getUserSignals( + response: Value, + signalType: SignalType, + 
earliestValidTimestamp: Time + ): Seq[UserSignal] = { + val signals = response.signalResponse + .getOrElse(signalType, Seq.empty) + .view + .filter(_.timestamp > earliestValidTimestamp.inMillis) + .map(s => s.targetInternalId.collect { case LongInternalId(id) => (id, s.timestamp) }) + .collect { case Some((id, engagedAt)) => UserSignal(id, engagedAt) } + .take(EngagementsToScore) + .force + + signalTypeStats(signalType).add(signals.size) + signals + } + + private def buildRequest(userId: Long) = { + val recipient = Some(SimpleRecipient(userId)) + + // Signals RSX always fetches + val requestSignals = ArrayBuffer( + SignalRequestFav, + SignalRequestRetweet, + SignalRequestFollow + ) + + // Signals under experimentation. We use individual deciders to disable them if necessary. + // If experiments are successful, they will become permanent. + if (decider.isAvailable(FetchSignalShareDeciderKey, recipient)) + requestSignals.append(SignalRequestShare) + + if (decider.isAvailable(FetchSignalReplyDeciderKey, recipient)) + requestSignals.append(SignalRequestReply) + + if (decider.isAvailable(FetchSignalOriginalTweetDeciderKey, recipient)) + requestSignals.append(SignalRequestOriginalTweet) + + if (decider.isAvailable(FetchSignalVideoPlaybackDeciderKey, recipient)) + requestSignals.append(SignalRequestVideoPlayback) + + if (decider.isAvailable(FetchSignalBlockDeciderKey, recipient)) + requestSignals.append(SignalRequestBlock) + + if (decider.isAvailable(FetchSignalMuteDeciderKey, recipient)) + requestSignals.append(SignalRequestMute) + + if (decider.isAvailable(FetchSignalReportDeciderKey, recipient)) + requestSignals.append(SignalRequestReport) + + if (decider.isAvailable(FetchSignalDontlikeDeciderKey, recipient)) + requestSignals.append(SignalRequestDontlike) + + if (decider.isAvailable(FetchSignalSeeFewerDeciderKey, recipient)) + requestSignals.append(SignalRequestSeeFewer) + + BatchSignalRequest(userId, requestSignals, Some(ClientIdentifier.RepresentationScorerHome)) + } 
+} + +object UserSignalServiceRecentEngagementsClient { + val FetchSignalShareDeciderKey = "representation_scorer_fetch_signal_share" + val FetchSignalReplyDeciderKey = "representation_scorer_fetch_signal_reply" + val FetchSignalOriginalTweetDeciderKey = "representation_scorer_fetch_signal_original_tweet" + val FetchSignalVideoPlaybackDeciderKey = "representation_scorer_fetch_signal_video_playback" + val FetchSignalBlockDeciderKey = "representation_scorer_fetch_signal_block" + val FetchSignalMuteDeciderKey = "representation_scorer_fetch_signal_mute" + val FetchSignalReportDeciderKey = "representation_scorer_fetch_signal_report" + val FetchSignalDontlikeDeciderKey = "representation_scorer_fetch_signal_dont_like" + val FetchSignalSeeFewerDeciderKey = "representation_scorer_fetch_signal_see_fewer" + + val EngagementsToScore = 10 + private val engagementsToScoreOpt: Option[Long] = Some(EngagementsToScore) + + val SignalRequestFav: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.TweetFavorite) + val SignalRequestRetweet: SignalRequest = SignalRequest(engagementsToScoreOpt, SignalType.Retweet) + val SignalRequestFollow: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.AccountFollowWithDelay) + // New experimental signals + val SignalRequestShare: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.TweetShareV1) + val SignalRequestReply: SignalRequest = SignalRequest(engagementsToScoreOpt, SignalType.Reply) + val SignalRequestOriginalTweet: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.OriginalTweet) + val SignalRequestVideoPlayback: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.VideoView90dPlayback50V1) + + // Negative signals + val SignalRequestBlock: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.AccountBlock) + val SignalRequestMute: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.AccountMute) + val SignalRequestReport: SignalRequest = + 
SignalRequest(engagementsToScoreOpt, SignalType.TweetReport) + val SignalRequestDontlike: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.TweetDontLike) + val SignalRequestSeeFewer: SignalRequest = + SignalRequest(engagementsToScoreOpt, SignalType.TweetSeeFewer) +} diff --git a/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClientModule.scala b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClientModule.scala new file mode 100644 index 000000000..ee9f61df4 --- /dev/null +++ b/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/UserSignalServiceRecentEngagementsClientModule.scala @@ -0,0 +1,57 @@ +package com.twitter.representationscorer.twistlyfeatures + +import com.github.benmanes.caffeine.cache.Caffeine +import com.twitter.stitch.cache.EvictingCache +import com.google.inject.Provides +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.inject.TwitterModule +import com.twitter.representationscorer.common.RepresentationScorerDecider +import com.twitter.stitch.Stitch +import com.twitter.stitch.cache.ConcurrentMapCache +import com.twitter.stitch.cache.MemoizeQuery +import com.twitter.strato.client.Client +import com.twitter.strato.generated.client.recommendations.user_signal_service.SignalsClientColumn +import java.util.concurrent.ConcurrentMap +import java.util.concurrent.TimeUnit +import javax.inject.Singleton + +object UserSignalServiceRecentEngagementsClientModule extends TwitterModule { + + @Singleton + @Provides + def provide( + client: Client, + decider: RepresentationScorerDecider, + statsReceiver: StatsReceiver + ): Long => Stitch[Engagements] = { + val stratoClient = new SignalsClientColumn(client) + + /* + This cache holds a users recent engagements for a short period of time, such that batched requests + for multiple (userid, 
tweetid) pairs don't all need to fetch them. + + [1] Caffeine cache keys/values must be objects, so we cannot use the `Long` primitive directly. + The boxed java.lang.Long works as a key, since it is an object. In most situations the compiler + can see where auto(un)boxing can occur. However, here we seem to need some wrapper functions + with explicit types to allow the boxing to happen. + */ + val mapCache: ConcurrentMap[java.lang.Long, Stitch[Engagements]] = + Caffeine + .newBuilder() + .expireAfterWrite(5, TimeUnit.SECONDS) + .maximumSize( + 1000 // We estimate 5M unique users in a 5m period - with 2k RSX instances, assume that one will see < 1k in a 5s period + ) + .build[java.lang.Long, Stitch[Engagements]] + .asMap + + statsReceiver.provideGauge("ussRecentEngagementsClient", "cache_size") { mapCache.size.toFloat } + + val engagementsClient = + new UserSignalServiceRecentEngagementsClient(stratoClient, decider, statsReceiver) + + val f = (l: java.lang.Long) => engagementsClient.get(l) // See note [1] above + val cachedCall = MemoizeQuery(f, EvictingCache.lazily(new ConcurrentMapCache(mapCache))) + (l: Long) => cachedCall(l) // see note [1] above + } +} diff --git a/representation-scorer/server/src/main/thrift/BUILD b/representation-scorer/server/src/main/thrift/BUILD new file mode 100644 index 000000000..f7ea37675 --- /dev/null +++ b/representation-scorer/server/src/main/thrift/BUILD @@ -0,0 +1,20 @@ +create_thrift_libraries( + base_name = "thrift", + sources = [ + "com/twitter/representationscorer/service.thrift", + ], + platform = "java8", + tags = [ + "bazel-compatible", + ], + dependency_roots = [ + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = "representationscorer-service-thrift-java", + provides_scala_name = "representationscorer-service-thrift-scala", +) diff --git 
a/representation-scorer/server/src/main/thrift/com/twitter/representationscorer/service.thrift b/representation-scorer/server/src/main/thrift/com/twitter/representationscorer/service.thrift new file mode 100644 index 000000000..0e2f23a31 --- /dev/null +++ b/representation-scorer/server/src/main/thrift/com/twitter/representationscorer/service.thrift @@ -0,0 +1,106 @@ +namespace java com.twitter.representationscorer.thriftjava +#@namespace scala com.twitter.representationscorer.thriftscala +#@namespace strato com.twitter.representationscorer + +include "com/twitter/simclusters_v2/identifier.thrift" +include "com/twitter/simclusters_v2/online_store.thrift" +include "com/twitter/simclusters_v2/score.thrift" + +struct SimClustersRecentEngagementSimilarities { + // All scores computed using cosine similarity + // 1 - 1000 Positive Signals + 1: optional double fav1dLast10Max // max score from last 10 faves in the last 1 day + 2: optional double fav1dLast10Avg // avg score from last 10 faves in the last 1 day + 3: optional double fav7dLast10Max // max score from last 10 faves in the last 7 days + 4: optional double fav7dLast10Avg // avg score from last 10 faves in the last 7 days + 5: optional double retweet1dLast10Max // max score from last 10 retweets in the last 1 day + 6: optional double retweet1dLast10Avg // avg score from last 10 retweets in the last 1 day + 7: optional double retweet7dLast10Max // max score from last 10 retweets in the last 7 days + 8: optional double retweet7dLast10Avg // avg score from last 10 retweets in the last 7 days + 9: optional double follow7dLast10Max // max score from the last 10 follows in the last 7 days + 10: optional double follow7dLast10Avg // avg score from the last 10 follows in the last 7 days + 11: optional double follow30dLast10Max // max score from the last 10 follows in the last 30 days + 12: optional double follow30dLast10Avg // avg score from the last 10 follows in the last 30 days + 13: optional double share1dLast10Max // 
max score from last 10 shares in the last 1 day + 14: optional double share1dLast10Avg // avg score from last 10 shares in the last 1 day + 15: optional double share7dLast10Max // max score from last 10 shares in the last 7 days + 16: optional double share7dLast10Avg // avg score from last 10 shares in the last 7 days + 17: optional double reply1dLast10Max // max score from last 10 replies in the last 1 day + 18: optional double reply1dLast10Avg // avg score from last 10 replies in the last 1 day + 19: optional double reply7dLast10Max // max score from last 10 replies in the last 7 days + 20: optional double reply7dLast10Avg // avg score from last 10 replies in the last 7 days + 21: optional double originalTweet1dLast10Max // max score from last 10 original tweets in the last 1 day + 22: optional double originalTweet1dLast10Avg // avg score from last 10 original tweets in the last 1 day + 23: optional double originalTweet7dLast10Max // max score from last 10 original tweets in the last 7 days + 24: optional double originalTweet7dLast10Avg // avg score from last 10 original tweets in the last 7 days + 25: optional double videoPlayback1dLast10Max // max score from last 10 video playback50 in the last 1 day + 26: optional double videoPlayback1dLast10Avg // avg score from last 10 video playback50 in the last 1 day + 27: optional double videoPlayback7dLast10Max // max score from last 10 video playback50 in the last 7 days + 28: optional double videoPlayback7dLast10Avg // avg score from last 10 video playback50 in the last 7 days + + // 1001 - 2000 Implicit Signals + + // 2001 - 3000 Negative Signals + // Block Series + 2001: optional double block1dLast10Avg + 2002: optional double block1dLast10Max + 2003: optional double block7dLast10Avg + 2004: optional double block7dLast10Max + 2005: optional double block30dLast10Avg + 2006: optional double block30dLast10Max + // Mute Series + 2101: optional double mute1dLast10Avg + 2102: optional double mute1dLast10Max + 2103: 
optional double mute7dLast10Avg + 2104: optional double mute7dLast10Max + 2105: optional double mute30dLast10Avg + 2106: optional double mute30dLast10Max + // Report Series + 2201: optional double report1dLast10Avg + 2202: optional double report1dLast10Max + 2203: optional double report7dLast10Avg + 2204: optional double report7dLast10Max + 2205: optional double report30dLast10Avg + 2206: optional double report30dLast10Max + // Dontlike + 2301: optional double dontlike1dLast10Avg + 2302: optional double dontlike1dLast10Max + 2303: optional double dontlike7dLast10Avg + 2304: optional double dontlike7dLast10Max + 2305: optional double dontlike30dLast10Avg + 2306: optional double dontlike30dLast10Max + // SeeFewer + 2401: optional double seeFewer1dLast10Avg + 2402: optional double seeFewer1dLast10Max + 2403: optional double seeFewer7dLast10Avg + 2404: optional double seeFewer7dLast10Max + 2405: optional double seeFewer30dLast10Avg + 2406: optional double seeFewer30dLast10Max +}(persisted='true', hasPersonalData = 'true') + +/* + * List score API + */ +struct ListScoreId { + 1: required score.ScoringAlgorithm algorithm + 2: required online_store.ModelVersion modelVersion + 3: required identifier.EmbeddingType targetEmbeddingType + 4: required identifier.InternalId targetId + 5: required identifier.EmbeddingType candidateEmbeddingType + 6: required list<identifier.InternalId> candidateIds +}(hasPersonalData = 'true') + +struct ScoreResult { + // This api does not communicate why a score is missing. For example, it may be unavailable + // because the referenced entities do not exist (e.g. the embedding was not found) or because + // timeouts prevented us from calculating it. 
+ 1: optional double score +} + +struct ListScoreResponse { + 1: required list<ScoreResult> scores // Guaranteed to be the same number/order as requested +} + +struct RecentEngagementSimilaritiesResponse { + 1: required list<SimClustersRecentEngagementSimilarities> results // Guaranteed to be the same number/order as requested +} From 90d7ea370e4db804fb8f57fcb133a84af767dbfb Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 28 Apr 2023 11:09:29 -0700 Subject: [PATCH 08/11] README updates: representation-manager and representation-scorer --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 691c3a96c..818e7334e 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,11 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | | [recos-injector](recos-injector/README.md) | Streaming event processor for building input streams for [GraphJet](https://github.com/twitter/GraphJet) based services. | | | [graph-feature-service](graph-feature-service/README.md) | Serves graph features for a directed pair of Users (e.g. how many of User A's following liked Tweets from User B). | | | [topic-social-proof](topic-social-proof/README.md) | Identifies topics related to individual Tweets. | +| | [representation-scorer](representation-scorer/README.md) | Compute scores between pairs of entities (Users, Tweets, etc.) using embedding similarity. | | Software framework | [navi](navi/README.md) | High performance, machine learning model serving written in Rust. | | | [product-mixer](product-mixer/README.md) | Software framework for building feeds of content. | -| | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. +| | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. 
| +| | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusters and TwHIN). | | | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. | The product surface currently included in this repository is the For You Timeline. From 01dbfee4c05b1aa5cf7c6abf8978cd7f9ab76927 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 12 May 2023 09:11:38 -0700 Subject: [PATCH 09/11] Open-sourcing Tweetypie Tweetypie is the core Tweet service that handles the reading and writing of Tweet data. --- README.md | 1 + .../additionalfields/AdditionalFields.scala | 118 + .../twitter/tweetypie/additionalfields/BUILD | 15 + .../scala/com/twitter/tweetypie/caching/BUILD | 15 + .../tweetypie/caching/CacheOperations.scala | 241 ++ .../tweetypie/caching/CacheResult.scala | 45 + .../twitter/tweetypie/caching/Expiry.scala | 34 + .../caching/ServoCachedValueSerializer.scala | 140 + .../twitter/tweetypie/caching/SoftTtl.scala | 120 + .../tweetypie/caching/StitchAsync.scala | 65 + .../caching/StitchCacheOperations.scala | 62 + .../tweetypie/caching/StitchCaching.scala | 36 + .../tweetypie/caching/ValueSerializer.scala | 47 + .../com/twitter/tweetypie/client_id/BUILD | 15 + .../tweetypie/client_id/ClientIdHelper.scala | 185 ++ .../scala/com/twitter/tweetypie/context/BUILD | 19 + .../tweetypie/context/TweetypieContext.scala | 135 + .../scala/com/twitter/tweetypie/decider/BUILD | 15 + .../tweetypie/decider/DeciderGates.scala | 60 + .../twitter/tweetypie/decider/overrides/BUILD | 10 + .../overrides/TweetyPieDeciderOverrides.scala | 42 + .../twitter/tweetypie/jiminy/tweetypie/BUILD | 15 + .../jiminy/tweetypie/NudgeBuilder.scala | 165 ++ .../com/twitter/tweetypie/matching/BUILD | 18 + .../tweetypie/matching/TokenSequence.scala | 92 + .../tweetypie/matching/Tokenizer.scala | 156 ++ .../tweetypie/matching/TweetTokenizer.scala | 45 + .../tweetypie/matching/UserMutes.scala | 128 + .../scala/com/twitter/tweetypie/media/BUILD 
| 17 + .../com/twitter/tweetypie/media/Media.scala | 149 ++ .../twitter/tweetypie/media/MediaUrl.scala | 108 + .../com/twitter/tweetypie/media/package.scala | 7 + .../tweetypie/storage/AddTweetHandler.scala | 80 + .../scala/com/twitter/tweetypie/storage/BUILD | 47 + .../storage/BounceDeleteHandler.scala | 20 + .../twitter/tweetypie/storage/Codecs.scala | 242 ++ .../DeleteAdditionalFieldsHandler.scala | 67 + .../com/twitter/tweetypie/storage/Field.scala | 41 + .../storage/GetDeletedTweetsHandler.scala | 150 ++ .../storage/GetStoredTweetHandler.scala | 126 + .../tweetypie/storage/GetTweetHandler.scala | 167 ++ .../storage/HardDeleteTweetHandler.scala | 153 ++ .../tweetypie/storage/InspectFields.scala | 228 ++ .../com/twitter/tweetypie/storage/Json.scala | 17 + .../storage/ManhattanOperations.scala | 103 + .../storage/ManhattanTweetStorageClient.scala | 451 ++++ .../twitter/tweetypie/storage/Response.scala | 30 + .../twitter/tweetypie/storage/Scribe.scala | 85 + .../tweetypie/storage/ScrubHandler.scala | 71 + .../tweetypie/storage/SoftDeleteHandler.scala | 20 + .../com/twitter/tweetypie/storage/Stats.scala | 33 + .../tweetypie/storage/StatusConversions.scala | 129 + .../storage/StorageConversions.scala | 346 +++ .../tweetypie/storage/TimestampDecoder.scala | 92 + .../twitter/tweetypie/storage/TweetKey.scala | 164 ++ .../tweetypie/storage/TweetStateRecord.scala | 90 + .../storage/TweetStorageClient.scala | 201 ++ .../storage/TweetStorageException.scala | 34 + .../tweetypie/storage/TweetUtils.scala | 265 ++ .../tweetypie/storage/UndeleteHandler.scala | 106 + .../storage/UpdateTweetHandler.scala | 64 + .../twitter/tweetypie/storage/package.scala | 11 + .../scala/com/twitter/tweetypie/tflock/BUILD | 20 + .../tweetypie/tflock/TFlockIndexer.scala | 532 ++++ .../tweetypie/tflock/TweetIndexer.scala | 30 + .../com/twitter/tweetypie/thriftscala/BUILD | 13 + .../NotImplementedTweetService.scala | 8 + .../thriftscala/TweetServiceProxy.scala | 79 + 
.../tweetypie/thriftscala/entities/BUILD | 15 + .../entities/CashtagTextEntity.scala | 11 + .../entities/EntityExtractor.scala | 118 + .../entities/HashtagTextEntity.scala | 11 + .../thriftscala/entities/Implicits.scala | 10 + .../entities/MediaTextEntity.scala | 11 + .../entities/MentionTextEntity.scala | 11 + .../entities/TextRangeEntityAdapter.scala | 11 + .../thriftscala/entities/UrlTextEntity.scala | 11 + .../com/twitter/tweetypie/tweettext/BUILD | 16 + .../tweettext/GraphemeIndexIterator.scala | 44 + .../tweetypie/tweettext/IndexConverter.scala | 85 + .../twitter/tweetypie/tweettext/Offset.scala | 253 ++ .../tweettext/PartialHtmlEncoding.scala | 55 + .../tweetypie/tweettext/Preprocessor.scala | 251 ++ .../tweetypie/tweettext/TextEntity.scala | 24 + .../tweettext/TextModification.scala | 232 ++ .../tweetypie/tweettext/Truncator.scala | 159 ++ .../tweetypie/tweettext/TweetText.scala | 62 + .../scala/com/twitter/tweetypie/util/BUILD | 76 + .../tweetypie/util/CommunityAnnotation.scala | 29 + .../tweetypie/util/CommunityUtil.scala | 19 + .../tweetypie/util/ConversationControls.scala | 112 + .../tweetypie/util/EditControlUtil.scala | 174 ++ .../tweetypie/util/RetryPolicyBuilder.scala | 45 + .../twitter/tweetypie/util/StitchUtils.scala | 54 + .../tweetypie/util/StringLiteral.scala | 31 + .../twitter/tweetypie/util/Takedowns.scala | 49 + .../tweetypie/util/TransientContextUtil.scala | 17 + .../tweetypie/util/TweetCreationLock.scala | 203 ++ .../twitter/tweetypie/util/TweetLenses.scala | 506 ++++ .../tweetypie/util/TweetPermalinkUtil.scala | 18 + .../tweetypie/util/TweetTransformer.scala | 128 + .../AlertableExceptionLoggingFilter.scala | 41 + .../com/twitter/tweetypie/util/logging/BUILD | 17 + .../OnlyImportantLogsLoggingFilter.scala | 30 + .../com/twitter/tweetypie/util/package.scala | 9 + .../src/thrift/com/twitter/tweetypie/BUILD | 353 +++ .../com/twitter/tweetypie/api_fields.thrift | 18 + .../creative_entity_enrichments.thrift | 21 + 
.../tweetypie/delete_location_data.thrift | 32 + .../twitter/tweetypie/deleted_tweet.thrift | 86 + .../com/twitter/tweetypie/deprecated.thrift | 99 + .../com/twitter/tweetypie/edit_control.thrift | 71 + .../tweetypie/geo/tweet_location_info.thrift | 72 + .../twitter/tweetypie/media/media_ref.thrift | 20 + .../com/twitter/tweetypie/media_entity.thrift | 135 + .../com/twitter/tweetypie/note_tweet.thrift | 13 + .../tweetypie/retweet_archival_event.thrift | 30 + .../twitter/tweetypie/storage_internal/BUILD | 11 + .../storage_internal/storage_internal.thrift | 79 + .../tweetypie/stored_tweet_info.thrift | 52 + .../tweetypie/transient_context.thrift | 64 + .../thrift/com/twitter/tweetypie/tweet.thrift | 1652 ++++++++++++ .../com/twitter/tweetypie/tweet_audit.thrift | 32 + .../tweetypie/tweet_comparison_service.thrift | 28 + .../com/twitter/tweetypie/tweet_events.thrift | 277 ++ .../twitter/tweetypie/tweet_service.thrift | 2320 +++++++++++++++++ .../tweetypie/tweet_service_federated.thrift | 32 + .../tweetypie/tweet_service_graphql.thrift | 391 +++ .../tweetypie/unmentions/unmentions.thrift | 9 + tweetypie/server/BUILD | 16 + tweetypie/server/README.md | 45 + tweetypie/server/config/BUILD | 7 + tweetypie/server/config/decider.yml | 313 +++ tweetypie/server/config/decider_staging.yml | 0 .../config/logging/logback-all-include.xml | 23 + .../logging/logback-without-loglens.xml | 12 + tweetypie/server/config/logging/logback.xml | 146 ++ tweetypie/server/config/partner_media.yml | 30 + .../main/scala/com/twitter/tweetypie/BUILD | 19 + .../com/twitter/tweetypie/backends/BUILD | 48 + .../twitter/tweetypie/backends/Backend.scala | 172 ++ .../tweetypie/backends/ConfigBus.scala | 50 + .../backends/CreativesContainerService.scala | 71 + .../tweetypie/backends/Escherbird.scala | 43 + .../tweetypie/backends/Expandodo.scala | 83 + .../backends/GeoScrubEventStore.scala | 84 + .../tweetypie/backends/Gizmoduck.scala | 93 + .../tweetypie/backends/GnipEnricherator.scala | 42 + 
.../tweetypie/backends/LimiterBackend.scala | 55 + .../tweetypie/backends/LimiterService.scala | 193 ++ .../tweetypie/backends/Manhattan.scala | 46 + .../tweetypie/backends/MediaInfoService.scala | 43 + .../tweetypie/backends/Scarecrow.scala | 73 + .../backends/SocialGraphService.scala | 52 + .../twitter/tweetypie/backends/TFlock.scala | 98 + .../twitter/tweetypie/backends/Talon.scala | 94 + .../tweetypie/backends/TimelineService.scala | 84 + .../tweetypie/backends/UserImageService.scala | 71 + .../twitter/tweetypie/backends/Warmup.scala | 266 ++ .../scala/com/twitter/tweetypie/config/BUILD | 135 + .../tweetypie/config/BackendClients.scala | 796 ++++++ .../com/twitter/tweetypie/config/Caches.scala | 281 ++ .../tweetypie/config/ClientsParser.scala | 126 + .../tweetypie/config/DynamicConfig.scala | 100 + .../config/DynamicConfigLoader.scala | 69 + .../config/ExternalRepositories.scala | 182 ++ .../config/LogicalRepositories.scala | 807 ++++++ .../com/twitter/tweetypie/config/Main.scala | 314 +++ .../MemcacheExceptionLoggingFilter.scala | 62 + .../twitter/tweetypie/config/Resources.scala | 15 + .../config/ScribeTweetCacheWrites.scala | 102 + .../tweetypie/config/TweetBuilders.scala | 300 +++ .../tweetypie/config/TweetHydrators.scala | 341 +++ .../tweetypie/config/TweetServerBuilder.scala | 300 +++ .../config/TweetServiceAuthorizers.scala | 399 +++ .../config/TweetServiceBuilder.scala | 683 +++++ .../TweetServiceInvocationBuilder.scala | 34 + .../config/TweetServiceSettings.scala | 475 ++++ .../tweetypie/config/TweetStores.scala | 577 ++++ .../config/TweetypieDeciderGates.scala | 91 + .../tweetypie/config/WritePathHydration.scala | 223 ++ .../twitter/tweetypie/config/package.scala | 11 + .../scala/com/twitter/tweetypie/core/BUILD | 19 + .../core/CardReferenceUriExtractor.scala | 32 + .../twitter/tweetypie/core/EditState.scala | 48 + .../twitter/tweetypie/core/Exceptions.scala | 14 + .../tweetypie/core/FilteredState.scala | 96 + 
.../tweetypie/core/GeoSearchRequestId.scala | 3 + .../tweetypie/core/HydrationState.scala | 122 + .../tweetypie/core/QuotedTweetResult.scala | 46 + .../twitter/tweetypie/core/Serializer.scala | 31 + .../tweetypie/core/StoredTweetResult.scala | 42 + .../tweetypie/core/TweetCreateFailure.scala | 39 + .../twitter/tweetypie/core/TweetData.scala | 86 + .../twitter/tweetypie/core/TweetResult.scala | 39 + .../tweetypie/core/UpstreamFailure.scala | 37 + .../twitter/tweetypie/core/ValueState.scala | 452 ++++ .../com/twitter/tweetypie/core/package.scala | 5 + .../com/twitter/tweetypie/federated/BUILD | 35 + .../federated/StratoCatalogBuilder.scala | 128 + .../federated/columns/AccessPolicy.scala | 41 + .../federated/columns/ApiErrors.scala | 110 + .../twitter/tweetypie/federated/columns/BUILD | 43 + .../columns/CreateRetweetColumn.scala | 184 ++ .../federated/columns/CreateTweetColumn.scala | 546 ++++ .../federated/columns/DeleteTweetColumn.scala | 81 + .../columns/FederatedFieldColumn.scala | 141 + .../columns/FederatedFieldGroup.scala | 88 + .../federated/columns/FederatedFieldReq.scala | 7 + .../columns/GetStoredTweetsByUserColumn.scala | 83 + .../columns/GetStoredTweetsColumn.scala | 99 + .../columns/GetTweetFieldsColumn.scala | 172 ++ .../federated/columns/HydrationOptions.scala | 22 + .../federated/columns/TrackingId.scala | 29 + .../columns/TweetypieContactInfo.scala | 11 + .../federated/columns/UnretweetColumn.scala | 69 + .../twitter/tweetypie/federated/context/BUILD | 27 + .../federated/context/RequestContext.scala | 131 + .../tweetypie/federated/prefetcheddata/BUILD | 32 + .../PrefetchedDataRepository.scala | 166 ++ .../tweetypie/federated/promotedcontent/BUILD | 18 + .../TweetPromotedContentLogger.scala | 40 + .../twitter/tweetypie/federated/warmups/BUILD | 43 + .../warmups/StratoCatalogWarmups.scala | 140 + .../tweetypie/handler/AttachmentBuilder.scala | 185 ++ .../scala/com/twitter/tweetypie/handler/BUILD | 88 + .../CardReferenceValidationHandler.scala | 74 + 
.../tweetypie/handler/CardUsersFinder.scala | 52 + .../handler/CollabControlBuilder.scala | 109 + .../handler/CommunitiesValidator.scala | 40 + .../handler/ConversationControlBuilder.scala | 272 ++ .../DeleteAdditionalFieldsBuilder.scala | 66 + .../handler/DeleteLocationDataHandler.scala | 62 + .../handler/DuplicateTweetFinder.scala | 254 ++ .../handler/EditControlBuilder.scala | 361 +++ .../tweetypie/handler/EditValidator.scala | 137 + .../handler/EraseUserTweetsHandler.scala | 102 + .../tweetypie/handler/GeoBuilder.scala | 137 + .../handler/GetDeletedTweetsHandler.scala | 119 + .../GetStoredTweetsByUserHandler.scala | 188 ++ .../handler/GetStoredTweetsHandler.scala | 161 ++ .../handler/GetTweetCountsHandler.scala | 44 + .../handler/GetTweetFieldsHandler.scala | 395 +++ .../tweetypie/handler/GetTweetsHandler.scala | 415 +++ .../tweetypie/handler/HandlerError.scala | 45 + .../tweetypie/handler/MediaBuilder.scala | 176 ++ .../twitter/tweetypie/handler/PostTweet.scala | 395 +++ .../QuotedTweetDeleteEventBuilder.scala | 34 + .../QuotedTweetTakedownEventBuilder.scala | 36 + .../tweetypie/handler/RateLimitChecker.scala | 49 + .../tweetypie/handler/ReplyBuilder.scala | 633 +++++ .../tweetypie/handler/RetweetBuilder.scala | 352 +++ .../tweetypie/handler/ReverseGeocoder.scala | 78 + .../handler/ScarecrowRetweetSpamChecker.scala | 64 + .../handler/ScarecrowTweetSpamChecker.scala | 106 + .../handler/ScrubGeoEventBuilder.scala | 72 + .../tweetypie/handler/SelfThreadBuilder.scala | 119 + .../handler/SetAdditionalFieldsBuilder.scala | 61 + .../handler/SetRetweetVisibilityHandler.scala | 45 + .../com/twitter/tweetypie/handler/Spam.scala | 99 + .../tweetypie/handler/TakedownHandler.scala | 76 + .../tweetypie/handler/TweetBuilder.scala | 1180 +++++++++ .../tweetypie/handler/TweetCreationLock.scala | 402 +++ .../handler/TweetDeletePathHandler.scala | 811 ++++++ .../handler/TweetWriteValidator.scala | 118 + .../tweetypie/handler/U13ValidationUtil.scala | 21 + 
.../handler/UndeleteTweetHandler.scala | 215 ++ .../tweetypie/handler/UnretweetHandler.scala | 65 + .../UpdatePossiblySensitiveTweetHandler.scala | 46 + .../tweetypie/handler/UrlEntityBuilder.scala | 102 + .../tweetypie/handler/UrlShortener.scala | 106 + .../handler/UserTakedownHandler.scala | 79 + .../handler/WritePathQueryOptions.scala | 153 ++ .../twitter/tweetypie/handler/package.scala | 42 + .../com/twitter/tweetypie/hydrator/BUILD | 58 + .../tweetypie/hydrator/Card2Hydrator.scala | 76 + .../tweetypie/hydrator/CardHydrator.scala | 47 + .../hydrator/ContributorHydrator.scala | 36 + .../ContributorVisibilityFilter.scala | 42 + .../ConversationControlHydrator.scala | 108 + .../hydrator/ConversationIdHydrator.scala | 33 + .../hydrator/ConversationMutedHydrator.scala | 54 + .../hydrator/CopyFromSourceTweet.scala | 229 ++ .../hydrator/CreatedAtRepairer.scala | 49 + .../hydrator/DeviceSourceHydrator.scala | 33 + .../hydrator/DirectedAtHydrator.scala | 92 + .../hydrator/EditControlHydrator.scala | 132 + .../tweetypie/hydrator/EditHydrator.scala | 63 + .../hydrator/EditPerspectiveHydrator.scala | 179 ++ .../EscherbirdAnnotationHydrator.scala | 22 + .../FeatureSwitchResultsHydrator.scala | 42 + .../tweetypie/hydrator/GeoScrubHydrator.scala | 31 + .../tweetypie/hydrator/HasMediaHydrator.scala | 14 + .../hydrator/IM1837FilterHydrator.scala | 23 + .../hydrator/IM2884FilterHydrator.scala | 27 + .../hydrator/IM3433FilterHydrator.scala | 25 + .../tweetypie/hydrator/LanguageHydrator.scala | 24 + .../hydrator/MediaEntityHydrator.scala | 67 + .../hydrator/MediaInfoHydrator.scala | 73 + .../hydrator/MediaIsProtectedHydrator.scala | 36 + .../tweetypie/hydrator/MediaKeyHydrator.scala | 54 + .../hydrator/MediaRefsHydrator.scala | 124 + .../hydrator/MediaTagsHydrator.scala | 103 + .../hydrator/MediaUrlFieldsHydrator.scala | 25 + .../hydrator/MentionEntityHydrator.scala | 47 + .../NegativeVisibleTextRangeRepairer.scala | 18 + .../hydrator/NoteTweetSuffixHydrator.scala | 66 + 
.../hydrator/PartialEntityCleaner.scala | 80 + .../hydrator/PastedMediaHydrator.scala | 102 + .../hydrator/PerspectiveHydrator.scala | 112 + .../tweetypie/hydrator/PlaceHydrator.scala | 28 + .../PreviousTweetCountsHydrator.scala | 152 ++ .../hydrator/ProfileGeoHydrator.scala | 31 + .../QuoteTweetVisibilityHydrator.scala | 93 + .../hydrator/QuotedTweetHydrator.scala | 51 + .../hydrator/QuotedTweetRefHydrator.scala | 129 + .../hydrator/QuotedTweetRefUrlsHydrator.scala | 61 + .../tweetypie/hydrator/RepairMutation.scala | 15 + .../hydrator/ReplyScreenNameHydrator.scala | 33 + .../hydrator/ReportedTweetFilter.scala | 25 + .../hydrator/RetweetMediaRepairer.scala | 15 + .../RetweetParentStatusIdRepairer.scala | 19 + .../hydrator/ScrubEngagementHydrator.scala | 27 + .../ScrubUncacheableTweetRepairer.scala | 38 + .../hydrator/SourceTweetHydrator.scala | 67 + .../hydrator/StripHiddenGeoCoordinates.scala | 12 + .../SuperfluousUrlEntityScrubber.scala | 37 + .../tweetypie/hydrator/TakedownHydrator.scala | 45 + .../tweetypie/hydrator/TextRepairer.scala | 47 + .../TweetAuthorVisibilityHydrator.scala | 43 + .../hydrator/TweetCountsHydrator.scala | 189 ++ .../twitter/tweetypie/hydrator/TweetCtx.scala | 90 + .../tweetypie/hydrator/TweetHydration.scala | 848 ++++++ .../hydrator/TweetLegacyFormatter.scala | 330 +++ .../hydrator/TweetQueryOptionsExpander.scala | 144 + .../hydrator/TweetVisibilityHydrator.scala | 66 + .../hydrator/UnmentionDataHydrator.scala | 28 + .../hydrator/UnrequestedFieldScrubber.scala | 211 ++ .../hydrator/UrlEntityHydrator.scala | 122 + .../tweetypie/hydrator/ValueHydrator.scala | 200 ++ .../twitter/tweetypie/hydrator/package.scala | 17 + .../scala/com/twitter/tweetypie/media/BUILD | 21 + .../twitter/tweetypie/media/MediaClient.scala | 288 ++ .../tweetypie/media/MediaKeyClassifier.scala | 25 + .../tweetypie/media/MediaKeyUtil.scala | 24 + .../tweetypie/media/MediaMetadata.scala | 58 + .../scala/com/twitter/tweetypie/package.scala | 114 + 
.../com/twitter/tweetypie/repository/BUILD | 82 + .../tweetypie/repository/CacheStitch.scala | 87 + .../repository/CachingTweetRepository.scala | 329 +++ .../repository/Card2Repository.scala | 56 + .../tweetypie/repository/CardRepository.scala | 28 + .../repository/CardUsersRepository.scala | 43 + .../ConversationControlRepository.scala | 51 + .../repository/ConversationIdRepository.scala | 95 + .../ConversationMutedRepository.scala | 13 + ...esContainerMaterializationRepository.scala | 62 + .../DeletedTweetVisibilityRepository.scala | 84 + .../repository/DeviceSourceRepository.scala | 75 + .../EscherbirdAnnotationRepository.scala | 23 + .../GeoScrubTimestampRepository.scala | 16 + .../repository/GeoduckPlaceRepository.scala | 132 + .../LastQuoteOfQuoterRepository.scala | 24 + .../repository/ManhattanTweetRepository.scala | 147 ++ .../repository/MediaMetadataRepository.scala | 22 + .../repository/ParentUserIdRepository.scala | 33 + .../repository/PastedMediaRepository.scala | 129 + .../PenguinLanguageRepository.scala | 53 + .../repository/PerspectiveRepository.scala | 15 + .../repository/PlaceRepository.scala | 13 + .../repository/ProfileGeoRepository.scala | 66 + .../QuotedTweetVisibilityRepository.scala | 48 + .../QuoterHasAlreadyQuotedRepository.scala | 15 + .../repository/RelationshipRepository.scala | 53 + .../RetweetSpamCheckRepository.scala | 13 + .../repository/StitchLockingCache.scala | 161 ++ .../StratoCommunityAccessRepository.scala | 26 + .../StratoCommunityMembershipRepository.scala | 19 + .../StratoPromotedTweetRepository.scala | 19 + .../StratoSafetyLabelsRepository.scala | 49 + ...toSubscriptionVerificationRepository.scala | 19 + .../StratoSuperFollowEligibleRepository.scala | 19 + ...StratoSuperFollowRelationsRepository.scala | 60 + .../repository/TweetCountsRepository.scala | 59 + .../tweetypie/repository/TweetQuery.scala | 147 ++ .../repository/TweetRepository.scala | 31 + .../repository/TweetResultRepository.scala | 17 + 
.../repository/TweetSpamCheckRepository.scala | 14 + .../TweetVisibilityRepository.scala | 123 + .../repository/UnmentionInfoRepository.scala | 39 + .../UnmentionedEntitiesRepository.scala | 28 + .../tweetypie/repository/UrlRepository.scala | 69 + .../repository/UserInfoRepository.scala | 138 + .../tweetypie/repository/UserRepository.scala | 285 ++ .../repository/UserTakedownRepository.scala | 26 + .../repository/UserViewerRecipient.scala | 78 + .../tweetypie/repository/VibeRepository.scala | 30 + .../VisibilityResultToFilteredState.scala | 209 ++ .../tweetypie/repository/package.scala | 8 + .../serverutil/ActivityService.scala | 25 + .../tweetypie/serverutil/ActivityUtil.scala | 23 + .../com/twitter/tweetypie/serverutil/BUILD | 23 + .../serverutil/BoringStackTrace.scala | 43 + .../serverutil/CaffeineMemcacheClient.scala | 174 ++ .../serverutil/DeviceSourceParser.scala | 100 + .../serverutil/ExceptionCounter.scala | 38 + .../ExtendedTweetMetadataBuilder.scala | 52 + .../serverutil/NullMemcacheClient.scala | 46 + .../tweetypie/serverutil/PartnerMedia.scala | 15 + .../tweetypie/serverutil/StoredCard.scala | 36 + .../tweetypie/serverutil/logcachewrites/BUILD | 15 + .../logcachewrites/TweetCacheWrite.scala | 99 + .../logcachewrites/WriteLoggingCache.scala | 66 + .../scala/com/twitter/tweetypie/service/BUILD | 38 + .../service/ClientHandlingTweetService.scala | 524 ++++ .../service/DispatchingTweetService.scala | 376 +++ .../service/FailureLoggingTweetService.scala | 76 + .../tweetypie/service/MethodAuthorizer.scala | 91 + .../service/ObservedTweetService.scala | 422 +++ .../tweetypie/service/QuillTweetService.scala | 75 + .../service/ReplicatingTweetService.scala | 47 + .../tweetypie/service/RescueExceptions.scala | 63 + .../tweetypie/service/TweetServiceProxy.scala | 146 ++ .../service/TweetServiceWarmer.scala | 90 + .../twitter/tweetypie/service/observer/BUILD | 21 + .../observer/GetDeletedTweetsObserver.scala | 25 + .../GetStoredTweetsByUserObserver.scala | 67 + 
.../observer/GetStoredTweetsObserver.scala | 52 + .../observer/GetTweetCountsObserver.scala | 67 + .../observer/GetTweetFieldsObserver.scala | 160 ++ .../service/observer/GetTweetsObserver.scala | 120 + .../tweetypie/service/observer/Observer.scala | 365 +++ .../service/observer/PostTweetObserver.scala | 82 + .../service/observer/ResultStateStats.scala | 19 + .../observer/StoredTweetsObserver.scala | 56 + .../tweetypie/service/observer/package.scala | 13 + .../twitter/tweetypie/service/package.scala | 12 + .../tweetypie/store/AsyncEnqueueStore.scala | 95 + .../scala/com/twitter/tweetypie/store/BUILD | 60 + .../tweetypie/store/CachingTweetStore.scala | 420 +++ .../store/DeleteAdditionalFields.scala | 172 ++ .../twitter/tweetypie/store/DeleteTweet.scala | 221 ++ .../tweetypie/store/FanoutServiceStore.scala | 38 + .../com/twitter/tweetypie/store/Flush.scala | 34 + .../store/GeoSearchRequestIDStore.scala | 72 + .../GizmoduckUserCountsUpdatingStore.scala | 48 + .../GizmoduckUserGeotagUpdateStore.scala | 68 + .../com/twitter/tweetypie/store/Guano.scala | 144 + .../tweetypie/store/GuanoServiceStore.scala | 120 + .../tweetypie/store/IncrBookmarkCount.scala | 92 + .../tweetypie/store/IncrFavCount.scala | 90 + .../tweetypie/store/InitialTweetUpdate.scala | 31 + .../twitter/tweetypie/store/InsertTweet.scala | 284 ++ .../tweetypie/store/LimiterStore.scala | 41 + .../tweetypie/store/LogLensStore.scala | 169 ++ .../tweetypie/store/ManhattanTweetStore.scala | 231 ++ .../tweetypie/store/MediaIndexHelper.scala | 34 + .../tweetypie/store/MediaServiceStore.scala | 62 + .../tweetypie/store/QuotedTweetDelete.scala | 45 + .../tweetypie/store/QuotedTweetOps.scala | 33 + .../tweetypie/store/QuotedTweetTakedown.scala | 51 + .../store/ReplicatingTweetStore.scala | 180 ++ .../store/RetweetArchivalEnqueueStore.scala | 38 + .../tweetypie/store/ScribeMediaTagStore.scala | 42 + .../twitter/tweetypie/store/ScrubGeo.scala | 164 ++ .../tweetypie/store/SetAdditionalFields.scala | 155 ++ 
.../store/SetRetweetVisibility.scala | 172 ++ .../twitter/tweetypie/store/Takedown.scala | 205 ++ .../store/TlsTimelineUpdatingStore.scala | 150 ++ .../store/TweetCountsCacheUpdatingStore.scala | 358 +++ .../tweetypie/store/TweetEventBusStore.scala | 209 ++ .../tweetypie/store/TweetIndexingStore.scala | 65 + .../tweetypie/store/TweetStatsStore.scala | 64 + .../twitter/tweetypie/store/TweetStore.scala | 292 +++ .../tweetypie/store/TweetStoreEvent.scala | 144 + .../twitter/tweetypie/store/TweetUpdate.scala | 41 + .../tweetypie/store/UndeleteTweet.scala | 237 ++ .../store/UpdatePossiblySensitiveTweet.scala | 206 ++ .../com/twitter/tweetypie/store/package.scala | 16 + tweetypie/server/src/main/thrift/BUILD | 29 + .../src/main/thrift/tweetypie_internal.thrift | 705 +++++ tweetypie/servo/README.md | 3 + tweetypie/servo/decider/BUILD | 5 + tweetypie/servo/decider/src/main/scala/BUILD | 18 + .../servo/decider/DeciderGateBuilder.scala | 41 + .../servo/decider/DeciderKeyEnum.scala | 3 + .../com/twitter/servo/decider/package.scala | 5 + .../com/twitter/servo/gate/DeciderGate.scala | 34 + tweetypie/servo/json/BUILD | 5 + .../main/scala/com/twitter/servo/json/BUILD | 21 + .../servo/json/ThriftJsonInspector.scala | 142 + tweetypie/servo/repo/BUILD | 5 + tweetypie/servo/repo/src/main/scala/BUILD | 29 + .../servo/cache/ByteCountingMemcache.scala | 183 ++ .../scala/com/twitter/servo/cache/Cache.scala | 275 ++ .../twitter/servo/cache/CacheFactory.scala | 153 ++ .../com/twitter/servo/cache/Cached.scala | 261 ++ .../twitter/servo/cache/CounterCache.scala | 20 + .../servo/cache/CounterSerializer.scala | 114 + .../twitter/servo/cache/FinagleMemcache.scala | 149 ++ .../twitter/servo/cache/ForwardingCache.scala | 186 ++ .../servo/cache/HotKeyMemcacheClient.scala | 109 + .../twitter/servo/cache/InProcessCache.scala | 63 + .../servo/cache/IterableSerializer.scala | 84 + .../servo/cache/KeyFilteringCache.scala | 51 + .../twitter/servo/cache/KeyTransformer.scala | 21 + 
.../twitter/servo/cache/LockingCache.scala | 486 ++++ .../com/twitter/servo/cache/Memcache.scala | 59 + .../twitter/servo/cache/MigratingCache.scala | 245 ++ .../twitter/servo/cache/MissingCache.scala | 46 + .../twitter/servo/cache/ObservableCache.scala | 419 +++ .../servo/cache/SecondaryIndexingCache.scala | 85 + .../twitter/servo/cache/SelectedCache.scala | 97 + .../twitter/servo/cache/SeqSerializer.scala | 10 + .../com/twitter/servo/cache/Serializer.scala | 184 ++ .../twitter/servo/cache/SetSerializer.scala | 10 + .../servo/cache/SimpleReplicatingCache.scala | 231 ++ .../servo/cache/TransformingCache.scala | 324 +++ .../com/twitter/servo/cache/TtlCache.scala | 95 + .../com/twitter/servo/cache/package.scala | 36 + .../twitter/servo/database/Accessors.scala | 151 ++ .../com/twitter/servo/database/Bitfield.scala | 56 + .../twitter/servo/database/Credentials.scala | 22 + .../com/twitter/servo/database/Database.scala | 201 ++ .../com/twitter/servo/database/package.scala | 19 + .../servo/hydrator/KeyValueHydrator.scala | 155 ++ .../servo/keyvalue/KeyValueResult.scala | 473 ++++ .../CachingCounterKeyValueRepository.scala | 44 + .../CachingKeyValueRepository.scala | 736 ++++++ .../servo/repository/ChunkingStrategy.scala | 50 + .../DarkmodingKeyValueRepositoryFactory.scala | 161 ++ .../HotKeyCachingKeyValueRepository.scala | 74 + .../ImmutableKeyValueRepository.scala | 18 + .../servo/repository/KeyValueRepository.scala | 192 ++ .../ObservableKeyValueRepository.scala | 89 + .../twitter/servo/repository/Repository.scala | 133 + .../ResponseCachingKeyValueRepository.scala | 103 + .../twitter/servo/repository/RichQuery.scala | 34 + .../SuccessRateTrackingRepository.scala | 81 + .../twitter/servo/repository/package.scala | 50 + .../twitter/servo/store/CachingStore.scala | 112 + .../twitter/servo/store/KeyValueStore.scala | 13 + .../twitter/servo/store/ObservableStore.scala | 32 + .../scala/com/twitter/servo/store/Store.scala | 93 + tweetypie/servo/repo/src/main/thrift/BUILD | 
13 + .../com/twitter/servo/cache/servo_repo.thrift | 39 + tweetypie/servo/request/BUILD | 5 + tweetypie/servo/request/src/main/scala/BUILD | 20 + .../request/ClientRequestAuthorizer.scala | 172 ++ .../servo/request/ClientRequestObserver.scala | 58 + .../servo/request/PermissionModule.scala | 233 ++ .../twitter/servo/request/RequestFilter.scala | 120 + .../servo/request/RequestHandler.scala | 24 + .../com/twitter/servo/request/package.scala | 35 + tweetypie/servo/util/BUILD | 6 + tweetypie/servo/util/src/main/scala/BUILD | 53 + .../scala/com/twitter/servo/data/Lens.scala | 147 ++ .../com/twitter/servo/data/Mutation.scala | 268 ++ .../com/twitter/servo/forked/Forked.scala | 120 + .../twitter/servo/forked/QueueExecutor.scala | 82 + .../twitter/servo/gate/RateLimitingGate.scala | 64 + .../com/twitter/servo/util/Availability.scala | 43 + .../com/twitter/servo/util/Average.scala | 116 + .../twitter/servo/util/BatchExecutor.scala | 218 ++ .../util/CancelledExceptionExtractor.scala | 21 + .../CounterInitializingStatsReceiver.scala | 24 + .../scala/com/twitter/servo/util/Effect.scala | 83 + .../twitter/servo/util/ExceptionCounter.scala | 193 ++ .../twitter/servo/util/FrequencyCounter.scala | 51 + .../twitter/servo/util/FunctionArrow.scala | 75 + .../com/twitter/servo/util/FutureArrow.scala | 501 ++++ .../com/twitter/servo/util/FutureEffect.scala | 379 +++ .../scala/com/twitter/servo/util/Gate.scala | 210 ++ .../util/LogarithmicallyBucketedTimer.scala | 41 + .../servo/util/MemoizingStatsReceiver.scala | 46 + .../com/twitter/servo/util/Observable.scala | 22 + .../twitter/servo/util/OptionOrdering.scala | 22 + .../twitter/servo/util/RandomPerturber.scala | 16 + .../servo/util/RateLimitingLogger.scala | 71 + .../scala/com/twitter/servo/util/Retry.scala | 100 + .../com/twitter/servo/util/RetryHandler.scala | 169 ++ .../com/twitter/servo/util/RpcRetry.scala | 90 + .../scala/com/twitter/servo/util/Scribe.scala | 80 + .../servo/util/SuccessRateTracker.scala | 179 ++ 
.../servo/util/SynchronizedHashMap.scala | 5 + .../servo/util/ThreadLocalStringBuilder.scala | 11 + .../twitter/servo/util/ThrowableHelper.scala | 41 + .../com/twitter/servo/util/Transformer.scala | 227 ++ .../com/twitter/servo/util/TryOrdering.scala | 23 + .../servo/util/WaitForServerSets.scala | 60 + .../com/twitter/servo/util/package.scala | 6 + 591 files changed, 68352 insertions(+) create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD create 
mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala create mode 100644 
tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala create mode 100644 
tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD create mode 100644 
tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala create mode 
100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala create mode 100644 tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift create mode 100644 
tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift create mode 100644 tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift create mode 100644 tweetypie/server/BUILD create mode 100644 tweetypie/server/README.md create mode 100644 tweetypie/server/config/BUILD create mode 100644 tweetypie/server/config/decider.yml create mode 100644 tweetypie/server/config/decider_staging.yml create mode 100644 tweetypie/server/config/logging/logback-all-include.xml create mode 100644 tweetypie/server/config/logging/logback-without-loglens.xml create mode 100644 tweetypie/server/config/logging/logback.xml create mode 100644 tweetypie/server/config/partner_media.yml create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetDeleteEventBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/QuotedTweetTakedownEventBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RateLimitChecker.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReplyBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/RetweetBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetCreationLock.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetDeletePathHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetWriteValidator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/U13ValidationUtil.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UndeleteTweetHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UnretweetHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UpdatePossiblySensitiveTweetHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlEntityBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlShortener.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala 
create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PreviousTweetCountsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ProfileGeoHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuoteTweetVisibilityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/QuotedTweetRefUrlsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RepairMutation.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReplyScreenNameHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ReportedTweetFilter.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetMediaRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/RetweetParentStatusIdRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubEngagementHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ScrubUncacheableTweetRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SourceTweetHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/StripHiddenGeoCoordinates.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/SuperfluousUrlEntityScrubber.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TakedownHydrator.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetLegacyFormatter.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetQueryOptionsExpander.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetVisibilityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ValueHydrator.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/package.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyClassifier.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyUtil.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaMetadata.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/package.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CachingTweetRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/Card2Repository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserTakedownRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserViewerRecipient.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VibeRepository.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VisibilityResultToFilteredState.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/package.scala 
create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityUtil.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BoringStackTrace.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/CaffeineMemcacheClient.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/DeviceSourceParser.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExceptionCounter.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ExtendedTweetMetadataBuilder.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/NullMemcacheClient.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/PartnerMedia.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/StoredCard.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/TweetCacheWrite.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites/WriteLoggingCache.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/FailureLoggingTweetService.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/service/MethodAuthorizer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ObservedTweetService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/QuillTweetService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ReplicatingTweetService.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/RescueExceptions.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/TweetServiceProxy.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/TweetServiceWarmer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetDeletedTweetsObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/ResultStateStats.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/StoredTweetsObserver.scala create mode 
100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/package.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/service/package.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/AsyncEnqueueStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/BUILD create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteAdditionalFields.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala create mode 100644 
tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala create mode 100644 tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala create mode 100644 tweetypie/server/src/main/thrift/BUILD create mode 100644 tweetypie/server/src/main/thrift/tweetypie_internal.thrift create mode 100644 tweetypie/servo/README.md create mode 100644 tweetypie/servo/decider/BUILD create mode 100644 tweetypie/servo/decider/src/main/scala/BUILD create mode 100644 tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala create mode 100644 tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala create mode 100644 tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala create mode 100644 tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala create mode 100644 tweetypie/servo/json/BUILD create mode 100644 tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD create mode 100644 tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala create mode 100644 tweetypie/servo/repo/BUILD create mode 100644 tweetypie/servo/repo/src/main/scala/BUILD create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala create mode 100644 
tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala create mode 100644 
tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala create mode 100644 
tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala create mode 100644 tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala create mode 100644 tweetypie/servo/repo/src/main/thrift/BUILD create mode 100644 tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift create mode 100644 tweetypie/servo/request/BUILD create mode 100644 tweetypie/servo/request/src/main/scala/BUILD create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala create mode 100644 tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala create mode 100644 tweetypie/servo/util/BUILD create 
mode 100644 tweetypie/servo/util/src/main/scala/BUILD create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Availability.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Average.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/BatchExecutor.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CancelledExceptionExtractor.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/CounterInitializingStatsReceiver.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Effect.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ExceptionCounter.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FrequencyCounter.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FunctionArrow.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureArrow.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala create mode 100644 
tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala create mode 100644 tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala diff --git a/README.md b/README.md index 818e7334e..ebb136186 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. 
| | | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusers and TwHIN). | | | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. | +| | [Tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. | The product surface currently included in this repository is the For You Timeline. diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala new file mode 100644 index 000000000..91e06e4c6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/AdditionalFields.scala @@ -0,0 +1,118 @@ +package com.twitter.tweetypie.additionalfields + +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.scrooge.TFieldBlob +import com.twitter.scrooge.ThriftStructField + +object AdditionalFields { + type FieldId = Short + + /** additional fields really start at 100, but we are ignoring conversation id for now */ + val StartAdditionalId = 101 + + /** all known [[Tweet]] field IDs */ + val CompiledFieldIds: Seq[FieldId] = Tweet.metaData.fields.map(_.id) + + /** all known [[Tweet]] fields in the "additional-field" range (excludes id) */ + val CompiledAdditionalFieldMetaDatas: Seq[ThriftStructField[Tweet]] = + Tweet.metaData.fields.filter(f => isAdditionalFieldId(f.id)) + + val CompiledAdditionalFieldsMap: Map[Short, ThriftStructField[Tweet]] = + CompiledAdditionalFieldMetaDatas.map(field => (field.id, field)).toMap + + /** all known [[Tweet]] field IDs in the "additional-field" range */ + val CompiledAdditionalFieldIds: Seq[FieldId] = + CompiledAdditionalFieldsMap.keys.toSeq + + /** all [[Tweet]] field IDs which should be rejected when set as additional + * fields via PostTweetRequest.additionalFields or RetweetRequest.additionalFields */ + val RejectedFieldIds: Seq[FieldId] = 
Seq( + // Should be provided via PostTweetRequest.conversationControl field. go/convocontrolsbackend + Tweet.ConversationControlField.id, + // This field should only be set based on whether the client sets the right community + // tweet annotation. + Tweet.CommunitiesField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.ExclusiveTweetControlOptions]]. + // The exclusiveTweetControl field requires the userId to be set + // and we shouldn't trust the client to provide the right one. + Tweet.ExclusiveTweetControlField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.TrustedFriendsControlOptions]]. + // The trustedFriendsControl field requires the trustedFriendsListId to be + // set and we shouldn't trust the client to provide the right one. + Tweet.TrustedFriendsControlField.id, + // This field should not be set by clients and should opt for + // [[PostTweetRequest.CollabControlOptions]]. + // The collabControl field requires a list of Collaborators to be + // set and we shouldn't trust the client to provide the right one. + Tweet.CollabControlField.id + ) + + def isAdditionalFieldId(fieldId: FieldId): Boolean = + fieldId >= StartAdditionalId + + /** + * Provides a list of all additional field IDs on the tweet, which include all + * the compiled additional fields and all the provided passthrough fields. This includes + * compiled additional fields where the value is None. + */ + def allAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledAdditionalFieldIds ++ tweet._passthroughFields.keys + + /** + * Provides a list of all field IDs that have a value on the tweet which are not known compiled + * additional fields (excludes [[Tweet.id]]). 
+ */ + def unsettableAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledFieldIds + .filter { id => + !isAdditionalFieldId(id) && id != Tweet.IdField.id && tweet.getFieldBlob(id).isDefined + } ++ + tweet._passthroughFields.keys + + /** + * Provides a list of all field IDs that have a value on the tweet which are explicitly disallowed + * from being set via PostTweetRequest.additionalFields and RetweetRequest.additionalFields + */ + def rejectedAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + RejectedFieldIds + .filter { id => tweet.getFieldBlob(id).isDefined } + + def unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds: Seq[FieldId]): String = + s"request may not contain fields: [${unsettableFieldIds.sorted.mkString(", ")}]" + + /** + * Provides a list of all additional field IDs that have a value on the tweet, + * compiled and passthrough (excludes Tweet.id). + */ + def nonEmptyAdditionalFieldIds(tweet: Tweet): Seq[FieldId] = + CompiledAdditionalFieldMetaDatas.collect { + case f if f.getValue(tweet) != None => f.id + } ++ tweet._passthroughFields.keys + + def additionalFields(tweet: Tweet): Seq[TFieldBlob] = + (tweet.getFieldBlobs(CompiledAdditionalFieldIds) ++ tweet._passthroughFields).values.toSeq + + /** + * Merge base tweet with additional fields. + * Non-additional fields in the additional tweet are ignored. + * @param base: a tweet that contains basic fields + * @param additional: a tweet object that carries additional fields + */ + def setAdditionalFields(base: Tweet, additional: Tweet): Tweet = + setAdditionalFields(base, additionalFields(additional)) + + def setAdditionalFields(base: Tweet, additional: Option[Tweet]): Tweet = + additional.map(setAdditionalFields(base, _)).getOrElse(base) + + def setAdditionalFields(base: Tweet, additional: Traversable[TFieldBlob]): Tweet = + additional.foldLeft(base) { case (t, f) => t.setField(f) } + + /** + * Unsets the specified fields on the given tweet. 
+ */ + def unsetFields(tweet: Tweet, fieldIds: Iterable[FieldId]): Tweet = { + tweet.unsetFields(fieldIds.toSet) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD new file mode 100644 index 000000000..472135458 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/org/apache/thrift:libthrift", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD new file mode 100644 index 000000000..3e9bc82d8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/BUILD @@ -0,0 +1,15 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-memcached/src/main/scala", + "scrooge/scrooge-serializer", + "stitch/stitch-core", + "util/util-core", + "util/util-logging", + # CachedValue struct + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala new file mode 100644 index 000000000..816162fad --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheOperations.scala @@ -0,0 +1,241 
@@ +package com.twitter.tweetypie.caching + +import com.twitter.finagle.service.StatsFilter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.stats.ExceptionStatsHandler +import com.twitter.finagle.stats.Counter +import com.twitter.util.Future +import com.twitter.util.logging.Logger +import com.twitter.finagle.memcached +import scala.util.control.NonFatal + +/** + * Wrapper around a memcached client that performs serialization and + * deserialization, tracks stats, provides tracing, and provides + * per-key fresh/stale/failure/miss results. + * + * The operations that write values to cache will only write values + * that the ValueSerializer says are cacheable. The idea here is that + * the deserialize and serialize functions must be coherent, and no + * matter how you choose to write these values back to cache, the + * serializer will have the appropriate knowledge about whether the + * values are cacheable. + * + * For most cases, you will want to use [[StitchCaching]] rather than + * calling this wrapper directly. + * + * @param keySerializer How to convert a K value to a memcached key. + * + * @param valueSerializer How to serialize and deserialize V values, + * as well as which values are cacheable, and how long to store the + * values in cache. + */ +class CacheOperations[K, V]( + keySerializer: K => String, + valueSerializer: ValueSerializer[V], + memcachedClient: memcached.Client, + statsReceiver: StatsReceiver, + logger: Logger, + exceptionStatsHandler: ExceptionStatsHandler = StatsFilter.DefaultExceptions) { + // The memcached operations that are performed via this + // [[CacheOperations]] instance will be tracked under this stats + // receiver. + // + // We count all memcached failures together under this scope, + // because memcached operations should not fail unless there are + // communication problems, so differentiating the method that was + // being called will not give us any useful information. 
+ private[this] val memcachedStats: StatsReceiver = statsReceiver.scope("memcached") + + // Incremented for every attempt to `get` a key from cache. + private[this] val memcachedGetCounter: Counter = memcachedStats.counter("get") + + // One of these two counters is incremented for every successful + // response returned from a `get` call to memcached. + private[this] val memcachedNotFoundCounter: Counter = memcachedStats.counter("not_found") + private[this] val memcachedFoundCounter: Counter = memcachedStats.counter("found") + + // Records the state of the cache load after serialization. The + // policy may transform a value that was successfully loaded from + // cache into any result type, which is why we explicitly track + // "found" and "not_found" above. If `stale` + `fresh` is not equal + // to `found`, then it means that the policy has translated a found + // value into a miss or failure. The policy may do this in order to + // cause the caching filter to treat the value that was found in + // cache in the way it would have treated a miss or failure from + // cache. + private[this] val resultStats: StatsReceiver = statsReceiver.scope("result") + private[this] val resultFreshCounter: Counter = resultStats.counter("fresh") + private[this] val resultStaleCounter: Counter = resultStats.counter("stale") + private[this] val resultMissCounter: Counter = resultStats.counter("miss") + private[this] val resultFailureCounter: Counter = resultStats.counter("failure") + + // Used for recording exceptions that occurred during + // deserialization. This will never be incremented if the + // deserializer returns a result, even if the result is a + // [[CacheResult.Failure]]. See the comment where this stat is + // incremented for more details. 
+ private[this] val deserializeFailureStats: StatsReceiver = statsReceiver.scope("deserialize") + + private[this] val notSerializedCounter: Counter = statsReceiver.counter("not_serialized") + + /** + * Load a batch of values from cache. Mostly this deals with + * converting the [[memcached.GetResult]] to a + * [[Seq[CacheResult[V]]]]. The result is in the same order as the + * keys, and there will always be an entry for each key. This method + * should never return a [[Future.exception]]. + */ + def get(keys: Seq[K]): Future[Seq[CacheResult[V]]] = { + memcachedGetCounter.incr(keys.size) + val cacheKeys: Seq[String] = keys.map(keySerializer) + if (logger.isTraceEnabled) { + logger.trace { + val lines: Seq[String] = keys.zip(cacheKeys).map { case (k, c) => s"\n $k ($c)" } + "Starting load for keys:" + lines.mkString + } + } + + memcachedClient + .getResult(cacheKeys) + .map { getResult => + memcachedNotFoundCounter.incr(getResult.misses.size) + val results: Seq[CacheResult[V]] = + cacheKeys.map { cacheKey => + val result: CacheResult[V] = + getResult.hits.get(cacheKey) match { + case Some(memcachedValue) => + memcachedFoundCounter.incr() + try { + valueSerializer.deserialize(memcachedValue.value) + } catch { + case NonFatal(e) => + // If the serializer throws an exception, then + // the serialized value was malformed. In that + // case, we record the failure so that it can be + // detected and fixed, but treat it as a cache + // miss. The reason that we treat it as a miss + // rather than a failure is that a miss will + // cause a write back to cache, and we want to + // write a valid result back to cache to replace + // the bad entry that we just loaded. + // + // A serializer is free to return Miss itself to + // obtain this behavior if it is expected or + // desired, to avoid the logging and stats (and + // the minor overhead of catching an exception).
+ // + // The exceptions are tracked separately from + // other exceptions so that it is easy to see + // whether the deserializer itself ever throws an + // exception. + exceptionStatsHandler.record(deserializeFailureStats, e) + logger.warn(s"Failed deserializing value for cache key $cacheKey", e) + CacheResult.Miss + } + + case None if getResult.misses.contains(cacheKey) => + CacheResult.Miss + + case None => + val exception = + getResult.failures.get(cacheKey) match { + case None => + // To get here, this was not a hit or a miss, + // so we expect the key to be present in + // failures. If it is not, then either the + // contract of getResult was violated, or this + // method is somehow attempting to access a + // result for a key that was not + // loaded. Either of these indicates a bug, so + // we log a high priority log message. + logger.error( + s"Key $cacheKey not found in hits, misses or failures. " + + "This indicates a bug in the memcached library or " + + "CacheOperations.load" + ) + // We return this as a failure because that + // will cause the repo to be consulted and the + // value *not* to be written back to cache, + // which is probably the safest thing to do + // (if we don't know what's going on, default + // to an uncached repo). + new IllegalStateException + + case Some(e) => + e + } + exceptionStatsHandler.record(memcachedStats, exception) + CacheResult.Failure(exception) + } + + // Count each kind of CacheResult, to make it possible to + // see how effective the caching is. 
+ result match { + case CacheResult.Fresh(_) => resultFreshCounter.incr() + case CacheResult.Stale(_) => resultStaleCounter.incr() + case CacheResult.Miss => resultMissCounter.incr() + case CacheResult.Failure(_) => resultFailureCounter.incr() + } + + result + } + + if (logger.isTraceEnabled) { + logger.trace { + val lines: Seq[String] = + (keys, cacheKeys, results).zipped.map { + case (key, cacheKey, result) => s"\n $key ($cacheKey) -> $result" + } + + "Cache results:" + lines.mkString + } + } + + results + } + .handle { + case e => + // If there is a failure from the memcached client, fan it + // out to each cache key, so that the caller does not need + // to handle failure of the batch differently than failure + // of individual keys. This should be rare anyway, since the + // memcached client already does this for common Finagle + // exceptions + resultFailureCounter.incr(keys.size) + val theFailure: CacheResult[V] = CacheResult.Failure(e) + keys.map { _ => + // Record this as many times as we would if it were in the GetResult + exceptionStatsHandler.record(memcachedStats, e) + theFailure + } + } + } + + // Incremented for every attempt to `set` a key in value. + private[this] val memcachedSetCounter: Counter = memcachedStats.counter("set") + + /** + * Write an entry back to cache, using `set`. If the serializer does + * not serialize the value, then this method will immediately return + * with success. 
+ */ + def set(key: K, value: V): Future[Unit] = + valueSerializer.serialize(value) match { + case Some((expiry, serialized)) => + if (logger.isTraceEnabled) { + logger.trace(s"Writing back to cache $key -> $value (expiry = $expiry)") + } + memcachedSetCounter.incr() + memcachedClient + .set(key = keySerializer(key), flags = 0, expiry = expiry, value = serialized) + .onFailure(exceptionStatsHandler.record(memcachedStats, _)) + + case None => + if (logger.isTraceEnabled) { + logger.trace(s"Not writing back $key -> $value") + } + notSerializedCounter.incr() + Future.Done + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala new file mode 100644 index 000000000..c6e9500e7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/CacheResult.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.caching + +/** + * Encodes the possible states of a value loaded from memcached. + * + * @see [[ValueSerializer]] and [[CacheOperations]] + */ +sealed trait CacheResult[+V] + +object CacheResult { + + /** + * Signals that the value could not be successfully loaded from + * cache. `Failure` values should not be written back to cache. + * + * This value may result from an error talking to the memcached + * instance or it may be returned from the Serializer when the value + * should not be reused, but should also not be overwritten. + */ + final case class Failure(e: Throwable) extends CacheResult[Nothing] + + /** + * Signals that the cache load attempt was successful, but there was + * not a usable value. + * + * When processing a `Miss`, the value should be written back to + * cache if it loads successfully. + */ + case object Miss extends CacheResult[Nothing] + + /** + * Signals that the value was found in cache. + * + * It is not necessary to load the value from the original source. 
+ */ + case class Fresh[V](value: V) extends CacheResult[V] + + /** + * Signals that the value was found in cache. + * + * This value should be used, but it should be refreshed + * out-of-band. + */ + case class Stale[V](value: V) extends CacheResult[V] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala new file mode 100644 index 000000000..1f2a743c1 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/Expiry.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.caching + +import com.twitter.util.Duration +import com.twitter.util.Time + +/** + * Helpers for creating common expiry functions. + * + * An expiry function maps from the value to a time in the future when + * the value should expire from cache. These are useful in the + * implementation of a [[ValueSerializer]]. + */ +object Expiry { + + /** + * Return a time that indicates to memcached to never expire this + * value. + * + * This function takes [[Any]] so that it can be used at any value + * type, since it doesn't examine the value at all. + */ + val Never: Any => Time = + _ => Time.Top + + /** + * Return function that indicates to memcached that the value should + * not be used after the `ttl` has elapsed. + * + * This function takes [[Any]] so that it can be used at any value + * type, since it doesn't examine the value at all. 
+ */ + def byAge(ttl: Duration): Any => Time = + _ => Time.now + ttl +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala new file mode 100644 index 000000000..37aaa2216 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ServoCachedValueSerializer.scala @@ -0,0 +1,140 @@ +package com.twitter.tweetypie.caching + +import com.twitter.io.Buf +import com.twitter.scrooge.CompactThriftSerializer +import com.twitter.scrooge.ThriftStruct +import com.twitter.scrooge.ThriftStructCodec +import com.twitter.servo.cache.thriftscala.CachedValue +import com.twitter.servo.cache.thriftscala.CachedValueStatus +import com.twitter.stitch.NotFound +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try +import java.nio.ByteBuffer + +object ServoCachedValueSerializer { + + /** + * Thrown when the fields of the servo CachedValue struct do not + * satisfy the invariants expected by this serialization code. + */ + case class UnexpectedCachedValueState(cachedValue: CachedValue) extends Exception { + def message: String = s"Unexpected state for CachedValue. Value was: $cachedValue" + } + + val CachedValueThriftSerializer: CompactThriftSerializer[CachedValue] = CompactThriftSerializer( + CachedValue) +} + +/** + * A [[ValueSerializer]] that is compatible with the use of + * Servo's [[CachedValue]] struct by tweetypie: + * + * - The only [[CachedValueStatus]] values that are cacheable are + * [[CachedValueStatus.Found]] and [[CachedValueStatus.NotFound]]. + * + * - We only track the `cachedAtMsec` field, because tweetypie's cache + * interaction does not use the other fields, and the values that + * are cached this way are never updated, so storing readThroughAt + * or writtenThroughAt would not add any information. 
+ * + * - When values are present, they are serialized using + * [[org.apache.thrift.protocol.TCompactProtocol]]. + * + * - The CachedValue struct itself is also serialized using TCompactProtocol. + * + * The serializer operates on [[Try]] values and will cache [[Return]] + * and `Throw(NotFound)` values. + */ +case class ServoCachedValueSerializer[V <: ThriftStruct]( + codec: ThriftStructCodec[V], + expiry: Try[V] => Time, + softTtl: SoftTtl[Try[V]]) + extends ValueSerializer[Try[V]] { + import ServoCachedValueSerializer.UnexpectedCachedValueState + import ServoCachedValueSerializer.CachedValueThriftSerializer + + private[this] val ValueThriftSerializer = CompactThriftSerializer(codec) + + /** + * Return an expiry based on the value and a + * TCompactProtocol-encoded servo CachedValue struct with the + * following fields defined: + * + * - `value`: [[None]] + * for {{{Throw(NotFound)}}}, {{{Some(encodedStruct)}}} for + * [[Return]], where {{{encodedStruct}}} is a + * TCompactProtocol-encoding of the value inside of the Return. + * + * - `status`: [[CachedValueStatus.Found]] if the value is Return, + * and [[CachedValueStatus.NotFound]] if it is Throw(NotFound) + * + * - `cachedAtMsec`: The current time, according to [[Time.now]] + * + * No other fields will be defined. + * + * @return [[None]] if called with a value that should not be + * cached (anything other than Return or Throw(NotFound)).
+ */ + override def serialize(value: Try[V]): Option[(Time, Buf)] = { + def serializeCachedValue(payload: Option[ByteBuffer]) = { + val cachedValue = CachedValue( + value = payload, + status = if (payload.isDefined) CachedValueStatus.Found else CachedValueStatus.NotFound, + cachedAtMsec = Time.now.inMilliseconds) + + val serialized = Buf.ByteArray.Owned(CachedValueThriftSerializer.toBytes(cachedValue)) + + (expiry(value), serialized) + } + + value match { + case Throw(NotFound) => + Some(serializeCachedValue(None)) + case Return(struct) => + val payload = Some(ByteBuffer.wrap(ValueThriftSerializer.toBytes(struct))) + Some(serializeCachedValue(payload)) + case _ => + None + } + } + + /** + * Deserializes values serialized by [[serialize]]. The + * value will be [[CacheResult.Fresh]] or [[CacheResult.Stale]] + * depending on the result of {{{softTtl.isFresh}}}. + * + * @throws UnexpectedCachedValueState if the state of the + * [[CachedValue]] could not be produced by [[serialize]] + */ + override def deserialize(buf: Buf): CacheResult[Try[V]] = { + val cachedValue = CachedValueThriftSerializer.fromBytes(Buf.ByteArray.Owned.extract(buf)) + val hasValue = cachedValue.value.isDefined + val isValid = + (hasValue && cachedValue.status == CachedValueStatus.Found) || + (!hasValue && cachedValue.status == CachedValueStatus.NotFound) + + if (!isValid) { + // Exceptions thrown by deserialization are recorded and treated + // as a cache miss by CacheOperations, so throwing this + // exception will cause the value in cache to be + // overwritten. There will be stats recorded whenever this + // happens.
+ throw UnexpectedCachedValueState(cachedValue) + } + + val value = + cachedValue.value match { + case Some(valueBuffer) => + val valueBytes = new Array[Byte](valueBuffer.remaining) + valueBuffer.duplicate.get(valueBytes) + Return(ValueThriftSerializer.fromBytes(valueBytes)) + + case None => + Throw(NotFound) + } + + softTtl.toCacheResult(value, Time.fromMilliseconds(cachedValue.cachedAtMsec)) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala new file mode 100644 index 000000000..ad2237924 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/SoftTtl.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie.caching + +import com.twitter.util.Duration +import com.twitter.util.Time +import scala.util.Random +import com.twitter.logging.Logger + +/** + * Used to determine whether values successfully retrieved from cache + * are [[CacheResult.Fresh]] or [[CacheResult.Stale]]. This is useful + * in the implementation of a [[ValueSerializer]]. + */ +trait SoftTtl[-V] { + + /** + * Determines whether a cached value was fresh. + * + * @param cachedAt the time at which the value was cached. + */ + def isFresh(value: V, cachedAt: Time): Boolean + + /** + * Wraps the value in Fresh or Stale depending on the value of `isFresh`. + * + * (The type variable U exists because it is not allowed to return + * values of a contravariant type, so we must define a variable that + * is a specific subclass of V. This is worth it because it allows + * us to create polymorphic policies without having to specify the + * type. Another solution would be to make the type invariant, but + * then we would have to specify the type whenever we create an + * instance.) 
+ */ + def toCacheResult[U <: V](value: U, cachedAt: Time): CacheResult[U] = + if (isFresh(value, cachedAt)) CacheResult.Fresh(value) else CacheResult.Stale(value) +} + +object SoftTtl { + + /** + * Regardless of the inputs, the value will always be considered + * fresh. + */ + object NeverRefresh extends SoftTtl[Any] { + override def isFresh(_unusedValue: Any, _unusedCachedAt: Time): Boolean = true + } + + /** + * Trigger refresh based on the length of time that a value has been + * stored in cache, ignoring the value. + * + * @param softTtl Items that were cached longer ago than this value + * will be refreshed when they are accessed. + * + * @param jitter Add nondeterminism to the soft TTL to prevent a + * thundering herd of requests refreshing the value at the same + * time. The time at which the value is considered stale will be + * uniformly spread out over a range of +/- jitter. It is + * valid to set the jitter to zero, which will turn off jittering. + * + * @param specificLogger If non-null, use this logger rather than one based + * on the class name. This logger is only used for trace-level + * logging.
+ */ + case class ByAge[V]( + softTtl: Duration, + jitter: Duration, + specificLogger: Logger = null, + rng: Random = Random) + extends SoftTtl[Any] { + + private[this] val logger: Logger = + if (specificLogger == null) Logger(getClass) else specificLogger + + private[this] val maxJitterMs: Long = jitter.inMilliseconds + + // this requirement is due to using Random.nextInt(2 * maxJitterMs) to + // choose the jitter; the cap is ~12.4 days, i.e. a window of ~24.8 days + require(maxJitterMs <= (Int.MaxValue / 2)) + + // Negative jitter probably indicates misuse of the API + require(maxJitterMs >= 0) + + // we want period +/- jitter, but the random generator + // generates non-negative numbers, so we generate [0, 2 * + // maxJitter) and subtract maxJitter to obtain [-maxJitter, + // maxJitter) + private[this] val maxJitterRangeMs: Int = (maxJitterMs * 2).toInt + + // We perform all calculations in milliseconds, so convert the + // period to milliseconds out here. + private[this] val softTtlMs: Long = softTtl.inMilliseconds + + // If the value is below this age, it will always be fresh, + // regardless of jitter. + private[this] val alwaysFreshAgeMs: Long = softTtlMs - maxJitterMs + + // If the value is above this age, it will always be stale, + // regardless of jitter.
+ private[this] val alwaysStaleAgeMs: Long = softTtlMs + maxJitterMs + + override def isFresh(value: Any, cachedAt: Time): Boolean = { + val ageMs: Long = (Time.now - cachedAt).inMilliseconds + val fresh = + if (ageMs <= alwaysFreshAgeMs) { + true + } else if (ageMs > alwaysStaleAgeMs) { + false + } else { + val jitterMs: Long = rng.nextInt(maxJitterRangeMs) - maxJitterMs + ageMs <= softTtlMs + jitterMs + } + + logger.ifTrace( + s"Checked soft ttl: fresh = $fresh, " + + s"soft_ttl_ms = $softTtlMs, age_ms = $ageMs, value = $value") + + fresh + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala new file mode 100644 index 000000000..45861f04c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchAsync.scala @@ -0,0 +1,65 @@ +package com.twitter.tweetypie.caching + +import scala.collection.mutable +import com.twitter.util.Future +import com.twitter.stitch.Stitch +import com.twitter.stitch.Runner +import com.twitter.stitch.FutureRunner +import com.twitter.stitch.Group + +/** + * Workaround for an infelicity in the implementation of [[Stitch.async]]. + * + * This has the same semantics as [[Stitch.async]], with the exception + * that interrupts to the main computation will not interrupt the + * async call. + * + * The problem that this implementation solves is that we do not want + * async calls grouped together with synchronous calls. See the + * mailing list thread [1] for discussion. This may eventually be + * fixed in Stitch.
+ */ +private[caching] object StitchAsync { + // Contains a deferred Stitch that we want to run asynchronously + private[this] class AsyncCall(deferred: => Stitch[_]) { + def call(): Stitch[_] = deferred + } + + private object AsyncGroup extends Group[AsyncCall, Unit] { + override def runner(): Runner[AsyncCall, Unit] = + new FutureRunner[AsyncCall, Unit] { + // All of the deferred calls of any type. When they are + // executed in `run`, the normal Stitch batching and deduping + // will occur. + private[this] val calls = new mutable.ArrayBuffer[AsyncCall] + + def add(call: AsyncCall): Stitch[Unit] = { + // Just remember the deferred call. + calls.append(call) + + // Since we don't wait for the completion of the effect, + // just return a constant value. + Stitch.Unit + } + + def run(): Future[_] = { + // The future returned from this inner invocation of + // Stitch.run is not linked to the returned future, so these + // effects are not linked to the outer Run in which this + // method was invoked.
+ Stitch.run { + Stitch.traverse(calls) { asyncCall: AsyncCall => + asyncCall + .call() + .liftToTry // So that an exception will not interrupt the other calls + } + } + Future.Unit + } + } + } + + def apply(call: => Stitch[_]): Stitch[Unit] = + // Group together all of the async calls + Stitch.call(new AsyncCall(call), AsyncGroup) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala new file mode 100644 index 000000000..8c9de67ff --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCacheOperations.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.caching + +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Group +import com.twitter.stitch.Stitch +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Try + +/** + * Wrapper around [[CacheOperations]] providing a [[Stitch]] API. + */ +case class StitchCacheOperations[K, V](operations: CacheOperations[K, V]) { + import StitchCacheOperations.SetCall + + private[this] val getGroup: Group[K, CacheResult[V]] = + MapGroup[K, CacheResult[V]] { keys: Seq[K] => + operations + .get(keys) + .map(values => keys.zip(values).toMap.mapValues(Return(_))) + } + + def get(key: K): Stitch[CacheResult[V]] = + Stitch.call(key, getGroup) + + private[this] val setGroup: Group[SetCall[K, V], Unit] = + new MapGroup[SetCall[K, V], Unit] { + + override def run(calls: Seq[SetCall[K, V]]): Future[SetCall[K, V] => Try[Unit]] = + Future + .collectToTry(calls.map(call => operations.set(call.key, call.value))) + .map(tries => calls.zip(tries).toMap) + } + + /** + * Performs a [[CacheOperations.set]]. + */ + def set(key: K, value: V): Stitch[Unit] = + // This is implemented as a Stitch.call instead of a Stitch.future + // in order to handle the case where a batch has a duplicate + // key. 
Each copy of the duplicate key will trigger a write back + // to cache, so we dedupe the writes in order to avoid the + // extraneous RPC call. + Stitch.call(new StitchCacheOperations.SetCall(key, value), setGroup) +} + +object StitchCacheOperations { + + /** + * Used as the "call" for [[SetGroup]]. This is essentially a tuple + * where equality is defined only by the key. + */ + private class SetCall[K, V](val key: K, val value: V) { + override def equals(other: Any): Boolean = + other match { + case setCall: SetCall[_, _] => key == setCall.key + case _ => false + } + + override def hashCode: Int = key.hashCode + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala new file mode 100644 index 000000000..830bd11a2 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/StitchCaching.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie.caching + +import com.twitter.stitch.Stitch + +/** + * Apply caching to a [[Stitch]] function. + * + * @see CacheResult for more information about the semantics + * implemented here. 
+ */ +class StitchCaching[K, V](operations: CacheOperations[K, V], repo: K => Stitch[V]) + extends (K => Stitch[V]) { + + private[this] val stitchOps = new StitchCacheOperations(operations) + + override def apply(key: K): Stitch[V] = + stitchOps.get(key).flatMap { + case CacheResult.Fresh(value) => + Stitch.value(value) + + case CacheResult.Stale(staleValue) => + StitchAsync(repo(key).flatMap(refreshed => stitchOps.set(key, refreshed))) + .map(_ => staleValue) + + case CacheResult.Miss => + repo(key) + .applyEffect(value => StitchAsync(stitchOps.set(key, value))) + + case CacheResult.Failure(_) => + // In the case of failure, we don't attempt to write back to + // cache, because cache failure usually means communication + // failure, and sending more requests to the cache that holds + // the value for this key could make the situation worse. + repo(key) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala new file mode 100644 index 000000000..42335d0ff --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/caching/ValueSerializer.scala @@ -0,0 +1,47 @@ +package com.twitter.tweetypie.caching + +import com.twitter.io.Buf +import com.twitter.util.Time + +/** + * How to store values of type V in cache. This includes whether a + * given value is cacheable, how to serialize it, when it should + * expire from cache, and how to interpret byte patterns from cache. + */ +trait ValueSerializer[V] { + + /** + * Prepare the value for storage in cache. When a [[Some]] is + * returned, the [[Buf]] should be a valid input to [[deserialize]] + * and the [[Time]] will be used as the expiry in the memcached + * command. When [[None]] is returned, it indicates that the value + * cannot or should not be written back to cache. 
+ * + * The most common use case for returning None is caching Try + * values, where certain exceptional values encode a cacheable state + * of a value. In particular, Throw(NotFound) is commonly used to + * encode a missing value, and we usually want to cache those + * negative lookups, but we don't want to cache e.g. a timeout + * exception. + * + * @return a pair of expiry time for this cache entry and the bytes + * to store in cache. If you do not want this value to explicitly + * expire, use Time.Top as the expiry. + */ + def serialize(value: V): Option[(Time, Buf)] + + /** + * Deserialize a value found in cache. This function converts the + * bytes found in memcache to a [[CacheResult]]. In general, you + * probably want to return [[CacheResult.Fresh]] or + * [[CacheResult.Stale]], but you are free to return any of the + * range of [[CacheResult]]s, depending on the behavior that you + * want. + * + * This is a total function because in the common use case, the + * bytes stored in cache will be appropriate for the + * serializer. This method is free to throw any exception if the + * bytes are not valid. 
+ */ + def deserialize(serializedValue: Buf): CacheResult[V] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD new file mode 100644 index 000000000..c29029d8c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport", + "finagle/finagle-thrift/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "strato/src/main/scala/com/twitter/strato/access", + "strato/src/main/scala/com/twitter/strato/data", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala new file mode 100644 index 000000000..8741ca80d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/client_id/ClientIdHelper.scala @@ -0,0 +1,185 @@ +package com.twitter.tweetypie.client_id + +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.transport.S2STransport +import com.twitter.finagle.thrift.ClientId +import com.twitter.servo.util.Gate +import com.twitter.strato.access.Access +import com.twitter.strato.access.Access.ForwardedServiceIdentifier + +object ClientIdHelper { + + val UnknownClientId = "unknown" + + def default: ClientIdHelper = new ClientIdHelper(UseTransportServiceIdentifier) + + /** + * Trims off the last .element, which is usually .prod or .staging + */ + def getClientIdRoot(clientId: String): String = + clientId.lastIndexOf('.') match { + case -1 => 
clientId + case idx => clientId.substring(0, idx) + } + + /** + * Returns the last .element without the '.' + */ + def getClientIdEnv(clientId: String): String = + clientId.lastIndexOf('.') match { + case -1 => clientId + case idx => clientId.substring(idx + 1) + } + + private[client_id] def asClientId(s: ServiceIdentifier): String = s"${s.service}.${s.environment}" +} + +class ClientIdHelper(serviceIdentifierStrategy: ServiceIdentifierStrategy) { + + private[client_id] val ProcessPathPrefix = "/p/" + + /** + * The effective client id is used for request authorization and metrics + * attribution. For calls to Tweetypie's thrift API, the thrift ClientId + * is used and is expected in the form of "service-name.env". Federated + * Strato clients don't support configured ClientIds and instead provide + * a "process path" containing instance-specific information. So for + * calls to the federated API, we compute an effective client id from + * the ServiceIdentifier, if present, in Strato's Access principals. The + * implementation avoids computing this identifier unless necessary, + * since this method is invoked on every request. + */ + def effectiveClientId: Option[String] = { + val clientId: Option[String] = ClientId.current.map(_.name) + clientId + // Exclude process paths because they are instance-specific and aren't + // supported by tweetypie for authorization or metrics purposes. + .filterNot(_.startsWith(ProcessPathPrefix)) + // Try computing a value from the ServiceId if the thrift + // ClientId is undefined or unsupported. + .orElse(serviceIdentifierStrategy.serviceIdentifier.map(ClientIdHelper.asClientId)) + // Ultimately fall back to the ClientId value, even when given an + // unsupported format, so that error text and debug logs include + // the value passed by the caller. 
+ .orElse(clientId) + } + + def effectiveClientIdRoot: Option[String] = effectiveClientId.map(ClientIdHelper.getClientIdRoot) + + def effectiveServiceIdentifier: Option[ServiceIdentifier] = + serviceIdentifierStrategy.serviceIdentifier +} + +/** Logic for finding a [[ServiceIdentifier]] for the purpose of crafting a client ID. */ +trait ServiceIdentifierStrategy { + def serviceIdentifier: Option[ServiceIdentifier] + + /** + * Returns the only element of given [[Set]] or [[None]]. + * + * This utility is used defensively against a set of principals collected + * from [[Access.getPrincipals]]. While the contract is that there should be at most one + * instance of each principal kind present in that set, in practice that has not always been the + * case. The safest strategy in that case is to abandon the set completely if more than + * one principal is competing. + */ + final protected def onlyElement[T](set: Set[T]): Option[T] = + if (set.size <= 1) { + set.headOption + } else { + None + } +} + +/** + * Picks [[ServiceIdentifier]] from Finagle SSL Transport, if one exists. + * + * This works for both Thrift API calls as well as StratoFed API calls. Strato's + * [[Access#getPrincipals]] collection, which would typically be consulted by StratoFed + * column logic, contains the same [[ServiceIdentifier]] derived from the Finagle SSL + * transport, so there's no need to have separate strategies for Thrift vs StratoFed + * calls. + * + * This is the default behavior of using [[ServiceIdentifier]] for computing client ID. 
+ */ +private[client_id] class UseTransportServiceIdentifier( + // overridable for testing + getPeerServiceIdentifier: => ServiceIdentifier, +) extends ServiceIdentifierStrategy { + override def serviceIdentifier: Option[ServiceIdentifier] = + getPeerServiceIdentifier match { + case EmptyServiceIdentifier => None + case si => Some(si) + } +} + +object UseTransportServiceIdentifier + extends UseTransportServiceIdentifier(S2STransport.peerServiceIdentifier) + +/** + * Picks [[ForwardedServiceIdentifier]] from Strato principals for client ID + * if [[ServiceIdentifier]] points at call coming from Strato. + * If not present, falls back to [[UseTransportServiceIdentifier]] behavior. + * + * Tweetypie utilizes the strategy to pick [[ServiceIdentifier]] for the purpose + * of generating a client ID when the client ID is absent or unknown. + * [[PreferForwardedServiceIdentifierForStrato]] looks for the [[ForwardedServiceIdentifier]] + * values set by stratoserver request. + * The reason is, stratoserver is effectively a conduit, forwarding the [[ServiceIdentifier]] + * of the _actual client_ that is calling stratoserver. + * Any direct callers not going through stratoserver will default to [[ServiceIdentifier]]. 
+ */ +private[client_id] class PreferForwardedServiceIdentifierForStrato( + // overridable for testing + getPeerServiceIdentifier: => ServiceIdentifier, +) extends ServiceIdentifierStrategy { + val useTransportServiceIdentifier = + new UseTransportServiceIdentifier(getPeerServiceIdentifier) + + override def serviceIdentifier: Option[ServiceIdentifier] = + useTransportServiceIdentifier.serviceIdentifier match { + case Some(serviceIdentifier) if isStrato(serviceIdentifier) => + onlyElement( + Access.getPrincipals + .collect { + case forwarded: ForwardedServiceIdentifier => + forwarded.serviceIdentifier.serviceIdentifier + } + ).orElse(useTransportServiceIdentifier.serviceIdentifier) + case other => other + } + + /** + * Strato uses various service names like "stratoserver" and "stratoserver-patient". + * They all do start with "stratoserver" though, so at the point of implementing, + * the safest bet to recognize strato is to look for this prefix. + * + * This also works for staged strato instances (which it should), despite allowing + * for technically any caller to force this strategy, by creating service certificate + * with this service name. + */ + private def isStrato(serviceIdentifier: ServiceIdentifier): Boolean = + serviceIdentifier.service.startsWith("stratoserver") +} + +object PreferForwardedServiceIdentifierForStrato + extends PreferForwardedServiceIdentifierForStrato(S2STransport.peerServiceIdentifier) + +/** + * [[ServiceIdentifierStrategy]] which dispatches between two delegates based on the value + * of a unitary decider every time [[serviceIdentifier]] is called. 
+ */ +class ConditionalServiceIdentifierStrategy( + private val condition: Gate[Unit], + private val ifTrue: ServiceIdentifierStrategy, + private val ifFalse: ServiceIdentifierStrategy) + extends ServiceIdentifierStrategy { + + override def serviceIdentifier: Option[ServiceIdentifier] = + if (condition()) { + ifTrue.serviceIdentifier + } else { + ifFalse.serviceIdentifier + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD new file mode 100644 index 000000000..30cef76c5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = "context", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "graphql/common/src/main/scala/com/twitter/graphql/common/core", + "src/thrift/com/twitter/context:twitter-context-scala", + "twitter-context/src/main/scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala new file mode 100644 index 000000000..4d987a02c --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/context/TweetypieContext.scala @@ -0,0 +1,135 @@ +package com.twitter.tweetypie.context + +import com.twitter.context.TwitterContext +import com.twitter.finagle.Filter +import com.twitter.finagle.Service +import com.twitter.finagle.SimpleFilter +import com.twitter.finagle.context.Contexts +import com.twitter.io.Buf +import com.twitter.io.Buf.ByteArray.Owned +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.graphql.common.core.GraphQlClientApplication +import com.twitter.util.Try +import 
java.nio.charset.StandardCharsets.UTF_8 +import scala.util.matching.Regex + +/** + * Context and filters to help track callers of Tweetypie's endpoints. This context and its + * filters were originally added to provide visibility into callers of Tweetypie who are + * using the birdherd library to access tweets. + * + * This context data is intended to be marshalled by callers to Tweetypie, but then the + * context data is stripped (moved from broadcast to local). This happens so that the + * context data is not forwarded down tweetypie's backend rpc chains, which often result + * in transitive calls back into tweetypie. This effectively creates single-hop marshalling. + */ +object TweetypieContext { + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + case class Ctx(via: String) + val Empty = Ctx("") + + object Broadcast { + private[this] object Key extends Contexts.broadcast.Key[Ctx](id = Ctx.getClass.getName) { + + override def marshal(value: Ctx): Buf = + Owned(value.via.getBytes(UTF_8)) + + override def tryUnmarshal(buf: Buf): Try[Ctx] = + Try(Ctx(new String(Owned.extract(buf), UTF_8))) + } + + private[TweetypieContext] def current(): Option[Ctx] = + Contexts.broadcast.get(Key) + + def currentOrElse(default: Ctx): Ctx = + current().getOrElse(default) + + def letClear[T](f: => T): T = + Contexts.broadcast.letClear(Key)(f) + + def let[T](ctx: Ctx)(f: => T): T = + if (Empty == ctx) { + letClear(f) + } else { + Contexts.broadcast.let(Key, ctx)(f) + } + + // ctx has to be by name so we can re-evaluate it for every request (for usage in ServiceTwitter.scala) + def filter(ctx: => Ctx): Filter.TypeAgnostic = + new Filter.TypeAgnostic { + override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] = + (request: Req, service: Service[Req, Rep]) => Broadcast.let(ctx)(service(request)) + } + } + + object Local { + private[this] val Key = + new 
Contexts.local.Key[Ctx] + + private[TweetypieContext] def let[T](ctx: Option[Ctx])(f: => T): T = + ctx match { + case Some(ctx) if ctx != Empty => Contexts.local.let(Key, ctx)(f) + case _ => Contexts.local.letClear(Key)(f) // None or Some(Empty): clear the local ctx instead of throwing MatchError + } + + def current(): Option[Ctx] = + Contexts.local.get(Key) + + def filter[Req, Rep]: SimpleFilter[Req, Rep] = + (request: Req, service: Service[Req, Rep]) => { + val ctx = Broadcast.current() + Broadcast.letClear(Local.let(ctx)(service(request))) + } + + private[this] def clientAppIdToName(clientAppId: Long) = + GraphQlClientApplication.AllById.get(clientAppId).map(_.name).getOrElse("nonTOO") + + private[this] val pathRegexes: Seq[(Regex, String)] = Seq( + ("timeline_conversation_.*_json".r, "timeline_conversation__slug__json"), + ("user_timeline_.*_json".r, "user_timeline__user__json"), + ("[0-9]{2,}".r, "_id_") + ) + + // `context.via` will either be a string like: "birdherd" or "birdherd:/1.1/statuses/show/123.json", + // depending on whether birdherd code was able to determine the path of the request. 
+ private[this] def getViaAndPath(via: String): (String, Option[String]) = + via.split(":", 2) match { + case Array(via, path) => + val sanitizedPath = path + .replace('/', '_') + .replace('.', '_') + + // Apply each regex in turn + val normalizedPath = pathRegexes.foldLeft(sanitizedPath) { + case (path, (regex, replacement)) => regex.replaceAllIn(path, replacement) + } + + (via, Some(normalizedPath)) + case Array(via) => (via, None) + } + + def trackStats[U](scopes: StatsReceiver*): Unit = + for { + tweetypieCtx <- TweetypieContext.Local.current() + (via, pathOpt) = getViaAndPath(tweetypieCtx.via) + twitterCtx <- TwitterContext() + clientAppId <- twitterCtx.clientApplicationId + } yield { + val clientAppName = clientAppIdToName(clientAppId) + scopes.foreach { stats => + val ctxStats = stats.scope("context") + val viaStats = ctxStats.scope("via", via) + viaStats.scope("all").counter("requests").incr() + val viaClientStats = viaStats.scope("by_client", clientAppName) + viaClientStats.counter("requests").incr() + pathOpt.foreach { path => + val viaPathStats = viaStats.scope("by_path", path) + viaPathStats.counter("requests").incr() + } + } + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD new file mode 100644 index 000000000..8c40f583a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["DeciderGates.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/guava", + "decider", + "finagle/finagle-toggle/src/main/scala/com/twitter/finagle/server", + "tweetypie/servo/decider", + "tweetypie/servo/util/src/main/scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala new file mode 100644 index 000000000..56df716f6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/DeciderGates.scala @@ -0,0 +1,60 @@ +package com.twitter.tweetypie +package decider + +import com.google.common.hash.Hashing +import com.twitter.decider.Decider +import com.twitter.decider.Feature +import com.twitter.servo.gate.DeciderGate +import com.twitter.servo.util.Gate +import java.nio.charset.StandardCharsets +import scala.collection.mutable +trait DeciderGates { + def overrides: Map[String, Boolean] = Map.empty + def decider: Decider + def prefix: String + + protected val seenFeatures: mutable.HashSet[String] = new mutable.HashSet[String] + + private def deciderFeature(name: String): Feature = { + decider.feature(prefix + "_" + name) + } + + def withOverride[T](name: String, mkGate: Feature => Gate[T]): Gate[T] = { + seenFeatures += name + overrides.get(name).map(Gate.const).getOrElse(mkGate(deciderFeature(name))) + } + + protected def linear(name: String): Gate[Unit] = withOverride[Unit](name, DeciderGate.linear) + protected def byId(name: String): Gate[Long] = withOverride[Long](name, DeciderGate.byId) + + /** + * It returns a Gate[String] that can be used to check availability of the feature. + * The string is hashed into a Long and used as an "id" and then used to call servo's + * DeciderGate.byId + * + * @param name decider name + * @return Gate[String] + */ + protected def byStringId(name: String): Gate[String] = + byId(name).contramap { s: String => + Hashing.sipHash24().hashString(s, StandardCharsets.UTF_8).asLong() + } + + def all: Traversable[String] = seenFeatures + + def unusedOverrides: Set[String] = overrides.keySet.diff(all.toSet) + + /** + * Generate a map of name -> availability, taking into account overrides. + * Overrides are either on or off so map to 10000 or 0, respectively. 
+ */ + def availabilityMap: Map[String, Option[Int]] = + all.map { name => + val availability: Option[Int] = overrides + .get(name) + .map(on => if (on) 10000 else 0) + .orElse(deciderFeature(name).availability) + + name -> availability + }.toMap +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD new file mode 100644 index 000000000..a23ca66e4 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/BUILD @@ -0,0 +1,10 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala new file mode 100644 index 000000000..7b396f3f8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides/TweetyPieDeciderOverrides.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie.decider.overrides + +import com.twitter.decider.LocalOverrides + +object TweetyPieDeciderOverrides extends LocalOverrides.Namespace("tweetypie", "tweetypie_") { + val CheckSpamOnRetweet: LocalOverrides.Override = feature("check_spam_on_retweet") + val CheckSpamOnTweet: LocalOverrides.Override = feature("check_spam_on_tweet") + val ConversationControlUseFeatureSwitchResults: LocalOverrides.Override = feature( + "conversation_control_use_feature_switch_results") + val ConversationControlTweetCreateEnabled: LocalOverrides.Override = feature( + "conversation_control_tweet_create_enabled") + val EnableExclusiveTweetControlValidation: LocalOverrides.Override = feature( + "enable_exclusive_tweet_control_validation") + val EnableHotKeyCaches: LocalOverrides.Override = 
feature("enable_hot_key_caches") + val HydrateConversationMuted: LocalOverrides.Override = feature("hydrate_conversation_muted") + val HydrateExtensionsOnWrite: LocalOverrides.Override = feature("hydrate_extensions_on_write") + val HydrateEscherbirdAnnotations: LocalOverrides.Override = feature( + "hydrate_escherbird_annotations") + val HydrateGnipProfileGeoEnrichment: LocalOverrides.Override = feature( + "hydrate_gnip_profile_geo_enrichment") + val HydratePastedPics: LocalOverrides.Override = feature("hydrate_pasted_pics") + val HydratePerspectivesEditsForOtherSafetyLevels: LocalOverrides.Override = feature( + "hydrate_perspectives_edits_for_other_levels") + val HydrateScrubEngagements: LocalOverrides.Override = feature("hydrate_scrub_engagements") + val LogRepoExceptions: LocalOverrides.Override = feature("log_repo_exceptions") + val MediaRefsHydratorIncludePastedMedia: LocalOverrides.Override = feature( + "media_refs_hydrator_include_pasted_media") + val ShortCircuitLikelyPartialTweetReads: LocalOverrides.Override = feature( + "short_circuit_likely_partial_tweet_reads_ms") + val RateLimitByLimiterService: LocalOverrides.Override = feature("rate_limit_by_limiter_service") + val RateLimitTweetCreationFailure: LocalOverrides.Override = feature( + "rate_limit_tweet_creation_failure") + val ReplyTweetConversationControlHydrationEnabled = feature( + "reply_tweet_conversation_control_hydration_enabled" + ) + val DisableInviteViaMention = feature( + "disable_invite_via_mention" + ) + val EnableRemoveUnmentionedImplicitMentions: LocalOverrides.Override = feature( + "enable_remove_unmentioned_implicit_mentions") + val useReplicatedDeleteTweet2: LocalOverrides.Override = feature("use_replicated_delete_tweet_2") +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD new file mode 100644 index 000000000..de6522d52 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/BUILD @@ -0,0 +1,15 @@ +scala_library( + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "stitch/stitch-core", + "strato/src/main/scala/com/twitter/strato/client", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "util/util-core", + "util/util-stats", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala new file mode 100644 index 000000000..dd123206f --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie/NudgeBuilder.scala @@ -0,0 +1,165 @@ +package com.twitter.tweetypie.jiminy.tweetypie + +import com.twitter.finagle.stats.CategorizingExceptionStatsHandler +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.incentives.jiminy.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.Gate +import com.twitter.stitch.Stitch +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw + +case class NudgeBuilderRequest( + text: String, + inReplyToTweetId: Option[NudgeBuilder.TweetId], + conversationId: Option[NudgeBuilder.TweetId], + hasQuotedTweet: Boolean, + nudgeOptions: Option[CreateTweetNudgeOptions], + tweetId: Option[NudgeBuilder.TweetId]) + +trait NudgeBuilder extends FutureArrow[NudgeBuilderRequest, Unit] { + + /** + * Check whether the user should receive a nudge instead of creating + * the Tweet. 
If nudgeOptions is None, then no nudge check will be + * performed. + * + * @return a Future.exception containing a [[TweetCreateFailure]] if the + * user should be nudged, or Future.Unit if the user should not be + * nudged. + */ + def apply( + request: NudgeBuilderRequest + ): Future[Unit] +} + +object NudgeBuilder { + type Type = FutureArrow[NudgeBuilderRequest, Unit] + type TweetId = Long + + // darkTrafficCreateNudgeOptions ensure that our dark traffic sends a request that will + // accurately test the Jiminy backend. in this case, we specify that we want checks for all + // possible nudge types + private[this] val darkTrafficCreateNudgeOptions = Some( + CreateTweetNudgeOptions( + requestedNudgeTypes = Some( + Set( + TweetNudgeType.PotentiallyToxicTweet, + TweetNudgeType.ReviseOrMute, + TweetNudgeType.ReviseOrHideThenBlock, + TweetNudgeType.ReviseOrBlock + ) + ) + ) + ) + + private[this] def mkJiminyRequest( + request: NudgeBuilderRequest, + isDarkRequest: Boolean = false + ): CreateTweetNudgeRequest = { + val tweetType = + if (request.inReplyToTweetId.nonEmpty) TweetType.Reply + else if (request.hasQuotedTweet) TweetType.QuoteTweet + else TweetType.OriginalTweet + + CreateTweetNudgeRequest( + tweetText = request.text, + tweetType = tweetType, + inReplyToTweetId = request.inReplyToTweetId, + conversationId = request.conversationId, + createTweetNudgeOptions = + if (isDarkRequest) darkTrafficCreateNudgeOptions else request.nudgeOptions, + tweetId = request.tweetId + ) + } + + /** + * NudgeBuilder implemented by calling the strato column `incentives/createNudge`. + * + * Stats recorded: + * - latency_ms: Latency histogram (also implicitly number of + * invocations). This is counted only in the case that a nudge + * check was requested (`nudgeOptions` is non-empty) + * + * - nudge: The nudge check succeeded and a nudge was created. + * + * - no_nudge: The nudge check succeeded, but no nudge was created. + * + * - failures: Calling strato to create a nudge failed. 
Broken out + * by exception. + */ + + def apply( + nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse], + enableDarkTraffic: Gate[Unit], + stats: StatsReceiver + ): NudgeBuilder = { + new NudgeBuilder { + private[this] val nudgeLatencyStat = stats.stat("latency_ms") + private[this] val nudgeCounter = stats.counter("nudge") + private[this] val noNudgeCounter = stats.counter("no_nudge") + private[this] val darkRequestCounter = stats.counter("dark_request") + private[this] val nudgeExceptionHandler = new CategorizingExceptionStatsHandler + + override def apply( + request: NudgeBuilderRequest + ): Future[Unit] = + request.nudgeOptions match { + case None => + if (enableDarkTraffic()) { + darkRequestCounter.incr() + Stat + .timeFuture(nudgeLatencyStat) { + nudgeArrow(mkJiminyRequest(request, isDarkRequest = true)) + } + .transform { _ => + // ignore the response since it is a dark request + Future.Done + } + } else { + Future.Done + } + + case Some(_) => + Stat + .timeFuture(nudgeLatencyStat) { + nudgeArrow(mkJiminyRequest(request)) + } + .transform { + case Throw(e) => + nudgeExceptionHandler.record(stats, e) + // If we failed to invoke the nudge column, then + // just continue on with the Tweet creation. 
+ Future.Done + + case Return(CreateTweetNudgeResponse(Some(nudge))) => + nudgeCounter.incr() + Future.exception(TweetCreateFailure.Nudged(nudge = nudge)) + + case Return(CreateTweetNudgeResponse(None)) => + noNudgeCounter.incr() + Future.Done + } + } + } + } + + def apply( + strato: StratoClient, + enableDarkTraffic: Gate[Unit], + stats: StatsReceiver + ): NudgeBuilder = { + val executer = + strato.executer[CreateTweetNudgeRequest, CreateTweetNudgeResponse]( + "incentives/createTweetNudge") + val nudgeArrow: FutureArrow[CreateTweetNudgeRequest, CreateTweetNudgeResponse] = { req => + Stitch.run(executer.execute(req)) + } + apply(nudgeArrow, enableDarkTraffic, stats) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD new file mode 100644 index 000000000..52259fc54 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "src/java/com/twitter/common/text/language:language-identifier", + "src/java/com/twitter/common/text/language:locale-util", + "src/java/com/twitter/common/text/pipeline", + "src/java/com/twitter/common/text/token", + "src/java/com/twitter/common_internal/text", + "src/java/com/twitter/common_internal/text/version", + "tweetypie/src/resources/com/twitter/tweetypie/matching", + "util/util-core/src/main/scala/com/twitter/concurrent", + "util/util-core/src/main/scala/com/twitter/io", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala new file mode 100644 index 000000000..09e9695cc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TokenSequence.scala @@ -0,0 +1,92 @@ +package 
com.twitter.tweetypie.matching + +object TokenSequence { + + /** + * Is `suffix` a suffix of `s`, starting at `offset` in `s`? + */ + def hasSuffixAt(s: CharSequence, suffix: CharSequence, offset: Int): Boolean = + if (offset == 0 && (s.eq(suffix) || s == suffix)) { + true + } else if (suffix.length != (s.length - offset)) { + false + } else { + @annotation.tailrec + def go(i: Int): Boolean = + if (i == suffix.length) true + else if (suffix.charAt(i) == s.charAt(offset + i)) go(i + 1) + else false + + go(0) + } + + /** + * Do two [[CharSequence]]s contain the same characters? + * + * [[CharSequence]] equality is not sufficient because + * [[CharSequence]]s of different types may not consider other + * [[CharSequence]]s containing the same characters equivalent. + */ + def sameCharacters(s1: CharSequence, s2: CharSequence): Boolean = + hasSuffixAt(s1, s2, 0) + + /** + * This method implements the product definition of a token matching a + * keyword. That definition is: + * + * - The token contains the same characters as the keyword. + * - The token contains the same characters as the keyword after + * dropping a leading '#' or '@' from the token. + * + * The intention is that a keyword matches an identical hashtag, but + * if the keyword itself is a hashtag, it only matches the hashtag + * form. + * + * The tokenization process should rule out tokens or keywords that + * start with multiple '#' characters, even though this implementation + * allows for e.g. token "##a" to match "#a". + */ + def tokenMatches(token: CharSequence, keyword: CharSequence): Boolean = + if (sameCharacters(token, keyword)) true + else if (token.length == 0) false + else { + val tokenStart = token.charAt(0) + (tokenStart == '#' || tokenStart == '@') && hasSuffixAt(token, keyword, 1) + } +} + +/** + * A sequence of normalized tokens. The sequence depends on the locale + * in which the text was parsed and the version of the penguin library + * that was used at tokenization time. 
+ */ +case class TokenSequence private[matching] (toIndexedSeq: IndexedSeq[CharSequence]) { + import TokenSequence.tokenMatches + + private def apply(i: Int): CharSequence = toIndexedSeq(i) + + def isEmpty: Boolean = toIndexedSeq.isEmpty + def nonEmpty: Boolean = toIndexedSeq.nonEmpty + + /** + * Does the supplied sequence of keywords match a consecutive sequence + * of tokens within this sequence? + */ + def containsKeywordSequence(keywords: TokenSequence): Boolean = { + val finalIndex = toIndexedSeq.length - keywords.toIndexedSeq.length + + @annotation.tailrec + def matchesAt(offset: Int, i: Int): Boolean = + if (i >= keywords.toIndexedSeq.length) true + else if (tokenMatches(this(i + offset), keywords(i))) matchesAt(offset, i + 1) + else false + + @annotation.tailrec + def search(offset: Int): Boolean = + if (offset > finalIndex) false + else if (matchesAt(offset, 0)) true + else search(offset + 1) + + search(0) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala new file mode 100644 index 000000000..7cb3cd315 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/Tokenizer.scala @@ -0,0 +1,156 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.language.LocaleUtil +import com.twitter.common_internal.text.pipeline.TwitterTextNormalizer +import com.twitter.common_internal.text.pipeline.TwitterTextTokenizer +import com.twitter.common_internal.text.version.PenguinVersion +import com.twitter.concurrent.Once +import com.twitter.io.StreamIO +import java.util.Locale +import scala.collection.JavaConverters._ + +/** + * Extract a sequence of normalized tokens from the input text. The + * normalization and tokenization are properly configured for keyword + * matching between texts. 
+ */ +trait Tokenizer { + def tokenize(input: String): TokenSequence +} + +object Tokenizer { + + /** + * When a Penguin version is not explicitly specified, use this + * version of Penguin to perform normalization and tokenization. If + * you cache tokenized text, be sure to store the version as well, to + * avoid comparing text that was normalized with different algorithms. + */ + val DefaultPenguinVersion: PenguinVersion = PenguinVersion.PENGUIN_6 + + /** + * If you already know the locale of the text that is being tokenized, + * use this method to get a tokenizer that is much more efficient than + * the Tweet or Query tokenizer, since it does not have to perform + * language detection. + */ + def forLocale(locale: Locale): Tokenizer = get(locale, DefaultPenguinVersion) + + /** + * Obtain a `Tokenizer` that will tokenize the text for the given + * locale and version of the Penguin library. + */ + def get(locale: Locale, version: PenguinVersion): Tokenizer = + TokenizerFactories(version).forLocale(locale) + + /** + * Encapsulates the configuration and use of [[TwitterTextTokenizer]] + * and [[TwitterTextNormalizer]]. + */ + private[this] class TokenizerFactory(version: PenguinVersion) { + // The normalizer is thread-safe, so share one instance. + private[this] val normalizer = + (new TwitterTextNormalizer.Builder(version)).build() + + // The TwitterTextTokenizer is relatively expensive to build, + // and is not thread safe, so keep instances of it in a + // ThreadLocal. + private[this] val local = + new ThreadLocal[TwitterTextTokenizer] { + override def initialValue: TwitterTextTokenizer = + (new TwitterTextTokenizer.Builder(version)).build() + } + + /** + * Obtain a [[Tokenizer]] for this combination of [[PenguinVersion]] + * and [[Locale]]. 
+ */ + def forLocale(locale: Locale): Tokenizer = + new Tokenizer { + override def tokenize(input: String): TokenSequence = { + val stream = local.get.getTwitterTokenStreamFor(locale) + stream.reset(normalizer.normalize(input, locale)) + val builder = IndexedSeq.newBuilder[CharSequence] + while (stream.incrementToken) builder += stream.term() + TokenSequence(builder.result()) + } + } + } + + /** + * Since there are a small number of Penguin versions, eagerly + * initialize a TokenizerFactory for each version, to avoid managing + * mutable state. + */ + private[this] val TokenizerFactories: PenguinVersion => TokenizerFactory = + PenguinVersion.values.map(v => v -> new TokenizerFactory(v)).toMap + + /** + * The set of locales used in warmup. These locales are mentioned in + * the logic of TwitterTextTokenizer and TwitterTextNormalizer. + */ + private[this] val WarmUpLocales: Seq[Locale] = + Seq + .concat( + Seq( + Locale.JAPANESE, + Locale.KOREAN, + LocaleUtil.UNKNOWN, + LocaleUtil.THAI, + LocaleUtil.ARABIC, + LocaleUtil.SWEDISH + ), + LocaleUtil.CHINESE_JAPANESE_LOCALES.asScala, + LocaleUtil.CJK_LOCALES.asScala + ) + .toSet + .toArray + .toSeq + + /** + * Load the default inputs that are used for warming up this library. + */ + def warmUpCorpus(): Seq[String] = { + val stream = getClass.getResourceAsStream("warmup-text.txt") + val bytes = + try StreamIO.buffer(stream) + finally stream.close() + bytes.toString("UTF-8").linesIterator.toArray.toSeq + } + + /** + * Exercise the functionality of this library on the specified + * strings. In general, prefer [[warmUp]] to this method. 
+ */ + def warmUpWith(ver: PenguinVersion, texts: Iterable[String]): Unit = + texts.foreach { txt => + // Exercise each locale + WarmUpLocales.foreach { loc => + Tokenizer.get(loc, ver).tokenize(txt) + UserMutes.builder().withPenguinVersion(ver).withLocale(loc).validate(txt) + } + + // Exercise language detection + TweetTokenizer.get(ver).tokenize(txt) + UserMutes.builder().withPenguinVersion(ver).validate(txt) + } + + private[this] val warmUpOnce = Once(warmUpWith(DefaultPenguinVersion, warmUpCorpus())) + + /** + * The creation of the first TwitterTextTokenizer is relatively + * expensive, and tokenizing some texts may cause significant + * initialization. + * + * This method exercises the functionality of this library + * with a range of texts in order to perform as much initialization as + * possible before the library is used in a latency-sensitive way. + * + * The warmup routine will only run once. Subsequent invocations of + * `warmUp` will not do additional work, and will return once warmup is + * complete. + * + * The warmup will take on the order of seconds. + */ + def warmUp(): Unit = warmUpOnce() +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala new file mode 100644 index 000000000..592891235 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/TweetTokenizer.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.common_internal.text.version.PenguinVersion +import java.util.Locale + +object TweetTokenizer extends Tokenizer { + type LocalePicking = Option[Locale] => Tokenizer + + /** + * Get a Tokenizer-producing function that uses the supplied locale + * to select an appropriate Tokenizer.
+ */ + def localePicking: LocalePicking = { + case None => TweetTokenizer + case Some(locale) => Tokenizer.forLocale(locale) + } + + private[this] val tweetLangIdentifier = + (new TwitterLanguageIdentifier.Builder).buildForTweet() + + /** + * Get a Tokenizer that performs Tweet language detection, and uses + * that result to tokenize the text. If you already know the locale of + * the tweet text, use `Tokenizer.get`, because it's much + * cheaper. + */ + def get(version: PenguinVersion): Tokenizer = + new Tokenizer { + override def tokenize(text: String): TokenSequence = { + val locale = tweetLangIdentifier.identify(text).getLocale + Tokenizer.get(locale, version).tokenize(text) + } + } + + private[this] val Default = get(Tokenizer.DefaultPenguinVersion) + + /** + * Tokenize the given text using Tweet language detection and + * `Tokenizer.DefaultPenguinVersion`. Prefer `Tokenizer.forLocale` if + * you already know the language of the text. + */ + override def tokenize(tweetText: String): TokenSequence = + Default.tokenize(tweetText) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala new file mode 100644 index 000000000..dc7430c86 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/matching/UserMutes.scala @@ -0,0 +1,128 @@ +package com.twitter.tweetypie.matching + +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.common_internal.text.version.PenguinVersion +import java.util.Locale +import scala.collection.JavaConversions.asScalaBuffer + +object UserMutesBuilder { + private[matching] val Default = + new UserMutesBuilder(Tokenizer.DefaultPenguinVersion, None) + + private val queryLangIdentifier = + (new TwitterLanguageIdentifier.Builder).buildForQuery() +} + +class UserMutesBuilder private (penguinVersion: PenguinVersion, localeOpt: Option[Locale]) { + + /** + * Use the specified Penguin version when 
tokenizing a keyword mute + * string. In general, use the default version, unless you need to + * specify a particular version for compatibility with another system + * that is using that version. + */ + def withPenguinVersion(ver: PenguinVersion): UserMutesBuilder = + if (ver == penguinVersion) this + else new UserMutesBuilder(ver, localeOpt) + + /** + * Use the specified locale when tokenizing a keyword mute string. + */ + def withLocale(locale: Locale): UserMutesBuilder = + if (localeOpt.contains(locale)) this + else new UserMutesBuilder(penguinVersion, Some(locale)) + + /** + * When tokenizing a user mute list, detect the language of the + * text. This is significantly more expensive than using a predefined + * locale, but is appropriate when the locale is not yet known. + */ + def detectLocale(): UserMutesBuilder = + if (localeOpt.isEmpty) this + else new UserMutesBuilder(penguinVersion, localeOpt) + + private[this] lazy val tokenizer = + localeOpt match { + case None => + // No locale was specified, so use a Tokenizer that performs + // language detection before tokenizing. + new Tokenizer { + override def tokenize(text: String): TokenSequence = { + val locale = UserMutesBuilder.queryLangIdentifier.identify(text).getLocale + Tokenizer.get(locale, penguinVersion).tokenize(text) + } + } + + case Some(locale) => + Tokenizer.get(locale, penguinVersion) + } + + /** + * Given a list of the user's raw keyword mutes, return a preprocessed + * set of mutes suitable for matching against tweet text. If the input + * contains any phrases that fail validation, then they will be + * dropped. + */ + def build(rawInput: Seq[String]): UserMutes = + UserMutes(rawInput.flatMap(validate(_).right.toOption)) + + /** + * Java-friendly API for processing a user's list of raw keyword mutes + * into a preprocessed form suitable for matching against text. 
+ */ + def fromJavaList(rawInput: java.util.List[String]): UserMutes = + build(asScalaBuffer(rawInput).toSeq) + + /** + * Validate the raw user input muted phrase. Currently, the only + * inputs that are not valid for keyword muting are those inputs that + * do not contain any keywords, because those inputs would match all + * tweets. + */ + def validate(mutedPhrase: String): Either[UserMutes.ValidationError, TokenSequence] = { + val keywords = tokenizer.tokenize(mutedPhrase) + if (keywords.isEmpty) UserMutes.EmptyPhraseError else Right(keywords) + } +} + +object UserMutes { + sealed trait ValidationError + + /** + * The phrase's tokenization did not produce any tokens + */ + case object EmptyPhrase extends ValidationError + + private[matching] val EmptyPhraseError = Left(EmptyPhrase) + + /** + * Get a [[UserMutesBuilder]] that uses the default Penguin version and + * performs language identification to choose a locale. + */ + def builder(): UserMutesBuilder = UserMutesBuilder.Default +} + +/** + * A user's muted keyword list, preprocessed into token sequences. + */ +case class UserMutes private[matching] (toSeq: Seq[TokenSequence]) { + + /** + * Do any of the users' muted keyword sequences occur within the + * supplied text? 
+ */ + def matches(text: TokenSequence): Boolean = + toSeq.exists(text.containsKeywordSequence) + + /** + * Find all positions of matching muted keyword from the user's + * muted keyword list + */ + def find(text: TokenSequence): Seq[Int] = + toSeq.zipWithIndex.collect { + case (token, index) if text.containsKeywordSequence(token) => index + } + + def isEmpty: Boolean = toSeq.isEmpty + def nonEmpty: Boolean = toSeq.nonEmpty +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD new file mode 100644 index 000000000..2b1e9ec79 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/BUILD @@ -0,0 +1,17 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tco-util", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-logging/src/main/scala/com/twitter/logging", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala new file mode 100644 index 000000000..bd0e6f4a3 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/Media.scala @@ -0,0 +1,149 @@ +package com.twitter.tweetypie +package media + +import com.twitter.mediaservices.commons.thriftscala.MediaCategory +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tco_util.TcoSlug +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.TweetLenses + +/** + * A smörgåsbord of media-related helper methods. 
+ */ +object Media { + val AnimatedGifContentType = "video/mp4 codecs=avc1.42E0" + + case class MediaTco(expandedUrl: String, url: String, displayUrl: String) + + val ImageContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.ImageJpeg, + MediaContentType.ImagePng, + MediaContentType.ImageGif + ) + + val AnimatedGifContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.VideoMp4 + ) + + val VideoContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.VideoGeneric + ) + + val InUseContentTypes: Set[MediaContentType] = + Set[MediaContentType]( + MediaContentType.ImageGif, + MediaContentType.ImageJpeg, + MediaContentType.ImagePng, + MediaContentType.VideoMp4, + MediaContentType.VideoGeneric + ) + + def isImage(contentType: MediaContentType): Boolean = + ImageContentTypes.contains(contentType) + + def contentTypeToString(contentType: MediaContentType): String = + contentType match { + case MediaContentType.ImageGif => "image/gif" + case MediaContentType.ImageJpeg => "image/jpeg" + case MediaContentType.ImagePng => "image/png" + case MediaContentType.VideoMp4 => "video/mp4" + case MediaContentType.VideoGeneric => "video" + case _ => throw new IllegalArgumentException(s"UnknownMediaContentType: $contentType") + } + + def stringToContentType(str: String): MediaContentType = + str match { + case "image/gif" => MediaContentType.ImageGif + case "image/jpeg" => MediaContentType.ImageJpeg + case "image/png" => MediaContentType.ImagePng + case "video/mp4" => MediaContentType.VideoMp4 + case "video" => MediaContentType.VideoGeneric + case _ => throw new IllegalArgumentException(s"Unknown Content Type String: $str") + } + + def extensionForContentType(cType: MediaContentType): String = + cType match { + case MediaContentType.ImageJpeg => "jpg" + case MediaContentType.ImagePng => "png" + case MediaContentType.ImageGif => "gif" + case MediaContentType.VideoMp4 => "mp4" + case 
MediaContentType.VideoGeneric => "" + case _ => "unknown" + } + + /** + * Extract a URL entity from a media entity. + */ + def extractUrlEntity(mediaEntity: MediaEntity): UrlEntity = + UrlEntity( + fromIndex = mediaEntity.fromIndex, + toIndex = mediaEntity.toIndex, + url = mediaEntity.url, + expanded = Some(mediaEntity.expandedUrl), + display = Some(mediaEntity.displayUrl) + ) + + /** + * Copy the fields from the URL entity into the media entity. + */ + def copyFromUrlEntity(mediaEntity: MediaEntity, urlEntity: UrlEntity): MediaEntity = { + val expandedUrl = + urlEntity.expanded.orElse(Option(mediaEntity.expandedUrl)).getOrElse(urlEntity.url) + + val displayUrl = + urlEntity.url match { + case TcoSlug(slug) => MediaUrl.Display.fromTcoSlug(slug) + case _ => urlEntity.expanded.getOrElse(urlEntity.url) + } + + mediaEntity.copy( + fromIndex = urlEntity.fromIndex, + toIndex = urlEntity.toIndex, + url = urlEntity.url, + expandedUrl = expandedUrl, + displayUrl = displayUrl + ) + } + + def getAspectRatio(size: MediaSize): AspectRatio = + getAspectRatio(size.width, size.height) + + def getAspectRatio(width: Int, height: Int): AspectRatio = { + if (width == 0 || height == 0) { + throw new IllegalArgumentException(s"Dimensions must be non zero: ($width, $height)") + } + + def calculateGcd(a: Int, b: Int): Int = + if (b == 0) a else calculateGcd(b, a % b) + + val gcd = calculateGcd(math.max(width, height), math.min(width, height)) + AspectRatio((width / gcd).toShort, (height / gcd).toShort) + } + + /** + * Return just the media that belongs to this tweet + */ + def ownMedia(tweet: Tweet): Seq[MediaEntity] = + TweetLenses.media.get(tweet).filter(isOwnMedia(tweet.id, _)) + + /** + * Does the given media entity, which was found on the tweet with the specified + * tweetId, belong to that tweet?
+ */ + def isOwnMedia(tweetId: TweetId, entity: MediaEntity): Boolean = + entity.sourceStatusId.forall(_ == tweetId) + + /** + * Mixed Media is any case where there is more than one media item & any of them is not an image. + */ + + def isMixedMedia(mediaEntities: Seq[MediaEntity]): Boolean = + mediaEntities.length > 1 && (mediaEntities.flatMap(_.mediaInfo).exists { + case _: MediaInfo.ImageInfo => false + case _ => true + } || + mediaEntities.flatMap(_.mediaKey).map(_.mediaCategory).exists(_ != MediaCategory.TweetImage)) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala new file mode 100644 index 000000000..eb26dfad8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/MediaUrl.scala @@ -0,0 +1,108 @@ +package com.twitter.tweetypie +package media + +import com.twitter.logging.Logger +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.UrlEntity + +/** + * Creating and parsing tweet media entity URLs. + * + * There are four kinds of URL in a media entity: + * + * - Display URLs: pic.twitter.com aliases for the short URL, for + * embedding in the tweet text. + * + * - Short URLs: regular t.co URLs that expand to the permalink URL. + * + * - Permalink URLs: link to a page that displays the media after + * doing authorization + * + * - Asset URLs: links to the actual media asset. + * + */ +object MediaUrl { + private[this] val log = Logger(getClass) + + /** + * The URL that should be filled in to the displayUrl field of the + * media entity. This URL behaves exactly the same as a t.co link + * (only the domain is different.) + */ + object Display { + val Root = "pic.twitter.com/" + + def fromTcoSlug(tcoSlug: String): String = Root + tcoSlug + } + + /** + * The link target for the link in the tweet text (the expanded URL + * for the media, copied from the URL entity.) 
For native photos, + * this is the tweet permalink page. + * + * For users without a screen name ("handleless" or NoScreenName users) + * a permalink to /i/status/:tweet_id is used. + */ + object Permalink { + val Root = "https://twitter.com/" + val Internal = "i" + val PhotoSuffix = "/photo/1" + val VideoSuffix = "/video/1" + + def apply(screenName: String, tweetId: TweetId, isVideo: Boolean): String = + Root + + (if (screenName.isEmpty) Internal else screenName) + + "/status/" + + tweetId + + (if (isVideo) VideoSuffix else PhotoSuffix) + + private[this] val PermalinkRegex = + """https?://twitter.com/(?:#!/)?\w+/status/(\d+)/(?:photo|video)/\d+""".r + + private[this] def getTweetId(permalink: String): Option[TweetId] = + permalink match { + case PermalinkRegex(tweetIdStr) => + try { + Some(tweetIdStr.toLong) + } catch { + // Digits too big to fit in a Long + case _: NumberFormatException => None + } + case _ => None + } + + def getTweetId(urlEntity: UrlEntity): Option[TweetId] = + urlEntity.expanded.flatMap(getTweetId) + + def hasTweetId(permalink: String, tweetId: TweetId): Boolean = + getTweetId(permalink).contains(tweetId) + + def hasTweetId(mediaEntity: MediaEntity, tweetId: TweetId): Boolean = + hasTweetId(mediaEntity.expandedUrl, tweetId) + + def hasTweetId(urlEntity: UrlEntity, tweetId: TweetId): Boolean = + getTweetId(urlEntity).contains(tweetId) + } + + /** + * Converts a url that starts with "https://" to one that starts with "http://". + */ + def httpsToHttp(url: String): String = + url.replace("https://", "http://") + + /** + * Gets the last path element from an asset url. This exists temporarily to support + * the now deprecated mediaPath element in MediaEntity. + */ + def mediaPathFromUrl(url: String): String = + url.lastIndexOf('/') match { + case -1 => + log.error("Invalid media path. Could not find last element: " + url) + // Better to return a broken preview URL to the client + // than to fail the whole request. 
+ "" + + case idx => + url.substring(idx + 1) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala new file mode 100644 index 000000000..d8fb9b2d1 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/media/package.scala @@ -0,0 +1,7 @@ +package com.twitter.tweetypie + +package object media { + type TweetId = Long + type UserId = Long + type MediaId = Long +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala new file mode 100644 index 000000000..a0035b9e5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/AddTweetHandler.scala @@ -0,0 +1,80 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.tweetypie.storage.TweetUtils.collectWithRateLimitCheck +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Time + +object AddTweetHandler { + private[storage] type InternalAddTweet = ( + Tweet, + ManhattanOperations.Insert, + Scribe, + StatsReceiver, + Time + ) => Stitch[Unit] + + def apply( + insert: ManhattanOperations.Insert, + scribe: Scribe, + stats: StatsReceiver + ): TweetStorageClient.AddTweet = + tweet => doAddTweet(tweet, insert, scribe, stats, Time.now) + + def makeRecords( + storedTweet: StoredTweet, + timestamp: Time + ): Seq[TweetManhattanRecord] = { + val core = CoreFieldsCodec.fromTweet(storedTweet) + val packedCoreFieldsBlob = CoreFieldsCodec.toTFieldBlob(core) + val coreRecord = + TweetManhattanRecord( + TweetKey.coreFieldsKey(storedTweet.id), + ManhattanValue(TFieldBlobCodec.toByteBuffer(packedCoreFieldsBlob), Some(timestamp)) + ) + + val 
otherFieldIds = + TweetFields.nonCoreInternalFields ++ TweetFields.getAdditionalFieldIds(storedTweet) + + val otherFields = + storedTweet + .getFieldBlobs(otherFieldIds) + .map { + case (fieldId, tFieldBlob) => + TweetManhattanRecord( + TweetKey.fieldKey(storedTweet.id, fieldId), + ManhattanValue(TFieldBlobCodec.toByteBuffer(tFieldBlob), Some(timestamp)) + ) + } + .toSeq + otherFields :+ coreRecord + } + + private[storage] val doAddTweet: InternalAddTweet = ( + tweet: Tweet, + insert: ManhattanOperations.Insert, + scribe: Scribe, + stats: StatsReceiver, + timestamp: Time + ) => { + assert(tweet.coreData.isDefined, s"Tweet ${tweet.id} is missing coreData: $tweet") + + val storedTweet = StorageConversions.toStoredTweet(tweet) + val records = makeRecords(storedTweet, timestamp) + val inserts = records.map(insert) + val insertsWithRateLimitCheck = + Stitch.collect(inserts.map(_.liftToTry)).map(collectWithRateLimitCheck).lowerFromTry + + Stats.updatePerFieldQpsCounters( + "addTweet", + TweetFields.getAdditionalFieldIds(storedTweet), + 1, + stats + ) + + insertsWithRateLimitCheck.unit.onSuccess { _ => scribe.logAdded(storedTweet) } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD new file mode 100644 index 000000000..6a3db82e7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BUILD @@ -0,0 +1,47 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = [ + "bazel-compatible", + "bazel-incompatible-scaladoc", + ], + dependencies = [ + "3rdparty/jvm/com/chuusai:shapeless", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:scrooge", + 
"3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "diffshow", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization", + "finagle/finagle-core/src/main", + "finagle/finagle-stats", + "finagle/finagle-thriftmux/src/main/scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo/src/main/scala", + "tweetypie/servo/util", + "snowflake:id", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/manhattan:internal-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "storage/clients/manhattan/client/src/main/scala", + "tbird-thrift:scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal:storage_internal-scala", + "util-internal/scribe", + "util/util-core:scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala new file mode 100644 index 000000000..224c09cb0 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/BounceDeleteHandler.scala @@ -0,0 +1,20 @@ +package com.twitter.tweetypie.storage + +import com.twitter.util.Time + +object BounceDeleteHandler { + def apply( + insert: ManhattanOperations.Insert, + scribe: Scribe + ): TweetStorageClient.BounceDelete = + tweetId => { + val mhTimestamp = 
Time.now + val bounceDeleteRecord = TweetStateRecord + .BounceDeleted(tweetId, mhTimestamp.inMillis) + .toTweetMhRecord + + insert(bounceDeleteRecord).onSuccess { _ => + scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala new file mode 100644 index 000000000..670014f26 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Codecs.scala @@ -0,0 +1,242 @@ +package com.twitter.tweetypie.storage + +import com.twitter.bijection.Conversion.asMethod +import com.twitter.bijection.Injection +import com.twitter.scrooge.TFieldBlob +import com.twitter.storage.client.manhattan.kv._ +import com.twitter.tweetypie.storage.Response.FieldResponse +import com.twitter.tweetypie.storage.Response.FieldResponseCode +import com.twitter.tweetypie.storage_internal.thriftscala.CoreFields +import com.twitter.tweetypie.storage_internal.thriftscala.InternalTweet +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import java.io.ByteArrayOutputStream +import java.nio.ByteBuffer +import org.apache.thrift.protocol.TBinaryProtocol +import org.apache.thrift.transport.TIOStreamTransport +import org.apache.thrift.transport.TMemoryInputTransport +import scala.collection.immutable +import scala.util.control.NoStackTrace + +// NOTE: All field ids and Tweet structure in this file correspond to the StoredTweet struct ONLY + +object ByteArrayCodec { + def toByteBuffer(byteArray: Array[Byte]): ByteBuffer = byteArray.as[ByteBuffer] + def fromByteBuffer(buffer: ByteBuffer): Array[Byte] = buffer.as[Array[Byte]] +} + +object StringCodec { + private val string2ByteBuffer = Injection.connect[String, Array[Byte], ByteBuffer] + def toByteBuffer(strValue: String): ByteBuffer = string2ByteBuffer(strValue) + def fromByteBuffer(buffer: ByteBuffer): String = string2ByteBuffer.invert(buffer).get +} + +/** + 
* Terminology + * ----------- + * Tweet id field : The field number of 'tweetId' in the 'Tweet' thrift structure (i.e "1") + * + * First AdditionalField id : The ID of the first additional field in 'Tweet' thrift structure. All field Ids less than this are + * considered internal and all the ids greater than or equal to this field id are considered 'Additional fields'. + * This is set to 100. + * + * Internal Fields : Fields with ids [1 to firstAdditionalFieldId) (excluding firstAdditionalFieldId) + * + * Core fields : (Subset of Internal fields)- Fields with ids [1 to 8, 19]. These fields are "packed" together and stored + * under a single key. This key is referred to as "CoreFieldsKey" (see @TweetKeyType.CoreFieldsKey). + * (Note: Actually field 1 is skipped when packing as this field is the tweet id and it need not be + * explicitly stored since the pkey already contains the tweet Id) + * + * Root Core field id : The field id under which the packed core fields are stored in Manhattan. (This is field Id "1") + * + * Required fields : (Subset of Core fields) - Fields with ids [1 to 5] that MUST be present on every tweet. + * + * Additional Fields : All fields with field ids >= 'firstAdditionalFieldId' + * + * Compiled Additional fields : (Subset of Additional Fields) - All fields that the storage library knows about + * (i.e present on the latest storage_internal.thrift that is compiled-in). + * + * Passthrough fields : (Subset of Additional Fields) - The fields on storage_internal.thrift that the storage library is NOT aware of + * These field ids are obtained by looking at the "_passthroughFields" member of the scrooge-generated + * 'Tweet' object.
+ * + * coreFieldsIdInInternalTweet: This is the field id of the core fields (the only field) in the Internal Tweet struct + */ +object TweetFields { + val firstAdditionalFieldId: Short = 100 + val tweetIdField: Short = 1 + val geoFieldId: Short = 9 + + // The field under which all the core field values are stored (in serialized form). + val rootCoreFieldId: Short = 1 + + val coreFieldIds: immutable.IndexedSeq[FieldId] = { + val quotedTweetFieldId: Short = 19 + (1 to 8).map(_.toShort) ++ Seq(quotedTweetFieldId) + } + val requiredFieldIds: immutable.IndexedSeq[FieldId] = (1 to 5).map(_.toShort) + + val coreFieldsIdInInternalTweet: Short = 1 + + val compiledAdditionalFieldIds: Seq[FieldId] = + StoredTweet.metaData.fields.filter(_.id >= firstAdditionalFieldId).map(_.id) + val internalFieldIds: Seq[FieldId] = + StoredTweet.metaData.fields.filter(_.id < firstAdditionalFieldId).map(_.id) + val nonCoreInternalFields: Seq[FieldId] = + (internalFieldIds.toSet -- coreFieldIds.toSet).toSeq + def getAdditionalFieldIds(tweet: StoredTweet): Seq[FieldId] = + compiledAdditionalFieldIds ++ tweet._passthroughFields.keys.toSeq +} + +/** + * Helper object to convert TFieldBlob to ByteBuffer that gets stored in Manhattan. 
+ * + * The following is the format in which the TFieldBlob gets stored: + * [Version][TField][TFieldBlob] + */ +object TFieldBlobCodec { + val BinaryProtocolFactory: TBinaryProtocol.Factory = new TBinaryProtocol.Factory() + val FormatVersion = 1.0 + + def toByteBuffer(tFieldBlob: TFieldBlob): ByteBuffer = { + val baos = new ByteArrayOutputStream() + val prot = BinaryProtocolFactory.getProtocol(new TIOStreamTransport(baos)) + + prot.writeDouble(FormatVersion) + prot.writeFieldBegin(tFieldBlob.field) + prot.writeBinary(ByteArrayCodec.toByteBuffer(tFieldBlob.data)) + + ByteArrayCodec.toByteBuffer(baos.toByteArray) + } + + def fromByteBuffer(buffer: ByteBuffer): TFieldBlob = { + val byteArray = ByteArrayCodec.fromByteBuffer(buffer) + val prot = BinaryProtocolFactory.getProtocol(new TMemoryInputTransport(byteArray)) + + val version = prot.readDouble() + if (version != FormatVersion) { + throw new VersionMismatchError( + "Version mismatch in decoding ByteBuffer to TFieldBlob. " + + "Actual version: " + version + ". Expected version: " + FormatVersion + ) + } + + val tField = prot.readFieldBegin() + val dataBuffer = prot.readBinary() + val data = ByteArrayCodec.fromByteBuffer(dataBuffer) + + TFieldBlob(tField, data) + } +} + +/** + * Helper object to help convert 'CoreFields' object to/from TFieldBlob (and also to construct + * 'CoreFields' object from a 'StoredTweet' object) + * + * More details: + * - A subset of fields on the 'StoredTweet' thrift structure (2-8,19) are 'packaged' and stored + * together as a serialized TFieldBlob object under a single key in Manhattan (see TweetKeyCodec + * helper object above for more details). + * + * - To make the packing/unpacking the fields to/from TFieldBlob object, we created the following + * two helper thrift structures 'CoreFields' and 'InternalTweet' + * + * // The field Ids and types here MUST exactly match field Ids on 'StoredTweet' thrift structure. + * struct CoreFields { + * 2: optional i64 user_id + * ... 
+ * 8: optional i64 contributor_id + * ... + * 19: optional StoredQuotedTweet stored_quoted_tweet + * + * } + * + * // The field id of core fields MUST be "1" + * struct InternalTweet { + * 1: CoreFields coreFields + * } + * + * - Given the above two structures, packing/unpacking fields (2-8,19) on StoredTweet object into a TFieldBlob + * becomes very trivial: + * For packing: + * (i) Copy fields (2-8,19) from StoredTweet object to a new CoreFields object + * (ii) Create a new InternalTweet object with the 'CoreFields' object constructed in step (i) above + * (iii) Extract field "1" as a TFieldBlob from InternalTweet (by calling the scrooge generated "getFieldBlob(1)" + * function on the InternalTweet object) + * + * For unpacking: + * (i) Create an empty 'InternalTweet' object + * (ii) Call scrooge-generated 'setField' by passing the tFieldBlob blob (created by packing steps above) + * (iii) Doing step (ii) above will create a hydrated 'CoreFields' object that can be accessed by 'coreFields' + * member of 'InternalTweet' object. + */ +object CoreFieldsCodec { + val coreFieldIds: Seq[FieldId] = CoreFields.metaData.fields.map(_.id) + + // "Pack" the core fields i.e converts 'CoreFields' object to "packed" tFieldBlob (See description + // above for more details) + def toTFieldBlob(coreFields: CoreFields): TFieldBlob = { + InternalTweet(Some(coreFields)).getFieldBlob(TweetFields.coreFieldsIdInInternalTweet).get + } + + // "Unpack" the core fields from a packed TFieldBlob into a CoreFields object (see description above for + // more details) + def fromTFieldBlob(tFieldBlob: TFieldBlob): CoreFields = { + InternalTweet().setField(tFieldBlob).coreFields.get + } + + // "Unpack" the core fields from a packed TFieldBlob into a Map of core-fieldId-> TFieldBlob + def unpackFields(tFieldBlob: TFieldBlob): Map[Short, TFieldBlob] = + fromTFieldBlob(tFieldBlob).getFieldBlobs(coreFieldIds) + + // Create a 'CoreFields' thrift object from 'Tweet' thrift object.
+ def fromTweet(tweet: StoredTweet): CoreFields = { + // As mentioned above, the field ids and types on the 'CoreFields' struct exactly match the + // corresponding fields on StoredTweet structure. So it is safe to call .getField() on Tweet object and + // and pass the returned tFleldBlob a 'setField' on 'CoreFields' object. + coreFieldIds.foldLeft(CoreFields()) { + case (core, fieldId) => + tweet.getFieldBlob(fieldId) match { + case None => core + case Some(tFieldBlob) => core.setField(tFieldBlob) + } + } + } +} + +/** + * Helper object to convert ManhattanException to FieldResponseCode thrift object + */ +object FieldResponseCodeCodec { + import FieldResponseCodec.ValueNotFoundException + + def fromManhattanException(mhException: ManhattanException): FieldResponseCode = { + mhException match { + case _: ValueNotFoundException => FieldResponseCode.ValueNotFound + case _: InternalErrorManhattanException => FieldResponseCode.Error + case _: InvalidRequestManhattanException => FieldResponseCode.InvalidRequest + case _: DeniedManhattanException => FieldResponseCode.Error + case _: UnsatisfiableManhattanException => FieldResponseCode.Error + case _: TimeoutManhattanException => FieldResponseCode.Timeout + } + } +} + +/** + * Helper object to construct FieldResponse thrift object from an Exception. + * This is typically called to convert 'ManhattanException' object to 'FieldResponse' thrift object + */ +object FieldResponseCodec { + class ValueNotFoundException extends ManhattanException("Value not found!") with NoStackTrace + private[storage] val NotFound = new ValueNotFoundException + + def fromThrowable(e: Throwable, additionalMsg: Option[String] = None): FieldResponse = { + val (respCode, errMsg) = e match { + case mhException: ManhattanException => + (FieldResponseCodeCodec.fromManhattanException(mhException), mhException.getMessage) + case _ => (FieldResponseCode.Error, e.getMessage) + } + + val respMsg = additionalMsg.map(_ + ". 
" + errMsg).orElse(Some(errMsg.toString)) + FieldResponse(respCode, respMsg) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala new file mode 100644 index 000000000..5c89c7a5e --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/DeleteAdditionalFieldsHandler.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Throw +import com.twitter.util.Time + +object DeleteAdditionalFieldsHandler { + def apply( + delete: ManhattanOperations.Delete, + stats: StatsReceiver + ): TweetStorageClient.DeleteAdditionalFields = + (unfilteredTweetIds: Seq[TweetId], additionalFields: Seq[Field]) => { + val tweetIds = unfilteredTweetIds.filter(_ > 0) + val additionalFieldIds = additionalFields.map(_.id) + require(additionalFields.nonEmpty, "Additional fields to delete cannot be empty") + require( + additionalFieldIds.min >= TweetFields.firstAdditionalFieldId, + s"Additional fields $additionalFields must be in additional field range (>= ${TweetFields.firstAdditionalFieldId})" + ) + + Stats.addWidthStat("deleteAdditionalFields", "tweetIds", tweetIds.size, stats) + Stats.addWidthStat( + "deleteAdditionalFields", + "additionalFieldIds", + additionalFieldIds.size, + stats + ) + Stats.updatePerFieldQpsCounters( + "deleteAdditionalFields", + additionalFieldIds, + tweetIds.size, + stats + ) + val mhTimestamp = Time.now + + val stitches = tweetIds.map { tweetId => + val (fieldIds, mhKeysToDelete) = + additionalFieldIds.map { fieldId => + (fieldId, TweetKey.additionalFieldsKey(tweetId, fieldId)) + }.unzip + + val deletionStitches = mhKeysToDelete.map { mhKeyToDelete => + 
delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry + } + + Stitch.collect(deletionStitches).map { responsesTries => + val wasRateLimited = responsesTries.exists { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + + val resultsPerTweet = fieldIds.zip(responsesTries).toMap + + if (wasRateLimited) { + buildTweetOverCapacityResponse("deleteAdditionalFields", tweetId, resultsPerTweet) + } else { + buildTweetResponse("deleteAdditionalFields", tweetId, resultsPerTweet) + } + } + } + + Stitch.collect(stitches) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala new file mode 100644 index 000000000..093559234 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Field.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie.storage + +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.{Tweet => TpTweet} + +/** + * A field of the stored version of a tweet to read, update, or delete. + * + * There is not a one-to-one correspondence between the fields ids of + * [[com.twitter.tweetypie.thriftscala.Tweet]] and + * [[com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet]]. For example, in StoredTweet, + * the nsfwUser property is field 11; in Tweet, it is a property of the coreData struct in field 2. + * To circumvent the confusion of using one set of field ids or the other, callers use instances of + * [[Field]] to reference the part of the object to modify. 
+ */ +class Field private[storage] (val id: Short) extends AnyVal { + override def toString: String = id.toString +} + +/** + * NOTE: Make sure `AllUpdatableCompiledFields` is kept up to date when adding any new field + */ +object Field { + import AdditionalFields.isAdditionalFieldId + val Geo: Field = new Field(StoredTweet.GeoField.id) + val HasTakedown: Field = new Field(StoredTweet.HasTakedownField.id) + val NsfwUser: Field = new Field(StoredTweet.NsfwUserField.id) + val NsfwAdmin: Field = new Field(StoredTweet.NsfwAdminField.id) + val TweetypieOnlyTakedownCountryCodes: Field = + new Field(TpTweet.TweetypieOnlyTakedownCountryCodesField.id) + val TweetypieOnlyTakedownReasons: Field = + new Field(TpTweet.TweetypieOnlyTakedownReasonsField.id) + + val AllUpdatableCompiledFields: Set[Field] = Set(Geo, HasTakedown, NsfwUser, NsfwAdmin) + + def additionalField(id: Short): Field = { + require(isAdditionalFieldId(id), "field id must be in the additional field range") + new Field(id) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala new file mode 100644 index 000000000..dfacaa4a6 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetDeletedTweetsHandler.scala @@ -0,0 +1,150 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.DeletedTweet +import scala.util.control.NonFatal + +sealed trait DeleteState +object DeleteState { + + /** + * This tweet is deleted but has not been permanently deleted from Manhattan. 
Tweets in this state + * may be undeleted. + */ + case object SoftDeleted extends DeleteState + + /** + * This tweet is deleted after being bounced for violating the Twitter Rules but has not been + * permanently deleted from Manhattan. Tweets in this state may NOT be undeleted. + */ + case object BounceDeleted extends DeleteState + + /** + * This tweet has been permanently deleted from Manhattan. + */ + case object HardDeleted extends DeleteState + + /** + * There is no data in Manhattan to distinguish this tweet id from one that never existed. + */ + case object NotFound extends DeleteState + + /** + * This tweet exists and is not in a deleted state. + */ + case object NotDeleted extends DeleteState +} + +case class DeletedTweetResponse( + tweetId: TweetId, + overallResponse: TweetResponseCode, + deleteState: DeleteState, + tweet: Option[DeletedTweet]) + +object GetDeletedTweetsHandler { + def apply( + read: ManhattanOperations.Read, + stats: StatsReceiver + ): TweetStorageClient.GetDeletedTweets = + (unfilteredTweetIds: Seq[TweetId]) => { + val tweetIds = unfilteredTweetIds.filter(_ > 0) + + Stats.addWidthStat("getDeletedTweets", "tweetIds", tweetIds.size, stats) + + val stitches = tweetIds.map { tweetId => + read(tweetId) + .map { mhRecords => + val storedTweet = buildStoredTweet(tweetId, mhRecords) + + TweetStateRecord.mostRecent(mhRecords) match { + case Some(m: TweetStateRecord.SoftDeleted) => softDeleted(m, storedTweet) + case Some(m: TweetStateRecord.BounceDeleted) => bounceDeleted(m, storedTweet) + case Some(m: TweetStateRecord.HardDeleted) => hardDeleted(m, storedTweet) + case _ if storedTweet.getFieldBlobs(expectedFields).isEmpty => notFound(tweetId) + case _ => notDeleted(tweetId, storedTweet) + } + } + .handle { + case _: DeniedManhattanException => + DeletedTweetResponse( + tweetId, + TweetResponseCode.OverCapacity, + DeleteState.NotFound, + None + ) + + case NonFatal(ex) => + TweetUtils.log.warning( + ex, + s"Unhandled exception in 
GetDeletedTweetsHandler for tweetId: $tweetId" + ) + DeletedTweetResponse(tweetId, TweetResponseCode.Failure, DeleteState.NotFound, None) + } + } + + Stitch.collect(stitches) + } + + private def notFound(tweetId: TweetId) = + DeletedTweetResponse( + tweetId = tweetId, + overallResponse = TweetResponseCode.Success, + deleteState = DeleteState.NotFound, + tweet = None + ) + + private def softDeleted(record: TweetStateRecord.SoftDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.SoftDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy(deletedAtMsec = Some(record.createdAt)) + ) + ) + + private def bounceDeleted(record: TweetStateRecord.BounceDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.BounceDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy(deletedAtMsec = Some(record.createdAt)) + ) + ) + + private def hardDeleted(record: TweetStateRecord.HardDeleted, storedTweet: StoredTweet) = + DeletedTweetResponse( + record.tweetId, + TweetResponseCode.Success, + DeleteState.HardDeleted, + Some( + StorageConversions + .toDeletedTweet(storedTweet) + .copy( + hardDeletedAtMsec = Some(record.createdAt), + deletedAtMsec = Some(record.deletedAt) + ) + ) + ) + + /** + * notDeleted returns a tweet to simplify tweetypie.handler.UndeleteTweetHandler + */ + private def notDeleted(tweetId: TweetId, storedTweet: StoredTweet) = + DeletedTweetResponse( + tweetId = tweetId, + overallResponse = TweetResponseCode.Success, + deleteState = DeleteState.NotDeleted, + tweet = Some(StorageConversions.toDeletedTweet(storedTweet)) + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala new file mode 100644 index 000000000..eafdda5e9 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetStoredTweetHandler.scala @@ -0,0 +1,126 @@ +package com.twitter.tweetypie.storage + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.stitch.StitchSeqGroup +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Error +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet.Response._ +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Time +import com.twitter.util.Try +import scala.collection.mutable + +object GetStoredTweetHandler { + private[this] object DeletedState { + def unapply(stateRecord: Option[TweetStateRecord]): Option[TweetStateRecord] = + stateRecord match { + case state @ (Some(_: TweetStateRecord.SoftDeleted) | Some( + _: TweetStateRecord.HardDeleted) | Some(_: TweetStateRecord.BounceDeleted)) => + state + case _ => None + } + } + + private[this] def deletedAtMs(stateRecord: Option[TweetStateRecord]): Option[Long] = + stateRecord match { + case Some(d: TweetStateRecord.SoftDeleted) => Some(d.createdAt) + case Some(d: TweetStateRecord.BounceDeleted) => Some(d.createdAt) + case Some(d: TweetStateRecord.HardDeleted) => Some(d.deletedAt) + case _ => None + } + + private[this] def tweetResponseFromRecords( + tweetId: TweetId, + mhRecords: Seq[TweetManhattanRecord], + statsReceiver: StatsReceiver, + ): GetStoredTweet.Response = { + val errs = + mutable.Buffer[Error]() + + val hasStoredTweetFields: Boolean = mhRecords.exists { + case TweetManhattanRecord(TweetKey(_, _: TweetKey.LKey.FieldKey), _) => true + case _ => false + } + + val storedTweet = if (hasStoredTweetFields) { + Try(buildStoredTweet(tweetId, mhRecords, includeScrubbed = true)) + .onFailure(_ => errs.append(Error.TweetIsCorrupt)) + .toOption + } else { + None + } 
+ + val scrubbedFields: Set[FieldId] = extractScrubbedFields(mhRecords) + val tweet: Option[Tweet] = storedTweet.map(StorageConversions.fromStoredTweetAllowInvalid) + val stateRecords: Seq[TweetStateRecord] = TweetStateRecord.fromTweetMhRecords(mhRecords) + val tweetState: Option[TweetStateRecord] = TweetStateRecord.mostRecent(mhRecords) + + storedTweet.foreach { storedTweet => + val storedExpectedFields = storedTweet.getFieldBlobs(expectedFields) + val missingExpectedFields = expectedFields.filterNot(storedExpectedFields.contains) + if (missingExpectedFields.nonEmpty || !isValid(storedTweet)) { + errs.append(Error.TweetFieldsMissingOrInvalid) + } + + val invalidScrubbedFields = storedTweet.getFieldBlobs(scrubbedFields).keys + if (invalidScrubbedFields.nonEmpty) { + errs.append(Error.ScrubbedFieldsPresent) + } + + if (deletedAtMs(tweetState).exists(_ < Time.now.inMilliseconds - 14.days.inMilliseconds)) { + errs.append(Error.TweetShouldBeHardDeleted) + } + } + + val err = Option(errs.toList).filter(_.nonEmpty) + + (tweet, tweetState, err) match { + case (None, None, None) => + statsReceiver.counter("not_found").incr() + NotFound(tweetId) + + case (None, Some(tweetState: TweetStateRecord.HardDeleted), None) => + statsReceiver.counter("hard_deleted").incr() + HardDeleted(tweetId, Some(tweetState), stateRecords, scrubbedFields) + + case (None, _, Some(errs)) => + statsReceiver.counter("failed").incr() + Failed(tweetId, tweetState, stateRecords, scrubbedFields, errs) + + case (Some(tweet), _, Some(errs)) => + statsReceiver.counter("found_invalid").incr() + FoundWithErrors(tweet, tweetState, stateRecords, scrubbedFields, errs) + + case (Some(tweet), DeletedState(state), None) => + statsReceiver.counter("deleted").incr() + FoundDeleted(tweet, Some(state), stateRecords, scrubbedFields) + + case (Some(tweet), _, None) => + statsReceiver.counter("found").incr() + Found(tweet, tweetState, stateRecords, scrubbedFields) + } + } + + def apply(read: ManhattanOperations.Read, 
statsReceiver: StatsReceiver): GetStoredTweet = { + + object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { + override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { + Stats.addWidthStat("getStoredTweet", "tweetIds", tweetIds.size, statsReceiver) + Stitch.traverse(tweetIds)(read(_)) + } + } + + tweetId => + if (tweetId <= 0) { + Stitch.NotFound + } else { + Stitch + .call(tweetId, mhGroup) + .map(mhRecords => + tweetResponseFromRecords(tweetId, mhRecords, statsReceiver.scope("getStoredTweet"))) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala new file mode 100644 index 000000000..f68025e2d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/GetTweetHandler.scala @@ -0,0 +1,167 @@ +package com.twitter.tweetypie.storage + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.stitch.StitchSeqGroup +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanException +import com.twitter.tweetypie.storage.TweetStateRecord.BounceDeleted +import com.twitter.tweetypie.storage.TweetStateRecord.HardDeleted +import com.twitter.tweetypie.storage.TweetStateRecord.SoftDeleted +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Duration +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time + +object GetTweetHandler { + private[this] val logger = Logger(getClass) + + 
////////////////////////////////////////////////// + // Logging racy reads for later validation. + + val RacyTweetWindow: Duration = 10.seconds + + /** + * If this read is soon after the tweet was created, then we would usually + * expect it to be served from cache. This early read indicates that this + * tweet is prone to consistency issues, so we log what's present in + * Manhattan at the time of the read for later analysis. + */ + private[this] def logRacyRead(tweetId: TweetId, records: Seq[TweetManhattanRecord]): Unit = + if (SnowflakeId.isSnowflakeId(tweetId)) { + val tweetAge = Time.now.since(SnowflakeId(tweetId).time) + if (tweetAge <= RacyTweetWindow) { + val sb = new StringBuilder + sb.append("racy_tweet_read\t") + .append(tweetId) + .append('\t') + .append(tweetAge.inMilliseconds) // Log the age for analysis purposes + records.foreach { rec => + sb.append('\t') + .append(rec.lkey) + rec.value.timestamp.foreach { ts => + // If there is a timestamp for this key, log it so that we can tell + // later on whether a value should have been present. We expect + // keys written in a single write to have the same timestamp, and + // generally, keys written in separate writes will have different + // timestamps. The timestamp value is optional in Manhattan, but + // we expect there to always be a value for the timestamp. + sb.append(':') + .append(ts.inMilliseconds) + } + } + logger.info(sb.toString) + } + } + + /** + * Convert a set of records from Manhattan into a GetTweet.Response. 
+ */ + def tweetResponseFromRecords( + tweetId: TweetId, + mhRecords: Seq[TweetManhattanRecord], + statsReceiver: StatsReceiver = NullStatsReceiver + ): GetTweet.Response = + if (mhRecords.isEmpty) { + GetTweet.Response.NotFound + } else { + // If no internal fields are present or no required fields present, we consider the tweet + // as not returnable (even if some additional fields are present) + def tweetFromRecords(tweetId: TweetId, mhRecords: Seq[TweetManhattanRecord]) = { + val storedTweet = buildStoredTweet(tweetId, mhRecords) + if (storedTweet.getFieldBlobs(expectedFields).nonEmpty) { + if (isValid(storedTweet)) { + statsReceiver.counter("valid").incr() + Some(StorageConversions.fromStoredTweet(storedTweet)) + } else { + log.info(s"Invalid Tweet Id: $tweetId") + statsReceiver.counter("invalid").incr() + None + } + } else { + // The Tweet contained none of the fields defined in `expectedFields` + log.info(s"Expected Fields Not Present Tweet Id: $tweetId") + statsReceiver.counter("expected_fields_not_present").incr() + None + } + } + + val stateRecord = TweetStateRecord.mostRecent(mhRecords) + stateRecord match { + // some cases don't require an attempt to construct a Tweet + case Some(_: SoftDeleted) | Some(_: HardDeleted) => GetTweet.Response.Deleted + + // all other cases require an attempt to construct a Tweet, which may not be successful + case _ => + logRacyRead(tweetId, mhRecords) + (stateRecord, tweetFromRecords(tweetId, mhRecords)) match { + // BounceDeleted contains the Tweet data so that callers can access data on the + // tweet (e.g. hard delete daemon requires conversationId and userId). There are no + // plans for Tweetypie server to make use of the returned tweet at this time.
+ case (Some(_: BounceDeleted), Some(tweet)) => GetTweet.Response.BounceDeleted(tweet) + case (Some(_: BounceDeleted), None) => GetTweet.Response.Deleted + case (_, Some(tweet)) => GetTweet.Response.Found(tweet) + case _ => GetTweet.Response.NotFound + } + } + } + + def apply(read: ManhattanOperations.Read, statsReceiver: StatsReceiver): GetTweet = { + + object stats { + val getTweetScope = statsReceiver.scope("getTweet") + val deniedCounter: Counter = getTweetScope.counter("mh_denied") + val mhExceptionCounter: Counter = getTweetScope.counter("mh_exception") + val nonFatalExceptionCounter: Counter = getTweetScope.counter("non_fatal_exception") + val notFoundCounter: Counter = getTweetScope.counter("not_found") + } + + object mhGroup extends StitchSeqGroup[TweetId, Seq[TweetManhattanRecord]] { + override def run(tweetIds: Seq[TweetId]): Stitch[Seq[Seq[TweetManhattanRecord]]] = { + Stats.addWidthStat("getTweet", "tweetIds", tweetIds.size, statsReceiver) + Stitch.traverse(tweetIds)(read(_)) + } + } + + tweetId => + if (tweetId <= 0) { + Stitch.NotFound + } else { + Stitch + .call(tweetId, mhGroup) + .map(mhRecords => tweetResponseFromRecords(tweetId, mhRecords, stats.getTweetScope)) + .liftToTry + .map { + case Throw(mhException: DeniedManhattanException) => + stats.deniedCounter.incr() + Throw(RateLimited("", mhException)) + + // Encountered some other Manhattan error + case t @ Throw(_: ManhattanException) => + stats.mhExceptionCounter.incr() + t + + // Something else happened + case t @ Throw(ex) => + stats.nonFatalExceptionCounter.incr() + TweetUtils.log + .warning(ex, s"Unhandled exception in GetTweetHandler for tweetId: $tweetId") + t + + case r @ Return(GetTweet.Response.NotFound) => + stats.notFoundCounter.incr() + r + + case r @ Return(_) => r + } + .lowerFromTry + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala 
new file mode 100644 index 000000000..8483926f4 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/HardDeleteTweetHandler.scala @@ -0,0 +1,153 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetKey.LKey.ForceAddedStateKey +import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet +import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet.Response._ +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try + +object HardDeleteTweetHandler { + + /** + * When a tweet is removed lkeys with these prefixes will be deleted permanently. + */ + private[storage] def isKeyToBeDeleted(key: TweetKey): Boolean = + key.lKey match { + case (TweetKey.LKey.CoreFieldsKey | TweetKey.LKey.InternalFieldsKey(_) | + TweetKey.LKey.AdditionalFieldsKey(_) | TweetKey.LKey.SoftDeletionStateKey | + TweetKey.LKey.BounceDeletionStateKey | TweetKey.LKey.UnDeletionStateKey | + TweetKey.LKey.ForceAddedStateKey) => + true + case _ => false + } + + /** + * When hard deleting, there are two actions, writing the record and + * removing the tweet data. If we are performing any action, we will + * always try to remove the tweet data. If the tweet does not yet have a + * hard deletion record, then we will need to write one. This method + * returns the HardDeleted record if it needs to be written, and None + * if it has already been written. + * + * If the tweet is not in a deleted state we signal this with a + * Throw(NotDeleted). 
+ */ + private[storage] def getHardDeleteStateRecord( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + mhTimestamp: Time, + stats: StatsReceiver + ): Try[Option[TweetStateRecord.HardDeleted]] = { + val mostRecent = TweetStateRecord.mostRecent(records) + val currentStateStr = mostRecent.map(_.name).getOrElse("no_tweet_state_record") + stats.counter(currentStateStr).incr() + + mostRecent match { + case Some( + record @ (TweetStateRecord.SoftDeleted(_, _) | TweetStateRecord.BounceDeleted(_, _))) => + Return( + Some( + TweetStateRecord.HardDeleted( + tweetId = tweetId, + // createdAt is the hard deletion timestamp when dealing with hard deletes in Manhattan + createdAt = mhTimestamp.inMillis, + // deletedAt is the soft deletion timestamp when dealing with hard deletes in Manhattan + deletedAt = record.createdAt + ) + ) + ) + + case Some(_: TweetStateRecord.HardDeleted) => + Return(None) + + case Some(_: TweetStateRecord.ForceAdded) => + Throw(NotDeleted(tweetId, Some(ForceAddedStateKey))) + + case Some(_: TweetStateRecord.Undeleted) => + Throw(NotDeleted(tweetId, Some(TweetKey.LKey.UnDeletionStateKey))) + + case None => + Throw(NotDeleted(tweetId, None)) + } + } + + /** + * This handler returns HardDeleteTweet.Response.Deleted if data associated with the tweet is deleted, + * either as a result of this request or a previous one. + * + * The most recently added record determines the tweet's state. This method will only delete data + * for tweets in the soft-delete or hard-delete state. (Calling hardDeleteTweet for tweets that have + * already been hard-deleted will remove any lkeys that may not have been deleted previously). 
+ */ + def apply( + read: ManhattanOperations.Read, + insert: ManhattanOperations.Insert, + delete: ManhattanOperations.Delete, + scribe: Scribe, + stats: StatsReceiver + ): TweetId => Stitch[HardDeleteTweet.Response] = { + val hardDeleteStats = stats.scope("hardDeleteTweet") + val hardDeleteTweetCancelled = hardDeleteStats.counter("cancelled") + val beforeStateStats = hardDeleteStats.scope("before_state") + + def removeRecords(keys: Seq[TweetKey], mhTimestamp: Time): Stitch[Unit] = + Stitch + .collect(keys.map(key => delete(key, Some(mhTimestamp)).liftToTry)) + .map(collectWithRateLimitCheck) + .lowerFromTry + + def writeRecord(record: Option[TweetStateRecord.HardDeleted]): Stitch[Unit] = + record match { + case Some(r) => + insert(r.toTweetMhRecord).onSuccess { _ => + scribe.logRemoved( + r.tweetId, + Time.fromMilliseconds(r.createdAt), + isSoftDeleted = false + ) + } + case None => Stitch.Unit + } + + tweetId => + read(tweetId) + .flatMap { records => + val hardDeletionTimestamp = Time.now + + val keysToBeDeleted: Seq[TweetKey] = records.map(_.key).filter(isKeyToBeDeleted) + + getHardDeleteStateRecord( + tweetId, + records, + hardDeletionTimestamp, + beforeStateStats) match { + case Return(record) => + Stitch + .join( + writeRecord(record), + removeRecords(keysToBeDeleted, hardDeletionTimestamp) + ).map(_ => + // If the tweetId is non-snowflake and has previously been hard deleted + // there will be no coreData record to fall back on to get the tweet + // creation time and createdAtMillis will be None. 
+ Deleted( + // deletedAtMillis: when the tweet was hard deleted + deletedAtMillis = Some(hardDeletionTimestamp.inMillis), + // createdAtMillis: when the tweet itself was created + // (as opposed to when the deletion record was created) + createdAtMillis = + TweetUtils.creationTimeFromTweetIdOrMHRecords(tweetId, records) + )) + case Throw(notDeleted: NotDeleted) => + hardDeleteTweetCancelled.incr() + Stitch.value(notDeleted) + case Throw(e) => Stitch.exception(e) // this should never happen + } + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala new file mode 100644 index 000000000..113a749cb --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/InspectFields.scala @@ -0,0 +1,228 @@ +package com.twitter.tweetypie.storage + +import com.google.common.base.CaseFormat +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.scrooge.TFieldBlob +import com.twitter.scrooge.ThriftStructFieldInfo +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv._ +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage.ManhattanOperations.Read +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.tweetypie.thriftscala.{Tweet => TweetypieTweet} +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import diffshow.Container +import diffshow.DiffShow +import diffshow.Expr +import org.apache.commons.codec.binary.Base64 +import scala.util.Try +import shapeless.Cached +import shapeless.Strict + +// This class is used by the Tweetypie Console to inspect tweet field content in Manhattan +class InspectFields(svcIdentifier: 
ServiceIdentifier) { + val mhApplicationId = "tbird_mh" + val mhDatasetName = "tbird_mh" + val mhDestinationName = "/s/manhattan/cylon.native-thrift" + val mhTimeout: Duration = 5000.milliseconds + + val localMhEndpoint: ManhattanKVEndpoint = + ManhattanKVEndpointBuilder( + ManhattanKVClient( + mhApplicationId, + mhDestinationName, + ManhattanKVClientMtlsParams(svcIdentifier))) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(mhTimeout) + .build() + + val readOperation: Read = (new ManhattanOperations(mhDatasetName, localMhEndpoint)).read + + def lookup(tweetId: Long): Future[String] = { + val result = readOperation(tweetId).liftToTry.map { + case Return(mhRecords) => + prettyPrintManhattanRecords(tweetId, TweetKey.padTweetIdStr(tweetId), mhRecords) + case Throw(e) => e.toString + } + + Stitch.run(result) + } + + def storedTweet(tweetId: Long): Future[StoredTweet] = { + val result = readOperation(tweetId).liftToTry.map { + case Return(mhRecords) => + buildStoredTweet(tweetId, mhRecords) + case Throw(e) => + throw e + } + + Stitch.run(result) + } + + private[this] def prettyPrintManhattanRecords( + tweetId: Long, + pkey: String, + mhRecords: Seq[TweetManhattanRecord] + ): String = { + if (mhRecords.isEmpty) { + "Not Found" + } else { + val formattedRecords = getFormattedManhattanRecords(tweetId, mhRecords) + val keyFieldWidth = formattedRecords.map(_.key.length).max + 2 + val fieldNameFieldWidth = formattedRecords.map(_.fieldName.length).max + 2 + + val formatString = s" %-${keyFieldWidth}s %-${fieldNameFieldWidth}s %s" + + val recordsString = + formattedRecords + .map { record => + val content = record.content.replaceAll("\n", "\n" + formatString.format("", "", "")) + formatString.format(record.key, record.fieldName, content) + } + .mkString("\n") + + "/tbird_mh/" + pkey + "/" + "\n" + recordsString + } + } + + private[this] def getFormattedManhattanRecords( + tweetId: Long, + mhRecords: Seq[TweetManhattanRecord] + ): 
Seq[FormattedManhattanRecord] = { + val storedTweet = buildStoredTweet(tweetId, mhRecords).copy(updatedAt = None) + val tweetypieTweet: Option[TweetypieTweet] = + Try(StorageConversions.fromStoredTweet(storedTweet)).toOption + + val blobMap: Map[String, TFieldBlob] = getStoredTweetBlobs(mhRecords).map { blob => + getFieldName(blob.field.id) -> blob + }.toMap + + mhRecords + .map { + case TweetManhattanRecord(fullKey, mhValue) => + FormattedManhattanRecord( + key = fullKey.lKey.toString, + fieldName = getFieldName(fullKey.lKey), + content = prettyPrintManhattanValue( + fullKey.lKey, + mhValue, + storedTweet, + tweetypieTweet, + tweetId, + blobMap + ) + ) + } + .sortBy(_.key.replace("external", "xternal")) // sort by key, with internal first + } + + private[this] def getFieldNameFromThrift( + fieldId: Short, + fieldInfos: List[ThriftStructFieldInfo] + ): String = + fieldInfos + .find(info => info.tfield.id == fieldId) + .map(_.tfield.name) + .getOrElse("") + + private[this] def isLkeyScrubbedField(lkey: String): Boolean = + lkey.split("/")(1) == "scrubbed_fields" + + private[this] def getFieldName(lkey: TweetKey.LKey): String = + lkey match { + case fieldKey: TweetKey.LKey.FieldKey => getFieldName(fieldKey.fieldId) + case _ => "" + } + + private[this] def getFieldName(fieldId: Short): String = + if (fieldId == 1) { + "core_fields" + } else if (AdditionalFields.isAdditionalFieldId(fieldId)) { + getFieldNameFromThrift(fieldId, TweetypieTweet.fieldInfos) + } else { + getFieldNameFromThrift(fieldId, StoredTweet.fieldInfos) + } + + private[this] def prettyPrintManhattanValue( + lkey: TweetKey.LKey, + mhValue: TweetManhattanValue, + storedTweet: StoredTweet, + tweetypieTweet: Option[TweetypieTweet], + tweetId: Long, + tfieldBlobs: Map[String, TFieldBlob] + ): String = { + val decoded = lkey match { + case _: TweetKey.LKey.MetadataKey => + decodeMetadata(mhValue) + + case fieldKey: TweetKey.LKey.FieldKey => + tfieldBlobs + .get(getFieldName(fieldKey.fieldId)) + .map(blob => 
decodeField(tweetId, blob, storedTweet, tweetypieTweet)) + + case _ => + None + } + + decoded.getOrElse { // If all else fails, encode the data as a base64 string + val contents = mhValue.contents.array + if (contents.isEmpty) { + "" + } else { + Base64.encodeBase64String(contents) + } + } + } + + private[this] def decodeMetadata(mhValue: TweetManhattanValue): Option[String] = { + val byteArray = ByteArrayCodec.fromByteBuffer(mhValue.contents) + Try(Json.decode(byteArray).toString).toOption + } + + private[this] def decodeField( + tweetId: Long, + blob: TFieldBlob, + storedTweet: StoredTweet, + tweetypieTweet: Option[TweetypieTweet] + ): String = { + val fieldId = blob.field.id + + if (fieldId == 1) { + coreFields(storedTweet) + } else if (AdditionalFields.isAdditionalFieldId(fieldId)) { + decodeTweetWithOneField(TweetypieTweet(tweetId).setField(blob)) + } else { + decodeTweetWithOneField(StoredTweet(tweetId).setField(blob)) + } + } + + // Takes a Tweet or StoredTweet with a single field set and returns the value of that field + private[this] def decodeTweetWithOneField[T]( + tweetWithOneField: T + )( + implicit ev: Cached[Strict[DiffShow[T]]] + ): String = { + val config = diffshow.Config(hideFieldWithEmptyVal = true) + val tree: Expr = config.transform(DiffShow.show(tweetWithOneField)) + + // matches a Tweet or StoredTweet with two values, the first being the id + val value = tree.transform { + case Container(_, List(diffshow.Field("id", _), diffshow.Field(_, value))) => value + } + + config.exprPrinter.apply(value, width = 80).render + } + + private[this] def coreFields(storedTweet: StoredTweet): String = + diffshow.show(CoreFieldsCodec.fromTweet(storedTweet), hideFieldWithEmptyVal = true) + + private[this] def toCamelCase(s: String): String = + CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, s) +} + +case class FormattedManhattanRecord(key: String, fieldName: String, content: String) diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala new file mode 100644 index 000000000..e5f087a34 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Json.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie.storage + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +object Json { + val TimestampKey = "timestamp" + val SoftDeleteTimestampKey = "softdelete_timestamp" + + private val mapper = new ObjectMapper + mapper.registerModule(DefaultScalaModule) + + def encode(m: Map[String, Any]): Array[Byte] = mapper.writeValueAsBytes(m) + + def decode(arr: Array[Byte]): Map[String, Any] = + mapper.readValue[Map[String, Any]](arr, classOf[Map[String, Any]]) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala new file mode 100644 index 000000000..fed0af6c7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanOperations.scala @@ -0,0 +1,103 @@ +package com.twitter.tweetypie.storage + +import com.twitter.bijection.Injection +import com.twitter.io.Buf +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.bijections.Bijections.BufInjection +import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint +import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L1 +import com.twitter.storage.client.manhattan.kv.impl.Component +import com.twitter.storage.client.manhattan.kv.{impl => mh} +import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection +import com.twitter.util.Time +import java.nio.ByteBuffer +import scala.util.control.NonFatal + +case class TweetManhattanRecord(key: TweetKey, value: TweetManhattanValue) { + def pkey: TweetId = key.tweetId + def lkey: TweetKey.LKey = 
key.lKey + + /** + * Produces a representation that is human-readable, but contains + * all of the information from the record. It is not intended for + * producing machine-readable values. + * + * This conversion is relatively expensive, so beware of using it in + * hot code paths. + */ + override def toString: String = { + val valueString = + try { + key.lKey match { + case _: TweetKey.LKey.MetadataKey => + StringCodec.fromByteBuffer(value.contents) + + case _: TweetKey.LKey.FieldKey => + val tFieldBlob = TFieldBlobCodec.fromByteBuffer(value.contents) + s"TFieldBlob(${tFieldBlob.field}, 0x${Buf.slowHexString(tFieldBlob.content)})" + + case TweetKey.LKey.Unknown(_) => + "0x" + Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents)) + } + } catch { + case NonFatal(e) => + val hexValue = Buf.slowHexString(Buf.ByteBuffer.Shared(value.contents)) + s"0x$hexValue (failed to decode due to $e)" + } + + s"$key => ${value.copy(contents = valueString)}" + } +} + +object ManhattanOperations { + type Read = TweetId => Stitch[Seq[TweetManhattanRecord]] + type Insert = TweetManhattanRecord => Stitch[Unit] + type Delete = (TweetKey, Option[Time]) => Stitch[Unit] + type DeleteRange = TweetId => Stitch[Unit] + + object PkeyInjection extends Injection[TweetId, String] { + override def apply(tweetId: TweetId): String = TweetKey.padTweetIdStr(tweetId) + override def invert(str: String): scala.util.Try[TweetId] = scala.util.Try(str.toLong) + } + + case class InvalidLkey(lkeyStr: String) extends Exception + + object LkeyInjection extends Injection[TweetKey.LKey, String] { + override def apply(lkey: TweetKey.LKey): String = lkey.toString + override def invert(str: String): scala.util.Try[TweetKey.LKey] = + scala.util.Success(TweetKey.LKey.fromString(str)) + } + + val KeyDescriptor: DescriptorP1L1.EmptyKey[TweetId, TweetKey.LKey] = + mh.KeyDescriptor( + Component(PkeyInjection.andThen(StringInjection)), + Component(LkeyInjection.andThen(StringInjection)) + ) + + val ValueDescriptor: 
mh.ValueDescriptor.EmptyValue[ByteBuffer] = mh.ValueDescriptor(BufInjection) +} + +class ManhattanOperations(dataset: String, mhEndpoint: ManhattanKVEndpoint) { + import ManhattanOperations._ + + private[this] def pkey(tweetId: TweetId) = KeyDescriptor.withDataset(dataset).withPkey(tweetId) + + def read: Read = { tweetId => + mhEndpoint.slice(pkey(tweetId).under(), ValueDescriptor).map { mhData => + mhData.map { + case (key, value) => TweetManhattanRecord(TweetKey(key.pkey, key.lkey), value) + } + } + } + + def insert: Insert = + record => { + val mhKey = pkey(record.key.tweetId).withLkey(record.key.lKey) + mhEndpoint.insert(mhKey, ValueDescriptor.withValue(record.value)) + } + + def delete: Delete = (key, time) => mhEndpoint.delete(pkey(key.tweetId).withLkey(key.lKey), time) + + def deleteRange: DeleteRange = + tweetId => mhEndpoint.deleteRange(KeyDescriptor.withDataset(dataset).withPkey(tweetId).under()) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala new file mode 100644 index 000000000..daf6a3076 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ManhattanTweetStorageClient.scala @@ -0,0 +1,451 @@ +package com.twitter.tweetypie.storage + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.ssl.OpportunisticTls +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.BareFormatter +import com.twitter.logging.Level +import com.twitter.logging.ScribeHandler +import com.twitter.logging._ +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.bijections.Bijections._ +import com.twitter.storage.client.manhattan.kv._ +import 
com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.storage.Scribe.ScribeHandlerFactory +import com.twitter.tweetypie.storage.TweetStorageClient.BounceDelete +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.TweetStorageClient.HardDeleteTweet +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.util.StitchUtils +import com.twitter.util.Duration +import com.twitter.util.Return +import com.twitter.util.Throw +import scala.util.Random + +object ManhattanTweetStorageClient { + object Config { + + /** + * The Manhattan dataset where tweets are stored is not externally + * configurable because writing tweets to a non-production dataset + * requires great care. Staging instances using a different dataset will + * write tweets to a non-production store, but will publish events, log to + * HDFS, and cache data referencing tweets in that store which are not + * accessible by the rest of the production cluster. + * + * In a completely isolated environment it should be safe to write to + * other datasets for testing purposes. + */ + val Dataset = "tbird_mh" + + /** + * Once a tweet has been deleted it can only be undeleted within this time + * window, after which [[UndeleteHandler]] will return an error on + * undelete attempts. + */ + val UndeleteWindowHours = 240 + + /** + * Default label used for underlying Manhattan Thrift client metrics + * + * The finagle client metrics will be exported at clnt/:label. 
+ */ + val ThriftClientLabel = "mh_cylon" + + /** + * Return the corresponding Wily path for the Cylon cluster in the "other" DC + */ + def remoteDestination(zone: String): String = + s"/srv#/prod/${remoteZone(zone)}/manhattan/cylon.native-thrift" + + private def remoteZone(zone: String) = zone match { + case "pdxa" => "atla" + case "atla" | "localhost" => "pdxa" + case _ => + throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'") + } + } + + /** + * @param applicationId Manhattan application id used for quota accounting + * @param localDestination Wily path to local Manhattan cluster + * @param localTimeout Overall timeout (including retries) for all reads/writes to local cluster + * @param remoteDestination Wily path to remote Manhattan cluster, used for undelete and force add + * @param remoteTimeout Overall timeout (including retries) for all reads/writes to remote cluster + * @param undeleteWindowHours Amount of time during which a deleted tweet can be undeleted + * @param thriftClientLabel Label used to scope stats for Manhattan Thrift client + * @param maxRequestsPerBatch Configure the Stitch RequestGroup.Generator batch size + * @param serviceIdentifier The ServiceIdentifier to use when making connections to a Manhattan cluster + * @param opportunisticTlsLevel The level to use for opportunistic TLS for connections to the Manhattan cluster + */ + case class Config( + applicationId: String, + localDestination: String, + localTimeout: Duration, + remoteDestination: String, + remoteTimeout: Duration, + undeleteWindowHours: Int = Config.UndeleteWindowHours, + thriftClientLabel: String = Config.ThriftClientLabel, + maxRequestsPerBatch: Int = Int.MaxValue, + serviceIdentifier: ServiceIdentifier, + opportunisticTlsLevel: OpportunisticTls.Level) + + /** + * Sanitizes the input for APIs which take in a (Tweet, Seq[Field]) as input. 
+ * + * NOTE: This function only applies sanity checks which are common to + * all APIs which take in a (Tweet, Seq[Field]) as input. API specific + * checks are not covered here. + * + * @param apiStitch the backing API call + * @tparam T the output type of the backing API call + * @return a stitch function which does some basic input sanity checking + */ + private[storage] def sanitizeTweetFields[T]( + apiStitch: (Tweet, Seq[Field]) => Stitch[T] + ): (Tweet, Seq[Field]) => Stitch[T] = + (tweet, fields) => { + require(fields.forall(_.id > 0), s"Field ids ${fields} are not positive numbers") + apiStitch(tweet, fields) + } + + // Returns a handler that asynchronously logs messages to Scribe using the BareFormatter which + // logs just the message without any additional metadata + def scribeHandler(categoryName: String): HandlerFactory = + ScribeHandler( + formatter = BareFormatter, + maxMessagesPerTransaction = 100, + category = categoryName, + level = Some(Level.TRACE) + ) + + /** + * A Config appropriate for interactive sessions and scripts. + */ + def develConfig(): Config = + Config( + applicationId = Option(System.getenv("USER")).getOrElse("") + ".devel", + localDestination = "/s/manhattan/cylon.native-thrift", + localTimeout = 10.seconds, + remoteDestination = "/s/manhattan/cylon.native-thrift", + remoteTimeout = 10.seconds, + undeleteWindowHours = Config.UndeleteWindowHours, + thriftClientLabel = Config.ThriftClientLabel, + maxRequestsPerBatch = Int.MaxValue, + serviceIdentifier = ServiceIdentifier(System.getenv("USER"), "tweetypie", "devel", "local"), + opportunisticTlsLevel = OpportunisticTls.Required + ) + + /** + * Build a Manhattan tweet storage client for use in interactive + * sessions and scripts. 
+ */ + def devel(): TweetStorageClient = + new ManhattanTweetStorageClient( + develConfig(), + NullStatsReceiver, + ClientIdHelper.default, + ) +} + +class ManhattanTweetStorageClient( + config: ManhattanTweetStorageClient.Config, + statsReceiver: StatsReceiver, + private val clientIdHelper: ClientIdHelper) + extends TweetStorageClient { + import ManhattanTweetStorageClient._ + + lazy val scribeHandlerFactory: ScribeHandlerFactory = scribeHandler _ + val scribe: Scribe = new Scribe(scribeHandlerFactory, statsReceiver) + + def mkClient( + dest: String, + label: String + ): ManhattanKVClient = { + val mhMtlsParams = + if (config.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams + else + ManhattanKVClientMtlsParams( + serviceIdentifier = config.serviceIdentifier, + opportunisticTls = config.opportunisticTlsLevel + ) + + new ManhattanKVClient( + config.applicationId, + dest, + mhMtlsParams, + label, + Seq(Experiments.ApertureLoadBalancer)) + } + + val localClient: ManhattanKVClient = mkClient(config.localDestination, config.thriftClientLabel) + + val localMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(localClient) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(config.localTimeout) + .maxRequestsPerBatch(config.maxRequestsPerBatch) + .build() + + val localManhattanOperations = new ManhattanOperations(Config.Dataset, localMhEndpoint) + + val remoteClient: ManhattanKVClient = + mkClient(config.remoteDestination, s"${config.thriftClientLabel}_remote") + + val remoteMhEndpoint: ManhattanKVEndpoint = ManhattanKVEndpointBuilder(remoteClient) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(config.remoteTimeout) + .build() + + val remoteManhattanOperations = new ManhattanOperations(Config.Dataset, remoteMhEndpoint) + + /** + * Note: This translation is only useful for non-batch endpoints. Batch endpoints currently + * represent failure without propagating an exception + * (e.g. 
[[com.twitter.tweetypie.storage.Response.TweetResponseCode.Failure]]). + */ + private[this] def translateExceptions( + apiName: String, + statsReceiver: StatsReceiver + ): PartialFunction[Throwable, Throwable] = { + case e: IllegalArgumentException => ClientError(e.getMessage, e) + case e: DeniedManhattanException => RateLimited(e.getMessage, e) + case e: VersionMismatchError => + statsReceiver.scope(apiName).counter("mh_version_mismatches").incr() + e + case e: InternalError => + TweetUtils.log.error(e, s"Error processing $apiName request: ${e.getMessage}") + e + } + + /** + * Count requests per client id producing metrics of the form + * .../clients/:root_client_id/requests + */ + def observeClientId[A, B]( + apiStitch: A => Stitch[B], + statsReceiver: StatsReceiver, + clientIdHelper: ClientIdHelper, + ): A => Stitch[B] = { + val clients = statsReceiver.scope("clients") + + val incrementClientRequests = { args: A => + val clientId = clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId) + clients.counter(clientId, "requests").incr + } + + a => { + incrementClientRequests(a) + apiStitch(a) + } + } + + /** + * Increment counters based on the overall response status of the returned [[GetTweet.Response]]. 
+ */ + def observeGetTweetResponseCode[A]( + apiStitch: A => Stitch[GetTweet.Response], + statsReceiver: StatsReceiver + ): A => Stitch[GetTweet.Response] = { + val scope = statsReceiver.scope("response_code") + + val success = scope.counter("success") + val notFound = scope.counter("not_found") + val failure = scope.counter("failure") + val overCapacity = scope.counter("over_capacity") + val deleted = scope.counter("deleted") + val bounceDeleted = scope.counter("bounce_deleted") + + a => + apiStitch(a).respond { + case Return(_: GetTweet.Response.Found) => success.incr() + case Return(GetTweet.Response.NotFound) => notFound.incr() + case Return(_: GetTweet.Response.BounceDeleted) => bounceDeleted.incr() + case Return(GetTweet.Response.Deleted) => deleted.incr() + case Throw(_: RateLimited) => overCapacity.incr() + case Throw(_) => failure.incr() + } + } + + /** + * We do 3 things here: + * + * - Bookkeeping for overall requests + * - Bookkeeping for per api requests + * - Translate exceptions + * + * @param apiName the API being called + * @param apiStitch the implementation of the API + * @tparam A template for input type of API + * @tparam B template for output type of API + * @return Function which executes the given API call + */ + private[storage] def endpoint[A, B]( + apiName: String, + apiStitch: A => Stitch[B] + ): A => Stitch[B] = { + val translateException = translateExceptions(apiName, statsReceiver) + val observe = StitchUtils.observe[B](statsReceiver, apiName) + + a => + StitchUtils.translateExceptions( + observe(apiStitch(a)), + translateException + ) + } + + private[storage] def endpoint2[A, B, C]( + apiName: String, + apiStitch: (A, B) => Stitch[C], + clientIdHelper: ClientIdHelper, + ): (A, B) => Stitch[C] = + Function.untupled(endpoint(apiName, apiStitch.tupled)) + + val getTweet: TweetStorageClient.GetTweet = { + val stats = statsReceiver.scope("getTweet") + + observeClientId( + observeGetTweetResponseCode( + endpoint( + "getTweet", + 
GetTweetHandler( + read = localManhattanOperations.read, + statsReceiver = stats, + ) + ), + stats, + ), + stats, + clientIdHelper, + ) + } + + val getStoredTweet: TweetStorageClient.GetStoredTweet = { + val stats = statsReceiver.scope("getStoredTweet") + + observeClientId( + endpoint( + "getStoredTweet", + GetStoredTweetHandler( + read = localManhattanOperations.read, + statsReceiver = stats, + ) + ), + stats, + clientIdHelper, + ) + } + + val addTweet: TweetStorageClient.AddTweet = + endpoint( + "addTweet", + AddTweetHandler( + insert = localManhattanOperations.insert, + scribe = scribe, + stats = statsReceiver + ) + ) + + val updateTweet: TweetStorageClient.UpdateTweet = + endpoint2( + "updateTweet", + ManhattanTweetStorageClient.sanitizeTweetFields( + UpdateTweetHandler( + insert = localManhattanOperations.insert, + stats = statsReceiver, + ) + ), + clientIdHelper, + ) + + val softDelete: TweetStorageClient.SoftDelete = + endpoint( + "softDelete", + SoftDeleteHandler( + insert = localManhattanOperations.insert, + scribe = scribe + ) + ) + + val bounceDelete: BounceDelete = + endpoint( + "bounceDelete", + BounceDeleteHandler( + insert = localManhattanOperations.insert, + scribe = scribe + ) + ) + + val undelete: TweetStorageClient.Undelete = + endpoint( + "undelete", + UndeleteHandler( + read = localManhattanOperations.read, + localInsert = localManhattanOperations.insert, + remoteInsert = remoteManhattanOperations.insert, + delete = localManhattanOperations.delete, + undeleteWindowHours = config.undeleteWindowHours, + stats = statsReceiver + ) + ) + + val getDeletedTweets: TweetStorageClient.GetDeletedTweets = + endpoint( + "getDeletedTweets", + GetDeletedTweetsHandler( + read = localManhattanOperations.read, + stats = statsReceiver + ) + ) + + val deleteAdditionalFields: TweetStorageClient.DeleteAdditionalFields = + endpoint2( + "deleteAdditionalFields", + DeleteAdditionalFieldsHandler( + delete = localManhattanOperations.delete, + stats = statsReceiver, + ), 
+ clientIdHelper, + ) + + val scrub: TweetStorageClient.Scrub = + endpoint2( + "scrub", + ScrubHandler( + insert = localManhattanOperations.insert, + delete = localManhattanOperations.delete, + scribe = scribe, + stats = statsReceiver, + ), + clientIdHelper, + ) + + val hardDeleteTweet: HardDeleteTweet = + endpoint( + "hardDeleteTweet", + HardDeleteTweetHandler( + read = localManhattanOperations.read, + insert = localManhattanOperations.insert, + delete = localManhattanOperations.delete, + scribe = scribe, + stats = statsReceiver + ) + ) + + val ping: TweetStorageClient.Ping = + () => + Stitch + .run( + localMhEndpoint + .get( + ManhattanOperations.KeyDescriptor + .withDataset(Config.Dataset) + .withPkey(Random.nextLong().abs) + .withLkey(TweetKey.LKey.CoreFieldsKey), // could be any lkey + ValueDescriptor(BufInjection) + ).unit + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala new file mode 100644 index 000000000..8444a7d96 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Response.scala @@ -0,0 +1,30 @@ +package com.twitter.tweetypie.storage + +object Response { + case class TweetResponse( + tweetId: Long, + overallResponse: TweetResponseCode, + additionalFieldResponses: Option[Map[Short, FieldResponse]] = None) + + sealed trait TweetResponseCode + + object TweetResponseCode { + object Success extends TweetResponseCode + object Partial extends TweetResponseCode + object Failure extends TweetResponseCode + object OverCapacity extends TweetResponseCode + object Deleted extends TweetResponseCode + } + + case class FieldResponse(code: FieldResponseCode, message: Option[String] = None) + + sealed trait FieldResponseCode + + object FieldResponseCode { + object Success extends FieldResponseCode + object InvalidRequest extends FieldResponseCode + object ValueNotFound extends FieldResponseCode + object Timeout extends 
FieldResponseCode + object Error extends FieldResponseCode + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala new file mode 100644 index 000000000..89b3e8efc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Scribe.scala @@ -0,0 +1,85 @@ +package com.twitter.tweetypie.storage + +import com.twitter.servo.util.FutureEffect +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging._ +import com.twitter.scrooge.BinaryThriftStructSerializer +import com.twitter.servo.util.{Scribe => ServoScribe} +import com.twitter.tweetypie.storage_internal.thriftscala._ +import com.twitter.tbird.thriftscala.Added +import com.twitter.tbird.thriftscala.Removed +import com.twitter.tbird.thriftscala.Scrubbed +import com.twitter.util.Time + +/** + * Scribe is used to log tweet writes which are used to generate /tables/statuses in HDFS. + * + * Write Scribe Category Message + * ----- --------------- ------- + * add tbird_add_status [[com.twitter.tbird.thriftscala.Added]] + * remove tbird_remove_status [[com.twitter.tbird.thriftscala.Removed]] + * scrub tbird_scrub_status [[com.twitter.tbird.thriftscala.Scrubbed]] + * + * The thrift representation is encoded using binary thrift protocol format, followed by base64 + * encoding and converted to string using default character set (utf8). The logger uses BareFormatter. + * + * The thrift ops are scribed only after the write API call has succeeded. + * + * The class is thread safe except initial configuration and registration routines, + * and no exception is expected unless java heap is out of memory. 
+ * + * If exception does get thrown, add/remove/scrub operations will fail and + * client will have to retry + */ +class Scribe(factory: Scribe.ScribeHandlerFactory, statsReceiver: StatsReceiver) { + import Scribe._ + + private val AddedSerializer = BinaryThriftStructSerializer(Added) + private val RemovedSerializer = BinaryThriftStructSerializer(Removed) + private val ScrubbedSerializer = BinaryThriftStructSerializer(Scrubbed) + + private val addCounter = statsReceiver.counter("scribe/add/count") + private val removeCounter = statsReceiver.counter("scribe/remove/count") + private val scrubCounter = statsReceiver.counter("scribe/scrub/count") + + val addHandler: FutureEffect[String] = ServoScribe(factory(scribeAddedCategory)()) + val removeHandler: FutureEffect[String] = ServoScribe(factory(scribeRemovedCategory)()) + val scrubHandler: FutureEffect[String] = ServoScribe(factory(scribeScrubbedCategory)()) + + private def addedToString(tweet: StoredTweet): String = + AddedSerializer.toString( + Added(StatusConversions.toTBirdStatus(tweet), Time.now.inMilliseconds, Some(false)) + ) + + private def removedToString(id: Long, at: Time, isSoftDeleted: Boolean): String = + RemovedSerializer.toString(Removed(id, at.inMilliseconds, Some(isSoftDeleted))) + + private def scrubbedToString(id: Long, cols: Seq[Int], at: Time): String = + ScrubbedSerializer.toString(Scrubbed(id, cols, at.inMilliseconds)) + + def logAdded(tweet: StoredTweet): Unit = { + addHandler(addedToString(tweet)) + addCounter.incr() + } + + def logRemoved(id: Long, at: Time, isSoftDeleted: Boolean): Unit = { + removeHandler(removedToString(id, at, isSoftDeleted)) + removeCounter.incr() + } + + def logScrubbed(id: Long, cols: Seq[Int], at: Time): Unit = { + scrubHandler(scrubbedToString(id, cols, at)) + scrubCounter.incr() + } +} + +object Scribe { + type ScribeHandlerFactory = (String) => HandlerFactory + + /** WARNING: These categories are white-listed. 
If you are changing them, the new categories should be white-listed. + * You should followup with CoreWorkflows team (CW) for that. + */ + private val scribeAddedCategory = "tbird_add_status" + private val scribeRemovedCategory = "tbird_remove_status" + private val scribeScrubbedCategory = "tbird_scrub_status" +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala new file mode 100644 index 000000000..7bbae6251 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/ScrubHandler.scala @@ -0,0 +1,71 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Time + +/** + * Deletes data for the scrubbed field and writes a metadata record. + * Provides scrub functionality. Right now, we only allow the scrubbing of the geo field. + * It should be simple to add more fields to the allowlist if needed. 
+ */ +object ScrubHandler { + + val scrubFieldsAllowlist: Set[Field] = Set(Field.Geo) + + def apply( + insert: ManhattanOperations.Insert, + delete: ManhattanOperations.Delete, + scribe: Scribe, + stats: StatsReceiver + ): TweetStorageClient.Scrub = + (unfilteredTweetIds: Seq[TweetId], columns: Seq[Field]) => { + val tweetIds = unfilteredTweetIds.filter(_ > 0) + + require(columns.nonEmpty, "Must specify fields to scrub") + require( + columns.toSet.size == columns.size, + s"Duplicate fields to scrub specified: $columns" + ) + require( + columns.forall(scrubFieldsAllowlist.contains(_)), + s"Cannot scrub $columns; scrubbable fields are restricted to $scrubFieldsAllowlist" + ) + + Stats.addWidthStat("scrub", "ids", tweetIds.size, stats) + val mhTimestamp = Time.now + + val stitches = tweetIds.map { tweetId => + val deletionStitches = columns.map { field => + val mhKeyToDelete = TweetKey.fieldKey(tweetId, field.id) + delete(mhKeyToDelete, Some(mhTimestamp)).liftToTry + } + + val collectedStitch = + Stitch.collect(deletionStitches).map(collectWithRateLimitCheck).lowerFromTry + + collectedStitch + .flatMap { _ => + val scrubbedStitches = columns.map { column => + val scrubbedKey = TweetKey.scrubbedFieldKey(tweetId, column.id) + val record = + TweetManhattanRecord( + scrubbedKey, + ManhattanValue(StringCodec.toByteBuffer(""), Some(mhTimestamp)) + ) + + insert(record).liftToTry + } + + Stitch.collect(scrubbedStitches) + } + .map(collectWithRateLimitCheck) + } + + Stitch.collect(stitches).map(collectWithRateLimitCheck).lowerFromTry.onSuccess { _ => + tweetIds.foreach { id => scribe.logScrubbed(id, columns.map(_.id.toInt), mhTimestamp) } + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala new file mode 100644 index 000000000..ea350ccb9 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/SoftDeleteHandler.scala @@ -0,0 
+1,20 @@ +package com.twitter.tweetypie.storage + +import com.twitter.util.Time + +object SoftDeleteHandler { + def apply( + insert: ManhattanOperations.Insert, + scribe: Scribe + ): TweetStorageClient.SoftDelete = + tweetId => { + val mhTimestamp = Time.now + val softDeleteRecord = TweetStateRecord + .SoftDeleted(tweetId, mhTimestamp.inMillis) + .toTweetMhRecord + + insert(softDeleteRecord).onSuccess { _ => + scribe.logRemoved(tweetId, mhTimestamp, isSoftDeleted = true) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala new file mode 100644 index 000000000..87d8b41a1 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/Stats.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver + +object Stats { + // These two methods below (addWidthStat and updatePerFieldQpsCounters) are called per RPC call for most APIs, + // so we rely on the stats receiver that is passed in to the library to do memoization. + + private[storage] def addWidthStat( + rpcName: String, + paramName: String, + width: Int, + stats: StatsReceiver + ): Unit = + getStat(rpcName, paramName, stats).add(width) + + // Updates the counters for each Additional field. 
The idea here is to expose the QPS for each + // additional field + private[storage] def updatePerFieldQpsCounters( + rpcName: String, + fieldIds: Seq[FieldId], + count: Int, + stats: StatsReceiver + ): Unit = { + fieldIds.foreach { fieldId => getCounter(rpcName, fieldId, stats).incr(count) } + } + + private def getCounter(rpcName: String, fieldId: FieldId, stats: StatsReceiver) = + stats.scope(rpcName, "fields", fieldId.toString).counter("count") + + private def getStat(rpcName: String, paramName: String, stats: StatsReceiver) = + stats.scope(rpcName, paramName).stat("width") +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala new file mode 100644 index 000000000..77dfed9ba --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StatusConversions.scala @@ -0,0 +1,129 @@ +package com.twitter.tweetypie.storage + +import com.twitter.tweetypie.storage_internal.thriftscala._ +import com.twitter.tbird.{thriftscala => tbird} + +object StatusConversions { + + /** + * This is used only in Scribe.scala, when scribing to tbird_add_status + * Once we remove that, we can also remove this. 
+ */ + def toTBirdStatus(tweet: StoredTweet): tbird.Status = + tbird.Status( + id = tweet.id, + userId = tweet.userId.get, + text = tweet.text.get, + createdVia = tweet.createdVia.get, + createdAtSec = tweet.createdAtSec.get, + reply = tweet.reply.map(toTBirdReply), + share = tweet.share.map(toTBirdShare), + contributorId = tweet.contributorId, + geo = tweet.geo.map(toTBirdGeo), + hasTakedown = tweet.hasTakedown.getOrElse(false), + nsfwUser = tweet.nsfwUser.getOrElse(false), + nsfwAdmin = tweet.nsfwAdmin.getOrElse(false), + media = tweet.media.map(_.map(toTBirdMedia)).getOrElse(Seq()), + narrowcast = tweet.narrowcast.map(toTBirdNarrowcast), + nullcast = tweet.nullcast.getOrElse(false), + trackingId = tweet.trackingId + ) + + /** + * This is only used in a test, to verify that the above method `toTBirdStatus` + * works, so we can't remove it as long as the above method exists. + */ + def fromTBirdStatus(status: tbird.Status): StoredTweet = { + StoredTweet( + id = status.id, + userId = Some(status.userId), + text = Some(status.text), + createdVia = Some(status.createdVia), + createdAtSec = Some(status.createdAtSec), + reply = status.reply.map(fromTBirdReply), + share = status.share.map(fromTBirdShare), + contributorId = status.contributorId, + geo = status.geo.map(fromTBirdGeo), + hasTakedown = Some(status.hasTakedown), + nsfwUser = Some(status.nsfwUser), + nsfwAdmin = Some(status.nsfwAdmin), + media = Some(status.media.map(fromTBirdMedia)), + narrowcast = status.narrowcast.map(fromTBirdNarrowcast), + nullcast = Some(status.nullcast), + trackingId = status.trackingId + ) + } + + private def fromTBirdReply(reply: tbird.Reply): StoredReply = + StoredReply( + inReplyToStatusId = reply.inReplyToStatusId, + inReplyToUserId = reply.inReplyToUserId + ) + + private def fromTBirdShare(share: tbird.Share): StoredShare = + StoredShare( + sourceStatusId = share.sourceStatusId, + sourceUserId = share.sourceUserId, + parentStatusId = share.parentStatusId + ) + + private def 
fromTBirdGeo(geo: tbird.Geo): StoredGeo = + StoredGeo( + latitude = geo.latitude, + longitude = geo.longitude, + geoPrecision = geo.geoPrecision, + entityId = geo.entityId + ) + + private def fromTBirdMedia(media: tbird.MediaEntity): StoredMediaEntity = + StoredMediaEntity( + id = media.id, + mediaType = media.mediaType, + width = media.width, + height = media.height + ) + + private def fromTBirdNarrowcast(narrowcast: tbird.Narrowcast): StoredNarrowcast = + StoredNarrowcast( + language = Some(narrowcast.language), + location = Some(narrowcast.location), + ids = Some(narrowcast.ids) + ) + + private def toTBirdReply(reply: StoredReply): tbird.Reply = + tbird.Reply( + inReplyToStatusId = reply.inReplyToStatusId, + inReplyToUserId = reply.inReplyToUserId + ) + + private def toTBirdShare(share: StoredShare): tbird.Share = + tbird.Share( + sourceStatusId = share.sourceStatusId, + sourceUserId = share.sourceUserId, + parentStatusId = share.parentStatusId + ) + + private def toTBirdGeo(geo: StoredGeo): tbird.Geo = + tbird.Geo( + latitude = geo.latitude, + longitude = geo.longitude, + geoPrecision = geo.geoPrecision, + entityId = geo.entityId, + name = geo.name + ) + + private def toTBirdMedia(media: StoredMediaEntity): tbird.MediaEntity = + tbird.MediaEntity( + id = media.id, + mediaType = media.mediaType, + width = media.width, + height = media.height + ) + + private def toTBirdNarrowcast(narrowcast: StoredNarrowcast): tbird.Narrowcast = + tbird.Narrowcast( + language = narrowcast.language.getOrElse(Nil), + location = narrowcast.location.getOrElse(Nil), + ids = narrowcast.ids.getOrElse(Nil) + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala new file mode 100644 index 000000000..d424a8817 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/StorageConversions.scala @@ -0,0 +1,346 @@ +package 
com.twitter.tweetypie.storage + +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.scrooge.TFieldBlob +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage_internal.thriftscala._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.TweetLenses + +object StorageConversions { + private val tbTweetCompiledAdditionalFieldIds = + StoredTweet.metaData.fields.map(_.id).filter(AdditionalFields.isAdditionalFieldId) + + def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply = + StoredReply( + inReplyToStatusId = reply.inReplyToStatusId.getOrElse(0), + inReplyToUserId = reply.inReplyToUserId, + conversationId = conversationId + ) + + def toStoredShare(share: Share): StoredShare = + StoredShare( + share.sourceStatusId, + share.sourceUserId, + share.parentStatusId + ) + + def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] = + qt.permalink + .filterNot { p => + text.contains(p.shortUrl) + } // omit StoredQuotedTweet when url already in text + .map { p => + StoredQuotedTweet( + qt.tweetId, + qt.userId, + p.shortUrl + ) + } + + def toStoredGeo(tweet: Tweet): Option[StoredGeo] = + TweetLenses.geoCoordinates.get(tweet) match { + case None => + TweetLenses.placeId.get(tweet) match { + case None => None + case Some(placeId) => + Some( + StoredGeo( + latitude = 0.0, + longitude = 0.0, + geoPrecision = 0, + entityId = 0, + name = Some(placeId) + ) + ) + } + case Some(coords) => + Some( + StoredGeo( + latitude = coords.latitude, + longitude = coords.longitude, + geoPrecision = coords.geoPrecision, + entityId = if (coords.display) 2 else 0, + name = TweetLenses.placeId.get(tweet) + ) + ) + } + + def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] = + mediaList.filter(_.sourceStatusId.isEmpty).flatMap(toStoredMediaEntity) + + def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] = + 
media.sizes.find(_.sizeType == MediaSizeType.Orig).map { origSize => + StoredMediaEntity( + id = media.mediaId, + mediaType = origSize.deprecatedContentType.value.toByte, + width = origSize.width.toShort, + height = origSize.height.toShort + ) + } + + // The language and ids fields are for compatibility with existing tweets stored in manhattan. + def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast = + StoredNarrowcast( + language = Some(Seq.empty), + location = Some(narrowcast.location), + ids = Some(Seq.empty) + ) + + def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet = + from.foldLeft(to) { case (t, f) => t.setField(f) } + + def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet = + toStoredAdditionalFields(AdditionalFields.additionalFields(from), to) + + def toStoredTweet(tweet: Tweet): StoredTweet = { + val storedTweet = + StoredTweet( + id = tweet.id, + userId = Some(TweetLenses.userId(tweet)), + text = Some(TweetLenses.text(tweet)), + createdVia = Some(TweetLenses.createdVia(tweet)), + createdAtSec = Some(TweetLenses.createdAt(tweet)), + reply = + TweetLenses.reply(tweet).map { r => toStoredReply(r, TweetLenses.conversationId(tweet)) }, + share = TweetLenses.share(tweet).map(toStoredShare), + contributorId = tweet.contributor.map(_.userId), + geo = toStoredGeo(tweet), + hasTakedown = Some(TweetLenses.hasTakedown(tweet)), + nsfwUser = Some(TweetLenses.nsfwUser(tweet)), + nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)), + media = tweet.media.map(toStoredMedia), + narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast), + nullcast = Some(TweetLenses.nullcast(tweet)), + trackingId = TweetLenses.trackingId(tweet), + quotedTweet = TweetLenses.quotedTweet(tweet).flatMap { qt => + toStoredQuotedTweet(qt, TweetLenses.text(tweet)) + } + ) + toStoredAdditionalFields(tweet, storedTweet) + } + + /** + * Does not need core data to be set. 
Constructs on disk tweet by avoiding the TweetLenses object + * and only extracting the specified fields. + * + * NOTE: Assumes that specified fields are set in the tweet. + * + * @param tpTweet Tweetypie Tweet to be converted + * @param fields the fields to be populated in the on disk Tweet + * + * @return an on disk Tweet which has only the specified fields set + */ + def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = { + + // Make sure all the passed in fields are known or additional fields + require( + (fields -- Field.AllUpdatableCompiledFields) + .forall(field => AdditionalFields.isAdditionalFieldId(field.id)) + ) + + val storedTweet = + StoredTweet( + id = tpTweet.id, + geo = if (fields.contains(Field.Geo)) { + tpTweet.coreData.get.coordinates match { + case None => + tpTweet.coreData.get.placeId match { + case None => None + case Some(placeId) => + Some( + StoredGeo( + latitude = 0.0, + longitude = 0.0, + geoPrecision = 0, + entityId = 0, + name = Some(placeId) + ) + ) + } + case Some(coords) => + Some( + StoredGeo( + latitude = coords.latitude, + longitude = coords.longitude, + geoPrecision = coords.geoPrecision, + entityId = if (coords.display) 2 else 0, + name = tpTweet.coreData.get.placeId + ) + ) + } + } else { + None + }, + hasTakedown = + if (fields.contains(Field.HasTakedown)) + Some(tpTweet.coreData.get.hasTakedown) + else + None, + nsfwUser = + if (fields.contains(Field.NsfwUser)) + Some(tpTweet.coreData.get.nsfwUser) + else + None, + nsfwAdmin = + if (fields.contains(Field.NsfwAdmin)) + Some(tpTweet.coreData.get.nsfwAdmin) + else + None + ) + + if (fields.map(_.id).exists(AdditionalFields.isAdditionalFieldId)) + toStoredAdditionalFields(tpTweet, storedTweet) + else + storedTweet + } + + def fromStoredReply(reply: StoredReply): Reply = + Reply( + Some(reply.inReplyToStatusId).filter(_ > 0), + reply.inReplyToUserId + ) + + def fromStoredShare(share: StoredShare): Share = + Share( + share.sourceStatusId, + 
share.sourceUserId, + share.parentStatusId + ) + + def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet = + QuotedTweet( + qt.tweetId, + qt.userId, + Some( + ShortenedUrl( + shortUrl = qt.shortUrl, + longUrl = "", // will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator + displayText = "" //will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator + ) + ) + ) + + def fromStoredGeo(geo: StoredGeo): GeoCoordinates = + GeoCoordinates( + latitude = geo.latitude, + longitude = geo.longitude, + geoPrecision = geo.geoPrecision, + display = geo.entityId == 2 + ) + + def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity = + MediaEntity( + fromIndex = -1, // will get filled in later + toIndex = -1, // will get filled in later + url = null, // will get filled in later + mediaPath = "", // field is obsolete + mediaUrl = null, // will get filled in later + mediaUrlHttps = null, // will get filled in later + displayUrl = null, // will get filled in later + expandedUrl = null, // will get filled in later + mediaId = media.id, + nsfw = false, + sizes = Set( + MediaSize( + sizeType = MediaSizeType.Orig, + resizeMethod = MediaResizeMethod.Fit, + deprecatedContentType = MediaContentType(media.mediaType), + width = media.width, + height = media.height + ) + ) + ) + + def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast = + Narrowcast( + location = narrowcast.location.getOrElse(Seq()) + ) + + def fromStoredTweet(storedTweet: StoredTweet): Tweet = { + val coreData = + TweetCoreData( + userId = storedTweet.userId.get, + text = storedTweet.text.get, + createdVia = storedTweet.createdVia.get, + createdAtSecs = storedTweet.createdAtSec.get, + reply = storedTweet.reply.map(fromStoredReply), + share = storedTweet.share.map(fromStoredShare), + hasTakedown = storedTweet.hasTakedown.getOrElse(false), + nsfwUser = storedTweet.nsfwUser.getOrElse(false), + nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false), + narrowcast = 
storedTweet.narrowcast.map(fromStoredNarrowcast), + nullcast = storedTweet.nullcast.getOrElse(false), + trackingId = storedTweet.trackingId, + conversationId = storedTweet.reply.flatMap(_.conversationId), + placeId = storedTweet.geo.flatMap(_.name), + coordinates = storedTweet.geo.map(fromStoredGeo), + hasMedia = if (storedTweet.media.exists(_.nonEmpty)) Some(true) else None + ) + + // retweets should never have their media, but some tweets incorrectly do. + val storedMedia = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq + + val tpTweet = + Tweet( + id = storedTweet.id, + coreData = Some(coreData), + contributor = storedTweet.contributorId.map(Contributor(_)), + media = Some(storedMedia.flatten.map(fromStoredMediaEntity)), + mentions = Some(Seq.empty), + urls = Some(Seq.empty), + cashtags = Some(Seq.empty), + hashtags = Some(Seq.empty), + quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet) + ) + fromStoredAdditionalFields(storedTweet, tpTweet) + } + + def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = { + fromStoredTweet( + storedTweet.copy( + userId = storedTweet.userId.orElse(Some(-1L)), + text = storedTweet.text.orElse(Some("")), + createdVia = storedTweet.createdVia.orElse(Some("")), + createdAtSec = storedTweet.createdAtSec.orElse(Some(-1L)) + )) + } + + def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = { + val passThroughAdditionalFields = + from._passthroughFields.filterKeys(AdditionalFields.isAdditionalFieldId) + val allAdditionalFields = + from.getFieldBlobs(tbTweetCompiledAdditionalFieldIds) ++ passThroughAdditionalFields + allAdditionalFields.values.foldLeft(to) { case (t, f) => t.setField(f) } + } + + def toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = { + val noteTweetBlob = storedTweet.getFieldBlob(Tweet.NoteTweetField.id) + val noteTweetOption = noteTweetBlob.map(blob => NoteTweet.decode(blob.read)) + DeletedTweet( + id = storedTweet.id, + userId = storedTweet.userId, + text = 
storedTweet.text, + createdAtSecs = storedTweet.createdAtSec, + share = storedTweet.share.map(toDeletedShare), + media = storedTweet.media.map(_.map(toDeletedMediaEntity)), + noteTweetId = noteTweetOption.map(_.id), + isExpandable = noteTweetOption.flatMap(_.isExpandable) + ) + } + + def toDeletedShare(storedShare: StoredShare): DeletedTweetShare = + DeletedTweetShare( + sourceStatusId = storedShare.sourceStatusId, + sourceUserId = storedShare.sourceUserId, + parentStatusId = storedShare.parentStatusId + ) + + def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity = + DeletedTweetMediaEntity( + id = storedMediaEntity.id, + mediaType = storedMediaEntity.mediaType, + width = storedMediaEntity.width, + height = storedMediaEntity.height + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala new file mode 100644 index 000000000..52e907594 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TimestampDecoder.scala @@ -0,0 +1,92 @@ +package com.twitter.tweetypie.storage + +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try +import java.util.Arrays +import scala.util.control.NoStackTrace +import scala.util.control.NonFatal + +sealed abstract class TimestampType(val keyName: String) +object TimestampType { + object Default extends TimestampType("timestamp") + object SoftDelete extends TimestampType("softdelete_timestamp") +} + +/** + * TimestampDecoder gets the timestamps associated with state records. The Manhattan timestamp is + * used for legacy records (with value "1"), otherwise the timestamp is extracted from the + * JSON value. + * + * See "Metadata" in README.md for further information about state records. 
+ */ +object TimestampDecoder { + case class UnparsableJson(msg: String, t: Throwable) extends Exception(msg, t) with NoStackTrace + case class MissingJsonTimestamp(msg: String) extends Exception(msg) with NoStackTrace + case class UnexpectedJsonValue(msg: String) extends Exception(msg) with NoStackTrace + case class MissingManhattanTimestamp(msg: String) extends Exception(msg) with NoStackTrace + + private[storage] val LegacyValue: Array[Byte] = Array('1') + + /** + * The first backfill of tweet data to Manhattan supplied timestamps in milliseconds where + * nanoseconds were expected. The result is that some values have an incorrect Manhattan + * timestamp. For these bad timestamps, time.inNanoseconds is actually milliseconds. + * + * For example, the deletion record for tweet 22225781 has Manhattan timestamp 1970-01-01 00:23:24 +0000. + * Contrast with the deletion record for tweet 435404491999813632 with Manhattan timestamp 2014-11-09 14:24:04 +0000 + * + * This threshold value comes from the last time in milliseconds that was interpreted + * as nanoseconds, e.g. 
Time.fromNanoseconds(1438387200000L) == 1970-01-01 00:23:58 +0000 + */ + private[storage] val BadTimestampThreshold = Time.at("1970-01-01 00:23:58 +0000") + + def decode(record: TweetManhattanRecord, tsType: TimestampType): Try[Long] = + decode(record.value, tsType) + + def decode(mhValue: TweetManhattanValue, tsType: TimestampType): Try[Long] = { + val value = ByteArrayCodec.fromByteBuffer(mhValue.contents) + if (isLegacyRecord(value)) { + nativeManhattanTimestamp(mhValue) + } else { + jsonTimestamp(value, tsType) + } + } + + private def isLegacyRecord(value: Array[Byte]) = Arrays.equals(value, LegacyValue) + + private def nativeManhattanTimestamp(mhValue: TweetManhattanValue): Try[Long] = + mhValue.timestamp match { + case Some(ts) => Return(correctedTimestamp(ts)) + case None => + Throw(MissingManhattanTimestamp(s"Manhattan timestamp missing in value $mhValue")) + } + + private def jsonTimestamp(value: Array[Byte], tsType: TimestampType): Try[Long] = + Try { Json.decode(value) } + .rescue { case NonFatal(e) => Throw(UnparsableJson(e.getMessage, e)) } + .flatMap { m => + m.get(tsType.keyName) match { + case Some(v) => + v match { + case l: Long => Return(l) + case i: Integer => Return(i.toLong) + case _ => + Throw( + UnexpectedJsonValue(s"Unexpected value for ${tsType.keyName} in record data $m") + ) + } + case None => + Throw(MissingJsonTimestamp(s"Missing key ${tsType.keyName} in record data $m")) + } + } + + def correctedTime(t: Time): Time = + if (t < BadTimestampThreshold) Time.fromMilliseconds(t.inNanoseconds) else t + + def correctedTime(t: Long): Time = correctedTime(Time.fromNanoseconds(t)) + + def correctedTimestamp(t: Time): Long = + if (t < BadTimestampThreshold) t.inNanoseconds else t.inMilliseconds +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala new file mode 100644 index 000000000..ed5d01141 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetKey.scala @@ -0,0 +1,164 @@ +package com.twitter.tweetypie.storage + +/** + * Responsible for encoding/decoding Tweet records to/from Manhattan keys + * + * K/V Scheme: + * ----------- + * [TweetId] + * /metadata + * /delete_state (a.k.a. hard delete) + * /soft_delete_state + * /bounce_delete_state + * /undelete_state + * /force_added_state + * /scrubbed_fields/ + * /[ScrubbedFieldId_1] + * .. + * /[ScrubbedFieldId_M] + * /fields + * /internal + * /1 + * /9 + * .. + * /99 + * /external + * /100 + * .. + * + * IMPORTANT NOTE: + * 1) Field Ids 2 to 8 in Tweet thrift struct are considered "core fields" and are 'packed' together + * into a TFieldBlob and stored under field id 1 (i.e. [DatasetName]/[TweetId]/fields/internal/1). + * This is why we do not see keys from [DatasetName]/[TweetId]/fields/internal/2 to [DatasetName]/ + * [TweetId]/fields/internal/8. + * + * 2) Also, the tweet id (which is the field id 1 in Tweet thrift structure) is not explicitly stored + * in Manhattan. There is no need to explicitly store it since it is a part of the Pkey. + */ +case class TweetKey(tweetId: TweetId, lKey: TweetKey.LKey) { + override def toString: String = + s"/${ManhattanOperations.PkeyInjection(tweetId)}/${ManhattanOperations.LkeyInjection(lKey)}" +} + +object TweetKey { + // Manhattan uses lexicographical order for keys. To make sure lexicographical order matches the + // numerical order, we should pad both tweet id and field ids with leading zeros.
+ // Since tweet id is long and field id is a short, the max width of each can be obtained by doing + // Long.MaxValue.toString.length and Short.MaxValue.toString.length respectively + private val TweetIdFormatStr = s"%0${Long.MaxValue.toString.length}d" + private val FieldIdFormatStr = s"%0${Short.MaxValue.toString.length}d" + private[storage] def padTweetIdStr(tweetId: Long): String = TweetIdFormatStr.format(tweetId) + private[storage] def padFieldIdStr(fieldId: Short): String = FieldIdFormatStr.format(fieldId) + + def coreFieldsKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.CoreFieldsKey) + def hardDeletionStateKey(tweetId: TweetId): TweetKey = + TweetKey(tweetId, LKey.HardDeletionStateKey) + def softDeletionStateKey(tweetId: TweetId): TweetKey = + TweetKey(tweetId, LKey.SoftDeletionStateKey) + def bounceDeletionStateKey(tweetId: TweetId): TweetKey = + TweetKey(tweetId, LKey.BounceDeletionStateKey) + def unDeletionStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.UnDeletionStateKey) + def forceAddedStateKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ForceAddedStateKey) + def scrubbedGeoFieldKey(tweetId: TweetId): TweetKey = TweetKey(tweetId, LKey.ScrubbedGeoFieldKey) + def fieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey = + TweetKey(tweetId, LKey.FieldKey(fieldId)) + def internalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey = + TweetKey(tweetId, LKey.InternalFieldsKey(fieldId)) + def additionalFieldsKey(tweetId: TweetId, fieldId: FieldId): TweetKey = + TweetKey(tweetId, LKey.AdditionalFieldsKey(fieldId)) + def scrubbedFieldKey(tweetId: TweetId, fieldId: FieldId): TweetKey = + TweetKey(tweetId, LKey.ScrubbedFieldKey(fieldId)) + + // AllFieldsKeyPrefix: fields + // CoreFieldsKey: fields/internal/1 (Stores subset of StoredTweet fields which are + // "packed" into a single CoreFields record) + // HardDeletionStateKey: metadata/delete_state + // SoftDeletionStateKey: metadata/soft_delete_state + // 
BounceDeletionStateKey: metadata/bounce_delete_state + // UnDeletionStateKey: metadata/undelete_state + // ForceAddedStateKey: metadata/force_added_state + // FieldKey: fields/[group]/[PaddedFieldId] (where [group] + // is 'internal' for field ids < 100 and 'external' for all other + // field ids) + // InternalFieldsKeyPrefix: fields/internal + // PKey: + // ScrubbedFieldKey: metadata/scrubbed_fields/[PaddedFieldId] + // ScrubbedFieldKeyPrefix: metadata/scrubbed_fields + sealed abstract class LKey(override val toString: String) + object LKey { + private val HardDeletionRecordLiteral = "delete_state" + private val SoftDeletionRecordLiteral = "soft_delete_state" + private val BounceDeletionRecordLiteral = "bounce_delete_state" + private val UnDeletionRecordLiteral = "undelete_state" + private val ForceAddRecordLiteral = "force_added_state" + private val ScrubbedFieldsGroup = "scrubbed_fields" + private val InternalFieldsGroup = "internal" + private val ExternalFieldsGroup = "external" + private val MetadataCategory = "metadata" + private val FieldsCategory = "fields" + private val InternalFieldsKeyPrefix = s"$FieldsCategory/$InternalFieldsGroup/" + private val ExternalFieldsKeyPrefix = s"$FieldsCategory/$ExternalFieldsGroup/" + private val ScrubbedFieldsKeyPrefix = s"$MetadataCategory/$ScrubbedFieldsGroup/" + + sealed abstract class MetadataKey(metadataType: String) + extends LKey(s"$MetadataCategory/$metadataType") + sealed abstract class StateKey(stateType: String) extends MetadataKey(stateType) + case object HardDeletionStateKey extends StateKey(s"$HardDeletionRecordLiteral") + case object SoftDeletionStateKey extends StateKey(s"$SoftDeletionRecordLiteral") + case object BounceDeletionStateKey extends StateKey(s"$BounceDeletionRecordLiteral") + case object UnDeletionStateKey extends StateKey(s"$UnDeletionRecordLiteral") + case object ForceAddedStateKey extends StateKey(s"$ForceAddRecordLiteral") + + case class ScrubbedFieldKey(fieldId: FieldId) + extends
MetadataKey(s"$ScrubbedFieldsGroup/${padFieldIdStr(fieldId)}") + val ScrubbedGeoFieldKey: LKey.ScrubbedFieldKey = ScrubbedFieldKey(TweetFields.geoFieldId) + + /** + * LKey that carries one of many possible field ids. This generalizes over + * the internal and additional fields keys. + */ + sealed abstract class FieldKey(prefix: String) extends LKey(toString) { + def fieldId: FieldId + override val toString: String = prefix + padFieldIdStr(fieldId) + } + object FieldKey { + def apply(fieldId: FieldId): FieldKey = + fieldId match { + case id if id < TweetFields.firstAdditionalFieldId => InternalFieldsKey(fieldId) + case _ => AdditionalFieldsKey(fieldId) + } + } + + case class InternalFieldsKey(fieldId: FieldId) extends FieldKey(InternalFieldsKeyPrefix) { + assert(fieldId < TweetFields.firstAdditionalFieldId) + } + case class AdditionalFieldsKey(fieldId: FieldId) extends FieldKey(ExternalFieldsKeyPrefix) { + assert(fieldId >= TweetFields.firstAdditionalFieldId) + } + val CoreFieldsKey: LKey.InternalFieldsKey = InternalFieldsKey(TweetFields.rootCoreFieldId) + + case class Unknown private (str: String) extends LKey(str) + + def fromString(str: String): LKey = { + def extractFieldId(prefix: String): FieldId = + str.slice(prefix.length, str.length).toShort + + str match { + case CoreFieldsKey.toString => CoreFieldsKey + case HardDeletionStateKey.toString => HardDeletionStateKey + case SoftDeletionStateKey.toString => SoftDeletionStateKey + case BounceDeletionStateKey.toString => BounceDeletionStateKey + case UnDeletionStateKey.toString => UnDeletionStateKey + case ForceAddedStateKey.toString => ForceAddedStateKey + case ScrubbedGeoFieldKey.toString => ScrubbedGeoFieldKey + case _ if str.startsWith(InternalFieldsKeyPrefix) => + InternalFieldsKey(extractFieldId(InternalFieldsKeyPrefix)) + case _ if str.startsWith(ExternalFieldsKeyPrefix) => + AdditionalFieldsKey(extractFieldId(ExternalFieldsKeyPrefix)) + case _ if str.startsWith(ScrubbedFieldsKeyPrefix) => +
ScrubbedFieldKey(extractFieldId(ScrubbedFieldsKeyPrefix)) + case _ => Unknown(str) + } + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala new file mode 100644 index 000000000..a5d31a62d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStateRecord.scala @@ -0,0 +1,90 @@ +package com.twitter.tweetypie.storage + +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.util.Time + +/** + * A [[TweetStateRecord]] represents an action taken on a tweet and can be used to determine a tweet's state. + * + * The state is determined by the record with the most recent timestamp. In the absence of any + * record a tweet is considered found, which is to say the tweet has not been through the + * deletion process. + * + * The [[TweetStateRecord]] type is determined by the lkey of a tweet manhattan record: + * metadata/delete_state -> HardDeleted + * metadata/soft_delete_state -> SoftDeleted + * metadata/undelete_state -> Undeleted + * metadata/force_added_state -> ForceAdded + * + * See the README in this directory for more details about the state of a tweet. + */ +sealed trait TweetStateRecord { + def tweetId: TweetId + def createdAt: Long + def stateKey: TweetKey.LKey.StateKey + def values: Map[String, Long] = Map("timestamp" -> createdAt) + def name: String + + def toTweetMhRecord: TweetManhattanRecord = { + val valByteBuffer = ByteArrayCodec.toByteBuffer(Json.encode(values)) + val value = ManhattanValue(valByteBuffer, Some(Time.fromMilliseconds(createdAt))) + TweetManhattanRecord(TweetKey(tweetId, stateKey), value) + } +} + +object TweetStateRecord { + + /** When a soft-deleted or bounce deleted tweet is ultimately hard-deleted by an offline job. 
*/ + case class HardDeleted(tweetId: TweetId, createdAt: Long, deletedAt: Long) + extends TweetStateRecord { + // timestamp in the mh backend is the hard deletion timestamp + override def values = Map("timestamp" -> createdAt, "softdelete_timestamp" -> deletedAt) + def stateKey = TweetKey.LKey.HardDeletionStateKey + def name = "hard_deleted" + } + + /** When a tweet is deleted by the user. It can still be undeleted while in the soft deleted state. */ + case class SoftDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { + def stateKey = TweetKey.LKey.SoftDeletionStateKey + def name = "soft_deleted" + } + + /** When a tweet is deleted by go/bouncer for violating Twitter Rules. It MAY NOT be undeleted. */ + case class BounceDeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { + def stateKey = TweetKey.LKey.BounceDeletionStateKey + def name = "bounce_deleted" + } + + /** When a tweet is undeleted by an internal system. */ + case class Undeleted(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { + def stateKey = TweetKey.LKey.UnDeletionStateKey + def name = "undeleted" + } + + /** When a tweet is created using the forceAdd endpoint. 
*/ + case class ForceAdded(tweetId: TweetId, createdAt: Long) extends TweetStateRecord { + def stateKey = TweetKey.LKey.ForceAddedStateKey + def name = "force_added" + } + + def fromTweetMhRecord(record: TweetManhattanRecord): Option[TweetStateRecord] = { + def ts = TimestampDecoder.decode(record, TimestampType.Default).getOrElse(0L) + def sdts = TimestampDecoder.decode(record, TimestampType.SoftDelete).getOrElse(0L) + def tweetId = record.pkey + + record.lkey match { + case TweetKey.LKey.HardDeletionStateKey => Some(HardDeleted(tweetId, ts, sdts)) + case TweetKey.LKey.SoftDeletionStateKey => Some(SoftDeleted(tweetId, ts)) + case TweetKey.LKey.BounceDeletionStateKey => Some(BounceDeleted(tweetId, ts)) + case TweetKey.LKey.UnDeletionStateKey => Some(Undeleted(tweetId, ts)) + case TweetKey.LKey.ForceAddedStateKey => Some(ForceAdded(tweetId, ts)) + case _ => None + } + } + + def fromTweetMhRecords(records: Seq[TweetManhattanRecord]): Seq[TweetStateRecord] = + records.flatMap(fromTweetMhRecord) + + def mostRecent(records: Seq[TweetManhattanRecord]): Option[TweetStateRecord] = + fromTweetMhRecords(records).sortBy(_.createdAt).lastOption +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala new file mode 100644 index 000000000..69023abc2 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageClient.scala @@ -0,0 +1,201 @@ +package com.twitter.tweetypie.storage + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.Response.TweetResponse +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Future + +/** + * Interface for reading and writing tweet data in Manhattan + */ +trait TweetStorageClient { + import TweetStorageClient._ + def addTweet: AddTweet + def deleteAdditionalFields: DeleteAdditionalFields + def getTweet: GetTweet + def getStoredTweet: GetStoredTweet + def 
getDeletedTweets: GetDeletedTweets + def undelete: Undelete + def updateTweet: UpdateTweet + def scrub: Scrub + def softDelete: SoftDelete + def bounceDelete: BounceDelete + def hardDeleteTweet: HardDeleteTweet + def ping: Ping +} + +object TweetStorageClient { + type GetTweet = TweetId => Stitch[GetTweet.Response] + + object GetTweet { + sealed trait Response + object Response { + case class Found(tweet: Tweet) extends Response + object NotFound extends Response + object Deleted extends Response + // On BounceDeleted, provide the full Tweet so that implementations + // (i.e. ManhattanTweetStorageClient) do not need to be aware of the specific tweet + // fields required by callers for proper processing of bounced deleted tweets. + case class BounceDeleted(tweet: Tweet) extends Response + } + } + + type GetStoredTweet = TweetId => Stitch[GetStoredTweet.Response] + + object GetStoredTweet { + sealed abstract class Error(val message: String) { + override def toString: String = message + } + object Error { + case object TweetIsCorrupt extends Error("stored tweet data is corrupt and cannot be decoded") + + case object ScrubbedFieldsPresent + extends Error("stored tweet fields that should be scrubbed are still present") + + case object TweetFieldsMissingOrInvalid + extends Error("expected tweet fields are missing or contain invalid values") + + case object TweetShouldBeHardDeleted + extends Error("stored tweet that should be hard deleted is still present") + } + + sealed trait Response + object Response { + sealed trait StoredTweetMetadata { + def state: Option[TweetStateRecord] + def allStates: Seq[TweetStateRecord] + def scrubbedFields: Set[FieldId] + } + + sealed trait StoredTweetErrors { + def errs: Seq[Error] + } + + /** + * Tweet data was found, possibly state records and/or scrubbed field records. 
+ */ + sealed trait FoundAny extends Response with StoredTweetMetadata { + def tweet: Tweet + } + + object FoundAny { + def unapply( + response: Response + ): Option[ + (Tweet, Option[TweetStateRecord], Seq[TweetStateRecord], Set[FieldId], Seq[Error]) + ] = + response match { + case f: FoundWithErrors => + Some((f.tweet, f.state, f.allStates, f.scrubbedFields, f.errs)) + case f: FoundAny => Some((f.tweet, f.state, f.allStates, f.scrubbedFields, Seq.empty)) + case _ => None + } + } + + /** + * No records for this tweet id were found in storage + */ + case class NotFound(id: TweetId) extends Response + + /** + * Data related to the Tweet id was found but could not be loaded successfully. The + * errs array contains details of the problems. + */ + case class Failed( + id: TweetId, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + errs: Seq[Error], + ) extends Response + with StoredTweetMetadata + with StoredTweetErrors + + /** + * No Tweet data was found, and the most recent state record found is HardDeleted + */ + case class HardDeleted( + id: TweetId, + state: Option[TweetStateRecord.HardDeleted], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends Response + with StoredTweetMetadata + + /** + * Tweet data was found, and the most recent state record found, if any, is not + * any form of deletion record. + */ + case class Found( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends FoundAny + + /** + * Tweet data was found, and the most recent state record found indicates deletion. + */ + case class FoundDeleted( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + ) extends FoundAny + + /** + * Tweet data was found, however errors were detected in the stored data. Required + * fields may be missing from the Tweet struct (e.g. 
CoreData), stored fields that + * should be scrubbed remain present, or Tweets that should be hard-deleted remain + * in storage. The errs array contains details of the problems. + */ + case class FoundWithErrors( + tweet: Tweet, + state: Option[TweetStateRecord], + allStates: Seq[TweetStateRecord], + scrubbedFields: Set[FieldId], + errs: Seq[Error], + ) extends FoundAny + with StoredTweetErrors + } + } + + type HardDeleteTweet = TweetId => Stitch[HardDeleteTweet.Response] + type SoftDelete = TweetId => Stitch[Unit] + type BounceDelete = TweetId => Stitch[Unit] + + object HardDeleteTweet { + sealed trait Response + object Response { + case class Deleted(deletedAtMillis: Option[Long], createdAtMillis: Option[Long]) + extends Response + case class NotDeleted(id: TweetId, ineligibleLKey: Option[TweetKey.LKey]) + extends Throwable + with Response + } + } + + type Undelete = TweetId => Stitch[Undelete.Response] + object Undelete { + case class Response( + code: UndeleteResponseCode, + tweet: Option[Tweet] = None, + createdAtMillis: Option[Long] = None, + archivedAtMillis: Option[Long] = None) + + sealed trait UndeleteResponseCode + + object UndeleteResponseCode { + object Success extends UndeleteResponseCode + object BackupNotFound extends UndeleteResponseCode + object NotCreated extends UndeleteResponseCode + } + } + + type AddTweet = Tweet => Stitch[Unit] + type UpdateTweet = (Tweet, Seq[Field]) => Stitch[TweetResponse] + type GetDeletedTweets = Seq[TweetId] => Stitch[Seq[DeletedTweetResponse]] + type DeleteAdditionalFields = (Seq[TweetId], Seq[Field]) => Stitch[Seq[TweetResponse]] + type Scrub = (Seq[TweetId], Seq[Field]) => Stitch[Unit] + type Ping = () => Future[Unit] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala new file mode 100644 index 000000000..7f1bd6b1e --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetStorageException.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.storage + +import scala.util.control.NoStackTrace + +sealed abstract class TweetStorageException(message: String, cause: Throwable) + extends Exception(message, cause) + +/** + * The request was not properly formed and failed an assertion present in the code. Should not be + * retried without modification. + */ +case class ClientError(message: String, cause: Throwable) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * Request was rejected by Manhattan or the in-process rate limiter. Should not be retried. + */ +case class RateLimited(message: String, cause: Throwable) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * Corrupt tweets were requested from Manhattan + */ +case class VersionMismatchError(message: String, cause: Throwable = null) + extends TweetStorageException(message, cause) + with NoStackTrace + +/** + * All other unhandled exceptions. 
+ */ +case class InternalError(message: String, cause: Throwable = null) + extends TweetStorageException(message, cause) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala new file mode 100644 index 000000000..b10ef107d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/TweetUtils.scala @@ -0,0 +1,265 @@ +package com.twitter.tweetypie.storage + +import com.twitter.logging.Logger +import com.twitter.scrooge.TFieldBlob +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanException +import com.twitter.tweetypie.storage.Response._ +import com.twitter.tweetypie.storage_internal.thriftscala.StoredTweet +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +object TweetUtils { + val log: Logger = Logger("com.twitter.tweetypie.storage.TweetStorageLibrary") + import FieldResponseCodec.ValueNotFoundException + + /** + * It's rare, but we have seen tweets with userId=0, which is likely the result of a + * failed/partial delete. Treat these as invalid tweets, which are returned to callers + * as not found. + */ + def isValid(tweet: StoredTweet): Boolean = + tweet.userId.exists(_ != 0) && tweet.text.nonEmpty && + tweet.createdVia.nonEmpty && tweet.createdAtSec.nonEmpty + + /** + * Helper function to extract Scrubbed field Ids from the result returned by reading entire tweet prefix + * function. 
+ * + * @param records The sequence of MH records for the given tweetId + * + * @return The set of scrubbed field ids + */ + private[tweetypie] def extractScrubbedFields(records: Seq[TweetManhattanRecord]): Set[Short] = + records + .map(r => r.lkey) + .collect { case TweetKey.LKey.ScrubbedFieldKey(fieldId) => fieldId } + .toSet + + private[tweetypie] val expectedFields = + TweetFields.requiredFieldIds.toSet - TweetFields.tweetIdField + + /** + * Find the timestamp from a tweetId and a list of MH records. This is used when + * you need a timestamp and you aren't sure that tweetId is a snowflake id. + * + * @param tweetId A tweetId you want the timestamp for. + * @param records Tbird_mh records keyed on tweetId, one of which should be the + * core fields record. + * @return A milliseconds timestamp if one could be found. + */ + private[tweetypie] def creationTimeFromTweetIdOrMHRecords( + tweetId: Long, + records: Seq[TweetManhattanRecord] + ): Option[Long] = + SnowflakeId + .unixTimeMillisOptFromId(tweetId).orElse({ + records + .find(_.lkey == TweetKey.LKey.CoreFieldsKey) + .flatMap { coreFields => + CoreFieldsCodec + .fromTFieldBlob( + TFieldBlobCodec.fromByteBuffer(coreFields.value.contents) + ).createdAtSec.map(seconds => seconds * 1000) + } + }) + + /** + * Helper function used to parse manhattan results for fields in a tweet (given in the form of + * Sequence of (FieldKey, Try[Unit]) pairs) and build a TweetResponse object. + * + * @param callerName The name of the caller function. Used for error messages + * @param tweetId Id of the Tweet for which TweetResponse is being built + * @param fieldResults Sequence of (FieldKey, Try[Unit]). 
+ * + * @return TweetResponse object + */ + private[tweetypie] def buildTweetResponse( + callerName: String, + tweetId: Long, + fieldResults: Map[FieldId, Try[Unit]] + ): TweetResponse = { + // Count successes; a ValueNotFoundException (value not found) also counts as a success + val successCount = + fieldResults.foldLeft(0) { + case (count, (_, Return(_))) => count + 1 + case (count, (_, Throw(_: ValueNotFoundException))) => count + 1 + case (count, _) => count + } + + val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResults) + + val overallCode = if (successCount > 0 && successCount == fieldResults.size) { + TweetResponseCode.Success + } else { + + // If any field was rate limited, then we consider the entire tweet to be rate limited. So first we scan + // the field results to check such an occurrence. + val wasRateLimited = fieldResults.exists { fieldResult => + fieldResult._2 match { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + } + + // Were we rate limited for any of the additional fields? + if (wasRateLimited) { + TweetResponseCode.OverCapacity + } else if (successCount == 0) { + // successCount == 0 here: no field succeeded (or there were no fields at all), + // so the overall code should be 'Failure' + TweetResponseCode.Failure + } else { + // 0 < successCount < fieldResults.size at this point, so the overallCode should be Partial + TweetResponseCode.Partial + } + } + + TweetResponse(tweetId, overallCode, Some(fieldResponsesMap)) + + } + + /** + * Helper function to convert manhattan results into a Map[FieldId, FieldResponse] + * + * @param fieldResults Map from field id to the Try result obtained for that field. 
+ */ + private[tweetypie] def getFieldResponses( + callerName: String, + tweetId: TweetId, + fieldResults: Map[FieldId, Try[_]] + ): Map[FieldId, FieldResponse] = + fieldResults.map { + case (fieldId, resp) => + def keyStr = TweetKey.fieldKey(tweetId, fieldId).toString + resp match { + case Return(_) => + fieldId -> FieldResponse(FieldResponseCode.Success, None) + case Throw(mhException: ManhattanException) => + val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $mhException" + mhException match { + case _: ValueNotFoundException => // ValueNotFound is not an error + case _ => log.error(errMsg) + } + fieldId -> FieldResponseCodec.fromThrowable(mhException, Some(errMsg)) + case Throw(e) => + val errMsg = s"Exception in $callerName. Key: $keyStr. Error: $e" + log.error(errMsg) + fieldId -> FieldResponse(FieldResponseCode.Error, Some(errMsg)) + } + } + + /** + * Helper function to build a TweetResponse object when being rate limited. It's possible that only some of the fields + * got rate limited, so we indicate which fields got processed successfully, and which encountered some sort of error. + * + * @param tweetId Tweet id + * @param callerName name of API calling this function + * @param fieldResponses per-field results (successes and failures) of the attempted operation + * + * @return The TweetResponse object + */ + private[tweetypie] def buildTweetOverCapacityResponse( + callerName: String, + tweetId: Long, + fieldResponses: Map[FieldId, Try[Unit]] + ) = { + val fieldResponsesMap = getFieldResponses(callerName, tweetId, fieldResponses) + TweetResponse(tweetId, TweetResponseCode.OverCapacity, Some(fieldResponsesMap)) + } + + /** + * Build a StoredTweet from a Seq of records. Core fields are handled specially. 
+ */ + private[tweetypie] def buildStoredTweet( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + includeScrubbed: Boolean = false, + ): StoredTweet = { + getStoredTweetBlobs(records, includeScrubbed) + .flatMap { fieldBlob => + // When fieldId == TweetFields.rootCoreFieldId, we have further work to do since the + // 'value' is really serialized/packed version of all core fields. In this case we'll have + // to unpack it into many TFieldBlobs. + if (fieldBlob.id == TweetFields.rootCoreFieldId) { + // We won't throw any error in this function and instead let the caller function handle this + // condition (i.e If the caller function does not find any values for the core-fields in + // the returned map, it should assume that the tweet is not found) + CoreFieldsCodec.unpackFields(fieldBlob).values.toSeq + } else { + Seq(fieldBlob) + } + }.foldLeft(StoredTweet(tweetId))(_.setField(_)) + } + + private[tweetypie] def buildValidStoredTweet( + tweetId: TweetId, + records: Seq[TweetManhattanRecord] + ): Option[StoredTweet] = { + val storedTweet = buildStoredTweet(tweetId, records) + if (storedTweet.getFieldBlobs(expectedFields).nonEmpty && isValid(storedTweet)) { + Some(storedTweet) + } else { + None + } + } + + /** + * Return a TFieldBlob for each StoredTweet field defined in this set of records. + * @param includeScrubbed when false, result will not include scrubbed fields even + * if the data is present in the set of records. 
+ */ + private[tweetypie] def getStoredTweetBlobs( + records: Seq[TweetManhattanRecord], + includeScrubbed: Boolean = false, + ): Seq[TFieldBlob] = { + val scrubbed = extractScrubbedFields(records) + + records + .flatMap { r => + // extract LKey.FieldKey records if they are not scrubbed and get their TFieldBlobs + r.key match { + case fullKey @ TweetKey(_, key: TweetKey.LKey.FieldKey) + if includeScrubbed || !scrubbed.contains(key.fieldId) => + try { + val fieldBlob = TFieldBlobCodec.fromByteBuffer(r.value.contents) + if (fieldBlob.field.id != key.fieldId) { + throw new AssertionError( + s"Blob stored for $fullKey has unexpected id ${fieldBlob.field.id}" + ) + } + Some(fieldBlob) + } catch { + case e: VersionMismatchError => + log.error( + s"Failed to decode bytebuffer for $fullKey: ${e.getMessage}" + ) + throw e + } + case _ => None + } + } + } + + /** + * It's important to bubble up rate limiting exceptions as they would likely be the root cause for other issues + * (timeouts etc.), so we scan for this particular exception, and if found, we bubble that up specifically + * + * @param seqOfTries The sequence of tries which may contain within it a rate limit exception + * + * @return if a rate limiting exn was detected, this will be a Throw(e: DeniedManhattanException) + * otherwise it will be a Return(_) only if all individual tries succeeded + */ + private[tweetypie] def collectWithRateLimitCheck(seqOfTries: Seq[Try[Unit]]): Try[Unit] = { + val rateLimitThrowOpt = seqOfTries.find { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + + rateLimitThrowOpt.getOrElse( + Try.collect(seqOfTries).map(_ => ()) + ) // Operation is considered successful only if all the individual operations are successful + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala new file mode 100644 index 000000000..f0e14eb9d --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UndeleteHandler.scala @@ -0,0 +1,106 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetStorageClient.Undelete +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.util.Time + +object UndeleteHandler { + def apply( + read: ManhattanOperations.Read, + localInsert: ManhattanOperations.Insert, + remoteInsert: ManhattanOperations.Insert, + delete: ManhattanOperations.Delete, + undeleteWindowHours: Int, + stats: StatsReceiver + ): Undelete = { + def withinUndeleteWindow(timestampMs: Long) = + (Time.now - Time.fromMilliseconds(timestampMs)).inHours < undeleteWindowHours + + def prepareUndelete( + tweetId: TweetId, + records: Seq[TweetManhattanRecord] + ): (Undelete.Response, Option[TweetManhattanRecord]) = { + val undeleteRecord = + Some(TweetStateRecord.Undeleted(tweetId, Time.now.inMillis).toTweetMhRecord) + + TweetStateRecord.mostRecent(records) match { + // check if we need to undo a soft deletion + case Some(TweetStateRecord.SoftDeleted(_, createdAt)) => + if (createdAt > 0) { + if (withinUndeleteWindow(createdAt)) { + ( + mkSuccessfulUndeleteResponse(tweetId, records, Some(createdAt)), + undeleteRecord + ) + } else { + (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) + } + } else { + throw InternalError(s"Timestamp unavailable for $tweetId") + } + + // BounceDeleted tweets may not be undeleted. 
see go/bouncedtweet + case Some(_: TweetStateRecord.HardDeleted | _: TweetStateRecord.BounceDeleted) => + (Undelete.Response(Undelete.UndeleteResponseCode.BackupNotFound), None) + + case Some(_: TweetStateRecord.Undeleted) => + // We still want to write the undelete record, because at this point, we only know that the local DC's + // winning record is not a soft/hard deletion record, while it's possible that the remote DC's winning + // record might still be a soft deletion record. Having said that, we don't want to write it + // if the winning record is forceAdd, as the forceAdd call should have ensured that both DCs had the + // forceAdd record. + (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) + + case Some(_: TweetStateRecord.ForceAdded) => + (mkSuccessfulUndeleteResponse(tweetId, records), None) + + // let's write the undeletion record just in case there is a softdeletion record in flight + case None => (mkSuccessfulUndeleteResponse(tweetId, records), undeleteRecord) + } + } + + // Write the undelete record both locally and remotely to protect + // against races with hard delete replication. We only need this + // protection for the insertion of the undelete record. 
+ def multiInsert(record: TweetManhattanRecord): Stitch[Unit] = + Stitch + .collect( + Seq( + localInsert(record).liftToTry, + remoteInsert(record).liftToTry + ) + ) + .map(collectWithRateLimitCheck) + .lowerFromTry + + def deleteSoftDeleteRecord(tweetId: TweetId): Stitch[Unit] = { + val mhKey = TweetKey.softDeletionStateKey(tweetId) + delete(mhKey, None) + } + + tweetId => + for { + records <- read(tweetId) + (response, undeleteRecord) = prepareUndelete(tweetId, records) + _ <- Stitch.collect(undeleteRecord.map(multiInsert)).unit + _ <- deleteSoftDeleteRecord(tweetId) + } yield { + response + } + } + + private[storage] def mkSuccessfulUndeleteResponse( + tweetId: TweetId, + records: Seq[TweetManhattanRecord], + timestampOpt: Option[Long] = None + ) = + Undelete.Response( + Undelete.UndeleteResponseCode.Success, + Some( + StorageConversions.fromStoredTweet(buildStoredTweet(tweetId, records)) + ), + archivedAtMillis = timestampOpt + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala new file mode 100644 index 000000000..7bf68f6ef --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/UpdateTweetHandler.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie.storage + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.DeniedManhattanException +import com.twitter.storage.client.manhattan.kv.ManhattanValue +import com.twitter.tweetypie.storage.TweetUtils._ +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Throw +import com.twitter.util.Time + +object UpdateTweetHandler { + def apply( + insert: ManhattanOperations.Insert, + stats: StatsReceiver + ): TweetStorageClient.UpdateTweet = { (tpTweet: Tweet, fields: Seq[Field]) => + require( + fields.forall(!TweetFields.coreFieldIds.contains(_)), + "Core fields cannot be 
modified by calling updateTweet; use addTweet instead." + ) + require( + areAllFieldsDefined(tpTweet, fields), + s"Input tweet $tpTweet does not have specified fields $fields set" + ) + + val now = Time.now + val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) + val tweetId = storedTweet.id + Stats.updatePerFieldQpsCounters("updateTweet", fields.map(_.id), 1, stats) + + val (fieldIds, stitchesPerTweet) = + fields.map { field => + val fieldId = field.id + val tweetKey = TweetKey.fieldKey(tweetId, fieldId) + val blob = storedTweet.getFieldBlob(fieldId).get + val value = ManhattanValue(TFieldBlobCodec.toByteBuffer(blob), Some(now)) + val record = TweetManhattanRecord(tweetKey, value) + + (fieldId, insert(record).liftToTry) + }.unzip + + Stitch.collect(stitchesPerTweet).map { seqOfTries => + val fieldkeyAndMhResults = fieldIds.zip(seqOfTries).toMap + // If even a single field was rate limited, we will send an overall OverCapacity TweetResponse + val wasRateLimited = fieldkeyAndMhResults.exists { keyAndResult => + keyAndResult._2 match { + case Throw(e: DeniedManhattanException) => true + case _ => false + } + } + + if (wasRateLimited) { + buildTweetOverCapacityResponse("updateTweets", tweetId, fieldkeyAndMhResults) + } else { + buildTweetResponse("updateTweets", tweetId, fieldkeyAndMhResults) + } + } + } + + private def areAllFieldsDefined(tpTweet: Tweet, fields: Seq[Field]) = { + val storedTweet = StorageConversions.toStoredTweetForFields(tpTweet, fields.toSet) + fields.map(_.id).forall(storedTweet.getFieldBlob(_).isDefined) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala new file mode 100644 index 000000000..57a02248b --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/storage/package.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie + +import com.twitter.storage.client.manhattan.kv.ManhattanValue 
+import java.nio.ByteBuffer + +package object storage { + type TweetId = Long + type FieldId = Short + + type TweetManhattanValue = ManhattanValue[ByteBuffer] +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD new file mode 100644 index 000000000..e93c3b2ba --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/BUILD @@ -0,0 +1,20 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "snowflake:id", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-java", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala new file mode 100644 index 000000000..046ff226a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TFlockIndexer.scala @@ -0,0 +1,532 @@ +/** Copyright 2010 Twitter, Inc. 
*/ +package com.twitter.tweetypie +package tflock + +import com.twitter.finagle.stats.Counter +import com.twitter.flockdb.client._ +import com.twitter.flockdb.client.thriftscala.Priority +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.serverutil.StoredCard +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future +import scala.collection.mutable.ListBuffer + +object TFlockIndexer { + + /** + * Printable names for some edge types currently defined in [[com.twitter.flockdb.client]]. + * Used to define per-graph stats counters for edge operations. + */ + val graphNames: Map[Int, String] = + Map( + CardTweetsGraph.id -> "card_tweets", + ConversationGraph.id -> "conversation", + DirectedAtUserIdGraph.id -> "directed_at_user_id", + InvitedUsersGraph.id -> "invited_users", + MediaTimelineGraph.id -> "media_timeline", + MentionsGraph.id -> "mentions", + NarrowcastSentTweetsGraph.id -> "narrowcast_sent_tweets", + NullcastedTweetsGraph.id -> "nullcasted_tweets", + QuotersGraph.id -> "quoters", + QuotesGraph.id -> "quotes", + QuoteTweetsIndexGraph.id -> "quote_tweets_index", + RepliesToTweetsGraph.id -> "replies_to_tweets", + RetweetsByMeGraph.id -> "retweets_by_me", + RetweetsGraph.id -> "retweets", + RetweetsOfMeGraph.id -> "retweets_of_me", + RetweetSourceGraph.id -> "retweet_source", + TweetsRetweetedGraph.id -> "tweets_retweeted", + UserTimelineGraph.id -> "user_timeline", + CreatorSubscriptionTimelineGraph.id -> "creator_subscription_timeline", + CreatorSubscriptionMediaTimelineGraph.id -> "creator_subscription_image_timeline", + ) + + /** + * On edge deletion, edges are either archived permanently or retained for 3 months, based on + * the retention policy of the edge's graph (the page originally referenced here is not part of this file). + * + * These two retention policies correspond to the two deletion techniques: archive and remove. + * We call removeEdges for edges with a short retention policy and archiveEdges for edges with + * a permanent retention policy. 
+ */ + val graphsWithRemovedEdges: Seq[Int] = + Seq( + CardTweetsGraph.id, + CuratedTimelineGraph.id, + CuratedTweetsGraph.id, + DirectedAtUserIdGraph.id, + MediaTimelineGraph.id, + MutedConversationsGraph.id, + QuotersGraph.id, + QuotesGraph.id, + QuoteTweetsIndexGraph.id, + ReportedTweetsGraph.id, + RetweetsOfMeGraph.id, + RetweetSourceGraph.id, + SoftLikesGraph.id, + TweetsRetweetedGraph.id, + CreatorSubscriptionTimelineGraph.id, + CreatorSubscriptionMediaTimelineGraph.id, + ) + + /** + * These edges should be left in place when bounced tweets are deleted. + * These edges are removed during hard deletion. + * + * This is done so external teams (timelines) can execute on these edges for + * tombstone feature. + */ + val bounceDeleteGraphIds: Set[Int] = + Set( + UserTimelineGraph.id, + ConversationGraph.id + ) + + def makeCounters(stats: StatsReceiver, operation: String): Map[Int, Counter] = { + TFlockIndexer.graphNames + .mapValues(stats.scope(_).counter(operation)) + .withDefaultValue(stats.scope("unknown").counter(operation)) + } +} + +/** + * @param backgroundIndexingPriority specifies the queue to use for + * background indexing operations. This is useful for making the + * effects of background indexing operations (such as deleting edges + * for deleted Tweets) available sooner in testing scenarios + * (end-to-end tests or development instances). It is set to + * Priority.Low in production to reduce the load on high priority + * queues that we use for prominently user-visible operations. 
+ */ +class TFlockIndexer( + tflock: TFlockClient, + hasMedia: Tweet => Boolean, + backgroundIndexingPriority: Priority, + stats: StatsReceiver) + extends TweetIndexer { + private[this] val FutureNil = Future.Nil + + private[this] val archiveCounters = TFlockIndexer.makeCounters(stats, "archive") + private[this] val removeCounters = TFlockIndexer.makeCounters(stats, "remove") + private[this] val insertCounters = TFlockIndexer.makeCounters(stats, "insert") + private[this] val negateCounters = TFlockIndexer.makeCounters(stats, "negate") + + private[this] val foregroundIndexingPriority: Priority = Priority.High + + override def createIndex(tweet: Tweet): Future[Unit] = + createEdges(tweet, isUndelete = false) + + override def undeleteIndex(tweet: Tweet): Future[Unit] = + createEdges(tweet, isUndelete = true) + + private[this] case class PartitionedEdges( + longRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, + shortRetention: Seq[ExecuteEdge[StatusGraph]] = Nil, + negate: Seq[ExecuteEdge[StatusGraph]] = Nil, + ignore: Seq[ExecuteEdge[StatusGraph]] = Nil) + + private[this] def partitionEdgesForDelete( + edges: Seq[ExecuteEdge[StatusGraph]], + isBounceDelete: Boolean + ) = + edges.foldLeft(PartitionedEdges()) { + // Two dependees of UserTimelineGraph edge states to satisfy: timelines & safety tools. + // Timelines show bounce-deleted tweets as tombstones; regular deletes are not shown. + // - i.e. timelineIds = UserTimelineGraph(Normal || Negative) + // Safety tools show deleted tweets to authorized internal review agents + // - i.e. 
deletedIds = UserTimelineGraph(Removed || Negative) + case (partitionedEdges, edge) if isBounceDelete && edge.graphId == UserTimelineGraph.id => + partitionedEdges.copy(negate = edge +: partitionedEdges.negate) + + case (partitionedEdges, edge) if isBounceDelete && edge.graphId == ConversationGraph.id => + // Bounce-deleted tweets remain rendered as tombstones in conversations, so do not modify + // the ConversationGraph edge state + partitionedEdges.copy(ignore = edge +: partitionedEdges.ignore) + + case (partitionedEdges, edge) + if TFlockIndexer.graphsWithRemovedEdges.contains(edge.graphId) => + partitionedEdges.copy(shortRetention = edge +: partitionedEdges.shortRetention) + + case (partitionedEdges, edge) => + partitionedEdges.copy(longRetention = edge +: partitionedEdges.longRetention) + } + + override def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = + for { + edges <- getEdges(tweet, isCreate = false, isDelete = true, isUndelete = false) + partitionedEdges = partitionEdgesForDelete(edges, isBounceDelete) + () <- + Future + .join( + tflock + .archiveEdges(partitionedEdges.longRetention, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.longRetention.foreach(e => archiveCounters(e.graphId).incr())), + tflock + .removeEdges(partitionedEdges.shortRetention, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.shortRetention.foreach(e => removeCounters(e.graphId).incr())), + tflock + .negateEdges(partitionedEdges.negate, backgroundIndexingPriority) + .onSuccess(_ => + partitionedEdges.negate.foreach(e => negateCounters(e.graphId).incr())) + ) + .unit + } yield () + + /** + * This operation is called when a user is put into or taken out of + * a state in which their retweets should no longer be visible + * (e.g. suspended or ROPO). 
+ */ + override def setRetweetVisibility(retweetId: TweetId, setVisible: Boolean): Future[Unit] = { + val retweetEdge = Seq(ExecuteEdge(retweetId, RetweetsGraph, None, Reverse)) + + if (setVisible) { + tflock + .insertEdges(retweetEdge, backgroundIndexingPriority) + .onSuccess(_ => insertCounters(RetweetsGraph.id).incr()) + } else { + tflock + .archiveEdges(retweetEdge, backgroundIndexingPriority) + .onSuccess(_ => archiveCounters(RetweetsGraph.id).incr()) + } + } + + private[this] def createEdges(tweet: Tweet, isUndelete: Boolean): Future[Unit] = + for { + edges <- getEdges(tweet = tweet, isCreate = true, isDelete = false, isUndelete = isUndelete) + () <- tflock.insertEdges(edges, foregroundIndexingPriority) + } yield { + // Count all the edges we've successfully added: + edges.foreach(e => insertCounters(e.graphId).incr()) + } + + private[this] def addRTEdges( + tweet: Tweet, + share: Share, + isCreate: Boolean, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + + edges += RetweetsOfMeGraph.edge(share.sourceUserId, tweet.id) + edges += RetweetsByMeGraph.edge(getUserId(tweet), tweet.id) + edges += RetweetsGraph.edge(share.sourceStatusId, tweet.id) + + if (isCreate) { + edges += ExecuteEdge( + sourceId = getUserId(tweet), + graph = RetweetSourceGraph, + destinationIds = Some(Seq(share.sourceStatusId)), + direction = Forward, + position = Some(SnowflakeId(tweet.id).time.inMillis) + ) + edges.append(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) + } else { + edges += RetweetSourceGraph.edge(getUserId(tweet), share.sourceStatusId) + + // if this is the last retweet we need to remove it from the source user's + // tweets retweeted graph + futureEdges.append( + tflock.count(RetweetsGraph.from(share.sourceStatusId)).flatMap { count => + if (count <= 1) { + tflock.selectAll(RetweetsGraph.from(share.sourceStatusId)).map { tweets => + if (tweets.size <= 1) + 
Seq(TweetsRetweetedGraph.edge(share.sourceUserId, share.sourceStatusId)) + else + Nil + } + } else { + FutureNil + } + } + ) + } + } + + private[this] def addReplyEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + getReply(tweet).foreach { reply => + reply.inReplyToStatusId.foreach { inReplyToStatusId => + edges += RepliesToTweetsGraph.edge(inReplyToStatusId, tweet.id) + + // only index conversationId if this is a reply to another tweet + TweetLenses.conversationId.get(tweet).foreach { conversationId => + edges += ConversationGraph.edge(conversationId, tweet.id) + } + } + } + } + + private[this] def addDirectedAtEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + TweetLenses.directedAtUser.get(tweet).foreach { directedAtUser => + edges += DirectedAtUserIdGraph.edge(directedAtUser.userId, tweet.id) + } + } + + private[this] def addMentionEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + getMentions(tweet) + .flatMap(_.userId).foreach { mention => + edges += MentionsGraph.edge(mention, tweet.id) + } + } + + private[this] def addQTEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]], + isCreate: Boolean + ): Unit = { + val userId = getUserId(tweet) + + tweet.quotedTweet.foreach { quotedTweet => + // Regardless of tweet creates/deletes, we add the corresponding edges to the + // following two graphs. Note that we're handling the case for + // the QuotersGraph slightly differently in the tweet delete case. + edges.append(QuotesGraph.edge(quotedTweet.userId, tweet.id)) + edges.append(QuoteTweetsIndexGraph.edge(quotedTweet.tweetId, tweet.id)) + if (isCreate) { + // As mentioned above, for tweet creates we go ahead and add an edge + // to the QuotersGraph without any additional checks. 
+ edges.append(QuotersGraph.edge(quotedTweet.tweetId, userId)) + } else { + // For tweet deletes, we only add an edge to be deleted from the + // QuotersGraph if the tweeting user isn't quoting the tweet anymore + // i.e. if a user has quoted a tweet multiple times, we only delete + // an edge from the QuotersGraph if they've deleted all the quotes, + // otherwise an edge should exist by definition of what the QuotersGraph + // represents. + + // Note: There can be a potential edge case here due to a race condition + // in the following scenario. + // i) A quotes a tweet T twice resulting in tweets T1 and T2. + // ii) There should exist edges in the QuotersGraph from T -> A and T1 <-> T, T2 <-> T in + // the QuoteTweetsIndexGraph, but one of the edges haven't been written + // to the QuoteTweetsIndex graph in TFlock yet. + // iii) In this scenario, we shouldn't really be deleting an edge as we're doing below. + // The approach that we're taking below is a "best effort" approach similar to what we + // currently do for RTs. + + // Find all the quotes of the quoted tweet from the quoting user + val quotesFromQuotingUser = QuoteTweetsIndexGraph + .from(quotedTweet.tweetId) + .intersect(UserTimelineGraph.from(userId)) + futureEdges.append( + tflock + .count(quotesFromQuotingUser).flatMap { count => + // If this is the last quote of the quoted tweet from the quoting user, + // we go ahead and delete the edge from the QuotersGraph. + if (count <= 1) { + tflock.selectAll(quotesFromQuotingUser).map { tweets => + if (tweets.size <= 1) { + Seq(QuotersGraph.edge(quotedTweet.tweetId, userId)) + } else { + Nil + } + } + } else { + FutureNil + } + } + ) + } + } + } + + private[this] def addCardEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + // Note that we are indexing only the TOO "stored" cards + // (cardUri=card://). Rest of the cards are ignored here. 
+ tweet.cardReference + .collect { + case StoredCard(id) => + edges.append(CardTweetsGraph.edge(id, tweet.id)) + }.getOrElse(()) + } + + // Note: on undelete, this method restores all archived edges, including those that may have + // been archived prior to the delete. This is incorrect behavior but in practice rarely + // causes problems, as undeletes are so rare. + private[this] def addEdgesForDeleteOrUndelete( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + edges.appendAll( + Seq( + MentionsGraph.edges(tweet.id, None, Reverse), + RepliesToTweetsGraph.edges(tweet.id, None) + ) + ) + + // When we delete or undelete a conversation control root Tweet we want to archive or restore + // all the edges in InvitedUsersGraph from the Tweet id. + if (hasConversationControl(tweet) && isConversationRoot(tweet)) { + edges.append(InvitedUsersGraph.edges(tweet.id, None)) + } + } + + private[this] def addSimpleEdges( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + if (TweetLenses.nullcast.get(tweet)) { + edges.append(NullcastedTweetsGraph.edge(getUserId(tweet), tweet.id)) + } else if (TweetLenses.narrowcast.get(tweet).isDefined) { + edges.append(NarrowcastSentTweetsGraph.edge(getUserId(tweet), tweet.id)) + } else { + edges.append(UserTimelineGraph.edge(getUserId(tweet), tweet.id)) + + if (hasMedia(tweet)) + edges.append(MediaTimelineGraph.edge(getUserId(tweet), tweet.id)) + + // Index root creator subscription tweets. 
+ // Ignore replies because those are not necessarily visible to a user who subscribes to tweet author + val isRootTweet: Boolean = tweet.coreData match { + case Some(c) => c.reply.isEmpty && c.share.isEmpty + case None => true + } + + if (tweet.exclusiveTweetControl.isDefined && isRootTweet) { + edges.append(CreatorSubscriptionTimelineGraph.edge(getUserId(tweet), tweet.id)) + + if (hasMedia(tweet)) + edges.append(CreatorSubscriptionMediaTimelineGraph.edge(getUserId(tweet), tweet.id)) + } + } + } + + /** + * Issues an edge for each user mentioned in a conversation-controlled tweet. This way the + * InvitedUsersGraph accumulates the complete set of @mention-invited user ids per conversation. + */ + private def invitedUsersEdgesForCreate( + tweet: Tweet, + edges: ListBuffer[ExecuteEdge[StatusGraph]] + ): Unit = { + val conversationId: Long = getConversationId(tweet).getOrElse(tweet.id) + val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) + edges.appendAll(mentions.map(userId => InvitedUsersGraph.edge(conversationId, userId))) + } + + /** + * Issues edges of InvitedUsersGraph that ought to be deleted for a conversation-controlled reply. + * These are mentions of users in the given tweet, only if the user was not mentioned elsewhere + * in the conversation. This way for a conversation, InvitedUsersGraph would always hold a set + * of all users invited to the conversation, and an edge is removed only after the last mention of + * a user is deleted. 
+ */ + private def invitedUsersEdgesForDelete( + tweet: Tweet, + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + getConversationId(tweet).foreach { conversationId: Long => + val mentions: Seq[UserId] = getMentions(tweet).flatMap(_.userId) + mentions.foreach { userId => + val tweetIdsWithinConversation = ConversationGraph.from(conversationId) + val tweetIdsThatMentionUser = MentionsGraph.from(userId) + futureEdges.append( + tflock + .selectAll( + query = tweetIdsThatMentionUser.intersect(tweetIdsWithinConversation), + limit = Some(2), // Just need to know if it is >1 or <=1, so 2 are enough. + pageSize = None // Provide default, otherwise Mockito complains + ).map { tweetIds: Seq[Long] => + if (tweetIds.size <= 1) { + Seq(InvitedUsersGraph.edge(conversationId, userId)) + } else { + Nil + } + } + ) + } + } + } + + private def hasInviteViaMention(tweet: Tweet): Boolean = { + tweet.conversationControl match { + case Some(ConversationControl.ByInvitation(controls)) => + controls.inviteViaMention.getOrElse(false) + case Some(ConversationControl.Community(controls)) => + controls.inviteViaMention.getOrElse(false) + case Some(ConversationControl.Followers(followers)) => + followers.inviteViaMention.getOrElse(false) + case _ => + false + } + } + + private def hasConversationControl(tweet: Tweet): Boolean = + tweet.conversationControl.isDefined + + // If a Tweet has a ConversationControl, it must have a ConversationId associated with it so we + // can compare the ConversationId with the current Tweet ID to determine if it's the root of the + // conversation. 
See ConversationIdHydrator for more details. A missing id is treated as root. + private def isConversationRoot(tweet: Tweet): Boolean = + getConversationId(tweet).forall(_ == tweet.id) + + private def addInvitedUsersEdges( + tweet: Tweet, + isCreate: Boolean, + isUndelete: Boolean, + edges: ListBuffer[ExecuteEdge[StatusGraph]], + futureEdges: ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]] + ): Unit = { + if (hasConversationControl(tweet)) { + if (isCreate) { + if (isConversationRoot(tweet) && !isUndelete) { + // For root Tweets, only add edges for original creates, not for undeletes. + // Undeletes are handled by addEdgesForDeleteOrUndelete. + invitedUsersEdgesForCreate(tweet, edges) + } + if (!isConversationRoot(tweet) && hasInviteViaMention(tweet)) { + // For replies, only add edges when the conversation control is in inviteViaMention mode. + invitedUsersEdgesForCreate(tweet, edges) + } + } else { + if (!isConversationRoot(tweet)) { + invitedUsersEdgesForDelete(tweet, futureEdges) + } + } + } + } + + private[this] def getEdges( + tweet: Tweet, + isCreate: Boolean, + isDelete: Boolean, + isUndelete: Boolean + ): Future[Seq[ExecuteEdge[StatusGraph]]] = { + val edges = ListBuffer[ExecuteEdge[StatusGraph]]() + val futureEdges = ListBuffer[Future[Seq[ExecuteEdge[StatusGraph]]]]() + + addSimpleEdges(tweet, edges) + getShare(tweet) match { + case Some(share) => addRTEdges(tweet, share, isCreate, edges, futureEdges) + case _ => + addInvitedUsersEdges(tweet, isCreate, isUndelete, edges, futureEdges) + addReplyEdges(tweet, edges) + addDirectedAtEdges(tweet, edges) + addMentionEdges(tweet, edges) + addQTEdges(tweet, edges, futureEdges, isCreate) + addCardEdges(tweet, edges) + if (isDelete || isUndelete) { + addEdgesForDeleteOrUndelete(tweet, edges) + } + } + + Future + .collect(futureEdges) + .map { moreEdges => (edges ++= moreEdges.flatten).toList } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala new file mode 100644 index 000000000..9145a4362 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tflock/TweetIndexer.scala @@ -0,0 +1,30 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package tflock + +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Future + +trait TweetIndexer { + + /** + * Called at tweet-creation time, this method should set up all relevant indices on the tweet. + */ + def createIndex(tweet: Tweet): Future[Unit] = Future.Unit + + /** + * Called at tweet-undelete time (which isn't yet handled), this method should + * restore all relevant indices on the tweet. + */ + def undeleteIndex(tweet: Tweet): Future[Unit] = Future.Unit + + /** + * Called at tweet-delete time, this method should archive all relevant indices on the tweet. + */ + def deleteIndex(tweet: Tweet, isBounceDelete: Boolean): Future[Unit] = Future.Unit + + /** + * This method should archive or unarchive the retweet edge in TFlock RetweetsGraph. 
+ */ + def setRetweetVisibility(retweetId: TweetId, visible: Boolean): Future[Unit] = Future.Unit +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD new file mode 100644 index 000000000..c7ad2b832 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/BUILD @@ -0,0 +1,13 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala new file mode 100644 index 000000000..f450abd15 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/NotImplementedTweetService.scala @@ -0,0 +1,8 @@ +package com.twitter.tweetypie.thriftscala + +import com.twitter.finagle.service.FailedService + +class NotImplementedTweetService + extends TweetService$FinagleClient( + new FailedService(new UnsupportedOperationException("not implemented")) + ) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala new file mode 100644 index 000000000..df3ca4362 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/TweetServiceProxy.scala @@ -0,0 +1,79 @@ +package com.twitter.tweetypie.thriftscala + +import com.twitter.util.Future + +/** + * A trait for TweetService implementations that wrap an underlying + * TweetService and need to modify only some of the methods. 
+ */ +trait TweetServiceProxy extends TweetService.MethodPerEndpoint { + protected def underlying: TweetService.MethodPerEndpoint + + /** + * Default implementation simply passes through the Future but logic can be added to wrap each + * invocation to the underlying TweetService + */ + protected def wrap[A](f: => Future[A]): Future[A] = + f + + override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = + wrap(underlying.getTweets(request)) + + override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = + wrap(underlying.getTweetFields(request)) + + override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = + wrap(underlying.getTweetCounts(request)) + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + wrap(underlying.setAdditionalFields(request)) + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = + wrap(underlying.deleteAdditionalFields(request)) + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + wrap(underlying.postTweet(request)) + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = + wrap(underlying.postRetweet(request)) + + override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = + wrap(underlying.unretweet(request)) + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = + wrap(underlying.getDeletedTweets(request)) + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = + wrap(underlying.deleteTweets(request)) + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + wrap(underlying.updatePossiblySensitiveTweet(request)) + + override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = + wrap(underlying.undeleteTweet(request)) + 
+ override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = + wrap(underlying.eraseUserTweets(request)) + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + wrap(underlying.incrTweetFavCount(request)) + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + wrap(underlying.deleteLocationData(request)) + + override def scrubGeo(request: GeoScrub): Future[Unit] = + wrap(underlying.scrubGeo(request)) + + override def takedown(request: TakedownRequest): Future[Unit] = + wrap(underlying.takedown(request)) + + override def flush(request: FlushRequest): Future[Unit] = + wrap(underlying.flush(request)) + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + wrap(underlying.incrTweetBookmarkCount(request)) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD new file mode 100644 index 000000000..ff66fe5b2 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/BUILD @@ -0,0 +1,15 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tco-util", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala new file mode 100644 index 000000000..09c0941ec --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/CashtagTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.CashtagEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object CashtagTextEntity extends TextEntity[CashtagEntity] { + override def fromIndex(entity: CashtagEntity): Short = entity.fromIndex + override def toIndex(entity: CashtagEntity): Short = entity.toIndex + override def move(entity: CashtagEntity, fromIndex: Short, toIndex: Short): CashtagEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala new file mode 100644 index 000000000..c9d7b30bc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/EntityExtractor.scala @@ -0,0 +1,118 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.servo.data.Mutation +import com.twitter.tco_util.TcoUrl +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.thriftscala.entities.Implicits._ +import com.twitter.tweetypie.tweettext.PartialHtmlEncoding +import com.twitter.tweetypie.tweettext.TextEntity +import com.twitter.tweetypie.tweettext.TextModification +import com.twitter.tweetypie.util.TweetLenses +import com.twitter.twittertext.Extractor +import scala.collection.JavaConverters._ + +/** + * Contains functions to collect urls, mentions, hashtags, and cashtags from the text of tweets and messages + */ +object EntityExtractor { + // We only use one configuration of com.twitter.twittertext.Extractor, so it's + // OK to share one global reference. 
The only available + // configuration option is whether to extract URLs without protocols + // (defaults to true) + private[this] val extractor = new Extractor + + // The twitter-text library operates on unencoded text, but we store + // and process HTML-encoded text. The TextModification returned + // from this function contains the decoded text which we will operate on, + // but also provides us with the ability to map the indices on + // the twitter-text entities back to the entities on the encoded text. + private val htmlEncodedTextToEncodeModification: String => TextModification = + text => + PartialHtmlEncoding + .decodeWithModification(text) + .getOrElse(TextModification.identity(text)) + .inverse + + private[this] val extractAllUrlsFromTextMod: TextModification => Seq[UrlEntity] = + extractUrls(false) + + val extractAllUrls: String => Seq[UrlEntity] = + htmlEncodedTextToEncodeModification.andThen(extractAllUrlsFromTextMod) + + private[this] val extractTcoUrls: TextModification => Seq[UrlEntity] = + extractUrls(true) + + private[this] def extractUrls(tcoOnly: Boolean): TextModification => Seq[UrlEntity] = + mkEntityExtractor[UrlEntity]( + extractor.extractURLsWithIndices(_).asScala.filter { e => + if (tcoOnly) TcoUrl.isTcoUrl(e.getValue) else true + }, + UrlEntity(_, _, _) + ) + + private[this] val extractMentionsFromTextMod: TextModification => Seq[MentionEntity] = + mkEntityExtractor[MentionEntity]( + extractor.extractMentionedScreennamesWithIndices(_).asScala, + MentionEntity(_, _, _) + ) + + val extractMentions: String => Seq[MentionEntity] = + htmlEncodedTextToEncodeModification.andThen(extractMentionsFromTextMod) + + private[this] val extractHashtagsFromTextMod: TextModification => Seq[HashtagEntity] = + mkEntityExtractor[HashtagEntity]( + extractor.extractHashtagsWithIndices(_).asScala, + HashtagEntity(_, _, _) + ) + + val extractHashtags: String => Seq[HashtagEntity] = + htmlEncodedTextToEncodeModification.andThen(extractHashtagsFromTextMod) + + 
private[this] val extractCashtagsFromTextMod: TextModification => Seq[CashtagEntity] = + mkEntityExtractor[CashtagEntity]( + extractor.extractCashtagsWithIndices(_).asScala, + CashtagEntity(_, _, _) + ) + + val extractCashtags: String => Seq[CashtagEntity] = + htmlEncodedTextToEncodeModification.andThen(extractCashtagsFromTextMod) + + private[this] def mkEntityExtractor[E: TextEntity]( + extract: String => Seq[Extractor.Entity], + construct: (Short, Short, String) => E + ): TextModification => Seq[E] = + htmlEncodedMod => { + val convert: Extractor.Entity => Option[E] = + e => + for { + start <- asShort(e.getStart.intValue) + end <- asShort(e.getEnd.intValue) + if e.getValue != null + res <- htmlEncodedMod.reindexEntity(construct(start, end, e.getValue)) + } yield res + + val entities = extract(htmlEncodedMod.original) + extractor.modifyIndicesFromUTF16ToUnicode(htmlEncodedMod.original, entities.asJava) + entities.map(convert).flatten + } + + private[this] def asShort(i: Int): Option[Short] = + if (i.isValidShort) Some(i.toShort) else None + + private[this] def mutation(extractUrls: Boolean): Mutation[Tweet] = + Mutation { tweet => + val htmlEncodedMod = htmlEncodedTextToEncodeModification(TweetLenses.text.get(tweet)) + + Some( + tweet.copy( + urls = if (extractUrls) Some(extractTcoUrls(htmlEncodedMod)) else tweet.urls, + mentions = Some(extractMentionsFromTextMod(htmlEncodedMod)), + hashtags = Some(extractHashtagsFromTextMod(htmlEncodedMod)), + cashtags = Some(extractCashtagsFromTextMod(htmlEncodedMod)) + ) + ) + } + + val mutationWithoutUrls: Mutation[Tweet] = mutation(false) + val mutationAll: Mutation[Tweet] = mutation(true) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala new file mode 100644 index 000000000..4ba86ebc8 --- /dev/null +++ 
b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/HashtagTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.HashtagEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object HashtagTextEntity extends TextEntity[HashtagEntity] { + override def fromIndex(entity: HashtagEntity): Short = entity.fromIndex + override def toIndex(entity: HashtagEntity): Short = entity.toIndex + override def move(entity: HashtagEntity, fromIndex: Short, toIndex: Short): HashtagEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala new file mode 100644 index 000000000..a68595dee --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/Implicits.scala @@ -0,0 +1,10 @@ +package com.twitter.tweetypie.thriftscala.entities + +object Implicits { + implicit val hashtagTextEntity: HashtagTextEntity.type = HashtagTextEntity + implicit val cashtagTextEntity: CashtagTextEntity.type = CashtagTextEntity + implicit val mentionTextEntity: MentionTextEntity.type = MentionTextEntity + implicit val urlTextEntity: UrlTextEntity.type = UrlTextEntity + implicit val mediaTextEntity: MediaTextEntity.type = MediaTextEntity + implicit val textRangeTextEntity: TextRangeEntityAdapter.type = TextRangeEntityAdapter +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala new file mode 100644 index 000000000..45c145399 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MediaTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import 
com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object MediaTextEntity extends TextEntity[MediaEntity] { + override def fromIndex(entity: MediaEntity): Short = entity.fromIndex + override def toIndex(entity: MediaEntity): Short = entity.toIndex + override def move(entity: MediaEntity, fromIndex: Short, toIndex: Short): MediaEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala new file mode 100644 index 000000000..f4ce11a43 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/MentionTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.MentionEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object MentionTextEntity extends TextEntity[MentionEntity] { + override def fromIndex(entity: MentionEntity): Short = entity.fromIndex + override def toIndex(entity: MentionEntity): Short = entity.toIndex + override def move(entity: MentionEntity, fromIndex: Short, toIndex: Short): MentionEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala new file mode 100644 index 000000000..a0dd5be79 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/TextRangeEntityAdapter.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.TextRange +import com.twitter.tweetypie.tweettext.TextEntity + +object TextRangeEntityAdapter extends TextEntity[TextRange] { + override def 
fromIndex(entity: TextRange): Short = entity.fromIndex.toShort + override def toIndex(entity: TextRange): Short = entity.toIndex.toShort + override def move(entity: TextRange, fromIndex: Short, toIndex: Short): TextRange = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala new file mode 100644 index 000000000..8ab52747a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities/UrlTextEntity.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.thriftscala.entities + +import com.twitter.tweetypie.thriftscala.UrlEntity +import com.twitter.tweetypie.tweettext.TextEntity + +object UrlTextEntity extends TextEntity[UrlEntity] { + override def fromIndex(entity: UrlEntity): Short = entity.fromIndex + override def toIndex(entity: UrlEntity): Short = entity.toIndex + override def move(entity: UrlEntity, fromIndex: Short, toIndex: Short): UrlEntity = + entity.copy(fromIndex = fromIndex, toIndex = toIndex) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD new file mode 100644 index 000000000..0fb3b965a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/BUILD @@ -0,0 +1,16 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "tweetypie-tweettext", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/ibm/icu:icu4j", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala 
b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala new file mode 100644 index 000000000..e24076f55 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/GraphemeIndexIterator.scala @@ -0,0 +1,44 @@ +package com.twitter.tweetypie.tweettext + +import com.ibm.icu.text.BreakIterator + +/** + * Adapt the [[BreakIterator]] interface to a scala [[Iterator]] + * over the offsets of user-perceived characters in a String. + */ +object GraphemeIndexIterator { + + /** + * Produce an iterator over indices in the string that mark the end + * of a user-perceived character (grapheme) + */ + def ends(s: String): Iterator[Offset.CodeUnit] = + // The start of every grapheme but the first is also a grapheme + // end. The last grapheme ends at the end of the string. + starts(s).drop(1) ++ Iterator(Offset.CodeUnit.length(s)) + + /** + * Produce an iterator over indices in the string that mark the start + * of a user-perceived character (grapheme) + */ + def starts(s: String): Iterator[Offset.CodeUnit] = + new Iterator[Offset.CodeUnit] { + private[this] val it = BreakIterator.getCharacterInstance() + + it.setText(s) + + override def hasNext: Boolean = it.current < s.length + + override def next: Offset.CodeUnit = { + if (!hasNext) throw new IllegalArgumentException(s"${it.current()}, ${s.length}") + + // No matter what, we will be returning the value of `current`, + // which is the index of the start of the next grapheme. 
+ val result = it.current() + + it.next() + + Offset.CodeUnit(result) + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala new file mode 100644 index 000000000..6a4cb0f5a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/IndexConverter.scala @@ -0,0 +1,85 @@ +package com.twitter.tweetypie.tweettext + +/** + * An efficient converter of indices between code points and code units. + */ +class IndexConverter(text: String) { + // Keep track of a single corresponding pair of code unit and code point + // offsets so that we can re-use counting work if the next requested + // entity is near the most recent entity. + private var codePointIndex = 0 + // The code unit index should never split a surrogate pair. + private var charIndex = 0 + + /** + * @param offset Index into the string measured in code units. + * @return The code point index that corresponds to the specified character index. + */ + def toCodePoints(offset: Offset.CodeUnit): Offset.CodePoint = + Offset.CodePoint(codeUnitsToCodePoints(offset.toInt)) + + /** + * @param charIndex Index into the string measured in code units. + * @return The code point index that corresponds to the specified character index. + */ + def codeUnitsToCodePoints(charIndex: Int): Int = { + if (charIndex < this.charIndex) { + this.codePointIndex -= text.codePointCount(charIndex, this.charIndex) + } else { + this.codePointIndex += text.codePointCount(this.charIndex, charIndex) + } + this.charIndex = charIndex + + // Make sure that charIndex never points to the second code unit of a + // surrogate pair. + if (charIndex > 0 && Character.isSupplementaryCodePoint(text.codePointAt(charIndex - 1))) { + this.charIndex -= 1 + this.codePointIndex -= 1 + } + + this.codePointIndex + } + + /** + * @param offset Index into the string measured in code points. 
+ * @return the corresponding code unit index + */ + def toCodeUnits(offset: Offset.CodePoint): Offset.CodeUnit = { + this.charIndex = text.offsetByCodePoints(charIndex, offset.toInt - this.codePointIndex) + this.codePointIndex = offset.toInt + Offset.CodeUnit(this.charIndex) + } + + /** + * @param codePointIndex Index into the string measured in code points. + * @return the corresponding code unit index + */ + def codePointsToCodeUnits(codePointIndex: Int): Int = + toCodeUnits(Offset.CodePoint(codePointIndex)).toInt + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * code point `to`. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. + */ + def substring(from: Offset.CodePoint, to: Offset.CodePoint): String = + text.substring(toCodeUnits(from).toInt, toCodeUnits(to).toInt) + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * code point `to`. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. + */ + def substringByCodePoints(from: Int, to: Int): String = + substring(Offset.CodePoint(from), Offset.CodePoint(to)) + + /** + * Returns a substring which begins at the specified code point `from` and extends to the + * end of the string. Since String.substring only works with character, the method first + * converts code point offset to code unit offset. 
+ */ + def substringByCodePoints(from: Int): String = { + val charFrom = codePointsToCodeUnits(from) + text.substring(charFrom) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala new file mode 100644 index 000000000..119458643 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Offset.scala @@ -0,0 +1,253 @@ +package com.twitter.tweetypie.tweettext +import scala.collection.immutable + +/** + * An Offset is a typed index into a String. + */ +trait Offset[T] extends Ordering[T] { + def toInt(t: T): Int + def count(text: String, start: Offset.CodeUnit, end: Offset.CodeUnit): T + + def compare(t1: T, t2: T): Int = toInt(t1).compare(toInt(t2)) + def length(input: String): T = count(input, Offset.CodeUnit(0), Offset.CodeUnit.length(input)) +} + +object Offset { + + /** + * UTF-16 code unit offsets are the native offsets for Java/Scala + * Strings. + */ + case class CodeUnit(toInt: Int) extends AnyVal with Ordered[CodeUnit] { + def compare(other: CodeUnit): Int = toInt.compare(other.toInt) + def +(other: CodeUnit) = CodeUnit(toInt + other.toInt) + def -(other: CodeUnit) = CodeUnit(toInt - other.toInt) + def min(other: CodeUnit): CodeUnit = if (toInt < other.toInt) this else other + def max(other: CodeUnit): CodeUnit = if (toInt > other.toInt) this else other + def incr: CodeUnit = CodeUnit(toInt + 1) + def decr: CodeUnit = CodeUnit(toInt - 1) + def until(end: CodeUnit): immutable.IndexedSeq[CodeUnit] = + toInt.until(end.toInt).map(CodeUnit(_)) + + /** + * Converts this `CodeUnit` to the equivalent `CodePoint` within the + * given text. 
+ */ + def toCodePoint(text: String): CodePoint = + CodePoint(text.codePointCount(0, toInt)) + + def offsetByCodePoints(text: String, codePoints: CodePoint): CodeUnit = + CodeUnit(text.offsetByCodePoints(toInt, codePoints.toInt)) + } + + implicit object CodeUnit extends Offset[CodeUnit] { + def toInt(u: CodeUnit): Int = u.toInt + override def length(text: String): CodeUnit = CodeUnit(text.length) + def count(text: String, start: CodeUnit, end: CodeUnit): CodeUnit = end - start + } + + /** + * Offsets in whole Unicode code points. Any CodePoint is a valid + * offset into the String as long as it is >= 0 and less than the + * number of code points in the string. + */ + case class CodePoint(toInt: Int) extends AnyVal with Ordered[CodePoint] { + def toShort: Short = toInt.toShort + def compare(other: CodePoint): Int = toInt.compare(other.toInt) + def +(other: CodePoint) = CodePoint(toInt + other.toInt) + def -(other: CodePoint) = CodePoint(toInt - other.toInt) + def min(other: CodePoint): CodePoint = if (toInt < other.toInt) this else other + def max(other: CodePoint): CodePoint = if (toInt > other.toInt) this else other + def until(end: CodePoint): immutable.IndexedSeq[CodePoint] = + toInt.until(end.toInt).map(CodePoint(_)) + + def toCodeUnit(text: String): CodeUnit = + CodeUnit(text.offsetByCodePoints(0, toInt)) + } + + implicit object CodePoint extends Offset[CodePoint] { + def toInt(p: CodePoint): Int = p.toInt + + def count(text: String, start: CodeUnit, end: CodeUnit): CodePoint = + CodePoint(text.codePointCount(start.toInt, end.toInt)) + } + + /** + * Offsets into the String as if the String were encoded as UTF-8. You + * cannot use a [[Utf8]] offset to index a String, because not all + * Utf8 indices are valid indices into the String. 
+ */ + case class Utf8(toInt: Int) extends AnyVal with Ordered[Utf8] { + def compare(other: Utf8): Int = toInt.compare(other.toInt) + def +(other: Utf8) = Utf8(toInt + other.toInt) + def -(other: Utf8) = Utf8(toInt - other.toInt) + def min(other: Utf8): Utf8 = if (toInt < other.toInt) this else other + def max(other: Utf8): Utf8 = if (toInt > other.toInt) this else other + } + + implicit object Utf8 extends Offset[Utf8] { + def toInt(u: Utf8): Int = u.toInt + + /** + * Count how many bytes this section of text would be when encoded as + * UTF-8. + */ + def count(s: String, start: CodeUnit, end: CodeUnit): Utf8 = { + def go(i: CodeUnit, byteLength: Utf8): Utf8 = + if (i < end) { + val cp = s.codePointAt(i.toInt) + go(i + CodeUnit(Character.charCount(cp)), byteLength + forCodePoint(cp)) + } else { + byteLength + } + + go(start, Utf8(0)) + } + + /** + * Unfortunately, there is no convenient API for finding out how many + * bytes a unicode code point would take in UTF-8, so we have to + * explicitly calculate it. + * + * @see http://en.wikipedia.org/wiki/UTF-8#Description + */ + def forCodePoint(cp: Int): Utf8 = + Utf8 { + // if the code point is an unpaired surrogate, it will be converted + // into a 1 byte replacement character + if (Character.getType(cp) == Character.SURROGATE) 1 + else { + cp match { + case _ if cp < 0x80 => 1 + case _ if cp < 0x800 => 2 + case _ if cp < 0x10000 => 3 + case _ => 4 + } + } + } + } + + /** + * Display units count what we consider a "character" in a + * Tweet. [[DisplayUnit]] offsets are only valid for text that is + * NFC-normalized (See: http://www.unicode.org/reports/tr15) and + * HTML-encoded, though this interface cannot enforce that. + * + * Currently, a [[DisplayUnit]] is equivalent to a single Unicode code + * point combined with treating "<", ">", and "&" each as a + * single character (since they are displayed as '<', '>', and '&' + * respectively). This implementation is not directly exposed. 
+ * + * It should be possible to change this definition without breaking + * code that uses the [[DisplayUnit]] interface e.g. to count + * user-perceived characters (graphemes) rather than code points, + * though any change has to be made in concert with changing the + * mobile client and Web implementations so that the user experience + * of character counting remains consistent. + */ + case class DisplayUnit(toInt: Int) extends AnyVal with Ordered[DisplayUnit] { + def compare(other: DisplayUnit): Int = toInt.compare(other.toInt) + def +(other: DisplayUnit) = DisplayUnit(toInt + other.toInt) + def -(other: DisplayUnit) = DisplayUnit(toInt - other.toInt) + def min(other: DisplayUnit): DisplayUnit = if (toInt < other.toInt) this else other + def max(other: DisplayUnit): DisplayUnit = if (toInt > other.toInt) this else other + } + + implicit object DisplayUnit extends Offset[DisplayUnit] { + def toInt(d: DisplayUnit): Int = d.toInt + + /** + * Returns the number of display units in the specified range of the + * given text. See [[DisplayUnit]] for a description of what we + * consider a display unit. + * + * The input string should already be NFC normalized to get + * consistent results. If partially html encoded, it will correctly + * count html entities as a single display unit. + * + * @param text the string containing the characters to count. + * @param start the index of the first char of the text range + * @param end the index after the last char of the text range. + */ + def count(text: String, start: CodeUnit, end: CodeUnit): DisplayUnit = { + val stop = end.min(CodeUnit.length(text)) + + @annotation.tailrec + def go(offset: CodeUnit, total: DisplayUnit): DisplayUnit = + if (offset >= stop) total + else go(offset + at(text, offset), total + DisplayUnit(1)) + + go(start, DisplayUnit(0)) + } + + /** + * Return the length of the display unit at the specified offset in + * the (NFC-normalized, HTML-encoded) text.
+ */ + def at(text: String, offset: CodeUnit): CodeUnit = + CodeUnit { + text.codePointAt(offset.toInt) match { + case '&' => + if (text.regionMatches(offset.toInt, "&amp;", 0, 5)) 5 + else if (text.regionMatches(offset.toInt, "&lt;", 0, 4)) 4 + else if (text.regionMatches(offset.toInt, "&gt;", 0, 4)) 4 + else 1 + + case cp => Character.charCount(cp) + } + } + } + + /** + * Ranges of offsets, useful for avoiding slicing entities. + */ + sealed trait Ranges[T] { + def contains(t: T): Boolean + } + + object Ranges { + private[this] case class Impl[T](toSeq: Seq[(T, T)])(implicit off: Offset[T]) + extends Ranges[T] { + def contains(t: T): Boolean = toSeq.exists { case (lo, hi) => off.gt(t, lo) && off.lt(t, hi) } + } + + /** + * Non-inclusive range of offsets (matches values that are strictly + * between `hi` and `lo`) + */ + def between[T](lo: T, hi: T)(implicit off: Offset[T]): Ranges[T] = + if (off.toInt(hi) > off.toInt(lo) + 1 && off.toInt(lo) < Int.MaxValue) Impl(Seq((lo, hi))) + else Impl(Nil) + + /** + * The union of all of the specified ranges; overlapping ranges are merged. + */ + def all[T](ranges: Seq[Ranges[T]])(implicit off: Offset[T]): Ranges[T] = + Impl( + // Preprocess the ranges so that each contains check is as cheap + // as possible. + ranges + .flatMap { case r: Impl[T] => r.toSeq } + .sortBy(_._1) + .foldLeft(Nil: List[(T, T)]) { + case ((a, b) :: out, (c, d)) if off.lt(c, b) => (a, off.max(b, d)) :: out + case (out, r) => r :: out + } + ) + + def Empty[T: Offset]: Ranges[T] = Impl[T](Nil) + + private[this] val HtmlEscapes = """&(?:amp|lt|gt);""".r + + /** + * Match [[CodeUnit]]s that would split a HTML entity.
+ */ + def htmlEntities(s: String): Ranges[CodeUnit] = { + val it = HtmlEscapes.findAllIn(s) + all(it.map(_ => between(CodeUnit(it.start), CodeUnit(it.end))).toSeq) + } + + def fromCodePointPairs(pairs: Seq[(Int, Int)]): Ranges[CodePoint] = + all(pairs.map { case (lo, hi) => between(CodePoint(lo), CodePoint(hi)) }) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala new file mode 100644 index 000000000..7f1f338c3 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/PartialHtmlEncoding.scala @@ -0,0 +1,55 @@ +package com.twitter.tweetypie.tweettext + +/** + * Code used to convert raw user-provided text into an allowable form. + */ +object PartialHtmlEncoding { + + /** + * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively. + * + * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form. + */ + def encode(text: String): String = { + val buf = new StringBuilder + + text.foreach { + case '<' => buf.append("&lt;") + case '>' => buf.append("&gt;") + case '&' => buf.append("&amp;") + case c => buf.append(c) + } + + buf.toString + } + + private val AmpLtRegex = "&lt;".r + private val AmpGtRegex = "&gt;".r + private val AmpAmpRegex = "&amp;".r + + private val partialHtmlDecoder: (String => String) = + ((s: String) => AmpLtRegex.replaceAllIn(s, "<")) + .andThen(s => AmpGtRegex.replaceAllIn(s, ">")) + .andThen(s => AmpAmpRegex.replaceAllIn(s, "&")) + + /** + * The opposite of encode, it replaces all "&lt;", "&gt;", and "&amp;" with + * `<`, `>`, and '&', respectively. + */ + def decode(text: String): String = + decodeWithModification(text) match { + case Some(mod) => mod.updated + case None => text + } + + /** + * Decodes encoded entities, and returns a `TextModification` if the text was modified.
+ */ + def decodeWithModification(text: String): Option[TextModification] = + TextModification.replaceAll( + text, + AmpLtRegex -> "<", + AmpGtRegex -> ">", + AmpAmpRegex -> "&" + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala new file mode 100644 index 000000000..0e5c06915 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Preprocessor.scala @@ -0,0 +1,251 @@ +package com.twitter.tweetypie.tweettext +import scala.util.matching.Regex + +/** + * Code used to convert raw user-provided text into an allowable form. + */ +object Preprocessor { + import TweetText._ + import TextModification.replaceAll + + /** + * Regex for dos-style line endings. + */ + val DosLineEndingRegex: Regex = """\r\n""".r + + /** + * Converts \r\n to just \n. + */ + def normalizeNewlines(text: String): String = + DosLineEndingRegex.replaceAllIn(text, "\n") + + /** + * Characters to strip out of tweet text at write-time. + */ + val unicodeCharsToStrip: Seq[Char] = + Seq( + '\uFFFE', '\uFEFF', // BOM + '\uFFFF', // Special + '\u200E', '\u200F', // ltr, rtl + '\u202A', '\u202B', '\u202C', '\u202D', '\u202E', // Directional change + '\u0000', '\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', + '\u0009', '\u000B', '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', + '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', + '\u001D', '\u001E', '\u001F', '\u007F', + '\u2065', + ) + + val UnicodeCharsToStripRegex: Regex = unicodeCharsToStrip.mkString("[", "", "]").r + + /** + * Strips out control characters and other non-textual unicode chars that can break xml and/or + * json rendering, or be used for exploits. 
+ */ + def stripControlCharacters(text: String): String = + UnicodeCharsToStripRegex.replaceAllIn(text, "") + + val Tweetypie674UnicodeSequence: String = + "\u0633\u0645\u064e\u0640\u064e\u0651\u0648\u064f\u0648\u064f\u062d\u062e " + + "\u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e \u0337\u0334\u0310\u062e " + + "\u0627\u0645\u0627\u0631\u062a\u064a\u062e \u0337\u0334\u0310\u062e" + + val Tweetypie674UnicodeRegex: Regex = Tweetypie674UnicodeSequence.r + + /** + * Replace each `Tweetypie674UnicodeSequence` of this string to REPLACEMENT + * CHARACTER. + * + * Apple has a bug in its CoreText library. This aims to prevent + * ios clients from being crashed when a tweet contains the specific + * unicode sequence. + */ + def avoidCoreTextBug(text: String): String = + Tweetypie674UnicodeRegex.replaceAllIn(text, "\ufffd") + + /** + * Replace each `Tweetypie674UnicodeSequence` of this string to a REPLACEMENT + * CHARACTER, returns a TextModification object that provides information + * to also update entity indices. + */ + def replaceCoreTextBugModification(text: String): Option[TextModification] = + replaceAll(text, Tweetypie674UnicodeRegex, "\ufffd") + + private val preprocessor: String => String = + ((s: String) => nfcNormalize(s)) + .andThen(stripControlCharacters _) + .andThen(trimBlankCharacters _) + .andThen(normalizeNewlines _) + .andThen(collapseBlankLines _) + .andThen(avoidCoreTextBug _) + + /** + * Performs the text modifications that are necessary in the write-path before extracting URLs. + */ + def preprocessText(text: String): String = + preprocessor(text) + + /** + * Replaces all `<`, `>`, and '&' chars with "<", ">", and "&", respectively. + * + * The original purpose of this was presumably to prevent script injections when + * displaying tweets without proper escaping. Currently, tweets are encoded before + * they are stored in the database. 
+ * + * Note that the pre-escaping of & < and > also happens in the rich text editor in javascript + */ + def partialHtmlEncode(text: String): String = + PartialHtmlEncoding.encode(text) + + /** + * The opposite of partialHtmlEncode, it replaces all "<", ">", and "&" with + * `<`, `>`, and '&', respectively. + */ + def partialHtmlDecode(text: String): String = + PartialHtmlEncoding.decode(text) + + /** + * + * Detects all forms of whitespace, considering as whitespace the following: + * This regex detects characters that always or often are rendered as blank space. We use + * this to prevent users from inserting excess blank lines and from tweeting effectively + * blank tweets. + * + * Note that these are not all semantically "whitespace", so this regex should not be used + * to process non-blank text, e.g. to separate words. + * + * Codepoints below and the `\p{Z}` regex character property alias are defined in the Unicode + * Character Database (UCD) at https://unicode.org/ucd/ and https://unicode.org/reports/tr44/ + * + * The `\p{Z}` regex character property alias is defined specifically in UCD as: + * + * Zs | Space_Separator | a space character (of various non-zero widths) + * Zl | Line_Separator | U+2028 LINE SEPARATOR only + * Zp | Paragraph_Separator | U+2029 PARAGRAPH SEPARATOR only + * Z | Separator | Zs | Zl | Zp + * ref: https://unicode.org/reports/tr44/#GC_Values_Table + * + * U+0009 Horizontal Tab (included in \s) + * U+000B Vertical Tab (included in \s) + * U+000C Form feed (included in \s) + * U+000D Carriage return (included in \s) + * U+0020 space (included in \s) + * U+0085 Next line (included in \u0085) + * U+061C arabic letter mark (included in \u061C) + * U+00A0 no-break space (included in \p{Z}) + * U+00AD soft-hyphen marker (included in \u00AD) + * U+1680 ogham space mark (included in \p{Z}) + * U+180E mongolian vowel separator (included in \p{Z} on jdk8 and included in \u180E on jdk11) + * U+2000 en quad (included in \p{Z}) + * U+2001 em 
quad (included in \p{Z}) + * U+2002 en space (included in \p{Z}) + * U+2003 em space (included in \p{Z}) + * U+2004 three-per-em space (included in \p{Z}) + * U+2005 four-per-em space (included in \p{Z}) + * U+2006 six-per-em space (included in \p{Z}) + * U+2007 figure space (included in \p{Z}) + * U+2008 punctuation space (included in \p{Z}) + * U+2009 thin space (included in \p{Z}) + * U+200A hair space (included in \p{Z}) + * U+200B zero-width (included in \u200B-\u200D) + * U+200C zero-width non-joiner (included in \u200B-\u200D) + * U+200D zero-width joiner (included in \u200B-\u200D) + * U+2028 line separator (included in \p{Z}) + * U+2029 paragraph separator (included in \p{Z}) + * U+202F narrow no-break space (included in \p{Z}) + * U+205F medium mathematical space (included in \p{Z}) + * U+2061 function application (included in \u2061-\u2064) + * U+2062 invisible times (included in \u2061-\u2064) + * U+2063 invisible separator (included in \u2061-\u2064) + * U+2064 invisible plus (included in \u2061-\u2064) + * U+2066 left-to-right isolate (included in \u2066-\u2069) + * U+2067 right-to-left isolate (included in \u2066-\u2069) + * U+2068 first strong isolate (included in \u2066-\u2069) + * U+2069 pop directional isolate (included in \u2066-\u2069) + * U+206A inhibit symmetric swapping (included in \u206A-\u206F) + * U+206B activate symmetric swapping (included in \u206A-\u206F) + * U+206C inhibit arabic form shaping (included in \u206A-\u206F) + * U+206D activate arabic form shaping (included in \u206A-\u206F) + * U+206E national digit shapes (included in \u206A-\u206F) + * U+206F nominal digit shapes (included in \u206A-\u206F) + * U+2800 braille pattern blank (included in \u2800) + * U+3164 hangul filler (see UCD Ignorable_Code_Point) + * U+FFA0 halfwidth hangul filler (see UCD Ignorable_Code_Point) + * U+3000 ideographic space (included in \p{Z}) + * U+FEFF zero-width no-break space (explicitly included in \uFEFF) + */ + val BlankTextRegex: Regex =
"""[\s\p{Z}\u180E\u0085\u00AD\u061C\u200B-\u200D\u2061-\u2064\u2066-\u2069\u206A-\u206F\u2800\u3164\uFEFF\uFFA0]*""".r + + /** + * Some of the above blank characters are valid at the start of a Tweet (and irrelevant at the end) + * such as characters that change the direction of text. When trimming from the start + * or end of text we use a smaller set of characters + */ + val BlankWhenLeadingOrTrailingRegex: Regex = """[\s\p{Z}\u180E\u0085\u200B\uFEFF]*""".r + + /** + * Matches consecutive blanks, starting at a newline. + */ + val ConsecutiveBlankLinesRegex: Regex = ("""\n(""" + BlankTextRegex + """\n){2,}""").r + + val LeadingBlankCharactersRegex: Regex = ("^" + BlankWhenLeadingOrTrailingRegex).r + val TrailingBlankCharactersRegex: Regex = (BlankWhenLeadingOrTrailingRegex + "$").r + + /** + * Is the given text empty or contains nothing but whitespace? + */ + def isBlank(text: String): Boolean = + BlankTextRegex.pattern.matcher(text).matches() + + /** + * See http://confluence.local.twitter.com/display/PROD/Displaying+line+breaks+in+Tweets + * + * Collapses consecutive blanks lines down to a single blank line. We can assume that + * all newlines have already been normalized to just \n, so we don't have to worry about + * \r\n. + */ + def collapseBlankLinesModification(text: String): Option[TextModification] = + replaceAll(text, ConsecutiveBlankLinesRegex, "\n\n") + + def collapseBlankLines(text: String): String = + ConsecutiveBlankLinesRegex.replaceAllIn(text, "\n\n") + + def trimBlankCharacters(text: String): String = + TrailingBlankCharactersRegex.replaceFirstIn( + LeadingBlankCharactersRegex.replaceFirstIn(text, ""), + "" + ) + + /** Characters that are not visible on their own. Some of these are used in combination with + * other visible characters, and therefore cannot be always stripped from tweets. 
+ */ + private[tweettext] val InvisibleCharacters: Seq[Char] = + Seq( + '\u2060', '\u2061', '\u2062', '\u2063', '\u2064', '\u206A', '\u206B', '\u206C', '\u206D', + '\u206D', '\u206E', '\u206F', '\u200C', + '\u200D', // non-printing chars with valid use in Arabic + '\u2009', '\u200A', '\u200B', // include very skinny spaces too + '\ufe00', '\ufe01', '\ufe02', '\ufe03', '\ufe04', '\ufe05', '\ufe06', '\ufe07', '\ufe08', + '\ufe09', '\ufe0A', '\ufe0B', '\ufe0C', '\ufe0D', '\ufe0E', '\ufe0F', + ) + + private[tweetypie] val InvisibleUnicodePattern: Regex = + ("^[" + InvisibleCharacters.mkString + "]+$").r + + def isInvisibleChar(input: Char): Boolean = { + InvisibleCharacters contains input + } + + /** If string is only "invisible characters", replace full string with whitespace. + * The purpose of this method is to remove invisible characters when ONLY invisible characters + * appear between two urls, which can be a security vulnerability due to misleading behavior. These + * characters cannot be removed as a rule applied to the tweet, because they are used in + * conjunction with other characters. + */ + def replaceInvisiblesWithWhitespace(text: String): String = { + text match { + case invisible @ InvisibleUnicodePattern() => " " * TweetText.codePointLength(invisible) + case other => other + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala new file mode 100644 index 000000000..e24eb7061 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextEntity.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie.tweettext + +/** + * A type class for entities found within a piece of tweet text.
+ */ +trait TextEntity[T] { + def fromIndex(entity: T): Short + def toIndex(entity: T): Short + def move(entity: T, fromIndex: Short, toIndex: Short): T +} + +object TextEntity { + def fromIndex[T: TextEntity](entity: T): Short = + implicitly[TextEntity[T]].fromIndex(entity) + + def toIndex[T: TextEntity](entity: T): Short = + implicitly[TextEntity[T]].toIndex(entity) + + def move[T: TextEntity](entity: T, fromIndex: Short, toIndex: Short): T = + implicitly[TextEntity[T]].move(entity, fromIndex, toIndex) + + def shift[T: TextEntity](entity: T, offset: Short): T = + move(entity, (fromIndex(entity) + offset).toShort, (toIndex(entity) + offset).toShort) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala new file mode 100644 index 000000000..053a4e115 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TextModification.scala @@ -0,0 +1,232 @@ +package com.twitter.tweetypie.tweettext + +import scala.util.matching.Regex + +object TextModification { + + /** + * Lift a text into a TextModification where `original` and `updated` text are the same + * and `replacements` is empty. + */ + def identity(text: String): TextModification = + TextModification(original = text, updated = text, replacements = Nil) + + /** + * Replace each substring that matches the regex with the substitution string, returns a + * TextModification object that contains the updated text and enough information to also + * update entity indices. + * + * This method should correctly be taking into account surrogate-pairs. The returned + * TextModification object has code-point offsets, instead of code-unit offsets. 
+ */ + def replaceAll(text: String, regex: Regex, substitution: String): Option[TextModification] = + replaceAll(text, regex -> substitution) + + /** + * Replaces substrings that match the given `Regex` with the corresponding substitution + * string. Returns a `TextModification` that can be used to reindex entities. + */ + def replaceAll( + text: String, + regexAndSubstitutions: (Regex, String)* + ): Option[TextModification] = { + val matches = + (for { + (r, s) <- regexAndSubstitutions + m <- r.findAllIn(text).matchData + } yield (m, s)).sortBy { case (m, _) => m.start } + + if (matches.isEmpty) { + // no match found, return None to indicate no modifications made + None + } else { + val replacements = List.newBuilder[TextReplacement] + val indexConverter = new IndexConverter(text) + // contains the retained text, built up as we walk through the regex matches + val buf = new StringBuilder(text.length) + // the number of code-points copied into buf + var codePointsCopied = Offset.CodePoint(0) + // always holds the start code-unit offset to copy to buf when we encounter + // either a regex match or end-of-string.
+ var anchor = 0 + + import indexConverter.toCodePoints + + for ((m, sub) <- matches) { + val unchangedText = text.substring(anchor, m.start) + val unchangedLen = Offset.CodePoint.length(unchangedText) + val subLen = Offset.CodePoint.length(sub) + + // copies the text upto the regex match run, plus the replacement string + buf.append(unchangedText).append(sub) + codePointsCopied += unchangedLen + subLen + + // the offsets indicate the indices of the matched string in the original + // text, and the indices of the replacement string in the updated string + replacements += + TextReplacement( + originalFrom = toCodePoints(Offset.CodeUnit(m.start)), + originalTo = toCodePoints(Offset.CodeUnit(m.end)), + updatedFrom = codePointsCopied - subLen, + updatedTo = codePointsCopied + ) + + anchor = m.end + } + + buf.append(text.substring(anchor)) + + Some(TextModification(text, buf.toString, replacements.result())) + } + } + + /** + * Inserts a string at a specified code point offset. + * Returns a `TextModification` that can be used to reindex entities. + */ + def insertAt( + originalText: String, + insertAt: Offset.CodePoint, + textToInsert: String + ): TextModification = { + val insertAtCodeUnit = insertAt.toCodeUnit(originalText).toInt + val (before, after) = originalText.splitAt(insertAtCodeUnit) + val updatedText = s"$before$textToInsert$after" + val textToInsertLength = TweetText.codePointLength(textToInsert) + + TextModification( + original = originalText, + updated = updatedText, + replacements = List( + TextReplacement.fromCodePoints( + originalFrom = insertAt.toInt, + originalTo = insertAt.toInt, + updatedFrom = insertAt.toInt, + updatedTo = insertAt.toInt + textToInsertLength + )) + ) + } +} + +/** + * Encodes information about insertions/deletions/replacements made to a string, providing + * the original string, the updated string, and a list of TextReplacement objects + * that encode the indices of the segments that were changed. 
Using this information, + * it is possible to map an offset into the original string to an offset into the updated + * string, assuming the text at the offset was not within one of the modified segments. + * + * All offsets are code-points, not UTF16 code-units. + */ +case class TextModification( + original: String, + updated: String, + replacements: List[TextReplacement]) { + private val originalLen = Offset.CodePoint.length(original) + + /** + * Using an offset into the original String, computes the equivalent offset into the updated + * string. If the offset falls within a segment that was removed/replaced, None is returned. + */ + def reindex(index: Offset.CodePoint): Option[Offset.CodePoint] = + reindex(index, Offset.CodePoint(0), replacements) + + /** + * Reindexes an entity of type T. Returns the updated entity, or None if either the `fromIndex` + * or `toIndex` value is now out of range. + */ + def reindexEntity[T: TextEntity](e: T): Option[T] = + for { + from <- reindex(Offset.CodePoint(TextEntity.fromIndex(e))) + to <- reindex(Offset.CodePoint(TextEntity.toIndex(e) - 1)) + } yield TextEntity.move(e, from.toShort, (to.toShort + 1).toShort) + + /** + * Reindexes a sequence of entities of type T. Some entities could be filtered + * out if they span a region of text that has been removed. + */ + def reindexEntities[T: TextEntity](es: Seq[T]): Seq[T] = + for (e <- es; e2 <- reindexEntity(e)) yield e2 + + /** + * Swaps `original` and `updated` text and inverts all `TextReplacement` instances. + */ + def inverse: TextModification = + TextModification(updated, original, replacements.map(_.inverse)) + + // recursively walks through the list of TextReplacement objects computing + // offsets to add/subtract from 'shift', which accumulates all changes and + // then gets added to index at the end.
+ private def reindex( + index: Offset.CodePoint, + shift: Offset.CodePoint, + reps: List[TextReplacement] + ): Option[Offset.CodePoint] = + reps match { + case Nil => + if (index.toInt >= 0 && index <= originalLen) + Some(index + shift) + else + None + case (r @ TextReplacement(fr, to, _, _)) :: tail => + if (index < fr) Some(index + shift) + else if (index < to) None + else reindex(index, shift + r.lengthDelta, tail) + } +} + +object TextReplacement { + def fromCodePoints( + originalFrom: Int, + originalTo: Int, + updatedFrom: Int, + updatedTo: Int + ): TextReplacement = + TextReplacement( + Offset.CodePoint(originalFrom), + Offset.CodePoint(originalTo), + Offset.CodePoint(updatedFrom), + Offset.CodePoint(updatedTo) + ) +} + +/** + * Encodes the indices of a segment of text in one string that maps to a replacement + * segment in an updated version of the text. The replacement segment could be empty + * (updatedTo == updatedFrom), indicating the segment was removed. + * + * All offsets are code-points, not UTF16 code-units. + * + * `originalFrom` and `updatedFrom` are inclusive. + * `originalTo` and `updatedTo` are exclusive. 
+ */ +case class TextReplacement( + originalFrom: Offset.CodePoint, + originalTo: Offset.CodePoint, + updatedFrom: Offset.CodePoint, + updatedTo: Offset.CodePoint) { + def originalLength: Offset.CodePoint = originalTo - originalFrom + def updatedLength: Offset.CodePoint = updatedTo - updatedFrom + def lengthDelta: Offset.CodePoint = updatedLength - originalLength + + def shiftOriginal(offset: Offset.CodePoint): TextReplacement = + copy(originalFrom = originalFrom + offset, originalTo = originalTo + offset) + + def shiftUpdated(offset: Offset.CodePoint): TextReplacement = + copy(updatedFrom = updatedFrom + offset, updatedTo = updatedTo + offset) + + def shift(offset: Offset.CodePoint): TextReplacement = + TextReplacement( + originalFrom + offset, + originalTo + offset, + updatedFrom + offset, + updatedTo + offset + ) + + def inverse: TextReplacement = + TextReplacement( + originalFrom = updatedFrom, + originalTo = updatedTo, + updatedFrom = originalFrom, + updatedTo = originalTo + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala new file mode 100644 index 000000000..c9f6e28cc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/Truncator.scala @@ -0,0 +1,159 @@ +package com.twitter.tweetypie.tweettext + +import com.twitter.tweetypie.tweettext.TweetText._ +import com.twitter.twittertext.Extractor +import java.lang.Character +import scala.annotation.tailrec +import scala.collection.JavaConverters._ + +object Truncator { + val Ellipsis = "\u2026" + + /** + * Truncate tweet text for a retweet. If the text is longer than + * either of the length limits, code points are cut off from the end + * of the text and replaced with an ellipsis. We keep as much of the + * leading text as possible, subject to these constraints: + * + * - There are no more than `MaxDisplayLength` characters. 
+ * + * - When converted to UTF-8, the result does not exceed `MaxByteLength`. + * + * - We do not break within a single grapheme cluster. + * + * The input is assumed to be partial HTML-encoded and may or may + * not be NFC normalized. The result will be partial HTML-encoded + * and will be NFC normalized. + */ + def truncateForRetweet(input: String): String = truncateWithEllipsis(input, Ellipsis) + + /** + * Truncate to [[com.twitter.tweetypie.tweettext.TweetText#OriginalMaxDisplayLength]] display + * units, using "..." as an ellipsis. The resulting text is guaranteed to pass our tweet length + * check, but it is not guaranteed to fit in an SMS message. + */ + def truncateForSms(input: String): String = truncateWithEllipsis(input, "...") + + /** + * Check the length of the given text, and truncate it if it is longer + * than the allowed length for a Tweet. The result of this method will + * always have: + * + * - Display length <= OriginalMaxDisplayLength. + * - Length when encoded as UTF-8 <= OriginalMaxUtf8Length. + * + * If the input would violate this, then the text will be + * truncated. When the text is truncated, it will be truncated such + * that: + * + * - Grapheme clusters will not be split. + * - The last character before the ellipsis will not be a whitespace + * character. + * - The ellipsis text will be appended to the end. + */ + private[this] def truncateWithEllipsis(input: String, ellipsis: String): String = { + val text = nfcNormalize(input) + val truncateAt = + truncationPoint(text, OriginalMaxDisplayLength, OriginalMaxUtf8Length, Some(ellipsis)) + if (truncateAt.codeUnitOffset.toInt == text.length) text + else text.take(truncateAt.codeUnitOffset.toInt) + ellipsis + } + + /** + * Indicates a potential TruncationPoint in a piece of text.
+ * + * @param codeUnitOffset the utf-16 code unit offset of the truncation point + * @param codePointOffset the offset in code points + */ + case class TruncationPoint(codeUnitOffset: Offset.CodeUnit, codePointOffset: Offset.CodePoint) + + /** + * Computes a TruncationPoint for the given text and length constraints. If `truncated` on + * the result is `false`, it means the text will fit within the given constraints without + * truncation. Otherwise, the result indicates both the character and code-point offsets + * at which to perform the truncation, and the resulting display length and byte length of + * the truncated string. + * + * Text should be NFC normalized first for best results. + * + * @param withEllipsis if defined, then the truncation point will be computed so that there is space + * to append the given ellipsis and to still remain within the limits. The ellipsis is not counted + * in the returned display and byte lengths. + * + * @param atomicUnits may contain a list of ranges that should be treated as atomic units and + * not split. Each tuple is a half-open range in code points.
+ */ + def truncationPoint( + text: String, + maxDisplayLength: Int = OriginalMaxDisplayLength, + maxByteLength: Int = OriginalMaxUtf8Length, + withEllipsis: Option[String] = None, + atomicUnits: Offset.Ranges[Offset.CodePoint] = Offset.Ranges.Empty + ): TruncationPoint = { + val breakPoints = + GraphemeIndexIterator + .ends(text) + .filterNot(Offset.Ranges.htmlEntities(text).contains) + + val ellipsisDisplayUnits = + withEllipsis.map(Offset.DisplayUnit.length).getOrElse(Offset.DisplayUnit(0)) + val maxTruncatedDisplayLength = Offset.DisplayUnit(maxDisplayLength) - ellipsisDisplayUnits + + val ellipsisByteLength = withEllipsis.map(Offset.Utf8.length).getOrElse(Offset.Utf8(0)) + val maxTruncatedByteLength = Offset.Utf8(maxByteLength) - ellipsisByteLength + + var codeUnit = Offset.CodeUnit(0) + var codePoint = Offset.CodePoint(0) + var displayLength = Offset.DisplayUnit(0) + var byteLength = Offset.Utf8(0) + var truncateCodeUnit = codeUnit + var truncateCodePoint = codePoint + + @tailrec def go(): TruncationPoint = + if (displayLength.toInt > maxDisplayLength || byteLength.toInt > maxByteLength) { + TruncationPoint(truncateCodeUnit, truncateCodePoint) + } else if (codeUnit != truncateCodeUnit && + displayLength <= maxTruncatedDisplayLength && + byteLength <= maxTruncatedByteLength && + (codeUnit.toInt == 0 || !Character.isWhitespace(text.codePointBefore(codeUnit.toInt))) && + !atomicUnits.contains(codePoint)) { + // we can advance the truncation point + truncateCodeUnit = codeUnit + truncateCodePoint = codePoint + go() + } else if (breakPoints.hasNext) { + // there are further truncation points to consider + val nextCodeUnit = breakPoints.next + codePoint += Offset.CodePoint.count(text, codeUnit, nextCodeUnit) + displayLength += Offset.DisplayUnit.count(text, codeUnit, nextCodeUnit) + byteLength += Offset.Utf8.count(text, codeUnit, nextCodeUnit) + codeUnit = nextCodeUnit + go() + } else { + TruncationPoint(codeUnit, codePoint) + } + + go() + } + + /** + * Truncate 
the given text, avoiding chopping HTML entities and tweet + * entities. This should only be used for testing because it performs + * entity extraction, and so is very inefficient. + */ + def truncateForTests( + input: String, + maxDisplayLength: Int = OriginalMaxDisplayLength, + maxByteLength: Int = OriginalMaxUtf8Length + ): String = { + val text = nfcNormalize(input) + val extractor = new Extractor + val entities = extractor.extractEntitiesWithIndices(text) + extractor.modifyIndicesFromUTF16ToUnicode(text, entities) + val avoid = Offset.Ranges.fromCodePointPairs( + entities.asScala.map(e => (e.getStart().intValue, e.getEnd().intValue)) + ) + val truncateAt = truncationPoint(text, maxDisplayLength, maxByteLength, None, avoid) + text.take(truncateAt.codeUnitOffset.toInt) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala new file mode 100644 index 000000000..cb2ae3069 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/tweettext/TweetText.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.tweettext + +import java.text.Normalizer + +object TweetText { + + /** The original maximum tweet length, taking into account normalization */ + private[tweetypie] val OriginalMaxDisplayLength = 140 + + /** Maximum number of visible code points allowed in a tweet when tweet length is counted by code + * points, taking into account normalization. See also [[MaxVisibleWeightedEmojiLength]]. + */ + private[tweetypie] val MaxVisibleWeightedLength = 280 + + /** Maximum number of visible code points allowed in a tweet when tweet length is counted by + * emoji, taking into account normalization. See also [[MaxVisibleWeightedLength]]. 
+ * 140 is the max number of Emojis, visible, fully-weighted per Twitter's cramming rules + * 10 is the max number of Code Points per Emoji + */ + private[tweetypie] val MaxVisibleWeightedEmojiLength = 140 * 10 + + /** Maximum number of bytes when truncating tweet text for a retweet. Originally was the + * max UTF-8 length when tweets were at most 140 characters. + * See also [[OriginalMaxDisplayLength]]. + */ + private[tweetypie] val OriginalMaxUtf8Length = 600 + + /** Maximum number of bytes for tweet text using utf-8 encoding. + */ + private[tweetypie] val MaxUtf8Length = 5708 + + /** Maximum number of mentions allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxMentions = 50 + + /** Maximum number of urls allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxUrls = 10 + + /** Maximum number of hashtags allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxHashtags = 50 + + /** Maximum number of cashtags allowed in tweet text. This is enforced at tweet creation time */ + private[tweetypie] val MaxCashtags = 50 + + /** Maximum length of a hashtag (not including the '#') */ + private[tweetypie] val MaxHashtagLength = 100 + + /** + * Normalizes the text according to the unicode NFC spec. + */ + def nfcNormalize(text: String): String = Normalizer.normalize(text, Normalizer.Form.NFC) + + /** + * Return the number of "characters" in this text. See + * [[Offset.DisplayUnit]]. + */ + def displayLength(text: String): Int = Offset.DisplayUnit.length(text).toInt + + /** + * Return the number of Unicode code points in this String. 
+ */ + def codePointLength(text: String): Int = Offset.CodePoint.length(text).toInt +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD new file mode 100644 index 000000000..9a3c54773 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/BUILD @@ -0,0 +1,76 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = "util", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweet-util", + "util/util-core:scala", + ], +) + +scala_library( + name = "EditControlUtil", + sources = [ + "EditControlUtil.scala", + "package.scala", + ], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter.tweetypie", + name = 
"util-EditControlUtil", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/commons-codec", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:deprecated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:transient_context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweet-util", + "util/util-core:scala", + ], +) diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala new file mode 100644 index 000000000..6a89f6a3a --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityAnnotation.scala @@ -0,0 +1,29 @@ +package com.twitter.tweetypie.util + +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations +import com.twitter.tweetypie.thriftscala.Tweet + +object CommunityAnnotation { + + val groupId: Long = 8 + val domainId: Long = 31 + + def apply(communityId: Long): TweetEntityAnnotation = + TweetEntityAnnotation(groupId, domainId, entityId = communityId) + + def unapply(annotation: TweetEntityAnnotation): Option[Long] = + annotation match { + case 
TweetEntityAnnotation(`groupId`, `domainId`, entityId) => Some(entityId) + case _ => None + } + + // Returns None instead of Some(Seq()) when there are non-community annotations present + def additionalFieldsToCommunityIDs(additionalFields: Tweet): Option[Seq[Long]] = { + additionalFields.escherbirdEntityAnnotations + .map { + case EscherbirdEntityAnnotations(entityAnnotations) => + entityAnnotations.flatMap(CommunityAnnotation.unapply) + }.filter(_.nonEmpty) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala new file mode 100644 index 000000000..a455fe3d8 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/CommunityUtil.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.util + +import com.twitter.tweetypie.thriftscala.Communities + +object CommunityUtil { + + def communityIds(maybeCommunities: Option[Communities]): Seq[Long] = { + maybeCommunities match { + case None => + Nil + case Some(Communities(seq)) => + seq + } + } + + def hasCommunity(maybeCommunities: Option[Communities]): Boolean = { + maybeCommunities.exists(_.communityIds.nonEmpty) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala new file mode 100644 index 000000000..cb0ea84fb --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/ConversationControls.scala @@ -0,0 +1,112 @@ +package com.twitter.tweetypie +package util + +import com.twitter.tweetypie.thriftscala._ + +object ConversationControls { + object Create { + def byInvitation( + inviteViaMention: Option[Boolean] = None + ): TweetCreateConversationControl.ByInvitation = TweetCreateConversationControl.ByInvitation( + TweetCreateConversationControlByInvitation(inviteViaMention = inviteViaMention) + ) + + def community( + inviteViaMention: Option[Boolean] = 
None + ): TweetCreateConversationControl.Community = TweetCreateConversationControl.Community( + TweetCreateConversationControlCommunity(inviteViaMention = inviteViaMention) + ) + + def followers( + inviteViaMention: Option[Boolean] = None + ): TweetCreateConversationControl.Followers = TweetCreateConversationControl.Followers( + TweetCreateConversationControlFollowers(inviteViaMention = inviteViaMention) + ) + } + + object Scenario { + case class CommonScenario( + createConversationControl: TweetCreateConversationControl, + descriptionSuffix: String, + expectedConversationControl: (UserId, Seq[UserId]) => ConversationControl, + inviteViaMention: Option[Boolean]) + + def mkCommunityScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.community(inviteViaMention = inviteViaMention), + "community", + expectedConversationControl = (authorId, userIds) => { + community(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + def mkByInvitationScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.byInvitation(inviteViaMention = inviteViaMention), + "invited users", + expectedConversationControl = (authorId, userIds) => { + byInvitation(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + def mkFollowersScenario(inviteViaMention: Option[Boolean]): CommonScenario = + CommonScenario( + Create.followers(inviteViaMention = inviteViaMention), + "followers", + expectedConversationControl = (authorId, userIds) => { + followers(userIds, authorId, inviteViaMention) + }, + inviteViaMention + ) + + val communityScenario = mkCommunityScenario(None) + val communityInviteViaMentionScenario = mkCommunityScenario(Some(true)) + + val byInvitationScenario = mkByInvitationScenario(None) + val byInvitationInviteViaMentionScenario = mkByInvitationScenario(Some(true)) + + val followersScenario = mkFollowersScenario(None) + val followersInviteViaMentionScenario = mkFollowersScenario(Some(true)) + } 
+ + def byInvitation( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.ByInvitation( + ConversationControlByInvitation( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) + + def community( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.Community( + ConversationControlCommunity( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) + + def followers( + invitedUserIds: Seq[UserId], + conversationTweetAuthorId: UserId, + inviteViaMention: Option[Boolean] = None + ): ConversationControl = + ConversationControl.Followers( + ConversationControlFollowers( + conversationTweetAuthorId = conversationTweetAuthorId, + invitedUserIds = invitedUserIds, + inviteViaMention = inviteViaMention + ) + ) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala new file mode 100644 index 000000000..7135e9538 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/EditControlUtil.scala @@ -0,0 +1,174 @@ +package com.twitter.tweetypie.util + +import com.twitter.servo.util.Gate +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditUpdateEditControlException +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditControlEdit +import com.twitter.tweetypie.thriftscala.EditControlInitial +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Try +import com.twitter.util.Return +import com.twitter.util.Throw +import 
com.twitter.util.Time +import com.twitter.util.Duration + +object EditControlUtil { + + val maxTweetEditsAllowed = 5 + val oldEditTimeWindow = Duration.fromMinutes(30) + val editTimeWindow = Duration.fromMinutes(60) + + def editControlEdit( + initialTweetId: TweetId, + editControlInitial: Option[EditControlInitial] = None + ): EditControl.Edit = + EditControl.Edit( + EditControlEdit(initialTweetId = initialTweetId, editControlInitial = editControlInitial)) + + // EditControl for the tweet that is not an edit, that is, any regular tweet we create + // that can, potentially, be edited later. + def makeEditControlInitial( + tweetId: TweetId, + createdAt: Time, + setEditWindowToSixtyMinutes: Gate[Unit] = Gate(_ => false) + ): EditControl.Initial = { + val editWindow = if (setEditWindowToSixtyMinutes()) editTimeWindow else oldEditTimeWindow + val initial = EditControlInitial( + editTweetIds = Seq(tweetId), + editableUntilMsecs = Some(createdAt.plus(editWindow).inMilliseconds), + editsRemaining = Some(maxTweetEditsAllowed), + isEditEligible = defaultIsEditEligible, + ) + EditControl.Initial(initial) + } + + // Returns if a given latestTweetId is the latest edit in the EditControl + def isLatestEdit( + tweetEditControl: Option[EditControl], + latestTweetId: TweetId + ): Try[Boolean] = { + tweetEditControl match { + case Some(EditControl.Initial(initial)) => + isLatestEditFromEditControlInitial(Some(initial), latestTweetId) + case Some(EditControl.Edit(edit)) => + isLatestEditFromEditControlInitial( + edit.editControlInitial, + latestTweetId + ) + case _ => Throw(TweetEditInvalidEditControlException) + } + } + + // Returns if a given latestTweetId is the latest edit in the EditControlInitial + private def isLatestEditFromEditControlInitial( + initialTweetEditControl: Option[EditControlInitial], + latestTweetId: TweetId + ): Try[Boolean] = { + initialTweetEditControl match { + case Some(initial) => + Return(latestTweetId == initial.editTweetIds.last) + case _ => 
Throw(TweetEditInvalidEditControlException) + } + } + + /* Create an updated edit control for an initialTweet given the id of the new edit */ + def editControlForInitialTweet( + initialTweet: Tweet, + newEditId: TweetId + ): Try[EditControl.Initial] = { + initialTweet.editControl match { + case Some(EditControl.Initial(initial)) => + Return(EditControl.Initial(plusEdit(initial, newEditId))) + + case Some(EditControl.Edit(_)) => Throw(TweetEditUpdateEditControlException) + + case _ => + initialTweet.coreData match { + case Some(coreData) => + Return( + makeEditControlInitial( + tweetId = initialTweet.id, + createdAt = Time.fromMilliseconds(coreData.createdAtSecs * 1000), + setEditWindowToSixtyMinutes = Gate(_ => true) + ) + ) + case None => Throw(new Exception("Tweet Missing Required CoreData")) + } + } + } + + def updateEditControl(tweet: Tweet, newEditId: TweetId): Try[Tweet] = + editControlForInitialTweet(tweet, newEditId).map { editControl => + tweet.copy(editControl = Some(editControl)) + } + + def plusEdit(initial: EditControlInitial, newEditId: TweetId): EditControlInitial = { + val newEditTweetIds = (initial.editTweetIds :+ newEditId).distinct.sorted + val editsCount = newEditTweetIds.size - 1 // as there is the original tweet ID there too. 
+ initial.copy( + editTweetIds = newEditTweetIds, + editsRemaining = Some(maxTweetEditsAllowed - editsCount), + ) + } + + // The ID of the initial Tweet if this is an edit + def getInitialTweetIdIfEdit(tweet: Tweet): Option[TweetId] = tweet.editControl match { + case Some(EditControl.Edit(edit)) => Some(edit.initialTweetId) + case _ => None + } + + // If this is the first tweet in an edit chain, return the same tweet id + // otherwise return the result of getInitialTweetIdIfEdit + def getInitialTweetId(tweet: Tweet): TweetId = + getInitialTweetIdIfEdit(tweet).getOrElse(tweet.id) + + def isInitialTweet(tweet: Tweet): Boolean = + getInitialTweetId(tweet) == tweet.id + + // Extracted just so that we can easily track where the value of isEditEligible comes from. + private def defaultIsEditEligible: Option[Boolean] = Some(true) + + // returns true if it's an edit of a Tweet or an initial Tweet that's been edited + def isEditTweet(tweet: Tweet): Boolean = + tweet.editControl match { + case Some(eci: EditControl.Initial) if eci.initial.editTweetIds.size <= 1 => false + case Some(_: EditControl.Initial) | Some(_: EditControl.Edit) | Some( + EditControl.UnknownUnionField(_)) => + true + case None => false + } + + // returns true if editControl is from an edit of a Tweet (unknown union fields also return true) + // returns false for any other state, including edit initial.
+ def isEditControlEdit(editControl: EditControl): Boolean = { + editControl match { + case _: EditControl.Edit | EditControl.UnknownUnionField(_) => true + case _ => false + } + } + + def getEditTweetIds(editControl: Option[EditControl]): Try[Seq[TweetId]] = { + editControl match { + case Some(EditControl.Edit(EditControlEdit(_, Some(eci)))) => + Return(eci.editTweetIds) + case Some(EditControl.Initial(initial)) => + Return(initial.editTweetIds) + case _ => + Throw(new Exception(s"EditControlInitial not found in $editControl")) + } + } +} + +object TweetEditFailure { + abstract class TweetEditException(msg: String) extends Exception(msg) + + case object TweetEditGetInitialEditControlException + extends TweetEditException("Initial EditControl not found") + + case object TweetEditInvalidEditControlException + extends TweetEditException("Invalid EditControl for initial_tweet") + + case object TweetEditUpdateEditControlException + extends TweetEditException("Invalid Edit Control Update") +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala new file mode 100644 index 000000000..ce0b49079 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/RetryPolicyBuilder.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie.util + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.service.RetryPolicy.RetryableWriteException +import com.twitter.servo.exception.thriftscala.ServerError +import com.twitter.util.Duration +import com.twitter.util.Throw +import com.twitter.util.TimeoutException +import com.twitter.util.Try + +object RetryPolicyBuilder { + + /** + * Retry on any exception. 
+ */ + def anyFailure[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(_) => true + } + + /** + * Retry on com.twitter.util.TimeoutException + */ + def timeouts[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(_: TimeoutException) => true + } + + /** + * Retry on com.twitter.finagle.service.RetryableWriteExceptions + */ + def writes[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(RetryableWriteException(_)) => true + } + + /** + * Retry on com.twitter.servo.exception.thriftscala.ServerError + */ + def servoServerError[A](backoffs: Stream[Duration]): RetryPolicy[Try[A]] = + RetryPolicy.backoff[Try[A]](Backoff.fromStream(backoffs)) { + case Throw(ServerError(_)) => true + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala new file mode 100644 index 000000000..7113beed5 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StitchUtils.scala @@ -0,0 +1,54 @@ +package com.twitter.tweetypie.util + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.stitch.Stitch + +object StitchUtils { + def trackLatency[T](latencyStat: Stat, s: => Stitch[T]): Stitch[T] = { + Stitch + .time(s) + .map { + case (res, duration) => + latencyStat.add(duration.inMillis) + res + } + .lowerFromTry + } + + def observe[T](statsReceiver: StatsReceiver, apiName: String): Stitch[T] => Stitch[T] = { + val stats = statsReceiver.scope(apiName) + + val requests = stats.counter("requests") + val success = stats.counter("success") + val latencyStat = stats.stat("latency_ms") + + val 
exceptionCounter = + new servo.util.ExceptionCounter(stats, "failures") + + stitch => + trackLatency(latencyStat, stitch) + .respond { + case Return(_) => + requests.incr() + success.incr() + + case Throw(e) => + exceptionCounter(e) + requests.incr() + } + } + + def translateExceptions[T]( + stitch: Stitch[T], + translateException: PartialFunction[Throwable, Throwable] + ): Stitch[T] = + stitch.rescue { + case t if translateException.isDefinedAt(t) => + Stitch.exception(translateException(t)) + case t => Stitch.exception(t) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala new file mode 100644 index 000000000..ccddcf540 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/StringLiteral.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.util + +/** + * Escape a String into Java or Scala String literal syntax (adds the + * surrounding quotes.) + * + * This is primarily for printing Strings for debugging or logging. 
+ */ +object StringLiteral extends (String => String) { + private[this] val ControlLimit = ' ' + private[this] val PrintableLimit = '\u007e' + private[this] val Specials = + Map('\n' -> 'n', '\r' -> 'r', '\t' -> 't', '"' -> '"', '\'' -> '\'', '\\' -> '\\') + + def apply(str: String): String = { + val s = new StringBuilder(str.length) + s.append('"') + var i = 0 + while (i < str.length) { + val c = str(i) + Specials.get(c) match { + case None => + if (c >= ControlLimit && c <= PrintableLimit) s.append(c) + else s.append("\\u%04x".format(c.toInt)) + case Some(special) => s.append('\\').append(special) + } + i += 1 + } + s.append('"').result + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala new file mode 100644 index 000000000..643971969 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/Takedowns.scala @@ -0,0 +1,49 @@ +package com.twitter.tweetypie.util + +import com.twitter.takedown.util.TakedownReasons +import com.twitter.takedown.util.TakedownReasons.CountryCode +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tseng.withholding.thriftscala.UnspecifiedReason +import com.twitter.tweetypie.thriftscala.Tweet + +/** + * Contains tweetypie-specific utils for working with TakedownReasons. + */ +object Takedowns { + + type CountryCode = String + + /** + * Take a list of [[TakedownReason]] and return values to be saved on the [[Tweet]] in fields + * tweetypieOnlyTakedownCountryCode and tweetypieOnlyTakedownReason. 
+ * + * - tweetypieOnlyTakedownCountryCode contains the country_code of all UnspecifiedReasons + * - tweetypieOnlyTakedownReason contains all other reasons + */ + def partitionReasons(reasons: Seq[TakedownReason]): (Seq[String], Seq[TakedownReason]) = { + val (unspecifiedReasons, specifiedReasons) = reasons.partition { + case TakedownReason.UnspecifiedReason(UnspecifiedReason(_)) => true + case _ => false + } + val unspecifiedCountryCodes = unspecifiedReasons.collect(TakedownReasons.reasonToCountryCode) + (unspecifiedCountryCodes, specifiedReasons) + } + + def fromTweet(t: Tweet): Takedowns = + Takedowns( + Seq + .concat( + t.tweetypieOnlyTakedownCountryCodes + .getOrElse(Nil).map(TakedownReasons.countryCodeToReason), + t.tweetypieOnlyTakedownReasons.getOrElse(Nil) + ).toSet + ) +} + +/** + * This class is used to ensure the caller has access to both the full list of reasons as well + * as the backwards-compatible list of country codes. + */ +case class Takedowns(reasons: Set[TakedownReason]) { + def countryCodes: Set[CountryCode] = reasons.collect(TakedownReasons.reasonToCountryCode) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala new file mode 100644 index 000000000..9fa6d77a0 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TransientContextUtil.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie.util + +import com.twitter.tweetypie.thriftscala.TransientCreateContext +import com.twitter.tweetypie.thriftscala.TweetCreateContextKey +import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeCreatorId +import com.twitter.tweetypie.thriftscala.TweetCreateContextKey.PeriscopeIsLive + +object TransientContextUtil { + + def toAdditionalContext(context: TransientCreateContext): Map[TweetCreateContextKey, String] = + Seq + .concat( + context.periscopeIsLive.map(PeriscopeIsLive -> _.toString), // "true" 
or "false" + context.periscopeCreatorId.map(PeriscopeCreatorId -> _.toString) // userId + ) + .toMap +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala new file mode 100644 index 000000000..06295fa25 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetCreationLock.scala @@ -0,0 +1,203 @@ +package com.twitter.tweetypie.util + +import com.twitter.conversions.DurationOps._ +import com.twitter.logging.Logger +import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata +import com.twitter.scrooge.BinaryThriftStructSerializer +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.servo.util.Transformer +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import com.twitter.util.Base64Long +import com.twitter.util.Time +import java.nio.ByteBuffer +import java.security.MessageDigest +import org.apache.commons.codec.binary.Base64 +import scala.collection.immutable.SortedMap + +object TweetCreationLock { + case class Key private (userId: UserId, typeCode: String, idOrMd5: String) + extends ScopedCacheKey("t", "locker", 2, Base64Long.toBase64(userId), typeCode, idOrMd5) { + def uniquenessId: Option[String] = + if (typeCode == Key.TypeCode.UniquenessId) Some(idOrMd5) else None + } + + object Key { + private[this] val log = Logger(getClass) + + object TypeCode { + val SourceTweetId = "r" + val UniquenessId = "u" + val PostTweetRequest = "p" + } + + private[this] val serializer = BinaryThriftStructSerializer(PostTweetRequest) + + // normalize the representation of no media ids. + private[util] def sanitizeMediaUploadIds(mediaUploadIds: Option[Seq[Long]]) = + mediaUploadIds.filter(_.nonEmpty) + + /** + * Request deduplication depends on the hash of a serialized Thrift value. 
+ * + * In order to guarantee that a Map has a reproducible serialized form, + * it's necessary to fix the ordering of its keys. + */ + private[util] def sanitizeMediaMetadata( + mediaMetadata: Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] + ): Option[scala.collection.Map[MediaId, UserDefinedProductMetadata]] = + mediaMetadata.map(m => SortedMap(m.toSeq: _*)) + + /** + * Make sure to sanitize request fields with map/set since serialized + * bytes ordering is not guaranteed for same thrift values. + */ + private[util] def sanitizeRequest(request: PostTweetRequest): PostTweetRequest = + PostTweetRequest( + userId = request.userId, + text = request.text, + createdVia = "", + inReplyToTweetId = request.inReplyToTweetId, + geo = request.geo, + mediaUploadIds = sanitizeMediaUploadIds(request.mediaUploadIds), + narrowcast = request.narrowcast, + nullcast = request.nullcast, + additionalFields = request.additionalFields, + attachmentUrl = request.attachmentUrl, + mediaMetadata = sanitizeMediaMetadata(request.mediaMetadata), + conversationControl = request.conversationControl, + underlyingCreativesContainerId = request.underlyingCreativesContainerId, + editOptions = request.editOptions, + noteTweetOptions = request.noteTweetOptions + ) + + def bySourceTweetId(userId: UserId, sourceTweetId: TweetId): Key = + Key(userId, TypeCode.SourceTweetId, Base64Long.toBase64(sourceTweetId)) + + def byRequest(request: PostTweetRequest): Key = + request.uniquenessId match { + case Some(uqid) => + byUniquenessId(request.userId, uqid) + case None => + val sanitized = sanitizeRequest(request) + val sanitizedBytes = serializer.toBytes(sanitized) + val digested = MessageDigest.getInstance("SHA-256").digest(sanitizedBytes) + val base64Digest = Base64.encodeBase64String(digested) + val key = Key(request.userId, TypeCode.PostTweetRequest, base64Digest) + log.ifDebug(s"Generated key $key from request:\n${sanitized}") + key + } + + /** + * Key for tweets that have a uniqueness 
id set. There is only one + * namespace of uniqueness ids, across all clients. They are + * expected to be Snowflake ids, in order to avoid cache + * collisions. + */ + def byUniquenessId(userId: UserId, uniquenessId: Long): Key = + Key(userId, TypeCode.UniquenessId, Base64Long.toBase64(uniquenessId)) + } + + /** + * The state of tweet creation for a given Key (request). + */ + sealed trait State + + object State { + + /** + * There is no tweet creation currently in progress. (This can + * either be represented by no entry in the cache, or this special + * marker. This lets us use checkAndSet for deletion to avoid + * accidentally overwriting other process' values.) + */ + case object Unlocked extends State + + /** + * Some process is attempting to create the tweet. + */ + case class InProgress(token: Long, timestamp: Time) extends State + + /** + * The tweet has already been successfully created, and has the + * specified id. + */ + case class AlreadyCreated(tweetId: TweetId, timestamp: Time) extends State + + /** + * When stored in cache, each state is prefixed by a byte + * indicating the type of the entry. + */ + object TypeCode { + val Unlocked: Byte = 0.toByte + val InProgress: Byte = 1.toByte // + random long + timestamp + val AlreadyCreated: Byte = 2.toByte // + tweet id + timestamp + } + + private[this] val BufferSize = 17 // type byte + 64-bit value + 64-bit timestamp + + // Constant buffer to use for storing the serialized form on + // Unlocked. + private[this] val UnlockedBuf = Array[Byte](TypeCode.Unlocked) + + // Store the serialization function in a ThreadLocal so that we can + // reuse the buffer between invocations. 
+ private[this] val threadLocalSerialize = new ThreadLocal[State => Array[Byte]] { + override def initialValue(): State => Array[Byte] = { + // Allocate the thread-local state + val ary = new Array[Byte](BufferSize) + val buf = ByteBuffer.wrap(ary) + + { + case Unlocked => UnlockedBuf + case InProgress(token, timestamp) => + buf.clear() + buf + .put(TypeCode.InProgress) + .putLong(token) + .putLong(timestamp.sinceEpoch.inNanoseconds) + ary + case AlreadyCreated(tweetId, timestamp) => + buf.clear() + buf + .put(TypeCode.AlreadyCreated) + .putLong(tweetId) + .putLong(timestamp.sinceEpoch.inNanoseconds) + ary + } + } + } + + /** + * Convert this State to the cache representation. + */ + private[this] def toBytes(state: State): Array[Byte] = + threadLocalSerialize.get()(state) + + /** + * Convert this byte array into a LockState. + * + * @throws RuntimeException if the buffer is not of the right size + * and format + */ + private[this] def fromBytes(bytes: Array[Byte]): State = { + val buf = ByteBuffer.wrap(bytes) + val result = buf.get() match { + case TypeCode.Unlocked => Unlocked + case TypeCode.InProgress => InProgress(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) + case TypeCode.AlreadyCreated => + AlreadyCreated(buf.getLong(), buf.getLong().nanoseconds.afterEpoch) + case other => throw new RuntimeException("Invalid type code: " + other) + } + if (buf.remaining != 0) { + throw new RuntimeException("Extra data in buffer: " + bytes) + } + result + } + + /** + * How to serialize the State for storage in cache. 
+ */ + val Serializer: Transformer[State, Array[Byte]] = + Transformer[State, Array[Byte]](tTo = toBytes _, tFrom = fromBytes _) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala new file mode 100644 index 000000000..6334c5d43 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetLenses.scala @@ -0,0 +1,506 @@ +package com.twitter.tweetypie.util + +import com.twitter.dataproducts.enrichments.thriftscala.ProfileGeoEnrichment +import com.twitter.expandodo.thriftscala._ +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.servo.data.Lens +import com.twitter.spam.rtf.thriftscala.SafetyLabel +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData + +object TweetLenses { + import Lens.checkEq + + def requireSome[A, B](l: Lens[A, Option[B]]): Lens[A, B] = + checkEq[A, B]( + a => l.get(a).get, + (a, b) => l.set(a, Some(b)) + ) + + def tweetLens[A](get: Tweet => A, set: (Tweet, A) => Tweet): Lens[Tweet, A] = + checkEq[Tweet, A](get, set) + + val id: Lens[Tweet, TweetId] = + tweetLens[TweetId](_.id, (t, id) => t.copy(id = id)) + + val coreData: Lens[Tweet, Option[TweetCoreData]] = + tweetLens[Option[TweetCoreData]](_.coreData, (t, coreData) => t.copy(coreData = coreData)) + + val requiredCoreData: Lens[Tweet, TweetCoreData] = + requireSome(coreData) + + val optUrls: Lens[Tweet, Option[Seq[UrlEntity]]] = + tweetLens[Option[Seq[UrlEntity]]](_.urls, (t, urls) => t.copy(urls = urls)) + + val urls: Lens[Tweet, Seq[UrlEntity]] = + tweetLens[Seq[UrlEntity]](_.urls.toSeq.flatten, (t, urls) => t.copy(urls = Some(urls))) + + val optMentions: Lens[Tweet, Option[Seq[MentionEntity]]] = + tweetLens[Option[Seq[MentionEntity]]](_.mentions, 
(t, v) => t.copy(mentions = v)) + + val mentions: Lens[Tweet, Seq[MentionEntity]] = + tweetLens[Seq[MentionEntity]](_.mentions.toSeq.flatten, (t, v) => t.copy(mentions = Some(v))) + + val unmentionData: Lens[Tweet, Option[UnmentionData]] = + tweetLens[Option[UnmentionData]](_.unmentionData, (t, v) => t.copy(unmentionData = v)) + + val optHashtags: Lens[Tweet, Option[Seq[HashtagEntity]]] = + tweetLens[Option[Seq[HashtagEntity]]](_.hashtags, (t, v) => t.copy(hashtags = v)) + + val hashtags: Lens[Tweet, Seq[HashtagEntity]] = + tweetLens[Seq[HashtagEntity]](_.hashtags.toSeq.flatten, (t, v) => t.copy(hashtags = Some(v))) + + val optCashtags: Lens[Tweet, Option[Seq[CashtagEntity]]] = + tweetLens[Option[Seq[CashtagEntity]]](_.cashtags, (t, v) => t.copy(cashtags = v)) + + val cashtags: Lens[Tweet, Seq[CashtagEntity]] = + tweetLens[Seq[CashtagEntity]](_.cashtags.toSeq.flatten, (t, v) => t.copy(cashtags = Some(v))) + + val optMedia: Lens[Tweet, Option[Seq[MediaEntity]]] = + tweetLens[Option[Seq[MediaEntity]]](_.media, (t, v) => t.copy(media = v)) + + val media: Lens[Tweet, Seq[MediaEntity]] = + tweetLens[Seq[MediaEntity]](_.media.toSeq.flatten, (t, v) => t.copy(media = Some(v))) + + val mediaKeys: Lens[Tweet, Seq[MediaKey]] = + tweetLens[Seq[MediaKey]]( + _.mediaKeys.toSeq.flatten, + { + case (t, v) => t.copy(mediaKeys = Some(v)) + }) + + val place: Lens[Tweet, Option[Place]] = + tweetLens[Option[Place]]( + _.place, + { + case (t, v) => t.copy(place = v) + }) + + val quotedTweet: Lens[Tweet, Option[QuotedTweet]] = + tweetLens[Option[QuotedTweet]]( + _.quotedTweet, + { + case (t, v) => t.copy(quotedTweet = v) + }) + + val selfThreadMetadata: Lens[Tweet, Option[SelfThreadMetadata]] = + tweetLens[Option[SelfThreadMetadata]]( + _.selfThreadMetadata, + { + case (t, v) => t.copy(selfThreadMetadata = v) + }) + + val composerSource: Lens[Tweet, Option[ComposerSource]] = + tweetLens[Option[ComposerSource]]( + _.composerSource, + { + case (t, v) => t.copy(composerSource = v) + }) + + 
val deviceSource: Lens[Tweet, Option[DeviceSource]] = + tweetLens[Option[DeviceSource]]( + _.deviceSource, + { + case (t, v) => t.copy(deviceSource = v) + }) + + val perspective: Lens[Tweet, Option[StatusPerspective]] = + tweetLens[Option[StatusPerspective]]( + _.perspective, + { + case (t, v) => t.copy(perspective = v) + }) + + val cards: Lens[Tweet, Option[Seq[Card]]] = + tweetLens[Option[Seq[Card]]]( + _.cards, + { + case (t, v) => t.copy(cards = v) + }) + + val card2: Lens[Tweet, Option[Card2]] = + tweetLens[Option[Card2]]( + _.card2, + { + case (t, v) => t.copy(card2 = v) + }) + + val cardReference: Lens[Tweet, Option[CardReference]] = + tweetLens[Option[CardReference]]( + _.cardReference, + { + case (t, v) => t.copy(cardReference = v) + }) + + val spamLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.spamLabel, + { + case (t, v) => t.copy(spamLabel = v) + }) + + val lowQualityLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.lowQualityLabel, + { + case (t, v) => t.copy(lowQualityLabel = v) + }) + + val nsfwHighPrecisionLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.nsfwHighPrecisionLabel, + { + case (t, v) => t.copy(nsfwHighPrecisionLabel = v) + }) + + val bounceLabel: Lens[Tweet, Option[SafetyLabel]] = + tweetLens[Option[SafetyLabel]]( + _.bounceLabel, + { + case (t, v) => t.copy(bounceLabel = v) + }) + + val takedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = + tweetLens[Option[Seq[String]]]( + _.takedownCountryCodes, + { + case (t, v) => t.copy(takedownCountryCodes = v) + }) + + val takedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] = + tweetLens[Option[Seq[TakedownReason]]]( + _.takedownReasons, + { + case (t, v) => t.copy(takedownReasons = v) + }) + + val contributor: Lens[Tweet, Option[Contributor]] = + tweetLens[Option[Contributor]]( + _.contributor, + { + case (t, v) => t.copy(contributor = v) + }) + + val mediaTags: Lens[Tweet, 
Option[TweetMediaTags]] = + tweetLens[Option[TweetMediaTags]]( + _.mediaTags, + { + case (t, v) => t.copy(mediaTags = v) + }) + + val mediaTagMap: Lens[Tweet, Map[MediaId, Seq[MediaTag]]] = + tweetLens[Map[MediaId, Seq[MediaTag]]]( + _.mediaTags.map { case TweetMediaTags(tagMap) => tagMap.toMap }.getOrElse(Map.empty), + (t, v) => { + val cleanMap = v.filter { case (_, tags) => tags.nonEmpty } + t.copy(mediaTags = if (cleanMap.nonEmpty) Some(TweetMediaTags(cleanMap)) else None) + } + ) + + val escherbirdEntityAnnotations: Lens[Tweet, Option[EscherbirdEntityAnnotations]] = + tweetLens[Option[EscherbirdEntityAnnotations]]( + _.escherbirdEntityAnnotations, + { + case (t, v) => t.copy(escherbirdEntityAnnotations = v) + }) + + val communities: Lens[Tweet, Option[Communities]] = + tweetLens[Option[Communities]]( + _.communities, + { + case (t, v) => t.copy(communities = v) + }) + + val tweetypieOnlyTakedownCountryCodes: Lens[Tweet, Option[Seq[String]]] = + tweetLens[Option[Seq[String]]]( + _.tweetypieOnlyTakedownCountryCodes, + { + case (t, v) => t.copy(tweetypieOnlyTakedownCountryCodes = v) + }) + + val tweetypieOnlyTakedownReasons: Lens[Tweet, Option[Seq[TakedownReason]]] = + tweetLens[Option[Seq[TakedownReason]]]( + _.tweetypieOnlyTakedownReasons, + { + case (t, v) => t.copy(tweetypieOnlyTakedownReasons = v) + }) + + val profileGeo: Lens[Tweet, Option[ProfileGeoEnrichment]] = + tweetLens[Option[ProfileGeoEnrichment]]( + _.profileGeoEnrichment, + (t, v) => t.copy(profileGeoEnrichment = v) + ) + + val visibleTextRange: Lens[Tweet, Option[TextRange]] = + tweetLens[Option[TextRange]]( + _.visibleTextRange, + { + case (t, v) => t.copy(visibleTextRange = v) + }) + + val selfPermalink: Lens[Tweet, Option[ShortenedUrl]] = + tweetLens[Option[ShortenedUrl]]( + _.selfPermalink, + { + case (t, v) => t.copy(selfPermalink = v) + }) + + val extendedTweetMetadata: Lens[Tweet, Option[ExtendedTweetMetadata]] = + tweetLens[Option[ExtendedTweetMetadata]]( + _.extendedTweetMetadata, + { + 
case (t, v) => t.copy(extendedTweetMetadata = v) + }) + + object TweetCoreData { + val userId: Lens[TweetCoreData, UserId] = checkEq[TweetCoreData, UserId]( + _.userId, + { (c, v) => + // Pleases the compiler: https://github.com/scala/bug/issues/9171 + val userId = v + c.copy(userId = userId) + }) + val text: Lens[TweetCoreData, String] = checkEq[TweetCoreData, String]( + _.text, + { (c, v) => + // Pleases the compiler: https://github.com/scala/bug/issues/9171 + val text = v + c.copy(text = text) + }) + val createdAt: Lens[TweetCoreData, TweetId] = + checkEq[TweetCoreData, Long](_.createdAtSecs, (c, v) => c.copy(createdAtSecs = v)) + val createdVia: Lens[TweetCoreData, String] = + checkEq[TweetCoreData, String]( + _.createdVia, + { + case (c, v) => c.copy(createdVia = v) + }) + val hasTakedown: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.hasTakedown, + { + case (c, v) => c.copy(hasTakedown = v) + }) + val nullcast: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nullcast, + { + case (c, v) => c.copy(nullcast = v) + }) + val nsfwUser: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nsfwUser, + { + case (c, v) => c.copy(nsfwUser = v) + }) + val nsfwAdmin: Lens[TweetCoreData, Boolean] = + checkEq[TweetCoreData, Boolean]( + _.nsfwAdmin, + { + case (c, v) => c.copy(nsfwAdmin = v) + }) + val reply: Lens[TweetCoreData, Option[Reply]] = + checkEq[TweetCoreData, Option[Reply]]( + _.reply, + { + case (c, v) => c.copy(reply = v) + }) + val share: Lens[TweetCoreData, Option[Share]] = + checkEq[TweetCoreData, Option[Share]]( + _.share, + { + case (c, v) => c.copy(share = v) + }) + val narrowcast: Lens[TweetCoreData, Option[Narrowcast]] = + checkEq[TweetCoreData, Option[Narrowcast]]( + _.narrowcast, + { + case (c, v) => c.copy(narrowcast = v) + }) + val directedAtUser: Lens[TweetCoreData, Option[DirectedAtUser]] = + checkEq[TweetCoreData, Option[DirectedAtUser]]( + _.directedAtUser, + { + case (c, v) => 
c.copy(directedAtUser = v) + }) + val conversationId: Lens[TweetCoreData, Option[ConversationId]] = + checkEq[TweetCoreData, Option[ConversationId]]( + _.conversationId, + { + case (c, v) => c.copy(conversationId = v) + }) + val placeId: Lens[TweetCoreData, Option[String]] = + checkEq[TweetCoreData, Option[String]]( + _.placeId, + { + case (c, v) => c.copy(placeId = v) + }) + val geoCoordinates: Lens[TweetCoreData, Option[GeoCoordinates]] = + checkEq[TweetCoreData, Option[GeoCoordinates]]( + _.coordinates, + (c, v) => c.copy(coordinates = v) + ) + val trackingId: Lens[TweetCoreData, Option[TweetId]] = + checkEq[TweetCoreData, Option[Long]]( + _.trackingId, + { + case (c, v) => c.copy(trackingId = v) + }) + val hasMedia: Lens[TweetCoreData, Option[Boolean]] = + checkEq[TweetCoreData, Option[Boolean]]( + _.hasMedia, + { + case (c, v) => c.copy(hasMedia = v) + }) + } + + val counts: Lens[Tweet, Option[StatusCounts]] = + tweetLens[Option[StatusCounts]]( + _.counts, + { + case (t, v) => t.copy(counts = v) + }) + + object StatusCounts { + val retweetCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.retweetCount, + (c, retweetCount) => c.copy(retweetCount = retweetCount) + ) + + val replyCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.replyCount, + (c, replyCount) => c.copy(replyCount = replyCount) + ) + + val favoriteCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.favoriteCount, + { + case (c, v) => c.copy(favoriteCount = v) + }) + + val quoteCount: Lens[StatusCounts, Option[TweetId]] = + checkEq[StatusCounts, Option[Long]]( + _.quoteCount, + { + case (c, v) => c.copy(quoteCount = v) + }) + } + + val userId: Lens[Tweet, UserId] = requiredCoreData andThen TweetCoreData.userId + val text: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.text + val createdVia: Lens[Tweet, String] = requiredCoreData andThen TweetCoreData.createdVia + val 
createdAt: Lens[Tweet, ConversationId] = requiredCoreData andThen TweetCoreData.createdAt + val reply: Lens[Tweet, Option[Reply]] = requiredCoreData andThen TweetCoreData.reply + val share: Lens[Tweet, Option[Share]] = requiredCoreData andThen TweetCoreData.share + val narrowcast: Lens[Tweet, Option[Narrowcast]] = + requiredCoreData andThen TweetCoreData.narrowcast + val directedAtUser: Lens[Tweet, Option[DirectedAtUser]] = + requiredCoreData andThen TweetCoreData.directedAtUser + val conversationId: Lens[Tweet, Option[ConversationId]] = + requiredCoreData andThen TweetCoreData.conversationId + val placeId: Lens[Tweet, Option[String]] = requiredCoreData andThen TweetCoreData.placeId + val geoCoordinates: Lens[Tweet, Option[GeoCoordinates]] = + requiredCoreData andThen TweetCoreData.geoCoordinates + val hasTakedown: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.hasTakedown + val nsfwAdmin: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwAdmin + val nsfwUser: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nsfwUser + val nullcast: Lens[Tweet, Boolean] = requiredCoreData andThen TweetCoreData.nullcast + val trackingId: Lens[Tweet, Option[ConversationId]] = + requiredCoreData andThen TweetCoreData.trackingId + val hasMedia: Lens[Tweet, Option[Boolean]] = requiredCoreData andThen TweetCoreData.hasMedia + + object CashtagEntity { + val indices: Lens[CashtagEntity, (Short, Short)] = + checkEq[CashtagEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val text: Lens[CashtagEntity, String] = + checkEq[CashtagEntity, String](_.text, (t, text) => t.copy(text = text)) + } + + object HashtagEntity { + val indices: Lens[HashtagEntity, (Short, Short)] = + checkEq[HashtagEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val text: Lens[HashtagEntity, String] = + checkEq[HashtagEntity, String](_.text, (t, 
text) => t.copy(text = text)) + } + + object MediaEntity { + val indices: Lens[MediaEntity, (Short, Short)] = + checkEq[MediaEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val mediaSizes: Lens[MediaEntity, collection.Set[MediaSize]] = + checkEq[MediaEntity, scala.collection.Set[MediaSize]]( + _.sizes, + (m, sizes) => m.copy(sizes = sizes) + ) + val url: Lens[MediaEntity, String] = + checkEq[MediaEntity, String]( + _.url, + { + case (t, v) => t.copy(url = v) + }) + val mediaInfo: Lens[MediaEntity, Option[MediaInfo]] = + checkEq[MediaEntity, Option[MediaInfo]]( + _.mediaInfo, + { + case (t, v) => t.copy(mediaInfo = v) + }) + } + + object MentionEntity { + val indices: Lens[MentionEntity, (Short, Short)] = + checkEq[MentionEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val screenName: Lens[MentionEntity, String] = + checkEq[MentionEntity, String]( + _.screenName, + (t, screenName) => t.copy(screenName = screenName) + ) + } + + object UrlEntity { + val indices: Lens[UrlEntity, (Short, Short)] = + checkEq[UrlEntity, (Short, Short)]( + t => (t.fromIndex, t.toIndex), + (t, v) => t.copy(fromIndex = v._1, toIndex = v._2) + ) + val url: Lens[UrlEntity, String] = + checkEq[UrlEntity, String](_.url, (t, url) => t.copy(url = url)) + } + + object Contributor { + val screenName: Lens[Contributor, Option[String]] = + checkEq[Contributor, Option[String]]( + _.screenName, + (c, screenName) => c.copy(screenName = screenName) + ) + } + + object Reply { + val inReplyToScreenName: Lens[Reply, Option[String]] = + checkEq[Reply, Option[String]]( + _.inReplyToScreenName, + (c, inReplyToScreenName) => c.copy(inReplyToScreenName = inReplyToScreenName) + ) + + val inReplyToStatusId: Lens[Reply, Option[TweetId]] = + checkEq[Reply, Option[TweetId]]( + _.inReplyToStatusId, + (c, inReplyToStatusId) => c.copy(inReplyToStatusId = inReplyToStatusId) + ) + } +} 
diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala new file mode 100644 index 000000000..5a0bbcb2d --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetPermalinkUtil.scala @@ -0,0 +1,18 @@ +package com.twitter.tweetypie.util + +import com.twitter.tweetutil.TweetPermalink +import com.twitter.tweetypie.thriftscala._ + +object TweetPermalinkUtil { + def lastQuotedTweetPermalink(tweet: Tweet): Option[(UrlEntity, TweetPermalink)] = + lastQuotedTweetPermalink(TweetLenses.urls.get(tweet)) + + def lastQuotedTweetPermalink(urls: Seq[UrlEntity]): Option[(UrlEntity, TweetPermalink)] = + urls.flatMap(matchQuotedTweetPermalink).lastOption + + def matchQuotedTweetPermalink(entity: UrlEntity): Option[(UrlEntity, TweetPermalink)] = + for { + expanded <- entity.expanded + permalink <- TweetPermalink.parse(expanded) + } yield (entity, permalink) +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala new file mode 100644 index 000000000..a9b9c8748 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/TweetTransformer.scala @@ -0,0 +1,128 @@ +package com.twitter.tweetypie.util + +import com.twitter.tweetypie.thriftscala._ + +object TweetTransformer { + def toStatus(tweet: Tweet): Status = { + assert(tweet.coreData.nonEmpty, "tweet core data is missing") + val coreData = tweet.coreData.get + + val toGeo: Option[Geo] = + coreData.coordinates match { + case Some(coords) => + Some( + Geo( + latitude = coords.latitude, + longitude = coords.longitude, + geoPrecision = coords.geoPrecision, + entityId = if (coords.display) 2 else 0, + name = coreData.placeId, + place = tweet.place, + placeId = coreData.placeId, + coordinates = Some(coords) + ) + ) + case _ => + coreData.placeId match { + case None => None + case 
Some(_) => + Some(Geo(name = coreData.placeId, place = tweet.place, placeId = coreData.placeId)) + } + } + + Status( + id = tweet.id, + userId = coreData.userId, + text = coreData.text, + createdVia = coreData.createdVia, + createdAt = coreData.createdAtSecs, + urls = tweet.urls.getOrElse(Seq.empty), + mentions = tweet.mentions.getOrElse(Seq.empty), + hashtags = tweet.hashtags.getOrElse(Seq.empty), + cashtags = tweet.cashtags.getOrElse(Seq.empty), + media = tweet.media.getOrElse(Seq.empty), + reply = tweet.coreData.flatMap(_.reply), + directedAtUser = tweet.coreData.flatMap(_.directedAtUser), + share = tweet.coreData.flatMap(_.share), + quotedTweet = tweet.quotedTweet, + geo = toGeo, + hasTakedown = coreData.hasTakedown, + nsfwUser = coreData.nsfwUser, + nsfwAdmin = coreData.nsfwAdmin, + counts = tweet.counts, + deviceSource = tweet.deviceSource, + narrowcast = coreData.narrowcast, + takedownCountryCodes = tweet.takedownCountryCodes, + perspective = tweet.perspective, + cards = tweet.cards, + card2 = tweet.card2, + nullcast = coreData.nullcast, + conversationId = coreData.conversationId, + language = tweet.language, + trackingId = coreData.trackingId, + spamLabels = tweet.spamLabels, + hasMedia = coreData.hasMedia, + contributor = tweet.contributor, + mediaTags = tweet.mediaTags + ) + } + + def toTweet(status: Status): Tweet = { + val coreData = + TweetCoreData( + userId = status.userId, + text = status.text, + createdVia = status.createdVia, + createdAtSecs = status.createdAt, + reply = status.reply, + directedAtUser = status.directedAtUser, + share = status.share, + hasTakedown = status.hasTakedown, + nsfwUser = status.nsfwUser, + nsfwAdmin = status.nsfwAdmin, + nullcast = status.nullcast, + narrowcast = status.narrowcast, + trackingId = status.trackingId, + conversationId = status.conversationId, + hasMedia = status.hasMedia, + coordinates = toCoords(status), + placeId = status.geo.flatMap(_.placeId) + ) + + Tweet( + id = status.id, + coreData = Some(coreData), 
+ urls = Some(status.urls), + mentions = Some(status.mentions), + hashtags = Some(status.hashtags), + cashtags = Some(status.cashtags), + media = Some(status.media), + place = status.geo.flatMap(_.place), + quotedTweet = status.quotedTweet, + takedownCountryCodes = status.takedownCountryCodes, + counts = status.counts, + deviceSource = status.deviceSource, + perspective = status.perspective, + cards = status.cards, + card2 = status.card2, + language = status.language, + spamLabels = status.spamLabels, + contributor = status.contributor, + mediaTags = status.mediaTags + ) + } + + private def toCoords(status: Status): Option[GeoCoordinates] = + status.geo.map { geo => + if (geo.coordinates.nonEmpty) geo.coordinates.get + // Status from monorail have the coordinates as the top level fields in Geo, + // while the nested struct is empty. So we need to copy from the flat fields. + else + GeoCoordinates( + latitude = geo.latitude, + longitude = geo.longitude, + geoPrecision = geo.geoPrecision, + display = geo.entityId == 2 + ) + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala new file mode 100644 index 000000000..0dae0bfdc --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/AlertableExceptionLoggingFilter.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie.util.logging + +import ch.qos.logback.classic.spi.ILoggingEvent +import ch.qos.logback.classic.spi.ThrowableProxy +import ch.qos.logback.core.filter.Filter +import ch.qos.logback.core.spi.FilterReply +import com.twitter.tweetypie.serverutil.ExceptionCounter.isAlertable + +/** + * This class is currently being used by logback to log alertable exceptions to a seperate file. + * + * Filters do not change the log levels of individual loggers. Filters filter out specific messages + * for specific appenders. 
This allows us to have a log file with lots of information you will + * mostly not need and a log file with only important information. This type of filtering cannot be + * accomplished by changing the log levels of loggers, because the logger levels are global. We want + * to change the semantics for specific destinations (appenders). + */ +class AlertableExceptionLoggingFilter extends Filter[ILoggingEvent] { + private[this] val IgnorableLoggers: Set[String] = + Set( + "com.github.benmanes.caffeine.cache.BoundedLocalCache", + "abdecider", + "org.apache.kafka.common.network.SaslChannelBuilder", + "com.twitter.finagle.netty4.channel.ChannelStatsHandler$" + ) + + def include(proxy: ThrowableProxy, event: ILoggingEvent): Boolean = + isAlertable(proxy.getThrowable()) && !IgnorableLoggers(event.getLoggerName) + + override def decide(event: ILoggingEvent): FilterReply = + if (!isStarted) { + FilterReply.NEUTRAL + } else { + event.getThrowableProxy() match { + case proxy: ThrowableProxy if include(proxy, event) => + FilterReply.NEUTRAL + case _ => + FilterReply.DENY + } + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD new file mode 100644 index 000000000..68702d3cf --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/BUILD @@ -0,0 +1,17 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = [ + "bazel-compatible", + "logging_impl_check_whitelisted_target", + ], + dependencies = [ + "3rdparty/jvm/ch/qos/logback:logback-classic", + "3rdparty/jvm/com/google/guava", + "finagle/finagle-memcached/src/main/scala", + "src/thrift/com/twitter/servo:servo-exception-java", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) diff --git 
a/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala new file mode 100644 index 000000000..fe035bddf --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/logging/OnlyImportantLogsLoggingFilter.scala @@ -0,0 +1,30 @@ +package com.twitter.tweetypie.util.logging + +import ch.qos.logback.classic.Level +import ch.qos.logback.classic.spi.ILoggingEvent +import ch.qos.logback.core.filter.Filter +import ch.qos.logback.core.spi.FilterReply + +/** + * This class is currently being used by logback to log statements from tweetypie at one level and + * log statements from other packages at another. + * + * Filters do not change the log levels of individual loggers. Filters filter out specific messages + * for specific appenders. This allows us to have a log file with lots of information you will + * mostly not need and a log file with only important information. This type of filtering cannot be + * accomplished by changing the log levels of loggers, because the logger levels are global. We want + * to change the semantics for specific destinations (appenders). 
+ */ +class OnlyImportantLogsLoggingFilter extends Filter[ILoggingEvent] { + private[this] def notImportant(loggerName: String): Boolean = + !loggerName.startsWith("com.twitter.tweetypie") + + override def decide(event: ILoggingEvent): FilterReply = + if (!isStarted || event.getLevel.isGreaterOrEqual(Level.WARN)) { + FilterReply.NEUTRAL + } else if (notImportant(event.getLoggerName())) { + FilterReply.DENY + } else { + FilterReply.NEUTRAL + } +} diff --git a/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala new file mode 100644 index 000000000..c99d3afa7 --- /dev/null +++ b/tweetypie/common/src/scala/com/twitter/tweetypie/util/package.scala @@ -0,0 +1,9 @@ +package com.twitter.tweetypie + +package object util { + type TweetId = Long + type UserId = Long + type MediaId = Long + type ConversationId = Long + type PlaceId = String +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD new file mode 100644 index 000000000..1ccf63deb --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/BUILD @@ -0,0 +1,353 @@ +create_thrift_libraries( + base_name = "media-entity", + sources = ["media_entity.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + "mediaservices/commons/src/main/thrift", + ], + export_roots = [ + "mediaservices/commons/src/main/thrift:thrift", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-media-entity-thrift-java", + provides_scala_name = "tweetypie-media-entity-thrift-scala", +) + +create_thrift_libraries( + base_name = "edit-control", + sources = ["edit_control.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = 
"tweetypie-edit-control-thrift-java", + provides_scala_name = "tweetypie-edit-control-thrift-scala", +) + +create_thrift_libraries( + base_name = "api-fields", + sources = ["api_fields.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-api-fields-thrift-java", + provides_scala_name = "tweetypie-api-fields-thrift-scala", +) + +create_thrift_libraries( + base_name = "note-tweet", + sources = ["note_tweet.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "ruby", + "scala", + "strato", + ], + provides_java_name = "tweetypie-note-tweet-thrift-java", + provides_scala_name = "tweetypie-note-tweet-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet", + sources = [ + "creative-entity-enrichments/creative_entity_enrichments.thrift", + "geo/tweet_location_info.thrift", + "media/media_ref.thrift", + "tweet.thrift", + "unmentions/unmentions.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":api-fields", + ":edit-control", + ":media-entity", + ":note-tweet", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/content-health/toxicreplyfilter", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", + "src/thrift/com/twitter/escherbird:tweet-annotation", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/geoduck", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/timelines/self_thread:thrift", + "src/thrift/com/twitter/tseng/withholding:thrift", + "src/thrift/com/twitter/tweet_pivots:tweet-pivots", + "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", + "unified-cards/thrift/src/main/thrift:thrift-contract", + ], + export_roots = [ + 
":api-fields", + ":edit-control", + ":media-entity", + ":note-tweet", + "mediaservices/commons/src/main/thrift:thrift", + "src/thrift/com/twitter/content-health/toxicreplyfilter", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo", + "src/thrift/com/twitter/escherbird:tweet-annotation", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/geoduck:geoduck", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/timelines/self_thread:thrift", + "src/thrift/com/twitter/tseng/withholding:thrift", + "src/thrift/com/twitter/tweet_pivots:tweet-pivots", + "strato/config/src/thrift/com/twitter/strato/columns/creative_entity_enrichments", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-tweet-thrift-java", + provides_python_name = "tweetypie-tweet-thrift-python", + provides_scala_name = "tweetypie-tweet-thrift-scala", +) + +create_thrift_libraries( + base_name = "service", + sources = [ + "deleted_tweet.thrift", + "tweet_service.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":audit", + ":transient_context", + ":tweet", + "carousel/service/thrift:service", + "incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/bouncer:bounce-action-thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/servo:servo-exception", + "src/thrift/com/twitter/spam/features:safety-meta-data", + "src/thrift/com/twitter/spam/rtf:safety-label", + "src/thrift/com/twitter/spam/rtf:safety-level", + "src/thrift/com/twitter/spam/rtf:safety-result", + "src/thrift/com/twitter/tseng/withholding:thrift", + ], + export_roots = [ + ":transient_context", + ":tweet", + "carousel/service/thrift:service", + 
"incentives/jiminy/src/main/thrift/com/twitter/incentives/jiminy:thrift", + "src/thrift/com/twitter/bouncer:bounce-action-thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/spam/features:safety-meta-data", + "src/thrift/com/twitter/spam/rtf:safety-level", + "src/thrift/com/twitter/spam/rtf:safety-result", + ], + generate_languages = [ + "go", + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-service-thrift-java", + provides_python_name = "tweetypie-service-thrift-python", + provides_scala_name = "tweetypie-service-thrift-scala", +) + +create_thrift_libraries( + base_name = "events", + sources = [ + "retweet_archival_event.thrift", + "tweet_events.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":audit", + ":transient_context", + ":tweet", + "src/thrift/com/twitter/gizmoduck:user-thrift", + ], + export_roots = [ + ":audit", + ":transient_context", + ":tweet", + "src/thrift/com/twitter/gizmoduck:user-thrift", + ], + generate_languages = [ + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-events-thrift-java", + provides_scala_name = "tweetypie-events-thrift-scala", +) + +create_thrift_libraries( + base_name = "audit", + sources = ["tweet_audit.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-audit-thrift-java", + provides_scala_name = "tweetypie-audit-thrift-scala", +) + +create_thrift_libraries( + base_name = "deprecated", + sources = ["deprecated.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":service", + ":tweet", + "mediaservices/commons/src/main/thrift", + "src/thrift/com/twitter/expandodo:cards", + "src/thrift/com/twitter/gizmoduck:user-thrift", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", + ], + generate_languages 
= [ + "java", + "python", + "scala", + "strato", + ], + provides_java_name = "tweetypie-deprecated-thrift-java", + provides_scala_name = "tweetypie-deprecated-thrift-scala", +) + +create_thrift_libraries( + base_name = "delete_location_data", + sources = ["delete_location_data.thrift"], + tags = ["bazel-compatible"], + provides_java_name = "delete-location-data-java", + provides_scala_name = "delete-location-data-scala", +) + +create_thrift_libraries( + base_name = "transient_context", + sources = ["transient_context.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":tweet", + ], + generate_languages = [ + "go", + "java", + "lua", + "python", + "scala", + "strato", + ], + provides_java_name = "transient-context-java", + provides_scala_name = "transient-context-scala", +) + +create_thrift_libraries( + base_name = "tweet_comparison_service", + sources = ["tweet_comparison_service.thrift"], + tags = ["bazel-compatible"], + dependency_roots = [ + ":service", + "src/thrift/com/twitter/context:twitter-context", + ], + generate_languages = [ + "java", + "scala", + ], + provides_java_name = "tweet-comparison-service-thrift-java", + provides_scala_name = "tweet-comparison-service-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet_service_graphql", + sources = ["tweet_service_graphql.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + "src/thrift/com/twitter/ads/callback:engagement_request", + "strato/config/src/thrift/com/twitter/strato/graphql", + ], + generate_languages = [ + "scala", + "strato", + ], + provides_scala_name = "tweet-service-graphql-scala", +) + +create_thrift_libraries( + base_name = "stored-tweet-info", + sources = [ + "stored_tweet_info.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":tweet", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = 
"tweetypie-stored-tweet-info-thrift-java", + provides_scala_name = "tweetypie-stored-tweet-info-thrift-scala", +) + +create_thrift_libraries( + base_name = "tweet-service-federated", + sources = [ + "tweet_service_federated.thrift", + ], + platform = "java8", + tags = ["bazel-compatible"], + dependency_roots = [ + ":stored-tweet-info", + ], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = "tweetypie-service-federated-thrift-java", + provides_scala_name = "tweetypie-service-federated-thrift-scala", +) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift new file mode 100644 index 000000000..d48cbf171 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/api_fields.thrift @@ -0,0 +1,18 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.api_fields +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +// Structs used specifically for rendering through graphql. + +/** + * Perspective of a Tweet from the point of view of a User. 
+ */ +struct TweetPerspective { + 1: bool favorited + 2: bool retweeted + 3: optional bool bookmarked +}(persisted='true', hasPersonalData = 'false', strato.graphql.typename='TweetPerspective') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift new file mode 100644 index 000000000..48a50ca03 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift @@ -0,0 +1,21 @@ +namespace java com.twitter.tweetypie.creative_entity_enrichments.thriftjava +#@ namespace scala com.twitter.tweetypie.creative_entity_enrichments.thriftscala +#@ namespace strato com.twitter.tweetypie.creative_entity_enrichments +namespace py gen.twitter.tweetypie.creative_entity_enrichments + +include "com/twitter/strato/columns/creative_entity_enrichments/enrichments.thrift" + +struct CreativeEntityEnrichmentRef { + 1: required i64 enrichmentId +}(persisted='true', hasPersonalData='false') + +/** + * This struct represents a collection of enrichments applied to a tweet. + * The enrichment for a tweet is just a metadata attached to a tweet + * Each enrichment has a unique id (EnrichmentId) to uniquely identify an enrichment. + * + * enrichment_type signifies the type of an enrichment (eg: Interactive Text). 
+ */ +struct CreativeEntityEnrichments { + 1: required map enrichment_type_to_ref +}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift new file mode 100644 index 000000000..35f68dd10 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/delete_location_data.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +namespace py gen.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +/** + * Event that triggers deletion of the geo information on tweets created + * at timestamp_ms or earlier. + */ +struct DeleteLocationData { + /** + * The id of the user whose tweets should have their geo information + * removed. + */ + 1: required i64 user_id (personalDataType='UserId') + + /** + * The time at which this request was initiated. Tweets by this user + * whose snowflake ids contain timestamps less than or equal to this + * value will no longer be returned with geo information. + */ + 2: required i64 timestamp_ms + + /** + * The last time this user requested deletion of location data prior + * to this request. This value may be omitted, but should be included + * if available for implementation efficiency, since it eliminates the + * need to scan tweets older than this value for geo information. 
+ */ + 3: optional i64 last_timestamp_ms +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift new file mode 100644 index 000000000..cedf451d5 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/deleted_tweet.thrift @@ -0,0 +1,86 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.deletedtweet +namespace rb TweetyPie +namespace go tweetypie + +// Structs used for response from getDeletedTweets + +struct DeletedTweetMediaEntity { + 1: required i64 id + 2: required i8 mediaType + 3: required i16 width + 4: required i16 height +} (persisted = 'true') + +struct DeletedTweetShare { + 1: required i64 sourceStatusId + 2: required i64 sourceUserId + 3: required i64 parentStatusId +} (persisted = 'true') + +/** + * A tweet that has been soft- or hard-deleted. + * + * Originally DeletedTweet used the same field ids as tbird.Status. + * This is no longer the case. + */ +struct DeletedTweet { + // Uses the same field ids as tbird.thrift so we can easily map and add fields later + 1: required i64 id + + /** + * User who created the tweet. Only available for soft-deleted tweets. + */ + 2: optional i64 userId + + /** + * Content of the tweet. Only available for soft-deleted tweets. + */ + 3: optional string text + + /** + * When the tweet was created. Only available for soft-deleted tweets. + */ + 5: optional i64 createdAtSecs + + /** + * Retweet information if the deleted tweet was a retweet. Only available + * for soft-deleted tweets. + */ + 7: optional DeletedTweetShare share + + /** + * Media metadata if the deleted tweet included media. Only available for + * soft-deleted tweets. 
+ */ + 14: optional list media + + /** + * The time when this tweet was deleted by a user, in epoch milliseconds, either normally (aka + * "softDelete") or via a bouncer flow (aka "bounceDelete"). + * + * This data is not available for all deleted tweets. + */ + 18: optional i64 deletedAtMsec + + /** + * The time when this tweet was permanently deleted, in epoch milliseconds. + * + * This data is not available for all deleted tweets. + */ + 19: optional i64 hardDeletedAtMsec + + /** + * The ID of the NoteTweet associated with this Tweet if one exists. This is used by safety tools + * to fetch the NoteTweet content when viewing soft deleted Tweets. + */ + 20: optional i64 noteTweetId + + /** + * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text. Can + * be used to distinguish between Longer Tweets and RichText Tweets. + */ + 21: optional bool isExpandable +} (persisted = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift new file mode 100644 index 000000000..55cdde2d7 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/deprecated.thrift @@ -0,0 +1,99 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.deprecated +namespace rb TweetyPie + +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/gizmoduck/user.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_service.thrift" + +/** + * @deprecated Use Place + */ +struct Geo { + /** + * @deprecated Use coordinates.latitude + */ + 1: double latitude = 0.0 (personalDataType = 'GpsCoordinates') + + /** + * @deprecated Use coordinates.longitude + */ + 2: double longitude = 0.0 (personalDataType = 'GpsCoordinates') + + /** + * 
@deprecated Use coordinates.geo_precision + */ + 3: i32 geo_precision = 0 + + /** + * 0: don't show lat/long + * 2: show + * + * @deprecated + */ + 4: i64 entity_id = 0 + + /** + * @deprecated Use place_id + */ + 5: optional string name (personalDataType = 'PublishedCoarseLocationTweet') + + 6: optional tweet.Place place // provided if StatusRequestOptions.load_places is set + 7: optional string place_id // ex: ad2f50942562790b + 8: optional tweet.GeoCoordinates coordinates +}(persisted = 'true', hasPersonalData = 'true') + +/** + * @deprecated Use Tweet and APIs that accept or return Tweet. + */ +struct Status { + 1: i64 id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + 3: string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: string created_via (personalDataType = 'ClientType') + 5: i64 created_at // in seconds + 6: list urls = [] + 7: list mentions = [] + 8: list hashtags = [] + 29: list cashtags = [] + 9: list media = [] + 10: optional tweet.Reply reply + 31: optional tweet.DirectedAtUser directed_at_user + 11: optional tweet.Share share + 32: optional tweet.QuotedTweet quoted_tweet + 12: optional tweet.Contributor contributor + 13: optional Geo geo + // has_takedown indicates if there is a takedown specifically on this tweet. + // takedown_country_codes contains takedown countries for both the tweet and the user, + // so has_takedown might be false while takedown_country_codes is non-empty. 
+ 14: bool has_takedown = 0 + 15: bool nsfw_user = 0 + 16: bool nsfw_admin = 0 + 17: optional tweet.StatusCounts counts + // 18: obsoleted + 19: optional tweet.DeviceSource device_source // not set on DB failure + 20: optional tweet.Narrowcast narrowcast + 21: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + 22: optional tweet.StatusPerspective perspective // not set if no user ID or on TLS failure + 23: optional list cards // only included if StatusRequestOptions.include_cards == true + // only included when StatusRequestOptions.include_cards == true + // and StatusRequestOptions.cards_platform_key is set to valid value + 30: optional cards.Card2 card2 + 24: bool nullcast = 0 + 25: optional i64 conversation_id (personalDataType = 'TweetId') + 26: optional tweet.Language language + 27: optional i64 tracking_id (personalDataType = 'ImpressionId') + 28: optional map spam_labels + 33: optional bool has_media + // obsolete 34: optional list topic_labels + // Additional fields for flexible schema + 101: optional tweet.TweetMediaTags media_tags + 103: optional tweet.CardBindingValues binding_values + 104: optional tweet.ReplyAddresses reply_addresses + 105: optional tweet.TwitterSuggestInfo twitter_suggest_info +}(persisted = 'true', hasPersonalData = 'true') + diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift new file mode 100644 index 000000000..d1eb83a33 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/edit_control.thrift @@ -0,0 +1,71 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.edit_control +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +/** + * EditControlInitial is present on all new Tweets. 
Initially, edit_tweet_ids will only contain the id of the new Tweet. + * Subsequent edits will append the edited Tweet ids to edit_tweet_ids. +**/ +struct EditControlInitial { + /** + * A list of all edits of this initial Tweet, including the initial Tweet id, + * and in ascending time order (the oldest revision first). + */ + 1: required list edit_tweet_ids = [] (personalDataType = 'TweetId', strato.json.numbers.type = 'string') + /** + * Epoch timestamp in milli-seconds (UTC) after which the tweet will no longer be editable. + */ + 2: optional i64 editable_until_msecs (strato.json.numbers.type = 'string') + /** + * Number of edits that are available for this Tweet. This starts at 5 and decrements with each edit. + */ + 3: optional i64 edits_remaining (strato.json.numbers.type = 'string') + + /** + * Specifies whether the Tweet has any intrinsic properties that mean it can't be edited + * (for example, we have a business rule that poll Tweets can't be edited). + * + * If a Tweet edit expires due to time frame or number of edits, this field still is set + * to true for Tweets that could have been edited. + */ + 4: optional bool is_edit_eligible +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlInitial') + +/** + * EditControlEdit is present for any Tweets that are an edit of another Tweet. The full list of edits can be retrieved + * from the edit_control_initial field, which will always be hydrated. +**/ +struct EditControlEdit { + /** + * The id of the initial Tweet in an edit chain + */ + 1: required i64 initial_tweet_id (personalDataType = 'TweetId', strato.json.numbers.type = 'string') + /** + * This field is only used during hydration to return the EditControl of the initial Tweet for + * a subsequently edited version. + */ + 2: optional EditControlInitial edit_control_initial +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControlEdit') + + +/** + * Tweet metadata about edits of a Tweet. 
A list of edits to a Tweet is represented as a chain of + * Tweets linked to each other using the EditControl field. + * + * EditControl can be either EditControlInitial which means that the Tweet is unedited or the first Tweet in + * an edit chain, or EditControlEdit which means it is a Tweet in the edit chain after the first + * Tweet. + */ +union EditControl { + 1: EditControlInitial initial + 2: EditControlEdit edit +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='EditControl') + + +service FederatedServiceBase { + EditControl getEditControl(1: required i64 tweetId) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift new file mode 100644 index 000000000..500e9ffcf --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/geo/tweet_location_info.thrift @@ -0,0 +1,72 @@ +namespace java com.twitter.tweetypie.geo.thriftjava +#@namespace scala com.twitter.tweetypie.geo.thriftscala +#@namespace strato com.twitter.tweetypie.geo +namespace py gen.twitter.tweetypie.geo +namespace rb TweetyPie + +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// // +// This file contains type definitions to support the Geo field added to Tweet flexible schema ONLY. // +// It is unlikely to be re-usable so treat them as private outside the subpackage defined here. // +// // +// In respect to back storage, consider it has limited capacity, provisioned to address particular use cases. // +// There are no free resources outside its current usage plus a future projection (see Storage Capacity below). // +// For example: // +// 1- Adding extra fields to TweetLocationInfo will likely require extra storage. // +// 2- Increase on front-load QPS (read or write) may require extra sharding to not impact delay percentiles. 
// +// Failure to observe these may impact Tweetypie write-path and read-path. // +// // +////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Flags how a _Place_ is published into a tweet (a.k.a. geotagging). + */ +enum GeoTagPlaceSource { + /** + * Tweet is tagged to a place but it is impossible to determine its source. + * E.g.: created from non-TOO clients or legacy TOO clients + */ + UNKNOWN = 0 + /** + * Tweet is tagged to a Place by reverse geocoding its coordinates. + */ + COORDINATES = 1 + /** + * Tweet is tagged to a Place by the client application on user's behalf. + * N.B.: COORDINATES is not AUTO because the API request doesn't publish a Place + */ + AUTO = 2 + EXPLICIT = 3 + + // free to use, added for backwards compatibility on client code. + RESERVED_4 = 4 + RESERVED_5 = 5 + RESERVED_6 = 6 + RESERVED_7 = 7 +} + +/** + * Information about Tweet's Location(s). + * Designed to enable custom consumption experiences of the Tweet's location(s). + * E.g.: Tweet's perspectival view of a Location entity + * + * To guarantee user's rights of privacy: + * + * - Only include user's published location data or unpublished location data that + * is EXPLICITLY set as publicly available by the user. + * + * - Never include user's unpublished (aka shared) location data that + * is NOT EXPLICITLY set as publicly available by the user. + * + * E.g.: User is asked to share their GPS coordinates with Twitter from mobile client, + * under the guarantee it won't be made publicly available. + * + * Design notes: + * - Tweet's geotagged Place is represented by Tweet.place instead of being a field here. + */ +struct TweetLocationInfo { + /** + * Represents how the Tweet author published the "from" location in a Tweet (a.k.a geo-tagged). 
+ */ + 1: optional GeoTagPlaceSource geotag_place_source +}(persisted='true', hasPersonalData='false') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift new file mode 100644 index 000000000..f2a739094 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/media/media_ref.thrift @@ -0,0 +1,20 @@ +namespace java com.twitter.tweetypie.media.thriftjava +#@namespace scala com.twitter.tweetypie.media.thriftscala +#@namespace strato com.twitter.tweetypie.media +namespace py gen.twitter.tweetypie.media +namespace rb TweetyPie + + +/** +* A MediaRef represents a reference to a piece of media in MediaInfoService, along with metadata +* about the source Tweet that the media came from in case of pasted media. +**/ +struct MediaRef { + 1: string generic_media_key (personalDataType = 'MediaId') + + // For Tweets with pasted media, the id of the Tweet where this media was copied from + 2: optional i64 source_tweet_id (personalDataType = 'TweetId') + + // The author of source_tweet_id + 3: optional i64 source_user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift new file mode 100644 index 000000000..c5b411710 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/media_entity.thrift @@ -0,0 +1,135 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.media_entity +namespace rb TweetyPie.media_entity +namespace go tweetypie.media_entity + +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/TweetMedia.thrift" + 
+/** + * DEPRECATED + * An RGB color. + * + * Each i8 should be interpreted as unsigned, ranging in value from 0 to + * 255. Borrowed from gizmoduck/user.thrift. + * + * The way in which we use ColorValue here is as metadata for a media file, + * so it needs to be annotated as having personal data. Fields that are of + * structured types cannot be annotated, so we have to put the annotation + * on the structure itself's fields even though it's more confusing to do so + * and could introduce issues if someone else reuses ColorValue outside of + * the context of a media file. + */ +struct ColorValue { + 1: i8 red (personalDataType = 'MediaFile') + 2: i8 green (personalDataType = 'MediaFile') + 3: i8 blue (personalDataType = 'MediaFile') +}(persisted = 'true', hasPersonalData = 'true') + +struct MediaEntity { + 1: i16 from_index (personalDataType = 'MediaFile') + 2: i16 to_index (personalDataType = 'MediaFile') + + /** + * The shortened t.co url found in the tweet text. + */ + 3: string url (personalDataType = 'ShortUrl') + + /** + * The text to display in place of the shortened url. + */ + 4: string display_url (personalDataType = 'LongUrl') + + /** + * The url to the media asset (a preview image in the case of a video). + */ + 5: string media_url (personalDataType = 'LongUrl') + + /** + * The https version of media_url. + */ + 6: string media_url_https (personalDataType = 'LongUrl') + + /** + * The expanded media permalink. + */ + 7: string expanded_url (personalDataType = 'LongUrl') + + 8: MediaCommon.MediaId media_id (strato.space = "Media", strato.name = "media", personalDataType = 'MediaId') + 9: bool nsfw + 10: set sizes + 11: string media_path + 12: optional bool is_protected + + /** + * The tweet that this MediaEntity was originally attached to. This value will be set if this + * MediaEntity is either on a retweet or a tweet with pasted-pic. 
+ */ + 13: optional i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') + + + /** + * The user to attribute views of the media to. + * + * This field should be set when the media's attributableUserId field does not match the current + * Tweet's owner. Retweets of a Tweet with media and "managed media" are some reasons this may + * occur. When the value is None any views should be attributed to the tweet's owner. + **/ + 14: optional i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') + + /** + * Additional information specific to the media type. + * + * This field is optional with images (as the image information is in the + * previous fields), but required for animated GIF and native video (as, in + * this case, the previous fields only describe the preview image). + */ + 15: optional TweetMedia.MediaInfo media_info + + /** + * DEPRECATED + * The dominant color for the entire image (or keyframe for video or GIF). + * + * This can be used for placeholders while the media downloads (either a + * solid color or a gradient using the grid). + */ + 16: optional ColorValue dominant_color_overall + + /** + * DEPRECATED + * Dominant color of each quadrant of the image (keyframe for video or GIF). + * + * If present this list should have 4 elements, corresponding to + * [top_left, top_right, bottom_left, bottom_right] + */ + 17: optional list dominant_color_grid + + // obsolete 18: optional map extensions + + /** + * Stratostore extension points data encoded as a Strato record. + */ + 19: optional binary extensions_reply + + /** + * Holds metadata defined by the user for the tweet-asset relationship. + */ + 20: optional MediaInformation.UserDefinedProductMetadata metadata + + /** + * Media key used to interact with the media systems. + */ + 21: optional MediaCommon.MediaKey media_key + + /** + * Flexible structure for additional media metadata. 
This field is only + * included in a read-path request if specifically requested. It will + * always be included, when applicable, in write-path responses. + */ + 22: optional MediaInformation.AdditionalMetadata additional_metadata + +}(persisted='true', hasPersonalData = 'true') + diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift new file mode 100644 index 000000000..e8313a924 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/note_tweet.thrift @@ -0,0 +1,13 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.tweet_note +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +// Struct representing a NoteTweet associated with a Tweet +struct NoteTweet { + 1: required i64 id (strato.space = 'NoteTweet', strato.name = "note_tweet", personalDataType = 'TwitterArticleID') + 2: optional bool is_expandable (strato.name = "is_expandable") +} (persisted='true', hasPersonalData = 'true', strato.graphql.typename = 'NoteTweetData') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift new file mode 100644 index 000000000..0476dbded --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/retweet_archival_event.thrift @@ -0,0 +1,30 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.retweet_archival_event +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +/** + * This event is published to "retweet_archival_events" when Tweetypie processes an + * AsyncSetRetweetVisibilityRequest. 
+ * + * This is useful for services (Interaction Counter, Insights Track) that need to + * know when the retweet engagement count of a tweet has been modified due to the + * retweeting user being put into or taken out of suspension or read-only mode. + */ +struct RetweetArchivalEvent { + // The retweet id affected by this archival event. + 1: required i64 retweet_id (personalDataType = 'TweetId') + // The source tweet id for the retweet. This tweet had its retweet count modified. + 2: required i64 src_tweet_id (personalDataType = 'TweetId') + 3: required i64 retweet_user_id (personalDataType = 'UserId') + 4: required i64 src_tweet_user_id (personalDataType = 'UserId') + // Approximate time in milliseconds for when the count modification occurred, based on + // Unix Epoch (1 January 1970 00:00:00 UTC). Tweetypie will use the time when it is + // about to send the asynchronous write request to tflock for this timestamp. + 5: required i64 timestamp_ms + // Marks if this event is for archiving(True) or unarchiving(False) action. + // Archiving indicates an engagement count decrement occurred and unarchiving indicates an increment occurred. 
+ 6: optional bool is_archiving_action +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD new file mode 100644 index 000000000..c619298c4 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/BUILD @@ -0,0 +1,11 @@ +create_thrift_libraries( + base_name = "storage_internal", + sources = ["*.thrift"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + generate_languages = [ + "java", + "scala", + ], +) diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift new file mode 100644 index 000000000..f614fa762 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/storage_internal/storage_internal.thrift @@ -0,0 +1,79 @@ +namespace java com.twitter.tweetypie.storage_internal.thriftjava +#@namespace scala com.twitter.tweetypie.storage_internal.thriftscala + +struct StoredReply { + 1: i64 in_reply_to_status_id (personalDataType = 'TweetId') + 2: i64 in_reply_to_user_id (personalDataType = 'UserId') + 3: optional i64 conversation_id (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredShare { + 1: i64 source_status_id (personalDataType = 'TweetId') + 2: i64 source_user_id (personalDataType = 'UserId') + 3: i64 parent_status_id (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredGeo { + 1: double latitude (personalDataType = 'GpsCoordinates') + 2: double longitude (personalDataType = 'GpsCoordinates') + 3: i32 geo_precision (personalDataType = 'GpsCoordinates') + 4: i64 entity_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + 5: optional string name (personalDataType = 
'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') +} (hasPersonalData = 'true', persisted='true') + +struct StoredMediaEntity { + 1: i64 id (personalDataType = 'MediaId') + 2: i8 media_type (personalDataType = 'ContentTypeTweetMedia') + 3: i16 width + 4: i16 height +} (hasPersonalData = 'true', persisted='true') + +struct StoredNarrowcast { + 1: optional list language (personalDataType = 'InferredLanguage') + 2: optional list location (personalDataType = 'PublishedCoarseLocationTweet') + 3: optional list ids (personalDataType = 'TweetId') +} (hasPersonalData = 'true', persisted='true') + +struct StoredQuotedTweet { + 1: i64 tweet_id (personalDataType = 'TweetId') // the tweet id being quoted + 2: i64 user_id (personalDataType = 'UserId') // the user id being quoted + 3: string short_url (personalDataType = 'ShortUrl') // tco url - used when rendering in backwards-compat mode +} (hasPersonalData = 'true', persisted='true') + +struct StoredTweet { + 1: i64 id (personalDataType = 'TweetId') + 2: optional i64 user_id (personalDataType = 'UserId') + 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: optional string created_via (personalDataType = 'ClientType') + 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') // in seconds + + 6: optional StoredReply reply + 7: optional StoredShare share + 8: optional i64 contributor_id (personalDataType = 'Contributor') + 9: optional StoredGeo geo + 11: optional bool has_takedown + 12: optional bool nsfw_user (personalDataType = 'TweetSafetyLabels') + 13: optional bool nsfw_admin (personalDataType = 'TweetSafetyLabels') + 14: optional list media + 15: optional StoredNarrowcast narrowcast + 16: optional bool nullcast + 17: optional i64 tracking_id (personalDataType = 'ImpressionId') + 18: optional i64 updated_at (personalDataType = 'PrivateTimestamp, PublicTimestamp') + 19: optional StoredQuotedTweet quoted_tweet +} (hasPersonalData = 'true', 
persisted='true') + +struct CoreFields { + 2: optional i64 user_id (personalDataType = 'UserId') + 3: optional string text (personalDataType = 'PrivateTweets, PublicTweets') + 4: optional string created_via (personalDataType = 'ClientType') + 5: optional i64 created_at_sec (personalDataType = 'PrivateTimestamp, PublicTimestamp') + + 6: optional StoredReply reply + 7: optional StoredShare share + 8: optional i64 contributor_id (personalDataType = 'Contributor') + 19: optional StoredQuotedTweet quoted_tweet +} (hasPersonalData = 'true', persisted='true') + +struct InternalTweet { + 1: optional CoreFields core_fields +} (hasPersonalData = 'true', persisted='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift new file mode 100644 index 000000000..4c37451fc --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/stored_tweet_info.thrift @@ -0,0 +1,52 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie + +include "com/twitter/tweetypie/tweet.thrift" + +struct HardDeleted { + 1: i64 soft_deleted_timestamp_msec + 2: i64 timestamp_msec +} + +struct SoftDeleted { + 1: i64 timestamp_msec +} + +struct BounceDeleted { + 1: i64 timestamp_msec +} + +struct Undeleted { + 1: i64 timestamp_msec +} + +struct ForceAdded { + 1: i64 timestamp_msec +} + +struct NotFound {} + +union StoredTweetState { + 1: HardDeleted hard_deleted + 2: SoftDeleted soft_deleted + 3: BounceDeleted bounce_deleted + 4: Undeleted undeleted + 5: ForceAdded force_added + 6: NotFound not_found +} + +enum StoredTweetError { + CORRUPT = 1, + SCRUBBED_FIELDS_PRESENT = 2, + FIELDS_MISSING_OR_INVALID = 3, + SHOULD_BE_HARD_DELETED = 4, + FAILED_FETCH = 5 +} + +struct StoredTweetInfo { + 1: required i64 tweet_id + 2: optional tweet.Tweet tweet + 3: optional StoredTweetState stored_tweet_state + 
4: required list errors = [] +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift new file mode 100644 index 000000000..942e42d35 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/transient_context.thrift @@ -0,0 +1,64 @@ +/** + * This file contains definitions for transient, passthrough structured data. + * + * If you need to add structured data that Tweetypie accepts in a request + * and passes the data through to one or more backends (eg. EventBus), this + * is the place to put it. Tweetypie may or may not inspect the data and + * alter the behavior based on it, but it won't change it. + */ + +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.transient_context +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/tweetypie/tweet.thrift" + +enum BatchComposeMode { + /** + * This is the first Tweet in a batch. + */ + BATCH_FIRST = 1 + + /** + * This is any of the subsequent Tweets in a batch. + */ + BATCH_SUBSEQUENT = 2 +} + +/** + * Data supplied at Tweet creation time that is not served by Tweetypie, but + * is passed through to consumers of the tweet_events eventbus stream as part + * of TweetCreateEvent. + * This is different from additional_context in that Tweetypie + * inspects this data as well, and we prefer structs over strings. + * If adding a new field that will be passed through to eventbus, prefer this + * over additional_context. + */ +struct TransientCreateContext { + /** + * Indicates whether a Tweet was created using a batch composer, and if so + * position of a Tweet within the batch. + * + * A value of 'None' indicates that the tweet was not created in a batch. 
+ * + * More info: https://docs.google.com/document/d/1dJ9K0KzXPzhk0V-Nsekt0CAdOvyVI8sH9ESEiA2eDW4/edit + */ + 1: optional BatchComposeMode batch_compose + + /** + * Indicates if the tweet contains a live Periscope streaming video. + * + * This enables Periscope LiveFollow. + */ + 2: optional bool periscope_is_live + + /** + * Indicates the userId of the live Periscope streaming video. + * + * This enables Periscope LiveFollow. + */ + 3: optional i64 periscope_creator_id (personalDataType='UserId') +}(persisted='true', hasPersonalData='true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift new file mode 100644 index 000000000..bffca50c5 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet.thrift @@ -0,0 +1,1652 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.tweet +namespace rb TweetyPie +// Specific namespace to avoid golang circular import +namespace go tweetypie.tweet + +include "com/twitter/escherbird/tweet_annotation.thrift" +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/content-health/toxicreplyfilter/filtered_reply_details.thrift" +include "com/twitter/dataproducts/enrichments_profilegeo.thrift" +include "com/twitter/geoduck/public/thriftv1/geoduck_common.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/tweetypie/api_fields.thrift" +include "com/twitter/tweetypie/edit_control.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/note_tweet.thrift" +include "com/twitter/service/scarecrow/gen/tiered_actions.thrift" +include "com/twitter/spam/rtf/safety_label.thrift" +include "com/twitter/timelines/self_thread/self_thread.thrift" +include 
"com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweet_pivots/tweet_pivots.thrift" +include "com/twitter/tweetypie/geo/tweet_location_info.thrift" +include "com/twitter/tweetypie/media/media_ref.thrift" +include "unified_cards_contract.thrift" +include "com/twitter/tweetypie/creative-entity-enrichments/creative_entity_enrichments.thrift" +include "com/twitter/tweetypie/unmentions/unmentions.thrift" + +/** + * IDs are annotated with their corresponding space for Strato. + */ + +/** + * A Reply is data about a tweet in response to another tweet or a + * user. + * + * This struct will be present if: + * 1. This tweet is a reply to another tweet, or + * 2. This tweet is directed at a user (the tweet's text begins with + * an @mention). + */ +struct Reply { + /** + * The id of the tweet that this tweet is replying to. + * + * This field will be missing for directed-at tweets (tweets whose + * text begins with an @mention) that are not replying to another + * tweet. + */ + 1: optional i64 in_reply_to_status_id (strato.space = "Tweet", strato.name = "inReplyToStatus", personalDataType = 'TweetId', tweetEditAllowed='false') + + /** + * The user to whom this tweet is directed. + * + * If in_reply_to_status_id is set, this field is the author of that tweet. + * If in_reply_to_status_id is not set, this field is the user mentioned at + * the beginning of the tweet. + */ + 2: i64 in_reply_to_user_id (strato.space = "User", strato.name = "inReplyToUser", personalDataType = 'UserId') + + /** + * The current username of in_reply_to_user_id. + * + * This field is not set when Gizmoduck returns a failure to Tweetypie. + */ + 3: optional string in_reply_to_screen_name (personalDataType = 'Username') +}(persisted='true', hasPersonalData = 'true') + +/** + * Includes information about the user a tweet is directed at (when a tweet + * begins with @mention). 
+ * + * Tweets with a DirectedAtUser are delivered to users who follow both the + * author and the DirectedAtUser. Normally the DirectedAtUser will be the same + * as Reply.in_reply_to_user_id, but will be different if the tweet's author + * rearranges the @mentions in a reply. + */ +struct DirectedAtUser { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: string screen_name (personalDataType = 'Username') +}(persisted='true', hasPersonalData = 'true') + +/** + * A Share is data about the source tweet of a retweet. + * + * Share was the internal name for the retweet feature. + */ +struct Share { + /** + * The id of the original tweet that was retweeted. + * + * This is always a tweet and never a retweet (unlike parent_status_id). + */ + 1: i64 source_status_id (strato.space = "Tweet", strato.name = "sourceStatus", personalDataType = 'TweetId') + + /** + * The user id of the original tweet's author. + */ + 2: i64 source_user_id (strato.space = "User", strato.name = "sourceUser", personalDataType = 'UserId') + + /** + * The id of the tweet that the user retweeted. + * + * Often this is the same as source_status_id, but it is different when a + * user retweets via another retweet. For example, user A posts tweet id 1, + * user B retweets it, creating tweet 2. If user C sees B's retweet and + * retweets it, the result is another retweet of tweet id 1, with the parent + * status id of tweet 2. + */ + 3: i64 parent_status_id (strato.space = "Tweet", strato.name = "parentStatus", personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +/** + * A record mapping a shortened URL (usually t.co) to a long url, and a prettified + * display text. This is similar to data found in UrlEntity, and may replace that + * data in the future. + */ +struct ShortenedUrl { + /** + * Shortened t.co URL. + */ + 1: string short_url (personalDataType = 'ShortUrl') + + /** + * Original, full-length URL. 
+ */ + 2: string long_url (personalDataType = 'LongUrl') + + /** + * Truncated version of expanded URL that does not include protocol and is + * limited to 27 characters. + */ + 3: string display_text (personalDataType = 'LongUrl') +}(persisted='true', hasPersonalData = 'true') + +/** + * A QuotedTweet is data about a tweet referenced within another tweet. + * + * QuotedTweet is included if Tweet.QuotedTweetField is requested, and the + * linked-to tweet is public and visible at the time that the linking tweet + * is hydrated, which can be during write-time or later after a cache-miss + * read. Since linked-to tweets can be deleted, and users can become + * suspended, deactivated, or protected, the presence of this value is not a + * guarantee that the quoted tweet is still public and visible. + * + * Because a tweet quoting another tweet may not require a permalink URL in + * the tweet's text, the URLs in ShortenedUrl may be useful to clients that + * require maintaining a legacy-rendering of the tweet's text with the permalink. + * See ShortenedUrl for details. Clients should avoid reading permalink whenever + * possible and prefer the QuotedTweet's tweet_id and user_id instead. + * + * we always populate the permalink on tweet hydration unless there are partial + * hydration errors or inner quoted tweet is filtered due to visibility rules. + * + */ +struct QuotedTweet { + 1: i64 tweet_id (strato.space = "Tweet", strato.name = "tweet", personalDataType = 'TweetId') + 2: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 3: optional ShortenedUrl permalink // URLs to access the quoted-tweet +}(persisted='true', hasPersonalData = 'true') + +/** + * A Contributor is a user who has access to another user's account. 
+ */ +struct Contributor { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: optional string screen_name (personalDataType = 'Username')// not set on Gizmoduck failure +}(persisted='true', hasPersonalData = 'true') + +struct GeoCoordinates { + 1: double latitude (personalDataType = 'GpsCoordinates') + 2: double longitude (personalDataType = 'GpsCoordinates') + 3: i32 geo_precision = 0 (personalDataType = 'GpsCoordinates') + + /** + * Whether or not to make the coordinates public. + * + * This parameter is needed because coordinates are not typically published + * by the author. If false, the tweet has geo coordinates shared but they + * are not made public. + */ + 4: bool display = 1 +}(persisted='true', hasPersonalData = 'true') + +enum PlaceType { + UNKNOWN = 0 + COUNTRY = 1 + ADMIN = 2 + CITY = 3 + NEIGHBORHOOD = 4 + POI = 5 +} + +enum PlaceNameType { + NORMAL = 0 + ABBREVIATION = 1 + SYNONYM = 2 +} + +struct PlaceName { + 1: string name + 2: string language = "" + 3: PlaceNameType type + 4: bool preferred +}(persisted='true', hasPersonalData='false') + +/** + * A Place is the physical and political properties of a location on Earth. + */ +struct Place { + /** + * Geo service identifier. + */ + 1: string id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + + /** + * Granularity of place. + */ + 2: PlaceType type + + /** + * The name of this place composed with its parent locations. + * + * For example, the full name for "Brooklyn" would be "Brooklyn, NY". This + * name is returned in the language specified by + * GetTweetOptions.language_tag. + */ + 3: string full_name (personalDataType = 'InferredLocation') + + /** + * The best name for this place as determined by geoduck heuristics. + * + * This name is returned in the language specified by + * GetTweetOptions.language_tag. 
+ * + * @see com.twitter.geoduck.util.primitives.bestPlaceNameMatchingFilter + */ + 4: string name (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') + + /** + * Arbitrary key/value data from the geoduck PlaceAttributes for this place. + */ + 5: map attributes (personalDataTypeKey = 'PostalCode') + + 7: set names + + /** + * The ISO 3166-1 alpha-2 code for the country containing this place. + */ + 9: optional string country_code (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * The best name for the country containing this place as determined by + * geoduck heuristics. + * + * This name is returned in the language specified by + * GetTweetOptions.language_tag. + */ + 10: optional string country_name (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * A simplified polygon that encompasses the place's geometry. + */ + 11: optional list bounding_box + + /** + * An unordered list of geo service identifiers for places that contain this + * one from the most immediate parent up to the country. + */ + 12: optional set containers (personalDataType = 'PublishedCoarseLocationTweet') + + /** + * A centroid-like coordinate that is within the geometry of the place. + */ + 13: optional GeoCoordinates centroid + + /** + * Reason this place is being suppressed from display. + * + * This field is present when we previously had a place for this ID, but are + * now choosing not to hydrate it and instead providing fake place metadata + * along with a reason for not including place information. + */ + 14: optional geoduck_common.WithheldReason withheldReason +}(persisted='true', hasPersonalData='true') + +/** + * A UrlEntity is the position and content of a t.co shortened URL in the + * tweet's text. + * + * If Talon returns an error to Tweetypie during tweet hydration, the + * UrlEntity will be omitted from the response. 
UrlEntities are not included + * for non-t.co-wrapped URLs found in older tweets, for spam and user safety + * reasons. +*/ +struct UrlEntity { + /** + * The position of this entity's first character, in zero-indexed Unicode + * code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Shortened t.co URL. + */ + 3: string url (personalDataType = 'ShortUrl') + + /** + * Original, full-length URL. + * + * This field will always be present on URL entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 4: optional string expanded (personalDataType = 'LongUrl') + + /** + * Truncated version of expanded URL that does not include protocol and is + * limited to 27 characters. + * + * This field will always be present on URL entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 5: optional string display (personalDataType = 'LongUrl') + + 6: optional i64 click_count (personalDataType = 'CountOfTweetEntitiesClicked') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A MentionEntity is the position and content of a mention, (the "@" + * character followed by the name of another valid user) in a tweet's text. + * + * If Gizmoduck returns an error to Tweetypie during tweet hydration that + * MentionEntity will be omitted from the response. + */ +struct MentionEntity { + /** + * The position of this entity's first character ("@"), in zero-indexed + * Unicode code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Contents of the mention without the leading "@". + */ + 3: string screen_name (personalDataType = 'Username') + + /** + * User id of the current user with the mentioned screen name. 
+ * + * In the current implementation user id does not necessarily identify the + * user who was originally mentioned when the tweet was created, only the + * user who owns the mentioned screen name at the time of hydration. If a + * mentioned user changes their screen name and a second user takes the old + * name, this field identifies the second user. + * + * This field will always be present on mention entities returned by + * Tweetypie; it is optional as an implementation artifact. + */ + 4: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + + /** + * Display name of the current user with the mentioned screen name. + * + * See user_id for caveats about which user's name is used here. This field + * will always be present on mention entities returned by Tweetypie; it is + * optional as an implementation artifact. + */ + 5: optional string name (personalDataType = 'DisplayName') + + /** + * Indicates if the user referred to by this MentionEntity has been unmentioned + * from the conversation. If this field is set to true, the fromIndex and toIndex + * fields will have a value of 0. + * + * @deprecated isUnmentioned is no longer being populated + */ + 6: optional bool isUnmentioned (personalDataType = 'ContentPrivacySettings') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A list of users that are mentioned in the tweet and have a blocking + * relationship with the tweet author. Mentions for these users will be unlinked + * in the tweet. + */ +struct BlockingUnmentions { + 1: optional list unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') +}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'BlockingUnmentions') + +/** + * A list of users that are mentioned in the tweet and have indicated they do not want + * to be mentioned via their mention settings. 
Mentions for these users will be unlinked + * in the tweet by Twitter owned and operated clients. + */ +struct SettingsUnmentions { + 1: optional list unmentioned_user_ids (strato.space = 'User', strato.name = 'users', personalDataType = 'UserId') +}(persisted = 'true', hasPersonalData = 'true', strato.graphql.typename = 'SettingsUnmentions') + +/** + * A HashtagEntity is the position and content of a hashtag (a term starting + * with "#") in a tweet's text. + */ +struct HashtagEntity { + /** + * The position of this entity's first character ("#"), in zero-indexed + * Unicode code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. + */ + 2: i16 to_index + + /** + * Contents of the hashtag without the leading "#". + */ + 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(persisted = 'true', hasPersonalData = 'true') + +/** + * A CashtagEntity is the position and content of a cashtag (a term starting + * with "$") in a tweet's text. + */ +struct CashtagEntity { + /** + * The position of this entity's first character, in zero-indexed Unicode + * code points. + */ + 1: i16 from_index + + /** + * The position after this entity's last character, in zero-indexed Unicode + * code points. 
+ */ + 2: i16 to_index + + /** + * Contents of the cashtag without the leading "$" + */ + 3: string text (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(persisted = 'true', hasPersonalData = 'true') + +enum MediaTagType { + USER = 0 + RESERVED_1 = 1 + RESERVED_2 = 2 + RESERVED_3 = 3 + RESERVED_4 = 4 +} + +struct MediaTag { + 1: MediaTagType tag_type + 2: optional i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 3: optional string screen_name (personalDataType = 'Username') + 4: optional string name (personalDataType = 'DisplayName') +}(persisted='true', hasPersonalData = 'true') + +struct TweetMediaTags { + 1: map> tag_map +}(persisted='true', hasPersonalData = 'true') + +/** + * A UserMention is a user reference not stored in the tweet text. + * + * @deprecated Was used only in ReplyAddresses + */ +struct UserMention { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + 2: optional string screen_name (personalDataType = 'Username') + 3: optional string name (personalDataType = 'DisplayName') +}(persisted='true', hasPersonalData = 'true') + +/** + * ReplyAddresses is a list of reply entities which are stored outside of the + * text. + * + * @deprecated + */ +struct ReplyAddresses { + 1: list users = [] +}(persisted='true', hasPersonalData = 'true') + +/** + * SchedulingInfo is metadata about tweets created by the tweet scheduling + * service. + */ +// +struct SchedulingInfo { + /** + * Id of the corresponding scheduled tweet before it was created as a real + * tweet. 
+ */ + 1: i64 scheduled_tweet_id (personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +/** + * @deprecated + */ +enum SuggestType { + WTF_CARD = 0 + WORLD_CUP = 1 + WTD_CARD = 2 + NEWS_CARD = 3 + RESERVED_4 = 4 + RESERVED_5 = 5 + RESERVED_6 = 6 + RESERVED_7 = 7 + RESERVED_8 = 8 + RESERVED_9 = 9 + RESERVED_10 = 10 + RESERVED_11 = 11 +} + +/** + * @deprecated + */ +enum TwitterSuggestsVisibilityType { + /** + * Always public to everyone + */ + PUBLIC = 1 + + /** + * Inherits visibility rules of personalized_for_user_id. + */ + RESTRICTED = 2 + + /** + * Only visible to personalized_for_user_id (and author). + */ + PRIVATE = 3 +} + +/** + * TwitterSuggestInfo is details about a synthetic tweet generated by an early + * version of Twitter Suggests. + * + * @deprecated + */ +struct TwitterSuggestInfo { + 1: SuggestType suggest_type + 2: TwitterSuggestsVisibilityType visibility_type + 3: optional i64 personalized_for_user_id (strato.space = "User", strato.name = "personalizedForUser", personalDataType = 'UserId') + 4: optional i64 display_timestamp_secs (personalDataType = 'PublicTimestamp') +}(persisted='true', hasPersonalData = 'true') + +/** + * A DeviceSource contains information about the client application from which + * a tweet was sent. + * + * This information is stored in Passbird. The developer that owns a client + * application provides this information on https://apps.twitter.com. + */ +struct DeviceSource { + + /** + * The id of the client in the now deprecated device_sources MySQL table. + * + * Today this value will always be 0. + * + * @deprecated Use client_app_id + */ + 1: required i64 id (personalDataType = 'AppId') + + /** + * Identifier for the client in the format "oauth:" + */ + 2: string parameter + + /** + * Identifier for the client in the format "oauth:" + */ + 3: string internal_name + + /** + * Developer-provided name of the client application. 
+ */ + 4: string name + + /** + * Developer-provided publicly accessible home page for the client + * application. + */ + 5: string url + + /** + * HTML fragment with a link to the client-provided URL + */ + 6: string display + + /** + * This field is marked optional for backwards compatibility but will always + * be populated by Tweetypie. + */ + 7: optional i64 client_app_id (personalDataType = 'AppId') +}(persisted='true', hasPersonalData = 'true') + +/** + * A Narrowcast restricts delivery of a tweet geographically. + * + * Narrowcasts allow multi-national advertisers to create geo-relevant content + * from a central handle that is only delivered to followers in a + * particular country or set of countries. + */ +struct Narrowcast { + 2: list location = [] (personalDataType = 'PublishedCoarseLocationTweet') +}(persisted='true', hasPersonalData = 'true') + +/** + * StatusCounts is a summary of engagement metrics for a tweet. + * + * These metrics are loaded from TFlock. + */ +struct StatusCounts { + + /** + * Number of times this tweet has been retweeted. + * + * This number may not match the list of users who have retweeted because it + * includes retweets from protected and suspended users who are not listed. + */ + 1: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') + + /** + * Number of direct replies to this tweet. + * + * This number does not include replies to replies. + */ + 2: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') + + /** + * Number of favorites this tweet has received. + * + * This number may not match the list of users who have favorited a tweet + * because it includes favorites from protected and suspended users who are + * not listed.
+ */ + 3: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes', strato.json.numbers.type = 'int53') + + /** + * @deprecated + */ + 4: optional i64 unique_users_impressed_count (strato.json.numbers.type = 'int53') + + /** + * Number of replies to this tweet including replies to replies. + * + * @deprecated + */ + 5: optional i64 descendent_reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies', strato.json.numbers.type = 'int53') + + /** + * Number of times this tweet has been quote tweeted. + * + * This number may not match the list of users who have quote tweeted because it + * includes quote tweets from protected and suspended users who are not listed. + */ + 6: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets', strato.json.numbers.type = 'int53') + + /** + * Number of bookmarks this tweet has received. + */ + 7: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes', strato.json.numbers.type = 'int53') + +}(persisted='true', hasPersonalData = 'true', strato.graphql.typename='StatusCounts') + +/** + * A StatusPerspective is a tweet's properties from one user's point of view. + */ +struct StatusPerspective { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId') + + /** + * Whether user_id has favorited this tweet. + */ + 2: bool favorited + + /** + * Whether user_id has retweeted this tweet. + */ + 3: bool retweeted + + /** + * If user_id has retweeted this tweet, retweet_id identifies that tweet. + */ + 4: optional i64 retweet_id (strato.space = "Tweet", strato.name = "retweet", personalDataType = 'TweetId') + + /** + * Whether user_id has reported this tweet as spam, offensive, or otherwise + * objectionable. + */ + 5: bool reported + + /** + * Whether user_id has bookmarked this tweet.
+ */ + 6: optional bool bookmarked +}(persisted='true', hasPersonalData = 'true') + +/** + * A Language is a guess about the human language of a tweet's text. + * + * Language is determined by TwitterLanguageIdentifier from the + * com.twitter.common.text package (commonly called "Penguin"). + */ +struct Language { + /** + * Language code in BCP-47 format. + */ + 1: required string language (personalDataType = 'InferredLanguage') + + /** + * Language direction. + */ + 2: bool right_to_left + + /** + * Confidence level of the detected language. + */ + 3: double confidence = 1.0 + + /** + * Other possible languages and their confidence levels. + */ + 4: optional map other_candidates +}(persisted='true', hasPersonalData = 'true') + +/** + * A SupplementalLanguage is a guess about the human language of a tweet's + * text. + * + * SupplementalLanguage is typically determined by a third-party translation + * service. It is only stored when the service detects a different language + * than TwitterLanguageIdentifier. + * + * @deprecated 2020-07-08 no longer populated. + */ +struct SupplementalLanguage { + /** + * Language code in BCP-47 format. + */ + 1: required string language (personalDataType = 'InferredLanguage') +}(persisted='true', hasPersonalData = 'true') + +/** + * A SpamLabel is a collection of spam actions for a tweet. 
+ * + * Absence of a SpamLabel indicates that no action needs to be taken + */ +struct SpamLabel { + /** + * Filter this content at render-time + * + * @deprecated 2014-05-19 Use filter_renders + */ + 1: bool spam = 0 + + 2: optional set actions; +}(persisted='true') + + +/** + * The available types of spam signal + * + * @deprecated + */ +enum SpamSignalType { + MENTION = 1 + SEARCH = 2 + STREAMING = 4 + # OBSOLETE HOME_TIMELINE = 3 + # OBSOLETE NOTIFICATION = 5 + # OBSOLETE CONVERSATION = 6 + # OBSOLETE CREATION = 7 + RESERVED_VALUE_8 = 8 + RESERVED_VALUE_9 = 9 + RESERVED_VALUE_10 = 10 +} + +/** + * @deprecated + * CardBindingValues is a collection of key-value pairs used to render a card. + */ +struct CardBindingValues { + 1: list pairs = [] +}(persisted='true') + +/** + * A CardReference is a mechanism for explicitly associating a card with a + * tweet. + */ +struct CardReference { + /** + * Link to the card to associate with a tweet. + * + * This URI may reference either a card stored in the card service, or + * another resource, such as a crawled web page URL. This value supersedes + * any URL present in tweet text. + */ + 1: string card_uri +}(persisted='true') + +/** + * A TweetPivot is a semantic entity related to a tweet. + * + * TweetPivots are used to direct the user to another related location. For + * example, a "See more about [entity]" UI element that takes the user to + * the related location when clicked. + */ +struct TweetPivot { + 1: required tweet_annotation.TweetEntityAnnotation annotation + 2: required tweet_pivots.TweetPivotData data +}(persisted='true') + +struct TweetPivots { + 1: required list tweet_pivots +}(persisted='true') + +struct EscherbirdEntityAnnotations { + 1: list entity_annotations +}(persisted='true') + +struct TextRange { + /** + * The inclusive index of the start of the range, in zero-indexed Unicode + * code points. + */ + 1: required i32 from_index + + /** + * The exclusive index of the end of the range, in zero-indexed Unicode + * code points.
+ */ + 2: required i32 to_index +}(persisted='true') + +struct TweetCoreData { + 1: i64 user_id (strato.space = "User", strato.name = "user", personalDataType = 'UserId', tweetEditAllowed='false') + + /** + * The body of the tweet consisting of the user-supplied displayable message + * and: + * - an optional prefix list of @mentions + * - an optional suffix attachment url. + * + * The indices from visible_text_range specify the substring of text indended + * to be displayed, whose length is limited to 140 display characters. Note + * that the visible substring may be longer than 140 characters due to HTML + * entity encoding of &, <, and > . + + * For retweets the text is that of the original tweet, prepended with "RT + * @username: " and truncated to 140 characters. + */ + 2: string text (personalDataType = 'PrivateTweets, PublicTweets') + + /** + * The client from which this tweet was created + * + * The format of this value is oauth:. + */ + 3: string created_via (personalDataType = 'ClientType') + + /** + * Time this tweet was created. + * + * This value is seconds since the Unix epoch. For tweets with Snowflake IDs + * this value is redundant, since a millisecond-precision timestamp is part + * of the id. + */ + 4: i64 created_at_secs + + /** + * Present when this tweet is a reply to another tweet or another user. + */ + 5: optional Reply reply + + /** + * Present when a tweet begins with an @mention or has metadata indicating the directed-at user. + */ + 6: optional DirectedAtUser directed_at_user + + /** + * Present when this tweet is a retweet. + */ + 7: optional Share share + + /** + * Whether there is a takedown country code or takedown reason set for this specific tweet. + * + * See takedown_country_codes for the countries where the takedown is active. (deprecated) + * See takedown_reasons for a list of reasons why the tweet is taken down. + * + * has_takedown will be set to true if either this specific tweet or the author has a + * takedown active. 
+ */ + 8: bool has_takedown = 0 + + /** + * Whether this tweet might be not-safe-for-work, judged by the tweet author. + * + * Users can flag their own accounts as not-safe-for-work in account + * preferences by selecting "Mark media I tweet as containing material that + * may be sensitive" and each tweet created after that point will have + * this flag set. + * + * The value can also be updated after tweet create time via the + * update_possibly_sensitive_tweet method. + */ + 9: bool nsfw_user = 0 + + /** + * Whether this tweet might be not-safe-for-work, judged by an internal Twitter + * support agent. + * + * This tweet value originates from the user's nsfw_admin flag at + * tweet create time but can be updated afterwards using the + * update_possibly_sensitive_tweet method. + */ + 10: bool nsfw_admin = 0 + + /** + * When nullcast is true a tweet is not delivered to a user's followers, not + * shown in the user's timeline, and does not appear in search results. + * + * This is primarily used to create tweets that can be used as ads without + * broadcasting them to an advertiser's followers. + */ + 11: bool nullcast = 0 (tweetEditAllowed='false') + + /** + * Narrowcast limits delivery of a tweet to followers in specific geographic + * regions. + */ + 12: optional Narrowcast narrowcast (tweetEditAllowed='false') + + /** + * The impression id of the ad from which this tweet was created. + * + * This is set when a user retweets or replies to a promoted tweet. It is + * used to attribute the "earned" exposure of an advertisement. + */ + 13: optional i64 tracking_id (personalDataType = 'ImpressionId', tweetEditAllowed='false') + + /** + * A shared identifier among all the tweets in the reply chain for a single + * tweet. + * + * The conversation id is the id of the tweet that started the conversation. 
+ */ + 14: optional i64 conversation_id (strato.space = "Tweet", strato.name = "conversation", personalDataType = 'TweetId') + + /** + * Whether this tweet has media of any type. + * + * Media can be in the form of media entities, media cards, or URLs in the + * tweet text that link to media partners. + * + * @see MediaIndexHelper + */ + 15: optional bool has_media + + /** + * Supported for legacy clients to associate a location with a Tweet. + * + * Twitter owned clients must use place_id REST API param for geo-tagging. + * + * @deprecated Use place_id REST API param + */ + 16: optional GeoCoordinates coordinates (personalDataType = 'GpsCoordinates', tweetEditAllowed='false') + + /** + * The location where a tweet was sent from. + * + * Place is either published in API request explicitly or implicitly reverse + * geocoded from API lat/lon coordinates params. + * + * Tweetypie implementation notes: + * - Currently, if both place_id and coordinates are specified, coordinates + * takes precedence in geo-tagging. I.e.: Place returned rgc(coordinates) + * sets the place_id field. + * - place_id is reverse geocoded on write-path. + */ + 17: optional string place_id (personalDataType = 'PublishedPreciseLocationTweet, PublishedCoarseLocationTweet') +}(persisted='true', hasPersonalData = 'true', tweetEditAllowed='false') + +/** + * List of community ID's the tweet belongs to. + */ +struct Communities { + 1: required list community_ids (personalDataType = 'EngagementId') +}(persisted='true') + +/** + * Tweet metadata that is present on extended tweets, a tweet whose total text length is greater + * than the classic limit of 140 characters. + */ +struct ExtendedTweetMetadata { + /** + * @deprecated was display_count + */ + 1: i32 unused1 = 0 + + /** + * The index, in unicode code points, at which the tweet text should be truncated + * for rendering in a public API backwards-compatible mode. 
Once truncated to this + * point, the text should be appended with an ellipsis, a space, and the short_url + * from self_permalink. The resulting text must conform to the 140 display glyph + * limit. + */ + 2: required i32 api_compatible_truncation_index + + /** + * @deprecated was default_display_truncation_index + */ + 3: i32 unused3 = 0 + + /** + * @deprecated was is_long_form + */ + 4: bool unused4 = 0 + + /** + * @deprecated was preview_range + */ + 5: optional TextRange unused5 + + /** + * @deprecated was extended_preview_range + */ + 6: optional TextRange unused6 +}(persisted='true') + +/** + * @deprecated use TransientCreateContext instead + */ +enum TweetCreateContextKey { + PERISCOPE_IS_LIVE = 0, + PERISCOPE_CREATOR_ID = 1 +} + +/** + * DirectedAtUserMetadata is a tweetypie-internal structure that can be used to store metadata about + * a directed-at user on the tweet. + * + * Note: absence of this field does not imply the tweet does not have a DirectedAtUser, see + * tweet.directedAtUserMetadata for more information. + */ +struct DirectedAtUserMetadata { + /** + * ID of the user a tweet is directed-at. + */ + 1: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * Tweet metadata that may be present on tweets in a self-thread (tweetstorm). + * + * A self-thread is a tree of self-replies that may either: + * 1. begin as a reply to another user's tweet (called a non-root self-thread) or + * 2. stand alone (called root self-thread). + * + * Note that not all self-threads have SelfThreadMetadata. + */ +struct SelfThreadMetadata { + /** + * A shared identifier among all the tweets in the self-thread (tweetstorm). + * + * The tweetstorm id is the id of the tweet that started the self thread. + * + * If the id matches the tweet's conversation_id then it is a root self-thread, otherwise it is + * a non-root self-thread. 
+ */ + 1: required i64 id (personalDataType = 'TweetId') + + /** + * Indicates if the tweet with this SelfThreadMetadata is a leaf in the self-thread tree. + * This flag might be used to encourage the author to extend their tweetstorm at the end. + */ + 2: bool isLeaf = 0 +}(persisted='true', hasPersonalData = 'true') + +/** + * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) + * flow, this should be `STANDARD`. + * + * When set to `CAMERA`, clients are expected to display the tweet with a different UI + * to emphasize attached media. + */ +enum ComposerSource { + STANDARD = 1 + CAMERA = 2 +} + + +/** + * The conversation owner and users in invited_user_ids can reply + **/ +struct ConversationControlByInvitation { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** + * The conversation owner, users in invited_user_ids, and users who the conversation owner follows can reply + **/ +struct ConversationControlCommunity { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** + * The conversation owner, users in invited_user_ids, and users who follow the conversation owner can reply + **/ +struct ConversationControlFollowers { + 1: required list invited_user_ids (personalDataType = 'UserId') + 2: required i64 conversation_tweet_author_id (personalDataType = 'UserId') + 3: optional bool invite_via_mention +}(persisted='true', hasPersonalData = 'true') + +/** +* This tweet metadata captures restrictions on who is allowed to reply in a conversation.
+*/ +union ConversationControl { + + 1: ConversationControlCommunity community + + 2: ConversationControlByInvitation byInvitation + + 3: ConversationControlFollowers followers +}(persisted='true', hasPersonalData = 'true') + +// This tweet metadata shows the exclusivity of a tweet and is used to determine +// whether replies / visibility of a tweet is limited +struct ExclusiveTweetControl { + 1: required i64 conversation_author_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * Tweet metadata for a Trusted Friends tweet. + * + * A Trusted Friends tweet is a tweet whose visibility is restricted to members + * of an author-specified list. + * + * Replies to a Trusted Friends tweet will inherit a copy of this metadata from + * the root tweet. + */ +struct TrustedFriendsControl { + /** + * The ID of the Trusted Friends List whose members can view this tweet. + */ + 1: required i64 trusted_friends_list_id (personalDataType = 'TrustedFriendsListMetadata') +}(persisted='true', hasPersonalData = 'true') + +enum CollabInvitationStatus { + PENDING = 0 + ACCEPTED = 1 + REJECTED = 2 +} + +/** + * Represents a user who has been invited to collaborate on a CollabTweet, associated with whether + * they have accepted or rejected collaboration + */ +struct InvitedCollaborator { + 1: required i64 collaborator_user_id (personalDataType = 'UserId') + 2: required CollabInvitationStatus collab_invitation_status +}(persisted='true', hasPersonalData='true') + +/** + * Present if Tweet is a CollabInvitation awaiting publishing, stores list of invited Collaborators + */ +struct CollabInvitation { + 1: required list invited_collaborators +}(persisted='true', hasPersonalData='true') + +/** + * Present if Tweet is a published CollabTweet, stores list of Collaborators + */ +struct CollabTweet { + 1: required list collaborator_user_ids (personalDataType = 'UserId') +}(persisted='true', hasPersonalData='true') + +/** + * CollabTweets treat multiple users 
as co-authors or "Collaborators" of a single "Collab Tweet". + * + * When creating a Collab Tweet, the original author will begin by creating a CollabInvitation which + * is sent to another Collaborator to accept or reject collaboration. If and when other + * Collaborators have accepted, the CollabInvitation is replaced by a CollabTweet which is published + * publicly and fanned out to followers of all Collaborators. A CollabInvitation will be hidden from + * anyone except the list of Collaborators using VF. The CollabTweet will then be fanned out like + * a regular Tweet to the profiles and combined audiences of all Collaborators. + * + * A Tweet representing a CollabTweet or CollabInvitation is denoted by the presence of a + * CollabControl field on a Tweet. + */ +union CollabControl { + 1: CollabInvitation collab_invitation + 2: CollabTweet collab_tweet +}(persisted='true', hasPersonalData='true') + +/** + * A Tweet is a message that belongs to a Twitter user. + * + * The Tweet struct replaces the deprecated Status struct. All fields except + * id are optional. + * + * This struct supports the additional fields flexible schema. Additional fields are + * defined starting from field 101. + * + * The guidelines for adding a new Additional field: + * 1. It's required to define the additional field as an optional struct. + * Inside the struct, define optional or non-optional field(s) according + * to your needs. + * 2. If you have several immutable pieces of data that are always accessed + * together, you should define them in the same struct for better storage + * locality. + * 3. If your data model has several mutable pieces, and different pieces can + * be updated in close succession, you should group them into + * separate structs and each struct contains one mutable piece. + */ +struct Tweet { + /** + * The primary key for a tweet. + * + * A tweet's id is assigned by the tweet service at creation time.
Since + * 2010-11-04 tweet ids have been generated using Snowflake. Prior to this + * ids were assigned sequentially by MySQL AUTOINCREMENT. + */ + 1: i64 id (personalDataType = 'TweetId') + + /** + * The essential properties of a tweet. + * + * This field will always be present on tweets returned by Tweetypie. It is + * marked optional so an empty tweet can be provided to write additional + * fields. + */ + 2: optional TweetCoreData core_data + + /** + * URLs extracted from the tweet's text. + */ + 3: optional list urls + + /** + * Mentions extracted from the tweet's text. + */ + 4: optional list mentions + + /** + * Hashtags extracted from the tweet's text. + */ + 5: optional list hashtags + + /** + * Cashtags extracted from the tweet's text + */ + 6: optional list cashtags + + 7: optional list media + + /** + * Place identified by Tweet.core_data.place_id. + */ + 10: optional Place place + + 11: optional QuotedTweet quoted_tweet + + /** + * The list of countries where this tweet will not be shown. + * + * This field contains countries for both the tweet and the user, so it may + * contain values even if has_takedown is false. + * + * @deprecated, use field 30 takedown_reasons which includes the same information and more + */ + 12: optional list takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + /** + * Interaction metrics for this tweet. + * + * Included when one of GetTweetOptions.load_retweet_count, + * GetTweetOptions.load_reply_count, or GetTweetOptions.load_favorite_count + * is set. This can be missing in a PARTIAL response if the TFlock request + * fails. + */ + 13: optional StatusCounts counts + + /** + * Properties of the client from which the tweet was sent. + * + * This can be missing in a PARTIAL response if the Passbird request fails. + */ + 14: optional DeviceSource device_source + + /** + * Properties of this tweet from the point of view of + * GetTweetOptions.for_user_id. 
+ * + * This field is included only when for_user_id is provided and + * include_perspective == true. This can be missing in a PARTIAL response if + * the timeline service request fails. + */ + 15: optional StatusPerspective perspective + + /** + * Version 1 cards. + * + * This field is included only when GetTweetOptions.include_cards == true. + */ + 16: optional list cards + + /** + * Version 2 cards. + * + * This field is included only when GetTweetOptions.include_cards + * == true and GetTweetOptions.cards_platform_key is set to a valid value. + */ + 17: optional cards.Card2 card2 + + /** + * Human language of tweet text as determined by TwitterLanguageIdentifier. + */ + 18: optional Language language + + /** + * @deprecated + */ + 19: optional map spam_labels + + /** + * User responsible for creating this tweet when it is not the same as the + * core_data.user_id. + * + * This is sensitive information and must not be shared externally (via UI, + * API, or streaming) except to the owner of the tweet + * (core_data.user_id) or a contributor to the owner's account. + */ + 20: optional Contributor contributor + + // obsolete 21: optional list topic_labels + + 22: optional enrichments_profilegeo.ProfileGeoEnrichment profile_geo_enrichment + + // Maps extension name to value; only populated if the request contained an extension on tweets. + // obsolete 24: optional map extensions + + /** + * Deprecated. + * Semantic entities that are related to this tweet. + */ + 25: optional TweetPivots tweet_pivots + + /** + * @deprecated + * Strato Tweet Extensions support has moved to birdherd. + * + * Internal thrift clients should query strato columns directly and + * not rely upon ext/*.Tweet columns which are designed to serve + * client APIs. + */ + 26: optional binary extensions_reply + + /** + * Has the requesting user muted the conversation referred to by + * `conversation_id`? When this field is absent, the conversation may + * or may not be muted.
Use the `include_conversation_muted` field in + * GetTweetOptions to request this field. + * + * If this field has a value, the value applies to the user in the + * `for_user_id` field of the requesting `GetTweetOptions`. + */ + 27: optional bool conversation_muted + + /** + * The user id of the tweet referenced by conversation_id + * + * @deprecated Was conversation_owner_id. This was never implemented. + */ + 28: optional i64 unused28 + + /** + * Has this tweet been removed from its conversation by the conversation owner? + * + * @deprecated Was is_removed_from_conversation. This was never implemented. + */ + 29: optional bool unused29 + + /** + * A list of takedown reasons indicating which country and reason this tweet was taken down. + */ + 30: optional list takedown_reasons + + /** + * @obsolete, self-thread metadata is now stored in field 151, self_thread_metadata + */ + 31: optional self_thread.SelfThreadInfo self_thread_info + + // field 32 to 99 are reserved + // field 100 is used for flexible schema proof of concept + // additional fields + // these fields are stored in Manhattan flexible schema + 101: optional TweetMediaTags media_tags + 102: optional SchedulingInfo scheduling_info + + /** + * @deprecated + */ + 103: optional CardBindingValues binding_values + + /** + * @deprecated + */ + 104: optional ReplyAddresses reply_addresses + + /** + * OBSOLETE, but originally contained information about synthetic tweets created by the first + * version of Twitter Suggests. 
+ * + * @deprecated + */ + 105: optional TwitterSuggestInfo obsolete_twitter_suggest_info + + 106: optional EscherbirdEntityAnnotations escherbird_entity_annotations (personalDataType = 'AnnotationValue') + + // @deprecated 2021-07-19 + 107: optional safety_label.SafetyLabel spam_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 108: optional safety_label.SafetyLabel abusive_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 109: optional safety_label.SafetyLabel low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 110: optional safety_label.SafetyLabel nsfw_high_precision_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 111: optional safety_label.SafetyLabel nsfw_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 112: optional safety_label.SafetyLabel abusive_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 113: optional safety_label.SafetyLabel low_quality_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 114: optional safety_label.SafetyLabel persona_non_grata_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 115: optional safety_label.SafetyLabel recommendations_low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 116: optional safety_label.SafetyLabel experimentation_label (personalDataType = 'TweetSafetyLabels') + + 117: optional tweet_location_info.TweetLocationInfo tweet_location_info + 118: optional CardReference card_reference + + /** + * @deprecated 2020-07-08 no longer populated. + */ + 119: optional SupplementalLanguage supplemental_language + + // field 120, additional_media_metadata, is deprecated. 
+ // field 121, media_metadatas, is deprecated + + // under certain circumstances, including long form tweets, we create and store a self-permalink + // to this tweet. in the case of a long-form tweet, this will be used in a truncated version + // of the tweet text. + 122: optional ShortenedUrl self_permalink + + // metadata that is present on extended tweets. + 123: optional ExtendedTweetMetadata extended_tweet_metadata + + // obsolete 124: crosspost_destinations.CrosspostDestinations crosspost_destinations + + // Communities associated with a tweet + 125: optional Communities communities (personalDataType = 'PrivateTweetEntitiesAndMetadata', tweetEditAllowed='false') + + // If some text at the beginning or end of the tweet should be hidden, then this + // field indicates the range of text that should be shown in clients. + 126: optional TextRange visible_text_range + + // @deprecated 2021-07-19 + 127: optional safety_label.SafetyLabel spam_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 128: optional safety_label.SafetyLabel duplicate_content_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 129: optional safety_label.SafetyLabel live_low_quality_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 130: optional safety_label.SafetyLabel nsfa_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 131: optional safety_label.SafetyLabel pdna_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 132: optional safety_label.SafetyLabel search_blacklist_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 133: optional safety_label.SafetyLabel low_quality_mention_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 134: optional safety_label.SafetyLabel bystander_abusive_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 135: optional 
safety_label.SafetyLabel automation_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 136: optional safety_label.SafetyLabel gore_and_violence_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 137: optional safety_label.SafetyLabel untrusted_url_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 138: optional safety_label.SafetyLabel gore_and_violence_high_recall_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 139: optional safety_label.SafetyLabel nsfw_video_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 140: optional safety_label.SafetyLabel nsfw_near_perfect_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 141: optional safety_label.SafetyLabel automation_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 142: optional safety_label.SafetyLabel nsfw_card_image_label (personalDataType = 'TweetSafetyLabels') + // @deprecated 2021-07-19 + 143: optional safety_label.SafetyLabel duplicate_mention_label (personalDataType = 'TweetSafetyLabels') + + // @deprecated 2021-07-19 + 144: optional safety_label.SafetyLabel bounce_label (personalDataType = 'TweetSafetyLabels') + // field 145 to 150 is reserved for safety labels + + /** + * If this tweet is part of a self_thread (tweetstorm) then this value may be set. + * See SelfThreadMetadata for details. + */ + 151: optional SelfThreadMetadata self_thread_metadata + // field 152 has been deprecated + + // The composer used to create this tweet. Either via the standard tweet creator or the + // Camera flow (go/newscamera). + // + // NOTE: this field is only set if a client passed an explicit ComposerSource in the PostTweetRequest. + // News Camera is deprecated and we no longer set ComposerSource in the PostTweetRequest so no new Tweets will + // have this field. 
+ 153: optional ComposerSource composer_source + + // Present if replies are restricted, see ConversationControl for more details + 154: optional ConversationControl conversation_control + + // Determines the super follows requirements for being able to view a tweet. + 155: optional ExclusiveTweetControl exclusive_tweet_control (tweetEditAllowed='false') + + // Present for a Trusted Friends tweet, see TrustedFriendsControl for more details. + 156: optional TrustedFriendsControl trusted_friends_control (tweetEditAllowed='false') + + // Data about edits and editability. See EditControl for more details. + 157: optional edit_control.EditControl edit_control + + // Present for a CollabTweet or CollabInvitation, see CollabControl for more details. + 158: optional CollabControl collab_control (tweetEditAllowed='false') + + // Present for a 3rd-party developer-built card. See http://go/developer-built-cards-prd + 159: optional i64 developer_built_card_id (personalDataType = 'CardId') + + // Data about enrichments attached to a tweet. + 160: optional creative_entity_enrichments.CreativeEntityEnrichments creative_entity_enrichments_for_tweet + + // This field includes summed engagements from the previous tweets in the edit chain. + 161: optional StatusCounts previous_counts + + // A list of media references, including information about the source Tweet for pasted media. + // Prefer this field to media_keys, as media_keys is not present for old Tweets or pasted media Tweets. + 162: optional list media_refs + + // Whether this tweet is a 'backend tweet' to be referenced only by the creatives containers service + // go/cea-cc-integration for more details + 163: optional bool is_creatives_container_backend_tweet + + /** + * Aggregated perspective of this tweet and all other versions from the point of view of the + * user specified in for_user_id. 
+ * + * This field is included only when for_user_id is provided and can be missing in a PARTIAL response + * if the timeline service request fails. + */ + 164: optional api_fields.TweetPerspective edit_perspective + + // Visibility controls related to Toxic Reply Filtering + // go/toxrf for more details + 165: optional filtered_reply_details.FilteredReplyDetails filtered_reply_details + + // The list of mentions that have unmentioned from the tweet's associated conversation + 166: optional unmentions.UnmentionData unmention_data + + /** + * A list of users that were mentioned in the tweet and have a blocking + * relationship with the author. + */ + 167: optional BlockingUnmentions blocking_unmentions + + /** + * A list of users that were mentioned in the tweet and should be unmentioned + * based on their mention settings + */ + 168: optional SettingsUnmentions settings_unmentions + + /** + * A Note associated with this Tweet. + */ + 169: optional note_tweet.NoteTweet note_tweet + + // For additional fields, the next available field id is 170. + // NOTE: when adding a new additional field, please also update UnrequestedFieldScrubber.scrubKnownFields + + /** + * INTERNAL FIELDS + * + * These fields are used by tweetypie only and should not be accessed externally. + * The field ids are in descending order, starting with `32767`. + */ + + /** + * Present if tweet data is provided by the creatives container service instead of tweetypie storage, + * with encapsulated tweets or customized data. + */ + 32763: optional i64 underlying_creatives_container_id + + /** + * Stores tweetypie-internal metadata about a DirectedAtUser. + * + * A tweet's DirectedAtUser is hydrated as follows: + * 1. if this field is present, then DirectedAtUserMetadata.userId is the directed-at user + * 2. if this field is absent, then if the tweet has a reply and has a mention starting at text + * index 0 then that user is the directed-at user. 
+ * + * Note: External clients should use CoreData.directed_at_user. + */ + 32764: optional DirectedAtUserMetadata directed_at_user_metadata + + // list of takedowns that are applied directly to the tweet + 32765: optional list tweetypie_only_takedown_reasons + + // Stores the media keys used to interact with the media platform systems. + // Prefer `media_refs` which will always have media data, unlike this field which is empty for + // older Tweets and Tweets with pasted media. + 32766: optional list media_keys + + // field 32767 is the list of takedowns that are applied directly to the tweet + 32767: optional list tweetypie_only_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + + // for internal fields, the next available field id is 32762 (counting down) +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift new file mode 100644 index 000000000..db8361805 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_audit.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.tweet_audit +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +// Copied from UserActionReason in guano.thrift - this should be kept in sync (though upper cased) +enum AuditUserActionReason { + SPAM + CHURNING + OTHER + PHISHING + BOUNCING + + RESERVED_1 + RESERVED_2 +} + +// This struct contains all fields of DestroyStatus in guano.thrift that can be set per remove/deleteTweets invocation +// Values are passed through TweetyPie as-is to guano scribe and not used by TweetyPie. 
+struct AuditDeleteTweet { + 1: optional string host (personalDataType = 'IpAddress') + 2: optional string bulk_id + 3: optional AuditUserActionReason reason + 4: optional string note + 5: optional bool done + 6: optional string run_id + // OBSOLETE 7: optional i64 id + 8: optional i64 client_application_id (personalDataType = 'AppId') + 9: optional string user_agent (personalDataType = 'UserAgent') +}(persisted = 'true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift new file mode 100644 index 000000000..4ad96e564 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_comparison_service.thrift @@ -0,0 +1,28 @@ +namespace java com.twitter.tweetypiecomparison.thriftjava +#@namespace scala com.twitter.tweetypiecomparison.thriftscala +#@namespace strato com.twitter.tweetypiecomparison + +include "com/twitter/tweetypie/tweet_service.thrift" +include "com/twitter/context/viewer.thrift" + +service TweetComparisonService { + void compare_retweet( + 1: tweet_service.RetweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_post_tweet( + 1: tweet_service.PostTweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_unretweet( + 1: tweet_service.UnretweetRequest request, + 2: optional viewer.Viewer viewer + ) + + void compare_delete_tweets( + 1: tweet_service.DeleteTweetsRequest request, + 2: optional viewer.Viewer viewer + ) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift new file mode 100644 index 000000000..a80a74bf9 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_events.thrift @@ -0,0 +1,277 @@ +namespace java com.twitter.tweetypie.thriftjava +namespace py gen.twitter.tweetypie.tweet_events +#@namespace scala 
com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/gizmoduck/user.thrift" + +/** + * SafetyType encodes the event user's safety state in an enum so downstream + * event processors can filter events without having to load the user. + */ +enum SafetyType { + PRIVATE = 0 // user.safety.isProtected + RESTRICTED = 1 // !PRIVATE && user.safety.suspended + PUBLIC = 2 // !(PRIVATE || RESTRICTED) + RESERVED0 = 3 + RESERVED1 = 4 + RESERVED2 = 5 + RESERVED3 = 6 +} + +struct TweetCreateEvent { + /** + * The tweet that has been created. + */ + 1: tweet.Tweet tweet + + /** + * The user who owns the created tweet. + */ + 2: user.User user + + /** + * The tweet being retweeted. + */ + 3: optional tweet.Tweet source_tweet + + /** + * The user who owns source_tweet. + */ + 4: optional user.User source_user + + /** + * The user whose tweet or retweet is being retweeted. + * + * This is the id of the user who owns + * tweet.core_data.share.parent_status_id. In many cases this will be the + * same as source_user.id; it is different when the tweet is created via + * another retweet. See the explanation of source_user_id and parent_user_id + * in Share for examples. + */ + 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') + + /** + * The tweet quoted in the created tweet. + */ + 6: optional tweet.Tweet quoted_tweet + + /** + * The user who owns quoted_tweet. + */ + 7: optional user.User quoted_user + + /** + * Arbitrary passthrough metadata about tweet creation. + * + * See TweetCreateContextKey for more details about the data that may be + * present here. 
+ */ + 8: optional map additional_context (personalDataTypeValue='UserId') + + /** + * Additional request arguments passed through to consumers. + */ + 9: optional transient_context.TransientCreateContext transient_context + + /** + * Flag exposing if a quoted tweet has been quoted by the user previously. + **/ + 10: optional bool quoter_has_already_quoted_tweet +}(persisted='true', hasPersonalData = 'true') + +struct TweetDeleteEvent { + /** + * The tweet being deleted. + */ + 1: tweet.Tweet tweet + + /** + * The user who owns the deleted tweet. + */ + 2: optional user.User user + + /** + * Whether this tweet was deleted as part of user erasure (the process of deleting tweets + * belonging to deactivated accounts). + * + * These deletions occur in high volume spikes and the tweets have already been made invisible + * externally. You may wish to process them in batches or offline. + */ + 3: optional bool is_user_erasure + + /** + * Audit information from the DeleteTweetRequest that caused this deletion. + * + * This field is used to track the reason for deletion in non-user-initiated + * tweet deletions, like Twitter support agents deleting tweets or spam + * cleanup. + */ + 4: optional tweet_audit.AuditDeleteTweet audit + + /** + * Id of the user initiating this request. + * It could be either the owner of the tweet or an admin. + * It is used for scrubbing. + */ + 5: optional i64 by_user_id (personalDataType = 'UserId') + + /** + * Whether this tweet was deleted by an admin user or not + * + * It is used for scrubbing. 
+ */ + 6: optional bool is_admin_delete +}(persisted='true', hasPersonalData = 'true') + +struct TweetUndeleteEvent { + 1: tweet.Tweet tweet + 2: optional user.User user + 3: optional tweet.Tweet source_tweet + 4: optional user.User source_user + 5: optional i64 retweet_parent_user_id (personalDataType = 'UserId') + 6: optional tweet.Tweet quoted_tweet + 7: optional user.User quoted_user + // timestamp of the deletion that this undelete is reversing + 8: optional i64 deleted_at_msec +}(persisted='true', hasPersonalData = 'true') + +/** + * When a user deletes the location information for their tweets, we send one + * TweetScrubGeoEvent for every tweet from which the location is removed. + * + * Users cause this by selecting "Delete location information" in Settings -> + * Privacy. + */ +struct TweetScrubGeoEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +/** + * When a user deletes the location information for their tweets, we send one + * UserScrubGeoEvent with the max tweet ID that was scrubbed (in addition to + * sending multiple TweetScrubGeoEvents as described above). + * + * Users cause this by selecting "Delete location information" in Settings -> + * Privacy. This additional event is sent to maintain backwards compatibility + * with Hosebird. + */ +struct UserScrubGeoEvent { + 1: i64 user_id (personalDataType = 'UserId') + 2: i64 max_tweet_id (personalDataType = 'TweetId') +}(persisted='true', hasPersonalData = 'true') + +struct TweetTakedownEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + // This is the complete list of takedown country codes for the tweet, + // including whatever modifications were made to trigger this event. 
+ // @deprecated Prefer takedown_reasons once TWEETYPIE-4329 deployed + 3: list takedown_country_codes = [] + // This is the complete list of takedown reasons for the tweet, + // including whatever modifications were made to trigger this event. + 4: list takedown_reasons = [] +}(persisted='true', hasPersonalData = 'true') + +struct AdditionalFieldUpdateEvent { + // Only contains the tweet id and modified or newly added fields on that tweet. + // Unchanged fields and tweet core data are omitted. + 1: tweet.Tweet updated_fields + 2: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +struct AdditionalFieldDeleteEvent { + // a map from tweet id to deleted field ids + // Each event will only contain one tweet. + 1: map> deleted_fields (personalDataTypeKey='TweetId') + 2: optional i64 user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +// This event is only logged to scribe not sent to EventBus +struct TweetMediaTagEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + 3: set tagged_user_ids (personalDataType = 'UserId') + 4: optional i64 timestamp_ms +}(persisted='true', hasPersonalData = 'true') + +struct TweetPossiblySensitiveUpdateEvent { + 1: i64 tweet_id (personalDataType = 'TweetId') + 2: i64 user_id (personalDataType = 'UserId') + // The below two fields contain the results of the update. 
+ 3: bool nsfw_admin + 4: bool nsfw_user +}(persisted='true', hasPersonalData = 'true') + +struct QuotedTweetDeleteEvent { + 1: i64 quoting_tweet_id (personalDataType = 'TweetId') + 2: i64 quoting_user_id (personalDataType = 'UserId') + 3: i64 quoted_tweet_id (personalDataType = 'TweetId') + 4: i64 quoted_user_id (personalDataType = 'UserId') +}(persisted='true', hasPersonalData = 'true') + +struct QuotedTweetTakedownEvent { + 1: i64 quoting_tweet_id (personalDataType = 'TweetId') + 2: i64 quoting_user_id (personalDataType = 'UserId') + 3: i64 quoted_tweet_id (personalDataType = 'TweetId') + 4: i64 quoted_user_id (personalDataType = 'UserId') + // This is the complete list of takedown country codes for the tweet, + // including whatever modifications were made to trigger this event. + // @deprecated Prefer takedown_reasons + 5: list takedown_country_codes = [] + // This is the complete list of takedown reasons for the tweet, + // including whatever modifications were made to trigger this event. + 6: list takedown_reasons = [] +}(persisted='true', hasPersonalData = 'true') + +union TweetEventData { + 1: TweetCreateEvent tweet_create_event + 2: TweetDeleteEvent tweet_delete_event + 3: AdditionalFieldUpdateEvent additional_field_update_event + 4: AdditionalFieldDeleteEvent additional_field_delete_event + 5: TweetUndeleteEvent tweet_undelete_event + 6: TweetScrubGeoEvent tweet_scrub_geo_event + 7: TweetTakedownEvent tweet_takedown_event + 8: UserScrubGeoEvent user_scrub_geo_event + 9: TweetPossiblySensitiveUpdateEvent tweet_possibly_sensitive_update_event + 10: QuotedTweetDeleteEvent quoted_tweet_delete_event + 11: QuotedTweetTakedownEvent quoted_tweet_takedown_event +}(persisted='true', hasPersonalData = 'true') + +/** + * @deprecated + */ +struct Checksum { + 1: i32 checksum +}(persisted='true') + +struct TweetEventFlags { + /** + * @deprecated Was dark_for_service. 
+ */ + 1: list unused1 = [] + + 2: i64 timestamp_ms + + 3: optional SafetyType safety_type + + /** + * @deprecated Was checksum. + */ + 4: optional Checksum unused4 +}(persisted='true') + +/** + * A TweetEvent is a notification published to the tweet_events stream. + */ +struct TweetEvent { + 1: TweetEventData data + 2: TweetEventFlags flags +}(persisted='true', hasPersonalData = 'true') diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift new file mode 100644 index 000000000..3be5f3b12 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service.thrift @@ -0,0 +1,2320 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala +#@namespace strato com.twitter.tweetypie +namespace py gen.twitter.tweetypie.service +namespace rb TweetyPie +namespace go tweetypie + +include "com/twitter/bouncer/bounce.thrift" +include "com/twitter/carousel/service/carousel_service.thrift" +include "com/twitter/context/feature_context.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include "com/twitter/servo/exceptions.thrift" +include "com/twitter/spam/features/safety_meta_data.thrift" +include "com/twitter/spam/rtf/safety_label.thrift" +include "com/twitter/spam/rtf/safety_level.thrift" +include "com/twitter/spam/rtf/safety_result.thrift" +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/deleted_tweet.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/incentives/jiminy/jiminy.thrift" +include "unified_cards_contract.thrift" + +typedef i16 FieldId + +struct TweetGeoSearchRequestID { + 1: required string id (personalDataType = 
'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') +}(hasPersonalData = 'true') + +struct TweetCreateGeo { + 1: optional tweet.GeoCoordinates coordinates + 2: optional string place_id (personalDataType = 'InferredLocation') + 3: optional map place_metadata (personalDataTypeKey = 'InferredLocation', personalDataTypeValue = 'InferredLocation') + 4: bool auto_create_place = 1 + // deprecated; use tweet.GeoCoordinates.display + 5: bool display_coordinates = 1 + 6: bool override_user_geo_setting = 0 + 7: optional TweetGeoSearchRequestID geo_search_request_id +}(hasPersonalData = 'true') + +enum StatusState { + /** + * The tweet was found and successfully hydrated. + */ + FOUND = 0 + + /** + * The tweet was not found. It may have been deleted, or could just be an invalid or + * unused tweet id. + */ + NOT_FOUND = 1 + + /** + * The tweet was found, but there was at least one error hydrating some data on the tweet. + * GetTweetResult.missing_fields indicates which fields may have not been hydrated completely. + */ + PARTIAL = 2 + + /** + * @deprecated All failures, including time outs, are indicated by `Failed`. + */ + TIMED_OUT = 3 + + /** + * There was an upstream or internal failure reading this tweet. Usually indicates a + * transient issue that is safe to retry immediately. + */ + FAILED = 4 + + /** + * @deprecated tweets from deactivated users will soon be indicated via `Drop` with + * a `FilteredReason` of `authorAccountIsInactive`. + */ + DEACTIVATED_USER = 5 + + /** + * @deprecated tweets from suspended users will soon be indicated via `Drop` with + * a `FilteredReason` of `authorAccountIsInactive`. + */ + SUSPENDED_USER = 6 + + /** + * @deprecated tweets from protected users that the viewer can't see will soon be + * indicated via `Drop` with a `FilteredReason` of `authorIsProtected`. 
+ */ + PROTECTED_USER = 7 + /** + * @deprecated tweets that have been reported by the viewer will soon be indicated + * via `Drop` or `Suppress` with a `FilteredReason` of `reportedTweet`. + */ + REPORTED_TWEET = 8 + + // PrivateTweet was originally used for TwitterSuggest v1 but has since been removed + // obsolete: PRIVATE_TWEET = 9 + + /** + * Could not return this tweet because of backpressure, should + * not be retried immediately; try again later + */ + OVER_CAPACITY = 10 + + /** + * Returned when the requesting client is considered to not be + * able to render the tweet properly + */ + UNSUPPORTED_CLIENT = 11 + + /** + * The tweet exists, but was not returned because it should not be seen by the + * viewer. The reason for the tweet being filtered is indicated via + * GetTweetResult.filtered_reason. + */ + DROP = 12 + + /** + * The tweet exists and was returned, but should not be directly shown to the + * user without additional user intent to see the tweet, as it may be offensive. + * The reason for the suppression is indicated via GetTweetResult.filtered_reason. + */ + SUPPRESS = 13 + + /** + * The tweet once existed and has been deleted. + * When GetTweetOptions.enable_deleted_state is true, deleted tweets + * will be returned as DELETED + * When GetTweetOptions.enable_deleted_state is false, deleted tweets + * will be returned as NOT_FOUND. + */ + DELETED = 14 + + /** + * The tweet once existed, had violated Twitter Rules, and has been deleted. + * When GetTweetOptions.enable_deleted_state is true, bounce-deleted tweets + * will be returned as BOUNCE_DELETED + * When GetTweetOptions.enable_deleted_state is false, bounce-deleted tweets + * will be returned as NOT_FOUND. + */ + BOUNCE_DELETED = 15 + + RESERVED_1 = 16 + RESERVED_2 = 17 + RESERVED_3 = 18 + RESERVED_4 = 19 +} + +enum TweetCreateState { + /** + * Tweet was created successfully. + */ + OK = 0, + + /** + * The user_id field from the creation request does not correspond to a user. 
+ */ + USER_NOT_FOUND = 1, + + SOURCE_TWEET_NOT_FOUND = 2, + SOURCE_USER_NOT_FOUND = 3, + + /** + * @deprecated Users can now retweet their own tweets. + */ + CANNOT_RETWEET_OWN_TWEET = 4, + + CANNOT_RETWEET_PROTECTED_TWEET = 5, + CANNOT_RETWEET_SUSPENDED_USER = 6, + CANNOT_RETWEET_DEACTIVATED_USER = 7, + CANNOT_RETWEET_BLOCKING_USER = 8, + + ALREADY_RETWEETED = 9, + CONTRIBUTOR_NOT_SUPPORTED = 10, + + /** + * The created_via field from the creation request does not correspond to a + * known client application. + */ + DEVICE_SOURCE_NOT_FOUND = 11, + + MALWARE_URL = 12, + INVALID_URL = 13, + USER_DEACTIVATED = 14, + USER_SUSPENDED = 15, + TEXT_TOO_LONG = 16, + TEXT_CANNOT_BE_BLANK = 17, + DUPLICATE = 18, + + /** + * PostTweetRequest.in_reply_to_tweet_id was set to a tweet that cannot be found. + * + * This usually means that the tweet was recently deleted, but could also + * mean that the tweet isn't visible to the reply author. (This is the + * case for replies by blocked users.) + */ + IN_REPLY_TO_TWEET_NOT_FOUND = 19, + + INVALID_IMAGE = 20, + INVALID_ADDITIONAL_FIELD = 21, + RATE_LIMIT_EXCEEDED = 22, + INVALID_NARROWCAST = 23, + + /** + * Antispam systems (Scarecrow) denied the request. + * + * This happens for tweets that are probably spam, but there is some + * uncertainty. Tweets that Scarecrow is certain are spammy will appear to + * succeed, but will not be added to backends. + */ + SPAM = 24, + SPAM_CAPTCHA = 25, + + /** + * A provided media upload ID can't be resolved. + */ + MEDIA_NOT_FOUND = 26, + + /** + * Catch-all for when uploaded media violate some condition. + * + * For example, too many photos in a multi-photo-set, or including an + * animated gif or video in a multi-photo-set. + */ + INVALID_MEDIA = 27, + + /** + * Returned when Scarecrow tells us to rate limit a tweet request. + * + * Non verified users (i.e., phone verified, email verified) have a more + * strict rate limit. 
+ */ + SAFETY_RATE_LIMIT_EXCEEDED = 28, + + /** + * Scarecrow has rejected the creation request until the user completes the + * bounce assignment. + * + * This flag indicates that PostTweetResult.bounce will contain a Bounce + * struct to be propagated to the client. + */ + BOUNCE = 29, + + /** + * Tweet creation was denied because the user is in ReadOnly mode. + * + * As with SPAM, tweets will appear to succeed but will not be actually + * created. + */ + USER_READONLY = 30, + + /** + * Maximum number of mentions allowed in a tweet was exceeded. + */ + MENTION_LIMIT_EXCEEDED = 31, + + /** + * Maximum number of URLs allowed in a tweet was exceeded. + */ + URL_LIMIT_EXCEEDED = 32, + + /** + * Maximum number of hashtags allowed in a tweet was exceeded. + */ + HASHTAG_LIMIT_EXCEEDED = 33, + + /** + * Maximum number of cashtags allowed in a tweet was exceeded. + */ + CASHTAG_LIMIT_EXCEEDED = 34, + + /** + * Maximum length of a hashtag was exceeded. + */ + HASHTAG_LENGTH_LIMIT_EXCEEDED = 35, + + /** + * Returned if a request contains more than one attachment type, which + * includes media, attachment_url, and card_reference. + */ + TOO_MANY_ATTACHMENT_TYPES = 36, + + /** + * Returned if the request contained an attachment URL that isn't allowed. + */ + INVALID_ATTACHMENT_URL = 37, + + /** + * We don't allow users without screen names to be retweeted. + */ + CANNOT_RETWEET_USER_WITHOUT_SCREEN_NAME = 38, + + /** + * Tweets may not be allowed if replying or retweeting IPI'd tweets + * See go/tp-ipi-tdd for more details + */ + DISABLED_BY_IPI_POLICY = 39, + + /** + * This state expands our transparency around which URLs are blacklisted or limited + */ + URL_SPAM = 40, + + // Conversation controls are only valid when present on a root + // conversation tweet and quoted tweets. + INVALID_CONVERSATION_CONTROL = 41, + + // Reply Tweet is limited due to conversation controls state set on + // root conversation Tweet. 
+ REPLY_TWEET_NOT_ALLOWED = 42, + + // Nudge is returned when the client provides nudgeOptions and tweetypie receives a nudge + // from the Jiminy strato column. + NUDGE = 43, + + // ApiError BadRequest (400) "Reply to a community tweet must also be a community tweet" + // -- Triggered when a user tries replying to a community tweet with a non community tweet. + COMMUNITY_REPLY_TWEET_NOT_ALLOWED = 44, + // ApiError Forbidden (403) "User is not authorized to post to this community" + // -- Triggered when a user tries posting to a public/closed community that they are not part of. + COMMUNITY_USER_NOT_AUTHORIZED = 45, + // ApiError NotFound (404) "Community does not exist" -- Triggered when: + // a) A user tries posting to a private community they are not a part of. + // b) A user tries posting to a non existent community + COMMUNITY_NOT_FOUND = 46, + // ApiError BadRequest (400) "Cannot retweet a community tweet" + // -- Triggered when a user tries to retweet a community tweet. Community tweets can not be retweeted. + COMMUNITY_RETWEET_NOT_ALLOWED = 47, + + // Attempt to tweet with Conversation Controls was rejected, e.g. due to feature switch authorization. + CONVERSATION_CONTROL_NOT_ALLOWED = 48, + + // Super follow tweets require a special permission to create. + SUPER_FOLLOWS_CREATE_NOT_AUTHORIZED = 49, + + // Not all params can go together. E.g. super follow tweets can not be community tweets. + SUPER_FOLLOWS_INVALID_PARAMS = 50, + + // ApiError Forbidden (403) "Protected user can not post to communities" + // -- Triggered when a protected user tries tweeting or replying + // to a community tweet. They are not allowed to create community tweets. + COMMUNITY_PROTECTED_USER_CANNOT_TWEET = 51, + + // ApiError Forbidden (451) "User is not permitted to engage with this exclusive tweet." + // -- Triggered when a user tries to reply to an exclusive tweet without being + // a superfollower of the tweet author. Could be used for other engagements in the future (e.g. 
favorite) + EXCLUSIVE_TWEET_ENGAGEMENT_NOT_ALLOWED = 52 + + /** + * ApiError BadRequest (400) "Invalid parameters on Trusted Friends tweet creation" + * + * Returned when either of the following occur: + * a) A user tries setting Trusted Friends Control on a reply + * b) A user tries setting Trusted Friends Control on a tweet with any of the following set: + * i) Conversation Control + * ii) Community + * iii) Exclusive Tweet Control + */ + TRUSTED_FRIENDS_INVALID_PARAMS = 53, + + /** + * ApiError Forbidden (403) + * + * Returned when a user tries to retweet a Trusted Friends tweet. + */ + TRUSTED_FRIENDS_RETWEET_NOT_ALLOWED = 54, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to reply to a Trusted Friends tweet + * and they are not a trusted friend. + */ + TRUSTED_FRIENDS_ENGAGEMENT_NOT_ALLOWED = 55, + + /** + * ApiError BadRequest (400) "Invalid parameters for creating a CollabTweet or CollabInvitation" + * + * Returned when any of the following are true: + * a) A user tries setting Collab Control on a reply + * b) A user tries setting Collab Control on a tweet with any of the following set: + * i) Conversation Control + * ii) Community + * iii) Exclusive Tweet Control + * iv) Trusted Friends Control + **/ + COLLAB_TWEET_INVALID_PARAMS = 56, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to create a Trusted Friends tweet but they are not allowed to tweet + * to the requested Trusted Friends list. + */ + TRUSTED_FRIENDS_CREATE_NOT_ALLOWED = 57, + + /** + * Returned when the current user is not allowed to edit in general, this might be due to missing + * roles during development, or a missing subscription. + */ + EDIT_TWEET_USER_NOT_AUTHORIZED = 58, + + /** + * Returned when a user tries to edit a Tweet which they didn't author. + */ + EDIT_TWEET_USER_NOT_AUTHOR = 59, + + /** + * Returned when a user tries to edit a stale tweet, meaning a tweet which has already been edited. 
+ */ + EDIT_TWEET_NOT_LATEST_VERSION = 60, + + /** + * ApiError Forbidden (460) + * + * Returned when a user tries to create a Trusted Friends tweet that quote-tweets a Trusted + * Friends tweet. + */ + TRUSTED_FRIENDS_QUOTE_TWEET_NOT_ALLOWED = 61, + + /** + * Returned when a user tries to edit a tweet for which the editing time has already expired. + */ + EDIT_TIME_LIMIT_REACHED = 62, + + /** + * Returned when a user tries to edit a tweet which has already been edited the maximum number of times. + */ + EDIT_COUNT_LIMIT_REACHED = 63, + + /* Returned when a user tries to edit a field that is not allowed to be edited */ + FIELD_EDIT_NOT_ALLOWED = 64, + + /* Returned when the initial Tweet could not be found when trying to validate an edit */ + INITIAL_TWEET_NOT_FOUND = 65, + + /** + * ApiError Forbidden (457) + * + * Returned when a user tries to reply to a stale tweet + */ + STALE_TWEET_ENGAGEMENT_NOT_ALLOWED = 66, + + /** + * ApiError Forbidden (460) + * + * Returned when a user tries to create a tweet that quote-tweets a stale tweet + */ + STALE_TWEET_QUOTE_TWEET_NOT_ALLOWED = 67, + + /* Tweet cannot be edited because the initial tweet is + * marked as not edit eligible */ + NOT_ELIGIBLE_FOR_EDIT = 68, + + /* A stale version of an edit tweet cannot be retweeted + * Only the latest version of an edit chain should be allowed to be retweeted. */ + STALE_TWEET_RETWEET_NOT_ALLOWED = 69, + + RESERVED_32 = 70, + RESERVED_33 = 71, + RESERVED_34 = 72, + RESERVED_35 = 73, + RESERVED_36 = 74, + RESERVED_37 = 75, +} + +enum UndeleteTweetState { + /** + * The Tweet was successfully undeleted. + */ + SUCCESS = 0, + + /** + * The Tweet was deleted and is still deleted. It cannot be undeleted + * because the tweet is no longer in the soft delete archive. + */ + SOFT_DELETE_EXPIRED = 1, + + /** + * The Tweet likely has never existed, and therefore cannot be undeleted. + */ + TWEET_NOT_FOUND = 2, + + /** + * The Tweet could not be undeleted because it was not deleted in + * the first place.
+ */ + TWEET_ALREADY_EXISTS = 3, + + /** + * The user who created the Tweet being undeleted could not be found. + */ + USER_NOT_FOUND = 4, + + /** + * The Tweet could not be undeleted because it is a retweet and the original + * tweet is gone. + */ + SOURCE_TWEET_NOT_FOUND = 5, + + /** + * The Tweet could not be undeleted because it is a retweet and the author + * of the original tweet is gone. + */ + SOURCE_USER_NOT_FOUND = 6, + + /** + * The Tweet was deleted and is still deleted. It cannot be undeleted + * because the tweet has been bounce deleted. Bounce deleted tweet + * has been found to violate Twitter Rules. go/bouncer go/bounced-tweet + */ + TWEET_IS_BOUNCE_DELETED = 7, + + /** + * This tweet cannot be undeleted because the tweet was created by a + * user when they were under 13. + **/ + TWEET_IS_U13_TWEET = 8, + + RESERVED_2 = 9, + RESERVED_3 = 10 +} + +enum TweetDeleteState { + /** + * Tweet was deleted successfully. + */ + OK = 0, + + /** + * Tweet was not deleted because of the associated user. + * + * The DeleteTweetsRequest.by_user_id must match the tweet owner or be an + * admin user. + */ + PERMISSION_ERROR = 1, + + /** + * The expected_user_id provided in DeleteTweetsRequest does not match the + * user_id of the tweet owner. + */ + EXPECTED_USER_ID_MISMATCH = 2, + + /** + * @deprecated. + * + * is_user_erasure was set in DeleteTweetsRequest but the user was not in + * the erased state. + */ + USER_NOT_IN_ERASED_STATE = 3, + + /** + * Failed to Load the source Tweet while unretweeting stale revisions in an edit chain. + */ + SOURCE_TWEET_NOT_FOUND = 4, + + RESERVED_4 = 5, + RESERVED_5 = 6, + RESERVED_6 = 7, + RESERVED_7 = 8 +} + +enum DeletedTweetState { + /** + * The tweet has been marked as deleted but has not been permanently deleted. + */ + SOFT_DELETED = 1 + + /** + * The tweet has never existed. + */ + NOT_FOUND = 2 + + /** + * The tweet has been permanently deleted. 
+ */ + HARD_DELETED = 3 + + /** + * The tweet exists and is not currently deleted. + */ + NOT_DELETED = 4 + + RESERVED1 = 5 + RESERVED2 = 6 + RESERVED3 = 7 +} + +/** + * Hydrations to perform on the Tweet returned by post_tweet and post_retweet. + */ +struct WritePathHydrationOptions { + /** + * Return cards for tweets with cards in Tweet.cards or Tweet.card2 + * + * card2 also requires setting a valid cards_platform_key + */ + 1: bool include_cards = 0 + + /** + * The card format version supported by the requesting client + */ + 2: optional string cards_platform_key + + # 3: obsolete + # 4: obsolete + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 5: optional binary extensions_args + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. Also see simple_quoted_tweet + * field in GetTweetOptions and GetTweetFieldsOptions + */ + 6: bool simple_quoted_tweet = 0 +} + +struct RetweetRequest { + /** + * Id of the tweet being retweeted. + */ + 1: required i64 source_status_id (personalDataType = 'TweetId') + + /** + * User creating the retweet. 
+ */ + 2: required i64 user_id (personalDataType = 'UserId') + + /** + * @see PostTweetRequest.created_via + */ + 3: required string created_via (personalDataType = 'ClientType') + 4: optional i64 contributor_user_id (personalDataType = 'UserId') // no longer supported + + /** + * @see PostTweetRequest.tracking_id + */ + 5: optional i64 tracking_id (personalDataType = 'ImpressionId') + 6: optional tweet.Narrowcast narrowcast + + /** + * @see PostTweetRequest.nullcast + */ + 7: bool nullcast = 0 + + /** + * @see PostTweetRequest.dark + */ + 8: bool dark = 0 + + // OBSOLETE 9: bool send_retweet_sms_push = 0 + + 10: optional WritePathHydrationOptions hydration_options + + /** + * @see PostTweetRequest.additional_fields + */ + 11: optional tweet.Tweet additional_fields + + /** + * @see PostTweetRequest.uniqueness_id + */ + 12: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + 13: optional feature_context.FeatureContext feature_context + + 14: bool return_success_on_duplicate = 0 + + /** + * Passthrough data for Scarecrow that is used for safety checks. + */ + 15: optional safety_meta_data.SafetyMetaData safety_meta_data + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 16: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') +}(hasPersonalData = 'true') + +/** + * A request to set or unset nsfw_admin and/or nsfw_user. + */ +struct UpdatePossiblySensitiveTweetRequest { + /** + * Id of tweet being updated + */ + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * Id of the user initiating this request. 
+ * + * It could be either the owner of the tweet or an admin. It is used when + * auditing the request in Guano. + */ + 2: required i64 by_user_id (personalDataType = 'UserId') + + /** + * New value for tweet.core_data.nsfw_admin. + */ + 3: optional bool nsfw_admin + + /** + * New value for tweet.core_data.nsfw_user. + */ + 4: optional bool nsfw_user + + /** + * Host or remote IP where the request originated. + * + * This data is used when auditing the request in Guano. If unset, it will + * be logged as "". + */ + 5: optional string host (personalDataType = 'IpAddress') + + /** + * Pass-through message sent to the audit service. + */ + 6: optional string note +}(hasPersonalData = 'true') + +struct UpdateTweetMediaRequest { + /** + * The tweet id that's being updated + */ + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * A mapping from old (existing) media ids on the tweet to new media ids. + * + * Existing tweet media not in this map will remain unchanged. + */ + 2: required map old_to_new_media_ids (personalDataTypeKey = 'MediaId', personalDataTypeValue = 'MediaId') +}(hasPersonalData = 'true') + +struct TakedownRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * The list of takedown country codes to add to the tweet. + * + * DEPRECATED, reasons_to_add should be used instead. + */ + 2: list countries_to_add = [] (personalDataType = 'ContentRestrictionStatus') + + /** + * This field is the list of takedown country codes to remove from the tweet. + * + * DEPRECATED, reasons_to_remove should be used instead. + */ + 3: list countries_to_remove = [] (personalDataType = 'ContentRestrictionStatus') + + /** + * This field is the list of takedown reasons to add to the tweet. + */ + 11: list reasons_to_add = [] + + /** + * This field is the list of takedown reasons to remove from the tweet. + */ + 12: list reasons_to_remove = [] + + /** + * Motivation for the takedown which is written to the audit service. 
+ * + * This data is not persisted with the takedown itself. + */ + 4: optional string audit_note (personalDataType = 'AuditMessage') + + /** + * Whether to send this request to the audit service. + */ + 5: bool scribe_for_audit = 1 + + // DEPRECATED, this field is no longer used. + 6: bool set_has_takedown = 1 + + // DEPRECATED, this field is no longer used. + 7: optional list previous_takedown_country_codes (personalDataType = 'ContentRestrictionStatus') + + /** + * Whether this request should enqueue a TweetTakedownEvent to EventBus and + * Hosebird. + */ + 8: bool eventbus_enqueue = 1 + + /** + * ID of the user who initiated the takedown. + * + * This is used when writing the takedown to the audit service. If unset, it + * will be logged as -1. + */ + 9: optional i64 by_user_id (personalDataType = 'UserId') + + /** + * Host or remote IP where the request originated. + * + * This data is used when auditing the request in Guano. If unset, it will + * be logged as "". + */ + 10: optional string host (personalDataType = 'IpAddress') +}(hasPersonalData = 'true') + +// Arguments to delete_location_data +struct DeleteLocationDataRequest { + 1: i64 user_id (personalDataType = 'UserId') +}(hasPersonalData = 'true') + +// structs for API V2 (flexible schema) + +struct GetTweetOptions { + /** + * Return the original tweet in GetTweetResult.source_tweet for retweets. + */ + 1: bool include_source_tweet = 1 + + /** + * Return the hydrated Place object in Tweet.place for tweets with geolocation. + */ + 2: bool include_places = 0 + + /** + * Language used for place names when include_places is true. Also passed to + * the cards service, if cards are hydrated for the request. 
+ */ + 3: string language_tag = "en" + + /** + * Return cards for tweets with cards in Tweet.cards or Tweet.card2 + * + * card2 also requires setting a valid cards_platform_key + */ + 4: bool include_cards = 0 + + /** + * Return the number of times a tweet has been retweeted in + * Tweet.counts.retweet_count. + */ + 5: bool include_retweet_count = 0 + + /** + * Return the number of direct replies to a tweet in + * Tweet.counts.reply_count. + */ + 6: bool include_reply_count = 0 + + /** + * Return the number of favorites a tweet has received in + * Tweet.counts.favorite_count. + */ + 7: bool include_favorite_count = 0 + + # OBSOLETE 8: bool include_unique_users_impressed_count = 0 + # OBSOLETE 9: bool include_click_count = 0 + # OBSOLETE 10: bool include_descendent_reply_count = 0 + + /** + * @deprecated Use safety_level for spam filtering. + */ + 11: optional tweet.SpamSignalType spam_signal_type + + /** + * If the requested tweet is not already in cache, do not add it. + * + * You should set do_not_cache to true if you are requesting old tweets + * (older than 30 days) and they are unlikely to be requested again. + */ + 12: bool do_not_cache = 0 + + /** + * The card format version supported by the requesting client + */ + 13: optional string cards_platform_key (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + /** + * The user for whose perspective this request should be processed. + * + * If you are requesting tweets on behalf of a user, set this to their user + * id. The effect of setting this option is: + * + * - Tweetypie will return protected tweets that the user is allowed to + * access, rather than filtering out protected tweets. + * + * - If this field is set *and* `include_perspectivals` is set, then the + * tweets will have the `perspective` field set to a struct with flags + * that indicate whether the user has favorited, retweeted, or reported + * the tweet in question. 
+ * + * If you have a specific need to access all protected tweets (not + * just tweets that should be accessible to the current user), see the + * documentation for `include_protected`. + */ + 14: optional i64 for_user_id (personalDataType = 'UserId') + + /** + * Do not enforce normal filtering for protected tweets, blocked quote tweets, + * contributor data, etc. This does not affect Visibility Library (http://go/vf) + * based filtering which executes when safety_level is specified, see request + * field 24 safety_level below + * + * If `bypass_visibility_filtering` is true, Tweetypie will not enforce filtering + * for protected tweets, blocked quote tweets, contributor data, etc. and your client + * will receive all tweets regardless of follow relationship. You will also be able + * to access tweets from deactivated and suspended users. This is only necessary + * for special cases, such as indexing or analyzing tweets, or administrator access. + * Since this elevated access is usually unnecessary, and is a security risk, you will + * need to get your client id whitelisted to access this feature. + * + * If you are accessing tweets on behalf of a user, set + * `bypass_visibility_filtering` to false and set `for_user_id`. This will + * allow access to exactly the set of tweets that that user is authorized to + * access, and filter out tweets the user should not be authorized to access + * (returned with a StatusState of PROTECTED_USER). + */ + 15: bool bypass_visibility_filtering = 0 + + /** + * Return the user-specific view of a tweet in Tweet.perspective + * + * for_user_id must also be set. + */ + 16: bool include_perspectivals = 0 + + // OBSOLETE media faces are always included + 17: bool include_media_faces = 0 + + /** + * The flexible schema fields of the tweet to return. + * + * Fields of tweets in the 100+ range will only be returned if they are + * explicitly requested. 
+ */ + 18: list additional_field_ids = [] + + // OBSOLETE 19: bool include_topic_labels = 0 + + /** + * Exclude user-reported tweets from this request. Only applicable if + * forUserId is set. + * + * Users can report individual tweets in the UI as uninteresting, spam, + * sensitive, or abusive. + */ + 20: bool exclude_reported = 0 + + // if set to true, disables suggested tweet visibility checks + // OBSOLETE (TwitterSuggestInfo version of suggested tweets has been removed) + 21: bool obsolete_skip_twitter_suggests_visibility_check = 0 + // OBSOLETE 22: optional set spam_signal_types + + /** + * Return the quoted tweet in GetTweetResult.quoted_tweet + */ + 23: bool include_quoted_tweet = 0 + + /** + * Content filtering policy that will be used to drop or suppress tweets + * from response. The filtering is based on the result of Visibility Library + * and does not affect filtering of tweets from blocked or non-followed protected users, see + * request field 15 bypass_visibility_filtering above + * + * If not specified SafetyLevel.FilterDefault will be used. + */ + 24: optional safety_level.SafetyLevel safety_level + + // obsolete 25: bool include_animated_gif_media_entities = 0 + 26: bool include_profile_geo_enrichment = 0 + // obsolete 27: optional set extensions + 28: bool include_tweet_pivots = 0 + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 29: optional binary extensions_args + + /** + * Return the number of times a tweet has been quoted in Tweet.counts.quote_count + */ + 30: bool include_quote_count = 0 + + /** + * Return media metadata from MediaInfoService in MediaEntity.additional_metadata + */ + 31: bool include_media_additional_metadata = 0 + + /** + * Populate the conversation_muted field of the Tweet for the requesting + * user. + * + * Setting this to true will have no effect unless for_user_id is set. 
+ */ + 32: bool include_conversation_muted = 0 + + /** + * @deprecated go/sunsetting-carousels + */ + 33: bool include_carousels = 0 + + /** + * When enable_deleted_state is true and we have evidence that the + * tweet once existed and was deleted, Tweetypie returns + * StatusState.DELETED or StatusState.BOUNCE_DELETED. (See comments + * on StatusState for details on these two states.) + * + * When enable_deleted_state is false, deleted tweets are + * returned as StatusState.NOT_FOUND. + * + * Note: even when enable_deleted_state is true, a deleted tweet may + * still be returned as StatusState.NOT_FOUND due to eventual + * consistency. + * + * This option is false by default for compatibility with clients + * expecting StatusState.NOT_FOUND. + */ + 34: bool enable_deleted_state = 0 + + /** + * Populate the conversation_owner_id field of the Tweet for the requesting + * user, which translates into is_conversation_owner in birdherd + * + */ + // obsolete 35: bool include_conversation_owner_id = 0 + + /** + * Populate the is_removed_from_conversation field of the Tweet for the requesting + * user. + * + */ + // obsolete 36: bool include_is_removed_from_conversation = 0 + + // To retrieve self-thread metadata request field Tweet.SelfThreadMetadataField + // obsolete 37: bool include_self_thread_info = 0 + + /** + * This option surfaces CardReference field (118) in Tweet thrift object. + * We use card_uri present in card reference, to get access to stored card information. + */ + 37: bool include_card_uri = 0 + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. + */ + 38: bool simple_quoted_tweet = 0 + + /** + * This flag is used and only takes effect if the requested tweet is a creatives container backed + * tweet.
This will suppress the tweet materialization and return tweet not found. + * + * go/creatives-containers-tdd + **/ + 39: bool disable_tweet_materialization = 0 + + + /** + * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service + * is struggling. + **/ + 40: optional bool is_request_sheddable + +}(hasPersonalData = 'true') + +struct GetTweetsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + // @deprecated unused + 2: optional list source_tweet_id_hints (personalDataType = 'TweetId') + 3: optional GetTweetOptions options + // @deprecated unused + 4: optional list quoted_tweet_id_hints (personalDataType = 'TweetId') +}(hasPersonalData = 'true') + +/** + * Can be used to reference an arbitrary nested field of some struct via + * a list of field IDs describing the path of fields to reach the referenced + * field. + */ +struct FieldByPath { + 1: required list field_id_path +} + +struct GetTweetResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + + /** + * Indicates what happened when the tweet was loaded. + */ + 2: required StatusState tweet_state + + /** + * The requested tweet when tweet_state is `FOUND`, `PARTIAL`, or `SUPPRESS`. + * + * This field will be set if the tweet exists, access is authorized, + * and enough data about the tweet is available to materialize a + * tweet. When this field is set, you should look at the tweet_state + * field to determine how to treat this tweet. + * + * If tweet_state is FOUND, then this tweet is complete and passes the + * authorization checks requested in GetTweetOptions. (See + * GetTweetOptions.for_user_id for more information about authorization.)
+ * + * If tweet_state is PARTIAL, then enough data was available to return + * a tweet, but there was an error when loading the tweet that prevented + * some data from being returned (for example, if a request to the cards + * service times out when cards were requested, then the tweet will be + * marked PARTIAL). `missing_fields` indicates which parts of the tweet + * failed to load. When you receive a PARTIAL tweet, it is up to you + * whether to proceed with the degraded tweet data or to consider it a + * failure. For example, a mobile client might choose to display a + * PARTIAL tweet to the user, but not store it in an internal cache. + * + * If tweet_state is SUPPRESS, then the tweet is complete, but soft + * filtering is enabled. This state is intended to hide potentially + * harmful tweets from user's view while not taking away the option for + * the user to override our filtering decision. See http://go/rtf + * (render-time filtering) for more information about how to treat these + * tweets. + */ + 3: optional tweet.Tweet tweet + + /** + * The tweet fields that could not be loaded when tweet_state is `PARTIAL` + * or `SUPPRESS`. + * + * This field will be set when the `tweet_state` is `PARTIAL`, and may + * be set when `tweet_state` is SUPPRESS. It indicates degraded data in + * the `tweet`. Each entry in `missing_fields` indicates a traversal of + * the `Tweet` thrift object terminating at the field that is + * missing. For most non-core fields, the path will just be the field id + * of the field that is missing. + * + * For example, if card2 failed to load for a tweet, the `tweet_state` + * will be `PARTIAL`, the `tweet` field will be set, the Tweet's `card2` + * field will be empty, and this field will be set to: + * + * Set(FieldByPath(Seq(17))) + */ + 4: optional set missing_fields + + /** + * The original tweet when `tweet` is a retweet and + * GetTweetOptions.include_source_tweet is true. 
+ */ + 5: optional tweet.Tweet source_tweet + + /** + * The retweet fields that could not be loaded when tweet_state is `PARTIAL`. + */ + 6: optional set source_tweet_missing_fields + + /** + * The quoted tweet when `tweet` is a quote tweet and + * GetTweetOptions.include_quoted_tweet is true. + */ + 7: optional tweet.Tweet quoted_tweet + + /** + * The quoted tweet fields that could not be loaded when tweet_state is `PARTIAL`. + */ + 8: optional set quoted_tweet_missing_fields + + /** + * The reason that a tweet should not be displayed when tweet_state is + * `SUPPRESS` or `DROP`. + */ + 9: optional safety_result.FilteredReason filtered_reason + + /** + * Hydrated carousel if the tweet contains a carousel URL and + * GetTweetOptions.include_carousels is true. + * + * In this case Carousel Service is requested to hydrate the carousel, and + * the result is stored in this field. + * + * @deprecated go/sunsetting-carousels + */ + 10: optional carousel_service.GetCarouselResult carousel_result + + /** + * If a quoted tweet would be present, but it was filtered out, then + * this field will be set to the reason that it was filtered. + */ + 11: optional safety_result.FilteredReason quoted_tweet_filtered_reason +}(hasPersonalData = 'true') + +union TweetInclude { + /** + * Field ID within the `Tweet` struct to include. All fields may be optionally included + * except for the `id` field. + */ + 1: FieldId tweetFieldId + + /** + * Field ID within the `StatusCounts` struct to include. Only specifically requested + * count fields will be included. Including any `countsFieldId` values automatically + * implies including `Tweet.counts`. + * + */ + 2: FieldId countsFieldId + + /** + * Field ID within the `MediaEntity` struct to include.
Currently, only `MediaEntity.additionalMetadata` + * may be optionally included (i.e., it will not be included by default if you include + * `tweetFieldId` = `Tweet.media` without also including `mediaEntityFieldId` = + * `MediaEntity.additionalMetadata`). Including any `mediaEntityFieldId` values automatically + * implies including `Tweet.media`. + */ + 3: FieldId mediaEntityFieldId +} + +/** + * An enumeration of policy options indicating how tweets should be filtered (protected tweets, blocked quote tweets, + * contributor data, etc.). This does not affect Visibility Library (http://go/vf) based filtering. + * This is equivalent to `bypass_visibility_filtering` in the get_tweets() call. This means that + * `TweetVisibilityPolicy.NO_FILTERING` is equivalent to `bypass_visibility_filtering` = true + */ +enum TweetVisibilityPolicy { + /** + * only return tweets that should be visible to either the `forUserId` user, if specified, + * or from the perspective of a logged-out user if `forUserId` is not specified. This option + * should always be used if requesting data to be returned via the public API. + */ + USER_VISIBLE = 1, + + /** + * returns all tweets that can be found, regardless of user visibility. This option should + * never be used when gathering data to be returned in an API, and should only be used for internal + * processing. Because this option allows access to potentially sensitive data, clients + * must be whitelisted to use it. + */ + NO_FILTERING = 2 +} + +struct GetTweetFieldsOptions { + /** + * Identifies which `Tweet` or nested fields to include in the response. + */ + 1: required set tweet_includes + + /** + * If true and the requested tweet is a retweet, then a `Tweet` + * containing the requested fields for the retweeted tweet will be + * included in the response.
+ */ + 2: bool includeRetweetedTweet = 0 + + /** + * If true and the requested tweet is a quote-tweet, then the quoted + * tweet will also be queried and the result for the quoted tweet + * included in `GetTweetFieldsResult.quotedTweetResult`. + */ + 3: bool includeQuotedTweet = 0 + + /** + * If true and the requested tweet contains a carousel URL, then the + * carousel will also be queried and the result for the carousel + * included in `GetTweetFieldsResult.carouselResult`. + * + * @deprecated go/sunsetting-carousels + */ + 4: bool includeCarousel = 0 + + /** + * If you are requesting tweets on behalf of a user, set this to their + * user id. The effect of setting this option is: + * + * - Tweetypie will return protected tweets that the user is allowed + * to access, rather than filtering out protected tweets, when `visibility_policy` + * is set to `USER_VISIBLE`. + * + * - If this field is set *and* `Tweet.perspective` is requested, then + * the tweets will have the `perspective` field set to a struct with + * flags that indicate whether the user has favorited, retweeted, or + * reported the tweet in question. + */ + 10: optional i64 forUserId (personalDataType = 'UserId') + + /** + * language_tag is used when hydrating a `Place` object, to get localized names. + * Also passed to the cards service, if cards are hydrated for the request. + */ + 11: optional string languageTag (personalDataType = 'InferredLanguage') + + /** + * if requesting card2 cards, you must specify the platform key + */ + 12: optional string cardsPlatformKey (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + /** + * The argument passed to the Stratostore extension points mechanism. + */ + 13: optional binary extensionsArgs + + /** + * the policy to use when filtering tweets for basic visibility. 
+ */ + 20: TweetVisibilityPolicy visibilityPolicy = TweetVisibilityPolicy.USER_VISIBLE + + /** + * Content filtering policy that will be used to drop or suppress tweets from response. + * The filtering is based on the result of Visibility Library (http://go/vf) + * and does not affect filtering of tweets from blocked or non-followed protected users, see + * request field 20 visibilityPolicy above + * + * If not specified SafetyLevel.FilterNone will be used. + */ + 21: optional safety_level.SafetyLevel safetyLevel + + /** + * The tweet result won't be cached by Tweetypie if doNotCache is true. + * You should set it as true if old tweets (older than 30 days) are requested, + * and they are unlikely to be requested again. + */ + 30: bool doNotCache = 0 + + /** + * When returning a tweet that quotes another tweet, do not include + * the URL to the quoted tweet in the tweet text and url entities. + * This is intended for clients that use the quoted_tweet field of + * the tweet to display quoted tweets. + * + */ + 31: bool simple_quoted_tweet = 0 + + /** + * This flag is used and only takes effect if the requested tweet is a creatives container backed + * tweet. This will suppress the tweet materialization and return tweet not found. + * + * go/creatives-containers-tdd + **/ + 32: bool disable_tweet_materialization = 0 + + /** + * Used for load shedding. If set to true, Tweetypie service might shed the request, if the service + * is struggling. + **/ + 33: optional bool is_request_sheddable +}(hasPersonalData = 'true') + +struct GetTweetFieldsRequest { + 1: required list tweetIds (personalDataType = 'TweetId') + 2: required GetTweetFieldsOptions options +} (hasPersonalData = 'true') + +/** + * Used in `TweetFieldsResultState` when the requested tweet is found. + */ +struct TweetFieldsResultFound { + 1: required tweet.Tweet tweet + + /** + * If `tweet` is a retweet, `retweetedTweet` will be the retweeted tweet.
+ * Just like with the requested tweet, only the requested fields will be + * hydrated and set on the retweeted tweet. + */ + 2: optional tweet.Tweet retweetedTweet + + /** + * If specified, then the tweet should be soft filtered. + */ + 3: optional safety_result.FilteredReason suppressReason +} + +/** + * Used in `TweetFieldsResultState` when the requested tweet is not found. + */ +struct TweetFieldsResultNotFound { + // If this field is true, then we know that the tweet once existed and + // has since been deleted. + 1: bool deleted = 0 + + // This tweet is deleted after being bounced for violating the Twitter + // Rules and should never be rendered or undeleted. See go/bounced-tweet + // In certain timelines we render a tombstone in its place. + 2: bool bounceDeleted = 0 + + // The reason that a tweet should not be displayed. See go/vf-tombstones-in-tweetypie + // Tweets that are not found do not go through Visibility Filtering rule evaluation and thus + // are not `TweetFieldsResultFiltered`, but may still have a filtered_reason that distinguishes + // whether the unavailable tweet should be tombstoned or hard-filtered based on the Safety Level. + 3: optional safety_result.FilteredReason filtered_reason +} + +struct TweetFieldsPartial { + 1: required TweetFieldsResultFound found + + /** + * The tweet fields that could not be loaded when hydration fails + * and a backend fails with an exception. This field is populated + * when a tweet is "partially" hydrated, i.e. some fields were + * successfully fetched while others were not. + * + * It indicates degraded data in the `tweet`. Each entry in `missing_fields` + * indicates a traversal of the `Tweet` thrift object terminating at + * the field that is missing. For most non-core fields, the path will + * just be the field id of the field that is missing.
+ * + * For example, if card2 failed to load for a tweet, the tweet is marked "partial", + * the `tweet` field will be set, the Tweet's `card2` + * field will be empty, and this field will be set to: + * + * Set(FieldByPath(Seq(17))) + */ + 2: required set missingFields + + /** + * Same as `missingFields` but for the source tweet in case the requested tweet + * was a retweet. + */ + 3: required set sourceTweetMissingFields +} +/** + * Used in `TweetFieldsResultState` when there was a failure loading the requested tweet. + */ +struct TweetFieldsResultFailed { + /** + * If true, the failure was the result of backpressure, which means the request + * should not be immediately retried. It is safe to retry again later. + * + * If false, the failure is probably transient and safe to retry immediately. + */ + 1: required bool overCapacity + + /** + * An optional message about the cause of the failure. + */ + 2: optional string message + + /** + * This field is populated when some tweet fields fail to load and the + * tweet is marked "partial" in tweetypie. It contains the tweet/RT + * information along with the set of tweet fields that failed to + * get populated. + */ + 3: optional TweetFieldsPartial partial +} + +/** + * Used in `TweetFieldsResultState` when the requested tweet has been filtered out. + */ +struct TweetFieldsResultFiltered { + 1: required safety_result.FilteredReason reason +} + +/** + * A union of the different possible outcomes of fetching a single tweet. + */ +union TweetFieldsResultState { + 1: TweetFieldsResultFound found + 2: TweetFieldsResultNotFound notFound + 3: TweetFieldsResultFailed failed + 4: TweetFieldsResultFiltered filtered +} + +/** + * The response to get_tweet_fields will include a TweetFieldsResultRow for each + * requested tweet id. + */ +struct GetTweetFieldsResult { + /** + * The id of the requested tweet.
+ */ + 1: required i64 tweetId (personalDataType = 'TweetId') + + /** + * the result for the requested tweet + */ + 2: required TweetFieldsResultState tweetResult + + /** + * If quoted-tweets were requested and the primary tweet was found, + * this field will contain the result state for the quoted tweet. + */ + 3: optional TweetFieldsResultState quotedTweetResult + + /** + * If the primary tweet was found, carousels were requested and there + * was a carousel URL in the primary tweet, this field will contain the + * result for the carousel. + * + * @deprecated + */ + 4: optional carousel_service.GetCarouselResult carouselResult +} + +struct TweetCreateConversationControlByInvitation { + 1: optional bool invite_via_mention +} + +struct TweetCreateConversationControlCommunity { + 1: optional bool invite_via_mention +} + +struct TweetCreateConversationControlFollowers { + 1: optional bool invite_via_mention +} + +/** + * Specify limits on user participation in a conversation. + * + * This is a union rather than a struct to support adding conversation + * controls that require carrying metadata along with them, such as a list id.
+ * + * See also: + * Tweet.conversation_control + * PostTweetRequest.conversation_control + */ +union TweetCreateConversationControl { + 1: TweetCreateConversationControlCommunity community + 2: TweetCreateConversationControlByInvitation byInvitation + 3: TweetCreateConversationControlFollowers followers +} + +/* + * Specifies the exclusivity of a tweet + * This limits the audience of the tweet to the author + * and the author's super followers + * While empty now, we are expecting to add additional fields in v1+ + */ +struct ExclusiveTweetControlOptions {} + +struct TrustedFriendsControlOptions { + 1: i64 trusted_friends_list_id = 0 (personalDataType = 'TrustedFriendsListMetadata') +}(hasPersonalData = 'true') + +struct CollabInvitationOptions { + 1: required list collaborator_user_ids (personalDataType = 'UserId') + // Note: status not sent here, will be added in TweetBuilder to set all but author as PENDING +} + +struct CollabTweetOptions { + 1: required list collaborator_user_ids (personalDataType = 'UserId') +} + +union CollabControlOptions { + 1: CollabInvitationOptions collabInvitation + 2: CollabTweetOptions collabTweet +} + +/** + * When this struct is supplied, this PostTweetRequest is interpreted as + * an edit of the Tweet whose latest version is represented by previous_tweet_id. + * If this is the first edit of a Tweet, this will be the same as the initial_tweet_id. + **/ +struct EditOptions { + /** + * The ID of the previous latest version of the Tweet that is being edited. + * If this is the first edit, this will be the same as the initial_tweet_id. + **/ + 1: required i64 previous_tweet_id (personalDataType = 'TweetId') +} + +struct NoteTweetOptions { + /** + * The ID of the NoteTweet to be associated with this Tweet. 
+ **/ + 1: required i64 note_tweet_id (personalDataType = 'TwitterArticleID') + // Deprecated + 2: optional list mentioned_screen_names (personalDataType = 'Username') + /** + * The user IDs of the mentioned users + **/ + 3: optional list mentioned_user_ids (personalDataType = 'UserId') + /** + * Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text + **/ + 4: optional bool is_expandable +} + +struct PostTweetRequest { + /** + * Id of the user creating the tweet. + */ + 1: required i64 user_id (personalDataType = 'UserId') + + /** + * The user-supplied text of the tweet. + */ + 2: required string text (personalDataType = 'PrivateTweets, PublicTweets') + + /** + * The OAuth client application from which the creation request originated. + * + * This must be in the format "oauth:". For requests + * from a user this is the application id of their client; for internal + * services this is the id of an associated application registered at + * https://apps.twitter.com. + */ + 3: required string created_via (personalDataType = 'ClientType') + + 4: optional i64 in_reply_to_tweet_id (personalDataType = 'TweetId') + 5: optional TweetCreateGeo geo + 6: optional list media_upload_ids (personalDataType = 'MediaId') + 7: optional tweet.Narrowcast narrowcast + + /** + * Do not deliver this tweet to a user's followers. + * + * When true this tweet will not be fanned out, appear in the user's + * timeline, or appear in search results. It will be distributed via the + * firehose and available in the public API. + * + * This is primarily used to create tweets that can be used as ads without + * broadcasting them to an advertiser's followers. + * + */ + 8: bool nullcast = 0 + + /** + * The impression id of the ad from which this tweet was created. + * + * This is set when a user retweets or replies to a promoted tweet. It is + * used to attribute the "earned" exposure of an advertisement. 
+ */ + 9: optional i64 tracking_id (personalDataType = 'ImpressionId') + + /** + * @deprecated. + * TOO clients don't actively use this input param, and the v2 API does not plan + * to expose this parameter. The value associated with this field that's + * stored with a tweet is obtained from the user's account preferences stored in + * `User.safety.nsfw_user`. (See go/user.thrift for more details on this field) + * + * Field indicates whether an individual tweet may contain objectionable content. + * + * If specified, tweet.core_data.nsfw_user will equal this value (otherwise, + * tweet.core_data.nsfw_user will be set to user.nsfw_user). + */ + 10: optional bool possibly_sensitive + + /** + * Do not save, index, fanout, or otherwise persist this tweet. + * + * When true, the tweet is validated, created, and returned but is not + * persisted. This can be used for dark testing or pre-validating a tweet + * scheduled for later creation. + */ + 11: bool dark = 0 + + /** + * IP address of the user making the request. + * + * This is used for logging certain kinds of actions, like attempting to + * tweet malware urls. + */ + 12: optional string remote_host (personalDataType = 'IpAddress') + + /** + * Additional fields to write with this tweet. + * + * This Tweet object should contain only additional fields to write with + * this tweet. Additional fields are tweet fields with id > 100. Set + * tweet.id to be 0; the id will be generated by Tweetypie. Any other non- + * additional fields set on this tweet will be considered an invalid + * request. + * + */ + 14: optional tweet.Tweet additional_fields + + 15: optional WritePathHydrationOptions hydration_options + + // OBSOLETE 16: optional bool bypass_rate_limit_for_xfactor + + /** + * ID to explicitly identify a creation request for the purpose of rejecting + * duplicates. + * + * If two requests are received with the same uniqueness_id, then they will + * be considered duplicates of each other.
This only applies for tweets + * created within the same datacenter. This id should be a snowflake id so + * that it's globally unique. + */ + 17: optional i64 uniqueness_id (personalDataType = 'PrivateTweetEntitiesAndMetadata, PublicTweetEntitiesAndMetadata') + + 18: optional feature_context.FeatureContext feature_context + + /** + * Passthrough data for Scarecrow that is used for safety checks. + */ + 19: optional safety_meta_data.SafetyMetaData safety_meta_data + + // OBSOLETE 20: bool community_narrowcast = 0 + + /** + * Toggle narrowcasting behavior for leading @mentions. + * + * If in_reply_to_tweet_id is not set: + * - When this flag is true and the tweet text starts with a leading mention then the tweet + * will be narrowcasted. + * + * If in_reply_to_tweet_id is set: + * - If auto_populate_reply_metadata is true + * - Setting this flag to true will use the default narrowcast determination logic where + * most replies will be narrowcast but some special-cases of self-replies will not. + * - Setting this flag to false will disable narrowcasting and the tweet will be fanned out + * to all the author's followers. Previously users prefixed their reply text with "." to + * achieve this effect. + * - If auto_populate_reply_metadata is false, this flag will control whether a leading + * mention in the tweet text will be narrowcast (true) or broadcast (false). + */ + 21: bool enable_tweet_to_narrowcasting = 1 + + /** + * Automatically populate replies with leading mentions from tweet text. + */ + 22: bool auto_populate_reply_metadata = 0 + + /** + * Metadata at the tweet-asset relationship level. + */ + 23: optional map media_metadata + + /** + * An optional URL that identifies a resource that is treated as an attachment of the + * tweet, such as a quote-tweet permalink. + * + * When provided, it is appended to the end of the tweet text, but is not + * included in the visible_text_range.
+ */ + 24: optional string attachment_url (personalDataType = 'CardId, ShortUrl') + + /** + * Pass-through information to be published in `TweetCreateEvent`. + * + * This data is not persisted by Tweetypie. + * + * @deprecated prefer transient_context (see field 27) over this. + */ + 25: optional map additional_context + + /** + * Users to exclude from the automatic reply population behavior. + * + * When auto_populate_reply_metadata is true, screen names appearing in the + * mention prefix can be excluded by specifying a corresponding user id in + * exclude_reply_user_ids. Because the mention prefix must always include + * the leading mention to preserve directed-at addressing for the in-reply- + * to tweet author, attempting to exclude that user id will have no effect. + * Specifying a user id not in the prefix will be silently ignored. + */ + 26: optional list exclude_reply_user_ids (personalDataType = 'UserId') + + /** + * Used to pass structured data to Tweetypie and tweet_events eventbus + * stream consumers. This data is not persisted by Tweetypie. + * + * If adding a new passthrough field, prefer this over additional_context, + * as this is structured data, while additional_context is text data. + */ + 27: optional transient_context.TransientCreateContext transient_context + + /** + * Composer flow used to create this tweet. Unless using the News Camera (go/newscamera) + * flow, this should be `STANDARD`. + * + * When set to `CAMERA`, clients are expected to display the tweet with a different UI + * to emphasize attached media. 
+ */ + 28: optional tweet.ComposerSource composer_source + + /** + * present if we want to restrict replies to this tweet (go/dont-at-me-api) + * - This gets converted to Tweet.conversation_control and changes type + * - This is only valid for conversation root tweets + * - This applies to all replies to this tweet + */ + 29: optional TweetCreateConversationControl conversation_control + + // OBSOLETE 30: optional jiminy.CreateNudgeOptions nudge_options + + /** + * Provided if the client wants to have the tweet create evaluated for a nudge (e.g. to notify + * the user that they are about to create a toxic tweet). Reference: go/docbird/jiminy + */ + 31: optional jiminy.CreateTweetNudgeOptions nudge_options + + /** + * Provided for correlating requests originating from REST endpoints and GraphQL endpoints. + * Its presence or absence does not affect Tweet mutation. It is used for validation + * and debugging. The expected format is a 36-character ASCII UUIDv4. + * Please see API specification at go/graphql-tweet-mutations for more information. + */ + 32: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') + + /** + * Options that determine the shape of an exclusive tweet's restrictions. + * The existence of this object indicates that the tweet is intended to be an exclusive tweet. + * While this is an empty structure for now, it will have fields added to it in later versions. + */ + 33: optional ExclusiveTweetControlOptions exclusiveTweetControlOptions + + 34: optional TrustedFriendsControlOptions trustedFriendsControlOptions + + /** + * Provided if tweet data is backed up by a creative container, that at tweet hydration + * time, tweetypie would delegate to creative container service. + * + * go/creatives-containers-tdd + * Please note that this id is never publicly shared with clients, it is only used for + * internal purposes.
+ */ + 35: optional i64 underlying_creatives_container_id (personalDataType = 'TweetId') + + /** + * Provided if tweet is a CollabTweet or a CollabInvitation, along with a list of Collaborators + * which includes the original author. + * + * go/collab-tweets + **/ + 36: optional CollabControlOptions collabControlOptions + + /** + * When supplied, this PostTweetRequest is an edit. See [[EditOptions]] for more details. + **/ + 37: optional EditOptions editOptions + + /** + * When supplied, the NoteTweet specified is associated with the created Tweet. + **/ + 38: optional NoteTweetOptions noteTweetOptions +} (hasPersonalData = 'true') + +struct SetAdditionalFieldsRequest { + 1: required tweet.Tweet additional_fields +} + +struct DeleteAdditionalFieldsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: required list field_ids +}(hasPersonalData = 'true') + +struct DeleteTweetsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + // DEPRECATED and moved to tweetypie_internal.thrift's CascadedDeleteTweetsRequest + 2: optional i64 cascaded_from_tweet_id (personalDataType = 'TweetId') + 3: optional tweet_audit.AuditDeleteTweet audit_passthrough + + /** + * The id of the user initiating this request. + * + * It could be either the owner of the tweet or an admin. If not specified + * we will use TwitterContext.userId. + */ + 4: optional i64 by_user_id (personalDataType = 'UserId') + + + /** + * Whether these tweets are being deleted as part of a user erasure, the process + * of deleting tweets belonging to deactivated accounts. + * + * This lets backends optimize processing of mass deletes of tweets from the + * same user. Talk to the Tweetypie team before setting this flag. + */ + 5: bool is_user_erasure = 0 + + /** + * Id to compare with the user id of the tweets being deleted. + * + * This provides extra protection against accidental deletion of tweets. + * This is required when is_user_erasure is true.
If any of the tweets + * specified in tweet_ids do not match expected_user_id, an + * EXPECTED_USER_ID_MISMATCH state will be returned. + */ + 6: optional i64 expected_user_id (personalDataType = 'UserId') + + /** + * A bounced tweet is a tweet that has been found to violate Twitter Rules. + * This is represented as a tweet with its bounce_label field set. + * + * When the Tweet owner deletes their offending bounced tweet in the Bounced workflow, Bouncer + * will submit a delete request with `is_bounce_delete` set to true. If the tweet(s) being deleted + * have a bounce_label set, this request results in the tweet transitioning into the + * BounceDeleted state which means the tweet is partially deleted. + * + * Most of the normal tweet deletion side-effects occur but the tweet remains in a + * few tflock graphs, tweet cache, and a Manhattan marker is added. Other than timelines services, + * bounce deleted tweets are considered deleted and will return a StatusState.BounceDelete. + * + * After a defined grace period, tweets in this state will be fully deleted. + * + * If the tweet(s) being deleted do not have the bounce_label set, they will be deleted as usual. + * + * Other than Bouncer, no service should use `is_bounce_delete` flag. + */ + 7: bool is_bounce_delete = 0 + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 8: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') + + /** + * When an edited tweet is deleted via daemons, we take a different action + * than if it was deleted normally. If deleted normally, we delete the + * initial tweet in the chain.
When deleted via daemons, we delete the actual tweet. + */ + 9: optional bool cascaded_edited_tweet_deletion +}(hasPersonalData = 'true') + +struct DeleteTweetResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required TweetDeleteState state +}(hasPersonalData = 'true') + +struct UnretweetResult { + /** + * Id of the retweet that was deleted if a retweet could be found. + */ + 1: optional i64 tweet_id (personalDataType = 'TweetId') + + 2: required TweetDeleteState state +}(hasPersonalData = 'true') + +struct PostTweetResult { + 1: required TweetCreateState state + + /** + * The created tweet when state is OK. + */ + 2: optional tweet.Tweet tweet + + /** + * The original tweet when state is OK and tweet is a retweet. + */ + 3: optional tweet.Tweet source_tweet + + /** + * The quoted tweet when state is OK and tweet is a quote tweet. + */ + 4: optional tweet.Tweet quoted_tweet + + /** + * The required user remediation from Scarecrow when state is BOUNCE. + */ + 5: optional bounce.Bounce bounce + + /** + * Additional information when TweetCreateState is not OK. + * + * Not all failures provide a reason. + */ + 6: optional string failure_reason + + // OBSOLETE 7: optional jiminy.Nudge nudge + + /** + * Returned when the state is NUDGE to indicate that the tweet has not been created, and that + * the client should instead display the nudge to the user. Reference: go/docbird/jiminy + */ + 8: optional jiminy.TweetNudge nudge +} (persisted = "true", hasPersonalData = "true") + +/** + * Specifies the cause of an AccessDenied error. + */ +enum AccessDeniedCause { + // obsolete: INVALID_CLIENT_ID = 0, + // obsolete: DEPRECATED = 1, + USER_DEACTIVATED = 2, + USER_SUSPENDED = 3, + + RESERVED_4 = 4, + RESERVED_5 = 5, + RESERVED_6 = 6 +} + +/** + * AccessDenied error is returned by delete_tweets endpoint when + * by_user_id is suspended or deactivated. 
+ */ +exception AccessDenied { + 1: required string message + 2: optional AccessDeniedCause errorCause +} + +struct UndeleteTweetRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: optional WritePathHydrationOptions hydration_options + + /** + * Perform the side effects of undeletion even if the tweet is not deleted. + * + * This flag is useful if you know that the tweet is present in Manhattan + * but is not undeleted with respect to other services. + */ + 3: optional bool force +}(hasPersonalData = 'true') + +struct UndeleteTweetResponse { + 1: required UndeleteTweetState state + 2: optional tweet.Tweet tweet +} + +struct EraseUserTweetsRequest { + 1: required i64 user_id (personalDataType = 'UserId') +}(hasPersonalData = 'true') + +struct UnretweetRequest { + /** + * The id of the user who owns the retweet. + */ + 1: required i64 user_id (personalDataType = 'UserId') + + /** + * The source tweet that should be unretweeted. + */ + 2: required i64 source_tweet_id (personalDataType = 'TweetId') + + /** + * This is a unique identifier used in both the REST and GraphQL-dark + * requests that will be used to correlate the GraphQL mutation requests to the REST requests + * during a transition period when clients will be moving toward tweet creation via GraphQL. + * See also, the "Comparison Testing" section at go/tweet-create-on-graphql-tdd for additional + * context. + */ + 3: optional string comparison_id (personalDataType = 'UniversallyUniqueIdentifierUuid') +}(hasPersonalData = 'true') + +struct GetDeletedTweetsRequest { + 1: required list tweetIds (personalDataType = 'TweetId') +}(hasPersonalData = 'true') + +struct GetDeletedTweetResult { + 1: required i64 tweetId (personalDataType = 'TweetId') + 2: required DeletedTweetState state + 4: optional deleted_tweet.DeletedTweet tweet +}(hasPersonalData = 'true') + +/** + * Flushes tweets and/or their counts from cache. 
+ * + * Typically will be used manually for testing or when a particular problem is + * found that needs to be fixed by hand. Defaults to flushing both tweet + * struct and associated counts. + */ +struct FlushRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: bool flushTweets = 1 + 3: bool flushCounts = 1 +}(hasPersonalData = 'true') + +/** + * A request to retrieve counts for one or more tweets. + */ +struct GetTweetCountsRequest { + 1: required list tweet_ids (personalDataType = 'TweetId') + 2: bool include_retweet_count = 0 + 3: bool include_reply_count = 0 + 4: bool include_favorite_count = 0 + 5: bool include_quote_count = 0 + 6: bool include_bookmark_count = 0 +}(hasPersonalData = 'true') + +/** + * A response optionally indicating one or more counts for a tweet. + */ +struct GetTweetCountsResult { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: optional i64 retweet_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') + 3: optional i64 reply_count (personalDataType = 'CountOfPrivateReplies, CountOfPublicReplies') + 4: optional i64 favorite_count (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') + 5: optional i64 quote_count (personalDataType = 'CountOfPrivateRetweets, CountOfPublicRetweets') + 6: optional i64 bookmark_count (personalDataType = 'CountOfPrivateLikes') +}(hasPersonalData = 'true') + +/** + * A request to increment the cached favorites count for a tweet. + * + * Negative values decrement the count. This request is automatically + * replicated to other data centers. + */ +struct IncrTweetFavCountRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required i32 delta (personalDataType = 'CountOfPrivateLikes, CountOfPublicLikes') +}(hasPersonalData = 'true') + +/** + * A request to increment the cached bookmarks count for a tweet. + * + * Negative values decrement the count. This request is automatically + * replicated to other data centers. 
+ */ +struct IncrTweetBookmarkCountRequest { + 1: required i64 tweet_id (personalDataType = 'TweetId') + 2: required i32 delta (personalDataType = 'CountOfPrivateLikes') +}(hasPersonalData = 'true') + +/** + * Request to scrub geolocation from 1 or more tweets, and replicates to other + * data centers. + */ +struct GeoScrub { + 1: required list status_ids (personalDataType = 'TweetId') + // OBSOLETE 2: bool write_through = 1 + 3: bool hosebird_enqueue = 0 + 4: i64 user_id = 0 (personalDataType = 'UserId') // should always be set for hosebird enqueue +}(hasPersonalData = 'true') + +/** + * Contains different indicators of a tweet's "nsfw" status. + */ +struct NsfwState { + 1: required bool nsfw_user + 2: required bool nsfw_admin + 3: optional safety_label.SafetyLabel nsfw_high_precision_label + 4: optional safety_label.SafetyLabel nsfw_high_recall_label +} + +/** + * Interface to Tweetypie + */ +service TweetService { + /** + * Performs a multi-get of tweets. This endpoint is geared towards fetching + * tweets for the API, with many fields returned by default. + * + * The response list is ordered the same as the requested ids list. + */ + list get_tweets(1: GetTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs a multi-get of tweets. This endpoint is geared towards internal + * processing that needs only specific subsets of the data. + * + * The response list is ordered the same as the requested ids list.
+ */ + list get_tweet_fields(1: GetTweetFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link GetTweetCountsRequest} and return one or more {@link GetTweetCountsResult} + */ + list get_tweet_counts(1: GetTweetCountsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set/Update additional fields on an existing tweet + */ + void set_additional_fields(1: SetAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Delete additional fields on a tweet + */ + void delete_additional_fields(1: DeleteAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Creates and saves a tweet. + * + * URLs contained in the text will be shortened via Talon. Validations that are + * handled by this endpoint include: + * + * - tweet length not greater than 140 display characters, after URL shortening; + * - tweet is not a duplicate of a recently created tweet by the same user; + * - user is not suspended or deactivated; + * - text does not contain malware urls, as determined by talon; + * + * Checks that are not handled here that should be handled by the web API: + * - oauth authentication; + * - client application has narrowcasting/nullcasting privileges; + */ + PostTweetResult post_tweet(1: PostTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Creates and saves a retweet. 
+ * + * Validations that are handled by this endpoint include: + * + * - source tweet exists; + * - source-tweet user exists and is not suspended or deactivated; + * - source-tweet user is not blocking retweeter; + * - user has not already retweeted the source tweet; + * + * Checks that are not handled here that should be handled by the web API: + * - oauth authentication; + * - client application has narrowcasting/nullcasting privileges; + */ + PostTweetResult post_retweet(1: RetweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Remove tweets. It removes all associated fields of the tweets in + * cache and the persistent storage. + */ + list delete_tweets(1: DeleteTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error, + 3: AccessDenied access_denied) + + /** + * Restore a deleted Tweet. + * + * Tweets exist in a soft-deleted state for N days during which they can be + * restored by support agents following the internal restoration guidelines. + * If the undelete succeeds, the Tweet is given similar treatment to a new + * tweet e.g inserted into cache, sent to the timeline service, reindexed by + * TFlock etc. + */ + UndeleteTweetResponse undelete_tweet(1: UndeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Add or remove takedown countries associated with a Tweet. + */ + void takedown(1: TakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set or unset the nsfw_admin and/or nsfw_user bit of tweet.core_data. + **/ + void update_possibly_sensitive_tweet(1: UpdatePossiblySensitiveTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error + ) + + /** + * Delete all tweets for a given user. 
Currently only called by Test User Service, but we + * can also use it ad-hoc. + * + * Note: regular user erasure is handled by the EraseUserTweets daemon. + */ + void erase_user_tweets(1: EraseUserTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Unretweet a given tweet. + * + * There are two ways to unretweet: + * - call deleteTweets() with the retweetId + * - call unretweet() with the retweeter userId and sourceTweetId + * + * This is useful if you want to be able to undo a retweet without having to + * keep track of a retweetId. + */ + UnretweetResult unretweet(1: UnretweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Get tweet content and deletion times for soft-deleted tweets. + * + * The response list is ordered the same as the requested ids list. + */ + list get_deleted_tweets(1: GetDeletedTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link FlushRequest} + */ + void flush(1: FlushRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute an {@link IncrTweetFavCountRequest} + */ + void incr_tweet_fav_count(1: IncrTweetFavCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute an {@link IncrTweetBookmarkCountRequest} + */ + void incr_tweet_bookmark_count(1: IncrTweetBookmarkCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Delete location data from all of a user's tweets. + * + * This endpoint initiates the process of deleting the user's location data + * from all of their tweets, as well as clearing the has_geotagged_statuses + * flag of the user. 
This method returns as soon as the event is enqueued, + * but the location data won't be scrubbed until the event is processed. + * Usually the latency for the whole process to complete is small, but it + * could take up to a couple of minutes if the user has a very large number + * of tweets, or if the request gets backed up behind other requests that + * need to scrub a large number of tweets. + * + * The event is processed by the Tweetypie geoscrub daemon. + * + */ + void delete_location_data(1: DeleteLocationDataRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Execute a {@link GeoScrub} request. + * + */ + void scrub_geo(1: GeoScrub geo_scrub) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift new file mode 100644 index 000000000..dd69a3299 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_federated.thrift @@ -0,0 +1,32 @@ +namespace java com.twitter.tweetypie.thriftjava.federated +#@namespace scala com.twitter.tweetypie.thriftscala.federated +#@namespace strato com.twitter.tweetypie.federated + +include "com/twitter/tweetypie/stored_tweet_info.thrift" + +typedef i16 FieldId + +struct GetStoredTweetsView { + 1: bool bypass_visibility_filtering = 0 + 2: optional i64 for_user_id + 3: list additional_field_ids = [] +} + +struct GetStoredTweetsResponse { + 1: stored_tweet_info.StoredTweetInfo stored_tweet +} + +struct GetStoredTweetsByUserView { + 1: bool bypass_visibility_filtering = 0 + 2: bool set_for_user_id = 0 + 3: optional i64 start_time_msec + 4: optional i64 end_time_msec + 5: optional i64 cursor + 6: bool start_from_oldest = 0 + 7: list additional_field_ids = [] +} + +struct GetStoredTweetsByUserResponse { + 1: required list 
stored_tweets + 2: optional i64 cursor +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift new file mode 100644 index 000000000..3aa0ada82 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/tweet_service_graphql.thrift @@ -0,0 +1,391 @@ +namespace java com.twitter.tweetypie.thriftjava.graphql +#@namespace scala com.twitter.tweetypie.thriftscala.graphql +#@namespace strato com.twitter.tweetypie.graphql + +/** + * Reasons for defining "prefetch" structs: + * i) It enables GraphQL prefetch caching + * ii) All tweet mutation operations are defined to support prefetch caching for API consistency + * and future flexibility. (Populating the cache with VF results being a potential use case.) + */ +include "com/twitter/ads/callback/engagement_request.thrift" +include "com/twitter/strato/graphql/existsAndPrefetch.thrift" + +struct UnretweetRequest { + /** + * Tweet ID of the source tweet being referenced in the unretweet. + * Note: The retweet_id isn't being passed here as it will result in a + * successful response, but won't have any effect. This is due to + * how Tweetypie's unretweet endpoint works. + */ + 1: required i64 source_tweet_id ( + strato.json.numbers.type='string', + strato.description='The source tweet to be unretweeted.' + ) + 2: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints.' + ) +} (strato.graphql.typename='UnretweetRequest') + +struct UnretweetResponse { + /** + * The response contains the source tweet's ID being unretweeted. + * Reasons for this: + * i) The operation should return a non-void response to retain consistency + * with other tweet mutation APIs. + * ii) The response struct should define at least one field due to requirements + * of the GraphQL infrastructure. 
+ * iii) This allows the caller to hydrate the source tweet if required and request + * updated counts on the source tweet if desired. (since this operation decrements + * the source tweet's retweet count) + */ + 1: optional i64 source_tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='source_tweet', + strato.description='The source tweet that was unretweeted.' + ) +} (strato.graphql.typename='UnretweetResponse') + +struct UnretweetResponseWithSubqueryPrefetchItems { + 1: optional UnretweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + + +struct CreateRetweetRequest { + 1: required i64 tweet_id (strato.json.numbers.type='string') + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast + 2: bool nullcast = 0 ( + strato.description='Do not deliver this retweet to a user\'s followers. http://go/nullcast' + ) + + // @see com.twitter.ads.callback.thriftscala.EngagementRequest + 3: optional engagement_request.EngagementRequest engagement_request ( + strato.description='The ad engagement from which this retweet was created.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 4: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) +} (strato.graphql.typename='CreateRetweetRequest') + +struct CreateRetweetResponse { + 1: optional i64 retweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='retweet', + strato.description='The created retweet.' 
+ ) +} (strato.graphql.typename='CreateRetweetResponse') + +struct CreateRetweetResponseWithSubqueryPrefetchItems { + 1: optional CreateRetweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + +struct TweetReply { + //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.in_reply_to_tweet_id + 1: i64 in_reply_to_tweet_id ( + strato.json.numbers.type='string', + strato.description='The id of the tweet that this tweet is replying to.' + ) + //@see com.twitter.tweetypie.thriftscala.PostTweetRequest.exclude_reply_user_ids + 2: list exclude_reply_user_ids = [] ( + strato.json.numbers.type='string', + strato.description='Screen names appearing in the mention prefix can be excluded. Because the mention prefix must always include the leading mention to preserve directed-at addressing for the in-reply-to tweet author, attempting to exclude that user id will have no effect. Specifying a user id not in the prefix will be silently ignored.' + ) +} (strato.graphql.typename='TweetReply') + +struct TweetMediaEntity { + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.media_upload_ids + 1: i64 media_id ( + strato.json.numbers.type='string', + strato.description='Media id as obtained from the User Image Service when uploaded.' + ) + + // @see com.twitter.tweetypie.thriftscala.Tweet.media_tags + 2: list tagged_users = [] ( + strato.json.numbers.type='string', + strato.description='List of user_ids to tag in this media entity. Requires Client App Privelege MEDIA_TAGS. Contributors (http://go/teams) are not supported. Tags are silently dropped when unauthorized.' + ) +} (strato.graphql.typename='TweetMediaEntity') + +struct TweetMedia { + 1: list media_entities = [] ( + strato.description='You may include up to 4 photos or 1 animated GIF or 1 video in a Tweet.' + ) + + /** + * @deprecated @see com.twitter.tweetypie.thriftscala.PostTweetRequest.possibly_sensitive for + * more details on why this field is ignored. 
+ */ + 2: bool possibly_sensitive = 0 ( + strato.description='Mark this tweet as possibly containing objectionable media.' + ) +} (strato.graphql.typename='TweetMedia') + +//This is similar to the APITweetAnnotation struct except that here all the id fields are required. +struct TweetAnnotation { + 1: i64 group_id (strato.json.numbers.type='string') + 2: i64 domain_id (strato.json.numbers.type='string') + 3: i64 entity_id (strato.json.numbers.type='string') +} (strato.graphql.typename='TweetAnnotation', strato.case.format='preserve') + +struct TweetGeoCoordinates { + 1: double latitude (strato.description='The latitude of the location this Tweet refers to. The valid range for latitude is -90.0 to +90.0 (North is positive) inclusive.') + 2: double longitude (strato.description='The longitude of the location this Tweet refers to. The valid range for longitude is -180.0 to +180.0 (East is positive) inclusive.') + 3: bool display_coordinates = 1 (strato.description='Whether or not make the coordinates public. When false, geo coordinates are persisted with the Tweet but are not shared publicly.') +} (strato.graphql.typename='TweetGeoCoordinates') + +struct TweetGeo { + 1: optional TweetGeoCoordinates coordinates ( + strato.description='The geo coordinates of the location this Tweet refers to.' + ) + 2: optional string place_id ( + strato.description='A place in the world. See also https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo#place' + ) + 3: optional string geo_search_request_id ( + strato.description='See https://confluence.twitter.biz/display/GEO/Passing+the+geo+search+request+ID' + ) +} ( + strato.graphql.typename='TweetGeo', + strato.description='Tweet geo location metadata. 
See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' +) + +enum BatchComposeMode { + BATCH_FIRST = 1 (strato.description='This is the first Tweet in a batch.') + BATCH_SUBSEQUENT = 2 (strato.description='This is any of the subsequent Tweets in a batch.') +}( + strato.graphql.typename='BatchComposeMode', + strato.description='Indicates whether a Tweet was created using a batch composer, and if so position of a Tweet within the batch. A value of None, indicates that the tweet was not created in a batch. More info: go/batchcompose.' +) + +/** + * Conversation Controls + * See also: + * tweet.thrift/Tweet.conversation_control + * tweet_service.thrift/TweetCreateConversationControl + * tweet_service.thrift/PostTweetRequest.conversation_control + * + * These types are isomorphic/equivalent to tweet_service.thrift/TweetCreateConversationControl* to + * avoid exposing internal service thrift types. + */ +enum ConversationControlMode { + BY_INVITATION = 1 (strato.description='Users that the conversation owner mentions by @screenname in the tweet text are invited.') + COMMUNITY = 2 (strato.description='The conversation owner, invited users, and users who the conversation owner follows can reply.') +} ( + strato.graphql.typename='ConversationControlMode' +) + +struct TweetConversationControl { + 1: ConversationControlMode mode +} ( + strato.graphql.typename='TweetConversationControl', + strato.description='Specifies limits on user participation in a conversation. See also http://go/dont-at-me. Up to one value may be provided. (Conceptually this is a union, however graphql doesn\'t support union types as inputs.)' +) + +// empty for now, but intended to be populated in later iterations of the super follows project. +struct ExclusiveTweetControlOptions {} ( + strato.description='Marks a tweet as exclusive. 
See go/superfollows.', + strato.graphql.typename='ExclusiveTweetControlOptions', +) + +struct EditOptions { + 1: optional i64 previous_tweet_id (strato.json.numbers.type='string', strato.description='previous Tweet id') +} ( + strato.description='Edit options for a Tweet.', + strato.graphql.typename='EditOptions', +) + +struct TweetPeriscopeContext { + 1: bool is_live = 0 ( + strato.description='Indicates if the tweet contains live streaming video. A value of false is equivalent to this struct being undefined in the CreateTweetRequest.' + ) + + // Note that the REST API also defines a context_periscope_creator_id param. The GraphQL + // API infers this value from the TwitterContext Viewer.userId since it should always be + // the same as the Tweet.coreData.userId which is also inferred from Viewer.userId. +} ( + strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name.', + strato.graphql.typename='TweetPeriscopeContext', +) + +struct TrustedFriendsControlOptions { + 1: required i64 trusted_friends_list_id ( + strato.json.numbers.type='string', + strato.description='The ID of the Trusted Friends List whose members can view this tweet.' + ) +} ( + strato.description='Specifies information for a Trusted Friends tweet. 
See go/trusted-friends', + strato.graphql.typename='TrustedFriendsControlOptions', +) + +enum CollabControlType { + COLLAB_INVITATION = 1 (strato.description='This represents a CollabInvitation.') + // Note that a CollabTweet cannot be created through external graphql request, + // rather a user can create a CollabInvitation (which is automatically nullcasted) and a + // public CollabTweet will be created when all Collaborators have accepted the CollabInvitation, + // triggering a strato column to instantiate the CollabTweet directly +}( + strato.graphql.typename='CollabControlType', +) + +struct CollabControlOptions { + 1: required CollabControlType collabControlType + 2: required list collaborator_user_ids ( + strato.json.numbers.type='string', + strato.description='A list of user ids representing all Collaborators on a CollabTweet or CollabInvitation') +}( + strato.graphql.typename='CollabControlOptions', + strato.description='Specifies information about a CollabTweet or CollabInvitation (a union is used to ensure CollabControl defines one or the other). See more at go/collab-tweets.' +) + +struct NoteTweetOptions { + 1: required i64 note_tweet_id ( + strato.json.numbers.type='string', + strato.description='The ID of the Note Tweet that has to be associated with the created Tweet.') + // Deprecated + 2: optional list mentioned_screen_names ( + strato.description = 'Screen names of the users mentioned in the NoteTweet. This is used to set conversation control on the Tweet.') + + 3: optional list mentioned_user_ids ( + strato.description = 'User ids of mentioned users in the NoteTweet. This is used to set conversation control on the Tweet, send mentioned user ids to TLS' + ) + 4: optional bool is_expandable ( + strato.description = 'Specifies if the Tweet can be expanded into the NoteTweet, or if they have the same text' + ) +} ( + strato.graphql.typename='NoteTweetOptions', + strato.description='Note Tweet options for a Tweet.' 
+) + +// NOTE: Some clients were using the dark_request directive in GraphQL to signal that a Tweet should not be persisted +// but this is not recommended, since the dark_request directive is not meant to be used for business logic. +struct UndoOptions { + 1: required bool is_undo ( + strato.description='Set to true if the Tweet is undo-able. Tweetypie will process the Tweet but will not persist it.' + ) +} ( + strato.graphql.typename='UndoOptions' +) + +struct CreateTweetRequest { + 1: string tweet_text = "" ( + strato.description='The user-supplied text of the tweet. Defaults to empty string. Leading & trailing whitespace are trimmed, remaining value may be empty if and only if one or more media entity ids are also provided.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.nullcast + 2: bool nullcast = 0 ( + strato.description='Do not deliver this tweet to a user\'s followers. http://go/nullcast' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 3: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) + + // @see com.twitter.ads.callback.thriftscala.EngagementRequest + 4: optional engagement_request.EngagementRequest engagement_request ( + strato.description='The ad engagement from which this tweet was created.' + ) + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.attachment_url + 5: optional string attachment_url ( + strato.description='Tweet permalink (i.e. Quoted Tweet) or Direct Message deep link. This URL is not included in the visible_text_range.' + ) + + // @see com.twitter.tweetypie.thriftscala.Tweet.card_reference + 6: optional string card_uri ( + strato.description='Link to the card to associate with a tweet.' + ) + + 7: optional TweetReply reply ( + strato.description='Reply parameters.' 
+ ) + + 8: optional TweetMedia media ( + strato.description='Media parameters.' + ) + + 9: optional list semantic_annotation_ids ( + strato.description='Escherbird Annotations.' + ) + + 10: optional TweetGeo geo ( + strato.description='Tweet geo location metadata. See https://developer.twitter.com/en/docs/twitter-api/v1/data-dictionary/object-model/geo' + ) + + 11: optional BatchComposeMode batch_compose ( + strato.description='Batch Compose Mode. See go/batchcompose' + ) + + 12: optional ExclusiveTweetControlOptions exclusive_tweet_control_options ( + strato.description='When defined, this tweet will be marked as exclusive. Leave undefined to signify a regular, non-exclusive tweet. See go/superfollows.' + ) + + 13: optional TweetConversationControl conversation_control ( + strato.description='Restrict replies to this tweet. See http://go/dont-at-me-api. Only valid for conversation root tweets. Applies to all replies to this tweet.' + ) + + 14: optional TweetPeriscopeContext periscope ( + strato.description='Specifies information about live video streaming. Note that the Periscope product was shut down in March 2021, however some live video streaming features remain in the Twitter app. This struct keeps the Periscope naming convention to retain parity and traceability to other areas of the codebase that also retain the Periscope name. Note: A value of periscope.isLive=false is equivalent to this struct being left undefined.' + ) + + 15: optional TrustedFriendsControlOptions trusted_friends_control_options ( + strato.description='Trusted Friends parameters.' + ) + + 16: optional CollabControlOptions collab_control_options ( + strato.description='Collab Tweet & Collab Invitation parameters.' + ) + + 17: optional EditOptions edit_options ( + strato.description='when defined, this tweet will be marked as an edit of the tweet represented by previous_tweet_id in edit_options.' 
+ ) + + 18: optional NoteTweetOptions note_tweet_options ( + strato.description='The Note Tweet that is to be associated with the created Tweet.', + strato.graphql.skip='true' + ) + + 19: optional UndoOptions undo_options ( + strato.description='If the user has Undo Tweets enabled, the Tweet is created so that it can be previewed by the client but is not persisted.', + ) +} (strato.graphql.typename='CreateTweetRequest') + +struct CreateTweetResponse { + 1: optional i64 tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='tweet', + strato.description='The created tweet.' + ) +} (strato.graphql.typename='CreateTweetResponse') + +struct CreateTweetResponseWithSubqueryPrefetchItems { + 1: optional CreateTweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} + +// Request struct, ResponseStruct, ResponseWithPrefetchStruct +struct DeleteTweetRequest { + 1: required i64 tweet_id (strato.json.numbers.type='string') + + // @see com.twitter.tweetypie.thriftscala.PostTweetRequest.PostTweetRequest.comparison_id + 2: optional string comparison_id ( + strato.description='Correlates requests originating from REST endpoints and GraphQL endpoints. UUID v4 (random) 36 character string.' + ) +} (strato.graphql.typename='DeleteTweetRequest') + +struct DeleteTweetResponse { + 1: optional i64 tweet_id ( + strato.space='Tweet', + strato.graphql.fieldname='tweet', + strato.description='The deleted Tweet. Since the Tweet will always be not found after deletion, the TweetResult will always be empty.' 
+ ) +} (strato.graphql.typename='DeleteTweetResponse') + +struct DeleteTweetResponseWithSubqueryPrefetchItems { + 1: optional DeleteTweetResponse data + 2: optional existsAndPrefetch.PrefetchedData subqueryPrefetchItems +} diff --git a/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift new file mode 100644 index 000000000..b01ac21d3 --- /dev/null +++ b/tweetypie/common/src/thrift/com/twitter/tweetypie/unmentions/unmentions.thrift @@ -0,0 +1,9 @@ +namespace java com.twitter.tweetypie.unmentions.thriftjava +#@ namespace scala com.twitter.tweetypie.unmentions.thriftscala +#@ namespace strato com.twitter.tweetypie.unmentions +namespace py gen.twitter.tweetypie.unmentions + +struct UnmentionData { + 1: optional i64 conversationId (personalDataType = 'TweetConversationId') + 2: optional list mentionedUsers (personalDataType = 'UserId') +} (strato.space = "Unmentions", persisted='true', hasPersonalData = 'true') \ No newline at end of file diff --git a/tweetypie/server/BUILD b/tweetypie/server/BUILD new file mode 100644 index 000000000..2b17d8a49 --- /dev/null +++ b/tweetypie/server/BUILD @@ -0,0 +1,16 @@ +target( + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/config", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + ], +) \ No newline at end of file diff --git a/tweetypie/server/README.md b/tweetypie/server/README.md new file mode 100644 index 000000000..49d3c7ef7 --- /dev/null +++ b/tweetypie/server/README.md @@ -0,0 +1,45 @@ +# Tweetypie + +## Overview + +Tweetypie is the core Tweet service that handles the reading and writing of Tweet data. It is called by the Twitter clients (through GraphQL), as well as various internal Twitter services, to fetch, create, delete, and edit Tweets. Tweetypie calls several backends to hydrate Tweet related data to return to callers. + +## How It Works + +The next sections describe the layers involved in the read and create paths for Tweets. + +### Read Path + +In the read path, Tweetypie fetches the Tweet data from [Manhattan](https://blog.twitter.com/engineering/en_us/a/2014/manhattan-our-real-time-multi-tenant-distributed-database-for-twitter-scale) or [Twemcache](https://blog.twitter.com/engineering/en_us/a/2012/caching-with-twemcache), and hydrates data about the Tweet from various other backend services. + +#### Relevant Packages + +- [backends](src/main/scala/com/twitter/tweetypie/backends/): A "backend" is a wrapper around a thrift service that Tweetypie calls. For example [Talon.scala](src/main/scala/com/twitter/tweetypie/backends/Talon.scala) is the backend for Talon, the URL shortener. +- [repository](src/main/scala/com/twitter/tweetypie/repository/): A "repository" wraps a backend and provides a structured interface for retrieving data from the backend. [UrlRepository.scala](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) is the repository for the Talon backend. +- [hydrator](src/main/scala/com/twitter/tweetypie/hydrator/): Tweetypie doesn't store all the data associated with Tweets. For example, it doesn't store User objects, but it stores screennames in the Tweet text (as mentions). It stores media IDs, but it doesn't store the media metadata. 
Hydrators take the raw Tweet data from Manhattan or Cache and return it with some additional information, along with hydration metadata that says whether the hydration took place. This information is usually fetched using a repository. For example, during the hydration process, the [UrlEntityHydrator](src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala) calls Talon using the [UrlRepository](src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala) and fetches the expanded URLs for the t.co links in the Tweet. +- [handler](src/main/scala/com/twitter/tweetypie/handler/): A handler is a function that handles requests to one of the Tweetypie endpoints. The [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala) handles requests to `get_tweets`, one of the endpoints used to fetch Tweets. + +#### Through the Read Path + +At a high level, the path a `get_tweets` request takes is as follows. + +- The request is handled by [GetTweetsHandler](src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala). +- GetTweetsHandler uses the TweetResultRepository (defined in [LogicalRepositories.scala](src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala#L301)). The TweetResultRepository has at its core a [ManhattanTweetRepository](src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala) (that fetches the Tweet data from Manhattan), wrapped in a [CachingTweetRepository](src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala) (that applies caching using Twemcache). Finally, the caching repository is wrapped in a hydration layer (provided by [TweetHydration.hydrateRepo](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala#L789)). Essentially, the TweetResultRepository fetches the Tweet data from cache or Manhattan, and passes it through the hydration pipeline. 
+- The hydration pipeline is described in [TweetHydration.scala](src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala), where all the hydrators are combined together. + +### Write Path + +The write path follows different patterns to the read path, but reuses some of the code. + +#### Relevant Packages + +- [store](src/main/scala/com/twitter/tweetypie/store/): The store package includes the code for updating backends on write, and the coordination code for describing which backends need to be updated for which endpoints. There are two types of file in this package: stores and store modules. Files that end in Store are stores and define the logic for updating a backend, for example [ManhattanTweetStore](src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala) writes Tweets to Manhattan. Most of the files that don't end in Store are store modules and define the logic for handling a write endpoint, and describe which stores are called, for example [InsertTweet](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala) which handles the `post_tweet` endpoint. Modules define which stores they call, and stores define which modules they handle. + +#### Through the Write Path + +The path a `post_tweet` request takes is as follows. + +- The request is handled in [PostTweet.scala](src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala#L338). +- [TweetBuilder](src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala) creates a Tweet from the request, after performing text processing, validation, URL shortening, media processing, checking for duplicates etc. +- [WritePathHydration.hydrateInsertTweet](src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala#L54) passes the Tweet through the hydration pipeline to return to the caller. +- The Tweet data is written to various stores as described in [InsertTweet.scala](src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala#L84). 
diff --git a/tweetypie/server/config/BUILD b/tweetypie/server/config/BUILD new file mode 100644 index 000000000..45a90181c --- /dev/null +++ b/tweetypie/server/config/BUILD @@ -0,0 +1,7 @@ +resources( + sources = [ + "!**/*.pyc", + "!BUILD*", + "**/*", + ], +) diff --git a/tweetypie/server/config/decider.yml b/tweetypie/server/config/decider.yml new file mode 100644 index 000000000..6d3042fbc --- /dev/null +++ b/tweetypie/server/config/decider.yml @@ -0,0 +1,313 @@ +stratofed_forward_dark_traffic: + comment: + Forward Federated Strato traffic to DarkTrafficProxy (DTP). + Note, this decider is not overrideable via Tweetypie tooling, + as it is only used by the StratoFedServer frameworkDecider instance. + Note, you cannot use this decider as a TweetypieDeciderGates. + default_availability: 0 + +tweetypie_enable_community_tweet_creates: + comment: When enabled, it allows the creation of community tweets + default_availability: 10000 + +tweetypie_hydrate_scrub_engagements: + comment: Redact Tweet engagement related data (StatusCounts) from Interstitial Public Interest (IPI) Tweets. + default_availability: 10000 + +tweetypie_check_spam_on_retweet: + comment: Enable Scarecrow spam check during retweet creation + default_availability: 10000 + +tweetypie_check_spam_on_tweet: + comment: Enable Scarecrow spam check during tweet creation + default_availability: 10000 + +tweetypie_conversation_control_use_feature_switch_results: + comment: + Controls whether Tweetypie uses feature switch results during conversation control parameter validation. This decider supports migration of feature switches from macaw-tweets to tweetypie. + default_availability: 0 + +tweetypie_conversation_control_tweet_create_enabled: + comment: + Controls whether we will enforce conversation control policy on tweet create. 
http://go/dont-at-me-backend-tdd + default_availability: 0 + +tweetypie_enable_exclusive_tweet_control_validation: + comment: + Controls whether we will restrict the exclusiveTweetControlOptions parameter to only be usable by creators. http://go/superfollows + default_availability: 0 + +tweetypie_delay_erase_user_tweets: + comment: sleep for a fixed number of seconds before deleting a page of tweets during user erasure. Used as a rate limiting mechanism. + default_availability: 5000 + +tweetypie_deny_non_tweet_permalinks: + comment: + Right now we would create a quote-tweet which would contain a non-working permalink + whereas with latest QT + media changes we're blocking the tweet creation. If tweet + from permalink is not found, we would throw an exception if this decider is on else + return a default pass-through response. + default_availability: 0 + +tweetypie_enable_trusted_friends_control_validation: + comment: Controls whether we will enforce trusted friends control policy on replies + default_availability: 0 + +tweetypie_enforce_rate_limited_clients: + comment: + Controls whether we will rate-limit low-priority clients based on per-instance requests per sec. + We enable clients for rate-limiting by setting the rate_limit to true in clients.yml + default_availability: 10000 + +tweetypie_fail_closed_in_vf: + comment: Propagate failure from backends such as Strato when running Visibility Filtering + default_availability: 0 + +tweetypie_fork_dark_traffic: + comment: Forks traffic to Darkhub + default_availability: 0 + +tweetypie_hydrate_conversation_muted: + comment: Hydrates the conversationMuted field if requested. This calls timelineservice. + default_availability: 10000 + +tweetypie_hydrate_counts: + comment: Hydrate status counts, if asked for. This calls TFlock. 
+ default_availability: 10000 + +tweetypie_hydrate_previous_counts: + comment: Hydrate previous engagements on a tweet in an edit chain + default_availability: 0 + +tweetypie_hydrate_device_sources: + comment: Hydrate device sources. This reads from DBs. + default_availability: 10000 + +tweetypie_hydrate_escherbird_annotations: + comment: Hydrate the escherbirdEntityAnnotations additional field on tweet create. This calls the Escherbird Annotation Service. + default_availability: 10000 + +tweetypie_hydrate_gnip_profile_geo_enrichment: + comment: Hydrates each tweet with the profile geo enrichment. + default_availability: 10000 + +tweetypie_hydrate_has_media: + comment: Hydrate the hasMedia field based on whether the tweet has a media entity, a media card, or a URL that matches partner media regexes + default_availability: 10000 + +tweetypie_hydrate_media: + comment: Hydrate media entities. This calls MediaInfo Service. + default_availability: 10000 + +tweetypie_hydrate_media_refs: + comment: Hydrate MediaRefs. Calls Tweetypie for pasted media. + default_availability: 0 + +tweetypie_hydrate_media_tags: + comment: Hydrate media tags. This calls Gizmoduck for user view. + default_availability: 10000 + +tweetypie_hydrate_pasted_media: + comment: Copies media entities and media cards from tweets referenced by a media permalink url entity. + default_availability: 10000 + +tweetypie_hydrate_perspectives: + comment: Hydrate perspectival attributes, if asked for. This calls TLS, which may call TFlock if items are not in cache. + default_availability: 10000 + +tweetypie_hydrate_perspectives_edits_for_timelines: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for timelines safety levels if asked for. + This results in more TLS calls (one for each version of tweet). 
+ default_availability: 0 + +tweetypie_hydrate_perspectives_edits_for_tweet_details: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for tweet detail safety levels if asked for. + This results in more TLS calls (one for each version of tweet). + default_availability: 0 + +tweetypie_hydrate_perspectives_edits_for_other_levels: + comment: + Hydrated perspectival attributes across versions of tweet edit, + for safety levels not mentioned in other deciders, if asked for. + This results in more TLS calls (one for each version of tweet). + default_availability: 0 + +tweetypie_hydrate_places: + comment: Hydrate place data, if asked for. This calls geoduck. + default_availability: 10000 + +tweetypie_jiminy_dark_requests: + comment: Enables dark requests to the Jiminy backend for the specified % of traffic + default_availability: 0 + +tweetypie_log_cache_exceptions: + comment: Enables logging of cache exceptions to loglens. + default_availability: 0 + +tweetypie_log_reads: + comment: Enables logging of reads. + default_availability: 50 + +tweetypie_log_tweet_cache_writes: + comment: Scribe a record for each cache write for tweets that pass this decider. + default_availability: 0 + +tweetypie_log_writes: + comment: Enables logging of status writes. + default_availability: 10000 + +tweetypie_log_young_tweet_cache_writes: + comment: + Scribe a record of cache writes for recently-created tweets that + pass this decider. + default_availability: 0 + +tweetypie_log_vf_dark_read_mismatches: + comment: Log mismatches from the tweetypie_dark_read_safety_labels_from_strato code path. + default_availability: 0 + +tweetypie_max_request_width_enabled: + comment: + Controls whether the max request width limit is enabled or not. + 0 means the limit is disabled, 10000 means it is turned on and + request widths > maxSize will be rejected. 
+ default_availability: 0 + +tweetypie_media_refs_hydrator_include_pasted_media: + comment: + For debugging MediaRefsHydrator - determine if extra calls to pasted media are causing the GC issues. + default_availability: 0 + +tweetypie_prefer_forwarded_service_identifier_for_client_id: + comment: Effective client ID logic prefers forwarded service identifier to service identifier if available. + default_availability: 0 + +tweetypie_rate_limit_by_limiter_service: + comment: + Controls whether we will consult limiter service to see whether a + user is allowed to create more tweets. + default_availability: 10000 + +tweetypie_rate_limit_tweet_creation_failure: + comment: + Controls whether we rate limit tweet creation based on failed + attempts to create tweets via limiter service. This is separate + from the limit of created tweets. It is intended to prevent + unforeseen abuse by failing tweet creation attempts immediately if + the user has had too many recent tweet creation + failures. Disabling this decider will reduce traffic to limiter + service, but will remove the per-user abuse protection. + default_availability: 10000 + +tweetypie_replicate_reads_to_atla: + comment: Send reads to deferredrpc for replication to atla. We can use this to warm caches in atla. + default_availability: 0 + +tweetypie_replicate_reads_to_pdxa: + comment: Send reads to deferredrpc for replication to pdxa. We can use this to warm caches in pdxa. + default_availability: 0 + +tweetypie_disable_invite_via_mention: + comment: + Disables invite via mention field in the conversation control struct of + root tweets + reply tweets. + default_availability: 0 + +tweetypie_shed_read_traffic_voluntarily: + comment: + Preferred way to reject read requests during an incident from a subset of clients that have + volunteered to shed load. These clients have load_shed_envs set in clients.yml, often for + staging environments. 
Although this decider is available and clients have volunteered, should + still only be used in an emergency. + default_availability: 0 + +tweetypie_validate_card_ref_attachment_android: + comment: + When enabled tweet creates from Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type. + default_availability: 0 + +tweetypie_validate_card_ref_attachment_non_android: + comment: + When enabled tweet creates from non-Android consider CardReference for the TooManyAttachmentTypes error when creating tweets with more than one attachment type + default_availability: 0 + +# Additional Fields + +tweetypie_short_circuit_likely_partial_tweet_reads_ms: + comment: + Specifies a number of milliseconds before which, we short circuit likely + partial reads from MH and return NotFound tweet response state. + After experimenting decided to go with 1500 ms. + default_availability: 0 + +tweetypie_populate_quoted_tweet_results_as_contextual_tweet_ref: + comment: + CreateTweet and CreateReTweet column to return the 'quoted_tweet_results' PrefetchedItem as + ContextualTweetRef type instead of just tweetId. + This will be used during the quotedTweet.Tweet column migration (see http://go/qt-col-migration) + Post-migration, this will be removed. + default_availability: 0 +tweetypie_enable_unmentions_timeline_warmup: + comment: + When enabled, read-path calls execute an async call to the getUnmentionedUsersFromConverstion + strato column to warm the unmentioned NH/haplolite cache. 
+ http://go/unmention-me-onepager + default_availability: 0 + +tweetypie_tweet_visibility_library_enable_parity_test: + comment: measure TVL parity against VF federated service, for a fraction of traffic + default_availability: 0 + +tweetypie_enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim: + comment: when enabled, all features are hydrated in QuotedTweetVisibilityLibrary shim + default_availability: 0 + +tweetypie_enable_remove_unmentioned_implicit_mentions: + comment: + When enabled, implicit mentions are filtered based on users that have unmentioned themselves + from the tweet's conversation. http://go/unmention-me-onepager + default_availability: 0 + +tweetypie_enable_stale_tweet_validation: + comment: Controls whether we will enforce stale tweet policy on replies and QT + default_availability: 0 + +tweetypie_disable_promoted_tweet_edit: + comment: + Controls whether we will disable edits on promoted tweets + default_availability: 0 + +tweetypie_should_materialize_containers: + comment: + When enabled, Creatives Container Service will be called to materialize container-backed tweets. + Otherwise, TP will not call CCS and return a StatusState of NotFound. + default_availability: 0 + +tweetypie_check_twitter_blue_subscription_for_edit: + comment: + Controls whether we check if the User is subscribed to Twitter Blue when editing a Tweet. + default_availability: 0 + +tweetypie_hydrate_bookmarks_count: + comment: + Controls whether we hydrate bookmarks count for a Tweet + default_availability: 0 + +tweetypie_hydrate_bookmarks_perspective: + comment: + Controls whether we request the Bookmarked perspective from TLS + default_availability: 0 + +tweetypie_set_edit_time_window_to_sixty_minutes: + comment: + Set time window in which Tweets are editable to 60 minutes + default_availability: 0 + +tweetypie_enable_federated_column_dark_traffic: + comment: + Enable dark traffic for federated column. 
+ default_availability: 0 diff --git a/tweetypie/server/config/decider_staging.yml b/tweetypie/server/config/decider_staging.yml new file mode 100644 index 000000000..e69de29bb diff --git a/tweetypie/server/config/logging/logback-all-include.xml b/tweetypie/server/config/logging/logback-all-include.xml new file mode 100644 index 000000000..269cb149f --- /dev/null +++ b/tweetypie/server/config/logging/logback-all-include.xml @@ -0,0 +1,23 @@ + + + tweetypie-all.log + true + + tweetypie-all-%i.log + 1 + 20 + + + 800MB + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + diff --git a/tweetypie/server/config/logging/logback-without-loglens.xml b/tweetypie/server/config/logging/logback-without-loglens.xml new file mode 100644 index 000000000..b2e6c4d6a --- /dev/null +++ b/tweetypie/server/config/logging/logback-without-loglens.xml @@ -0,0 +1,12 @@ + + + + true + + + + + + + + diff --git a/tweetypie/server/config/logging/logback.xml b/tweetypie/server/config/logging/logback.xml new file mode 100644 index 000000000..04d686c20 --- /dev/null +++ b/tweetypie/server/config/logging/logback.xml @@ -0,0 +1,146 @@ + + + + + true + + + + + + + + tweetypie-important.log + true + + tweetypie-important-%i.log + 1 + 20 + + + 800MB + + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + true + ${log.lens.tag} + ${log.lens.index} + loglens + + %msg + + + + + + + + + alertable-exception.log + true + + alertable-exception-%i.log + 1 + 17 + + + 100MB + + + + + %date [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + true + ${log.lens.tag} + ${log.lens.index} + loglens + + ${ALERTABLE_MESSAGE_FORMAT} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tweetypie/server/config/partner_media.yml b/tweetypie/server/config/partner_media.yml new file mode 100644 index 000000000..f737dd7c1 --- /dev/null +++ b/tweetypie/server/config/partner_media.yml @@ -0,0 +1,30 @@ +http_or_https: + - 'vine\.co/v/[a-zA-Z0-9]+' 
+ - 'amp\.twimg\.com/' + - '(www\.)?dailymotion.com/video/[a-zA-Z0-9_\-/]+' + - '(www\.)?dai.ly/[a-zA-Z0-9_\-/]+' + - '(www\.)?youtu\.be/[a-zA-Z0-9_\-\?\&\=/]+' + - '(www\.)?youtube\.com/watch[a-zA-Z0-9_\-\?\&\=/]+' + - '(www\.)?ustream\.tv/recorded/\d+' + - '(www\.)?vevo\.com/watch/[\w-]+/[\w-]+/[a-zA-Z0-9_]+' + - '(www\.)?flickr\.com/photos/[\w\@\-]+/\d+/?' + - '(www\.)?flic\.kr/p/[A-Z0-9a-z\-]+' + - '([\w\-]+\.)deviantart\.com/(art|deviation|view)/[\w\@-]+' + - '(www\.)?vimeo\.com/\d+' + - '(www\.)?photozou\.(com|jp)/photo/show/\d+/\d+' + - '(www\.)?twitpic\.com/(?!(place|photos|events)/)([a-zA-Z0-9\?\=\-]+)' + - '(www\.)?mtv\.com/videos/([a-z0-9\-\_]+/)+[0-9]+/[a-z0-9\-\_]+\.jhtml(#[a-z0-9\=\&]+)?' + - '([\w\-\_]+\.)?washingtonpost\.com/wp-dyn/content/video/\d{4}/\d{2}/\d{2}/VI\d+\.html([a-zA-Z0-9_#\.\-\?\&\=/]+)?' + - '([\w\-\_]+\.)?msnbc\.msn\.com/id/\d{1,8}/vp/\d{1,8}([a-zA-Z0-9_#\.\-\?\&\=/]+)?' + - '((www|edition|us)\.)?cnn\.com/video/[\?|#]/[a-zA-Z0-9_#\.\-\?\&\=/]+' + - 'itunes\.apple\.com(/[a-z][a-z])?/(music-)?video/' + - '(www\.)?blip\.tv/((file/[\w-]+)|(([\w-]+/)?[\w-]+-\d+))/?' 
+ - 'online\.wsj\.com/video/[A-Z0-9a-z\-]+/[A-Z0-9a-z\-]+\.html' + - '(www\.)?hulu\.com/w(atch)?/[a-zA-Z0-9]+' + - 'video\.([a-z]{4,11}\.)?nhl\.com/videocenter/console\?(((catid=-?\d+&)?id=\d+)|(hlg=\d{8},\d,\d{1,4}(&event=[A-Z0-9]{4,6})?)|(hlp=\d{5,10}(&event=[A-Z0-9]{4,6})?))' + - '([a-zA-Z0-9\-]+\.)*grabyo\.com/((g/v/[a-zA-Z0-9]{11})|((studio/)?studiotimeline\.jsp\?shareId=[a-zA-Z0-9]{11}))[a-zA-Z0-9_?&=#:%/\.\-]*' + +http_only: + - 'on\.nba\.com/[a-zA-Z0-9]+' + - 'on\.nfl\.com/[a-zA-Z0-9]+' + - 'snpy\.tv/[a-zA-Z0-9]+' diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD new file mode 100644 index 000000000..261fcb099 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "mediaservices/commons/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake:id", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "util/util-slf4j-api", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD new file mode 100644 index 000000000..ab03f48f3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/BUILD @@ -0,0 +1,48 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + 
"3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/org/apache/thrift:libthrift", + "configbus/client/src/main/scala/com/twitter/configbus/client", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "finagle/finagle-core/src/main", + "finagle/finagle-thriftmux/src/main/scala", + # "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", + "finatra-internal/messaging/kafka/src/main/scala", + "finatra-internal/thrift/src/main/thrift:thrift-scala", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + # "kafka/finagle-kafka/finatra-kafka/src/main/scala", + "limiter/thrift-only/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "src/thrift/com/twitter/dataproducts:service-scala", + "src/thrift/com/twitter/escherbird:annotation-service-scala", + "src/thrift/com/twitter/escherbird:tweet-annotation-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/timelineservice:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "storage/clients/manhattan/client/src/main/scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala new file mode 100644 index 000000000..2daa79e87 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Backend.scala @@ -0,0 +1,172 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.context.Deadline +import com.twitter.finagle.service.RetryBudget +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.RetryHandler +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.util.Timer +import com.twitter.util.TimeoutException + +object Backend { + val log: Logger = Logger(getClass) + + /** + * Common stuff that is needed as part of the configuration of all + * of the backends. + */ + case class Context(val timer: Timer, val stats: StatsReceiver) + + /** + * All backend operations are encapsulated in the FutureArrow type. The Builder type + * represents functions that can decorate the FutureArrow, typically by calling the various + * combinator methods on FutureArrow. + */ + type Builder[A, B] = FutureArrow[A, B] => FutureArrow[A, B] + + /** + * A Policy defines some behavior to apply to a FutureArrow that wraps an endpoint. + */ + trait Policy { + + /** + * Using an endpoint name and Context, returns a Builder that does the actual + * application of the policy to the FutureArrow. + */ + def apply[A, B](name: String, ctx: Context): Builder[A, B] + + /** + * Sequentially combines policies, first applying this policy and then applying + * the next policy. Order matters! 
For example, to retry on timeouts, the FailureRetryPolicy + * needs to be applied after the TimeoutPolicy: + * + * TimeoutPolicy(100.milliseconds) >>> FailureRetryPolicy(retryPolicy) + */ + def andThen(next: Policy): Policy = { + val first = this + new Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = + first(name, ctx).andThen(next(name, ctx)) + + override def toString = s"$first >>> $next" + } + } + + /** + * An alias for `andThen`. + */ + def >>>(next: Policy): Policy = andThen(next) + } + + /** + * Applies a timeout to the underlying FutureArrow. + */ + case class TimeoutPolicy(timeout: Duration) extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + val ex = new TimeoutException(name + ": " + timeout) + (_: FutureArrow[A, B]).raiseWithin(ctx.timer, timeout, ex) + } + } + + /** + * Attaches a RetryHandler with the given RetryPolicy to retry failures. + */ + case class FailureRetryPolicy( + retryPolicy: RetryPolicy[Try[Nothing]], + retryBudget: RetryBudget = RetryBudget()) + extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + (_: FutureArrow[A, B]) + .retry(RetryHandler.failuresOnly(retryPolicy, ctx.timer, stats, retryBudget)) + } + } + + /** + * This policy applies standardized endpoint metrics. This should be used with every endpoint. + */ + case object TrackPolicy extends Policy { + def apply[A, B](name: String, ctx: Context): Builder[A, B] = { + val stats = ctx.stats.scope(name) + (_: FutureArrow[A, B]) + .onFailure(countOverCapacityExceptions(stats)) + .trackOutcome(ctx.stats, (_: A) => name) + .trackLatency(ctx.stats, (_: A) => name) + } + } + + /** + * The default "policy" for timeouts, retries, exception counting, latency tracking, etc. to + * apply to each backend operation. 
This returns a Builder type (an endofunction on FutureArrow), + * which can be composed with other Builders via simple function composition. + */ + def defaultPolicy[A, B]( + name: String, + requestTimeout: Duration, + retryPolicy: RetryPolicy[Try[B]], + ctx: Context, + retryBudget: RetryBudget = RetryBudget(), + totalTimeout: Duration = Duration.Top, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): Builder[A, B] = { + val scopedStats = ctx.stats.scope(name) + val requestTimeoutException = new TimeoutException( + s"$name: hit request timeout of $requestTimeout" + ) + val totalTimeoutException = new TimeoutException(s"$name: hit total timeout of $totalTimeout") + base => + base + .raiseWithin( + ctx.timer, + // We defer to a per-request deadline. When the deadline is missing or wasn't toggled, + // 'requestTimeout' is used instead. This mimics the behavior happening within a standard + // Finagle client stack and its 'TimeoutFilter'. + Deadline.currentToggled.fold(requestTimeout)(_.remaining), + requestTimeoutException + ) + .retry(RetryHandler(retryPolicy, ctx.timer, scopedStats, retryBudget)) + .raiseWithin(ctx.timer, totalTimeout, totalTimeoutException) + .onFailure(countOverCapacityExceptions(scopedStats)) + .trackOutcome(ctx.stats, (_: A) => name, exceptionCategorizer) + .trackLatency(ctx.stats, (_: A) => name) + } + + /** + * An onFailure FutureArrow callback that counts OverCapacity exceptions to a special counter. + * These will also be counted as failures and by exception class name, but having a special + * counter for this is easier to use in success rate computations where you want to factor out + * backpressure responses. 
+ */ + def countOverCapacityExceptions[A](scopedStats: StatsReceiver): (A, Throwable) => Unit = { + val overCapacityCounter = scopedStats.counter("over_capacity") + + { + case (_, ex: OverCapacity) => overCapacityCounter.incr() + case _ => () + } + } + + /** + * Provides a simple mechanism for applying a Policy to an endpoint FutureArrow from + * an underlying service interface. + */ + class PolicyAdvocate[S](backendName: String, ctx: Backend.Context, svc: S) { + + /** + * Tacks on the TrackPolicy to the given base policy, and then applies the policy to + * a FutureArrow. This is more of a convenience method that every Backend can use to + * build the fully configured FutureArrow. + */ + def apply[A, B]( + endpointName: String, + policy: Policy, + endpoint: S => FutureArrow[A, B] + ): FutureArrow[A, B] = { + log.info(s"appling policy to $backendName.$endpointName: $policy") + policy.andThen(TrackPolicy)(endpointName, ctx)(endpoint(svc)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala new file mode 100644 index 000000000..f77ad3d77 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/ConfigBus.scala @@ -0,0 +1,50 @@ +package com.twitter.tweetypie.backends + +import com.twitter.configbus.client.ConfigbusClientException +import com.twitter.configbus.client.file.PollingConfigSourceBuilder +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.util.Activity +import com.twitter.util.Activity._ +import com.twitter.conversions.DurationOps._ +import com.twitter.io.Buf + +trait ConfigBus { + def file(path: String): Activity[String] +} + +object ConfigBus { + private[this] val basePath = "appservices/tweetypie" + private[this] val log = Logger(getClass) + + def apply(stats: StatsReceiver, instanceId: Int, instanceCount: Int): ConfigBus = { + + val client = 
PollingConfigSourceBuilder() + .statsReceiver(stats) + .pollPeriod(30.seconds) + .instanceId(instanceId) + .numberOfInstances(instanceCount) + .build() + + val validBuffer = stats.counter("valid_buffer") + + def subscribe(path: String) = + client.subscribe(s"$basePath/$path").map(_.configs).map { + case Buf.Utf8(string) => + validBuffer.incr() + string + } + + new ConfigBus { + def file(path: String): Activity[String] = { + val changes = subscribe(path).run.changes.dedupWith { + case (Failed(e1: ConfigbusClientException), Failed(e2: ConfigbusClientException)) => + e1.getMessage == e2.getMessage + case other => + false + } + Activity(changes) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala new file mode 100644 index 000000000..781e2ad81 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/CreativesContainerService.scala @@ -0,0 +1,71 @@ +package com.twitter.tweetypie.backends + +import com.twitter.container.{thriftscala => ccs} +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finatra.thrift.thriftscala.ServerError +import com.twitter.finatra.thrift.thriftscala.ServerErrorCause +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.Duration +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.Try +import com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.tweetypie.{thriftscala => tp} +import com.twitter.util.Throw + +object CreativesContainerService { + import Backend._ + + type MaterializeAsTweet = FutureArrow[ccs.MaterializeAsTweetRequests, Seq[tp.GetTweetResult]] + type MaterializeAsTweetFields = + FutureArrow[ccs.MaterializeAsTweetFieldsRequests, Seq[tp.GetTweetFieldsResult]] + + def fromClient( + client: ccs.CreativesContainerService.MethodPerEndpoint + ): 
CreativesContainerService = + new CreativesContainerService { + val materializeAsTweet: MaterializeAsTweet = FutureArrow(client.materializeAsTweets) + val materializeAsTweetFields: MaterializeAsTweetFields = FutureArrow( + client.materializeAsTweetFields) + + def ping(): Future[Unit] = client.materializeAsTweets(ccs.MaterializeAsTweetRequests()).unit + } + + case class Config( + requestTimeout: Duration, + timeoutBackoffs: Stream[Duration], + serverErrorBackoffs: Stream[Duration]) { + def apply(svc: CreativesContainerService, ctx: Backend.Context): CreativesContainerService = + new CreativesContainerService { + override val materializeAsTweet: MaterializeAsTweet = + policy("materializeAsTweets", ctx)(svc.materializeAsTweet) + + override val materializeAsTweetFields: MaterializeAsTweetFields = + policy("materializeAsTweetFields", ctx)(svc.materializeAsTweetFields) + + override def ping(): Future[Unit] = svc.ping() + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { + case Throw(ex: ServerError) if ex.errorCause != ServerErrorCause.NotImplemented => true + } + ) + + implicit val warmup: Warmup[CreativesContainerService] = + Warmup[CreativesContainerService]("creativesContainerService")(_.ping()) + } +} + +trait CreativesContainerService { + import CreativesContainerService._ + + val materializeAsTweet: MaterializeAsTweet + val materializeAsTweetFields: MaterializeAsTweetFields + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala new file mode 100644 index 000000000..fc9e1acc3 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Escherbird.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.escherbird.{thriftscala => escherbird} +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Escherbird { + import Backend._ + + type Annotate = FutureArrow[Tweet, Seq[TweetEntityAnnotation]] + + def fromClient(client: escherbird.TweetEntityAnnotationService.MethodPerEndpoint): Escherbird = + new Escherbird { + val annotate = FutureArrow(client.annotate) + } + + case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { + + def apply(svc: Escherbird, ctx: Backend.Context): Escherbird = + new Escherbird { + val annotate: FutureArrow[Tweet, Seq[TweetEntityAnnotation]] = + policy("annotate", requestTimeout, ctx)(svc.annotate) + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait Escherbird { + import Escherbird._ + val annotate: Annotate +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala new file mode 100644 index 000000000..10cdc28e1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Expandodo.scala @@ -0,0 +1,83 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.expandodo.thriftscala.AttachmentEligibilityRequest +import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses +import com.twitter.expandodo.thriftscala.Card2Request +import 
com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.expandodo.thriftscala.Card2Responses +import com.twitter.expandodo.thriftscala.CardsResponse +import com.twitter.expandodo.thriftscala.GetCardUsersRequests +import com.twitter.expandodo.thriftscala.GetCardUsersResponses +import com.twitter.expandodo.{thriftscala => expandodo} +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Expandodo { + import Backend._ + + type GetCards = FutureArrow[Set[String], collection.Map[String, expandodo.CardsResponse]] + type GetCards2 = FutureArrow[ + (Seq[expandodo.Card2Request], expandodo.Card2RequestOptions), + expandodo.Card2Responses + ] + type GetCardUsers = FutureArrow[expandodo.GetCardUsersRequests, expandodo.GetCardUsersResponses] + type CheckAttachmentEligibility = + FutureArrow[Seq[ + expandodo.AttachmentEligibilityRequest + ], expandodo.AttachmentEligibilityResponses] + + def fromClient(client: expandodo.CardsService.MethodPerEndpoint): Expandodo = + new Expandodo { + val getCards = FutureArrow(client.getCards _) + val getCards2 = FutureArrow((client.getCards2 _).tupled) + val getCardUsers = FutureArrow(client.getCardUsers _) + val checkAttachmentEligibility = FutureArrow(client.checkAttachmentEligibility _) + } + + case class Config( + requestTimeout: Duration, + timeoutBackoffs: Stream[Duration], + serverErrorBackoffs: Stream[Duration]) { + def apply(svc: Expandodo, ctx: Backend.Context): Expandodo = + new Expandodo { + val getCards: FutureArrow[Set[String], collection.Map[String, CardsResponse]] = + policy("getCards", ctx)(svc.getCards) + val getCards2: FutureArrow[(Seq[Card2Request], Card2RequestOptions), Card2Responses] = + policy("getCards2", ctx)(svc.getCards2) + val getCardUsers: FutureArrow[GetCardUsersRequests, GetCardUsersResponses] = + policy("getCardUsers", ctx)(svc.getCardUsers) + val 
checkAttachmentEligibility: FutureArrow[Seq[ + AttachmentEligibilityRequest + ], AttachmentEligibilityResponses] = + policy("checkAttachmentEligibility", ctx)(svc.checkAttachmentEligibility) + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(serverErrorBackoffs)) { + case Throw(ex: expandodo.InternalServerError) => true + } + ) + } + + implicit val warmup: Warmup[Expandodo] = + Warmup[Expandodo]("expandodo")( + _.getCards2((Seq.empty, expandodo.Card2RequestOptions("iPhone-13"))) + ) +} + +trait Expandodo { + import Expandodo._ + + val getCards: GetCards + val getCards2: GetCards2 + val getCardUsers: GetCardUsers + val checkAttachmentEligibility: CheckAttachmentEligibility +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala new file mode 100644 index 000000000..e05d9950e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GeoScrubEventStore.scala @@ -0,0 +1,84 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.bijections.Bijections._ +import com.twitter.storage.client.manhattan.kv._ +import com.twitter.storage.client.manhattan.kv.impl._ +import com.twitter.util.Time + +/** + * Read and write the timestamp of the last delete_location_data request + * for a user. This is used as a safeguard to prevent leaking geo data + * with tweets that have not yet been scrubbed or were missed during the + * geo scrubbing process. 
+ */ +object GeoScrubEventStore { + type GetGeoScrubTimestamp = UserId => Stitch[Option[Time]] + type SetGeoScrubTimestamp = FutureArrow[(UserId, Time), Unit] + + private[this] val KeyDesc = + KeyDescriptor( + Component(LongInjection), + Component(LongInjection, StringInjection) + ).withDataset("geo_scrub") + + private[this] val ValDesc = ValueDescriptor(LongInjection) + + // This modulus determines how user ids get assigned to PKeys, and + // thus to shards within the MH cluster. The origin of the specific + // value has been lost to time, but it's important that we don't + // change it, or else the existing data will be inaccessible. + private[this] val PKeyModulus: Long = 25000L + + private[this] def toKey(userId: Long) = + KeyDesc + .withPkey(userId % PKeyModulus) + .withLkey(userId, "_last_scrub") + + def apply(client: ManhattanKVClient, config: Config, ctx: Backend.Context): GeoScrubEventStore = { + new GeoScrubEventStore { + val getGeoScrubTimestamp: UserId => Stitch[Option[Time]] = { + val endpoint = config.read.endpoint(client) + + (userId: UserId) => { + endpoint + .get(toKey(userId), ValDesc) + .map(_.map(value => Time.fromMilliseconds(value.contents))) + } + } + + val setGeoScrubTimestamp: SetGeoScrubTimestamp = { + val endpoint = config.write.endpoint(client) + + FutureArrow { + case (userId, timestamp) => + val key = toKey(userId) + + // Use the geo scrub timestamp as the MH entry timestamp. This + // ensures that whatever timestamp is highest will win any + // update races. 
+ val value = ValDesc.withValue(timestamp.inMilliseconds, timestamp) + Stitch.run(endpoint.insert(key, value)) + } + } + } + } + + case class EndpointConfig(requestTimeout: Duration, maxRetryCount: Int) { + def endpoint(client: ManhattanKVClient): ManhattanKVEndpoint = + ManhattanKVEndpointBuilder(client) + .defaultMaxTimeout(requestTimeout) + .maxRetryCount(maxRetryCount) + .build() + } + + case class Config(read: EndpointConfig, write: EndpointConfig) +} + +trait GeoScrubEventStore { + import GeoScrubEventStore._ + val getGeoScrubTimestamp: GetGeoScrubTimestamp + val setGeoScrubTimestamp: SetGeoScrubTimestamp +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala new file mode 100644 index 000000000..79f519250 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Gizmoduck.scala @@ -0,0 +1,93 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.gizmoduck.thriftscala.CountsUpdateField +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.ModifiedUser +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.gizmoduck.{thriftscala => gd} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Gizmoduck { + import Backend._ + + type GetById = FutureArrow[(gd.LookupContext, Seq[UserId], Set[UserField]), Seq[gd.UserResult]] + type GetByScreenName = + FutureArrow[(gd.LookupContext, Seq[String], Set[UserField]), Seq[gd.UserResult]] + type IncrCount = FutureArrow[(UserId, gd.CountsUpdateField, Int), Unit] + type ModifyAndGet = FutureArrow[(gd.LookupContext, UserId, gd.ModifiedUser), gd.User] + + def fromClient(client: gd.UserService.MethodPerEndpoint): Gizmoduck = + 
new Gizmoduck { + val getById = FutureArrow((client.get _).tupled) + val getByScreenName = FutureArrow((client.getByScreenName _).tupled) + val incrCount = FutureArrow((client.incrCount _).tupled) + val modifyAndGet = FutureArrow((client.modifyAndGet _).tupled) + def ping(): Future[Unit] = client.get(gd.LookupContext(), Seq.empty, Set.empty).unit + } + + case class Config( + readTimeout: Duration, + writeTimeout: Duration, + modifyAndGetTimeout: Duration, + modifyAndGetTimeoutBackoffs: Stream[Duration], + defaultTimeoutBackoffs: Stream[Duration], + gizmoduckExceptionBackoffs: Stream[Duration]) { + + def apply(svc: Gizmoduck, ctx: Backend.Context): Gizmoduck = + new Gizmoduck { + val getById: FutureArrow[(LookupContext, Seq[UserId], Set[UserField]), Seq[UserResult]] = + policy("getById", readTimeout, ctx)(svc.getById) + val getByScreenName: FutureArrow[(LookupContext, Seq[String], Set[UserField]), Seq[ + UserResult + ]] = policy("getByScreenName", readTimeout, ctx)(svc.getByScreenName) + val incrCount: FutureArrow[(UserId, CountsUpdateField, Int), Unit] = + policy("incrCount", writeTimeout, ctx)(svc.incrCount) + val modifyAndGet: FutureArrow[(LookupContext, UserId, ModifiedUser), User] = policy( + "modifyAndGet", + modifyAndGetTimeout, + ctx, + timeoutBackoffs = modifyAndGetTimeoutBackoffs + )(svc.modifyAndGet) + def ping(): Future[Unit] = svc.ping() + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context, + timeoutBackoffs: Stream[Duration] = defaultTimeoutBackoffs + ): Builder[A, B] = + translateExceptions andThen + defaultPolicy(name, requestTimeout, retryPolicy(timeoutBackoffs), ctx) + + private[this] def translateExceptions[A, B]: Builder[A, B] = + _.translateExceptions { + case gd.OverCapacity(msg) => OverCapacity(s"gizmoduck: $msg") + } + + private[this] def retryPolicy[B](timeoutBackoffs: Stream[Duration]): RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + 
RetryPolicy.backoff(Backoff.fromStream(gizmoduckExceptionBackoffs)) { + case Throw(ex: gd.InternalServerError) => true + } + ) + } + + implicit val warmup: Warmup[Gizmoduck] = + Warmup[Gizmoduck]("gizmoduck")(_.ping()) +} + +trait Gizmoduck { + import Gizmoduck._ + val getById: GetById + val getByScreenName: GetByScreenName + val incrCount: IncrCount + val modifyAndGet: ModifyAndGet + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala new file mode 100644 index 000000000..3b716c5b1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/GnipEnricherator.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.conversions.PercentOps._ +import com.twitter.conversions.DurationOps._ +import com.twitter.dataproducts.enrichments.thriftscala._ +import com.twitter.dataproducts.enrichments.thriftscala.Enricherator +import com.twitter.finagle.thriftmux.MethodBuilder +import com.twitter.servo.util.FutureArrow + +object GnipEnricherator { + + type HydrateProfileGeo = FutureArrow[ProfileGeoRequest, Seq[ProfileGeoResponse]] + + private def methodPerEndpoint(methodBuilder: MethodBuilder) = + Enricherator.MethodPerEndpoint( + methodBuilder + .servicePerEndpoint[Enricherator.ServicePerEndpoint] + .withHydrateProfileGeo( + methodBuilder + .withTimeoutTotal(300.milliseconds) + .withTimeoutPerRequest(100.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[Enricherator.ServicePerEndpoint](methodName = "hydrateProfileGeo") + .hydrateProfileGeo + ) + ) + + def fromMethod(methodBuilder: MethodBuilder): GnipEnricherator = { + val mpe = methodPerEndpoint(methodBuilder) + + new GnipEnricherator { + override val hydrateProfileGeo: HydrateProfileGeo = + FutureArrow(mpe.hydrateProfileGeo) + } + } +} + +trait GnipEnricherator { + import 
GnipEnricherator._ + val hydrateProfileGeo: HydrateProfileGeo +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala new file mode 100644 index 000000000..3bfe1a682 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterBackend.scala @@ -0,0 +1,55 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.limiter.thriftscala.FeatureRequest +import com.twitter.limiter.thriftscala.Usage +import com.twitter.limiter.{thriftscala => ls} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object LimiterBackend { + import Backend._ + + type IncrementFeature = FutureArrow[(ls.FeatureRequest, Int), Unit] + type GetFeatureUsage = FutureArrow[ls.FeatureRequest, ls.Usage] + + def fromClient(client: ls.LimitService.MethodPerEndpoint): LimiterBackend = + new LimiterBackend { + val incrementFeature: IncrementFeature = + FutureArrow { + case (featureReq, amount) => client.incrementFeature(featureReq, amount).unit + } + + val getFeatureUsage: GetFeatureUsage = + FutureArrow(featureReq => client.getLimitUsage(None, Some(featureReq))) + } + + case class Config(requestTimeout: Duration, timeoutBackoffs: Stream[Duration]) { + + def apply(client: LimiterBackend, ctx: Backend.Context): LimiterBackend = + new LimiterBackend { + val incrementFeature: FutureArrow[(FeatureRequest, Int), Unit] = + policy("incrementFeature", requestTimeout, ctx)(client.incrementFeature) + val getFeatureUsage: FutureArrow[FeatureRequest, Usage] = + policy("getFeatureUsage", requestTimeout, ctx)(client.getFeatureUsage) + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: 
RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait LimiterBackend { + import LimiterBackend._ + + val incrementFeature: IncrementFeature + val getFeatureUsage: GetFeatureUsage +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala new file mode 100644 index 000000000..289c92c0b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/LimiterService.scala @@ -0,0 +1,193 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.limiter.thriftscala.FeatureRequest +import com.twitter.tweetypie.backends.LimiterBackend.GetFeatureUsage +import com.twitter.tweetypie.backends.LimiterBackend.IncrementFeature +import com.twitter.tweetypie.backends.LimiterService.Feature + +/** + * Why does LimiterService exist? + * + * The underlying Limiter thrift service doesn't support batching. This trait and implementation + * basically exist to allow a batch-like interface to the Limiter. This keeps us from having to + * spread batching throughout our code base. + * + * Why is LimiterService in the backends package? + * + * In some ways it is like a backend if the backend supports batching. There is a modest amount of + * business logic in LimiterService, but that logic exists here to allow easier consumption throughout + * the tweetypie code base. We did look at moving LimiterService to another package, but all likely + * candidates (service, serverutil) caused circular dependencies. + * + * When I need to add functionality, should I add it to LimiterBackend or LimiterService? + * + * LimiterBackend is used as a simple wrapper around the Limiter thrift client. The LimiterBackend + * should be kept as dumb as possible. You will most likely want to add the functionality in + * LimiterService.
+ */ +object LimiterService { + type MinRemaining = (UserId, Option[UserId]) => Future[Int] + type HasRemaining = (UserId, Option[UserId]) => Future[Boolean] + type Increment = (UserId, Option[UserId], Int) => Future[Unit] + type IncrementByOne = (UserId, Option[UserId]) => Future[Unit] + + sealed abstract class Feature(val name: String, val hasPerApp: Boolean = false) { + def forUser(userId: UserId): FeatureRequest = FeatureRequest(name, userId = Some(userId)) + def forApp(appId: AppId): Option[FeatureRequest] = + if (hasPerApp) { + Some( + FeatureRequest( + s"${name}_per_app", + applicationId = Some(appId), + identifier = Some(appId.toString) + ) + ) + } else { + None + } + } + object Feature { + case object Updates extends Feature("updates", hasPerApp = true) + case object MediaTagCreate extends Feature("media_tag_create") + case object TweetCreateFailure extends Feature("tweet_creation_failure") + } + + def fromBackend( + incrementFeature: IncrementFeature, + getFeatureUsage: GetFeatureUsage, + getAppId: => Option[ + AppId + ], // the call-by-name here to invoke per request to get the current request's app id + stats: StatsReceiver = NullStatsReceiver + ): LimiterService = + new LimiterService { + def increment( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId], + amount: Int + ): Future[Unit] = { + Future.when(amount > 0) { + def increment(req: FeatureRequest): Future[Unit] = incrementFeature((req, amount)) + + val incrementUser: Option[Future[Unit]] = + Some(increment(feature.forUser(userId))) + + val incrementContributor: Option[Future[Unit]] = + for { + id <- contributorUserId + if id != userId + } yield increment(feature.forUser(id)) + + val incrementPerApp: Option[Future[Unit]] = + for { + appId <- getAppId + req <- feature.forApp(appId) + } yield increment(req) + + Future.collect(Seq(incrementUser, incrementContributor, incrementPerApp).flatten) + } + } + + def minRemaining( + feature: Feature + )( + userId: UserId, + 
contributorUserId: Option[UserId] + ): Future[Int] = { + def getRemaining(req: FeatureRequest): Future[Int] = getFeatureUsage(req).map(_.remaining) + + val getUserRemaining: Option[Future[Int]] = + Some(getRemaining(feature.forUser(userId))) + + val getContributorRemaining: Option[Future[Int]] = + contributorUserId.map(id => getRemaining(feature.forUser(id))) + + val getPerAppRemaining: Option[Future[Int]] = + for { + appId <- getAppId + req <- feature.forApp(appId) + } yield getRemaining(req) + + Future + .collect(Seq(getUserRemaining, getContributorRemaining, getPerAppRemaining).flatten) + .map(_.min) + } + } +} + +trait LimiterService { + + /** + * Increment the feature count for both the user and the contributor. If either increment fails, + * the resulting future will be the first exception encountered. + * + * @param feature The feature that is incremented + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * @param amount The amount that each feature should be incremented. + */ + def increment( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId], + amount: Int + ): Future[Unit] + + /** + * Increment the feature count, by one, for both the user and the contributor. If either + * increment fails, the resulting future will be the first exception encountered. + * + * @param feature The feature that is incremented + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * + * @see [[increment]] if you want to increment a feature by a specified amount + */ + def incrementByOne( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId] + ): Future[Unit] = + increment(feature)(userId, contributorUserId, 1) + + /** + * The minimum remaining limit between the user and contributor. 
If an exception occurs, then the + * resulting Future will be the first exception encountered. + * + * @param feature The feature that is queried + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * + * @return a `Future[Int]` with the minimum limit left between the user and contributor + */ + def minRemaining(feature: Feature)(userId: UserId, contributorUserId: Option[UserId]): Future[Int] + + /** + * Can the user and contributor increment the given feature. If the result cannot be determined + * because of an exception, then we assume they can increment. This will allow us to continue + * servicing requests even if the limiter service isn't responding. + * + * @param feature The feature that is queried + * @param userId The current user tied to the current request + * @param contributorUserId The contributor, if one exists, tied to the current request + * @return a `Future[Boolean]` with true if both the user and contributor have remaining limit + * cap. 
+ * + * @see [[minRemaining]] if you would like to handle any exceptions that occur on your own + */ + def hasRemaining( + feature: Feature + )( + userId: UserId, + contributorUserId: Option[UserId] + ): Future[Boolean] = + minRemaining(feature)(userId, contributorUserId) + .map(_ > 0) + .handle { case _ => true } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala new file mode 100644 index 000000000..ce4e0838e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Manhattan.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.servo.exception.thriftscala +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.stitch.Stitch +import com.twitter.storage.client.manhattan.kv.TimeoutManhattanException +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.storage.TweetStorageClient.Ping +import com.twitter.tweetypie.storage.ClientError +import com.twitter.tweetypie.storage.RateLimited +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.util.StitchUtils +import com.twitter.util.TimeoutException + +object Manhattan { + def fromClient(underlying: TweetStorageClient): TweetStorageClient = + new TweetStorageClient { + val addTweet = translateExceptions(underlying.addTweet) + val deleteAdditionalFields = translateExceptions(underlying.deleteAdditionalFields) + val getDeletedTweets = translateExceptions(underlying.getDeletedTweets) + val getTweet = translateExceptions(underlying.getTweet) + val getStoredTweet = translateExceptions(underlying.getStoredTweet) + val scrub = translateExceptions(underlying.scrub) + val softDelete = translateExceptions(underlying.softDelete) + val undelete = translateExceptions(underlying.undelete) + val updateTweet = translateExceptions(underlying.updateTweet) + val 
hardDeleteTweet = translateExceptions(underlying.hardDeleteTweet) + val ping: Ping = underlying.ping + val bounceDelete = translateExceptions(underlying.bounceDelete) + } + + private[backends] object translateExceptions { + private[this] def pf: PartialFunction[Throwable, Throwable] = { + case e: RateLimited => OverCapacity(s"storage: ${e.getMessage}") + case e: TimeoutManhattanException => new TimeoutException(e.getMessage) + case e: ClientError => thriftscala.ClientError(ClientErrorCause.BadRequest, e.message) + } + + def apply[A, B](f: A => Stitch[B]): A => Stitch[B] = + a => StitchUtils.translateExceptions(f(a), pf) + + def apply[A, B, C](f: (A, B) => Stitch[C]): (A, B) => Stitch[C] = + (a, b) => StitchUtils.translateExceptions(f(a, b), pf) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala new file mode 100644 index 000000000..a355507cf --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/MediaInfoService.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoRequest +import com.twitter.mediainfo.server.thriftscala.GetTweetMediaInfoResponse +import com.twitter.mediainfo.server.{thriftscala => mis} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object MediaInfoService { + import Backend._ + + type GetTweetMediaInfo = FutureArrow[mis.GetTweetMediaInfoRequest, mis.GetTweetMediaInfoResponse] + + def fromClient(client: mis.MediaInfoService.MethodPerEndpoint): MediaInfoService = + new MediaInfoService { + val getTweetMediaInfo = FutureArrow(client.getTweetMediaInfo) + } + + case class Config( + requestTimeout: Duration, + totalTimeout: Duration, + timeoutBackoffs: Stream[Duration]) { + + def apply(svc: 
MediaInfoService, ctx: Backend.Context): MediaInfoService = + new MediaInfoService { + val getTweetMediaInfo: FutureArrow[GetTweetMediaInfoRequest, GetTweetMediaInfoResponse] = + policy("getTweetMediaInfo", ctx)(svc.getTweetMediaInfo) + } + + private[this] def policy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, requestTimeout, retryPolicy, ctx, totalTimeout = totalTimeout) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait MediaInfoService { + import MediaInfoService._ + val getTweetMediaInfo: GetTweetMediaInfo +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala new file mode 100644 index 000000000..d8df2beb5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Scarecrow.scala @@ -0,0 +1,73 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.service.gen.scarecrow.thriftscala.CheckTweetResponse +import com.twitter.service.gen.scarecrow.thriftscala.Retweet +import com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TweetContext +import com.twitter.service.gen.scarecrow.thriftscala.TweetNew +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Scarecrow { + import Backend._ + + type CheckTweet2 = + FutureArrow[(scarecrow.TweetNew, scarecrow.TweetContext), scarecrow.CheckTweetResponse] + type CheckRetweet = FutureArrow[scarecrow.Retweet, scarecrow.TieredAction] + + def fromClient(client: scarecrow.ScarecrowService.MethodPerEndpoint): Scarecrow = + new Scarecrow { + val checkTweet2 = 
FutureArrow((client.checkTweet2 _).tupled) + val checkRetweet = FutureArrow(client.checkRetweet _) + def ping(): Future[Unit] = client.ping() + } + + case class Config( + readTimeout: Duration, + writeTimeout: Duration, + timeoutBackoffs: Stream[Duration], + scarecrowExceptionBackoffs: Stream[Duration]) { + def apply(svc: Scarecrow, ctx: Backend.Context): Scarecrow = + new Scarecrow { + val checkTweet2: FutureArrow[(TweetNew, TweetContext), CheckTweetResponse] = + writePolicy("checkTweet2", ctx)(svc.checkTweet2) + val checkRetweet: FutureArrow[Retweet, TieredAction] = + writePolicy("checkRetweet", ctx)(svc.checkRetweet) + def ping(): Future[Unit] = svc.ping() + } + + private[this] def readPolicy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, readTimeout, readRetryPolicy, ctx) + + private[this] def writePolicy[A, B](name: String, ctx: Context): Builder[A, B] = + defaultPolicy(name, writeTimeout, nullRetryPolicy, ctx) + + private[this] def readRetryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(scarecrowExceptionBackoffs)) { + case Throw(ex: scarecrow.InternalServerError) => true + } + ) + + private[this] def nullRetryPolicy[B]: RetryPolicy[Try[B]] = + // retry policy that runs once, and will not retry on any exception + RetryPolicy.backoff(Backoff.fromStream(Stream(0.milliseconds))) { + case Throw(_) => false + } + } + + implicit val warmup: Warmup[Scarecrow] = Warmup[Scarecrow]("scarecrow")(_.ping()) +} + +trait Scarecrow { + import Scarecrow._ + val checkTweet2: CheckTweet2 + val checkRetweet: CheckRetweet + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala new file mode 100644 index 000000000..37ac1243d --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/SocialGraphService.scala
@@ -0,0 +1,70 @@
+package com.twitter.tweetypie
+package backends
+
+import com.twitter.finagle.service.RetryPolicy
+import com.twitter.servo.util.FutureArrow
+import com.twitter.socialgraph.thriftscala.ExistsRequest
+import com.twitter.socialgraph.thriftscala.ExistsResult
+import com.twitter.socialgraph.thriftscala.RequestContext
+import com.twitter.socialgraph.{thriftscala => sg}
+import com.twitter.tweetypie.util.RetryPolicyBuilder
+
+/**
+ * Backend wrapper around the SocialGraphService thrift client.
+ */
+object SocialGraphService {
+  import Backend._
+
+  /** Arrow from (exists requests, optional request context) to the exists results. */
+  type Exists =
+    FutureArrow[(Seq[sg.ExistsRequest], Option[sg.RequestContext]), Seq[sg.ExistsResult]]
+
+  /**
+   * Adapts a thrift client to this trait: `exists` is the client's method in tupled form, and
+   * `ping` discards the value returned by the client's ping call.
+   */
+  def fromClient(client: sg.SocialGraphService.MethodPerEndpoint): SocialGraphService =
+    new SocialGraphService {
+      val exists = FutureArrow((client.exists _).tupled)
+      // Explicit `()` so the implementation matches the trait's `def ping(): Future[Unit]`.
+      def ping(): Future[Unit] = client.ping().unit
+    }
+
+  /**
+   * Settings for decorating a [[SocialGraphService]] with `defaultPolicy`.
+   *
+   * @param socialGraphTimeout request timeout passed to the policy built for `exists`
+   * @param timeoutBackoffs backoffs passed to `RetryPolicyBuilder.timeouts`
+   */
+  case class Config(socialGraphTimeout: Duration, timeoutBackoffs: Stream[Duration]) {
+
+    /** Returns `svc` with `exists` wrapped in the policy; `ping` is delegated unchanged. */
+    def apply(svc: SocialGraphService, ctx: Backend.Context): SocialGraphService =
+      new SocialGraphService {
+        val exists: FutureArrow[(Seq[ExistsRequest], Option[RequestContext]), Seq[ExistsResult]] =
+          policy("exists", socialGraphTimeout, ctx)(svc.exists)
+        def ping(): Future[Unit] = svc.ping()
+      }
+
+    private[this] def policy[A, B](
+      name: String,
+      requestTimeout: Duration,
+      ctx: Context
+    ): Builder[A, B] =
+      defaultPolicy(name, requestTimeout, retryPolicy, ctx)
+
+    private[this] def retryPolicy[B]: RetryPolicy[Try[B]] =
+      RetryPolicyBuilder.timeouts[Any](timeoutBackoffs)
+  }
+
+  /** Warmup action named "socialgraphservice" that calls `ping()` on the service. */
+  implicit val warmup: Warmup[SocialGraphService] =
+    Warmup[SocialGraphService]("socialgraphservice")(_.ping())
+}
+
+/** The SocialGraphService operations exposed by this backend. */
+trait SocialGraphService {
+  import SocialGraphService._
+  val exists: Exists
+  def ping(): Future[Unit]
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala
new file mode 100644
index 000000000..e056db8c9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TFlock.scala @@ -0,0 +1,98 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.flockdb.client.{thriftscala => flockdb, _} +import com.twitter.servo +import com.twitter.servo.util.RetryHandler +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.util.Future +import com.twitter.util.TimeoutException + +object TFlock { + val log = Logger(this.getClass) + + case class Config( + requestTimeout: Duration, + timeoutBackoffs: Stream[Duration], + flockExceptionBackoffs: Stream[Duration], + overCapacityBackoffs: Stream[Duration], + defaultPageSize: Int = 1000) { + def apply(svc: flockdb.FlockDB.MethodPerEndpoint, ctx: Backend.Context): TFlockClient = { + val retryHandler = + RetryHandler[Any]( + retryPolicy(timeoutBackoffs, flockExceptionBackoffs, overCapacityBackoffs), + ctx.timer, + ctx.stats + ) + val rescueHandler = translateExceptions.andThen(Future.exception) + val exceptionCounter = new servo.util.ExceptionCounter(ctx.stats, "failures") + val timeoutException = new TimeoutException(s"tflock: $requestTimeout") + val wrapper = + new WrappingFunction { + def apply[T](f: => Future[T]): Future[T] = + retryHandler { + exceptionCounter { + f.raiseWithin(ctx.timer, requestTimeout, timeoutException) + .onFailure(logFlockExceptions) + .rescue(rescueHandler) + } + } + } + + val wrappedClient = new WrappingFlockClient(svc, wrapper, wrapper) + val statsClient = new StatsCollectingFlockService(wrappedClient, ctx.stats) + new TFlockClient(statsClient, defaultPageSize) + } + } + + def isOverCapacity(ex: flockdb.FlockException): Boolean = + ex.errorCode match { + case Some(flockdb.Constants.READ_OVERCAPACITY_ERROR) => true + case Some(flockdb.Constants.WRITE_OVERCAPACITY_ERROR) => true + case _ => false + 
} + + /** + * Builds a RetryPolicy for tflock operations that will retry timeouts with the specified + * timeout backoffs, and will retry non-overcapacity FlockExceptions with the + * specified flockExceptionBackoffs, and will retry over-capacity exceptions with + * the specified overCapacityBackoffs. + */ + def retryPolicy( + timeoutBackoffs: Stream[Duration], + flockExceptionBackoffs: Stream[Duration], + overCapacityBackoffs: Stream[Duration] + ): RetryPolicy[Try[Any]] = + RetryPolicy.combine[Try[Any]]( + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(flockExceptionBackoffs)) { + case Throw(ex: flockdb.FlockException) if !isOverCapacity(ex) => true + case Throw(_: flockdb.FlockQuotaException) => false + }, + RetryPolicy.backoff(Backoff.fromStream(overCapacityBackoffs)) { + case Throw(ex: flockdb.FlockException) if isOverCapacity(ex) => true + case Throw(_: flockdb.FlockQuotaException) => true + case Throw(_: OverCapacity) => true + } + ) + + val logFlockExceptions: Throwable => Unit = { + case t: flockdb.FlockException => { + log.info("FlockException from TFlock", t) + } + case _ => + } + + /** + * Converts FlockQuotaExceptions and overcapacity FlockExceptions into tweetypie's OverCapacity.
+ */ + val translateExceptions: PartialFunction[Throwable, Throwable] = { + case t: flockdb.FlockQuotaException => + OverCapacity(s"tflock: throttled ${t.description}") + case t: flockdb.FlockException if isOverCapacity(t) => + OverCapacity(s"tflock: ${t.description}") + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala new file mode 100644 index 000000000..95385b510 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Talon.scala @@ -0,0 +1,94 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.service.talon.thriftscala.ExpandRequest +import com.twitter.service.talon.thriftscala.ExpandResponse +import com.twitter.service.talon.thriftscala.ResponseCode +import com.twitter.service.talon.thriftscala.ShortenRequest +import com.twitter.service.talon.thriftscala.ShortenResponse +import com.twitter.service.talon.{thriftscala => talon} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object Talon { + import Backend._ + + type Expand = FutureArrow[talon.ExpandRequest, talon.ExpandResponse] + type Shorten = FutureArrow[talon.ShortenRequest, talon.ShortenResponse] + + case object TransientError extends Exception() + case object PermanentError extends Exception() + + def fromClient(client: talon.Talon.MethodPerEndpoint): Talon = + new Talon { + val shorten = FutureArrow(client.shorten _) + val expand = FutureArrow(client.expand _) + def ping(): Future[Unit] = client.serviceInfo().unit + } + + case class Config( + shortenTimeout: Duration, + expandTimeout: Duration, + timeoutBackoffs: Stream[Duration], + transientErrorBackoffs: Stream[Duration]) { + def apply(svc: Talon, ctx: Backend.Context): Talon = + new Talon { + val 
shorten: FutureArrow[ShortenRequest, ShortenResponse] = + policy("shorten", shortenTimeout, shortenResponseCode, ctx)(svc.shorten) + val expand: FutureArrow[ExpandRequest, ExpandResponse] = + policy("expand", expandTimeout, expandResponseCode, ctx)(svc.expand) + def ping(): Future[Unit] = svc.ping() + } + + private[this] def policy[A, B]( + name: String, + requestTimeout: Duration, + getResponseCode: B => talon.ResponseCode, + ctx: Context + ): Builder[A, B] = + handleResponseCodes(name, getResponseCode, ctx) andThen + defaultPolicy(name, requestTimeout, retryPolicy, ctx) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicy.combine[Try[B]]( + RetryPolicyBuilder.timeouts[B](timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(transientErrorBackoffs)) { + case Throw(TransientError) => true + } + ) + + private[this] def handleResponseCodes[A, B]( + name: String, + extract: B => talon.ResponseCode, + ctx: Context + ): Builder[A, B] = { + val scopedStats = ctx.stats.scope(name) + val responseCodeStats = scopedStats.scope("response_code") + _ andThen FutureArrow[B, B] { res => + val responseCode = extract(res) + responseCodeStats.counter(responseCode.toString).incr() + responseCode match { + case talon.ResponseCode.TransientError => Future.exception(TransientError) + case talon.ResponseCode.PermanentError => Future.exception(PermanentError) + case talon.ResponseCode.ServerOverloaded => Future.exception(OverCapacity("talon")) + case _ => Future.value(res) + } + } + } + } + + def shortenResponseCode(res: talon.ShortenResponse): ResponseCode = res.responseCode + def expandResponseCode(res: talon.ExpandResponse): ResponseCode = res.responseCode + + implicit val warmup: Warmup[Talon] = Warmup[Talon]("talon")(_.ping()) +} + +trait Talon { + import Talon._ + val shorten: Shorten + val expand: Expand + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala new file mode 100644 index 000000000..a8c9b74db --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/TimelineService.scala @@ -0,0 +1,84 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import com.twitter.timelineservice.thriftscala.Event +import com.twitter.timelineservice.thriftscala.PerspectiveQuery +import com.twitter.timelineservice.thriftscala.PerspectiveResult +import com.twitter.timelineservice.thriftscala.ProcessEventResult +import com.twitter.timelineservice.thriftscala.StatusTimelineResult +import com.twitter.timelineservice.thriftscala.TimelineQuery +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.util.RetryPolicyBuilder + +object TimelineService { + import Backend._ + + type GetStatusTimeline = FutureArrow[Seq[tls.TimelineQuery], Seq[tls.StatusTimelineResult]] + type GetPerspectives = FutureArrow[Seq[tls.PerspectiveQuery], Seq[tls.PerspectiveResult]] + type ProcessEvent2 = FutureArrow[tls.Event, tls.ProcessEventResult] + + private val warmupQuery = + // we need a non-empty query, since tls treats empty queries as an error + tls.TimelineQuery( + timelineType = tls.TimelineType.User, + timelineId = 620530287L, // same user id that timelineservice-api uses for warmup + maxCount = 1 + ) + + def fromClient(client: tls.TimelineService.MethodPerEndpoint): TimelineService = + new TimelineService { + val processEvent2 = FutureArrow(client.processEvent2 _) + val getStatusTimeline = FutureArrow(client.getStatusTimeline _) + val getPerspectives = FutureArrow(client.getPerspectives _) + def ping(): Future[Unit] = + client.touchTimeline(Seq(warmupQuery)).handle { case _: tls.InternalServerError => } + } + + case class Config(writeRequestPolicy: Policy, readRequestPolicy: Policy) { + 
+ def apply(svc: TimelineService, ctx: Backend.Context): TimelineService = { + val build = new PolicyAdvocate("TimelineService", ctx, svc) + new TimelineService { + val processEvent2: FutureArrow[Event, ProcessEventResult] = + build("processEvent2", writeRequestPolicy, _.processEvent2) + val getStatusTimeline: FutureArrow[Seq[TimelineQuery], Seq[StatusTimelineResult]] = + build("getStatusTimeline", readRequestPolicy, _.getStatusTimeline) + val getPerspectives: FutureArrow[Seq[PerspectiveQuery], Seq[PerspectiveResult]] = + build("getPerspectives", readRequestPolicy, _.getPerspectives) + def ping(): Future[Unit] = svc.ping() + } + } + } + + case class FailureBackoffsPolicy( + timeoutBackoffs: Stream[Duration] = Stream.empty, + tlsExceptionBackoffs: Stream[Duration] = Stream.empty) + extends Policy { + def toFailureRetryPolicy: FailureRetryPolicy = + FailureRetryPolicy( + RetryPolicy.combine( + RetryPolicyBuilder.timeouts(timeoutBackoffs), + RetryPolicy.backoff(Backoff.fromStream(tlsExceptionBackoffs)) { + case Throw(ex: tls.InternalServerError) => true + } + ) + ) + + def apply[A, B](name: String, ctx: Context): Builder[A, B] = + toFailureRetryPolicy(name, ctx) + } + + implicit val warmup: Warmup[TimelineService] = + Warmup[TimelineService]("timelineservice")(_.ping()) +} + +trait TimelineService { + import TimelineService._ + val processEvent2: ProcessEvent2 + val getStatusTimeline: GetStatusTimeline + val getPerspectives: GetPerspectives + def ping(): Future[Unit] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala new file mode 100644 index 000000000..e756d5202 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/UserImageService.scala @@ -0,0 +1,71 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.servo.util.FutureArrow +import 
com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.user_image_service.thriftscala.ProcessTweetMediaRequest +import com.twitter.user_image_service.thriftscala.ProcessTweetMediaResponse +import com.twitter.user_image_service.thriftscala.UpdateProductMetadataRequest +import com.twitter.user_image_service.thriftscala.UpdateTweetMediaRequest +import com.twitter.user_image_service.thriftscala.UpdateTweetMediaResponse +import com.twitter.user_image_service.{thriftscala => uis} + +object UserImageService { + import Backend._ + + type ProcessTweetMedia = FutureArrow[uis.ProcessTweetMediaRequest, uis.ProcessTweetMediaResponse] + type UpdateProductMetadata = FutureArrow[uis.UpdateProductMetadataRequest, Unit] + type UpdateTweetMedia = FutureArrow[uis.UpdateTweetMediaRequest, uis.UpdateTweetMediaResponse] + + def fromClient(client: uis.UserImageService.MethodPerEndpoint): UserImageService = + new UserImageService { + val processTweetMedia = FutureArrow(client.processTweetMedia) + val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = FutureArrow( + client.updateProductMetadata).unit + val updateTweetMedia = FutureArrow(client.updateTweetMedia) + } + + case class Config( + processTweetMediaTimeout: Duration, + updateTweetMediaTimeout: Duration, + timeoutBackoffs: Stream[Duration]) { + + def apply(svc: UserImageService, ctx: Backend.Context): UserImageService = + new UserImageService { + val processTweetMedia: FutureArrow[ProcessTweetMediaRequest, ProcessTweetMediaResponse] = + policy("processTweetMedia", processTweetMediaTimeout, ctx)(svc.processTweetMedia) + val updateProductMetadata: FutureArrow[UpdateProductMetadataRequest, Unit] = + policy("updateProductMetadata", processTweetMediaTimeout, ctx)(svc.updateProductMetadata) + val updateTweetMedia: FutureArrow[UpdateTweetMediaRequest, UpdateTweetMediaResponse] = + policy("updateTweetMedia", updateTweetMediaTimeout, ctx)(svc.updateTweetMedia) + } + + private[this] def policy[A, B]( + name: 
String, + requestTimeout: Duration, + ctx: Context + ): Builder[A, B] = + defaultPolicy( + name = name, + requestTimeout = requestTimeout, + retryPolicy = retryPolicy, + ctx = ctx, + exceptionCategorizer = { + case _: uis.BadRequest => Some("success") + case _ => None + } + ) + + private[this] def retryPolicy[B]: RetryPolicy[Try[B]] = + RetryPolicyBuilder.timeouts[Any](timeoutBackoffs) + } +} + +trait UserImageService { + import UserImageService._ + + val processTweetMedia: ProcessTweetMedia + val updateProductMetadata: UpdateProductMetadata + val updateTweetMedia: UpdateTweetMedia +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala new file mode 100644 index 000000000..06c61934e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/backends/Warmup.scala @@ -0,0 +1,266 @@ +package com.twitter.tweetypie +package backends + +import com.twitter.concurrent.AsyncSemaphore +import com.twitter.util.Timer +import com.twitter.util.Promise +import scala.util.control.NoStackTrace + +/** + * Tools for building warmup actions on backend clients. The basic + * idea is to make requests to backends repeatedly until they succeed. + */ +object Warmup { + + /** + * Signals that a warmup action was aborted because warmup is + * complete. + */ + object WarmupComplete extends Exception with NoStackTrace + + /** + * Configuration for warmup actions. + * + * @param maxOutstandingRequests: Limit on total number of outstanding warmup requests. + * @param maxWarmupDuration: Total amount of time warmup is allowed to take. + * @param requestTimeouts: Time limit for individual warmup actions. + * @param reliability: Criteria for how many times each warmup should be run. 
+ */ + case class Settings( + maxOutstandingRequests: Int, + maxWarmupDuration: Duration, + requestTimeouts: Map[String, Duration], + reliability: Reliably) { + def toRunner(logger: Logger, timer: Timer): Runner = + new WithTimeouts(requestTimeouts, timer) + .within(new Logged(logger)) + .within(new LimitedConcurrency(maxOutstandingRequests)) + .within(reliability) + + def apply[A: Warmup](value: A, logger: Logger, timer: Timer): Future[Unit] = + toRunner(logger, timer) + .run(value) + .raiseWithin(maxWarmupDuration)(timer) + .handle { case _ => } + } + + /** + * Strategy for running Warmup actions. + */ + trait Runner { self => + + /** + * Run one single warmup action. + */ + def runOne(name: String, action: => Future[Unit]): Future[Unit] + + /** + * Compose these two Runners by calling this Runner's runOne + * inside of other's runOne. + */ + final def within(other: Runner): Runner = + new Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + other.runOne(name, self.runOne(name, action)) + } + + /** + * Execute all of the warmup actions for the given value using + * this runner. + */ + final def run[T](t: T)(implicit w: Warmup[T]): Future[Unit] = + Future.join(w.actions.toSeq.map { case (name, f) => runOne(name, f(t).unit) }) + } + + /** + * Set a ceiling on the amount of time each kind of warmup action is + * allowed to take. + */ + class WithTimeouts(timeouts: Map[String, Duration], timer: Timer) extends Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + timeouts.get(name).map(action.raiseWithin(_)(timer)).getOrElse(action) + } + + /** + * Execute each action until its reliability is estimated to be + * above the given threshold. The reliability is initially assumed + * to be zero. 
The reliability is estimated as an exponential moving + * average, with the new data point given the appropriate weight so + * that a single data point will no longer be able to push the + * average below the threshold. + * + * The warmup action is considered successful if it does not throw + * an exception. No timeouts are applied. + * + * The threshold must be in the interval [0, 1). NOTE(review): exactly 0 makes the weight exponent below divide by zero and looks like it fails the weight checks; confirm. + * + * The concurrency level determines how many outstanding requests + * to maintain until the threshold is reached. This allows warmup + * to happen more rapidly when individual requests have high + * latency. + * + * maxAttempts limits the total number of tries that we are allowed + * to try to reach the reliability threshold. This is a safety + * measure to prevent overloading whatever subsystem we are + * attempting to warm up. + */ + case class Reliably(reliabilityThreshold: Double, concurrency: Int, maxAttempts: Int) + extends Runner { + require(reliabilityThreshold < 1) + require(reliabilityThreshold >= 0) + require(concurrency > 0) + require(maxAttempts > 0) + + // Find the weight at which one failure will not push us under the + // reliabilityThreshold. + val weight: Double = 1 - math.pow( + 1 - reliabilityThreshold, + (1 - reliabilityThreshold) / reliabilityThreshold + ) + + // Make sure that rounding error did not cause weight to become zero. + require(weight > 0) + require(weight <= 1) + + // On each iteration, we discount the current reliability by this + // factor before adding in the new reliability data point. + val decay: Double = 1 - weight + + // Make sure that rounding error did not cause decay to round up to one. + require(decay < 1) + + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { + def go(attempts: Int, reliability: Double, outstanding: Seq[Future[Unit]]): Future[Unit] = + if (reliability >= reliabilityThreshold || (attempts == 0 && outstanding.isEmpty)) { + // We hit the threshold or ran out of tries.
Don't cancel any + // outstanding requests, just wait for them all to complete. + Future.join(outstanding.map(_.handle { case _ => })) + } else if (attempts > 0 && outstanding.length < concurrency) { + // We have not yet hit the reliability threshold, and we + // still have available concurrency, so make a new request. + go(attempts - 1, reliability, action +: outstanding) + } else { + val sel = Future.select(outstanding) + + // We need this promise wrapper because if the select is + // interrupted, it relays the interrupt to the outstanding + // requests but does not itself return with a + // failure. Wrapping in a promise lets us differentiate + // between an interrupt coming from above and the created + // Future failing for another reason. + val p = new Promise[(Try[Unit], Seq[Future[Unit]])] + p.setInterruptHandler { + case e => + // Interrupt the outstanding requests. + sel.raise(e) + // Halt the computation with a failure. + p.updateIfEmpty(Throw(e)) + } + + // When the select finishes, update the promise with the value. + sel.respond(p.updateIfEmpty) + p.flatMap { + case (tryRes, remaining) => + val delta = if (tryRes.isReturn) weight else 0 + go(attempts, reliability * decay + delta, remaining) + } + } + + go(maxAttempts, 0, Seq.empty) + } + } + + /** + * Write a log message recording each invocation of each warmup + * action. The log message is comma-separated, with the following + * fields: + * + * name: + * The supplied name. + * + * start time: + * The number of milliseconds since the start of the Unix + * epoch. + * + * duration: + * How long this warmup action took, in milliseconds. + * + * result: + * "passed" or "failed" depending on whether the Future + * returned an exception. + * + * exception type: + * If the result "failed", then this will be the name of + * the exception that caused the failure. If it "passed", + * it will be the empty string.
+ * + * These messages should be sufficient to get a picture of how + * warmup proceeded, since they allow the messages to be ordered + * and sorted by type. You can use this information to tune the + * warmup parameters. + */ + class Logged(logger: Logger) extends Runner { + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = { + val start = Time.now + val startStr = start.sinceEpoch.inMilliseconds.toString + + action.respond { + case Throw(WarmupComplete) => + // Don't log anything for computations that we abandoned + // because warmup is complete. + + case r => + val duration = (Time.now - start).inMilliseconds + val result = r match { + case Throw(e) => "failed," + e.toString.takeWhile(_ != '\n') + case _ => "passed," + } + logger.info(s"$name,${startStr}ms,${duration}ms,$result") + } + } + } + + /** + * Ensure that no more than the specified number of invocations of a + * warmup action are happening at one time. + */ + class LimitedConcurrency(limit: Int) extends Runner { + private[this] val sem = new AsyncSemaphore(limit) + override def runOne(name: String, action: => Future[Unit]): Future[Unit] = + sem.acquireAndRun(action) + } + + /** + * Create a new Warmup that performs this single action. + */ + def apply[A](name: String)(f: A => Future[_]): Warmup[A] = new Warmup(Map(name -> f)) + + /** + * Create a Warmup that does nothing. This is useful in concert with + * warmField. + */ + def empty[A]: Warmup[A] = new Warmup[A](Map.empty) +} + +/** + * A set of independent warmup actions. Each action should be the + * minimum work that can be done in order to exercise a code + * path. Runners can be used to e.g. run the actions repeatedly or + * with timeouts. + */ +class Warmup[A](val actions: Map[String, A => Future[_]]) { + def ++(other: Warmup[A]) = new Warmup[A](actions ++ other.actions) + + /** + * The names of the individual warmup actions that this warmup is + * composed of. 
+ */ + def names: Set[String] = actions.keySet + + /** + * Create a new Warmup that does all of the actions of this warmup + * and additionally does warmup on the value specified by `f`. + */ + def warmField[B](f: A => B)(implicit w: Warmup[B]): Warmup[A] = + new Warmup[A](actions ++ (w.actions.mapValues(f.andThen))) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD new file mode 100644 index 000000000..7dc0a6379 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BUILD @@ -0,0 +1,135 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/fasterxml/jackson/dataformat:jackson-dataformat-yaml", + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/io/netty:netty4-tcnative-boringssl-static", + "3rdparty/jvm/org/apache/kafka:kafka-clients", + "3rdparty/jvm/org/apache/thrift:libthrift", + "ads-common/loggingclient/src/main/scala", + "core-app-services/failed_task/src/scala/com/twitter/coreservices/failed_task/writer", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "decider", + "deferredrpc/client", + "deferredrpc/client/src/main/thrift:thrift-scala", + "eventbus/client", + "fanoutservice/thrift/src/main/thrift:thrift-scala", + "featureswitches/featureswitches-core:v2", + "featureswitches/featureswitches-core/src/main/scala", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authorization/server", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/client", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/server", + 
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/transport", + "finagle/finagle-core/src/main", + "finagle/finagle-http/src/main/scala", + "finagle/finagle-memcached/src/main/scala", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-stats", + "finagle/finagle-thrift", + "finagle/finagle-thrift/src/main/java", + "finagle/finagle-thriftmux", + "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/producers", + "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "kafka/finagle-kafka/finatra-kafka/src/main/scala", + "limiter-client", + "limiter/thrift-only/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "passbird/thrift-only/src/main/thrift:thrift-scala", + "quill/capture", + "quill/core/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/repo/src/main/scala", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/request/src/main/scala", + "tweetypie/servo/util", + "snowflake:client", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "snowflake/src/main/thrift:thrift-scala", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/scala/com/twitter/search/blender/services/strato", + "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:annotation-service-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/merlin:thrift-scala", + 
"src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_comparison_service-scala", + "stitch/stitch-core", + "stitch/stitch-repo/src/main/scala", + "stitch/stitch-timelineservice/src/main/scala", + "storage/clients/manhattan/client/src/main/scala", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/fed/server", + "strato/src/main/scala/com/twitter/strato/rpc", + "strato/src/main/scala/com/twitter/strato/server", + "strato/src/main/scala/com/twitter/strato/util", + "stringcenter/client/src/main/scala", + "tweetypie/server/config", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + 
"tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/logcachewrites", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/caching", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/context", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider", + "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", + "tweetypie/common/src/scala/com/twitter/tweetypie/matching", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-config/yaml", + "twitter-context", + "twitter-server-internal", + "twitter-server/server/src/main/scala", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-app", + "util/util-hashing/src/main/scala", + "util/util-slf4j-api/src/main/scala", + "util/util-stats/src/main/scala", + "visibility/common/src/main/scala/com/twitter/visibility/common", + "visibility/common/src/main/scala/com/twitter/visibility/common/tflock", + "visibility/lib:tweets", + "visibility/lib/src/main/scala/com/twitter/visibility/util", + "visibility/writer/src/main/scala/com/twitter/visibility/writer", + "visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala new file mode 100644 index 000000000..044177438 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/BackendClients.scala @@ -0,0 +1,796 @@ +package com.twitter.tweetypie +package config + +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.ads.loggingclient.AdsLoggingClient +import com.twitter.adserver.thriftscala.AdCallbackEvent +import com.twitter.conversions.DurationOps._ +import com.twitter.conversions.PercentOps._ +import com.twitter.container.{thriftscala => ccs} +import com.twitter.deferredrpc.client.DeferredThriftService +import com.twitter.deferredrpc.thrift.Datacenter +import com.twitter.deferredrpc.thrift.DeferredRPC +import com.twitter.deferredrpc.thrift.Target +import com.twitter.escherbird.thriftscala.TweetEntityAnnotationService$FinagleClient +import com.twitter.escherbird.thriftscala.{ + TweetEntityAnnotationService => TweetEntityAnnotationScroogeIface +} +import com.twitter.eventbus.client.EventBusPublisher +import com.twitter.eventbus.client.EventBusPublisherBuilder +import com.twitter.expandodo.thriftscala.CardsService$FinagleClient +import com.twitter.expandodo.thriftscala.{CardsService => CardsScroogeIface} +import com.twitter.finagle._ +import com.twitter.finagle.builder.ClientBuilder +import com.twitter.finagle.client.Transporter +import com.twitter.finagle.factory.TimeoutFactory +import com.twitter.finagle.liveness.FailureAccrualFactory +import com.twitter.finagle.loadbalancer.Balancers +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.finagle.mtls.client.MtlsClientBuilder._ +import com.twitter.finagle.mtls.client.MtlsStackClient._ +import com.twitter.finagle.partitioning.param +import com.twitter.finagle.service.TimeoutFilter.PropagateDeadlines +import com.twitter.finagle.service._ +import com.twitter.finagle.ssl.OpportunisticTls +import 
com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ThriftClientRequest +import com.twitter.finagle.thriftmux.MethodBuilder +import com.twitter.finagle.tracing.DefaultTracer +import com.twitter.flockdb.client.thriftscala.FlockDB +import com.twitter.flockdb.client.FlockResponse +import com.twitter.flockdb.client.TFlockClient +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.geoduck.backend.hydration.thriftscala.{Hydration => GeoduckHydration} +import com.twitter.geoduck.backend.relevance.thriftscala.Relevance +import com.twitter.geoduck.backend.relevance.thriftscala.Relevance$FinagleClient +import com.twitter.geoduck.backend.relevance.thriftscala.RelevanceContext +import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate +import com.twitter.geoduck.thriftscala.ReverseGeocoder +import com.twitter.geoduck.util.service.GeoduckLocate +import com.twitter.gizmoduck.thriftscala.UserService +import com.twitter.hashing.KeyHasher +import com.twitter.limiter.client.LimiterClientFactory +import com.twitter.mediainfo.server.thriftscala.MediaInfoService$FinagleClient +import com.twitter.mediainfo.server.thriftscala.{MediaInfoService => MediaInfoScroogeIface} +import com.twitter.merlin.thriftscala.UserRolesService +import com.twitter.passbird.thriftscala.PassbirdService +import com.twitter.passbird.thriftscala.PassbirdService$FinagleClient +import com.twitter.service.gen.scarecrow.thriftscala.ScarecrowService$FinagleClient +import com.twitter.service.gen.scarecrow.thriftscala.{ScarecrowService => ScarecrowScroogeIface} +import com.twitter.service.talon.thriftscala.Talon$FinagleClient +import com.twitter.service.talon.thriftscala.{Talon => TalonScroogeIface} +import com.twitter.snowflake.client.SnowflakeClient +import com.twitter.snowflake.thriftscala.Snowflake +import com.twitter.socialgraph.thriftscala.SocialGraphService$FinagleClient +import com.twitter.socialgraph.thriftscala.{SocialGraphService => 
SocialGraphScroogeIface} +import com.twitter.storage.client.manhattan.kv.Experiments +import com.twitter.storage.client.manhattan.kv.ManhattanKVClient +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storage.client.manhattan.kv.NoMtlsParams +import com.twitter.strato.client.Strato +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.timelineservice.fanout.thriftscala.FanoutService +import com.twitter.timelineservice.fanout.thriftscala.FanoutService$FinagleClient +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.backends._ +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.media.MediaClient +import com.twitter.tweetypie.service.ReplicatingTweetService.GatedReplicationClient +import com.twitter.tweetypie.storage.ManhattanTweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent +import com.twitter.tweetypie.thriftscala.TweetEvent +import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleClient +import com.twitter.user_image_service.thriftscala.UserImageService$FinagleClient +import com.twitter.user_image_service.thriftscala.{UserImageService => UserImageScroogeIface} +import com.twitter.util.Throw +import com.twitter.util.Timer +import com.twitter.util.{TimeoutException => UtilTimeoutException} +import scala.util.Random + +trait BackendClients { + + /** returns all the finagle.Names created while building clients */ + def referencedNames: Seq[Name] + + val asyncRetryTweetService: ThriftTweetService + val asyncTweetDeletionService: ThriftTweetService + val asyncTweetService: ThriftTweetService + val configBus: ConfigBus + val creativesContainerService: CreativesContainerService + val darkTrafficClient: Service[Array[Byte], Array[Byte]] + 
val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] + val escherbird: Escherbird + val expandodo: Expandodo + val fanoutServiceClient: FanoutService.MethodPerEndpoint + val geoHydrationLocate: GeoduckLocate + val geoRelevance: Relevance.MethodPerEndpoint + val geoScrubEventStore: GeoScrubEventStore + val geoduckGeohashLocate: GeoduckGeohashLocate + val gizmoduck: Gizmoduck + val gnipEnricherator: GnipEnricherator + val guano: Guano + val limiterService: LimiterService + val lowQoSReplicationClients: Seq[GatedReplicationClient] + val mediaClient: MediaClient + val mediaInfoService: MediaInfoService + val memcacheClient: memcached.Client + val merlin: UserRolesService.MethodPerEndpoint + val passbirdClient: PassbirdService.MethodPerEndpoint + val replicationClient: ThriftTweetService + val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] + val scarecrow: Scarecrow + val snowflakeClient: SnowflakeClient.SnowflakeClient + val socialGraphService: SocialGraphService + val stratoserverClient: StratoClient + val talon: Talon + val tflockReadClient: TFlockClient + val tflockWriteClient: TFlockClient + val timelineService: TimelineService + val tweetEventsPublisher: EventBusPublisher[TweetEvent] + val tweetStorageClient: TweetStorageClient + val userImageService: UserImageService + val callbackPromotedContentLogger: CallbackPromotedContentLogger +} + +/** + * default implementation of BackendClients that connects to real, remote + * backend services. + */ +object BackendClients { + // for most services, tweetypie typically maintains only a single connection to + // each host in the cluster, and that is enough for normal steady-state work. + // to prevent ddos'ing backends during unusual traffic influxes, we set the host + // connection limit to be 2-3x the steady-state daily peak, giving plenty of head + // room but without allowing an excessive number of connections. 
+ private val defaultHostConnectionLimit = 3 + + // 100ms is greater than most gc pauses; smaller values cause more timeouts + private val defaultConnectTimeout = 100.milliseconds + // tcpConnect timeout is less than half of defaultConnectTimeout, to allow at least + // two tries (except when there is a GC pause) + private val defaultTcpConnectTimeout = 20.milliseconds + + private val WriteExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = + RetryPolicy.WriteExceptionsOnly + + private val ClosedExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { + case Throw(_: ChannelClosedException) => true + } + + private val TimeoutExceptionsOnly: PartialFunction[Try[Nothing], Boolean] = { + case Throw(_: TimeoutException) => true + case Throw(_: UtilTimeoutException) => true + } + + private val NoBackoff = Backoff.const(0.second) + + private def retry(writeExceptions: Int = 100, closedExceptions: Int = 2, timeouts: Int = 0) = + RetryPolicy.combine( + RetryPolicy.backoff(NoBackoff.take(writeExceptions))(WriteExceptionsOnly), + RetryPolicy.backoff(NoBackoff.take(closedExceptions))(ClosedExceptionsOnly), + RetryPolicy.backoff(NoBackoff.take(timeouts))(TimeoutExceptionsOnly) + ) + + implicit val warmup: Warmup[BackendClients] = { + // Use a random string so that the keys are likely to hash to + // different memcache instances. Request multiple keys at a time so + // that we don't consider the backend warm just because we can get a + // bunch of successful responses to one cache. 
+ val cacheGet = (_: memcached.Client).get(Seq.fill(20)(Random.nextLong.toString)) + + Warmup + .empty[BackendClients] + .warmField(_.expandodo) + .warmField(_.gizmoduck) + .warmField(_.memcacheClient)(Warmup("memcache")(cacheGet)) + .warmField(_.talon) + .warmField(_.tweetStorageClient)(Warmup("tweetstorage")(_.ping())) + .warmField(_.tflockReadClient)(Warmup("tflock")(_.contains(UserTimelineGraph, 0, 0))) + .warmField(_.scarecrow) + .warmField(_.socialGraphService) + .warmField(_.timelineService) + .warmField(_.geoRelevance)(Warmup("geo_relevance")(_.placeSearch(RelevanceContext()))) + } + + def apply( + settings: TweetServiceSettings, + deciderGates: TweetypieDeciderGates, + statsReceiver: StatsReceiver, + hostStatsReceiver: StatsReceiver, + timer: Timer, + clientIdHelper: ClientIdHelper, + ): BackendClients = { + val thriftClientId = settings.thriftClientId + val tracer = DefaultTracer + + val env = settings.env.toString + val zone = settings.zone + val log = Logger(getClass) + val backendsScope = statsReceiver.scope("backends") + + /** a Seq builder of finagle.Names loaded via getName */ + val referencedNamesBuilder = Seq.newBuilder[Name] + + /** the default set of exceptions we believe are safe for Tweetypie to retry */ + val defaultResponseClassifier: ResponseClassifier = + ResponseClassifier.RetryOnChannelClosed.orElse(ResponseClassifier.RetryOnTimeout) + + /** + * Resolve a string into a Finagle Name and record it + * in referencedNames. + */ + def eval(address: String): Name = { + val name = Resolver.eval(address) + referencedNamesBuilder += name + name + } + + def backendContext(name: String) = + Backend.Context(timer, backendsScope.scope(name)) + + // by default, retries on most exceptions (see defaultRetryExceptions). if an rpc is not + // idempotent, it should use a different retry policy. 
+ def clientBuilder(name: String) = { + ClientBuilder() + .name(name) + .reportTo(statsReceiver) + .reportHostStats(hostStatsReceiver) + .tracer(tracer) + .daemon(true) + .tcpConnectTimeout(defaultTcpConnectTimeout) + .connectTimeout(defaultConnectTimeout) + .retryPolicy(retry()) + } + + def thriftMuxClientBuilder(name: String, address: String, clazz: Class[_]) = { + clientBuilder(name) + .stack( + ThriftMux.client + .withClientId(thriftClientId) + .withOpportunisticTls(OpportunisticTls.Required) + .withServiceClass(clazz)) + .loadBalancer(balancer()) + .dest(eval(address)) + .mutualTls(settings.serviceIdentifier) + } + + // Our base ThriftMux.Client + // Prefer using thriftMuxMethodBuilder below but + // can be used to build custom clients (re: darkTrafficClient) + def thriftMuxClient(name: String, propagateDeadlines: Boolean = true): ThriftMux.Client = { + ThriftMux.client + .withClientId(thriftClientId) + .withLabel(name) + .withStatsReceiver(statsReceiver) + .withTracer(tracer) + .withTransport.connectTimeout(defaultTcpConnectTimeout) + .withSession.acquisitionTimeout(defaultConnectTimeout) + .withMutualTls(settings.serviceIdentifier) + .withOpportunisticTls(OpportunisticTls.Required) + .configured(PropagateDeadlines(enabled = propagateDeadlines)) + } + + // If an endpoint is non-idempotent you should add .nonidempotent and + // leave off any ResponseClassifiers (it will remove any placed before but not after) + // If it is unequivocally idempotent you should add .idempotent and + // leave off any ResponseClassifiers (it will retry on all Throws). 
This will also + // enable backup requests + def thriftMuxMethodBuilder( + name: String, + dest: String, + ): MethodBuilder = { + thriftMuxClient(name) + .withLoadBalancer(balancer(minAperture = 2)) + .methodBuilder(dest) + .withRetryForClassifier(defaultResponseClassifier) + .withTimeoutTotal(2.seconds) // total timeout including 1st attempt and up to 2 retries + } + + def balancer(minAperture: Int = 2) = Balancers.aperture(minAperture = minAperture) + + val eventBusPublisherBuilder = + EventBusPublisherBuilder() + .dest(eval("/s/eventbus/provisioning")) + .clientId(settings.thriftClientId) + // eventbus stats are further scoped by stream, so put all + // publishers under the same stats namespace + .statsReceiver(backendsScope.scope("event_bus")) + // This makes the underlying kps-client to be resolved over WilyNs vs DNS + .serviceIdentifier(settings.serviceIdentifier) + + new BackendClients { + def referencedNames: Seq[Name] = referencedNamesBuilder.result() + + val memcacheClient: memcached.Client = + Memcached.client + .withMutualTls(settings.serviceIdentifier) + .connectionsPerEndpoint(2) + .configured(param.KeyHasher(KeyHasher.FNV1_32)) + .configured(Transporter.ConnectTimeout(100.milliseconds)) + .configured(TimeoutFilter.Param(200.milliseconds)) + .configured(TimeoutFactory.Param(200.milliseconds)) + .configured(param.EjectFailedHost(false)) + .configured(FailureAccrualFactory.Param(numFailures = 20, markDeadFor = 30.second)) + .configured( + PendingRequestFilter.Param(limit = Some(settings.cacheClientPendingRequestLimit)) + ) + .filtered(new MemcacheExceptionLoggingFilter) + .newRichClient(dest = eval(settings.twemcacheDest), label = "memcache") + + /* clients */ + val tweetStorageClient: TweetStorageClient = + Manhattan.fromClient( + new ManhattanTweetStorageClient( + settings.tweetStorageConfig, + statsReceiver = backendsScope.scope("tweet_storage"), + clientIdHelper = clientIdHelper, + ) + ) + + val socialGraphService: SocialGraphService = { + val 
finagleClient = + new SocialGraphService$FinagleClient( + thriftMuxClientBuilder( + "socialgraph", + "/s/socialgraph/socialgraph", + classOf[SocialGraphScroogeIface.MethodPerEndpoint] + ).loadBalancer(Balancers.aperturePeakEwma(minAperture = 16)) + .build() + ) + + settings.socialGraphSeviceConfig( + SocialGraphService.fromClient(finagleClient), + backendContext("socialgraph") + ) + } + + val tflockClient = + new FlockDB.FinagledClient( + thriftMuxClientBuilder("tflock", "/s/tflock/tflock", classOf[FlockDB.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 5)) + .responseClassifier(FlockResponse.classifier) + .build(), + serviceName = "tflock", + stats = statsReceiver + ) + + val tflockReadClient: TFlockClient = + settings.tflockReadConfig(tflockClient, backendContext("tflock")) + + val tflockWriteClient: TFlockClient = + settings.tflockWriteConfig(tflockClient, backendContext("tflock")) + + val gizmoduck: Gizmoduck = { + val clientBuilder = + thriftMuxClientBuilder( + "gizmoduck", + "/s/gizmoduck/gizmoduck", + classOf[UserService.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 63)) + val mb = MethodBuilder + .from(clientBuilder) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[UserService.ServicePerEndpoint] + + val gizmoduckClient = ThriftMux.Client.methodPerEndpoint(mb) + settings.gizmoduckConfig(Gizmoduck.fromClient(gizmoduckClient), backendContext("gizmoduck")) + } + + val merlin: UserRolesService.MethodPerEndpoint = { + val thriftClient = thriftMuxMethodBuilder("merlin", "/s/merlin/merlin") + .withTimeoutPerRequest(100.milliseconds) + .withTimeoutTotal(400.milliseconds) + .idempotent(0.01) + .servicePerEndpoint[UserRolesService.ServicePerEndpoint] + + ThriftMux.Client.methodPerEndpoint(thriftClient) + } + + val talon: Talon = { + val talonClient = + new Talon$FinagleClient( + thriftMuxClientBuilder( + "talon", + "/s/talon/backend", + classOf[TalonScroogeIface.MethodPerEndpoint]) + .build() + ) + + 
settings.talonConfig(Talon.fromClient(talonClient), backendContext("talon")) + } + + val guano = Guano() + + val mediaInfoService: MediaInfoService = { + val finagleClient = + new MediaInfoService$FinagleClient( + thriftMuxClientBuilder( + "mediainfo", + "/s/photurkey/mediainfo", + classOf[MediaInfoScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 75)) + .build() + ) + + settings.mediaInfoServiceConfig( + MediaInfoService.fromClient(finagleClient), + backendContext("mediainfo") + ) + } + + val userImageService: UserImageService = { + val finagleClient = + new UserImageService$FinagleClient( + thriftMuxClientBuilder( + "userImage", + "/s/user-image-service/uis", + classOf[UserImageScroogeIface.MethodPerEndpoint]) + .build() + ) + + settings.userImageServiceConfig( + UserImageService.fromClient(finagleClient), + backendContext("userImage") + ) + } + + val mediaClient: MediaClient = + MediaClient.fromBackends( + userImageService = userImageService, + mediaInfoService = mediaInfoService + ) + + val timelineService: TimelineService = { + val timelineServiceClient = + new tls.TimelineService$FinagleClient( + thriftMuxClientBuilder( + "timelineService", + "/s/timelineservice/timelineservice", + classOf[tls.TimelineService.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 13)) + .build() + ) + + settings.timelineServiceConfig( + TimelineService.fromClient(timelineServiceClient), + backendContext("timelineService") + ) + } + + val expandodo: Expandodo = { + val cardsServiceClient = + new CardsService$FinagleClient( + thriftMuxClientBuilder( + "expandodo", + "/s/expandodo/server", + classOf[CardsScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 6)) + .build() + ) + + settings.expandodoConfig( + Expandodo.fromClient(cardsServiceClient), + backendContext("expandodo") + ) + } + + val creativesContainerService: CreativesContainerService = { + val mb = thriftMuxMethodBuilder( + "creativesContainerService", + 
"/s/creatives-container/creatives-container", + ).withTimeoutTotal(300.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + .servicePerEndpoint[ccs.CreativesContainerService.ServicePerEndpoint] + + settings.creativesContainerServiceConfig( + CreativesContainerService.fromClient(ccs.CreativesContainerService.MethodPerEndpoint(mb)), + backendContext("creativesContainerService") + ) + } + + val scarecrow: Scarecrow = { + val scarecrowClient = new ScarecrowService$FinagleClient( + thriftMuxClientBuilder( + "scarecrow", + "/s/abuse/scarecrow", + classOf[ScarecrowScroogeIface.MethodPerEndpoint]) + .loadBalancer(balancer(minAperture = 6)) + .build(), + serviceName = "scarecrow", + stats = statsReceiver + ) + + settings.scarecrowConfig(Scarecrow.fromClient(scarecrowClient), backendContext("scarecrow")) + } + + val snowflakeClient: Snowflake.MethodPerEndpoint = { + eval("/s/snowflake/snowflake") // eagerly resolve the serverset + val mb = thriftMuxMethodBuilder( + "snowflake", + "/s/snowflake/snowflake" + ).withTimeoutTotal(300.milliseconds) + .withTimeoutPerRequest(100.milliseconds) + .idempotent(maxExtraLoad = 1.percent) + + SnowflakeClient.snowflakeClient(mb) + } + + val deferredRpcClient = + new DeferredRPC.FinagledClient( + thriftMuxClientBuilder( + "deferredrpc", + "/s/kafka-shared/krpc-server-main", + classOf[DeferredRPC.MethodPerEndpoint]) + .requestTimeout(200.milliseconds) + .retryPolicy(retry(timeouts = 3)) + .build(), + serviceName = "deferredrpc", + stats = statsReceiver + ) + + def deferredTweetypie(target: Target): ThriftTweetService = { + // When deferring back to the local datacenter, preserve the finagle + // context and dtabs. This will ensure that developer dtabs are honored + // and that context is preserved in eventbus. (eventbus enqueues only + // happen in async requests within the same datacenter.) 
+ // + // Effectively, this means we consider deferredrpc requests within the + // same datacenter to be part of the same request, but replicated + // requests are not. + val isLocal: Boolean = target.datacenter == Datacenter.Local + + val deferredThriftService: Service[ThriftClientRequest, Array[Byte]] = + new DeferredThriftService( + deferredRpcClient, + target, + serializeFinagleContexts = isLocal, + serializeFinagleDtabs = isLocal + ) + + new TweetServiceInternal$FinagleClient(deferredThriftService) + } + + val replicationClient: ThriftTweetService = + deferredTweetypie(Target(Datacenter.AllOthers, "tweetypie-replication")) + + // used for read endpoints replication + val lowQoSReplicationClients: Seq[GatedReplicationClient] = { + val rampUpGate = Gate.linearRampUp(Time.now, settings.forkingRampUp) + + // Gates to avoid sending replicated reads from a cluster to itself + val inATLA = if (settings.zone == "atla") Gate.True else Gate.False + val inPDXA = if (settings.zone == "pdxa") Gate.True else Gate.False + + Seq( + GatedReplicationClient( + client = deferredTweetypie(Target(Datacenter.Atla, "tweetypie-lowqos")), + gate = rampUpGate & deciderGates.replicateReadsToATLA & !inATLA + ), + GatedReplicationClient( + client = deferredTweetypie(Target(Datacenter.Pdxa, "tweetypie-lowqos")), + gate = rampUpGate & deciderGates.replicateReadsToPDXA & !inPDXA + ) + ) + } + + // used for async operations in the write path + val asyncTweetService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie")) + + // used to trigger asyncEraseUserTweetsRequest + val asyncTweetDeletionService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie-retweet-deletion")) + + // used for async retries + val asyncRetryTweetService: ThriftTweetService = + deferredTweetypie(Target(Datacenter.Local, "tweetypie-async-retry")) + + val darkTrafficClient: Service[Array[Byte], Array[Byte]] = { + val thriftService = + thriftMuxClient( + 
"tweetypie.dark", + propagateDeadlines = false + ).withRequestTimeout(100.milliseconds) + .newService("/s/tweetypie/proxy") + + val transformer = + new Filter[Array[Byte], Array[Byte], ThriftClientRequest, Array[Byte]] { + override def apply( + request: Array[Byte], + service: Service[ThriftClientRequest, Array[Byte]] + ): Future[Array[Byte]] = + service(new ThriftClientRequest(request, false)) + } + + transformer andThen thriftService + } + + val geoHydrationClient: GeoduckHydration.MethodPerEndpoint = { + val mb = thriftMuxMethodBuilder("geoduck_hydration", "/s/geo/hydration") + .withTimeoutPerRequest(100.millis) + .idempotent(maxExtraLoad = 1.percent) + ThriftMux.Client.methodPerEndpoint( + mb.servicePerEndpoint[GeoduckHydration.ServicePerEndpoint]) + } + + val geoHydrationLocate: GeoduckLocate = geoHydrationClient.locate + + val geoReverseGeocoderClient: ReverseGeocoder.MethodPerEndpoint = { + val mb = thriftMuxMethodBuilder("geoduck_reversegeocoder", "/s/geo/geoduck_reversegeocoder") + .withTimeoutPerRequest(100.millis) + .idempotent(maxExtraLoad = 1.percent) + ThriftMux.Client.methodPerEndpoint( + mb.servicePerEndpoint[ReverseGeocoder.ServicePerEndpoint]) + } + + val geoduckGeohashLocate: GeoduckGeohashLocate = { + new GeoduckGeohashLocate( + reverseGeocoderClient = geoReverseGeocoderClient, + hydrationClient = geoHydrationClient, + classScopedStatsReceiver = statsReceiver.scope("geo_geohash_locate")) + } + + val geoRelevance = + new Relevance$FinagleClient( + thriftMuxClientBuilder( + "geoduck_relevance", + "/s/geo/relevance", + classOf[Relevance.MethodPerEndpoint]) + .requestTimeout(100.milliseconds) + .retryPolicy(retry(timeouts = 1)) + .build(), + stats = statsReceiver + ) + + val fanoutServiceClient = + new FanoutService$FinagleClient( + new DeferredThriftService(deferredRpcClient, Target(Datacenter.Local, "fanoutservice")), + serviceName = "fanoutservice", + stats = statsReceiver + ) + + val limiterService: LimiterService = { + val limiterClient = + new 
LimiterClientFactory( + name = "limiter", + clientId = thriftClientId, + tracer = tracer, + statsReceiver = statsReceiver, + serviceIdentifier = settings.serviceIdentifier, + opportunisticTlsLevel = OpportunisticTls.Required, + daemonize = true + )(eval("/s/limiter/limiter")) + + val limiterBackend = settings.limiterBackendConfig( + LimiterBackend.fromClient(limiterClient), + backendContext("limiter") + ) + + LimiterService.fromBackend( + limiterBackend.incrementFeature, + limiterBackend.getFeatureUsage, + getAppId, + backendsScope.scope("limiter") + ) + } + + val passbirdClient = + new PassbirdService$FinagleClient( + thriftMuxClientBuilder( + "passbird", + "/s/passbird/passbird", + classOf[PassbirdService.MethodPerEndpoint]) + .requestTimeout(100.milliseconds) + .retryPolicy(retry(timeouts = 1)) + .build(), + serviceName = "passbird", + stats = statsReceiver + ) + + val escherbird: Escherbird = { + val escherbirdClient = + new TweetEntityAnnotationService$FinagleClient( + thriftMuxClientBuilder( + "escherbird", + "/s/escherbird/annotationservice", + classOf[TweetEntityAnnotationScroogeIface.MethodPerEndpoint]) + .build() + ) + settings.escherbirdConfig( + Escherbird.fromClient(escherbirdClient), + backendContext("escherbird") + ) + } + + val geoScrubEventStore: GeoScrubEventStore = { + val mhMtlsParams = + if (settings.serviceIdentifier == EmptyServiceIdentifier) NoMtlsParams + else + ManhattanKVClientMtlsParams( + serviceIdentifier = settings.serviceIdentifier, + opportunisticTls = OpportunisticTls.Required) + + val mhClient = + new ManhattanKVClient( + appId = "geoduck_scrub_datastore", + dest = "/s/manhattan/omega.native-thrift", + mtlsParams = mhMtlsParams, + label = "mh_omega", + Seq(Experiments.ApertureLoadBalancer) + ) + + GeoScrubEventStore( + mhClient, + settings.geoScrubEventStoreConfig, + backendContext("geoScrubEventStore") + ) + } + + val tweetEventsPublisher: EventBusPublisher[TweetEvent] = + eventBusPublisherBuilder + .streamName("tweet_events") + 
.thriftStruct(TweetEvent) + .publishTimeout(500.milliseconds) + .serializeFinagleDtabs(true) + .build() + + val deleteLocationDataPublisher: EventBusPublisher[DeleteLocationData] = + eventBusPublisherBuilder + .streamName("tweetypie_delete_location_data_prod") + .thriftStruct(DeleteLocationData) + // deleteLocationData is relatively rare, and publishing to + // eventbus is all that the endpoint does. This means that it + // is much more likely that we will have to make a connection, + // which has much greater latency, and also makes us more + // tolerant of slow requests, so we choose a long timeout. + .publishTimeout(2.seconds) + .build() + + val retweetArchivalEventPublisher: EventBusPublisher[RetweetArchivalEvent] = + eventBusPublisherBuilder + .streamName("retweet_archival_events") + .thriftStruct(RetweetArchivalEvent) + .publishTimeout(500.milliseconds) + .build() + + val gnipEnricherator: GnipEnricherator = { + val gnipEnricherator = + thriftMuxMethodBuilder( + "enricherator", + "/s/datadelivery-enrichments/enricherator" + ) + GnipEnricherator.fromMethod(gnipEnricherator) + } + + val stratoserverClient: StratoClient = Strato.client + .withMutualTls( + serviceIdentifier = settings.serviceIdentifier, + opportunisticLevel = OpportunisticTls.Required) + .withLabel("stratoserver") + .withRequestTimeout(100.milliseconds) + .build() + + val configBus: ConfigBus = + ConfigBus(backendsScope.scope("config_bus"), settings.instanceId, settings.instanceCount) + + val callbackPromotedContentLogger: CallbackPromotedContentLogger = { + val publisher = + eventBusPublisherBuilder + .streamName(settings.adsLoggingClientTopicName) + .thriftStruct(AdCallbackEvent) + .publishTimeout(500.milliseconds) + .serializeFinagleDtabs(true) + .maxQueuedEvents(1000) + .kafkaDest("/s/kafka/ads-callback:kafka-tls") + .build() + + val stats = backendsScope.scope("promoted_content") + val adsLoggingClient = AdsLoggingClient(publisher, stats, "Tweetypie") + new 
CallbackPromotedContentLogger(adsLoggingClient, stats) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala new file mode 100644 index 000000000..4a670f4d2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Caches.scala @@ -0,0 +1,281 @@ +package com.twitter.tweetypie +package config + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.Backoff +import com.twitter.finagle.memcached +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.{Serializer => CacheSerializer, _} +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.handler.CacheBasedTweetCreationLock +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util._ +import com.twitter.util.Timer + +/** + * Provides configured caches (most backed by memcached) wrapped with appropriate metrics and locks. 
+ * + * All memcached-backed caches share: + * - one Finagle memcached client from backends.memcacheClient + * - one in memory caffeine cache + * - one Twemcache pool + * + * Each memcached-backed cache specialization provides its own: + * - key prefix or "namespace" + * - value serializer/deserializer + * - stats scope + * - log name + */ +trait Caches { + val memcachedClientWithInProcessCaching: memcached.Client + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] + val geoScrubCache: LockingCache[UserId, Cached[Time]] +} + +object Caches { + object NoCache extends Caches { + override val memcachedClientWithInProcessCaching: memcached.Client = new NullMemcacheClient() + private val toLockingCache: LockingCacheFactory = NonLockingCacheFactory + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = + toLockingCache(new NullCache) + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = + toLockingCache(new NullCache) + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = + toLockingCache(new NullCache) + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = + new NullCache + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = + toLockingCache(new NullCache) + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = + toLockingCache(new NullCache) + val geoScrubCache: LockingCache[UserId, Cached[Time]] = + toLockingCache(new NullCache) + } + + def apply( + settings: TweetServiceSettings, + stats: StatsReceiver, + timer: Timer, + clients: BackendClients, + tweetKeyFactory: TweetKeyFactory, + deciderGates: 
TweetypieDeciderGates, + clientIdHelper: ClientIdHelper, + ): Caches = { + val cachesStats = stats.scope("caches") + val cachesInprocessStats = cachesStats.scope("inprocess") + val cachesMemcacheStats = cachesStats.scope("memcache") + val cachesMemcacheObserver = new StatsReceiverCacheObserver(cachesStats, 10000, "memcache") + val cachesMemcacheTweetStats = cachesMemcacheStats.scope("tweet") + val cachesInprocessDeviceSourceStats = cachesInprocessStats.scope("device_source") + val cachesMemcacheCountStats = cachesMemcacheStats.scope("count") + val cachesMemcacheTweetCreateStats = cachesMemcacheStats.scope("tweet_create") + val cachesMemcacheGeoScrubStats = cachesMemcacheStats.scope("geo_scrub") + val memcacheClient = clients.memcacheClient + + val caffieneMemcachedClient = settings.inProcessCacheConfigOpt match { + case Some(inProcessCacheConfig) => + new CaffeineMemcacheClient( + proxyClient = memcacheClient, + inProcessCacheConfig.maximumSize, + inProcessCacheConfig.ttl, + cachesMemcacheStats.scope("caffeine") + ) + case None => + memcacheClient + } + + val observedMemcacheWithCaffeineClient = + new ObservableMemcache( + new FinagleMemcache( + caffieneMemcachedClient + ), + cachesMemcacheObserver + ) + + def observeCache[K, V]( + cache: Cache[K, V], + stats: StatsReceiver, + logName: String, + windowSize: Int = 10000 + ) = + ObservableCache( + cache, + stats, + windowSize, + // Need to use an old-school c.t.logging.Logger because that's what servo needs + com.twitter.logging.Logger(s"com.twitter.tweetypie.cache.$logName") + ) + + def mkCache[K, V]( + ttl: Duration, + serializer: CacheSerializer[V], + perCacheStats: StatsReceiver, + logName: String, + windowSize: Int = 10000 + ): Cache[K, V] = { + observeCache( + new MemcacheCache[K, V]( + observedMemcacheWithCaffeineClient, + ttl, + serializer + ), + perCacheStats, + logName, + windowSize + ) + } + + def toLockingCache[K, V]( + cache: Cache[K, V], + stats: StatsReceiver, + backoffs: Stream[Duration] = 
settings.lockingCacheBackoffs + ): LockingCache[K, V] = + new OptimisticLockingCache( + underlyingCache = cache, + backoffs = Backoff.fromStream(backoffs), + observer = new OptimisticLockingCacheObserver(stats), + timer = timer + ) + + def mkLockingCache[K, V]( + ttl: Duration, + serializer: CacheSerializer[V], + stats: StatsReceiver, + logName: String, + windowSize: Int = 10000, + backoffs: Stream[Duration] = settings.lockingCacheBackoffs + ): LockingCache[K, V] = + toLockingCache( + mkCache(ttl, serializer, stats, logName, windowSize), + stats, + backoffs + ) + + def trackTimeInCache[K, V]( + cache: Cache[K, Cached[V]], + stats: StatsReceiver + ): Cache[K, Cached[V]] = + new CacheWrapper[K, Cached[V]] { + val ageStat: Stat = stats.stat("time_in_cache_ms") + val underlyingCache: Cache[K, Cached[V]] = cache + + override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = + underlyingCache.get(keys).onSuccess(record) + + private def record(res: KeyValueResult[K, Cached[V]]): Unit = { + val now = Time.now + for (c <- res.found.values) { + ageStat.add(c.cachedAt.until(now).inMilliseconds) + } + } + } + + new Caches { + override val memcachedClientWithInProcessCaching: memcached.Client = caffieneMemcachedClient + + private val observingTweetCache: Cache[TweetKey, Cached[CachedTweet]] = + trackTimeInCache( + mkCache( + ttl = settings.tweetMemcacheTtl, + serializer = Serializer.CachedTweet.CachedCompact, + perCacheStats = cachesMemcacheTweetStats, + logName = "MemcacheTweetCache" + ), + cachesMemcacheTweetStats + ) + + // Wrap the tweet cache with a wrapper that will scribe the cache writes + // that happen to a fraction of tweets. This was added as part of the + // investigation into missing place ids and cache inconsistencies that + // were discovered by the additional fields hydrator. 
+ private[this] val writeLoggingTweetCache = + new ScribeTweetCacheWrites( + underlyingCache = observingTweetCache, + logYoungTweetCacheWrites = deciderGates.logYoungTweetCacheWrites, + logTweetCacheWrites = deciderGates.logTweetCacheWrites + ) + + val tweetCache: LockingCache[TweetKey, Cached[CachedTweet]] = + toLockingCache( + cache = writeLoggingTweetCache, + stats = cachesMemcacheTweetStats + ) + + val tweetDataCache: LockingCache[TweetId, Cached[TweetData]] = + toLockingCache( + cache = TweetDataCache(tweetCache, tweetKeyFactory.fromId), + stats = cachesMemcacheTweetStats + ) + + val tweetResultCache: LockingCache[TweetId, Cached[TweetResult]] = + toLockingCache( + cache = TweetResultCache(tweetDataCache), + stats = cachesMemcacheTweetStats + ) + + val tweetCountsCache: LockingCache[TweetCountKey, Cached[Count]] = + mkLockingCache( + ttl = settings.tweetCountsMemcacheTtl, + serializer = Serializers.CachedLong.Compact, + stats = cachesMemcacheCountStats, + logName = "MemcacheTweetCountCache", + windowSize = 1000, + backoffs = Backoff.linear(0.millis, 2.millis).take(2).toStream + ) + + val tweetCreateLockerCache: Cache[TweetCreationLock.Key, TweetCreationLock.State] = + observeCache( + new TtlCacheToCache( + underlyingCache = new KeyValueTransformingTtlCache( + underlyingCache = observedMemcacheWithCaffeineClient, + transformer = TweetCreationLock.State.Serializer, + underlyingKey = (_: TweetCreationLock.Key).toString + ), + ttl = CacheBasedTweetCreationLock.ttlChooser( + shortTtl = settings.tweetCreateLockingMemcacheTtl, + longTtl = settings.tweetCreateLockingMemcacheLongTtl + ) + ), + stats = cachesMemcacheTweetCreateStats, + logName = "MemcacheTweetCreateLockingCache", + windowSize = 1000 + ) + + val deviceSourceInProcessCache: LockingCache[String, Cached[DeviceSource]] = + toLockingCache( + observeCache( + new ExpiringLruCache( + ttl = settings.deviceSourceInProcessTtl, + maximumSize = settings.deviceSourceInProcessCacheMaxSize + ), + stats = 
cachesInprocessDeviceSourceStats, + logName = "InprocessDeviceSourceCache" + ), + stats = cachesInprocessDeviceSourceStats + ) + + val geoScrubCache: LockingCache[UserId, Cached[Time]] = + toLockingCache[UserId, Cached[Time]]( + new KeyTransformingCache( + mkCache[GeoScrubTimestampKey, Cached[Time]]( + ttl = settings.geoScrubMemcacheTtl, + serializer = Serializer.toCached(CacheSerializer.Time), + perCacheStats = cachesMemcacheGeoScrubStats, + logName = "MemcacheGeoScrubCache" + ), + (userId: UserId) => GeoScrubTimestampKey(userId) + ), + cachesMemcacheGeoScrubStats + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala new file mode 100644 index 000000000..5ebca25c2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ClientsParser.scala @@ -0,0 +1,126 @@ +package com.twitter.tweetypie.config + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.util.Try + +case object EmptyConfigException extends Exception + +case class ServiceIdentifierPattern( + role: Option[String], + service: Option[String], + environment: Option[String], +) { + // Service identifier matches if the fields of service identifier + // match all the defined fields of pattern. + def matches(id: ServiceIdentifier): Boolean = + Seq( + role.map(_ == id.role), + service.map(_ == id.service), + environment.map(_ == id.environment), + ) + .flatten + .forall(identity) + + // True if this is the kind of pattern that only specifies environment. + // This should be used in rare cases, for example letting all devel clients + // use permitted methods - like get_tweet_fields. 
+ def onlyEnv: Boolean = + role.isEmpty && service.isEmpty && environment.isDefined +} + +case class Client( + clientId: String, + serviceIdentifiers: Seq[ServiceIdentifierPattern], + tpsLimit: Option[Int], + environments: Seq[String], + loadShedEnvs: Seq[String], + permittedMethods: Set[String], + accessAllMethods: Boolean, + bypassVisibilityFiltering: Boolean, + enforceRateLimit: Boolean) { + + // Client matches a service identifier if any of its patterns + // match. + def matches(id: ServiceIdentifier): Boolean = + serviceIdentifiers.exists(_.matches(id)) +} + +object ClientsParser { + + // Case classes for parsing yaml - should match the structure of clients.yml + private case class YamlServiceIdentifier( + role: Option[String], + service: Option[String], + environment: Option[String], + ) + private case class YamlClient( + client_id: String, + service_identifiers: Option[Seq[YamlServiceIdentifier]], + service_name: String, + tps_quota: String, + contact_email: String, + environments: Seq[String], + load_shed_envs: Option[ + Seq[String] + ], // list of environments we can rejects requests from if load shedding + comment: Option[String], + permitted_methods: Option[Seq[String]], + access_all_methods: Boolean, + bypass_visibility_filtering: Boolean, + bypass_visibility_filtering_reason: Option[String], + rate_limit: Boolean) { + def toClient: Client = { + + // we provision tps_quota for both DCs during white-listing, to account for full fail-over. 
+ val tpsLimit: Option[Int] = Try(tps_quota.replaceAll("[^0-9]", "").toInt * 1000).toOption + + Client( + clientId = client_id, + serviceIdentifiers = service_identifiers.getOrElse(Nil).flatMap { id => + if (id.role.isDefined || id.service.isDefined || id.environment.isDefined) { + Seq(ServiceIdentifierPattern( + role = id.role, + service = id.service, + environment = id.environment, + )) + } else { + Seq() + } + }, + tpsLimit = tpsLimit, + environments = environments, + loadShedEnvs = load_shed_envs.getOrElse(Nil), + permittedMethods = permitted_methods.getOrElse(Nil).toSet, + accessAllMethods = access_all_methods, + bypassVisibilityFiltering = bypass_visibility_filtering, + enforceRateLimit = rate_limit + ) + } + } + + private val mapper: ObjectMapper = new ObjectMapper(new YAMLFactory()) + mapper.registerModule(DefaultScalaModule) + + private val yamlClientTypeFactory = + mapper + .getTypeFactory() + .constructCollectionLikeType( + classOf[Seq[YamlClient]], + classOf[YamlClient] + ) + + def apply(yamlString: String): Seq[Client] = { + val parsed = + mapper + .readValue[Seq[YamlClient]](yamlString, yamlClientTypeFactory) + .map(_.toClient) + + if (parsed.isEmpty) + throw EmptyConfigException + else + parsed + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala new file mode 100644 index 000000000..8d29cdc72 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfig.scala @@ -0,0 +1,100 @@ +package com.twitter.tweetypie.config + +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.tweetypie.Gate +import com.twitter.tweetypie.backends.ConfigBus +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.util.Activity + +case class DynamicConfig( + // A map of fully-qualified client ID (including the 
environment suffix, e.g. tweetypie.prod) to Client case class + clientsByFullyQualifiedId: Option[Map[String, Client]], + // Clients by service identifier parts. + clientsByRole: Option[Map[String, Seq[Client]]] = None, + clientsByService: Option[Map[String, Seq[Client]]] = None, + onlyEnvClients: Option[Seq[Client]] = None, + // These endpoints do not need permissions to be accessed + unprotectedEndpoints: Set[String] = Set("get_tweet_counts", "get_tweet_fields", "get_tweets")) { + + /** + * Function that takes a fully qualified client id and says whether it is included in the allowList + */ + val isAllowListedClient: String => Boolean = + clientsByFullyQualifiedId.map(clients => clients.contains _).getOrElse(_ => true) + + def byServiceIdentifier(serviceIdentifier: ServiceIdentifier): Set[Client] = + Iterable.concat( + get(clientsByRole, serviceIdentifier.role), + get(clientsByService, serviceIdentifier.service), + onlyEnvClients.getOrElse(Seq()), + ) + .filter(_.matches(serviceIdentifier)) + .toSet + + private def get(clientsByKey: Option[Map[String, Seq[Client]]], key: String): Seq[Client] = + clientsByKey match { + case Some(map) => map.getOrElse(key, Seq()) + case None => Seq() + } + + /** + * Take a fully qualified client id and says if the client has offered to shed reads if tweetypie + * is in an emergency + */ + val loadShedEligible: Gate[String] = Gate { (clientId: String) => + val env = ClientIdHelper.getClientIdEnv(clientId) + clientsByFullyQualifiedId.flatMap(clients => clients.get(clientId)).exists { c => + c.loadShedEnvs.contains(env) + } + } +} + +/** + * DynamicConfig uses ConfigBus to update Tweetypie with configuration changes + * dynamically. Every time the config changes, the Activity[DynamicConfig] is + * updated, and anything relying on that config will be reinitialized. 
+ */
+object DynamicConfig {
+  /**
+   * Returns the fully qualified ids for a client: one `clientId.env` entry per
+   * listed environment, or just the bare client id when no environments are listed.
+   */
+  def fullyQualifiedClientIds(client: Client): Seq[String] = {
+    val clientId = client.clientId
+    client.environments match {
+      case Nil => Seq(clientId)
+      case envs => envs.map(env => s"$clientId.$env")
+    }
+  }
+
+  // Make a Map of fully qualified client id to Client
+  def byClientId(clients: Seq[Client]): Map[String, Client] =
+    clients.flatMap { client =>
+      fullyQualifiedClientIds(client).map { fullClientId => fullClientId -> client }
+    }.toMap
+
+  /**
+   * Groups clients by one optional field of their service identifier patterns.
+   *
+   * @param get selects the field (e.g. `_.role`) used as the grouping key
+   * @param clients the clients to index
+   * @return a map from each defined field value to the clients that have a pattern
+   *         carrying that value; a client appears once per matching pattern
+   */
+  def by(get: ServiceIdentifierPattern => Option[String])(clients: Seq[Client]): Map[String, Seq[Client]] =
+    clients
+      .flatMap { c =>
+        // Patterns that leave the selected field undefined contribute no key.
+        // The selector is evaluated once per pattern and Option.get is avoided.
+        c.serviceIdentifiers.flatMap(s => get(s).map(key => key -> c))
+      }
+      .groupBy(_._1)
+      // Strict `map` instead of `mapValues`: in Scala 2.12 `mapValues` returns a lazy
+      // view that re-runs the transformation on every lookup (and is deprecated in
+      // 2.13). This index is built once and then read repeatedly.
+      .map { case (key, pairs) => key -> pairs.map(_._2) }
+
+  private[this] val clientsPath = "config/clients.yml"
+
+  /**
+   * Builds the Activity of DynamicConfig by loading `clientsPath` through
+   * `configBus.file`, parsing it with `ClientsParser`, and converting the result with
+   * `fromClients`. Loader stats are reported under the "client_allowlist" scope.
+   * `settings` is accepted but not read by this method.
+   */
+  def apply(
+    stats: StatsReceiver,
+    configBus: ConfigBus,
+    settings: TweetServiceSettings
+  ): Activity[DynamicConfig] =
+    DynamicConfigLoader(configBus.file)
+      .apply(clientsPath, stats.scope("client_allowlist"), ClientsParser.apply)
+      .map(fromClients)
+
+  /**
+   * Builds a DynamicConfig from an optional client list. When `clients` is None every
+   * index is None; otherwise the clients are indexed by fully qualified id, by role,
+   * by service, and the clients having at least one environment-only pattern are
+   * collected separately.
+   */
+  def fromClients(clients: Option[Seq[Client]]): DynamicConfig =
+    DynamicConfig(
+      clientsByFullyQualifiedId = clients.map(byClientId),
+      clientsByRole = clients.map(by(_.role)),
+      clientsByService = clients.map(by(_.service)),
+      onlyEnvClients = clients.map(_.filter { client =>
+        client.serviceIdentifiers.exists(_.onlyEnv)
+      }),
+    )
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala
new file mode 100644
index 000000000..724f97644
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/DynamicConfigLoader.scala
@@ -0,0 +1,69 @@
+package com.twitter.tweetypie.config
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.servo.util.ExceptionCounter
+import com.twitter.tweetypie.serverutil.ActivityUtil
+import com.twitter.util.{Activity, Return,
Try} +import com.twitter.util.logging.Logger + +/** + * Loads a config file at `path`, parses it with `parse`, and exposes the result as an + * Activity of Option[T]. + */ +trait DynamicConfigLoader { + def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]] +} + +object DynamicConfigLoader { + + /** + * Build a loader on top of `read`, which maps a path to an Activity of the file contents. + */ + def apply(read: String => Activity[String]): DynamicConfigLoader = + new DynamicConfigLoader { + val logger = Logger(getClass) + + // Pass the Activity's states through unchanged while bumping the "success", "pending" + // or "failure" counter for each Ok / Pending / Failed state that is mapped. + private def snoopState[T](stats: StatsReceiver)(a: Activity[T]): Activity[T] = { + val pending = stats.counter("pending") + val failure = stats.counter("failure") + val success = stats.counter("success") + + a.mapState { + case s @ Activity.Ok(_) => + success.incr() + s + case Activity.Pending => + pending.incr() + Activity.Pending + case s @ Activity.Failed(_) => + failure.incr() + s + } + } + + /** + * Read and parse the config at `path`. Exceptions are counted, logged as + * "Invalid config in $path" and rethrown on the raw activity; the raw activity is then + * stabilized, and any state other than Ok is reported as Ok(None), so the returned + * Activity is never Pending or Failed. State counters are recorded under the "raw" and + * "stabilized" scopes, and the "config_state" gauge reports a hash of the current config + * (0 when there is none or sampling fails). + */ + def apply[T](path: String, stats: StatsReceiver, parse: String => T): Activity[Option[T]] = { + val exceptionCounter = new ExceptionCounter(stats) + + val rawActivity: Activity[T] = + snoopState(stats.scope("raw"))( + ActivityUtil + .strict(read(path)) + .map(parse) + .handle { + case e => + exceptionCounter(e) + logger.error(s"Invalid config in $path", e) + throw e + } + ) + + val stableActivity = + snoopState(stats.scope("stabilized"))(rawActivity.stabilize).mapState[Option[T]] { + case Activity.Ok(t) => Activity.Ok(Some(t)) + case _ => Activity.Ok(None) + } + + stats.provideGauge("config_state") { + Try(stableActivity.sample()) match { + case Return(Some(c)) => + // Int.MinValue.abs overflows and stays negative; clamp so the gauge is never negative. + val hash = c.hashCode + if (hash == Int.MinValue) Int.MaxValue else hash.abs + case _ => 0 + } + } + + stableActivity + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala new file mode 100644 index 000000000..0f8206ffa --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ExternalRepositories.scala @@ -0,0 +1,182 @@ +package com.twitter.tweetypie +package config + +import com.twitter.flockdb.client.StatusGraph +import com.twitter.servo.util.FutureArrow +import 
com.twitter.stitch.timelineservice.TimelineService.GetPerspectives +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.repository.DeviceSourceRepository.Type +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil._ +import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository + +/** + * Tweetypie's read path composes results from many data sources. This + * trait is a collection of repositories for external data access. + * These repositories should not have (within-Tweetypie) caches, + * deciders, etc. applied to them, since that is done when the + * repositories are composed together. They should be the minimal + * wrapping of the external clients in order to expose an Arrow-based + * interface. + */ +trait ExternalRepositories { + def card2Repo: Card2Repository.Type + def cardRepo: CardRepository.Type + def cardUsersRepo: CardUsersRepository.Type + def conversationIdRepo: ConversationIdRepository.Type + def containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType + def containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType + def deviceSourceRepo: DeviceSourceRepository.Type + def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type + def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type + def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type + def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type + def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type + def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type + def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type + def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type + def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type + def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type + def mediaMetadataRepo: 
MediaMetadataRepository.Type + def perspectiveRepo: PerspectiveRepository.Type + def placeRepo: PlaceRepository.Type + def profileGeoRepo: ProfileGeoRepository.Type + def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type + def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type + def relationshipRepo: RelationshipRepository.Type + def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type + def tweetCountsRepo: TweetCountsRepository.Type + def tweetResultRepo: TweetResultRepository.Type + def tweetSpamCheckRepo: TweetSpamCheckRepository.Type + def urlRepo: UrlRepository.Type + def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type + def userRepo: UserRepository.Type +} + +class ExternalServiceRepositories( + clients: BackendClients, + statsReceiver: StatsReceiver, + settings: TweetServiceSettings, + clientIdHelper: ClientIdHelper) + extends ExternalRepositories { + + lazy val card2Repo: Card2Repository.Type = + Card2Repository(clients.expandodo.getCards2, maxRequestSize = 5) + + lazy val cardRepo: CardRepository.Type = + CardRepository(clients.expandodo.getCards, maxRequestSize = 5) + + lazy val cardUsersRepo: CardUsersRepository.Type = + CardUsersRepository(clients.expandodo.getCardUsers) + + lazy val conversationIdRepo: ConversationIdRepository.Type = + ConversationIdRepository(clients.tflockReadClient.multiSelectOne) + + lazy val containerAsTweetRepo: CreativesContainerMaterializationRepository.GetTweetType = + CreativesContainerMaterializationRepository( + clients.creativesContainerService.materializeAsTweet) + + lazy val containerAsTweetFieldsRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType = + CreativesContainerMaterializationRepository.materializeAsTweetFields( + clients.creativesContainerService.materializeAsTweetFields) + + lazy val deviceSourceRepo: Type = { + DeviceSourceRepository( + DeviceSourceParser.parseAppId, + FutureArrow(clients.passbirdClient.getClientApplications(_)) + ) + } + + 
lazy val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type = + EscherbirdAnnotationRepository(clients.escherbird.annotate) + + lazy val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type = + QuoterHasAlreadyQuotedRepository(clients.tflockReadClient) + + lazy val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type = + LastQuoteOfQuoterRepository(clients.tflockReadClient) + + lazy val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type = + StratoSafetyLabelsRepository(clients.stratoserverClient) + + lazy val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type = + StratoCommunityMembershipRepository(clients.stratoserverClient) + + lazy val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type = + StratoCommunityAccessRepository(clients.stratoserverClient) + + lazy val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type = + StratoSuperFollowEligibleRepository(clients.stratoserverClient) + + lazy val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type = + StratoSuperFollowRelationsRepository(clients.stratoserverClient) + + lazy val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type = + StratoPromotedTweetRepository(clients.stratoserverClient) + + lazy val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type = + StratoSubscriptionVerificationRepository(clients.stratoserverClient) + + lazy val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type = + GeoScrubTimestampRepository(clients.geoScrubEventStore.getGeoScrubTimestamp) + + lazy val mediaMetadataRepo: MediaMetadataRepository.Type = + MediaMetadataRepository(clients.mediaClient.getMediaMetadata) + + lazy val perspectiveRepo: GetPerspectives = + GetPerspectives(clients.timelineService.getPerspectives) + + lazy val placeRepo: PlaceRepository.Type = + GeoduckPlaceRepository(clients.geoHydrationLocate) + + lazy val profileGeoRepo: ProfileGeoRepository.Type = + 
ProfileGeoRepository(clients.gnipEnricherator.hydrateProfileGeo) + + lazy val relationshipRepo: RelationshipRepository.Type = + RelationshipRepository(clients.socialGraphService.exists, maxRequestSize = 6) + + lazy val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type = + RetweetSpamCheckRepository(clients.scarecrow.checkRetweet) + + lazy val tweetCountsRepo: TweetCountsRepository.Type = + TweetCountsRepository( + clients.tflockReadClient, + maxRequestSize = settings.tweetCountsRepoChunkSize + ) + + lazy val tweetResultRepo: TweetResultRepository.Type = + ManhattanTweetRepository( + clients.tweetStorageClient.getTweet, + clients.tweetStorageClient.getStoredTweet, + settings.shortCircuitLikelyPartialTweetReads, + statsReceiver.scope("manhattan_tweet_repo"), + clientIdHelper, + ) + + lazy val tweetSpamCheckRepo: TweetSpamCheckRepository.Type = + TweetSpamCheckRepository(clients.scarecrow.checkTweet2) + + lazy val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type = + UnmentionedEntitiesRepository(clients.stratoserverClient) + + lazy val urlRepo: UrlRepository.Type = + UrlRepository( + clients.talon.expand, + settings.thriftClientId.name, + statsReceiver.scope("talon_url_repo"), + clientIdHelper, + ) + + lazy val userRepo: UserRepository.Type = + GizmoduckUserRepository( + clients.gizmoduck.getById, + clients.gizmoduck.getByScreenName, + maxRequestSize = 100 + ) + + lazy val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type = + UserIsInvitedToConversationRepository( + FutureArrow(clients.tflockReadClient.multiSelectOne(_)), + FutureArrow((clients.tflockReadClient.contains(_: StatusGraph, _: Long, _: Long)).tupled)) + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala new file mode 100644 index 000000000..2060e7bcc --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/LogicalRepositories.scala @@ -0,0 +1,807 @@ +package com.twitter.tweetypie +package config + +import com.twitter.abdecider.ABDeciderFactory +import com.twitter.config.yaml.YamlConfig +import com.twitter.decider.Decider +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.memcached +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache._ +import com.twitter.servo.cache.{KeyValueResult => _} +import com.twitter.servo.repository._ +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.stitch.repo.Repo +import com.twitter.stitch.timelineservice.TimelineService +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.stringcenter.client.ExternalStringRegistry +import com.twitter.stringcenter.client.MultiProjectStringCenter +import com.twitter.translation.Languages +import com.twitter.translation.YamlConfigLanguages +import com.twitter.tweetypie.caching.CacheOperations +import com.twitter.tweetypie.caching.Expiry +import com.twitter.tweetypie.caching.ServoCachedValueSerializer +import com.twitter.tweetypie.caching.StitchCaching +import com.twitter.tweetypie.caching.ValueSerializer +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.TweetResult +import com.twitter.tweetypie.hydrator.TextRepairer +import com.twitter.tweetypie.hydrator.TweetHydration +import com.twitter.tweetypie.hydrator.TweetQueryOptionsExpander +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil.BoringStackTrace +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.DeviceSource +import com.twitter.tweetypie.thriftscala.Place +import 
com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import com.twitter.tweetypie.util.StitchUtils +import com.twitter.util.Duration +import com.twitter.util.FuturePool +import com.twitter.util.Timer +import com.twitter.visibility.VisibilityLibrary +import com.twitter.visibility.common.KeywordMatcher +import com.twitter.visibility.common.LocalizationSource +import com.twitter.visibility.common.TweetMediaMetadataSource +import com.twitter.visibility.common.TweetPerspectiveSource +import com.twitter.visibility.common.UserRelationshipSource +import com.twitter.visibility.common.UserSource +import com.twitter.visibility.common.tflock.UserIsInvitedToConversationRepository +import com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.generators.CountryNameGenerator +import com.twitter.visibility.generators.LocalizedInterstitialGenerator +import com.twitter.visibility.generators.TombstoneGenerator +import com.twitter.visibility.interfaces.tweets.DeletedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityLibrary +import com.twitter.visibility.util.DeciderUtil +import com.twitter.visibility.util.FeatureSwitchUtil +import java.util.concurrent.Executors + +/** + * LogicalRepositories is a layer above ExternalRepositories. These repos may have additional + * logic layered in, such as memcache-caching, hot-key caching, etc. There may + * also be multiple logical repositories mapped to a single external repository. + * + * These repositories are used in tweet hydration and tweet creation. 
+ */ +trait LogicalRepositories { + + def card2Repo: Card2Repository.Type + def cardRepo: CardRepository.Type + def cardUsersRepo: CardUsersRepository.Type + def conversationIdRepo: ConversationIdRepository.Type + def conversationControlRepo: ConversationControlRepository.Type + def conversationMutedRepo: ConversationMutedRepository.Type + def containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType + def containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType + def deviceSourceRepo: DeviceSourceRepository.Type + def escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type + def geoScrubTimestampRepo: GeoScrubTimestampRepository.Type + def languageRepo: LanguageRepository.Type + def mediaMetadataRepo: MediaMetadataRepository.Type + def pastedMediaRepo: PastedMediaRepository.Type + def perspectiveRepo: PerspectiveRepository.Type + def placeRepo: PlaceRepository.Type + def profileGeoRepo: ProfileGeoRepository.Type + def quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type + def lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type + def relationshipRepo: RelationshipRepository.Type + def stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type + def stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type + def stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type + def stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type + def stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type + def stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type + def stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type + def takedownRepo: UserTakedownRepository.Type + def tweetSpamCheckRepo: TweetSpamCheckRepository.Type + def retweetSpamCheckRepo: RetweetSpamCheckRepository.Type + def tweetCountsRepo: TweetCountsRepository.Type + def tweetVisibilityRepo: TweetVisibilityRepository.Type + def 
quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type + def deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type + def unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type + def urlRepo: UrlRepository.Type + def userRepo: UserRepository.Type + def optionalUserRepo: UserRepository.Optional + def userIdentityRepo: UserIdentityRepository.Type + def userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type + def userProtectionRepo: UserProtectionRepository.Type + def userViewRepo: UserViewRepository.Type + def userVisibilityRepo: UserVisibilityRepository.Type + + def tweetResultRepo: TweetResultRepository.Type + def tweetRepo: TweetRepository.Type + def optionalTweetRepo: TweetRepository.Optional + + /** + * Not actually repositories, but intimately intertwined. + */ + def tweetHydrators: TweetHydrators +} + +object LogicalRepositories { + + /** + * Middleware is a function that takes a stitch repo and returns a new stitch repo. + */ + type Middleware[K, V] = (K => Stitch[V]) => K => Stitch[V] + + // Middleware2 is a function that takes a two-arg stitch repo and returns a new two-arg stitch repo. + type Middleware2[K, C, V] = ((K, C) => Stitch[V]) => ((K, C) => Stitch[V]) + val exceptionLog: Logger = Logger(getClass) + + // Converts a Middleware2 to a Middleware for use with withMiddleware. 
+ def tupledMiddleware[K, C, V](middleware2: Middleware2[K, C, V]): Middleware[(K, C), V] = + repo => middleware2(Function.untupled(repo)).tupled + + object ObserveStitch { + def apply[K, V]( + repo: K => Stitch[V], + repoName: String, + stats: StatsReceiver + ): K => Stitch[V] = { + val successCounter = stats.counter("success") + val notFoundCounter = stats.counter("not_found") + val latencyStat = stats.stat("latency_ms") + + val exceptionCounter = + ExceptionCounter( + stats, + // don't count FilteredState exceptions + FilteredState.ignoringCategorizer(ExceptionCounter.defaultCategorizer) + ) + + (key: K) => + StitchUtils.trackLatency(latencyStat, repo(key)).respond { + case Return(_) => successCounter.incr() + case Throw(NotFound) => notFoundCounter.incr() + case Throw(t) => + val message = s"$repoName: $key" + if (BoringStackTrace.isBoring(t)) { + exceptionLog.debug(message, t) + } else { + exceptionLog.warn(message, t) + } + + exceptionCounter(t) + } + } + } + + /** + * Add middleware to configure a repository. The stats receiver is + * scoped for the currently-configured repository. The `toRepo` field + * is the repository with some set of middleware applied. Each method + * adds a new middleware to the current repo, and returns it as a + * `RepoConfig`, allowing method chaining. + * + * Since each method call applies a new middleware, the final middleware is + * the outermost middleware, and thus the one that sees the arguments + * first. + */ + class RepoConfig[K, V]( + val toRepo: K => Stitch[V], + stats: StatsReceiver, + name: String, + memcachedClientWithInProcessCaching: memcached.Client) { + def withMiddleware(middleware: Middleware[K, V]): RepoConfig[K, V] = + new RepoConfig[K, V](middleware(toRepo), stats, name, memcachedClientWithInProcessCaching) + + /** + * Wraps a repo with success/failure/latency stats tracking and logs + * exceptions. This will be applied to every repository. 
+ * + * @param repoName Used when logging exceptions thrown by the underlying repo. + */ + def observe(repoName: String = s"${name}_repo"): RepoConfig[K, V] = { + withMiddleware { repo => ObserveStitch[K, V](repo, repoName, stats) } + } + + /** + * Use the supplied cache to wrap the repository with a read-through + * caching layer. + */ + def caching( + cache: LockingCache[K, Cached[V]], + partialHandler: CachedResult.PartialHandler[K, V], + maxCacheRequestSize: Int = Int.MaxValue + ): RepoConfig[K, V] = { + val stitchLockingCache = StitchLockingCache( + underlying = cache, + picker = new PreferNewestCached[V], + maxRequestSize = maxCacheRequestSize + ) + + val handler: CachedResult.Handler[K, V] = + CachedResult.Handler( + CachedResult.PartialHandler.orElse( + partialHandler, + CachedResult.failuresAreDoNotCache + ) + ) + + withMiddleware { repo => + CacheStitch[K, K, V]( + repo = repo, + cache = stitchLockingCache, + identity, + handler = handler, + cacheable = CacheStitch.cacheFoundAndNotFound + ) + } + } + + def newCaching( + keySerializer: K => String, + valueSerializer: ValueSerializer[Try[V]] + ): RepoConfig[K, V] = + withMiddleware { repo => + val logger = Logger(s"com.twitter.tweetypie.config.LogicalRepositories.$name") + + val cacheOperations: CacheOperations[K, Try[V]] = + new CacheOperations( + keySerializer = keySerializer, + valueSerializer = valueSerializer, + memcachedClient = memcachedClientWithInProcessCaching, + statsReceiver = stats.scope("caching"), + logger = logger + ) + + val tryRepo: K => Stitch[Try[V]] = repo.andThen(_.liftToTry) + val cachingTryRepo: K => Stitch[Try[V]] = new StitchCaching(cacheOperations, tryRepo) + cachingTryRepo.andThen(_.lowerFromTry) + } + + def toRepo2[K1, C](implicit tupleToK: ((K1, C)) <:< K): (K1, C) => Stitch[V] = + (k1, c) => toRepo(tupleToK((k1, c))) + } + + def softTtlPartialHandler[K, V]( + softTtl: Option[V] => Duration, + softTtlPerturbationFactor: Float = 0.05f + ): CachedResult.PartialHandler[K, V] = + 
CachedResult + .softTtlExpiration[K, V](softTtl, CachedResult.randomExpiry(softTtlPerturbationFactor)) + + def apply( + settings: TweetServiceSettings, + stats: StatsReceiver, + timer: Timer, + deciderGates: TweetypieDeciderGates, + external: ExternalRepositories, + caches: Caches, + stratoClient: StratoClient, + hasMedia: Tweet => Boolean, + clientIdHelper: ClientIdHelper, + featureSwitchesWithoutExperiments: FeatureSwitches, + ): LogicalRepositories = { + val repoStats = stats.scope("repositories") + + def repoConfig[K, V](name: String, repo: K => Stitch[V]): RepoConfig[K, V] = + new RepoConfig[K, V]( + name = name, + toRepo = repo, + stats = repoStats.scope(name), + memcachedClientWithInProcessCaching = caches.memcachedClientWithInProcessCaching) + + def repo2Config[K, C, V](name: String, repo: (K, C) => Stitch[V]): RepoConfig[(K, C), V] = + repoConfig[(K, C), V](name, repo.tupled) + + new LogicalRepositories { + // the final tweetResultRepo has a circular dependency, where it depends on hydrators + // that in turn depend on the tweetResultRepo, so we create a `tweetResultRepo` function + // that proxies to `var finalTweetResultRepo`, which gets set at the end of this block. 
+ var finalTweetResultRepo: TweetResultRepository.Type = null + val tweetResultRepo: TweetResultRepository.Type = + (tweetId, opts) => finalTweetResultRepo(tweetId, opts) + val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo) + + val optionalTweetRepo: TweetRepository.Optional = TweetRepository.optional(tweetRepo) + + val userRepo: UserRepository.Type = + repo2Config(repo = external.userRepo, name = "user") + .observe() + .toRepo2 + + val optionalUserRepo: UserRepository.Optional = UserRepository.optional(userRepo) + + private[this] val tweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("tweet_visibility_library") + private[this] val userUnavailableVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("user_unavailable_visibility_library") + private[this] val quotedTweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("quoted_tweet_visibility_library") + private[this] val deletedTweetVisibilityStatsReceiver: StatsReceiver = + repoStats.scope("deleted_tweet_visibility_library") + // TweetVisibilityLibrary still uses the old c.t.logging.Logger + private[this] val tweetVisibilityLogger = + com.twitter.logging.Logger("com.twitter.tweetypie.TweetVisibility") + private[this] val visibilityDecider: Decider = DeciderUtil.mkDecider( + deciderOverlayPath = settings.vfDeciderOverlayFilename, + useLocalDeciderOverrides = true) + private[this] val visibilityDeciderGates = VisibilityDeciderGates(visibilityDecider) + + private[this] def visibilityLibrary(statsReceiver: StatsReceiver) = VisibilityLibrary + .Builder( + log = tweetVisibilityLogger, + statsReceiver = statsReceiver, + memoizeSafetyLevelParams = visibilityDeciderGates.enableMemoizeSafetyLevelParams + ) + .withDecider(visibilityDecider) + .withDefaultABDecider(isLocal = false) + .withCaptureDebugStats(Gate.True) + .withEnableComposableActions(Gate.True) + .withEnableFailClosed(Gate.True) + 
.withEnableShortCircuiting(visibilityDeciderGates.enableShortCircuitingTVL) + .withSpecialLogging(visibilityDeciderGates.enableSpecialLogging) + .build() + + def countryNameGenerator(statsReceiver: StatsReceiver) = { + // TweetVisibilityLibrary, DeletedTweetVisibilityLibrary, and + // UserUnavailableVisibilityLibrary do not evaluate any Rules + // that require the display of country names in copy + CountryNameGenerator.providesWithCustomMap(Map.empty, statsReceiver) + } + + def tombstoneGenerator( + countryNameGenerator: CountryNameGenerator, + statsReceiver: StatsReceiver + ) = + TombstoneGenerator( + visibilityLibrary(statsReceiver).visParams, + countryNameGenerator, + statsReceiver) + + private[this] val userUnavailableVisibilityLibrary = + UserUnavailableStateVisibilityLibrary( + visibilityLibrary(userUnavailableVisibilityStatsReceiver), + visibilityDecider, + tombstoneGenerator( + countryNameGenerator(userUnavailableVisibilityStatsReceiver), + userUnavailableVisibilityStatsReceiver + ), + LocalizedInterstitialGenerator(visibilityDecider, userUnavailableVisibilityStatsReceiver) + ) + + val userIdentityRepo: UserIdentityRepository.Type = + repoConfig(repo = UserIdentityRepository(userRepo), name = "user_identity") + .observe() + .toRepo + + val userProtectionRepo: UserProtectionRepository.Type = + repoConfig(repo = UserProtectionRepository(userRepo), name = "user_protection") + .observe() + .toRepo + + val userViewRepo: UserViewRepository.Type = + repoConfig(repo = UserViewRepository(userRepo), name = "user_view") + .observe() + .toRepo + + val userVisibilityRepo: UserVisibilityRepository.Type = + repoConfig( + repo = UserVisibilityRepository(userRepo, userUnavailableVisibilityLibrary), + name = "user_visibility" + ).observe().toRepo + + val urlRepo: UrlRepository.Type = + repoConfig(repo = external.urlRepo, name = "url") + .observe() + .toRepo + + val profileGeoRepo: ProfileGeoRepository.Type = + repoConfig(repo = external.profileGeoRepo, name = "profile_geo") 
+ .observe() + .toRepo + + val quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type = + repo2Config(repo = external.quoterHasAlreadyQuotedRepo, name = "quoter_has_already_quoted") + .observe() + .toRepo2 + + val lastQuoteOfQuoterRepo: LastQuoteOfQuoterRepository.Type = + repo2Config(repo = external.lastQuoteOfQuoterRepo, name = "last_quote_of_quoter") + .observe() + .toRepo2 + + val mediaMetadataRepo: MediaMetadataRepository.Type = + repoConfig(repo = external.mediaMetadataRepo, name = "media_metadata") + .observe() + .toRepo + + val perspectiveRepo: PerspectiveRepository.Type = + repoConfig(repo = external.perspectiveRepo, name = "perspective") + .observe() + .toRepo + + val conversationMutedRepo: ConversationMutedRepository.Type = + TimelineService.GetPerspectives.getConversationMuted(perspectiveRepo) + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed. + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. + val deviceSourceRepo: DeviceSourceRepository.Type = + repoConfig(repo = external.deviceSourceRepo, name = "device_source") + .observe() + .newCaching( + keySerializer = appIdStr => DeviceSourceKey(appIdStr).toString, + valueSerializer = ServoCachedValueSerializer( + codec = DeviceSource, + expiry = Expiry.byAge(settings.deviceSourceMemcacheTtl), + softTtl = settings.deviceSourceMemcacheSoftTtl + ) + ) + .caching( + cache = caches.deviceSourceInProcessCache, + partialHandler = softTtlPartialHandler(_ => settings.deviceSourceInProcessSoftTtl) + ) + .toRepo + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
+ val placeRepo: PlaceRepository.Type = + repoConfig(repo = external.placeRepo, name = "place") + .observe() + .newCaching( + keySerializer = placeKey => placeKey.toString, + valueSerializer = ServoCachedValueSerializer( + codec = Place, + expiry = Expiry.byAge(settings.placeMemcacheTtl), + softTtl = settings.placeMemcacheSoftTtl + ) + ) + .toRepo + + val cardRepo: CardRepository.Type = + repoConfig(repo = external.cardRepo, name = "cards") + .observe() + .toRepo + + val card2Repo: Card2Repository.Type = + repo2Config(repo = external.card2Repo, name = "card2") + .observe() + .toRepo2 + + val cardUsersRepo: CardUsersRepository.Type = + repo2Config(repo = external.cardUsersRepo, name = "card_users") + .observe() + .toRepo2 + + val relationshipRepo: RelationshipRepository.Type = + repoConfig(repo = external.relationshipRepo, name = "relationship") + .observe() + .toRepo + + val conversationIdRepo: ConversationIdRepository.Type = + repoConfig(repo = external.conversationIdRepo, name = "conversation_id") + .observe() + .toRepo + + val conversationControlRepo: ConversationControlRepository.Type = + repo2Config( + repo = ConversationControlRepository(tweetRepo, stats.scope("conversation_control")), + name = "conversation_control" + ).observe().toRepo2 + + val containerAsGetTweetResultRepo: CreativesContainerMaterializationRepository.GetTweetType = + repo2Config( + repo = external.containerAsTweetRepo, + name = "container_as_tweet" + ).observe().toRepo2 + + val containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType = + repo2Config( + repo = external.containerAsTweetFieldsRepo, + name = "container_as_tweet_fields" + ).observe().toRepo2 + + val languageRepo: LanguageRepository.Type = { + val pool = FuturePool(Executors.newFixedThreadPool(settings.numPenguinThreads)) + repoConfig(repo = PenguinLanguageRepository(pool), name = "language") + .observe() + .toRepo + } + + // Because observe is applied before caching, only cache misses 
+ // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. + val tweetCountsRepo: TweetCountsRepository.Type = + repoConfig(repo = external.tweetCountsRepo, name = "counts") + .observe() + .caching( + cache = caches.tweetCountsCache, + partialHandler = softTtlPartialHandler { + case Some(0) => settings.tweetCountsMemcacheZeroSoftTtl + case _ => settings.tweetCountsMemcacheNonZeroSoftTtl + }, + maxCacheRequestSize = settings.tweetCountsCacheChunkSize + ) + .toRepo + + val pastedMediaRepo: PastedMediaRepository.Type = + repo2Config(repo = PastedMediaRepository(tweetRepo), name = "pasted_media") + .observe() + .toRepo2 + + val escherbirdAnnotationRepo: EscherbirdAnnotationRepository.Type = + repoConfig(repo = external.escherbirdAnnotationRepo, name = "escherbird_annotations") + .observe() + .toRepo + + val stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type = + repo2Config(repo = external.stratoSafetyLabelsRepo, name = "strato_safety_labels") + .observe() + .toRepo2 + + val stratoCommunityMembershipRepo: StratoCommunityMembershipRepository.Type = + repoConfig( + repo = external.stratoCommunityMembershipRepo, + name = "strato_community_memberships") + .observe() + .toRepo + + val stratoCommunityAccessRepo: StratoCommunityAccessRepository.Type = + repoConfig(repo = external.stratoCommunityAccessRepo, name = "strato_community_access") + .observe() + .toRepo + + val stratoSuperFollowEligibleRepo: StratoSuperFollowEligibleRepository.Type = + repoConfig( + repo = external.stratoSuperFollowEligibleRepo, + name = "strato_super_follow_eligible") + .observe() + .toRepo + + val stratoSuperFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type = + repo2Config( + repo = external.stratoSuperFollowRelationsRepo, + name = "strato_super_follow_relations") + .observe() + .toRepo2 + + val stratoPromotedTweetRepo: StratoPromotedTweetRepository.Type = + repoConfig(repo = 
external.stratoPromotedTweetRepo, name = "strato_promoted_tweet") + .observe() + .toRepo + + val stratoSubscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type = + repo2Config( + repo = external.stratoSubscriptionVerificationRepo, + name = "strato_subscription_verification") + .observe() + .toRepo2 + + val unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type = + repo2Config(repo = external.unmentionedEntitiesRepo, name = "unmentioned_entities") + .observe() + .toRepo2 + + private[this] val userSource = + UserSource.fromRepo( + Repo { (k, _) => + val opts = UserQueryOptions(k.fields, UserVisibility.All) + userRepo(UserKey(k.id), opts) + } + ) + + private[this] val userRelationshipSource = + UserRelationshipSource.fromRepo( + Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => + relationshipRepo( + RelationshipKey(key.subjectId, key.objectId, key.relationship) + ) + } + ) + + private[this] val tweetPerspectiveSource = + TweetPerspectiveSource.fromGetPerspectives(perspectiveRepo) + private[this] val tweetMediaMetadataSource = + TweetMediaMetadataSource.fromFunction(mediaMetadataRepo) + + val userIsInvitedToConversationRepo: UserIsInvitedToConversationRepository.Type = + repo2Config( + repo = external.userIsInvitedToConversationRepo, + name = "user_is_invited_to_conversation") + .observe() + .toRepo2 + + private[this] val stringCenterClient: MultiProjectStringCenter = { + val stringCenterProjects = settings.flags.stringCenterProjects().toList + + val languages: Languages = new YamlConfigLanguages( + new YamlConfig(settings.flags.languagesConfig())) + + val loggingAbDecider = ABDeciderFactory("/usr/local/config/abdecider/abdecider.yml") + .withEnvironment("production") + .buildWithLogging() + + MultiProjectStringCenter( + projects = stringCenterProjects, + defaultBundlePath = MultiProjectStringCenter.StandardDefaultBundlePath, + refreshingBundlePath = MultiProjectStringCenter.StandardRefreshingBundlePath, + refreshingInterval = 
MultiProjectStringCenter.StandardRefreshingInterval, + requireDefaultBundleExists = true, + languages = languages, + statsReceiver = tweetVisibilityStatsReceiver, + loggingABDecider = loggingAbDecider + ) + } + private[this] val stringRegistry: ExternalStringRegistry = new ExternalStringRegistry() + private[this] val localizationSource: LocalizationSource = + LocalizationSource.fromMultiProjectStringCenterClient(stringCenterClient, stringRegistry) + + val tweetVisibilityRepo: TweetVisibilityRepository.Type = { + val tweetVisibilityLibrary: TweetVisibilityLibrary.Type = + TweetVisibilityLibrary( + visibilityLibrary(tweetVisibilityStatsReceiver), + userSource = userSource, + userRelationshipSource = userRelationshipSource, + keywordMatcher = KeywordMatcher.defaultMatcher(stats), + stratoClient = stratoClient, + localizationSource = localizationSource, + decider = visibilityDecider, + invitedToConversationRepo = userIsInvitedToConversationRepo, + tweetPerspectiveSource = tweetPerspectiveSource, + tweetMediaMetadataSource = tweetMediaMetadataSource, + tombstoneGenerator = tombstoneGenerator( + countryNameGenerator(tweetVisibilityStatsReceiver), + tweetVisibilityStatsReceiver + ), + interstitialGenerator = + LocalizedInterstitialGenerator(visibilityDecider, tweetVisibilityStatsReceiver), + limitedActionsFeatureSwitches = + FeatureSwitchUtil.mkLimitedActionsFeatureSwitches(tweetVisibilityStatsReceiver), + enableParityTest = deciderGates.tweetVisibilityLibraryEnableParityTest + ) + + val underlying = + TweetVisibilityRepository( + tweetVisibilityLibrary, + visibilityDeciderGates, + tweetVisibilityLogger, + repoStats.scope("tweet_visibility_repo") + ) + + repoConfig(repo = underlying, name = "tweet_visibility") + .observe() + .toRepo + } + + val quotedTweetVisibilityRepo: QuotedTweetVisibilityRepository.Type = { + val quotedTweetVisibilityLibrary: QuotedTweetVisibilityLibrary.Type = + QuotedTweetVisibilityLibrary( + visibilityLibrary(quotedTweetVisibilityStatsReceiver), + 
userSource = userSource, + userRelationshipSource = userRelationshipSource, + visibilityDecider, + userStateVisibilityLibrary = userUnavailableVisibilityLibrary, + enableVfFeatureHydration = deciderGates.enableVfFeatureHydrationInQuotedTweetVLShim + ) + + val underlying = + QuotedTweetVisibilityRepository(quotedTweetVisibilityLibrary, visibilityDeciderGates) + + repoConfig(repo = underlying, name = "quoted_tweet_visibility") + .observe() + .toRepo + } + + val deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type = { + val deletedTweetVisibilityLibrary: DeletedTweetVisibilityLibrary.Type = + DeletedTweetVisibilityLibrary( + visibilityLibrary(deletedTweetVisibilityStatsReceiver), + visibilityDecider, + tombstoneGenerator( + countryNameGenerator(deletedTweetVisibilityStatsReceiver), + deletedTweetVisibilityStatsReceiver + ) + ) + + val underlying = DeletedTweetVisibilityRepository.apply( + deletedTweetVisibilityLibrary + ) + + repoConfig(repo = underlying, name = "deleted_tweet_visibility") + .observe() + .toRepo + } + + val takedownRepo: UserTakedownRepository.Type = + repoConfig(repo = UserTakedownRepository(userRepo), name = "takedowns") + .observe() + .toRepo + + val tweetSpamCheckRepo: TweetSpamCheckRepository.Type = + repo2Config(repo = external.tweetSpamCheckRepo, name = "tweet_spam_check") + .observe() + .toRepo2 + + val retweetSpamCheckRepo: RetweetSpamCheckRepository.Type = + repoConfig(repo = external.retweetSpamCheckRepo, name = "retweet_spam_check") + .observe() + .toRepo + + // Because observe is applied before caching, only cache misses + // (i.e. calls to the underlying repo) are observed + // Note that `newCaching` has stats around cache hit/miss but `caching` does not. 
+ val geoScrubTimestampRepo: GeoScrubTimestampRepository.Type = + repoConfig(repo = external.geoScrubTimestampRepo, name = "geo_scrub") + .observe() + .caching( + cache = caches.geoScrubCache, + partialHandler = (_ => None) + ) + .toRepo + + val tweetHydrators: TweetHydrators = + TweetHydrators( + stats = stats, + deciderGates = deciderGates, + repos = this, + tweetDataCache = caches.tweetDataCache, + hasMedia = hasMedia, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + clientIdHelper = clientIdHelper, + ) + + val queryOptionsExpander: TweetQueryOptionsExpander.Type = + TweetQueryOptionsExpander.threadLocalMemoize( + TweetQueryOptionsExpander.expandDependencies + ) + + // mutations to tweets that we only need to apply when reading from the external + // repository, and not when reading from cache + val tweetMutation: Mutation[Tweet] = + Mutation + .all( + Seq( + EntityExtractor.mutationAll, + TextRepairer.BlankLineCollapser, + TextRepairer.CoreTextBugPatcher + ) + ).onlyIf(_.coreData.isDefined) + + val cachingTweetRepo: TweetResultRepository.Type = + repo2Config(repo = external.tweetResultRepo, name = "saved_tweet") + .observe() + .withMiddleware { repo => + // applies tweetMutation to the results of TweetResultRepository + val mutateResult = TweetResult.mutate(tweetMutation) + repo.andThen(stitchResult => stitchResult.map(mutateResult)) + } + .withMiddleware( + tupledMiddleware( + CachingTweetRepository( + caches.tweetResultCache, + settings.tweetTombstoneTtl, + stats.scope("saved_tweet", "cache"), + clientIdHelper, + deciderGates.logCacheExceptions, + ) + ) + ) + .toRepo2 + + finalTweetResultRepo = repo2Config(repo = cachingTweetRepo, name = "tweet") + .withMiddleware( + tupledMiddleware( + TweetHydration.hydrateRepo( + tweetHydrators.hydrator, + tweetHydrators.cacheChangesEffect, + queryOptionsExpander + ) + ) + ) + .observe() + .withMiddleware(tupledMiddleware(TweetResultRepository.shortCircuitInvalidIds)) + .toRepo2 + } + } +} diff 
--git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala new file mode 100644 index 000000000..22623039b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Main.scala @@ -0,0 +1,314 @@ +package com.twitter.tweetypie +package config + +import com.twitter.app.Flag +import com.twitter.app.Flaggable +import com.twitter.app.Flags +import com.twitter.finagle.http.HttpMuxer +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.authorization.server.MtlsServerSessionTrackerFilter +import com.twitter.finagle.mtls.server.MtlsStackServer._ +import com.twitter.finagle.param.Reporter +import com.twitter.finagle.ssl.OpportunisticTls +import com.twitter.finagle.util.NullReporterFactory +import com.twitter.finagle.Thrift +import com.twitter.finagle.ThriftMux +import com.twitter.flockdb.client.thriftscala.Priority +import com.twitter.inject.Injector +import com.twitter.inject.annotations.{Flags => InjectFlags} +import com.twitter.scrooge.ThriftEnum +import com.twitter.scrooge.ThriftEnumObject +import com.twitter.server.handler.IndexHandler +import com.twitter.strato.catalog.Catalog +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.fed.server.StratoFedServer +import com.twitter.strato.util.Ref +import com.twitter.strato.warmup.Warmer +import com.twitter.tweetypie.federated.StratoCatalogBuilder +import com.twitter.tweetypie.federated.warmups.StratoCatalogWarmups +import com.twitter.tweetypie.serverutil.ActivityService +import java.net.InetSocketAddress +import scala.reflect.ClassTag + +object Env extends Enumeration { + val dev: Env.Value = Value + val staging: Env.Value = Value + val prod: Env.Value = Value +} + +class TweetServiceFlags(flag: Flags, injector: => Injector) { + implicit object EnvFlaggable extends Flaggable[Env.Value] { + def parse(s: String): Env.Value = + s match { + // Handle 
Aurora env names that are different from tweetypie's names + case "devel" => Env.dev + case "test" => Env.staging + // Handle Tweetypie env names + case other => Env.withName(other) + } + } + + val zone: Flag[String] = + flag("zone", "localhost", "One of: atla, pdxa, localhost, etc.") + + val env: Flag[Env.Value] = + flag("env", Env.dev, "One of: testbox, dev, staging, prod") + + val twemcacheDest: Flag[String] = + flag( + "twemcacheDest", + "/s/cache/tweetypie:twemcaches", + "The Name for the tweetypie cache cluster." + ) + + val deciderOverrides: Flag[Map[String, Boolean]] = + flag( + "deciderOverrides", + Map.empty[String, Boolean], + "Set deciders to constant values, overriding decider configuration files." + )( + // Unfortunately, the implicit Flaggable[Boolean] has a default + // value and Flaggable.ofMap[K, V] requires that the implicit + // Flaggable[V] not have a default. Even less fortunately, it + // doesn't say why. We're stuck with this. + Flaggable.ofMap(implicitly, Flaggable.mandatory(_.toBoolean)) + ) + + // "/decider.yml" comes from the resources included at + // "tweetypie/server/config", so you should not normally need to + // override this value. This flag is defined as a step toward making + // our command-line usage more similar to the standard + // twitter-server-internal flags. + def deciderBase(): String = + injector.instance[String](InjectFlags.named("decider.base")) + + // Omitting a value for decider overlay flag causes the server to use + // only the static decider. + def deciderOverlay(): String = + injector.instance[String](InjectFlags.named("decider.overlay")) + + // Omitting a value for the VF decider overlay flag causes the server + // to use only the static decider. + val vfDeciderOverlay: Flag[String] = + flag( + "vf.decider.overlay", + "The location of the overlay decider configuration for Visibility Filtering") + + /** + * Warmup Requests happen as part of the initialization process, before any real requests are + * processed. 
This prevents real requests from ever being served from a completely cold state + */ + val enableWarmupRequests: Flag[Boolean] = + flag( + "enableWarmupRequests", + true, + """| warms up Tweetypie service by generating random requests + | to Tweetypie that are processed prior to the actual client requests """.stripMargin + ) + + val grayListRateLimit: Flag[Double] = + flag("graylistRateLimit", 5.0, "rate-limit for non-allowlisted clients") + + val servicePort: Flag[InetSocketAddress] = + flag("service.port", "port for tweet-service thrift interface") + + val clientId: Flag[String] = + flag("clientId", "tweetypie.staging", "clientId to send in requests") + + val allowlist: Flag[Boolean] = + flag("allowlist", true, "enforce client allowlist") + + val clientHostStats: Flag[Boolean] = + flag("clientHostStats", false, "enable per client host stats") + + val withCache: Flag[Boolean] = + flag("withCache", true, "if set to false, Tweetypie will launch without memcache") + + /** + * Make any [[ThriftEnum]] value parseable as a [[Flag]] value. This + * will parse case-insensitive values that match the unqualified + * names of the values of the enumeration, in the manner of + * [[ThriftEnum]]'s `valueOf` method. + * + * Consider a [[ThriftEnum]] generated from the following Thrift IDL snippet: + * + * {{{ + * enum Priority { + * Low = 1 + * Throttled = 2 + * High = 3 + * } + * }}} + * + * To enable defining flags that specify one of these enum values: + * + * {{{ + * implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) + * }}} + * + * In this example, the enumeration value `Priority.Low` can be + * represented as the string "Low", "low", or "LOW".
+ */ + def flaggableThriftEnum[T <: ThriftEnum: ClassTag](enum: ThriftEnumObject[T]): Flaggable[T] = + Flaggable.mandatory[T] { stringValue: String => + enum + .valueOf(stringValue) + .getOrElse { + val validValues = enum.list.map(_.name).mkString(", ") + throw new IllegalArgumentException( + s"Invalid value ${stringValue}. Valid values include: ${validValues}" + ) + } + } + + implicit val flaggablePriority: Flaggable[Priority] = flaggableThriftEnum(Priority) + + val backgroundIndexingPriority: Flag[Priority] = + flag( + "backgroundIndexingPriority", + Priority.Low, + "specifies the queue to use for \"background\" tflock operations, such as removing edges " + + "for deleted Tweets. This exists for testing scenarios, when it is useful to see the " + + "effects of background indexing operations sooner. In production, this should always be " + + "set to \"low\" (the default)." + ) + + val tflockPageSize: Flag[Int] = + flag("tflockPageSize", 1000, "Number of items to return in each page when querying tflock") + + val enableInProcessCache: Flag[Boolean] = + flag( + "enableInProcessCache", + true, + "if set to false, Tweetypie will not use the in-process cache" + ) + + val inProcessCacheSize: Flag[Int] = + flag("inProcessCacheSize", 1700, "maximum items in in-process cache") + + val inProcessCacheTtlMs: Flag[Int] = + flag("inProcessCacheTtlMs", 10000, "milliseconds that hot keys are stored in memory") + + val memcachePendingRequestLimit: Flag[Int] = + flag( + "memcachePendingRequestLimit", + 100, + "Number of requests that can be queued on a single memcache connection (4 per cache server)" + ) + + val instanceId: Flag[Int] = + flag( + "configbus.instanceId", + -1, + "InstanceId of the tweetypie service instance for staged configuration distribution" + ) + + val instanceCount: Flag[Int] = + flag( + "configbus.instanceCount", + -1, + "Total number of tweetypie service instances for staged configuration distribution" + ) + + def serviceIdentifier(): ServiceIdentifier = + 
injector.instance[ServiceIdentifier] + + val enableReplication: Flag[Boolean] = + flag( + "enableReplication", + true, + "Enable replication of reads (configurable via tweetypie_replicate_reads decider) and writes (100%) via DRPC" + ) + + val simulateDeferredrpcCallbacks: Flag[Boolean] = + flag( + "simulateDeferredrpcCallbacks", + false, + """|For async write path, call back into current instance instead of via DRPC. + |This is used for test and devel instances so we can ensure the test traffic + |is going to the test instance.""".stripMargin + ) + + val shortCircuitLikelyPartialTweetReadsMs: Flag[Int] = + flag( + "shortCircuitLikelyPartialTweetReadsMs", + 1500, + """|Specifies a number of milliseconds before which we will short-circuit likely + |partial reads from MH and return a NotFound tweet response state. After + |experimenting we went with 1500 ms.""".stripMargin + ) + + val stringCenterProjects: Flag[Seq[String]] = + flag( + "stringcenter.projects", + Seq.empty[String], + "String Center project names, comma separated")(Flaggable.ofSeq(Flaggable.ofString)) + + val languagesConfig: Flag[String] = + flag("international.languages", "Supported languages config file") +} + +class TweetypieMain extends StratoFedServer { + override def dest: String = "/s/tweetypie/tweetypie:federated" + + val tweetServiceFlags: TweetServiceFlags = new TweetServiceFlags(flag, injector) + + // display all the registered HttpMuxer handlers + HttpMuxer.addHandler("", new IndexHandler) + + private[this] lazy val serverBuilder = { + val settings = new TweetServiceSettings(tweetServiceFlags) + val serverBuilder = new TweetServerBuilder(settings) + + val mtlsSessionTrackerFilter = + new MtlsServerSessionTrackerFilter[Array[Byte], Array[Byte]](statsReceiver) + + val mtlsTrackedService = mtlsSessionTrackerFilter.andThen(ActivityService(serverBuilder.build)) + + val thriftMuxServer = ThriftMux.server + // by default, finagle logs exceptions to chickadee, which is deprecated and + // basically 
unused. to avoid wasted overhead, we explicitly disable the reporter. + .configured(Reporter(NullReporterFactory)) + .withLabel("tweetypie") + .withMutualTls(tweetServiceFlags.serviceIdentifier()) + .withOpportunisticTls(OpportunisticTls.Required) + .configured(Thrift.param.ServiceClass(Some(classOf[ThriftTweetService]))) + .serve(tweetServiceFlags.servicePort(), mtlsTrackedService) + + closeOnExit(thriftMuxServer) + await(thriftMuxServer) + + serverBuilder + } + + override def configureRefCatalog( + catalog: Ref[Catalog[StratoFed.Column]] + ): Ref[Catalog[StratoFed.Column]] = + catalog + .join { + Ref( + serverBuilder.stratoTweetService.flatMap { tweetService => + StratoCatalogBuilder.catalog( + tweetService, + serverBuilder.backendClients.stratoserverClient, + serverBuilder.backendClients.gizmoduck.getById, + serverBuilder.backendClients.callbackPromotedContentLogger, + statsReceiver, + serverBuilder.deciderGates.enableCommunityTweetCreates, + ) + } + ) + } + .map { case (l, r) => l ++ r } + + override def configureWarmer(warmer: Warmer): Unit = { + new TweetServiceSettings(tweetServiceFlags).warmupRequestsSettings.foreach { warmupSettings => + warmer.add( + "tweetypie strato catalog", + () => StratoCatalogWarmups.warmup(warmupSettings, composedOps) + ) + } + } +} + +object Main extends TweetypieMain diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala new file mode 100644 index 000000000..04746792b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/MemcacheExceptionLoggingFilter.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package config + +import com.twitter.io.Buf +import com.twitter.finagle.{Service, SimpleFilter} +import com.twitter.finagle.memcached.protocol._ + +class MemcacheExceptionLoggingFilter extends SimpleFilter[Command, Response] { + // Using a custom 
logger name so that we can target logging rules specifically + // for memcache exception logging. + val logger: Logger = Logger(getClass) + + def apply(command: Command, service: Service[Command, Response]): Future[Response] = { + service(command).respond { + case Return(Error(e)) => + log(command, e) + case Return(ValuesAndErrors(_, errors)) if errors.nonEmpty => + errors.foreach { + case (Buf.Utf8(keyStr), e) => + log(command.name, keyStr, e) + } + case Throw(e) => + log(command, e) + + case _ => + } + } + + private def log(command: Command, e: Throwable): Unit = { + log(command.name, getKey(command), e) + } + + private def log(commandName: String, keyStr: String, e: Throwable): Unit = { + logger.debug( + s"CACHE_EXCEPTION command: ${commandName} key: ${keyStr} exception: ${e.getClass.getName}", + e, + ) + } + + private def getKey(command: Command): String = command match { + case Get(keys) => toKeyStr(keys) + case Gets(keys) => toKeyStr(keys) + + case Set(Buf.Utf8(key), _, _, _) => key + case Add(Buf.Utf8(key), _, _, _) => key + case Cas(Buf.Utf8(key), _, _, _, _) => key + case Delete(Buf.Utf8(key)) => key + case Replace(Buf.Utf8(key), _, _, _) => key + case Append(Buf.Utf8(key), _, _, _) => key + case Prepend(Buf.Utf8(key), _, _, _) => key + + case Incr(Buf.Utf8(key), _) => key + case Decr(Buf.Utf8(key), _) => key + case Stats(keys) => toKeyStr(keys) + case Quit() => "quit" + case Upsert(Buf.Utf8(key), _, _, _, _) => key + case Getv(keys) => toKeyStr(keys) + } + + private def toKeyStr(keys: Seq[Buf]): String = + keys.map { case Buf.Utf8(key) => key }.mkString(",") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala new file mode 100644 index 000000000..d1f4721dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/Resources.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie.config + +import
com.twitter.config.yaml.YamlMap +import com.twitter.tweetypie.serverutil.PartnerMedia +import scala.util.matching.Regex + +/** + * Helpers for loading resources bundled with Tweetypie. We load them + * through this API in order to be able to unit test the resource + * loading code. + */ +object Resources { + def loadPartnerMediaRegexes(): Seq[Regex] = + PartnerMedia.load(YamlMap.load("/partner_media.yml")) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala new file mode 100644 index 000000000..523c9be1c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/ScribeTweetCacheWrites.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie.config + +import com.twitter.servo.cache.{Cache, Cached, CachedValue, CachedValueStatus} +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.repository.TweetKey +import com.twitter.tweetypie.serverutil.logcachewrites.WriteLoggingCache +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.thriftscala.{CachedTweet, ComposerSource, TweetCacheWrite} +import com.twitter.util.Time + +class ScribeTweetCacheWrites( + val underlyingCache: Cache[TweetKey, Cached[CachedTweet]], + logYoungTweetCacheWrites: TweetId => Boolean, + logTweetCacheWrites: TweetId => Boolean) + extends WriteLoggingCache[TweetKey, Cached[CachedTweet]] { + private[this] lazy val scribe = Scribe(TweetCacheWrite, "tweetypie_tweet_cache_writes") + + private[this] def mkTweetCacheWrite( + id: Long, + action: String, + cachedValue: CachedValue, + cachedTweet: Option[CachedTweet] = None + ): TweetCacheWrite = { + /* + * If the Tweet id is a Snowflake id, calculate the offset since Tweet creation. + * If it is not a Snowflake id, then the offset should be 0. See [[TweetCacheWrite]]'s Thrift + * documentation for more details. 
+ */ + val timestampOffset = + if (SnowflakeId.isSnowflakeId(id)) { + SnowflakeId(id).unixTimeMillis.asLong + } else { + 0 + } + + TweetCacheWrite( + tweetId = id, + timestamp = Time.now.inMilliseconds - timestampOffset, + action = action, + cachedValue = cachedValue, + cachedTweet = cachedTweet + ) + } + + /** + * Scribe a TweetCacheWrite record to tweetypie_tweet_cache_writes. We scribe the + * messages instead of writing them to the regular log file because the + * primary use of this logging is to get a record over time of the cache + * actions that affected a tweet, so we need a durable log that we can + * aggregate. + */ + override def log(action: String, k: TweetKey, v: Option[Cached[CachedTweet]]): Unit = + v match { + case Some(cachedTweet) => { + val cachedValue = CachedValue( + status = cachedTweet.status, + cachedAtMsec = cachedTweet.cachedAt.inMilliseconds, + readThroughAtMsec = cachedTweet.readThroughAt.map(_.inMilliseconds), + writtenThroughAtMsec = cachedTweet.writtenThroughAt.map(_.inMilliseconds), + doNotCacheUntilMsec = cachedTweet.doNotCacheUntil.map(_.inMilliseconds), + ) + scribe(mkTweetCacheWrite(k.id, action, cachedValue, cachedTweet.value)) + } + // `v` is only None if the action is a "delete" so set CachedValue with a status `Deleted` + case None => { + val cachedValue = + CachedValue(status = CachedValueStatus.Deleted, cachedAtMsec = Time.now.inMilliseconds) + scribe(mkTweetCacheWrite(k.id, action, cachedValue)) + } + } + + private[this] val YoungTweetThresholdMs = 3600 * 1000 + + private[this] def isYoungTweet(tweetId: TweetId): Boolean = + (SnowflakeId.isSnowflakeId(tweetId) && + ((Time.now.inMilliseconds - SnowflakeId(tweetId).unixTimeMillis.asLong) <= + YoungTweetThresholdMs)) + + /** + * Select all tweets for which the log_tweet_cache_writes decider returns + * true and "young" tweets for which the log_young_tweet_cache_writes decider + * returns true. 
+ */ + override def selectKey(k: TweetKey): Boolean = + // When the tweet is young, we log it if it passes either decider. This is + // because the deciders will (by design) select a different subset of + // tweets. We do this so that we have a full record for all tweets for which + // log_tweet_cache_writes is on, but also cast a wider net for tweets that + // are more likely to be affected by replication lag, race conditions + // between different writes, or other consistency issues + logTweetCacheWrites(k.id) || (isYoungTweet(k.id) && logYoungTweetCacheWrites(k.id)) + + /** + * Log newscamera tweets as well as any tweets for which selectKey returns + * true. Note that for newscamera tweets, we will possibly miss "delete" + * actions since those do not have access to the value, and so do not call + * this method. + */ + override def select(k: TweetKey, v: Cached[CachedTweet]): Boolean = + v.value.exists(_.tweet.composerSource.contains(ComposerSource.Camera)) || selectKey(k) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala new file mode 100644 index 000000000..eafd02eaa --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetBuilders.scala @@ -0,0 +1,300 @@ +package com.twitter.tweetypie +package config + +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.stitch.repo.Repo +import com.twitter.tweetypie.backends.LimiterService.Feature +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilder +import com.twitter.tweetypie.repository.RelationshipKey +import com.twitter.tweetypie.store.TotalTweetStore +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.visibility.common.TrustedFriendsSource +import com.twitter.visibility.common.UserRelationshipSource +import 
com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementLibrary + +trait TweetBuilders { + val retweetBuilder: RetweetBuilder.Type + val tweetBuilder: TweetBuilder.Type +} + +object TweetBuilders { + + def validateCardRefAttachmentByUserAgentGate( + android: Gate[Unit], + nonAndroid: Gate[Unit] + ): Gate[Option[String]] = + Gate[Option[String]] { (userAgent: Option[String]) => + if (userAgent.exists(_.startsWith("TwitterAndroid"))) { + android() + } else { + nonAndroid() + } + } + + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + clients: BackendClients, + caches: Caches, + repos: LogicalRepositories, + tweetStore: TotalTweetStore, + hasMedia: Tweet => Boolean, + unretweetEdits: TweetDeletePathHandler.UnretweetEdits, + ): TweetBuilders = { + val urlShortener = + UrlShortener.scribeMalware(clients.guano) { + UrlShortener.fromTalon(clients.talon.shorten) + } + + val urlEntityBuilder = UrlEntityBuilder.fromShortener(urlShortener) + + val geoBuilder = + GeoBuilder( + repos.placeRepo, + ReverseGeocoder.fromGeoduck(clients.geoduckGeohashLocate), + statsReceiver.scope("geo_builder") + ) + + val replyCardUsersFinder: CardUsersFinder.Type = CardUsersFinder(repos.cardUsersRepo) + + val selfThreadBuilder = SelfThreadBuilder(statsReceiver.scope("self_thread_builder")) + + val replyBuilder = + ReplyBuilder( + repos.userIdentityRepo, + repos.optionalTweetRepo, + replyCardUsersFinder, + selfThreadBuilder, + repos.relationshipRepo, + repos.unmentionedEntitiesRepo, + deciderGates.enableRemoveUnmentionedImplicitMentions, + statsReceiver.scope("reply_builder"), + TweetText.MaxMentions + ) + + val mediaBuilder = + MediaBuilder( + clients.mediaClient.processMedia, + CreateMediaTco(urlShortener), + statsReceiver.scope("media_builder") + ) + + val validateAttachments = + AttachmentBuilder.validateAttachments( + statsReceiver, + 
validateCardRefAttachmentByUserAgentGate( + android = deciderGates.validateCardRefAttachmentAndroid, + nonAndroid = deciderGates.validateCardRefAttachmentNonAndroid + ) + ) + + val attachmentBuilder = + AttachmentBuilder( + repos.optionalTweetRepo, + urlShortener, + validateAttachments, + statsReceiver.scope("attachment_builder"), + deciderGates.denyNonTweetPermalinks + ) + + val validatePostTweetRequest: FutureEffect[PostTweetRequest] = + TweetBuilder.validateAdditionalFields[PostTweetRequest] + + val validateRetweetRequest = + TweetBuilder.validateAdditionalFields[RetweetRequest] + + val tweetIdGenerator = + () => clients.snowflakeClient.get() + + val retweetSpamChecker = + Spam.gated(deciderGates.checkSpamOnRetweet) { + Spam.allowOnException( + ScarecrowRetweetSpamChecker( + statsReceiver.scope("retweet_builder").scope("spam"), + repos.retweetSpamCheckRepo + ) + ) + } + + val tweetSpamChecker = + Spam.gated(deciderGates.checkSpamOnTweet) { + Spam.allowOnException( + ScarecrowTweetSpamChecker.fromSpamCheckRepository( + statsReceiver.scope("tweet_builder").scope("spam"), + repos.tweetSpamCheckRepo + ) + ) + } + + val duplicateTweetFinder = + DuplicateTweetFinder( + settings = settings.duplicateTweetFinderSettings, + tweetSource = DuplicateTweetFinder.TweetSource.fromServices( + tweetRepo = repos.optionalTweetRepo, + getStatusTimeline = clients.timelineService.getStatusTimeline + ) + ) + + val validateUpdateRateLimit = + RateLimitChecker.validate( + clients.limiterService.hasRemaining(Feature.Updates), + statsReceiver.scope("rate_limits", Feature.Updates.name), + deciderGates.rateLimitByLimiterService + ) + + val tweetBuilderStats = statsReceiver.scope("tweet_builder") + + val updateUserCounts = + TweetBuilder.updateUserCounts(hasMedia) + + val filterInvalidData = + TweetBuilder.filterInvalidData( + validateTweetMediaTags = TweetBuilder.validateTweetMediaTags( + tweetBuilderStats.scope("media_tags_filter"), + RateLimitChecker.getMaxMediaTags( + 
clients.limiterService.minRemaining(Feature.MediaTagCreate), + TweetBuilder.MaxMediaTagCount + ), + repos.optionalUserRepo + ), + cardReferenceBuilder = TweetBuilder.cardReferenceBuilder( + CardReferenceValidationHandler(clients.expandodo.checkAttachmentEligibility), + urlShortener + ) + ) + + val rateLimitFailures = + PostTweet.RateLimitFailures( + validateLimit = RateLimitChecker.validate( + clients.limiterService.hasRemaining(Feature.TweetCreateFailure), + statsReceiver.scope("rate_limits", Feature.TweetCreateFailure.name), + deciderGates.rateLimitTweetCreationFailure + ), + clients.limiterService.incrementByOne(Feature.Updates), + clients.limiterService.incrementByOne(Feature.TweetCreateFailure) + ) + + val countFailures = + PostTweet.CountFailures[TweetBuilderResult](statsReceiver) + + val tweetBuilderFilter: PostTweet.Filter[TweetBuilderResult] = + rateLimitFailures.andThen(countFailures) + + val conversationControlBuilder = ConversationControlBuilder.fromUserIdentityRepo( + statsReceiver = statsReceiver.scope("conversation_control_builder"), + userIdentityRepo = repos.userIdentityRepo + ) + + val conversationControlValidator = ConversationControlBuilder.Validate( + useFeatureSwitchResults = deciderGates.useConversationControlFeatureSwitchResults, + statsReceiver = statsReceiver + ) + + val communitiesValidator: CommunitiesValidator.Type = CommunitiesValidator() + + val collabControlBuilder: CollabControlBuilder.Type = CollabControlBuilder() + + val userRelationshipSource = UserRelationshipSource.fromRepo( + Repo[UserRelationshipSource.Key, Unit, Boolean] { (key, _) => + repos.relationshipRepo( + RelationshipKey(key.subjectId, key.objectId, key.relationship) + ) + } + ) + + val trustedFriendsSource = + TrustedFriendsSource.fromStrato(clients.stratoserverClient, statsReceiver) + + val validateTweetWrite = TweetWriteValidator( + convoCtlRepo = repos.conversationControlRepo, + tweetWriteEnforcementLibrary = TweetWriteEnforcementLibrary( + userRelationshipSource, 
+ trustedFriendsSource, + repos.userIsInvitedToConversationRepo, + repos.stratoSuperFollowEligibleRepo, + repos.tweetRepo, + statsReceiver.scope("tweet_write_enforcement_library") + ), + enableExclusiveTweetControlValidation = deciderGates.enableExclusiveTweetControlValidation, + enableTrustedFriendsControlValidation = deciderGates.enableTrustedFriendsControlValidation, + enableStaleTweetValidation = deciderGates.enableStaleTweetValidation + ) + + val nudgeBuilder = NudgeBuilder( + clients.stratoserverClient, + deciderGates.jiminyDarkRequests, + statsReceiver.scope("nudge_builder") + ) + + val editControlBuilder = EditControlBuilder( + tweetRepo = repos.tweetRepo, + card2Repo = repos.card2Repo, + promotedTweetRepo = repos.stratoPromotedTweetRepo, + subscriptionVerificationRepo = repos.stratoSubscriptionVerificationRepo, + disablePromotedTweetEdit = deciderGates.disablePromotedTweetEdit, + checkTwitterBlueSubscription = deciderGates.checkTwitterBlueSubscriptionForEdit, + setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes, + stats = statsReceiver, + ) + + val validateEdit = EditValidator(repos.optionalTweetRepo) + + // TweetBuilders builds two distinct TweetBuilders (Tweet and Retweet builders). 
+ new TweetBuilders { + val tweetBuilder: TweetBuilder.Type = + tweetBuilderFilter[PostTweetRequest]( + TweetBuilder( + stats = tweetBuilderStats, + validateRequest = validatePostTweetRequest, + validateEdit = validateEdit, + validateUpdateRateLimit = validateUpdateRateLimit, + tweetIdGenerator = tweetIdGenerator, + userRepo = repos.userRepo, + deviceSourceRepo = repos.deviceSourceRepo, + communityMembershipRepo = repos.stratoCommunityMembershipRepo, + communityAccessRepo = repos.stratoCommunityAccessRepo, + urlShortener = urlShortener, + urlEntityBuilder = urlEntityBuilder, + geoBuilder = geoBuilder, + replyBuilder = replyBuilder, + mediaBuilder = mediaBuilder, + attachmentBuilder = attachmentBuilder, + duplicateTweetFinder = duplicateTweetFinder, + spamChecker = tweetSpamChecker, + filterInvalidData = filterInvalidData, + updateUserCounts = updateUserCounts, + validateConversationControl = conversationControlValidator, + conversationControlBuilder = conversationControlBuilder, + validateTweetWrite = validateTweetWrite, + nudgeBuilder = nudgeBuilder, + communitiesValidator = communitiesValidator, + collabControlBuilder = collabControlBuilder, + editControlBuilder = editControlBuilder, + featureSwitches = featureSwitchesWithExperiments, + ) + ) + + val retweetBuilder: RetweetBuilder.Type = + tweetBuilderFilter[RetweetRequest]( + RetweetBuilder( + validateRequest = validateRetweetRequest, + tweetIdGenerator = tweetIdGenerator, + tweetRepo = repos.tweetRepo, + userRepo = repos.userRepo, + tflock = clients.tflockWriteClient, + deviceSourceRepo = repos.deviceSourceRepo, + validateUpdateRateLimit = validateUpdateRateLimit, + spamChecker = retweetSpamChecker, + updateUserCounts = updateUserCounts, + superFollowRelationsRepo = repos.stratoSuperFollowRelationsRepo, + unretweetEdits = unretweetEdits, + setEditWindowToSixtyMinutes = deciderGates.setEditTimeWindowToSixtyMinutes + ) + ) + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala new file mode 100644 index 000000000..af71bf89d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetHydrators.scala @@ -0,0 +1,341 @@ +package com.twitter.tweetypie +package config + +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.LockingCache +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.servo.util.ExceptionCounter +import com.twitter.servo.util.FutureEffect +import com.twitter.servo.util.Scribe +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.TweetData +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.hydrator._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.serverutil.{ExceptionCounter => TpExceptionCounter} +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.client_id.ClientIdHelper + +/** + * Pairs the fully wired Tweet hydration pipeline with the effect that + * writes hydration changes back to cache. + */ +trait TweetHydrators { + + /** + * Hydrator that has all the Tweet hydrators (entire "pipeline") configured + * and wired up. + * This hydrator is used both on the read and write path and is + * customized by different TweetQuery.Options. + * Modifications are not automatically written back to cache. + * `cacheChangesEffect` must be used for that. + */ + def hydrator: TweetDataValueHydrator + + /** + * The `Effect` to use to write modified tweets back to cache. + */ + def cacheChangesEffect: Effect[ValueState[TweetData]] +} + +/** + * Factory for [[TweetHydrators]] instances. + */ +object TweetHydrators { + + /** + * Creates all the hydrators and calls TweetHydration to wire them up.
+ */ + def apply( + stats: StatsReceiver, + deciderGates: TweetypieDeciderGates, + repos: LogicalRepositories, + tweetDataCache: LockingCache[TweetId, Cached[TweetData]], + hasMedia: Tweet => Boolean, + featureSwitchesWithoutExperiments: FeatureSwitches, + clientIdHelper: ClientIdHelper + ): TweetHydrators = { + /* NOTE(review): the unqualified *Repo names used below presumably come from this import -- confirm against LogicalRepositories. */ + import repos._ + + /* Separate stats scopes: "repairs" for the repair mutations, "hydrators" for per-hydrator observation. */ + val repairStats = stats.scope("repairs") + val hydratorStats = stats.scope("hydrators") + + /** Applies `f` to `stats` scoped under `name`, so a hydrator and its observer can share one scoped receiver. */ + def scoped[A](stats: StatsReceiver, name: String)(f: StatsReceiver => A): A = { + val scopedStats = stats.scope(name) + f(scopedStats) + } + + /** Classifies a Throwable as a failure unless it is a `FilteredState` or `NotFound`. */ + val isFailureException: Throwable => Boolean = { + case _: FilteredState => false + case NotFound => false + case _ => true + } + + /** Combines three categorizers: constant "filtered" for `FilteredState`, constant "not_found" for `NotFound`, and the default categorizer for `failureScope` restricted to exceptions accepted by `isFailureException`. */ + def hydratorExceptionCategorizer(failureScope: String) = + ExceptionCategorizer.const("filtered").onlyIf(_.isInstanceOf[FilteredState]) ++ + ExceptionCategorizer.const("not_found").onlyIf(_ == NotFound) ++ + TpExceptionCounter.defaultCategorizer(failureScope).onlyIf(isFailureException) + + /** Builds, from a stats receiver and a scope name, the exception counter handed to the `.observe(...)` calls below. */ + val hydratorExceptionCounter: (StatsReceiver, String) => ExceptionCounter = + (stats, scope) => TpExceptionCounter(stats, hydratorExceptionCategorizer(scope)) + + val tweetHydrator = + TweetHydration( + hydratorStats = hydratorStats, + hydrateFeatureSwitchResults = + FeatureSwitchResultsHydrator(featureSwitchesWithoutExperiments, clientIdHelper), + hydrateMentions = MentionEntitiesHydrator + .once(MentionEntityHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("mentions"), hydratorExceptionCounter), + hydrateLanguage = LanguageHydrator(languageRepo) + .observe(hydratorStats.scope("language"), hydratorExceptionCounter), + hydrateUrls = scoped(hydratorStats, "url") { stats => + UrlEntitiesHydrator + .once(UrlEntityHydrator(urlRepo, stats)) + .observe(stats, hydratorExceptionCounter) + }, + hydrateQuotedTweetRef = QuotedTweetRefHydrator + .once( + QuotedTweetRefHydrator(tweetRepo) + ) + .observe(hydratorStats.scope("quoted_tweet_ref"), hydratorExceptionCounter), +
hydrateQuotedTweetRefUrls = QuotedTweetRefUrlsHydrator(userIdentityRepo) + .observe(hydratorStats.scope("quoted_tweet_ref_urls"), hydratorExceptionCounter), + hydrateMediaCacheable = MediaEntitiesHydrator.Cacheable + .once( + MediaEntityHydrator.Cacheable( + hydrateMediaUrls = MediaUrlFieldsHydrator() + .observe(hydratorStats.scope("media_urls"), hydratorExceptionCounter), + hydrateMediaIsProtected = MediaIsProtectedHydrator(userProtectionRepo) + .observe(hydratorStats.scope("media_is_protected"), hydratorExceptionCounter) + ) + ) + .observe(hydratorStats.scope("media_cacheable"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateMedia), + hydrateReplyScreenName = ReplyScreenNameHydrator + .once(ReplyScreenNameHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("in_reply_to_screen_name"), hydratorExceptionCounter), + hydrateConvoId = ConversationIdHydrator(conversationIdRepo) + .observe(hydratorStats.scope("conversation_id"), hydratorExceptionCounter), + hydratePerspective = // Don't cache with the tweet because it depends on the request + PerspectiveHydrator( + repo = perspectiveRepo, + shouldHydrateBookmarksPerspective = deciderGates.hydrateBookmarksPerspective, + stats = hydratorStats.scope("perspective_by_safety_label") + ).observe(hydratorStats.scope("perspective"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePerspectives), + hydrateEditPerspective = EditPerspectiveHydrator( + repo = perspectiveRepo, + timelinesGate = deciderGates.hydratePerspectivesEditsForTimelines, + tweetDetailsGate = deciderGates.hydratePerspectivesEditsForTweetDetail, + otherSafetyLevelsGate = deciderGates.hydratePerspectivesEditsForOtherSafetyLevels, + bookmarksGate = deciderGates.hydrateBookmarksPerspective, + stats = hydratorStats + ).observe(hydratorStats.scope("edit_perspective"), hydratorExceptionCounter), + hydrateConversationMuted = // Don't cache because it depends on the request.
If + // possible, this hydrator should be in the same stage as + // PerspectiveHydrator, so that the calls can be batched + // together. + ConversationMutedHydrator(conversationMutedRepo) + .observe(hydratorStats.scope("conversation_muted"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateConversationMuted), + hydrateContributor = ContributorHydrator + .once(ContributorHydrator(userIdentityRepo)) + .observe(hydratorStats.scope("contributors"), hydratorExceptionCounter), + hydrateTakedowns = TakedownHydrator(takedownRepo) + .observe(hydratorStats.scope("takedowns"), hydratorExceptionCounter), + hydrateDirectedAt = scoped(hydratorStats, "directed_at") { stats => + DirectedAtHydrator + .once(DirectedAtHydrator(userIdentityRepo, stats)) + .observe(stats, hydratorExceptionCounter) + }, + hydrateGeoScrub = GeoScrubHydrator( + geoScrubTimestampRepo, + Scribe("test_tweetypie_read_time_geo_scrubs") + .contramap[TweetId](_.toString) + ).observe(hydratorStats.scope("geo_scrub"), hydratorExceptionCounter), + hydrateCacheableRepairs = ValueHydrator + .fromMutation[Tweet, TweetQuery.Options]( + RepairMutation( + repairStats.scope("on_read"), + "created_at" -> + new CreatedAtRepairer(Scribe("test_tweetypie_bad_created_at")), + "retweet_media" -> RetweetMediaRepairer, + "parent_status_id" -> RetweetParentStatusIdRepairer.tweetMutation, + "visible_text_range" -> NegativeVisibleTextRangeRepairer.tweetMutation + ) + ) + .lensed(TweetData.Lenses.tweet) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)), + hydrateMediaUncacheable = MediaEntityHydrator + .Uncacheable( + hydrateMediaKey = MediaKeyHydrator() + .observe(hydratorStats.scope("media_key"), hydratorExceptionCounter), + hydrateMediaInfo = scoped(hydratorStats, "media_info") { stats => + MediaInfoHydrator(mediaMetadataRepo, stats) + .observe(stats, hydratorExceptionCounter) + } + ) + .observe(hydratorStats.scope("media_uncacheable"), hydratorExceptionCounter) + .liftSeq + .ifEnabled(deciderGates.hydrateMedia), +
hydratePostCacheRepairs = + // clean-up partially hydrated entities before any of the hydrators that look at + // url and media entities run, so that they never see bad entities. + ValueHydrator.fromMutation[TweetData, TweetQuery.Options]( + RepairMutation( + repairStats.scope("on_read"), + "partial_entity_cleanup" -> PartialEntityCleaner(repairStats), + "strip_not_display_coords" -> StripHiddenGeoCoordinates + ).lensed(TweetData.Lenses.tweet) + ), + hydrateTweetLegacyFormat = scoped(hydratorStats, "tweet_legacy_formatter") { stats => + TweetLegacyFormatter(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)) + }, + hydrateQuoteTweetVisibility = QuoteTweetVisibilityHydrator(quotedTweetVisibilityRepo) + .observe(hydratorStats.scope("quote_tweet_visibility"), hydratorExceptionCounter), + hydrateQuotedTweet = QuotedTweetHydrator(tweetResultRepo) + .observe(hydratorStats.scope("quoted_tweet"), hydratorExceptionCounter), + hydratePastedMedia = + // Don't cache with the tweet because we want to automatically drop this media if + // the referenced tweet is deleted or becomes non-public.
+ PastedMediaHydrator(pastedMediaRepo) + .observe(hydratorStats.scope("pasted_media")) + .ifEnabled(deciderGates.hydratePastedMedia), + hydrateMediaRefs = MediaRefsHydrator( + optionalTweetRepo, + deciderGates.mediaRefsHydratorIncludePastedMedia + ).observe(hydratorStats.scope("media_refs")) + .ifEnabled(deciderGates.hydrateMediaRefs), + hydrateMediaTags = // depends on AdditionalFieldsHydrator + MediaTagsHydrator(userViewRepo) + .observe(hydratorStats.scope("media_tags"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateMediaTags), + hydrateClassicCards = CardHydrator(cardRepo) + .observe(hydratorStats.scope("cards"), hydratorExceptionCounter), + hydrateCard2 = Card2Hydrator(card2Repo) + .observe(hydratorStats.scope("card2")), + hydrateContributorVisibility = + // Filter out contributors field for all but the user who owns the tweet + ContributorVisibilityFilter() + .observe(hydratorStats.scope("contributor_visibility"), hydratorExceptionCounter), + hydrateHasMedia = + // Sets hasMedia. Comes after PastedMediaHydrator in order to include pasted + // pics as well as other media & urls.
+ HasMediaHydrator(hasMedia) + .observe(hydratorStats.scope("has_media"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateHasMedia), + hydrateTweetCounts = // Don't cache counts with the tweet because it has its own cache with + // a different TTL + TweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) + .observe(hydratorStats.scope("tweet_counts"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateCounts), + hydratePreviousTweetCounts = // previous counts are not cached + scoped(hydratorStats, "previous_counts") { stats => + PreviousTweetCountsHydrator(tweetCountsRepo, deciderGates.hydrateBookmarksCount) + .observe(stats, hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePreviousCounts) + }, + hydratePlace = + // Don't cache with the tweet because Place has its own tweetypie cache keyspace + // with a different TTL, and it's more efficient to store separately. + // See com.twitter.tweetypie.repository.PlaceKey + PlaceHydrator(placeRepo) + .observe(hydratorStats.scope("place"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydratePlaces), + hydrateDeviceSource = // Don't cache with the tweet because it has its own cache, + // and it's more efficient to cache it separately + DeviceSourceHydrator(deviceSourceRepo) + .observe(hydratorStats.scope("device_source"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateDeviceSources), + hydrateProfileGeo = + // Don't cache gnip profile geo as read request volume is expected to be low + ProfileGeoHydrator(profileGeoRepo) + .observe(hydratorStats.scope("profile_geo"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateGnipProfileGeoEnrichment), + hydrateSourceTweet = scoped(hydratorStats, "source_tweet") { stats => + SourceTweetHydrator( + tweetResultRepo, + stats, + FutureEffect + .inParallel( + Scribe(DetachedRetweet, "tweetypie_detached_retweets"), + Scribe(DetachedRetweet, "test_tweetypie_detached_retweets"), + ) + ).observe(stats,
hydratorExceptionCounter) + }, + hydrateIM1837State = IM1837FilterHydrator() + .observe(hydratorStats.scope("im1837_filter"), hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + }, + hydrateIM2884State = scoped(hydratorStats, "im2884_filter") { stats => + IM2884FilterHydrator(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + } + }, + hydrateIM3433State = scoped(hydratorStats, "im3433_filter") { stats => + IM3433FilterHydrator(stats) + .observe(stats, hydratorExceptionCounter) + .onlyIf { (_, ctx) => + ctx.opts.forExternalConsumption && ctx.opts.cause.reading(ctx.tweetId) + } + }, + hydrateTweetAuthorVisibility = TweetAuthorVisibilityHydrator(userVisibilityRepo) + .observe(hydratorStats.scope("tweet_author_visibility"), hydratorExceptionCounter) + .onlyIf((_, ctx) => ctx.opts.cause.reading(ctx.tweetId)), + hydrateReportedTweetVisibility = ReportedTweetFilter() + .observe(hydratorStats.scope("reported_tweet_filter"), hydratorExceptionCounter), + scrubSuperfluousUrlEntities = ValueHydrator + .fromMutation[Tweet, TweetQuery.Options](SuperfluousUrlEntityScrubber.mutation) + .lensed(TweetData.Lenses.tweet), + copyFromSourceTweet = CopyFromSourceTweet.hydrator + .observe(hydratorStats.scope("copy_from_source_tweet"), hydratorExceptionCounter), + hydrateTweetVisibility = scoped(hydratorStats, "tweet_visibility") { stats => + TweetVisibilityHydrator( + tweetVisibilityRepo, + deciderGates.failClosedInVF, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateEscherbirdAnnotations = EscherbirdAnnotationHydrator(escherbirdAnnotationRepo) + .observe(hydratorStats.scope("escherbird_annotations"), hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateEscherbirdAnnotations), + hydrateScrubEngagements = ScrubEngagementHydrator() + .observe(hydratorStats.scope("scrub_engagements"),
hydratorExceptionCounter) + .ifEnabled(deciderGates.hydrateScrubEngagements), + hydrateConversationControl = scoped(hydratorStats, "tweet_conversation_control") { stats => + ConversationControlHydrator( + conversationControlRepo, + deciderGates.disableInviteViaMention, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateEditControl = scoped(hydratorStats, "tweet_edit_control") { stats => + EditControlHydrator( + tweetRepo, + deciderGates.setEditTimeWindowToSixtyMinutes, + stats + ).observe(stats, hydratorExceptionCounter) + }, + hydrateUnmentionData = UnmentionDataHydrator(), + /* NOTE(review): observed with the unscoped `stats` passed to apply, whereas the other observed hydrators use a `hydratorStats` scope -- confirm this is intended. */ + hydrateNoteTweetSuffix = NoteTweetSuffixHydrator().observe(stats, hydratorExceptionCounter) + ) + + new TweetHydrators { + val hydrator: TweetDataValueHydrator = + tweetHydrator.onlyIf { (tweetData, opts) => + // When the caller requests fetchStoredTweets and Tweets are fetched from Manhattan + // irrespective of state, the stored data for some Tweets may be incomplete. + // We skip the hydration of those Tweets.
+ !opts.fetchStoredTweets || + tweetData.storedTweetResult.exists(_.canHydrate) + } + + /* Built by TweetHydration.cacheChanges over `tweetDataCache`, with stats under the "tweet_caching" scope. */ + val cacheChangesEffect: Effect[ValueState[TweetData]] = + TweetHydration.cacheChanges( + tweetDataCache, + hydratorStats.scope("tweet_caching") + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala new file mode 100644 index 000000000..795e1b300 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServerBuilder.scala @@ -0,0 +1,300 @@ +package com.twitter.tweetypie.config + +import com.twitter.decider.Decider +import com.twitter.decider.DeciderFactory +import com.twitter.decider.LocalOverrides +import com.twitter.featureswitches.v2.builder.FeatureSwitchesBuilder +import com.twitter.finagle.filter.DarkTrafficFilter +import com.twitter.finagle.stats.DefaultStatsReceiver +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.Protocols +import com.twitter.finagle.util.DefaultTimer +import com.twitter.finagle.Filter +import com.twitter.finagle.Service +import com.twitter.finagle.SimpleFilter +import com.twitter.quill.capture._ +import com.twitter.servo.util.MemoizingStatsReceiver +import com.twitter.servo.util.WaitForServerSets +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.client_id.ConditionalServiceIdentifierStrategy +import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato +import com.twitter.tweetypie.client_id.UseTransportServiceIdentifier +import com.twitter.tweetypie.context.TweetypieContext +import com.twitter.tweetypie.matching.Tokenizer +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.thriftscala.TweetServiceInternal$FinagleService +import com.twitter.util._ +import
com.twitter.util.logging.Logger +import scala.util.control.NonFatal + +/** + * Assembles the tweetypie server from `settings`: decider gates, feature switches, + * backend clients, the tweet service, and (via `build`) the filtered thrift service. + */ +class TweetServerBuilder(settings: TweetServiceSettings) { + + /** + * A logger used by some of the built-in initializers. + */ + val log: Logger = Logger(getClass) + + /** + * The top-level stats receiver. Defaults to the default StatsReceiver + * embedded in Finagle. + */ + val statsReceiver: StatsReceiver = + new MemoizingStatsReceiver(DefaultStatsReceiver) + + /** + * Same as `statsReceiver` when `settings.clientHostStats` is set, otherwise a `NullStatsReceiver`. + */ + val hostStatsReceiver: StatsReceiver = + if (settings.clientHostStats) + statsReceiver + else + NullStatsReceiver + + /** + * A timer for scheduling various things. + */ + val timer: Timer = DefaultTimer + + /** + * Creates a decider instance by looking up the decider configuration information + * from the settings object. + */ + val decider: Decider = { + val fileBased = DeciderFactory(settings.deciderBaseFilename, settings.deciderOverlayFilename)() + + // Use the tweetypie decider dashboard name for propagating decider overrides. + LocalOverrides.decider("tweetypie").orElse(fileBased) + } + + val deciderGates: TweetypieDeciderGates = { + val deciderGates = TweetypieDeciderGates(decider, settings.deciderOverrides) + + // Write out the configuration overrides to the log so that it's + // easy to confirm how this instance has been customized. + deciderGates.overrides.foreach { + case (overrideName, overrideValue) => + log.info("Decider feature " + overrideName + " overridden to " + overrideValue) + if (deciderGates.unusedOverrides.contains(overrideName)) { + log.error("Unused decider override flag: " + overrideName) + } + } + + val scopedReceiver = statsReceiver.scope("decider_values") + + deciderGates.availabilityMap.foreach { + case (feature, value) => + scopedReceiver.provideGauge(feature) { + // Default value of -1 indicates error state.
+ value.getOrElse(-1).toFloat + } + } + + deciderGates + } + + val featureSwitchesWithExperiments = FeatureSwitchesBuilder + .createWithExperiments("/features/tweetypie/main") + .build() + + val featureSwitchesWithoutExperiments = FeatureSwitchesBuilder + .createWithNoExperiments("/features/tweetypie/main", Some(statsReceiver)) + .build() + + // ********* initializer ********** + + /* Runs Tokenizer.warmUp() and logs how long it took. */ + private[this] def warmupTextTokenization(logger: Logger): Unit = { + logger.info("Warming up text tokenization") + val watch = Stopwatch.start() + Tokenizer.warmUp() + logger.info(s"Warmed up text tokenization in ${watch()}") + } + + /* Warms up tokenization, then the backends, then the tweet service, blocking (Await.ready) on the last two. Reads `backendClients`, which is declared further down; this method is only reached through `build`. */ + private[this] def runWarmup(tweetService: Activity[ThriftTweetService]): Unit = { + val tokenizationLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.TokenizationWarmup") + warmupTextTokenization(tokenizationLogger) + + val warmupLogger = Logger("com.twitter.tweetypie.TweetServerBuilder.BackendWarmup") + // #1 warmup backends + Await.ready(settings.backendWarmupSettings(backendClients, warmupLogger, timer)) + + // #2 warmup Tweet Service + Await.ready { + tweetService.values.toFuture.map(_.get).map { service => + settings.warmupRequestsSettings.foreach(new TweetServiceWarmer(_)(service)) + } + } + } + + /* Blocks on WaitForServerSets.ready for the names referenced by `backendClients`; a non-fatal failure is logged as a warning and not rethrown. */ + private[this] def waitForServerSets(): Unit = { + val names = backendClients.referencedNames + val startTime = Time.now + /* NOTE(review): the separator "\t\n" puts the tab after each name rather than before the next one; "\n\t" may have been intended -- confirm. */ + log.info("will wait for serversets: " + names.mkString("\n", "\t\n", "")) + + try { + Await.result(WaitForServerSets.ready(names, settings.waitForServerSetsTimeout, timer)) + val duration = Time.now.since(startTime) + log.info("resolved all serversets in " + duration) + } catch { + case NonFatal(ex) => log.warn("failed to resolve all serversets", ex) + } + } + + /* Startup sequence used by `build`: wait for serversets, run warmup, then request a GC. */ + private[this] def initialize(tweetService: Activity[ThriftTweetService]): Unit = { + waitForServerSets() + runWarmup(tweetService) + + // try to force a GC before starting to serve requests; this may or may not do anything + System.gc() + } + + // ********* builders
********** + + val clientIdHelper = new ClientIdHelper( + new ConditionalServiceIdentifierStrategy( + condition = deciderGates.preferForwardedServiceIdentifierForClientId, + ifTrue = PreferForwardedServiceIdentifierForStrato, + ifFalse = UseTransportServiceIdentifier, + ), + ) + + val backendClients: BackendClients = + BackendClients( + settings = settings, + deciderGates = deciderGates, + statsReceiver = statsReceiver, + hostStatsReceiver = hostStatsReceiver, + timer = timer, + clientIdHelper = clientIdHelper, + ) + + val tweetService: Activity[ThriftTweetService] = + TweetServiceBuilder( + settings = settings, + statsReceiver = statsReceiver, + timer = timer, + deciderGates = deciderGates, + featureSwitchesWithExperiments = featureSwitchesWithExperiments, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + backendClients = backendClients, + clientIdHelper = clientIdHelper, + ) + + // Strato columns should use this tweetService + def stratoTweetService: Activity[ThriftTweetService] = + tweetService.map { service => + // Add quill functionality to the strato tweet service only + val quillCapture = QuillCaptureBuilder(settings, deciderGates) + new QuillTweetService(quillCapture, service) + } + + /** + * Builds the served thrift service. Runs the blocking `initialize` step first, then maps + * every `tweetService` value to a `TweetServiceInternal$FinagleService` placed behind the + * quill-capture, TweetypieContext and dark-traffic filters. + */ + def build: Activity[Service[Array[Byte], Array[Byte]]] = { + + val quillCapture = QuillCaptureBuilder(settings, deciderGates) + + val darkTrafficFilter: SimpleFilter[Array[Byte], Array[Byte]] = + if (!settings.trafficForkingEnabled) { + Filter.identity + } else { + new DarkTrafficFilter( + backendClients.darkTrafficClient, + _ => deciderGates.forkDarkTraffic(), + statsReceiver + ) + } + + val serviceFilter = + quillCapture + .getServerFilter(ThriftProto.server) + .andThen(TweetypieContext.Local.filter[Array[Byte], Array[Byte]]) + .andThen(darkTrafficFilter) + + initialize(tweetService) + + // tweetService is an Activity[ThriftTweetService], so this callback + // is called every time that Activity updates (on ConfigBus changes).
+ tweetService.map { service => + val finagleService = + new TweetServiceInternal$FinagleService( + service, + protocolFactory = Protocols.binaryFactory(), + stats = NullStatsReceiver, + maxThriftBufferSize = settings.maxThriftBufferSize + ) + + serviceFilter andThen finagleService + } + } +} + +/** + * Builds the `QuillCapture` for the tweet service; its message store is picked per message + * from the method-name sets declared here. + */ +object QuillCaptureBuilder { + /** Method names whose messages go to the writes store. */ + val tweetServiceWriteMethods: Set[String] = + Set( + "async_delete", + "async_delete_additional_fields", + "async_erase_user_tweets", + "async_incr_fav_count", + "async_insert", + "async_set_additional_fields", + "async_set_retweet_visibility", + "async_takedown", + "async_undelete_tweet", + "async_update_possibly_sensitive_tweet", + "cascaded_delete_tweet", + "delete_additional_fields", + "delete_retweets", + "delete_tweets", + "erase_user_tweets", + "flush", + "incr_fav_count", + "insert", + "post_retweet", + "post_tweet", + "remove", + "replicated_delete_additional_fields", + "replicated_delete_tweet", + "replicated_delete_tweet2", + "replicated_incr_fav_count", + "replicated_insert_tweet2", + "replicated_scrub_geo", + "replicated_set_additional_fields", + "replicated_set_has_safety_labels", + "replicated_set_retweet_visibility", + "replicated_takedown", + "replicated_undelete_tweet2", + "replicated_update_possibly_sensitive_tweet", + "scrub_geo", + "scrub_geo_update_user_timestamp", + "set_additional_fields", + "set_has_safety_labels", + "set_retweet_visibility", + "set_tweet_user_takedown", + "takedown", + "undelete_tweet" + ) + + /** Method names whose messages go to the reads store. */ + val tweetServiceReadMethods: Set[String] = + Set( + "get_tweet_counts", + "get_tweet_fields", + "get_tweets", + "replicated_get_tweet_counts", + "replicated_get_tweet_fields", + "replicated_get_tweets" + ) + + def apply(settings: TweetServiceSettings, deciderGates: TweetypieDeciderGates): QuillCapture = { + val writesStore = SimpleScribeMessageStore("tweetypie_writes") + .enabledBy(deciderGates.logWrites) + + val readsStore = SimpleScribeMessageStore("tweetypie_reads") + .enabledBy(deciderGates.logReads) + + val
messageStore = + MessageStore.selected { + case msg if tweetServiceWriteMethods.contains(msg.name) => writesStore + case msg if tweetServiceReadMethods.contains(msg.name) => readsStore + /* Any method in neither set falls back to the writes store. */ + case _ => writesStore + } + + new QuillCapture(Store.legacyStore(messageStore), Some(settings.thriftClientId.name)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala new file mode 100644 index 000000000..765a608a2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceAuthorizers.scala @@ -0,0 +1,399 @@ +package com.twitter.tweetypie +package config + +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.transport.S2STransport +import com.twitter.servo.gate.RateLimitingGate +import com.twitter.servo.request.ClientRequestAuthorizer.UnauthorizedException +import com.twitter.servo.request.{ClientRequestAuthorizer, ClientRequestObserver} +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato +import com.twitter.tweetypie.core.RateLimited +import com.twitter.tweetypie.service.MethodAuthorizer +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +/** + * Compose a ClientRequestAuthorizer for + * ClientHandlingTweetService + * + * With `settings.allowlistingRequired` the authorizer chains, in order, the + * allow-listed-or-rate-limited, non-allow-listed-prod, permitted-methods and + * production-client checks; otherwise it is `ClientRequestAuthorizer.withClientId`. + * Either way it is wrapped in an observed authorizer whose observer records the + * migration counters. + */ +object ClientHandlingTweetServiceAuthorizer { + private val RateLimitExceeded = + RateLimited("Your ClientId has exceeded the rate limit for non-allowListed clients.") + + def apply( + settings: TweetServiceSettings, + dynamicConfig: DynamicConfig, + statsReceiver: StatsReceiver, + getServiceIdentifier: () => ServiceIdentifier = S2STransport.peerServiceIdentifier _ + ): ClientRequestAuthorizer = { + val authorizer = + if (settings.allowlistingRequired) { + val limitingGate =
RateLimitingGate.uniform(settings.nonAllowListedClientRateLimitPerSec) + allowListedOrRateLimitedAuthorizer(dynamicConfig, limitingGate) + .andThen(rejectNonAllowListedProdAuthorizer(dynamicConfig)) + .andThen(permittedMethodsAuthorizer(dynamicConfig)) + .andThen(allowProductionAuthorizer(settings.allowProductionClients)) + } else { + ClientRequestAuthorizer.withClientId + } + + val alternativeClientIdHelper = new ClientIdHelper(PreferForwardedServiceIdentifierForStrato) + // pass the authorizer into an observed authorizer for stats tracking. + // (observed authorizers can't be composed with andThen) + ClientRequestAuthorizer.observed( + authorizer, + new ClientRequestObserver(statsReceiver) { + override def apply( + methodName: String, + clientIdScopesOpt: Option[Seq[String]] + ): Future[Unit] = { + // Monitor for the migration taking into account forwarded service identifier + // as effective client ID for strato. + val alternativeClientIdScopes = alternativeClientIdHelper.effectiveClientId.map(Seq(_)) + if (clientIdScopesOpt != alternativeClientIdScopes) { + scopedReceiver.scope(methodName) + .scope("before_migration") + .scope(clientIdScopesOpt.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) + .scope("after_migration") + .counter(alternativeClientIdScopes.getOrElse(Seq(ClientIdHelper.UnknownClientId)): _*) + .incr() + } else { + scopedReceiver.scope(methodName).counter("migration_indifferent").incr() + } + super.apply(methodName, clientIdScopesOpt) + } + + override def authorized(methodName: String, clientIdStr: String): Unit = { + // Monitor for the migration of using service identifier + // as identity instead of client ID.
+ val serviceIdentifier = getServiceIdentifier() + scopedReceiver.counter( + "authorized_request", + clientIdStr, + serviceIdentifier.role, + serviceIdentifier.service, + serviceIdentifier.environment + ).incr() + /* status is "none" when no configured client matches the service identifier, "equal" or "other" when exactly one matches and its clientId does or does not equal clientIdStr, and "ambiguous" when several match. */ + val status = dynamicConfig.byServiceIdentifier(serviceIdentifier).toSeq match { + case Seq() => "none" + case Seq(client) if client.clientId == clientIdStr => "equal" + case Seq(_) => "other" + case _ => "ambiguous" + } + scopedReceiver.counter( + "service_id_match_client_id", + clientIdStr, + serviceIdentifier.role, + serviceIdentifier.service, + serviceIdentifier.environment, + status + ).incr() + } + } + ) + } + + /** + * @return A ClientRequestAuthorizer that allows unlimited requests for allowlisted client ids and + * rate-limited requests for unknown clients. + */ + def allowListedOrRateLimitedAuthorizer( + dynamicConfig: DynamicConfig, + nonAllowListedLimiter: Gate[Unit] + ): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered( + { (_, clientId) => + dynamicConfig.isAllowListedClient(clientId) || nonAllowListedLimiter() + }, + RateLimitExceeded) + + /** + * @return A ClientRequestAuthorizer that rejects requests from non-allowListed prod clients. + */ + def rejectNonAllowListedProdAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = { + object UnallowlistedException + extends UnauthorizedException( + "Traffic is only allowed from allow-listed *.prod clients." + + " Please create a ticket to register your clientId to enable production traffic using http://go/tp-new-client."
+ ) + + def isProdClient(clientId: String): Boolean = + clientId.endsWith(".prod") || clientId.endsWith(".production") + + ClientRequestAuthorizer.filtered( + { (_, clientId) => + !isProdClient(clientId) || dynamicConfig.isAllowListedClient(clientId) + }, + UnallowlistedException) + } + + /** + * @return A ClientRequestAuthorizer that checks if a given client's + * permittedMethods field includes the method they are calling + * Falls back to `ClientRequestAuthorizer.permissive` when + * `dynamicConfig.clientsByFullyQualifiedId` is `None`. + */ + def permittedMethodsAuthorizer(dynamicConfig: DynamicConfig): ClientRequestAuthorizer = + dynamicConfig.clientsByFullyQualifiedId match { + case Some(clientsById) => permittedMethodsAuthorizer(dynamicConfig, clientsById) + case None => ClientRequestAuthorizer.permissive + } + + /** + * Allows a call when the method is an unprotected endpoint, or when the client id is known + * and the client either has `accessAllMethods` or lists the method in `permittedMethods`. + */ + private def permittedMethodsAuthorizer( + dynamicConfig: DynamicConfig, + clientsByFullyQualifiedId: Map[String, Client] + ): ClientRequestAuthorizer = { + ClientRequestAuthorizer.filtered { (methodName, clientId) => + dynamicConfig.unprotectedEndpoints(methodName) || + (clientsByFullyQualifiedId.get(clientId) match { + case Some(client) => + client.accessAllMethods || + client.permittedMethods.contains(methodName) + case None => + false // If client id is unknown, don't allow access + }) + } + } + + /** + * @return A ClientRequestAuthorizer that fails the + * request if it is coming from a production client + * and allowProductionClients is false + * NOTE(review): repeats the `.prod` / `.production` suffix test that + * `rejectNonAllowListedProdAuthorizer` keeps in its local `isProdClient`; keep the two in sync. + */ + def allowProductionAuthorizer(allowProductionClients: Boolean): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered { (_, clientId) => + allowProductionClients || !(clientId.endsWith(".prod") || clientId.endsWith(".production")) + } +} + +/** + * Compose a MethodAuthorizer for the `getTweets` endpoint.
+ */ +object GetTweetsAuthorizer { + import ProtectedTweetsAuthorizer.IncludeProtected + + def apply( + config: DynamicConfig, + maxRequestSize: Int, + instanceCount: Int, + enforceRateLimitedClients: Gate[Unit], + maxRequestWidthEnabled: Gate[Unit], + statsReceiver: StatsReceiver, + ): MethodAuthorizer[GetTweetsRequest] = + MethodAuthorizer.all( + Seq( + ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId) + .contramap[GetTweetsRequest] { r => + IncludeProtected(r.options.exists(_.bypassVisibilityFiltering)) + }, + RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled) + .contramap[GetTweetsRequest](_.tweetIds.size), + RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver) + .contramap[GetTweetsRequest](_.tweetIds.size) + ) + ) +} + +/** + * Compose a MethodAuthorizer for the `getTweetFields` endpoint. + */ +object GetTweetFieldsAuthorizer { + import ProtectedTweetsAuthorizer.IncludeProtected + + def apply( + config: DynamicConfig, + maxRequestSize: Int, + instanceCount: Int, + enforceRateLimitedClients: Gate[Unit], + maxRequestWidthEnabled: Gate[Unit], + statsReceiver: StatsReceiver + ): MethodAuthorizer[GetTweetFieldsRequest] = + MethodAuthorizer.all( + Seq( + ProtectedTweetsAuthorizer(config.clientsByFullyQualifiedId) + .contramap[GetTweetFieldsRequest](r => + IncludeProtected(r.options.visibilityPolicy == TweetVisibilityPolicy.NoFiltering)), + RequestSizeAuthorizer(maxRequestSize, maxRequestWidthEnabled) + .contramap[GetTweetFieldsRequest](_.tweetIds.size), + RateLimiterAuthorizer(config, instanceCount, enforceRateLimitedClients, statsReceiver) + .contramap[GetTweetFieldsRequest](_.tweetIds.size) + ) + ) +} + +object ProtectedTweetsAuthorizer { + case class IncludeProtected(include: Boolean) extends AnyVal + + class BypassVisibilityFilteringNotAuthorizedException(message: String) + extends UnauthorizedException(message) + + def apply(optClientsById: Option[Map[String, Client]]): MethodAuthorizer[IncludeProtected] = 
{ + optClientsById match { + case Some(clientsByFullyQualifiedId) => + val clientsWithBypassVisibilityFiltering = clientsByFullyQualifiedId.filter { + case (_, client) => client.bypassVisibilityFiltering + } + apply(clientId => clientsWithBypassVisibilityFiltering.contains(clientId)) + + case None => + apply((_: String) => true) + } + } + + /** + * A MethodAuthorizer that fails the request if a client requests to bypass visibility + * filtering but doesn't have BypassVisibilityFiltering + */ + def apply(protectedTweetsAllowlist: String => Boolean): MethodAuthorizer[IncludeProtected] = + MethodAuthorizer { (includeProtected, clientId) => + // There is only one unauthorized case, a client requesting + // protected tweets when they are not in the allowlist + Future.when(includeProtected.include && !protectedTweetsAllowlist(clientId)) { + Future.exception( + new BypassVisibilityFilteringNotAuthorizedException( + s"$clientId is not authorized to bypass visibility filtering" + ) + ) + } + } +} + +/** + * A MethodAuthorizer[Int] that fails large requests. + */ +object RequestSizeAuthorizer { + class ExceededMaxWidthException(message: String) extends UnauthorizedException(message) + + def apply( + maxRequestSize: Int, + maxWidthLimitEnabled: Gate[Unit] = Gate.False + ): MethodAuthorizer[Int] = + MethodAuthorizer { (requestSize, clientId) => + Future.when(requestSize > maxRequestSize && maxWidthLimitEnabled()) { + Future.exception( + new ExceededMaxWidthException( + s"$requestSize exceeds bulk request size limit. $clientId can request at most $maxRequestSize items per request" + ) + ) + } + } +} + +object RateLimiterAuthorizer { + + type ClientId = String + + /** + * @return client ID to weighted RateLimitingGate map + * + * We want to rate-limit based on requests per sec for every instance. + * When we allowlist new clients to Tweetypie, we assign tweets per sec quota. 
+ * That's why we compute perInstanceQuota [1] and create a weighted rate-limiting gate [2] + * which returns true if acquiring requestSize number of permits is successful. [3] + * + * [1] tps quota during allowlisting is for both DCs and instanceCount is for one DC. + * Therefore, we are over-compensating perInstanceQuota for all low-priority clients. + * This will act as a fudge factor to account for cluster-wide traffic imbalances. + * + * val perInstanceQuota : Double = math.max(1.0, math.ceil(tpsLimit.toFloat / instanceCount)) + * + * We have some clients like deferredRPC with 0K tps quota and the rate limiter expects > 0 permits. + * + * [2] If a client has multiple environments (staging, devel, prod), we provision the + * same rate-limits for all envs instead of distributing the tps quota across envs. + * + * Example: + * + * val c = Client(..., limit = 10k, ...) + * Map("foo.prod" -> c, "foo.staging" -> c, "foo.devel" -> c) + * + * Above client config turns into 3 separate RateLimitingGate.weighted(), each with 10k + * + * [3] RateLimitingGate will always give permit to the initial request that exceeds + * the limit. Example: starting with a rate-limit of 1 tps per instance, a first request with + * a batch size of 100 is allowed. + * + * RateLimitFudgeFactor is a multiplier for per-instance quota to account for: + * + * a) High likelihood of concurrent batches hitting the same tweetypie shard due to + * non-uniform load distribution (this can be alleviated by using Deterministic Aperture) + * b) Clients with no retry backoffs and custom batching/concurrency. + * + * We are adding default stitch batch size to per instance quota, to give more headroom for low-tps clients. 
+ * https://cgit.twitter.biz/source/tree/stitch/stitch-tweetypie/src/main/scala/com/twitter/stitch/tweetypie/TweetyPie.scala#n47 + * + */ + case class RateLimiterConfig(limitingGate: Gate[Int], enforceRateLimit: Boolean) + + def perClientRateLimiters( + dynamicConfig: DynamicConfig, + instanceCount: Int + ): Map[ClientId, RateLimiterConfig] = { + val RateLimitFudgeFactor: Double = 1.5 + val DefaultStitchBatchSize: Double = 25.0 + dynamicConfig.clientsByFullyQualifiedId match { + case Some(clients) => + clients.collect { + case (clientId, client) if client.tpsLimit.isDefined => + val perInstanceQuota: Double = + math.max( + 1.0, + math.ceil( + client.tpsLimit.get.toFloat / instanceCount)) * RateLimitFudgeFactor + DefaultStitchBatchSize + clientId -> RateLimiterConfig( + RateLimitingGate.weighted(perInstanceQuota), + client.enforceRateLimit + ) + } + case None => Map.empty + } + } + + /* + enforce rate-limiting on get_tweets and get_tweet_fields requests + given enable_rate_limited_clients decider is true and rate limiting gate + is not giving any more permits. 
+ */ + def apply( + config: DynamicConfig, + limiters: Map[ClientId, RateLimiterConfig], + instanceCount: Int, + enforceRateLimitedClients: Gate[Unit], + statsReceiver: StatsReceiver + ): MethodAuthorizer[Int] = { + + val tpsExceededScope = statsReceiver.scope("tps_exceeded") + val tpsRejectedScope = statsReceiver.scope("tps_rejected") + val qpsExceededScope = statsReceiver.scope("qps_exceeded") + val qpsRejectedScope = statsReceiver.scope("qps_rejected") + + MethodAuthorizer { (requestSize, clientId) => + val positiveRequestSize = math.max(1, requestSize) + val shouldRateLimit: Boolean = limiters.get(clientId).exists { config => + val exceededLimit = !config.limitingGate(positiveRequestSize) + if (exceededLimit) { + qpsExceededScope.counter(clientId).incr() + tpsExceededScope.counter(clientId).incr(positiveRequestSize) + } + exceededLimit && config.enforceRateLimit + } + + Future.when(shouldRateLimit && enforceRateLimitedClients()) { + qpsRejectedScope.counter(clientId).incr() + tpsRejectedScope.counter(clientId).incr(positiveRequestSize) + Future.exception( + RateLimited(s"Your client ID $clientId has exceeded its reserved tps quota.") + ) + } + } + } + + def apply( + config: DynamicConfig, + instanceCount: Int, + enforceRateLimitedClients: Gate[Unit], + statsReceiver: StatsReceiver + ): MethodAuthorizer[Int] = { + val limiters = perClientRateLimiters(config, instanceCount) + apply(config, limiters, instanceCount, enforceRateLimitedClients, statsReceiver) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala new file mode 100644 index 000000000..518d0edcd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceBuilder.scala @@ -0,0 +1,683 @@ +package com.twitter.tweetypie +package config + +import com.twitter.coreservices.failed_task.writer.FailedTaskWriter +import 
com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.flockdb.client._ +import com.twitter.servo.forked +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.Scribe +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.service.ReplicatingTweetService +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Activity +import com.twitter.util.Timer + +/** + * Builds a fully configured ThriftTweetService instance. + * + * The core of the tweet service is a DispatchingTweetService, which is responsible + * for dispatching requests to underlying handlers and stores. + * The DispatchingTweetService instance is wrapped in: + * - ObservedTweetService (adds stats counting) + * - ClientHandlingTweetService (authentication, exception handling, etc) + * - ReplicatingTweetService (replicates some reads) + * + * TweetServiceBuilder returns an Activity[ThriftTweetService] which updates + * on config changes. See DynamicConfig.scala for more details. 
+ */ +object TweetServiceBuilder { + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + timer: Timer, + deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + featureSwitchesWithoutExperiments: FeatureSwitches, + backendClients: BackendClients, + clientIdHelper: ClientIdHelper, + ): Activity[ThriftTweetService] = { + // a forward reference, will be set to the DispatchingTweetService once created + val syncTweetService = new MutableTweetServiceProxy(null) + + val tweetServiceScope = statsReceiver.scope("tweet_service") + + val dispatchingTweetService = + DispatchingTweetServiceBuilder( + settings, + statsReceiver, + tweetServiceScope, + syncTweetService, + timer, + deciderGates, + featureSwitchesWithExperiments, + featureSwitchesWithoutExperiments, + backendClients, + clientIdHelper, + ) + + val failureLoggingTweetService = + // Add the failure writing inside of the authorization filter so + // that we don't write out the failures when authorization fails. + new FailureLoggingTweetService( + failedTaskWriter = FailedTaskWriter("tweetypie_service_failures", identity), + underlying = dispatchingTweetService + ) + + val observedTweetService = + new ObservedTweetService(failureLoggingTweetService, tweetServiceScope, clientIdHelper) + + // Every time config is updated, create a new tweet service. Only + // ClientHandlingTweetService and ReplicatingTweetService need to + // be recreated, as the underlying TweetServices above don't depend + // on the config. 
+ DynamicConfig( + statsReceiver.scope("dynamic_config"), + backendClients.configBus, + settings + ).map { dynamicConfig => + val clientHandlingTweetService = + new ClientHandlingTweetService( + observedTweetService, + tweetServiceScope, + dynamicConfig.loadShedEligible, + deciderGates.shedReadTrafficVoluntarily, + ClientHandlingTweetServiceAuthorizer( + settings = settings, + dynamicConfig = dynamicConfig, + statsReceiver = statsReceiver + ), + GetTweetsAuthorizer( + config = dynamicConfig, + maxRequestSize = settings.maxGetTweetsRequestSize, + instanceCount = settings.instanceCount, + enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, + maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, + statsReceiver = tweetServiceScope.scope("get_tweets"), + ), + GetTweetFieldsAuthorizer( + config = dynamicConfig, + maxRequestSize = settings.maxGetTweetsRequestSize, + instanceCount = settings.instanceCount, + enforceRateLimitedClients = deciderGates.enforceRateLimitedClients, + maxRequestWidthEnabled = deciderGates.maxRequestWidthEnabled, + statsReceiver = tweetServiceScope.scope("get_tweet_fields"), + ), + RequestSizeAuthorizer(settings.maxRequestSize, deciderGates.maxRequestWidthEnabled), + clientIdHelper, + ) + + syncTweetService.underlying = clientHandlingTweetService + + val replicatingService = + if (!settings.enableReplication) + clientHandlingTweetService + else { + new ReplicatingTweetService( + underlying = clientHandlingTweetService, + replicationTargets = backendClients.lowQoSReplicationClients, + executor = new forked.QueueExecutor( + 100, + statsReceiver.scope("replicating_tweet_service") + ), + ) + } + + replicatingService + } + } +} + +object DispatchingTweetServiceBuilder { + val hasMedia: Tweet => Boolean = MediaIndexHelper(Resources.loadPartnerMediaRegexes()) + + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + tweetServiceScope: StatsReceiver, + syncTweetService: ThriftTweetService, + timer: Timer, + 
deciderGates: TweetypieDeciderGates, + featureSwitchesWithExperiments: FeatureSwitches, + featureSwitchesWithoutExperiments: FeatureSwitches, + backendClients: BackendClients, + clientIdHelper: ClientIdHelper, + ): ThriftTweetService = { + val (syncInvocationBuilder, asyncInvocationBuilder) = { + val b = + new ServiceInvocationBuilder(syncTweetService, settings.simulateDeferredrpcCallbacks) + (b.withClientId(settings.thriftClientId), b.withClientId(settings.deferredrpcClientId)) + } + + val tweetKeyFactory = TweetKeyFactory(settings.tweetKeyCacheVersion) + + val caches = + if (!settings.withCache) + Caches.NoCache + else + Caches( + settings = settings, + stats = statsReceiver, + timer = timer, + clients = backendClients, + tweetKeyFactory = tweetKeyFactory, + deciderGates = deciderGates, + clientIdHelper = clientIdHelper, + ) + + val logicalRepos = + LogicalRepositories( + settings = settings, + stats = statsReceiver, + timer = timer, + deciderGates = deciderGates, + external = new ExternalServiceRepositories( + clients = backendClients, + statsReceiver = statsReceiver, + settings = settings, + clientIdHelper = clientIdHelper, + ), + caches = caches, + stratoClient = backendClients.stratoserverClient, + hasMedia = hasMedia, + clientIdHelper = clientIdHelper, + featureSwitchesWithoutExperiments = featureSwitchesWithoutExperiments, + ) + + val tweetCreationLock = + new CacheBasedTweetCreationLock( + cache = caches.tweetCreateLockerCache, + maxTries = 3, + stats = statsReceiver.scope("tweet_save").scope("locker"), + logUniquenessId = + if (settings.scribeUniquenessIds) CacheBasedTweetCreationLock.ScribeUniquenessId + else CacheBasedTweetCreationLock.LogUniquenessId + ) + + val tweetStores = + TweetStores( + settings = settings, + statsReceiver = statsReceiver, + timer = timer, + deciderGates = deciderGates, + tweetKeyFactory = tweetKeyFactory, + clients = backendClients, + caches = caches, + asyncBuilder = asyncInvocationBuilder, + hasMedia = hasMedia, + 
clientIdHelper = clientIdHelper, + ) + + val tweetDeletePathHandler = + new DefaultTweetDeletePathHandler( + tweetServiceScope, + logicalRepos.tweetResultRepo, + logicalRepos.optionalUserRepo, + logicalRepos.stratoSafetyLabelsRepo, + logicalRepos.lastQuoteOfQuoterRepo, + tweetStores, + getPerspectives = backendClients.timelineService.getPerspectives, + ) + + val tweetBuilders = + TweetBuilders( + settings = settings, + statsReceiver = statsReceiver, + deciderGates = deciderGates, + featureSwitchesWithExperiments = featureSwitchesWithExperiments, + clients = backendClients, + caches = caches, + repos = logicalRepos, + tweetStore = tweetStores, + hasMedia = hasMedia, + unretweetEdits = tweetDeletePathHandler.unretweetEdits, + ) + + val hydrateTweetForInsert = + WritePathHydration.hydrateTweet( + logicalRepos.tweetHydrators.hydrator, + statsReceiver.scope("insert_tweet") + ) + + val defaultTweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + val parentUserIdRepo: ParentUserIdRepository.Type = + ParentUserIdRepository( + tweetRepo = logicalRepos.tweetRepo + ) + + val undeleteTweetHandler = + UndeleteTweetHandlerBuilder( + backendClients.tweetStorageClient, + logicalRepos, + tweetStores, + parentUserIdRepo, + statsReceiver + ) + + val eraseUserTweetsHandler = + EraseUserTweetsHandlerBuilder( + backendClients, + asyncInvocationBuilder, + deciderGates, + settings, + timer, + tweetDeletePathHandler, + tweetServiceScope + ) + + val setRetweetVisibilityHandler = + SetRetweetVisibilityHandler( + tweetGetter = + TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), + tweetStores.setRetweetVisibility + ) + + val takedownHandler = + TakedownHandlerBuilder( + logicalRepos = logicalRepos, + tweetStores = tweetStores + ) + + val updatePossiblySensitiveTweetHandler = + UpdatePossiblySensitiveTweetHandler( + HandlerError.getRequired( + TweetRepository.tweetGetter(logicalRepos.optionalTweetRepo, defaultTweetQueryOptions), 
+ HandlerError.tweetNotFoundException + ), + HandlerError.getRequired( + FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + ) + .compose(UserKey.byId) + ), + HandlerError.userNotFoundException + ), + tweetStores.updatePossiblySensitiveTweet + ) + + val userTakedownHandler = + UserTakedownHandlerBuilder( + logicalRepos = logicalRepos, + tweetStores = tweetStores, + stats = tweetServiceScope + ) + + val getDeletedTweetsHandler = + GetDeletedTweetsHandler( + getDeletedTweets = backendClients.tweetStorageClient.getDeletedTweets, + tweetsExist = + GetDeletedTweetsHandler.tweetsExist(backendClients.tweetStorageClient.getTweet), + stats = tweetServiceScope.scope("get_deleted_tweets_handler") + ) + + val hydrateQuotedTweet = + WritePathHydration.hydrateQuotedTweet( + logicalRepos.optionalTweetRepo, + logicalRepos.optionalUserRepo, + logicalRepos.quoterHasAlreadyQuotedRepo + ) + + val deleteLocationDataHandler = + DeleteLocationDataHandler( + backendClients.geoScrubEventStore.getGeoScrubTimestamp, + Scribe(DeleteLocationData, "tweetypie_delete_location_data"), + backendClients.deleteLocationDataPublisher + ) + + val getStoredTweetsHandler = GetStoredTweetsHandler(logicalRepos.tweetResultRepo) + + val getStoredTweetsByUserHandler = GetStoredTweetsByUserHandler( + getStoredTweetsHandler = getStoredTweetsHandler, + getStoredTweet = backendClients.tweetStorageClient.getStoredTweet, + selectPage = FutureArrow { select => + backendClients.tflockReadClient + .selectPage(select, Some(settings.getStoredTweetsByUserPageSize)) + }, + maxPages = settings.getStoredTweetsByUserMaxPages + ) + + val getTweetsHandler = + GetTweetsHandler( + logicalRepos.tweetResultRepo, + logicalRepos.containerAsGetTweetResultRepo, + logicalRepos.deletedTweetVisibilityRepo, + statsReceiver.scope("read_path"), + deciderGates.shouldMaterializeContainers + ) + + val getTweetFieldsHandler = + GetTweetFieldsHandler( + 
logicalRepos.tweetResultRepo, + logicalRepos.deletedTweetVisibilityRepo, + logicalRepos.containerAsGetTweetFieldsResultRepo, + statsReceiver.scope("read_path"), + deciderGates.shouldMaterializeContainers + ) + + val unretweetHandler = + UnretweetHandler( + tweetDeletePathHandler.deleteTweets, + backendClients.timelineService.getPerspectives, + tweetDeletePathHandler.unretweetEdits, + logicalRepos.tweetRepo, + ) + + val hydrateInsertEvent = + WritePathHydration.hydrateInsertTweetEvent( + hydrateTweet = hydrateTweetForInsert, + hydrateQuotedTweet = hydrateQuotedTweet + ) + + val scrubGeoUpdateUserTimestampBuilder = + ScrubGeoEventBuilder.UpdateUserTimestamp( + stats = tweetServiceScope.scope("scrub_geo_update_user_timestamp"), + userRepo = logicalRepos.optionalUserRepo + ) + + val scrubGeoScrubTweetsBuilder = + ScrubGeoEventBuilder.ScrubTweets( + stats = tweetServiceScope.scope("scrub_geo"), + userRepo = logicalRepos.optionalUserRepo + ) + + val handlerFilter = + PostTweet + .DuplicateHandler( + tweetCreationLock = tweetCreationLock, + getTweets = getTweetsHandler, + stats = statsReceiver.scope("duplicate") + ) + .andThen(PostTweet.RescueTweetCreateFailure) + .andThen(PostTweet.LogFailures) + + val postTweetHandler = + handlerFilter[PostTweetRequest]( + PostTweet.Handler( + tweetBuilder = tweetBuilders.tweetBuilder, + hydrateInsertEvent = hydrateInsertEvent, + tweetStore = tweetStores, + ) + ) + + val postRetweetHandler = + handlerFilter[RetweetRequest]( + PostTweet.Handler( + tweetBuilder = tweetBuilders.retweetBuilder, + hydrateInsertEvent = hydrateInsertEvent, + tweetStore = tweetStores, + ) + ) + + val quotedTweetDeleteBuilder: QuotedTweetDeleteEventBuilder.Type = + QuotedTweetDeleteEventBuilder(logicalRepos.optionalTweetRepo) + + val quotedTweetTakedownBuilder: QuotedTweetTakedownEventBuilder.Type = + QuotedTweetTakedownEventBuilder(logicalRepos.optionalTweetRepo) + + val setAdditionalFieldsBuilder: SetAdditionalFieldsBuilder.Type = + SetAdditionalFieldsBuilder( 
+ tweetRepo = logicalRepos.tweetRepo + ) + + val asyncSetAdditionalFieldsBuilder: AsyncSetAdditionalFieldsBuilder.Type = + AsyncSetAdditionalFieldsBuilder( + userRepo = logicalRepos.userRepo + ) + + val deleteAdditionalFieldsBuilder: DeleteAdditionalFieldsBuilder.Type = + DeleteAdditionalFieldsBuilder( + tweetRepo = logicalRepos.tweetRepo + ) + + val asyncDeleteAdditionalFieldsBuilder: AsyncDeleteAdditionalFieldsBuilder.Type = + AsyncDeleteAdditionalFieldsBuilder( + userRepo = logicalRepos.userRepo + ) + + new DispatchingTweetService( + asyncDeleteAdditionalFieldsBuilder = asyncDeleteAdditionalFieldsBuilder, + asyncSetAdditionalFieldsBuilder = asyncSetAdditionalFieldsBuilder, + deleteAdditionalFieldsBuilder = deleteAdditionalFieldsBuilder, + deleteLocationDataHandler = deleteLocationDataHandler, + deletePathHandler = tweetDeletePathHandler, + eraseUserTweetsHandler = eraseUserTweetsHandler, + getDeletedTweetsHandler = getDeletedTweetsHandler, + getStoredTweetsHandler = getStoredTweetsHandler, + getStoredTweetsByUserHandler = getStoredTweetsByUserHandler, + getTweetsHandler = getTweetsHandler, + getTweetFieldsHandler = getTweetFieldsHandler, + getTweetCountsHandler = GetTweetCountsHandler(logicalRepos.tweetCountsRepo), + postTweetHandler = postTweetHandler, + postRetweetHandler = postRetweetHandler, + quotedTweetDeleteBuilder = quotedTweetDeleteBuilder, + quotedTweetTakedownBuilder = quotedTweetTakedownBuilder, + scrubGeoUpdateUserTimestampBuilder = scrubGeoUpdateUserTimestampBuilder, + scrubGeoScrubTweetsBuilder = scrubGeoScrubTweetsBuilder, + setAdditionalFieldsBuilder = setAdditionalFieldsBuilder, + setRetweetVisibilityHandler = setRetweetVisibilityHandler, + statsReceiver = statsReceiver, + takedownHandler = takedownHandler, + tweetStore = tweetStores, + undeleteTweetHandler = undeleteTweetHandler, + unretweetHandler = unretweetHandler, + updatePossiblySensitiveTweetHandler = updatePossiblySensitiveTweetHandler, + userTakedownHandler = userTakedownHandler, + 
clientIdHelper = clientIdHelper, + ) + } +} + +object TakedownHandlerBuilder { + type Type = FutureArrow[TakedownRequest, Unit] + + def apply(logicalRepos: LogicalRepositories, tweetStores: TotalTweetStore) = + TakedownHandler( + getTweet = HandlerError.getRequired( + tweetGetter(logicalRepos), + HandlerError.tweetNotFoundException + ), + getUser = HandlerError.getRequired( + userGetter(logicalRepos), + HandlerError.userNotFoundException + ), + writeTakedown = tweetStores.takedown + ) + + def tweetGetter(logicalRepos: LogicalRepositories): FutureArrow[TweetId, Option[Tweet]] = + FutureArrow( + TweetRepository.tweetGetter( + logicalRepos.optionalTweetRepo, + TweetQuery.Options( + include = GetTweetsHandler.BaseInclude.also( + tweetFields = Set( + Tweet.TweetypieOnlyTakedownCountryCodesField.id, + Tweet.TweetypieOnlyTakedownReasonsField.id + ) + ) + ) + ) + ) + + def userGetter(logicalRepos: LogicalRepositories): FutureArrow[UserId, Option[User]] = + FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions( + Set(UserField.Roles, UserField.Safety, UserField.Takedowns), + UserVisibility.All + ) + ) + .compose(UserKey.byId) + ) +} + +object UserTakedownHandlerBuilder { + def apply( + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + stats: StatsReceiver + ): UserTakedownHandler.Type = + UserTakedownHandler( + getTweet = TakedownHandlerBuilder.tweetGetter(logicalRepos), + tweetTakedown = tweetStores.takedown, + ) +} + +object EraseUserTweetsHandlerBuilder { + def apply( + backendClients: BackendClients, + asyncInvocationBuilder: ServiceInvocationBuilder, + deciderGates: TweetypieDeciderGates, + settings: TweetServiceSettings, + timer: Timer, + tweetDeletePathHandler: DefaultTweetDeletePathHandler, + tweetServiceScope: StatsReceiver + ): EraseUserTweetsHandler = + EraseUserTweetsHandler( + selectPage(backendClients, settings), + deleteTweet(tweetDeletePathHandler), + eraseUserTweets(backendClients, 
asyncInvocationBuilder), + tweetServiceScope.scope("erase_user_tweets"), + sleep(deciderGates, settings, timer) + ) + + def selectPage( + backendClients: BackendClients, + settings: TweetServiceSettings + ): FutureArrow[Select[StatusGraph], PageResult[Long]] = + FutureArrow( + backendClients.tflockWriteClient.selectPage(_, Some(settings.eraseUserTweetsPageSize)) + ) + + def deleteTweet( + tweetDeletePathHandler: DefaultTweetDeletePathHandler + ): FutureEffect[(TweetId, UserId)] = + FutureEffect[(TweetId, UserId)] { + case (tweetId, expectedUserId) => + tweetDeletePathHandler + .internalDeleteTweets( + request = DeleteTweetsRequest( + Seq(tweetId), + isUserErasure = true, + expectedUserId = Some(expectedUserId) + ), + byUserId = None, + authenticatedUserId = None, + validate = tweetDeletePathHandler.validateTweetsForUserErasureDaemon + ) + .unit + } + + def eraseUserTweets( + backendClients: BackendClients, + asyncInvocationBuilder: ServiceInvocationBuilder + ): FutureArrow[AsyncEraseUserTweetsRequest, Unit] = + asyncInvocationBuilder + .asyncVia(backendClients.asyncTweetDeletionService) + .method(_.asyncEraseUserTweets) + + def sleep( + deciderGates: TweetypieDeciderGates, + settings: TweetServiceSettings, + timer: Timer + ): () => Future[Unit] = + () => + if (deciderGates.delayEraseUserTweets()) { + Future.sleep(settings.eraseUserTweetsDelay)(timer) + } else { + Future.Unit + } +} + +object UndeleteTweetHandlerBuilder { + def apply( + tweetStorage: TweetStorageClient, + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + parentUserIdRepo: ParentUserIdRepository.Type, + statsReceiver: StatsReceiver + ): UndeleteTweetHandler.Type = + UndeleteTweetHandler( + undelete = tweetStorage.undelete, + tweetExists = tweetExists(tweetStorage), + getUser = FutureArrow( + UserRepository + .userGetter( + logicalRepos.optionalUserRepo, + UserQueryOptions( + // ExtendedProfile is needed to view a user's birthday to + // guarantee we are not undeleting tweets from 
when a user was < 13 + TweetBuilder.userFields ++ Set(UserField.ExtendedProfile), + UserVisibility.All, + filteredAsFailure = false + ) + ) + .compose(UserKey.byId) + ), + getDeletedTweets = tweetStorage.getDeletedTweets, + parentUserIdRepo = parentUserIdRepo, + save = save( + logicalRepos, + tweetStores, + statsReceiver + ) + ) + + private def tweetExists(tweetStorage: TweetStorageClient): FutureArrow[TweetId, Boolean] = + FutureArrow { id => + Stitch + .run(tweetStorage.getTweet(id)) + .map { + case _: GetTweet.Response.Found => true + case _ => false + } + } + + // 1. hydrates the undeleted tweet + // 2. hands a UndeleteTweetEvent to relevant stores. + // 3. return the hydrated tweet + def save( + logicalRepos: LogicalRepositories, + tweetStores: TotalTweetStore, + statsReceiver: StatsReceiver + ): FutureArrow[UndeleteTweet.Event, Tweet] = { + + val hydrateTweet = + WritePathHydration.hydrateTweet( + logicalRepos.tweetHydrators.hydrator, + statsReceiver.scope("undelete_tweet") + ) + + val hydrateQuotedTweet = + WritePathHydration.hydrateQuotedTweet( + logicalRepos.optionalTweetRepo, + logicalRepos.optionalUserRepo, + logicalRepos.quoterHasAlreadyQuotedRepo + ) + + val hydrateUndeleteEvent = + WritePathHydration.hydrateUndeleteTweetEvent( + hydrateTweet = hydrateTweet, + hydrateQuotedTweet = hydrateQuotedTweet + ) + + FutureArrow[UndeleteTweet.Event, Tweet] { event => + for { + hydratedEvent <- hydrateUndeleteEvent(event) + _ <- tweetStores.undeleteTweet(hydratedEvent) + } yield hydratedEvent.tweet + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala new file mode 100644 index 000000000..d4d6e054f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceInvocationBuilder.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie.config + +import 
com.twitter.finagle.thrift.ClientId +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie._ +import com.twitter.tweetypie.service.{ClientIdSettingTweetServiceProxy, TweetServiceProxy} + +/** + * This class builds deciderable ThriftTweetService and FutureArrows that respect the + * simulateDeferredrpcCallbacks decider. When simulateDeferredrpcCallbacks=true, invocations will + * be performed synchronously by the root ThriftTweetService. + */ +class ServiceInvocationBuilder( + val service: ThriftTweetService, + simulateDeferredrpcCallbacks: Boolean) { + + def withClientId(clientId: ClientId): ServiceInvocationBuilder = + new ServiceInvocationBuilder( + new ClientIdSettingTweetServiceProxy(clientId, service), + simulateDeferredrpcCallbacks + ) + + def asyncVia(asyncService: ThriftTweetService): ServiceInvocationBuilder = + new ServiceInvocationBuilder( + new TweetServiceProxy { + override def underlying: ThriftTweetService = + if (simulateDeferredrpcCallbacks) service else asyncService + }, + simulateDeferredrpcCallbacks + ) + + def method[A, B](op: ThriftTweetService => A => Future[B]): FutureArrow[A, B] = + FutureArrow(op(service)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala new file mode 100644 index 000000000..08592c16d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetServiceSettings.scala @@ -0,0 +1,475 @@ +package com.twitter.tweetypie +package config + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.Backoff +import com.twitter.finagle.memcached.exp.localMemcachedPort +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.ssl.OpportunisticTls +import com.twitter.finagle.thrift.ClientId +import com.twitter.flockdb.client.thriftscala.Priority +import com.twitter.servo.repository.CachedResult 
+import com.twitter.servo.util.Availability +import com.twitter.tweetypie.backends._ +import com.twitter.tweetypie.caching.SoftTtl +import com.twitter.tweetypie.handler.DuplicateTweetFinder +import com.twitter.tweetypie.repository.TombstoneTtl +import com.twitter.tweetypie.service._ +import com.twitter.tweetypie.storage.ManhattanTweetStorageClient +import com.twitter.util.Duration + +case class InProcessCacheConfig(ttl: Duration, maximumSize: Int) + +class TweetServiceSettings(val flags: TweetServiceFlags) { + + /** + * Convert a Boolean to an Option + * > optional(true, "my value") + * res: Some(my value) + * + * > optional(false, "my value") + * res: None + */ + def optional[T](b: Boolean, a: => T): Option[T] = if (b) Some(a) else None + + /** atla, localhost, etc. */ + val zone: String = flags.zone() + + /** dc is less specific than zone, zone=atla, dc=atl */ + val dc: String = zone.dropRight(1) + + /** one of: prod, staging, dev, testbox */ + val env: Env.Value = flags.env() + + /** instanceId of this aurora instance */ + lazy val instanceId: Int = flags.instanceId() + + /** total number of tweetypie aurora instances */ + val instanceCount: Int = flags.instanceCount() + + /** The Name to resolve to find the memcached cluster */ + val twemcacheDest: String = + // If twemcacheDest is explicitly set, always prefer that to + // localMemcachedPort. + flags.twemcacheDest.get + // Testbox uses this global flag to specify the location of the + // local memcached instance. + .orElse(localMemcachedPort().map("/$/inet/localhost/" + _)) + // If no explicit Name is specified, use the default. + .getOrElse(flags.twemcacheDest()) + + /** Read/write data through Cache */ + val withCache: Boolean = flags.withCache() + + /** + * The TFlock queue to use for background indexing operations. For + * production, this should always be the low priority queue, to + * allow foreground operations to be processed first. 
+ */ + val backgroundIndexingPriority: Priority = flags.backgroundIndexingPriority() + + /** Set certain decider gates to this overridden value */ + val deciderOverrides: Map[String, Boolean] = + flags.deciderOverrides() + + /** use per host stats? */ + val clientHostStats: Boolean = + flags.clientHostStats() + + val warmupRequestsSettings: Option[WarmupQueriesSettings] = + optional(flags.enableWarmupRequests(), WarmupQueriesSettings()) + + /** enables request authorization via an allowlist */ + val allowlistingRequired: Boolean = + flags.allowlist.get.getOrElse(env == Env.prod) + + /** read rate limit for unknown clients (when allowlistingRequired is enabled) */ + val nonAllowListedClientRateLimitPerSec: Double = + flags.grayListRateLimit() + + /** enables requests from production clients */ + val allowProductionClients: Boolean = + env == Env.prod + + /** enables replication via DRPC */ + val enableReplication: Boolean = flags.enableReplication() + + /** enables forking of some traffic to configured target */ + val trafficForkingEnabled: Boolean = + env == Env.prod + + val scribeUniquenessIds: Boolean = + env == Env.prod + + /** ClientId to send to backend services */ + val thriftClientId: ClientId = + flags.clientId.get.map(ClientId(_)).getOrElse { + env match { + case Env.dev | Env.staging => ClientId("tweetypie.staging") + case Env.prod => ClientId("tweetypie.prod") + } + } + + /** + * Instead of using DRPC for calling into the async code path, call back into the + * current instance. Used for development and test to ensure logic in the current + * instance is being tested. + */ + val simulateDeferredrpcCallbacks: Boolean = flags.simulateDeferredrpcCallbacks() + + /** + * ClientId to set in 'asynchronous' requests when simulateDeferredrpcCallbacks is + * true and Tweetypie ends up just calling itself synchronously.
+ */ + val deferredrpcClientId: ClientId = ClientId("deferredrpc.prod") + + /** + * ServiceIdentifier used to enable mTLS + */ + val serviceIdentifier: ServiceIdentifier = flags.serviceIdentifier() + + /** + * Decider settings + */ + val deciderBaseFilename: Option[String] = Option(flags.deciderBase()) + val deciderOverlayFilename: Option[String] = Option(flags.deciderOverlay()) + val vfDeciderOverlayFilename: Option[String] = flags.vfDeciderOverlay.get + + /** + * Used to determine whether we should fail requests for Tweets that are likely too young + * to return a non-partial response. We return NotFound for Tweets that are deemed too young. + * Used by [[com.twitter.tweetypie.repository.ManhattanTweetRepository]]. + */ + val shortCircuitLikelyPartialTweetReads: Gate[Duration] = { + // interpret the flag as a duration in milliseconds + val ageCeiling: Duration = flags.shortCircuitLikelyPartialTweetReadsMs().milliseconds + Gate(tweetAge => tweetAge < ageCeiling) + } + + // tweet-service internal settings + + val tweetKeyCacheVersion = 1 + + /** how often to flush aggregated count updates for tweet counts */ + val aggregatedTweetCountsFlushInterval: Duration = 5.seconds + + /** maximum number of keys for which aggregated cached count updates may be cached */ + val maxAggregatedCountsSize = 1000 + + /** ramp up period for decidering up forked traffic (if enabled) to the full decidered value */ + val forkingRampUp: Duration = 3.minutes + + /** how long to wait after startup for serversets to resolve before giving up and moving on */ + val waitForServerSetsTimeout: Duration = 120.seconds + + /** number of threads to use in thread pool for language identification */ + val numPenguinThreads = 4 + + /** maximum number of tweets that clients can request per getTweets RPC call */ + val maxGetTweetsRequestSize = 200 + + /** maximum batch size for any batched request (getTweets is exempt, it has its own limiting) */ + val maxRequestSize = 200 + + /** + * maximum size to 
allow the thrift response buffer to grow before resetting it. this is set to + * approximately the current value of `srv/thrift/response_payload_bytes.p999`, meaning roughly + * 1 out of 1000 requests will cause the buffer to be reset. + */ + val maxThriftBufferSize: Int = 200 * 1024 + + // ********* timeouts and backoffs ********** + + /** backoffs for OptimisticLockingCache lockAndSet operations */ + val lockingCacheBackoffs: Stream[Duration] = + Backoff.exponentialJittered(10.milliseconds, 50.milliseconds).take(3).toStream + + /** retry once on timeout with no backoff */ + val defaultTimeoutBackoffs: Stream[Duration] = Stream(0.milliseconds) + + /** backoffs when user view is missing */ + val gizmoduckMissingUserViewBackoffs: Stream[Duration] = Backoff.const(10.millis).take(3).toStream + + /** backoffs for retrying failed async-write actions after first retry failure */ + val asyncWriteRetryBackoffs: Stream[Duration] = + Backoff.exponential(10.milliseconds, 2).take(9).toStream.map(_ min 1.second) + + /** backoffs for retrying failed deferredrpc enqueues */ + val deferredrpcBackoffs: Stream[Duration] = + Backoff.exponential(10.milliseconds, 2).take(3).toStream + + /** backoffs for retrying failed cache updates for replicated events */ + val replicatedEventCacheBackoffs: Stream[Duration] = + Backoff.exponential(100.milliseconds, 2).take(10).toStream + + val escherbirdConfig: Escherbird.Config = + Escherbird.Config( + requestTimeout = 200.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val expandodoConfig: Expandodo.Config = + Expandodo.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + serverErrorBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val creativesContainerServiceConfig: CreativesContainerService.Config = + CreativesContainerService.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + serverErrorBackoffs =
Backoff.const(0.millis).take(3).toStream + ) + + val geoScrubEventStoreConfig: GeoScrubEventStore.Config = + GeoScrubEventStore.Config( + read = GeoScrubEventStore.EndpointConfig( + requestTimeout = 200.milliseconds, + maxRetryCount = 1 + ), + write = GeoScrubEventStore.EndpointConfig( + requestTimeout = 1.second, + maxRetryCount = 1 + ) + ) + + val gizmoduckConfig: Gizmoduck.Config = + Gizmoduck.Config( + readTimeout = 300.milliseconds, + writeTimeout = 300.milliseconds, + // We bump the timeout value to 800ms because modifyAndGet is called only in async request path in GeoScrub daemon + // and we do not expect sync/realtime apps calling this thrift method + modifyAndGetTimeout = 800.milliseconds, + modifyAndGetTimeoutBackoffs = Backoff.const(0.millis).take(3).toStream, + defaultTimeoutBackoffs = defaultTimeoutBackoffs, + gizmoduckExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val limiterBackendConfig: LimiterBackend.Config = + LimiterBackend.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val mediaInfoServiceConfig: MediaInfoService.Config = + MediaInfoService.Config( + requestTimeout = 300.milliseconds, + totalTimeout = 500.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val scarecrowConfig: Scarecrow.Config = + Scarecrow.Config( + readTimeout = 100.milliseconds, + writeTimeout = 400.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + scarecrowExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + val socialGraphSeviceConfig: SocialGraphService.Config = + SocialGraphService.Config( + socialGraphTimeout = 250.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val talonConfig: Talon.Config = + Talon.Config( + shortenTimeout = 500.milliseconds, + expandTimeout = 150.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + transientErrorBackoffs = Backoff.const(0.millis).take(3).toStream + ) + + /** + * page size when retrieving tflock pages 
for tweet deletion and undeletion + * tweet erasures have their own page size eraseUserTweetsPageSize + */ + val tflockPageSize: Int = flags.tflockPageSize() + + val tflockReadConfig: TFlock.Config = + TFlock.Config( + requestTimeout = 300.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, + overCapacityBackoffs = Stream.empty, + defaultPageSize = tflockPageSize + ) + + val tflockWriteConfig: TFlock.Config = + TFlock.Config( + requestTimeout = 400.milliseconds, + timeoutBackoffs = defaultTimeoutBackoffs, + flockExceptionBackoffs = Backoff.const(0.millis).take(3).toStream, + overCapacityBackoffs = Backoff.exponential(10.millis, 2).take(3).toStream + ) + + val timelineServiceConfig: TimelineService.Config = { + val tlsExceptionBackoffs = Backoff.const(0.millis).take(3).toStream + TimelineService.Config( + writeRequestPolicy = + Backend.TimeoutPolicy(4.seconds) >>> + TimelineService.FailureBackoffsPolicy( + timeoutBackoffs = defaultTimeoutBackoffs, + tlsExceptionBackoffs = tlsExceptionBackoffs + ), + readRequestPolicy = + Backend.TimeoutPolicy(400.milliseconds) >>> + TimelineService.FailureBackoffsPolicy( + timeoutBackoffs = defaultTimeoutBackoffs, + tlsExceptionBackoffs = tlsExceptionBackoffs + ) + ) + } + + val tweetStorageConfig: ManhattanTweetStorageClient.Config = { + val remoteZone = zone match { + case "atla" => "pdxa" + case "pdxa" => "atla" + case "localhost" => "atla" + case _ => + throw new IllegalArgumentException(s"Cannot configure remote DC for unknown zone '$zone'") + } + ManhattanTweetStorageClient.Config( + applicationId = "tbird_mh", + localDestination = "/s/manhattan/cylon.native-thrift", + localTimeout = 290.milliseconds, + remoteDestination = s"/srv#/prod/$remoteZone/manhattan/cylon.native-thrift", + remoteTimeout = 1.second, + maxRequestsPerBatch = 25, + serviceIdentifier = serviceIdentifier, + opportunisticTlsLevel = OpportunisticTls.Required + ) + } + + val
userImageServiceConfig: UserImageService.Config = + UserImageService.Config( + processTweetMediaTimeout = 5.seconds, + updateTweetMediaTimeout = 2.seconds, + timeoutBackoffs = defaultTimeoutBackoffs + ) + + val adsLoggingClientTopicName = env match { + case Env.prod => "ads_client_callback_prod" + case Env.dev | Env.staging => "ads_client_callback_staging" + } + + /** Delay between successive cascadedDeleteTweet calls when deleting retweets. Applied via decider. */ + val retweetDeletionDelay: Duration = 20.milliseconds + + /** + * Delay to sleep before each tweet deletion of an eraseUserTweets request. + * This is a simple rate limiting mechanism. The long term solution is + * to move async endpoints like user erasures and retweet deletions out + * of the main tweetypie cluster and into an async cluster with first class + * rate limiting support. + */ + val eraseUserTweetsDelay: Duration = 100.milliseconds + + val eraseUserTweetsPageSize = 100 + + val getStoredTweetsByUserPageSize = 20 + val getStoredTweetsByUserMaxPages = 30 + + // ********* ttls ********** + + // Unfortunately, this tombstone TTL applies equally to the case + // where the tweet was deleted and the case that the tweet does not + // exist or is unavailable. If we could differentiate between those + // cases, we'd cache deleted for a long time and not + // found/unavailable for a short time. We chose 100 + // milliseconds for the minimum TTL because there are known cases in + // which a not found result can be erroneously written to cache on + // tweet creation. This minimum TTL is a trade-off between a + // thundering herd of database requests from clients that just got + // the fanned-out tweet and the window for which these inconsistent + // results will be available.
+ val tweetTombstoneTtl: CachedResult.CachedNotFound[TweetId] => Duration = + TombstoneTtl.linear(min = 100.milliseconds, max = 1.day, from = 5.minutes, to = 5.hours) + + val tweetMemcacheTtl: Duration = 14.days + val urlMemcacheTtl: Duration = 1.hour + val urlMemcacheSoftTtl: Duration = 1.hour + val deviceSourceMemcacheTtl: Duration = 12.hours + val deviceSourceMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = + SoftTtl.ByAge(softTtl = 1.hour, jitter = 1.minute) + val deviceSourceInProcessTtl: Duration = 8.hours + val deviceSourceInProcessSoftTtl: Duration = 30.minutes + val placeMemcacheTtl: Duration = 1.day + val placeMemcacheSoftTtl: SoftTtl.ByAge[Nothing] = + SoftTtl.ByAge(softTtl = 3.hours, jitter = 1.minute) + val cardMemcacheTtl: Duration = 20.minutes + val cardMemcacheSoftTtl: Duration = 30.seconds + val tweetCreateLockingMemcacheTtl: Duration = 10.seconds + val tweetCreateLockingMemcacheLongTtl: Duration = 12.hours + val geoScrubMemcacheTtl: Duration = 30.minutes + + val tweetCountsMemcacheTtl: Duration = 24.hours + val tweetCountsMemcacheNonZeroSoftTtl: Duration = 3.hours + val tweetCountsMemcacheZeroSoftTtl: Duration = 7.hours + + val cacheClientPendingRequestLimit: Int = flags.memcachePendingRequestLimit() + + val deviceSourceInProcessCacheMaxSize = 10000 + + val inProcessCacheConfigOpt: Option[InProcessCacheConfig] = + if (flags.enableInProcessCache()) { + Some( + InProcessCacheConfig( + ttl = flags.inProcessCacheTtlMs().milliseconds, + maximumSize = flags.inProcessCacheSize() + ) + ) + } else { + None + } + + // Begin returning OverCapacity for tweet repo when cache SR falls below 95%, + // Scale to rejecting 95% of requests when cache SR <= 80% + val tweetCacheAvailabilityFromSuccessRate: Double => Double = + Availability.linearlyScaled(0.95, 0.80, 0.05) + + // ******* repository chunking size ******** + + val tweetCountsRepoChunkSize = 6 + // n times `tweetCountsRepoChunkSize`, so chunking at higher level does not + // generate small batches at lower 
level. + val tweetCountsCacheChunkSize = 18 + + val duplicateTweetFinderSettings: DuplicateTweetFinder.Settings = + DuplicateTweetFinder.Settings(numTweetsToCheck = 10, maxDuplicateAge = 12.hours) + + val backendWarmupSettings: Warmup.Settings = + Warmup.Settings( + // Try for twenty seconds to warm up the backends before giving + // up. + maxWarmupDuration = 20.seconds, + // Only allow up to 50 outstanding warmup requests of any kind + // to be outstanding at a time. + maxOutstandingRequests = 50, + // These timeouts are just over the p999 latency observed in ATLA + // for requests to these backends. + requestTimeouts = Map( + "expandodo" -> 120.milliseconds, + "geo_relevance" -> 50.milliseconds, + "gizmoduck" -> 200.milliseconds, + "memcache" -> 50.milliseconds, + "scarecrow" -> 120.milliseconds, + "socialgraphservice" -> 180.milliseconds, + "talon" -> 70.milliseconds, + "tflock" -> 320.milliseconds, + "timelineservice" -> 200.milliseconds, + "tweetstorage" -> 50.milliseconds + ), + reliability = Warmup.Reliably( + // Consider a backend warmed up if 99% of requests are succeeding. + reliabilityThreshold = 0.99, + // When performing warmup, use a maximum of 10 concurrent + // requests to each backend. + concurrency = 10, + // Do not allow more than this many attempts to perform the + // warmup action before giving up. 
+ maxAttempts = 1000 + ) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala new file mode 100644 index 000000000..49cc53fb5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetStores.scala @@ -0,0 +1,577 @@ +package com.twitter.tweetypie +package config + +import com.twitter.servo.util.FutureArrow +import com.twitter.servo.util.RetryHandler +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.backends.LimiterService.Feature.MediaTagCreate +import com.twitter.tweetypie.backends.LimiterService.Feature.Updates +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler.TweetBuilder +import com.twitter.tweetypie.repository.TweetKeyFactory +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.tflock.TFlockIndexer +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.RetryPolicyBuilder +import com.twitter.util.Timer + +object TweetStores { + def apply( + settings: TweetServiceSettings, + statsReceiver: StatsReceiver, + timer: Timer, + deciderGates: TweetypieDeciderGates, + tweetKeyFactory: TweetKeyFactory, + clients: BackendClients, + caches: Caches, + asyncBuilder: ServiceInvocationBuilder, + hasMedia: Tweet => Boolean, + clientIdHelper: ClientIdHelper, + ): TotalTweetStore = { + + val deferredrpcRetryPolicy = + // retry all application exceptions for now. however, in the future, deferredrpc + // may throw a backpressure exception that should not be retried. + RetryPolicyBuilder.anyFailure(settings.deferredrpcBackoffs) + + val asyncWriteRetryPolicy = + // currently retries all failures with the same back-off times. might need + // to update to handle backpressure exceptions differently. 
+ RetryPolicyBuilder.anyFailure(settings.asyncWriteRetryBackoffs) + + val replicatedEventRetryPolicy = + RetryPolicyBuilder.anyFailure(settings.replicatedEventCacheBackoffs) + + val logLensStore = + LogLensStore( + tweetCreationsLogger = Logger("com.twitter.tweetypie.store.TweetCreations"), + tweetDeletionsLogger = Logger("com.twitter.tweetypie.store.TweetDeletions"), + tweetUndeletionsLogger = Logger("com.twitter.tweetypie.store.TweetUndeletions"), + tweetUpdatesLogger = Logger("com.twitter.tweetypie.store.TweetUpdates"), + clientIdHelper = clientIdHelper, + ) + + val tweetStoreStats = statsReceiver.scope("tweet_store") + + val tweetStatsStore = TweetStatsStore(tweetStoreStats.scope("stats")) + + val asyncRetryConfig = + new TweetStore.AsyncRetry( + asyncWriteRetryPolicy, + deferredrpcRetryPolicy, + timer, + clients.asyncRetryTweetService, + Scribe(FailedAsyncWrite, "tweetypie_failed_async_writes") + )(_, _) + + val manhattanStore = { + val scopedStats = tweetStoreStats.scope("base") + ManhattanTweetStore(clients.tweetStorageClient) + .tracked(scopedStats) + .asyncRetry(asyncRetryConfig(scopedStats, ManhattanTweetStore.Action)) + } + + val cachingTweetStore = { + val cacheStats = tweetStoreStats.scope("caching") + CachingTweetStore( + tweetKeyFactory = tweetKeyFactory, + tweetCache = caches.tweetCache, + stats = cacheStats + ).tracked(cacheStats) + .asyncRetry(asyncRetryConfig(cacheStats, CachingTweetStore.Action)) + .replicatedRetry(RetryHandler.failuresOnly(replicatedEventRetryPolicy, timer, cacheStats)) + } + + val indexingStore = { + val indexingStats = tweetStoreStats.scope("indexing") + TweetIndexingStore( + new TFlockIndexer( + tflock = clients.tflockWriteClient, + hasMedia = hasMedia, + backgroundIndexingPriority = settings.backgroundIndexingPriority, + stats = indexingStats + ) + ).tracked(indexingStats) + .asyncRetry(asyncRetryConfig(indexingStats, TweetIndexingStore.Action)) + } + + val timelineUpdatingStore = { + val tlsScope = 
tweetStoreStats.scope("timeline_updating") + TlsTimelineUpdatingStore( + processEvent2 = clients.timelineService.processEvent2, + hasMedia = hasMedia, + stats = tlsScope + ).tracked(tlsScope) + .asyncRetry(asyncRetryConfig(tlsScope, TlsTimelineUpdatingStore.Action)) + } + + val guanoServiceStore = { + val guanoStats = tweetStoreStats.scope("guano") + GuanoServiceStore(clients.guano, guanoStats) + .tracked(guanoStats) + .asyncRetry(asyncRetryConfig(guanoStats, GuanoServiceStore.Action)) + } + + val mediaServiceStore = { + val mediaStats = tweetStoreStats.scope("media") + MediaServiceStore(clients.mediaClient.deleteMedia, clients.mediaClient.undeleteMedia) + .tracked(mediaStats) + .asyncRetry(asyncRetryConfig(mediaStats, MediaServiceStore.Action)) + } + + val userCountsUpdatingStore = { + val userCountsStats = tweetStoreStats.scope("user_counts") + GizmoduckUserCountsUpdatingStore(clients.gizmoduck.incrCount, hasMedia) + .tracked(userCountsStats) + .ignoreFailures + } + + val tweetCountsUpdatingStore = { + val cacheScope = statsReceiver.scope("tweet_counts_cache") + val tweetCountsStats = tweetStoreStats.scope("tweet_counts") + + val memcacheCountsStore = { + val lockingCacheCountsStore = + CachedCountsStore.fromLockingCache(caches.tweetCountsCache) + + new AggregatingCachedCountsStore( + lockingCacheCountsStore, + timer, + settings.aggregatedTweetCountsFlushInterval, + settings.maxAggregatedCountsSize, + cacheScope + ) + } + + TweetCountsCacheUpdatingStore(memcacheCountsStore) + .tracked(tweetCountsStats) + .ignoreFailures + } + + val replicatingStore = { + val replicateStats = tweetStoreStats.scope("replicate_out") + ReplicatingTweetStore( + clients.replicationClient + ).tracked(replicateStats) + .retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, replicateStats)) + .asyncRetry(asyncRetryConfig(replicateStats, ReplicatingTweetStore.Action)) + .enabledBy(Gate.const(settings.enableReplication)) + } + + val scribeMediaTagStore = + ScribeMediaTagStore() + 
.tracked(tweetStoreStats.scope("scribe_media_tag_store")) + + val limiterStore = + LimiterStore( + clients.limiterService.incrementByOne(Updates), + clients.limiterService.increment(MediaTagCreate) + ).tracked(tweetStoreStats.scope("limiter_store")) + + val geoSearchRequestIDStore = { + val statsScope = tweetStoreStats.scope("geo_search_request_id") + GeoSearchRequestIDStore(FutureArrow(clients.geoRelevance.reportConversion _)) + .tracked(statsScope) + .asyncRetry(asyncRetryConfig(statsScope, GeoSearchRequestIDStore.Action)) + } + + val userGeotagUpdateStore = { + val geotagScope = tweetStoreStats.scope("gizmoduck_user_geotag_updating") + GizmoduckUserGeotagUpdateStore( + clients.gizmoduck.modifyAndGet, + geotagScope + ).tracked(geotagScope) + .asyncRetry(asyncRetryConfig(geotagScope, GizmoduckUserGeotagUpdateStore.Action)) + } + + val fanoutServiceStore = { + val fanoutStats = tweetStoreStats.scope("fanout_service_delivery") + FanoutServiceStore(clients.fanoutServiceClient, fanoutStats) + .tracked(fanoutStats) + .asyncRetry(asyncRetryConfig(fanoutStats, FanoutServiceStore.Action)) + } + + /** + * A store that converts Tweetypie TweetEvents to EventBus TweetEvents and sends each event to + * the underlying FutureEffect[eventbus.TweetEvent] + */ + val eventBusEnqueueStore = { + val enqueueStats = tweetStoreStats.scope("event_bus_enqueueing") + val enqueueEffect = FutureEffect[TweetEvent](clients.tweetEventsPublisher.publish) + + TweetEventBusStore( + enqueueEffect + ).tracked(enqueueStats) + .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.EventBusEnqueue)) + } + + val retweetArchivalEnqueueStore = { + val enqueueStats = tweetStoreStats.scope("retweet_archival_enqueueing") + val enqueueEffect = FutureEffect(clients.retweetArchivalEventPublisher.publish) + + RetweetArchivalEnqueueStore(enqueueEffect) + .tracked(enqueueStats) + .asyncRetry(asyncRetryConfig(enqueueStats, AsyncWriteAction.RetweetArchivalEnqueue)) + } + + val asyncEnqueueStore = { + val 
asyncEnqueueStats = tweetStoreStats.scope("async_enqueueing") + AsyncEnqueueStore( + asyncBuilder.asyncVia(clients.asyncTweetService).service, + TweetBuilder.scrubUserInAsyncInserts, + TweetBuilder.scrubSourceTweetInAsyncInserts, + TweetBuilder.scrubSourceUserInAsyncInserts + ).tracked(asyncEnqueueStats) + .retry(RetryHandler.failuresOnly(deferredrpcRetryPolicy, timer, asyncEnqueueStats)) + } + + val insertTweetStore = + InsertTweet.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + tweetStatsStore = tweetStatsStore, + cachingTweetStore = cachingTweetStore, + limiterStore = limiterStore, + asyncEnqueueStore = asyncEnqueueStore, + userCountsUpdatingStore = userCountsUpdatingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val asyncInsertStore = + AsyncInsertTweet.Store( + replicatingStore = replicatingStore, + indexingStore = indexingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + timelineUpdatingStore = timelineUpdatingStore, + eventBusEnqueueStore = eventBusEnqueueStore, + fanoutServiceStore = fanoutServiceStore, + scribeMediaTagStore = scribeMediaTagStore, + userGeotagUpdateStore = userGeotagUpdateStore, + geoSearchRequestIDStore = geoSearchRequestIDStore + ) + + val replicatedInsertTweetStore = + ReplicatedInsertTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val deleteTweetStore = + DeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + userCountsUpdatingStore = userCountsUpdatingStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + logLensStore = logLensStore + ) + + val asyncDeleteTweetStore = + AsyncDeleteTweet.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = replicatingStore, + indexingStore = indexingStore, + eventBusEnqueueStore = eventBusEnqueueStore, + timelineUpdatingStore = timelineUpdatingStore, + 
tweetCountsUpdatingStore = tweetCountsUpdatingStore, + guanoServiceStore = guanoServiceStore, + mediaServiceStore = mediaServiceStore + ) + + val replicatedDeleteTweetStore = + ReplicatedDeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val incrBookmarkCountStore = + IncrBookmarkCount.Store( + asyncEnqueueStore = asyncEnqueueStore, + replicatingStore = replicatingStore + ) + + val asyncIncrBookmarkCountStore = + AsyncIncrBookmarkCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val replicatedIncrBookmarkCountStore = + ReplicatedIncrBookmarkCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val incrFavCountStore = + IncrFavCount.Store( + asyncEnqueueStore = asyncEnqueueStore, + replicatingStore = replicatingStore + ) + + val asyncIncrFavCountStore = + AsyncIncrFavCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val replicatedIncrFavCountStore = + ReplicatedIncrFavCount.Store( + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val scrubGeoStore = + ScrubGeo.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + eventBusEnqueueStore = eventBusEnqueueStore, + replicatingStore = replicatingStore + ) + + val replicatedScrubGeoStore = + ReplicatedScrubGeo.Store( + cachingTweetStore = cachingTweetStore + ) + + val takedownStore = + Takedown.Store( + logLensStore = logLensStore, + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncTakedownStore = + AsyncTakedown.Store( + replicatingStore = replicatingStore, + guanoStore = guanoServiceStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedTakedownStore = + ReplicatedTakedown.Store( + cachingTweetStore = cachingTweetStore + ) + + val updatePossiblySensitiveTweetStore = + UpdatePossiblySensitiveTweet.Store( + 
manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + logLensStore = logLensStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncUpdatePossiblySensitiveTweetStore = + AsyncUpdatePossiblySensitiveTweet.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = replicatingStore, + guanoStore = guanoServiceStore, + eventBusStore = eventBusEnqueueStore + ) + + val replicatedUpdatePossiblySensitiveTweetStore = + ReplicatedUpdatePossiblySensitiveTweet.Store( + cachingTweetStore = cachingTweetStore + ) + + val setAdditionalFieldsStore = + SetAdditionalFields.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + logLensStore = logLensStore + ) + + val asyncSetAdditionalFieldsStore = + AsyncSetAdditionalFields.Store( + replicatingStore = replicatingStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedSetAdditionalFieldsStore = + ReplicatedSetAdditionalFields.Store( + cachingTweetStore = cachingTweetStore + ) + + val setRetweetVisibilityStore = + SetRetweetVisibility.Store(asyncEnqueueStore = asyncEnqueueStore) + + val asyncSetRetweetVisibilityStore = + AsyncSetRetweetVisibility.Store( + tweetIndexingStore = indexingStore, + tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore, + replicatingTweetStore = replicatingStore, + retweetArchivalEnqueueStore = retweetArchivalEnqueueStore + ) + + val replicatedSetRetweetVisibilityStore = + ReplicatedSetRetweetVisibility.Store( + tweetCountsCacheUpdatingStore = tweetCountsUpdatingStore + ) + + val deleteAdditionalFieldsStore = + DeleteAdditionalFields.Store( + cachingTweetStore = cachingTweetStore, + asyncEnqueueStore = asyncEnqueueStore, + logLensStore = logLensStore + ) + + val asyncDeleteAdditionalFieldsStore = + AsyncDeleteAdditionalFields.Store( + manhattanStore = manhattanStore, + cachingTweetStore = cachingTweetStore, + replicatingStore = 
replicatingStore, + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val replicatedDeleteAdditionalFieldsStore = + ReplicatedDeleteAdditionalFields.Store( + cachingTweetStore = cachingTweetStore + ) + + /* + * This composed store handles all synchronous side effects of an undelete + * but does not execute the undeletion. + * + * This store is executed after the actual undelete request succeeds. + * The undeletion request is initiated by Undelete.apply() + */ + val undeleteTweetStore = + UndeleteTweet.Store( + logLensStore = logLensStore, + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore, + asyncEnqueueStore = asyncEnqueueStore + ) + + val asyncUndeleteTweetStore = + AsyncUndeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + eventBusEnqueueStore = eventBusEnqueueStore, + indexingStore = indexingStore, + replicatingStore = replicatingStore, + mediaServiceStore = mediaServiceStore, + timelineUpdatingStore = timelineUpdatingStore + ) + + val replicatedUndeleteTweetStore = + ReplicatedUndeleteTweet.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val flushStore = + Flush.Store( + cachingTweetStore = cachingTweetStore, + tweetCountsUpdatingStore = tweetCountsUpdatingStore + ) + + val scrubGeoUpdateUserTimestampStore = + ScrubGeoUpdateUserTimestamp.Store( + cache = caches.geoScrubCache, + setInManhattan = clients.geoScrubEventStore.setGeoScrubTimestamp, + geotagUpdateStore = userGeotagUpdateStore, + tweetEventBusStore = eventBusEnqueueStore + ) + + val quotedTweetDeleteStore = + QuotedTweetDelete.Store( + eventBusEnqueueStore = eventBusEnqueueStore + ) + + val quotedTweetTakedownStore = + QuotedTweetTakedown.Store( + eventBusEnqueueStore = eventBusEnqueueStore + ) + + new TotalTweetStore { + val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + asyncDeleteAdditionalFieldsStore.asyncDeleteAdditionalFields + val 
asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + asyncDeleteTweetStore.asyncDeleteTweet + val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = + asyncIncrBookmarkCountStore.asyncIncrBookmarkCount + val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] = + asyncIncrFavCountStore.asyncIncrFavCount + val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = asyncInsertStore.asyncInsertTweet + val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = + asyncSetAdditionalFieldsStore.asyncSetAdditionalFields + val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + asyncSetRetweetVisibilityStore.asyncSetRetweetVisibility + val asyncTakedown: FutureEffect[AsyncTakedown.Event] = asyncTakedownStore.asyncTakedown + val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + asyncUndeleteTweetStore.asyncUndeleteTweet + val asyncUpdatePossiblySensitiveTweet: FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] = + asyncUpdatePossiblySensitiveTweetStore.asyncUpdatePossiblySensitiveTweet + val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + deleteAdditionalFieldsStore.deleteAdditionalFields + val deleteTweet: FutureEffect[DeleteTweet.Event] = deleteTweetStore.deleteTweet + val flush: FutureEffect[Flush.Event] = flushStore.flush + val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = + incrBookmarkCountStore.incrBookmarkCount + val incrFavCount: FutureEffect[IncrFavCount.Event] = incrFavCountStore.incrFavCount + val insertTweet: FutureEffect[InsertTweet.Event] = insertTweetStore.insertTweet + val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] = + quotedTweetDeleteStore.quotedTweetDelete + val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] = + quotedTweetTakedownStore.quotedTweetTakedown + val replicatedDeleteAdditionalFields: FutureEffect[ReplicatedDeleteAdditionalFields.Event] = + 
replicatedDeleteAdditionalFieldsStore.replicatedDeleteAdditionalFields + val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = + replicatedDeleteTweetStore.replicatedDeleteTweet + val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] = + replicatedIncrBookmarkCountStore.replicatedIncrBookmarkCount + val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] = + replicatedIncrFavCountStore.replicatedIncrFavCount + val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = + replicatedInsertTweetStore.replicatedInsertTweet + val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] = + replicatedScrubGeoStore.replicatedScrubGeo + val replicatedSetAdditionalFields: FutureEffect[ReplicatedSetAdditionalFields.Event] = + replicatedSetAdditionalFieldsStore.replicatedSetAdditionalFields + val replicatedSetRetweetVisibility: FutureEffect[ReplicatedSetRetweetVisibility.Event] = + replicatedSetRetweetVisibilityStore.replicatedSetRetweetVisibility + val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] = + replicatedTakedownStore.replicatedTakedown + val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = + replicatedUndeleteTweetStore.replicatedUndeleteTweet + val replicatedUpdatePossiblySensitiveTweet: FutureEffect[ + ReplicatedUpdatePossiblySensitiveTweet.Event + ] = + replicatedUpdatePossiblySensitiveTweetStore.replicatedUpdatePossiblySensitiveTweet + val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + asyncDeleteAdditionalFieldsStore.retryAsyncDeleteAdditionalFields + val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncDeleteTweet.Event]] = + asyncDeleteTweetStore.retryAsyncDeleteTweet + val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[AsyncInsertTweet.Event]] = + asyncInsertStore.retryAsyncInsertTweet + val retryAsyncSetAdditionalFields: FutureEffect[ + 
TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] + ] = + asyncSetAdditionalFieldsStore.retryAsyncSetAdditionalFields + val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + asyncSetRetweetVisibilityStore.retryAsyncSetRetweetVisibility + val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + asyncTakedownStore.retryAsyncTakedown + val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[AsyncUndeleteTweet.Event]] = + asyncUndeleteTweetStore.retryAsyncUndeleteTweet + val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + asyncUpdatePossiblySensitiveTweetStore.retryAsyncUpdatePossiblySensitiveTweet + val scrubGeo: FutureEffect[ScrubGeo.Event] = scrubGeoStore.scrubGeo + val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + setAdditionalFieldsStore.setAdditionalFields + val setRetweetVisibility: FutureEffect[SetRetweetVisibility.Event] = + setRetweetVisibilityStore.setRetweetVisibility + val takedown: FutureEffect[Takedown.Event] = takedownStore.takedown + val undeleteTweet: FutureEffect[UndeleteTweet.Event] = undeleteTweetStore.undeleteTweet + val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + updatePossiblySensitiveTweetStore.updatePossiblySensitiveTweet + val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = + scrubGeoUpdateUserTimestampStore.scrubGeoUpdateUserTimestamp + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala new file mode 100644 index 000000000..a20def18a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/TweetypieDeciderGates.scala @@ -0,0 +1,91 @@ +package com.twitter.tweetypie +package config + +import 
com.twitter.decider.Decider +import com.twitter.tweetypie.decider.DeciderGates + +object TweetypieDeciderGates { + def apply( + _decider: Decider, + _overrides: Map[String, Boolean] = Map.empty + ): TweetypieDeciderGates = + new TweetypieDeciderGates { + override def decider: Decider = _decider + override def overrides: Map[String, Boolean] = _overrides + override def prefix: String = "tweetypie" + } +} + +trait TweetypieDeciderGates extends DeciderGates { + val checkSpamOnRetweet: Gate[Unit] = linear("check_spam_on_retweet") + val checkSpamOnTweet: Gate[Unit] = linear("check_spam_on_tweet") + val delayEraseUserTweets: Gate[Unit] = linear("delay_erase_user_tweets") + val denyNonTweetPermalinks: Gate[Unit] = linear("deny_non_tweet_permalinks") + val enableCommunityTweetCreates: Gate[Unit] = linear("enable_community_tweet_creates") + val useConversationControlFeatureSwitchResults: Gate[Unit] = linear( + "conversation_control_use_feature_switch_results") + val enableExclusiveTweetControlValidation: Gate[Unit] = linear( + "enable_exclusive_tweet_control_validation") + val enableTrustedFriendsControlValidation: Gate[Unit] = linear( + "enable_trusted_friends_control_validation" + ) + val enableStaleTweetValidation: Gate[Unit] = linear( + "enable_stale_tweet_validation" + ) + val enforceRateLimitedClients: Gate[Unit] = linear("enforce_rate_limited_clients") + val failClosedInVF: Gate[Unit] = linear("fail_closed_in_vf") + val forkDarkTraffic: Gate[Unit] = linear("fork_dark_traffic") + val hydrateConversationMuted: Gate[Unit] = linear("hydrate_conversation_muted") + val hydrateCounts: Gate[Unit] = linear("hydrate_counts") + val hydratePreviousCounts: Gate[Unit] = linear("hydrate_previous_counts") + val hydrateDeviceSources: Gate[Unit] = linear("hydrate_device_sources") + val hydrateEscherbirdAnnotations: Gate[Unit] = linear("hydrate_escherbird_annotations") + val hydrateGnipProfileGeoEnrichment: Gate[Unit] = linear("hydrate_gnip_profile_geo_enrichment") + val 
hydrateHasMedia: Gate[Unit] = linear("hydrate_has_media") + val hydrateMedia: Gate[Unit] = linear("hydrate_media") + val hydrateMediaRefs: Gate[Unit] = linear("hydrate_media_refs") + val hydrateMediaTags: Gate[Unit] = linear("hydrate_media_tags") + val hydratePastedMedia: Gate[Unit] = linear("hydrate_pasted_media") + val hydratePerspectives: Gate[Unit] = linear("hydrate_perspectives") + val hydratePerspectivesEditsForTimelines: Gate[Unit] = linear( + "hydrate_perspectives_edits_for_timelines") + val hydratePerspectivesEditsForTweetDetail: Gate[Unit] = linear( + "hydrate_perspectives_edits_for_tweet_details") + val hydratePerspectivesEditsForOtherSafetyLevels: Gate[Unit] = + linear("hydrate_perspectives_edits_for_other_levels") + val hydratePlaces: Gate[Unit] = linear("hydrate_places") + val hydrateScrubEngagements: Gate[Unit] = linear("hydrate_scrub_engagements") + val jiminyDarkRequests: Gate[Unit] = linear("jiminy_dark_requests") + val logCacheExceptions: Gate[Unit] = linear("log_cache_exceptions") + val logReads: Gate[Unit] = linear("log_reads") + val logTweetCacheWrites: Gate[TweetId] = byId("log_tweet_cache_writes") + val logWrites: Gate[Unit] = linear("log_writes") + val logYoungTweetCacheWrites: Gate[TweetId] = byId("log_young_tweet_cache_writes") + val maxRequestWidthEnabled: Gate[Unit] = linear("max_request_width_enabled") + val mediaRefsHydratorIncludePastedMedia: Gate[Unit] = linear( + "media_refs_hydrator_include_pasted_media") + val rateLimitByLimiterService: Gate[Unit] = linear("rate_limit_by_limiter_service") + val rateLimitTweetCreationFailure: Gate[Unit] = linear("rate_limit_tweet_creation_failure") + val replicateReadsToATLA: Gate[Unit] = linear("replicate_reads_to_atla") + val replicateReadsToPDXA: Gate[Unit] = linear("replicate_reads_to_pdxa") + val disableInviteViaMention: Gate[Unit] = linear("disable_invite_via_mention") + val shedReadTrafficVoluntarily: Gate[Unit] = linear("shed_read_traffic_voluntarily") + val 
preferForwardedServiceIdentifierForClientId: Gate[Unit] = + linear("prefer_forwarded_service_identifier_for_client_id") + val enableRemoveUnmentionedImplicitMentions: Gate[Unit] = linear( + "enable_remove_unmentioned_implicit_mentions") + val validateCardRefAttachmentAndroid: Gate[Unit] = linear("validate_card_ref_attachment_android") + val validateCardRefAttachmentNonAndroid: Gate[Unit] = linear( + "validate_card_ref_attachment_non_android") + val tweetVisibilityLibraryEnableParityTest: Gate[Unit] = linear( + "tweet_visibility_library_enable_parity_test") + val enableVfFeatureHydrationInQuotedTweetVLShim: Gate[Unit] = linear( + "enable_vf_feature_hydration_in_quoted_tweet_visibility_library_shim") + val disablePromotedTweetEdit: Gate[Unit] = linear("disable_promoted_tweet_edit") + val shouldMaterializeContainers: Gate[Unit] = linear("should_materialize_containers") + val checkTwitterBlueSubscriptionForEdit: Gate[Unit] = linear( + "check_twitter_blue_subscription_for_edit") + val hydrateBookmarksCount: Gate[Long] = byId("hydrate_bookmarks_count") + val hydrateBookmarksPerspective: Gate[Long] = byId("hydrate_bookmarks_perspective") + val setEditTimeWindowToSixtyMinutes: Gate[Unit] = linear("set_edit_time_window_to_sixty_minutes") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala new file mode 100644 index 000000000..621bb8148 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/WritePathHydration.scala @@ -0,0 +1,223 @@ +package com.twitter.tweetypie +package config + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.handler.TweetBuilder +import com.twitter.tweetypie.handler.WritePathQueryOptions +import com.twitter.tweetypie.hydrator.EscherbirdAnnotationHydrator +import 
com.twitter.tweetypie.hydrator.LanguageHydrator +import com.twitter.tweetypie.hydrator.PlaceHydrator +import com.twitter.tweetypie.hydrator.ProfileGeoHydrator +import com.twitter.tweetypie.hydrator.TweetDataValueHydrator +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.InsertTweet +import com.twitter.tweetypie.store.UndeleteTweet +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.EditControlUtil + +object WritePathHydration { + type HydrateQuotedTweet = + FutureArrow[(User, QuotedTweet, WritePathHydrationOptions), Option[QuoteTweetMetadata]] + + case class QuoteTweetMetadata( + quotedTweet: Tweet, + quotedUser: User, + quoterHasAlreadyQuotedTweet: Boolean) + + private val log = Logger(getClass) + + val UserFieldsForInsert: Set[UserField] = + TweetBuilder.userFields + + val AllowedMissingFieldsOnWrite: Set[FieldByPath] = + Set( + EscherbirdAnnotationHydrator.hydratedField, + LanguageHydrator.hydratedField, + PlaceHydrator.HydratedField, + ProfileGeoHydrator.hydratedField + ) + + /** + * Builds a FutureArrow that performs the necessary hydration in the write-path for an + * InsertTweet.Event. There are two separate hydration steps, pre-cache and post-cache. + * The pre-cache hydration step performs the hydration which is safe to cache, while the + * post-cache hydration step performs the hydration whose results we don't want to cache + * on the tweet. + * + * InsertTweet.Event contains two tweet fields, `tweet` and `internalTweet`. `tweet` is + * the input value used for hydration, and in the updated InsertTweet.Event returned by the + * FutureArrow, `tweet` contains the post-cache hydrated tweet while `internalTweet` contains + * the pre-cache hydrated tweet. 
+ */ + def hydrateInsertTweetEvent( + hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], + hydrateQuotedTweet: HydrateQuotedTweet + ): FutureArrow[InsertTweet.Event, InsertTweet.Event] = + FutureArrow { event => + val cause = TweetQuery.Cause.Insert(event.tweet.id) + val hydrationOpts = event.hydrateOptions + val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) + val queryOpts: TweetQuery.Options = + WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) + + val initTweetData = + TweetData( + tweet = event.tweet, + sourceTweetResult = event.sourceTweet.map(TweetResult(_)) + ) + + for { + tweetData <- hydrateTweet((initTweetData, queryOpts)) + hydratedTweet = tweetData.tweet + internalTweet = + tweetData.cacheableTweetResult + .map(_.value.toCachedTweet) + .getOrElse( + throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) + + optQt = getQuotedTweet(hydratedTweet) + .orElse(event.sourceTweet.flatMap(getQuotedTweet)) + + hydratedQT <- optQt match { + case None => Future.value(None) + case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) + } + } yield { + event.copy( + tweet = hydratedTweet, + _internalTweet = Some(internalTweet), + quotedTweet = hydratedQT.map { case QuoteTweetMetadata(t, _, _) => t }, + quotedUser = hydratedQT.map { case QuoteTweetMetadata(_, u, _) => u }, + quoterHasAlreadyQuotedTweet = hydratedQT.exists { case QuoteTweetMetadata(_, _, b) => b } + ) + } + } + + /** + * Builds a FutureArrow for retrieving the metadata of the tweet referenced by a + * QuotedTweet struct. If either the quoted tweet or the quoted user + * isn't visible to the tweeting user, the FutureArrow will return None. 
+ */ + def hydrateQuotedTweet( + tweetRepo: TweetRepository.Optional, + userRepo: UserRepository.Optional, + quoterHasAlreadyQuotedRepo: QuoterHasAlreadyQuotedRepository.Type + ): HydrateQuotedTweet = { + FutureArrow { + case (tweetingUser, qt, hydrateOptions) => + val tweetQueryOpts = WritePathQueryOptions.quotedTweet(tweetingUser, hydrateOptions) + val userQueryOpts = + UserQueryOptions( + UserFieldsForInsert, + UserVisibility.Visible, + forUserId = Some(tweetingUser.id) + ) + + Stitch.run( + Stitch + .join( + tweetRepo(qt.tweetId, tweetQueryOpts), + userRepo(UserKey.byId(qt.userId), userQueryOpts), + // We're failing open here on tflock exceptions since this should not + // affect the ability to quote tweet if tflock goes down. (although if + // this call doesn't succeed, quote counts may be inaccurate for a brief + // period of time) + quoterHasAlreadyQuotedRepo(qt.tweetId, tweetingUser.id).liftToTry + ) + .map { + case (Some(tweet), Some(user), isAlreadyQuoted) => + Some(QuoteTweetMetadata(tweet, user, isAlreadyQuoted.getOrElse(false))) + case _ => None + } + ) + } + } + + /** + * Builds a FutureArrow that performs any additional hydration on an UndeleteTweet.Event before + * being passed to a TweetStore. 
+ */ + def hydrateUndeleteTweetEvent( + hydrateTweet: FutureArrow[(TweetData, TweetQuery.Options), TweetData], + hydrateQuotedTweet: HydrateQuotedTweet + ): FutureArrow[UndeleteTweet.Event, UndeleteTweet.Event] = + FutureArrow { event => + val cause = TweetQuery.Cause.Undelete(event.tweet.id) + val hydrationOpts = event.hydrateOptions + val isEditControlEdit = event.tweet.editControl.exists(EditControlUtil.isEditControlEdit) + val queryOpts = WritePathQueryOptions.insert(cause, event.user, hydrationOpts, isEditControlEdit) + + // when undeleting a retweet, don't set sourceTweetResult to enable SourceTweetHydrator to + // hydrate it + val initTweetData = TweetData(tweet = event.tweet) + + for { + tweetData <- hydrateTweet((initTweetData, queryOpts)) + hydratedTweet = tweetData.tweet + internalTweet = + tweetData.cacheableTweetResult + .map(_.value.toCachedTweet) + .getOrElse( + throw new IllegalStateException(s"expected cacheableTweetResult, e=${event}")) + + optQt = getQuotedTweet(hydratedTweet) + .orElse(tweetData.sourceTweetResult.map(_.value.tweet).flatMap(getQuotedTweet)) + + hydratedQt <- optQt match { + case None => Future.value(None) + case Some(qt) => hydrateQuotedTweet((event.user, qt, hydrationOpts)) + } + } yield { + event.copy( + tweet = hydratedTweet, + _internalTweet = Some(internalTweet), + sourceTweet = tweetData.sourceTweetResult.map(_.value.tweet), + quotedTweet = hydratedQt.map { case QuoteTweetMetadata(t, _, _) => t }, + quotedUser = hydratedQt.map { case QuoteTweetMetadata(_, u, _) => u }, + quoterHasAlreadyQuotedTweet = hydratedQt.exists { case QuoteTweetMetadata(_, _, b) => b } + ) + } + } + + /** + * Converts a TweetDataValueHydrator into a FutureArrow that hydrates a tweet for the write-path. 
+ */ + def hydrateTweet( + hydrator: TweetDataValueHydrator, + stats: StatsReceiver, + allowedMissingFields: Set[FieldByPath] = AllowedMissingFieldsOnWrite + ): FutureArrow[(TweetData, TweetQuery.Options), TweetData] = { + val hydrationStats = stats.scope("hydration") + val missingFieldsStats = hydrationStats.scope("missing_fields") + + FutureArrow[(TweetData, TweetQuery.Options), TweetData] { + case (td, opts) => + Stitch + .run(hydrator(td, opts)) + .rescue { + case ex => + log.warn("Hydration failed with exception", ex) + Future.exception( + TweetHydrationError("Hydration failed with exception: " + ex, Some(ex)) + ) + } + .flatMap { r => + // Record missing fields even if the request succeeds + for (missingField <- r.state.failedFields) + missingFieldsStats.counter(missingField.fieldIdPath.mkString(".")).incr() + + if ((r.state.failedFields -- allowedMissingFields).nonEmpty) { + Future.exception( + TweetHydrationError( + "Failed to hydrate. Missing Fields: " + r.state.failedFields.mkString(",") + ) + ) + } else { + Future.value(r.value) + } + } + } + }.trackOutcome(stats, (_: Any) => "hydration") +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala new file mode 100644 index 000000000..a452abbd8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/config/package.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie + +import com.twitter.context.thriftscala.Viewer + +package object config { + // Bring Tweetypie permitted TwitterContext into scope + private[config] val TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def getAppId: Option[AppId] = TwitterContext().getOrElse(Viewer()).clientApplicationId +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD new file mode 100644 index 
000000000..a1e3cab2d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/BUILD @@ -0,0 +1,19 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/bijection:core", + "featureswitches/featureswitches-core/src/main/scala", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/thrift:compiled-scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala new file mode 100644 index 000000000..ca185eb4a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/CardReferenceUriExtractor.scala @@ -0,0 +1,32 @@ +package com.twitter.tweetypie +package core + +import com.twitter.tweetypie.thriftscala.CardReference +import java.net.URI + +sealed trait CardUri +object Tombstone extends CardUri +case class NonTombstone(uri: String) extends CardUri + +object CardReferenceUriExtractor { + + private def parseAsUri(cardRef: CardReference) = Try(new URI(cardRef.cardUri)).toOption + private def isTombstone(uri: URI) = uri.getScheme == "tombstone" + + /** + * Parses a CardReference to return Option[CardUri] to differentiate among: + * - Some(NonTombstone): hydrate card2 with provided uri + * - Some(Tombstone): don't hydrate card2 + * - None: fallback and attempt to use url entities uris + */ + def unapply(cardRef: CardReference): Option[CardUri] = + parseAsUri(cardRef) match { + case Some(uri) if !isTombstone(uri) => Some(NonTombstone(uri.toString)) 
+ case Some(uri) => Some(Tombstone) + + // If a cardReference is set, but does not parse as a URI, it's likely a https? URL with + // incorrectly encoded query params. Since these occur frequently in the wild, we'll + // attempt a card2 hydration with it + case None => Some(NonTombstone(cardRef.cardUri)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala new file mode 100644 index 000000000..8766675cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/EditState.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Mutation + +/** + * An EditState is a function that changes a value and may generate + * some state about what was modified. For instance, it may record + * whether an item was changed, or whether there was an error. + * EditStates are useful because they are first-class values that can + * be composed. In particular, it is useful to concurrently access + * external data to build edits and then apply them. + * + * @tparam A The type of the value that is being edited (for instance, + * having fields hydrated with data from another service) + */ +final case class EditState[A](run: A => ValueState[A]) { + + /** + * Composes two EditStates in sequence + */ + def andThen(other: EditState[A]): EditState[A] = + EditState[A] { a0: A => + val ValueState(a1, s1) = run(a0) + val ValueState(a2, s2) = other.run(a1) + ValueState(a2, s1 ++ s2) + } +} + +object EditState { + + /** + * Creates a "passthrough" EditState: + * Leaves A unchanged and produces empty state S + */ + def unit[A]: EditState[A] = + EditState[A](ValueState.unit[A]) + + /** + * Creates an `EditState[A]` using a `Mutation[A]`. 
+ */ + def fromMutation[A](mut: Mutation[A]): EditState[A] = + EditState[A] { a => + mut(a) match { + case None => ValueState.unmodified(a) + case Some(a2) => ValueState.modified(a2) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala new file mode 100644 index 000000000..b4a9fe157 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Exceptions.scala @@ -0,0 +1,14 @@ +package com.twitter.tweetypie +package core + +import scala.util.control.NoStackTrace + +case class InternalServerError(message: String) extends Exception(message) with NoStackTrace + +case class OverCapacity(message: String) extends Exception(message) with NoStackTrace + +case class RateLimited(message: String) extends Exception(message) with NoStackTrace + +case class TweetHydrationError(message: String, cause: Option[Throwable] = None) + extends Exception(message, cause.getOrElse(null)) + with NoStackTrace diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala new file mode 100644 index 000000000..0685446f3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/FilteredState.scala @@ -0,0 +1,96 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.spam.rtf.thriftscala.FilteredReason +import scala.util.control.NoStackTrace + +sealed trait FilteredState + +object FilteredState { + + /** + * The tweet exists and the filtered state was due to business rules + * (e.g. safety label filtering, or protected accounts). Note that + * Suppress and Unavailable can both have a FilteredReason. + */ + sealed trait HasFilteredReason extends FilteredState { + def filteredReason: FilteredReason + } + + /** + * The only FilteredState that is not an exception. 
It indicates that + * the tweet should be returned along with a suppress reason. This is + * sometimes known as "soft filtering". Only used by VF. + */ + case class Suppress(filteredReason: FilteredReason) extends FilteredState with HasFilteredReason + + /** + * FilteredStates that cause the tweet to be unavailable are modeled + * as an [[Exception]]. (Suppressed filtered states cannot be used as + * exceptions because they should not prevent the tweet from being + * returned.) This is sometimes known as "hard filtering". + */ + sealed abstract class Unavailable extends Exception with FilteredState with NoStackTrace + + object Unavailable { + // Used for Tweets that should be dropped because of VF rules + case class Drop(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason + + // Used for Tweets that should be dropped and replaced with their preview because of VF rules + case class Preview(filteredReason: FilteredReason) extends Unavailable with HasFilteredReason + + // Used for Tweets that should be dropped because of Tweetypie business logic + case object DropUnspecified extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.UnspecifiedReason(true) + } + + // Represents a Deleted tweet (NotFound is represented with stitch.NotFound) + case object TweetDeleted extends Unavailable + + // Represents a Deleted tweet that violated Twitter Rules (see go/bounced-tweet) + case object BounceDeleted extends Unavailable + + // Represents both Deleted and NotFound source tweets + case class SourceTweetNotFound(deleted: Boolean) extends Unavailable + + // Used by the [[ReportedTweetFilter]] to signal that a Tweet has a "reported" perspective from TLS + case object Reported extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.ReportedTweet(true) + } + + // The following objects are used by the [[UserRepository]] to signal problems with the Tweet author + object Author 
{ + case object NotFound extends Unavailable + + case object Deactivated extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsDeactivated(true) + } + + case object Offboarded extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorAccountIsInactive(true) + } + + case object Suspended extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsSuspended(true) + } + + case object Protected extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsProtected(true) + } + + case object Unsafe extends Unavailable with HasFilteredReason { + val filteredReason: FilteredReason = FilteredReason.AuthorIsUnsafe(true) + } + } + } + + /** + * Creates a new ExceptionCategorizer which returns an empty category for any + * Unavailable value, and forwards to `underlying` for anything else. + */ + def ignoringCategorizer(underlying: ExceptionCategorizer): ExceptionCategorizer = + ExceptionCategorizer { + case _: Unavailable => Set.empty + case t => underlying(t) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala new file mode 100644 index 000000000..fae6377dd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/GeoSearchRequestId.scala @@ -0,0 +1,3 @@ +package com.twitter.tweetypie.core + +case class GeoSearchRequestId(requestID: String) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala new file mode 100644 index 000000000..8c6a05a84 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/HydrationState.scala @@ -0,0 +1,122 @@ +package com.twitter.tweetypie.core + +import 
com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.HydrationType + +/** + * HydrationState is used to record whether a particular piece of data was modified as a result + * of hydration, and/or if there was a failure to hydrate the data. + */ +sealed trait HydrationState { + def isEmpty: Boolean + def modified: Boolean + def completedHydrations: Set[HydrationType] = Set.empty + def failedFields: Set[FieldByPath] = Set.empty + def cacheErrorEncountered: Boolean = false + def ++(that: HydrationState): HydrationState +} + +object HydrationState { + + /** + * Base `HydrationState`. It acts as an identity value when combined with any other + * `HydrationState`. + */ + case object Empty extends HydrationState { + def isEmpty = true + def modified = false + def ++(that: HydrationState): HydrationState = that + } + + /** + * A `HydrationState` with metadata indicating a non-fatal hydration operation. + */ + case class Success( + override val modified: Boolean = false, + override val completedHydrations: Set[HydrationType] = Set.empty, + override val failedFields: Set[FieldByPath] = Set.empty, + override val cacheErrorEncountered: Boolean = false) + extends HydrationState { + + def isEmpty: Boolean = !modified && failedFields.isEmpty && !cacheErrorEncountered + + def ++(that: HydrationState): HydrationState = + that match { + case Empty => this + case that: Success => + HydrationState( + modified || that.modified, + completedHydrations ++ that.completedHydrations, + failedFields ++ that.failedFields, + cacheErrorEncountered || that.cacheErrorEncountered + ) + } + + /** + * An implementation of `copy` that avoids unnecessary allocations, by + * using the constant `HydrationState.unmodified` and `HydrationState.modified` + * values when possible. 
+ */ + def copy( + modified: Boolean = this.modified, + completedHydrations: Set[HydrationType] = this.completedHydrations, + failedFields: Set[FieldByPath] = this.failedFields, + cacheErrorEncountered: Boolean = this.cacheErrorEncountered + ): HydrationState = + HydrationState(modified, completedHydrations, failedFields, cacheErrorEncountered) + } + + val empty: HydrationState = Empty + val modified: HydrationState = Success(true) + + def modified(completedHydration: HydrationType): HydrationState = + modified(Set(completedHydration)) + + def modified(completedHydrations: Set[HydrationType]): HydrationState = + Success(modified = true, completedHydrations = completedHydrations) + + def partial(failedField: FieldByPath): HydrationState = + partial(Set(failedField)) + + def partial(failedFields: Set[FieldByPath]): HydrationState = + Success(modified = false, failedFields = failedFields) + + def apply( + modified: Boolean, + completedHydrations: Set[HydrationType] = Set.empty, + failedFields: Set[FieldByPath] = Set.empty, + cacheErrorEncountered: Boolean = false + ): HydrationState = + if (completedHydrations.nonEmpty || failedFields.nonEmpty || cacheErrorEncountered) { + Success(modified, completedHydrations, failedFields, cacheErrorEncountered) + } else if (modified) { + HydrationState.modified + } else { + HydrationState.empty + } + + /** + * Creates a new HydrationState with modified set to true if `next` and `prev` are different, + * or false if they are the same. + */ + def delta[A](prev: A, next: A): HydrationState = + if (next != prev) modified else empty + + /** + * Join a list of HydrationStates into a single HydrationState. + * + * Note: this could just be a reduce over the HydrationStates but that would allocate + * _N_ HydrationStates. This approach also allows for shortcircuiting over the boolean + * fields. 
+ */ + def join(states: HydrationState*): HydrationState = { + val statesSet = states.toSet + + HydrationState( + modified = states.exists(_.modified), + completedHydrations = statesSet.flatMap(_.completedHydrations), + failedFields = statesSet.flatMap(_.failedFields), + cacheErrorEncountered = states.exists(_.cacheErrorEncountered) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala new file mode 100644 index 000000000..28f38a807 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/QuotedTweetResult.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie.core + +import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +/** + * The data about a quoted tweet that needs to be carried forward to + * Tweetypie clients. + */ +sealed trait QuotedTweetResult { + def filteredReason: Option[FilteredReason] + def toOption: Option[TweetResult] + def map(f: TweetResult => TweetResult): QuotedTweetResult +} + +object QuotedTweetResult { + case object NotFound extends QuotedTweetResult { + def filteredReason: None.type = None + def toOption: None.type = None + def map(f: TweetResult => TweetResult): NotFound.type = this + } + case class Filtered(state: FilteredState.Unavailable) extends QuotedTweetResult { + def filteredReason: Option[FilteredReason] = + state match { + case st: FilteredState.HasFilteredReason => Some(st.filteredReason) + case _ => None + } + def toOption: None.type = None + def map(f: TweetResult => TweetResult): Filtered = this + } + case class Found(result: TweetResult) extends QuotedTweetResult { + def filteredReason: Option[FilteredReason] = result.value.suppress.map(_.filteredReason) + def toOption: Option[TweetResult] = Some(result) + def map(f: TweetResult => TweetResult): QuotedTweetResult = 
Found(f(result)) + } + + def fromTry(tryResult: Try[TweetResult]): Try[QuotedTweetResult] = + tryResult match { + case Return(result) => Return(Found(result)) + case Throw(state: FilteredState.Unavailable) => Return(Filtered(state)) + case Throw(com.twitter.stitch.NotFound) => Return(NotFound) + case Throw(e) => Throw(e) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala new file mode 100644 index 000000000..e367a8481 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/Serializer.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.cache +import com.twitter.servo.cache.CachedSerializer +import com.twitter.tweetypie.thriftscala +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.tweetypie.thriftscala.Tweet +import org.apache.thrift.protocol.TCompactProtocol + +/** + * A container object for serializers. 
+ * Creates a serializer for every object type cached by the tweetypie service + */ +object Serializer { + lazy val CompactProtocolFactory: TCompactProtocol.Factory = new TCompactProtocol.Factory + + def toCached[T](underlying: cache.Serializer[T]): cache.CachedSerializer[T] = + new cache.CachedSerializer(underlying, CompactProtocolFactory) + + object Tweet { + lazy val Compact: cache.ThriftSerializer[thriftscala.Tweet] = + new cache.ThriftSerializer(thriftscala.Tweet, CompactProtocolFactory) + lazy val CachedCompact: CachedSerializer[Tweet] = toCached(Compact) + } + + object CachedTweet { + lazy val Compact: cache.ThriftSerializer[thriftscala.CachedTweet] = + new cache.ThriftSerializer(thriftscala.CachedTweet, CompactProtocolFactory) + lazy val CachedCompact: CachedSerializer[CachedTweet] = toCached(Compact) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala new file mode 100644 index 000000000..ab81552e9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/StoredTweetResult.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie.core + +sealed trait StoredTweetResult { + def canHydrate: Boolean +} + +object StoredTweetResult { + sealed trait Error + object Error { + case object Corrupt extends Error + case object ScrubbedFieldsPresent extends Error + case object FieldsMissingOrInvalid extends Error + case object ShouldBeHardDeleted extends Error + } + + case class Present(errors: Seq[Error], canHydrate: Boolean) extends StoredTweetResult + + case class HardDeleted(softDeletedAtMsec: Long, hardDeletedAtMsec: Long) + extends StoredTweetResult { + override def canHydrate: Boolean = false + } + + case class SoftDeleted(softDeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class BounceDeleted(deletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + 
extends StoredTweetResult + + case class Undeleted(undeletedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class ForceAdded(addedAtMsec: Long, errors: Seq[Error], canHydrate: Boolean) + extends StoredTweetResult + + case class Failed(errors: Seq[Error]) extends StoredTweetResult { + override def canHydrate: Boolean = false + } + + object NotFound extends StoredTweetResult { + override def canHydrate: Boolean = false + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala new file mode 100644 index 000000000..bc4402fa2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetCreateFailure.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.core + +import com.twitter.bouncer.thriftscala.Bounce +import com.twitter.tweetypie.TweetId +import com.twitter.incentives.jiminy.thriftscala.TweetNudge +import com.twitter.tweetypie.thriftscala.PostTweetResult +import com.twitter.tweetypie.thriftscala.TweetCreateState + +sealed abstract class TweetCreateFailure extends Exception { + def toPostTweetResult: PostTweetResult +} + +object TweetCreateFailure { + case class Bounced(bounce: Bounce) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.Bounce, bounce = Some(bounce)) + } + + case class AlreadyRetweeted(retweetId: TweetId) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.AlreadyRetweeted) + } + + case class Nudged(nudge: TweetNudge) extends TweetCreateFailure { + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = TweetCreateState.Nudge, nudge = Some(nudge)) + } + + case class State(state: TweetCreateState, reason: Option[String] = None) + extends TweetCreateFailure { + require(state != 
TweetCreateState.Bounce) + require(state != TweetCreateState.Ok) + require(state != TweetCreateState.Nudge) + + override def toPostTweetResult: PostTweetResult = + PostTweetResult(state = state, failureReason = reason) + override def toString: String = s"TweetCreateFailure$$State($state, $reason)" + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala new file mode 100644 index 000000000..8e72f1e89 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetData.scala @@ -0,0 +1,86 @@ +package com.twitter.tweetypie +package core + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.tweetypie.thriftscala._ + +object TweetData { + object Lenses { + val tweet: Lens[TweetData, Tweet] = Lens[TweetData, Tweet](_.tweet, _.copy(_)) + + val suppress: Lens[TweetData, Option[FilteredState.Suppress]] = + Lens[TweetData, Option[FilteredState.Suppress]]( + _.suppress, + (td, suppress) => td.copy(suppress = suppress) + ) + + val sourceTweetResult: Lens[TweetData, Option[TweetResult]] = + Lens[TweetData, Option[TweetResult]]( + _.sourceTweetResult, + (td, sourceTweetResult) => td.copy(sourceTweetResult = sourceTweetResult) + ) + + val quotedTweetResult: Lens[TweetData, Option[QuotedTweetResult]] = + Lens[TweetData, Option[QuotedTweetResult]]( + _.quotedTweetResult, + (td, quotedTweetResult) => td.copy(quotedTweetResult = quotedTweetResult) + ) + + val cacheableTweetResult: Lens[TweetData, Option[TweetResult]] = + Lens[TweetData, Option[TweetResult]]( + _.cacheableTweetResult, + (td, cacheableTweetResult) => td.copy(cacheableTweetResult = cacheableTweetResult) + ) + + val tweetCounts: Lens[TweetData, Option[StatusCounts]] = + Lens[TweetData, Option[StatusCounts]]( + _.tweet.counts, + (td, tweetCounts) => td.copy(tweet = td.tweet.copy(counts = tweetCounts)) + ) + } + + def fromCachedTweet(cachedTweet: CachedTweet, 
cachedAt: Time): TweetData = + TweetData( + tweet = cachedTweet.tweet, + completedHydrations = cachedTweet.completedHydrations.toSet, + cachedAt = Some(cachedAt), + isBounceDeleted = cachedTweet.isBounceDeleted.contains(true) + ) +} + +/** + * Encapsulates a tweet and some hydration metadata in the hydration pipeline. + * + * @param cachedAt if the tweet was read from cache, `cachedAt` contains the time at which + * the tweet was written to cache. + */ +case class TweetData( + tweet: Tweet, + suppress: Option[FilteredState.Suppress] = None, + completedHydrations: Set[HydrationType] = Set.empty, + cachedAt: Option[Time] = None, + sourceTweetResult: Option[TweetResult] = None, + quotedTweetResult: Option[QuotedTweetResult] = None, + cacheableTweetResult: Option[TweetResult] = None, + storedTweetResult: Option[StoredTweetResult] = None, + featureSwitchResults: Option[FeatureSwitchResults] = None, + // The isBounceDeleted flag is only used when reading from an underlying + // tweet repo and caching records for not-found tweets. It only exists + // as a flag on TweetData to marshal bounce-deleted through the layered + // transforming caches injected into CachingTweetRepository, ultimately + // storing this flag in thrift on CachedTweet. + // + // During tweet hydration, TweetData.isBounceDeleted is unused and + // should always be false. 
+ isBounceDeleted: Boolean = false) { + + def addHydrated(fieldIds: Set[HydrationType]): TweetData = + copy(completedHydrations = completedHydrations ++ fieldIds) + + def toCachedTweet: CachedTweet = + CachedTweet( + tweet = tweet, + completedHydrations = completedHydrations, + isBounceDeleted = if (isBounceDeleted) Some(true) else None + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala new file mode 100644 index 000000000..317309be2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/TweetResult.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Lens +import com.twitter.tweetypie.Mutation +import com.twitter.tweetypie.thriftscala.Tweet + +/** + * Helper class for building instances of `TweetResult`, which is a type alias + * for `ValueState[TweetData]`. + */ +object TweetResult { + object Lenses { + val value: Lens[TweetResult, TweetData] = + Lens[TweetResult, TweetData](_.value, (r, value) => r.copy(value = value)) + val state: Lens[TweetResult, HydrationState] = + Lens[TweetResult, HydrationState](_.state, (r, state) => r.copy(state = state)) + val tweet: Lens[TweetResult, Tweet] = value.andThen(TweetData.Lenses.tweet) + } + + def apply(value: TweetData, state: HydrationState = HydrationState.empty): TweetResult = + ValueState(value, state) + + def apply(tweet: Tweet): TweetResult = + apply(TweetData(tweet = tweet)) + + /** + * Apply this mutation to the tweet contained in the result, updating the modified flag if the mutation modifies the tweet. 
+ */ + def mutate(mutation: Mutation[Tweet]): TweetResult => TweetResult = + (result: TweetResult) => + mutation(result.value.tweet) match { + case None => result + case Some(updatedTweet) => + TweetResult( + result.value.copy(tweet = updatedTweet), + result.state ++ HydrationState.modified + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala new file mode 100644 index 000000000..3acc2b2d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/UpstreamFailure.scala @@ -0,0 +1,37 @@ +package com.twitter.tweetypie.core + +import scala.util.control.NoStackTrace + +/** + * Parent exception class for failures while talking to upstream services. These will + * be counted and then converted to servo.ServerError.DependencyError + */ +sealed abstract class UpstreamFailure(msg: String) extends Exception(msg) with NoStackTrace + +object UpstreamFailure { + case class SnowflakeFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case object UserProfileEmptyException extends UpstreamFailure("User.profile is empty") + + case object UserViewEmptyException extends UpstreamFailure("User.view is empty") + + case object UserSafetyEmptyException extends UpstreamFailure("User.safety is empty") + + case class TweetLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class UserLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class DeviceSourceLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class TFlockLookupFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case class UrlShorteningFailure(t: Throwable) extends UpstreamFailure(t.toString) + + case object MediaShortenUrlMalformedFailure + extends UpstreamFailure("Media shortened url is malformed") + + case object MediaExpandedUrlNotValidFailure + extends UpstreamFailure("Talon returns badInput on 
media expanded url") + + case class MediaServiceServerError(t: Throwable) extends UpstreamFailure(t.toString) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala new file mode 100644 index 000000000..e1b9ec0a4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/ValueState.scala @@ -0,0 +1,452 @@ +package com.twitter.tweetypie.core + +import com.twitter.servo.data.Lens +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.HydrationType + +/** + * Encapsulates a value and associated HydrationState. This class is intended to be used + * with `ValueHydrator`, as the result type for hydrators that directly produce updated values, + * in contrast with EditHydrator which uses `EditState` as a result type to produce update functions. + * + * @tparam A The type of the enclosed value, which is the result of hydration. + */ +final case class ValueState[+A](value: A, state: HydrationState) { + + /** + * Applies a function to the enclosed value and produces a new `ValueState` instance. + */ + def map[B](f: A => B): ValueState[B] = + ValueState(f(value), state) + + /** + * Produces a new `ValueState` that contains the value generated by `f`, but with state that is + * the sum of the state from this `ValueState` and the one produced by `f`. + */ + def flatMap[B](f: A => ValueState[B]): ValueState[B] = { + val ValueState(value2, state2) = f(value) + ValueState(value2, state ++ state2) + } + + /** + * Applies a function to the enclosed state and produces a new `ValueState` instance. + */ + def mapState[T](f: HydrationState => HydrationState): ValueState[A] = + ValueState(value, f(state)) + + /** + * Converts a `ValueState[A]` to an `EditState[B]`, using a lens. The resulting `EditState` + * will overwrite the lensed field with the value from this `ValueState`. 
+ */ + def edit[B, A2 >: A](lens: Lens[B, A2]): EditState[B] = + EditState[B](b => ValueState(lens.set(b, value), state)) +} + +object ValueState { + val UnmodifiedNone: ValueState[None.type] = unmodified(None) + val StitchUnmodifiedNone: Stitch[ValueState[None.type]] = Stitch.value(UnmodifiedNone) + + val UnmodifiedUnit: ValueState[Unit] = unmodified(()) + val StitchUnmodifiedUnit: Stitch[ValueState[Unit]] = Stitch.value(UnmodifiedUnit) + + val UnmodifiedNil: ValueState[Nil.type] = unmodified(Nil) + val StitchUnmodifiedNil: Stitch[ValueState[Nil.type]] = Stitch.value(UnmodifiedNil) + + /** + * Produces a ValueState instance with the given value and an empty state HydrationState. + */ + def unit[A](value: A): ValueState[A] = + ValueState[A](value, HydrationState.empty) + + def unmodified[A](value: A): ValueState[A] = + ValueState(value, HydrationState.empty) + + def modified[A](value: A): ValueState[A] = + ValueState(value, HydrationState.modified) + + def modified[A](value: A, hydrationType: HydrationType): ValueState[A] = + ValueState(value, HydrationState.modified(hydrationType)) + + def success[A](value: A, modified: Boolean): ValueState[A] = + ValueState(value, HydrationState(modified)) + + def delta[A](prev: A, next: A): ValueState[A] = + ValueState(next, HydrationState.delta(prev, next)) + + def partial[A](value: A, field: FieldByPath): ValueState[A] = + ValueState(value, HydrationState.partial(field)) + + def partial[A](value: A, fields: Set[FieldByPath]): ValueState[A] = + ValueState(value, HydrationState.partial(fields)) + + /** + * Converts a `Seq` of `ValueState[A]` to a `ValueState` of `Seq[A]`. 
+ */ + def sequence[A](seq: Seq[ValueState[A]]): ValueState[Seq[A]] = { + ValueState( + value = seq.map(_.value), + state = HydrationState.join(seq.map(_.state): _*) + ) + } + + def join[A, B](va: ValueState[A], vb: ValueState[B]): ValueState[(A, B)] = { + val state = + HydrationState.join( + va.state, + vb.state + ) + + val value = ( + va.value, + vb.value + ) + + ValueState(value, state) + } + + def join[A, B, C]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C] + ): ValueState[(A, B, C)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state + ) + + val value = ( + va.value, + vb.value, + vc.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D] + ): ValueState[(A, B, C, D)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E] + ): ValueState[(A, B, C, D, E)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F] + ): ValueState[(A, B, C, D, E, F)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: 
ValueState[G] + ): ValueState[(A, B, C, D, E, F, G)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H] + ): ValueState[(A, B, C, D, E, F, G, H)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I] + ): ValueState[(A, B, C, D, E, F, G, H, I)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J] + ): ValueState[(A, B, C, D, E, F, G, H, I, J)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, 
+ vg.value, + vh.value, + vi.value, + vj.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J, K]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J], + vk: ValueState[K] + ): ValueState[(A, B, C, D, E, F, G, H, I, J, K)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state, + vk.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value, + vj.value, + vk.value + ) + + ValueState(value, state) + } + + def join[A, B, C, D, E, F, G, H, I, J, K, L]( + va: ValueState[A], + vb: ValueState[B], + vc: ValueState[C], + vd: ValueState[D], + ve: ValueState[E], + vf: ValueState[F], + vg: ValueState[G], + vh: ValueState[H], + vi: ValueState[I], + vj: ValueState[J], + vk: ValueState[K], + vl: ValueState[L] + ): ValueState[(A, B, C, D, E, F, G, H, I, J, K, L)] = { + val state = + HydrationState.join( + va.state, + vb.state, + vc.state, + vd.state, + ve.state, + vf.state, + vg.state, + vh.state, + vi.state, + vj.state, + vk.state, + vl.state + ) + + val value = ( + va.value, + vb.value, + vc.value, + vd.value, + ve.value, + vf.value, + vg.value, + vh.value, + vi.value, + vj.value, + vk.value, + vl.value + ) + + ValueState(value, state) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala new file mode 100644 index 000000000..50952e98a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/core/package.scala @@ -0,0 +1,5 @@ +package com.twitter.tweetypie + +package object core { + type TweetResult = ValueState[TweetData] +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD new file mode 100644 index 000000000..967226ca4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/BUILD @@ -0,0 +1,35 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "merlin/util/src/main/scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/merlin:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/src/main/scala/com/twitter/strato/access/filters", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/config", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/server/context", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + 
"tweetypie/server/src/main/thrift:compiled-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala new file mode 100644 index 000000000..42841d393 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/StratoCatalogBuilder.scala @@ -0,0 +1,128 @@ +package com.twitter.tweetypie.federated + +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.scrooge.ThriftStructFieldInfo +import com.twitter.servo.util.Gate +import com.twitter.strato.catalog.Catalog +import com.twitter.strato.client.Client +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.Tweet +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.federated.columns._ +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRepositoryBuilder +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.repository.UnmentionInfoRepository +import com.twitter.tweetypie.repository.VibeRepository +import com.twitter.util.Activity +import com.twitter.util.logging.Logger + +object StratoCatalogBuilder { + + def catalog( + thriftTweetService: ThriftTweetService, + stratoClient: Client, + getUserResultsById: Gizmoduck.GetById, + callbackPromotedContentLogger: CallbackPromotedContentLogger, + statsReceiver: StatsReceiver, + enableCommunityTweetCreatesDecider: Gate[Unit], + ): Activity[Catalog[StratoFed.Column]] = { + val log = Logger(getClass) + + val getRequestContext = new GetRequestContext() + val prefetchedDataRepository = + 
PrefetchedDataRepositoryBuilder(getUserResultsById, statsReceiver) + val unmentionInfoRepository = UnmentionInfoRepository(stratoClient) + val vibeRepository = VibeRepository(stratoClient) + + val tweetPromotedContentLogger = + TweetPromotedContentLogger(callbackPromotedContentLogger) + + // A stitch group builder to be used for Federated Field Column requests. The handler must be the same across + // all Federated Field Columns to ensure requests are batched across columns for different fields + val federatedFieldGroupBuilder: FederatedFieldGroupBuilder.Type = FederatedFieldGroupBuilder( + thriftTweetService.getTweetFields) + + val columns: Seq[StratoFed.Column] = Seq( + new UnretweetColumn( + thriftTweetService.unretweet, + getRequestContext, + ), + new CreateRetweetColumn( + thriftTweetService.postRetweet, + getRequestContext, + prefetchedDataRepository, + tweetPromotedContentLogger, + statsReceiver + ), + new CreateTweetColumn( + thriftTweetService.postTweet, + getRequestContext, + prefetchedDataRepository, + unmentionInfoRepository, + vibeRepository, + tweetPromotedContentLogger, + statsReceiver, + enableCommunityTweetCreatesDecider, + ), + new DeleteTweetColumn( + thriftTweetService.deleteTweets, + getRequestContext, + ), + new GetTweetFieldsColumn(thriftTweetService.getTweetFields, statsReceiver), + new GetStoredTweetsColumn(thriftTweetService.getStoredTweets), + new GetStoredTweetsByUserColumn(thriftTweetService.getStoredTweetsByUser) + ) + + // Gather tweet field ids that are eligible to be federated field columns + val federatedFieldInfos = + Tweet.fieldInfos + .filter((info: ThriftStructFieldInfo) => + FederatedFieldColumn.isFederatedField(info.tfield.id)) + + // Instantiate the federated field columns + val federatedFieldColumns: Seq[FederatedFieldColumn] = + federatedFieldInfos.map { fieldInfo: ThriftStructFieldInfo => + val path = FederatedFieldColumn.makeColumnPath(fieldInfo.tfield) + val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) + 
log.info(f"creating federated column: $path") + new FederatedFieldColumn( + federatedFieldGroupBuilder, + thriftTweetService.setAdditionalFields, + stratoType, + fieldInfo.tfield, + ) + } + + // Instantiate the federated V1 field columns + val federatedV1FieldColumns: Seq[FederatedFieldColumn] = + federatedFieldInfos + .filter(f => FederatedFieldColumn.isMigrationFederatedField(f.tfield)) + .map { fieldInfo: ThriftStructFieldInfo => + val v1Path = FederatedFieldColumn.makeV1ColumnPath(fieldInfo.tfield) + val stratoType = ScroogeConv.typeOfFieldInfo(fieldInfo) + log.info(f"creating V1 federated column: $v1Path") + new FederatedFieldColumn( + federatedFieldGroupBuilder, + thriftTweetService.setAdditionalFields, + stratoType, + fieldInfo.tfield, + Some(v1Path) + ) + } + + // Combine the dynamic and hard coded federated columns + val allColumns: Seq[StratoFed.Column] = + columns ++ federatedFieldColumns ++ federatedV1FieldColumns + + Activity.value( + Catalog( + allColumns.map { column => + column.path -> column + }: _* + )) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala new file mode 100644 index 000000000..af3ee9fd2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/AccessPolicy.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} +import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended +import com.twitter.strato.access.Access.ClientApplicationPrivilege +import com.twitter.strato.access.Access.TwitterUserNotSuspended +import com.twitter.strato.access.ClientApplicationPrivilegeVariant +import com.twitter.strato.config._ + +object AccessPolicy { + + /** + * All Tweet Mutation operations require all of: + * - Twitter user 
authentication + * - Twitter user is not suspended + * - Contributor user, if provided, is not suspended + * - "Teams Access": user is acting their own behalf, or is a + * contributor using a client with ClientAppPriviledges.CONTRIBUTORS + * - Write privileges + */ + val TweetMutationCommonAccessPolicies: Policy = + AllOf( + Seq( + AllowTwitterUserId, + Has( + TwitterUserNotSuspended + ), + Has( + AuthenticatedTwitterUserNotSuspended + ), + AnyOf( + Seq( + TwitterUserContributingAsSelf, + Has(principal = ClientApplicationPrivilege(ClientApplicationPrivilegeVariant + .byId(ClientAppPrivileges.CONTRIBUTORS.toShort).get)) + )), + AllowWritableAccessToken + ) + ) + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala new file mode 100644 index 000000000..82550e2c5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/ApiErrors.scala @@ -0,0 +1,110 @@ +package com.twitter.tweetypie.federated +package columns + +import com.twitter.bouncer.thriftscala.Bounce +import com.twitter.finagle.http.Status +import com.twitter.finatra.api11 +import com.twitter.finatra.api11.ApiError +import com.twitter.strato.response.Err + +object ApiErrors { + // Errs ported from StatusesRetweetController + val GenericAccessDeniedErr = toErr(ApiError.GenericAccessDenied) + val AlreadyRetweetedErr = toErr(ApiError.AlreadyRetweeted) + val DuplicateStatusErr = toErr(ApiError.DuplicateStatusError) + val InvalidRetweetForStatusErr = toErr(ApiError.InvalidRetweetForStatus) + val StatusNotFoundErr = toErr(ApiError.StatusNotFound) + val BlockedUserErr = + toErr(ApiError.BlockedUserError, "retweeting this user's tweets at their request") + val ClientNotPrivilegedErr = toErr(ApiError.ClientNotPrivileged) + val UserDeniedRetweetErr = toErr(ApiError.CurrentUserSuspended) + + // Errs ported from StatusesUpdateController + val 
RateLimitExceededErr = toErr(ApiError.OverStatusUpdateLimit, "User") + val TweetUrlSpamErr = toErr(ApiError.TieredActionTweetUrlSpam) + val TweetSpammerErr = toErr(ApiError.TieredActionTweetSpammer) + val CaptchaChallengeErr = toErr(ApiError.TieredActionChallengeCaptcha) + val SafetyRateLimitExceededErr = toErr(ApiError.UserActionRateLimitExceeded, "User") + val TweetCannotBeBlankErr = // was MissingRequiredParameterException + toErr(ApiError.ForbiddenMissingParameter, "tweet_text or media") + val TweetTextTooLongErr = toErr(ApiError.StatusTooLongError) + val MalwareTweetErr = toErr(ApiError.StatusMalwareError) + val DuplicateTweetErr = toErr(ApiError.DuplicateStatusError) + val CurrentUserSuspendedErr = toErr(ApiError.CurrentUserSuspended) + val MentionLimitExceededErr = toErr(ApiError.MentionLimitInTweetExceeded) + val UrlLimitExceededErr = toErr(ApiError.UrlLimitInTweetExceeded) + val HashtagLimitExceededErr = toErr(ApiError.HashtagLimitInTweetExceeded) + val CashtagLimitExceededErr = toErr(ApiError.CashtagLimitInTweetExceeded) + val HashtagLengthLimitExceededErr = toErr(ApiError.HashtagLengthLimitInTweetExceeded) + val TooManyAttachmentTypesErr = toErr(ApiError.AttachmentTypesLimitInTweetExceeded) + val InvalidAttachmentUrlErr = toErr(ApiError.InvalidParameter("attachment_url")) + val InReplyToTweetNotFoundErr = toErr(ApiError.InReplyToTweetNotFound) + val InvalidAdditionalFieldErr = toErr(ApiError.GenericBadRequest) + def invalidAdditionalFieldWithReasonErr(failureReason: String) = + toErr(ApiError.GenericBadRequest.copy(message = failureReason)) + val InvalidUrlErr = toErr(ApiError.InvalidUrl) + val InvalidCoordinatesErr = toErr(ApiError.InvalidCoordinates) + val InvalidGeoSearchRequestIdErr = + toErr(ApiError.InvalidParameter("geo_search_request_id")) + val ConversationControlNotAuthorizedErr = toErr(ApiError.ConversationControlNotAuthorized) + val ConversationControlInvalidErr = toErr(ApiError.ConversationControlInvalid) + val 
ConversationControlReplyRestricted = toErr(ApiError.ConversationControlReplyRestricted) + + // Errors ported from StatusesDestroyController + val DeletePermissionErr = toErr(ApiError.StatusActionPermissionError("delete")) + + // See StatusesUpdateController#GenericErrorException + val GenericTweetCreateErr = toErr(ApiError.UnknownInterpreterError, "Tweet creation failed") + val InvalidBatchModeParameterErr = toErr(ApiError.InvalidParameter("batch_mode")) + val CannotConvoControlAndCommunitiesErr = + toErr(ApiError.CommunityInvalidParams, "conversation_control") + val TooManyCommunitiesErr = toErr(ApiError.CommunityInvalidParams, "communities") + val CommunityReplyTweetNotAllowedErr = toErr(ApiError.CommunityReplyTweetNotAllowed) + val ConversationControlNotSupportedErr = toErr(ApiError.ConversationControlNotSupported) + val CommunityUserNotAuthorizedErr = toErr(ApiError.CommunityUserNotAuthorized) + val CommunityNotFoundErr = toErr(ApiError.CommunityNotFound) + val CommunityProtectedUserCannotTweetErr = toErr(ApiError.CommunityProtectedUserCannotTweet) + + val SuperFollowCreateNotAuthorizedErr = toErr(ApiError.SuperFollowsCreateNotAuthorized) + val SuperFollowInvalidParamsErr = toErr(ApiError.SuperFollowsInvalidParams) + val ExclusiveTweetEngagementNotAllowedErr = toErr(ApiError.ExclusiveTweetEngagementNotAllowed) + + val SafetyLevelMissingErr = toErr(ApiError.MissingParameter("safety_level")) + + def accessDeniedByBouncerErr(bounce: Bounce) = + toErr(ApiError.AccessDeniedByBouncer, bounce.errorMessage.getOrElse(Seq.empty)) + + def tweetEngagementLimitedErr(failureReason: String) = + toErr(ApiError.TweetEngagementsLimited(failureReason)) + + def invalidMediaErr(failureReason: Option[String]) = + toErr(ApiError.invalidMediaId(failureReason)) + + val TrustedFriendsInvalidParamsErr = toErr(ApiError.TrustedFriendsInvalidParams) + val TrustedFriendsRetweetNotAllowedErr = toErr(ApiError.TrustedFriendsRetweetNotAllowed) + val TrustedFriendsEngagementNotAllowedErr = 
toErr(ApiError.TrustedFriendsEngagementNotAllowed) + val TrustedFriendsCreateNotAllowedErr = toErr(ApiError.TrustedFriendsCreateNotAllowed) + val TrustedFriendsQuoteTweetNotAllowedErr = toErr(ApiError.TrustedFriendsQuoteTweetNotAllowed) + + val StaleTweetEngagementNotAllowedErr = toErr(ApiError.StaleTweetEngagementNotAllowed) + val StaleTweetQuoteTweetNotAllowedErr = toErr(ApiError.StaleTweetQuoteTweetNotAllowed) + val StaleTweetRetweetNotAllowedErr = toErr(ApiError.StaleTweetRetweetNotAllowed) + + val CollabTweetInvalidParamsErr = toErr(ApiError.CollabTweetInvalidParams) + + val FieldEditNotAllowedErr = toErr(ApiError.FieldEditNotAllowed) + val NotEligibleForEditErr = toErr(ApiError.NotEligibleForEdit) + + def toErr(apiError: api11.ApiError, args: Any*): Err = { + val errCode = apiError.status match { + case Status.Forbidden => Err.Authorization + case Status.Unauthorized => Err.Authentication + case Status.NotFound => Err.BadRequest + case Status.BadRequest => Err.BadRequest + case _ => Err.BadRequest + } + val errMessage = s"${apiError.message.format(args.mkString(","))} (${apiError.code})" + val errContext = Some(Err.Context.Api11Error(apiError.code)) + Err(errCode, errMessage, errContext) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD new file mode 100644 index 000000000..7148dfa4b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/BUILD @@ -0,0 +1,43 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-base-http/src/main", + "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", + "geo/model/src/main/scala/com/twitter/geo/model", + "passbird/bitfields-thrift/src/main/thrift:thrift-scala", + 
"tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/accounts/util:safety-meta", + "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", + "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-service-federated-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/fed", + "strato/src/main/scala/com/twitter/strato/response", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + 
"weaverbird/common/src/main/scala/com/twitter/weaverbird/common", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala new file mode 100644 index 000000000..0acf695d2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateRetweetColumn.scala @@ -0,0 +1,184 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.accounts.util.SafetyMetadataUtils +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllOf +import com.twitter.strato.config.BouncerAccess +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.federated.columns.ApiErrors._ +import com.twitter.tweetypie.federated.columns.CreateRetweetColumn.toCreateRetweetErr +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger.RetweetEngagement +import com.twitter.tweetypie.thriftscala.TweetCreateState._ +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.{thriftscala 
=> thrift} +import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext} + +class CreateRetweetColumn( + retweet: thrift.RetweetRequest => Future[thrift.PostTweetResult], + getRequestContext: GetRequestContext, + prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse], + logTweetPromotedContent: TweetPromotedContentLogger.Type, + statsReceiver: StatsReceiver, +) extends StratoFed.Column(CreateRetweetColumn.Path) + with StratoFed.Execute.StitchWithContext + with StratoFed.HandleDarkRequests { + + override val policy: Policy = AllOf( + Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess())) + + // The underlying call to thriftTweetService.postRetweet is not idempotent + override val isIdempotent: Boolean = false + + override type Arg = gql.CreateRetweetRequest + override type Result = gql.CreateRetweetResponseWithSubqueryPrefetchItems + + override val argConv: Conv[Arg] = ScroogeConv.fromStruct + override val resultConv: Conv[Result] = ScroogeConv.fromStruct + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = OpMetadata( + Some(Production), + Some(PlainText("Creates a retweet by the calling Twitter user of the given source tweet."))) + + private val getWeaverbirdCtx = new WGetRequestContext() + + override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { + val ctx = getRequestContext(opContext) + + // First, do any request parameter validation that can result in an error + // prior to calling into thriftTweetService.retweet. + val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr) + + // Macaw-tweets returns ApiError.ClientNotPrivileged if the caller provides + // an impression_id but lacks the PROMOTED_TWEETS_IN_TIMELINE privilege. 
+ val trackingId = request.engagementRequest match { + case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline => + TrackingId.parse(engagementRequest.impressionId, statsReceiver) + case Some(e: EngagementRequest) => + throw ClientNotPrivilegedErr + case None => + None + } + + // DeviceSource is an oauth string computed from the ClientApplicationId. + // Macaw-tweets allows non-oauth callers, but GraphQL does not. An undefined + // ClientApplicationId is similar to TweetCreateState.DeviceSourceNotFound, + // which Macaw-tweets handles via a catch-all that returns + // ApiError.GenericAccessDenied + val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr) + + // Macaw-tweets doesn't perform any parameter validation for the components + // used as input to makeSafetyMetaData. + val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData( + sessionHash = ctx.sessionHash, + knownDeviceToken = ctx.knownDeviceToken, + contributorId = ctx.contributorId + ) + + val thriftRetweetRequest = thrift.RetweetRequest( + sourceStatusId = request.tweetId, + userId = ctx.twitterUserId, + contributorUserId = None, // no longer supported, per tweet_service.thrift + createdVia = deviceSource, + nullcast = request.nullcast, + trackingId = trackingId, + dark = ctx.isDarkRequest, + hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)), + safetyMetaData = Some(safetyMetadata), + ) + + val stitchRetweet = Stitch.callFuture(retweet(thriftRetweetRequest)) + + request.engagementRequest.foreach { engagement => + logTweetPromotedContent(engagement, RetweetEngagement, ctx.isDarkRequest) + } + + stitchRetweet.flatMap { result: thrift.PostTweetResult => + result.state match { + case thrift.TweetCreateState.Ok => + val r = PrefetchedDataRequest( + tweet = result.tweet.get, + sourceTweet = result.sourceTweet, + quotedTweet = result.quotedTweet, + safetyLevel = safetyLevel, + requestContext = getWeaverbirdCtx() + 
) + + prefetchedDataRepository(r) + .liftToOption() + .map((prefetchedData: Option[PrefetchedDataResponse]) => { + gql.CreateRetweetResponseWithSubqueryPrefetchItems( + data = Some(gql.CreateRetweetResponse(result.tweet.map(_.id))), + subqueryPrefetchItems = prefetchedData.map(_.value) + ) + }) + case errState => + throw toCreateRetweetErr(errState, result.bounce, result.failureReason) + } + } + } +} + +object CreateRetweetColumn { + val Path = "tweetypie/createRetweet.Tweet" + + /** + * Ported from: + * StatusesRetweetController#retweetStatus rescue block + * TweetyPieStatusRepository.toRetweetException + */ + def toCreateRetweetErr( + errState: thrift.TweetCreateState, + bounce: Option[BouncerBounce], + failureReason: Option[String] + ): Err = errState match { + case CannotRetweetBlockingUser => + BlockedUserErr + case AlreadyRetweeted => + AlreadyRetweetedErr + case Duplicate => + DuplicateStatusErr + case CannotRetweetOwnTweet | CannotRetweetProtectedTweet | CannotRetweetSuspendedUser => + InvalidRetweetForStatusErr + case UserNotFound | SourceTweetNotFound | SourceUserNotFound | CannotRetweetDeactivatedUser => + StatusNotFoundErr + case UserDeactivated | UserSuspended => + UserDeniedRetweetErr + case RateLimitExceeded => + RateLimitExceededErr + case UrlSpam => + TweetUrlSpamErr + case Spam | UserReadonly => + TweetSpammerErr + case SafetyRateLimitExceeded => + SafetyRateLimitExceededErr + case Bounce if bounce.isDefined => + accessDeniedByBouncerErr(bounce.get) + case DisabledByIpiPolicy => + failureReason + .map(tweetEngagementLimitedErr) + .getOrElse(GenericAccessDeniedErr) + case TrustedFriendsRetweetNotAllowed => + TrustedFriendsRetweetNotAllowedErr + case StaleTweetRetweetNotAllowed => + StaleTweetRetweetNotAllowedErr + case _ => + GenericAccessDeniedErr + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala new file mode 100644 index 000000000..3530d68d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/CreateTweetColumn.scala @@ -0,0 +1,546 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.accounts.util.SafetyMetadataUtils +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.bouncer.thriftscala.{Bounce => BouncerBounce} +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.geo.model.LatitudeLongitude +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllOf +import com.twitter.strato.config.BouncerAccess +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.decider.overrides.TweetyPieDeciderOverrides +import com.twitter.tweetypie.federated.columns.ApiErrors._ +import com.twitter.tweetypie.federated.columns.CreateTweetColumn.toCreateTweetErr +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataRequest +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger +import com.twitter.tweetypie.federated.promotedcontent.TweetPromotedContentLogger._ +import com.twitter.tweetypie.repository.UnmentionInfoRepository +import com.twitter.tweetypie.repository.VibeRepository +import 
com.twitter.tweetypie.thriftscala.TransientCreateContext +import com.twitter.tweetypie.thriftscala.TweetCreateContextKey +import com.twitter.tweetypie.thriftscala.TweetCreateState._ +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.tweetypie.util.TransientContextUtil +import com.twitter.tweetypie.{thriftscala => thrift} +import com.twitter.util.Throwables +import com.twitter.weaverbird.common.{GetRequestContext => WGetRequestContext} + +class CreateTweetColumn( + postTweet: thrift.PostTweetRequest => Future[thrift.PostTweetResult], + getRequestContext: GetRequestContext, + prefetchedDataRepository: PrefetchedDataRequest => Stitch[PrefetchedDataResponse], + unmentionInfoRepository: UnmentionInfoRepository.Type, + vibeRepository: VibeRepository.Type, + logTweetPromotedContent: TweetPromotedContentLogger.Type, + statsReceiver: StatsReceiver, + enableCommunityTweetCreatesDecider: Gate[Unit], +) extends StratoFed.Column(CreateTweetColumn.Path) + with StratoFed.Execute.StitchWithContext + with StratoFed.HandleDarkRequests { + + override val policy: Policy = AllOf( + Seq(AccessPolicy.TweetMutationCommonAccessPolicies, BouncerAccess())) + + // The underlying call to thriftTweetService.postTweet is not idempotent + override val isIdempotent: Boolean = false + + override type Arg = gql.CreateTweetRequest + override type Result = gql.CreateTweetResponseWithSubqueryPrefetchItems + + override val argConv: Conv[Arg] = ScroogeConv.fromStruct + override val resultConv: Conv[Result] = ScroogeConv.fromStruct + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = + OpMetadata( + Some(Production), + Some( + PlainText( + """ + Creates a tweet using the calling authenticated Twitter user as author. + NOTE, not all Tweet space fields are GraphQL queryable in the CreateTweet mutation response. 
+ See http://go/missing-create-tweet-fields. + """)) + ) + + private val getWeaverbirdCtx = new WGetRequestContext() + + override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { + + val ctx = getRequestContext(opContext) + + // First, do any request parameter validation that can result in an error + // prior to calling into thriftTweetService.postTweet. + val safetyLevel = ctx.safetyLevel.getOrElse(throw SafetyLevelMissingErr) + + val trackingId = request.engagementRequest match { + case Some(engagementRequest: EngagementRequest) if ctx.hasPrivilegePromotedTweetsInTimeline => + TrackingId.parse(engagementRequest.impressionId, statsReceiver) + case Some(e: EngagementRequest) => + throw ClientNotPrivilegedErr + case None => + None + } + + val deviceSource = ctx.deviceSource.getOrElse(throw GenericAccessDeniedErr) + + if (request.nullcast && !ctx.hasPrivilegeNullcastingAccess) { + throw GenericAccessDeniedErr + } + + val safetyMetadata = SafetyMetadataUtils.makeSafetyMetaData( + sessionHash = ctx.sessionHash, + knownDeviceToken = ctx.knownDeviceToken, + contributorId = ctx.contributorId + ) + + val cardReference: Option[thrift.CardReference] = + request.cardUri.filter(_.nonEmpty).map(thrift.CardReference(_)) + + val escherbirdEntityAnnotations: Option[thrift.EscherbirdEntityAnnotations] = + request.semanticAnnotationIds + .filter(_.nonEmpty) + .map((seq: Seq[gql.TweetAnnotation]) => seq.map(parseTweetEntityAnnotation)) + .map(thrift.EscherbirdEntityAnnotations(_)) + + val mediaEntities = request.media.map(_.mediaEntities) + val mediaUploadIds = mediaEntities.map(_.map(_.mediaId)).filter(_.nonEmpty) + + val mediaTags: Option[thrift.TweetMediaTags] = { + val mediaTagsAuthorized = !ctx.isContributorRequest + + val tagMap: Map[MediaId, Seq[thrift.MediaTag]] = + mediaEntities + .getOrElse(Nil) + .filter(_ => mediaTagsAuthorized) + .filter(_.taggedUsers.nonEmpty) + .map(mediaEntity => + mediaEntity.mediaId -> + mediaEntity.taggedUsers + .map(user_id => 
thrift.MediaTag(thrift.MediaTagType.User, Some(user_id)))) + .toMap + + Option(tagMap) + .filter(_.nonEmpty) + .map(thrift.TweetMediaTags(_)) + } + + // Can not have both conversation controls and communities defined for a tweet + // as they have conflicting permissions on who can reply to the tweet. + val communities = parseCommunityIds(escherbirdEntityAnnotations) + if (request.conversationControl.isDefined && communities.nonEmpty) { + throw CannotConvoControlAndCommunitiesErr + } + + // Currently we do not support posting to multiple communities. + if (communities.length > 1) { + throw TooManyCommunitiesErr + } + + // Kill switch for community tweets in case we need to disable them for app security. + if (communities.nonEmpty && !enableCommunityTweetCreatesDecider()) { + throw CommunityUserNotAuthorizedErr + } + + // additionalFields is used to marshal multiple input params and + // should only be defined if one or more of those params are defined. + val additionalFields: Option[Tweet] = + cardReference + .orElse(escherbirdEntityAnnotations) + .orElse(mediaTags) + .map(_ => + thrift.Tweet( + 0L, + cardReference = cardReference, + escherbirdEntityAnnotations = escherbirdEntityAnnotations, + mediaTags = mediaTags + )) + + val transientContext: Option[TransientCreateContext] = + parseTransientContext( + request.batchCompose, + request.periscope, + ctx.twitterUserId, + ) + + // PostTweetRequest.additionalContext is marked as deprecated in favor of .transientContext, + // but the REST API still supports it and it is still passed along through Tweetypie, and + // FanoutService and Notifications still depend on it. 
+ val additionalContext: Option[Map[TweetCreateContextKey, String]] = + transientContext.map(TransientContextUtil.toAdditionalContext) + + val thriftPostTweetRequest = thrift.PostTweetRequest( + userId = ctx.twitterUserId, + text = request.tweetText, + createdVia = deviceSource, + inReplyToTweetId = request.reply.map(_.inReplyToTweetId), + geo = request.geo.flatMap(parseTweetCreateGeo), + autoPopulateReplyMetadata = request.reply.isDefined, + excludeReplyUserIds = request.reply.map(_.excludeReplyUserIds).filter(_.nonEmpty), + nullcast = request.nullcast, + // Send a dark request to Tweetypie if the dark_request directive is set or + // if the Tweet is undo-able. + dark = ctx.isDarkRequest || request.undoOptions.exists(_.isUndo), + hydrationOptions = Some(HydrationOptions.writePathHydrationOptions(ctx.cardsPlatformKey)), + remoteHost = ctx.remoteHost, + safetyMetaData = Some(safetyMetadata), + attachmentUrl = request.attachmentUrl, + mediaUploadIds = mediaUploadIds, + mediaMetadata = None, + transientContext = transientContext, + additionalContext = additionalContext, + conversationControl = request.conversationControl.map(parseTweetCreateConversationControl), + exclusiveTweetControlOptions = request.exclusiveTweetControlOptions.map { _ => + thrift.ExclusiveTweetControlOptions() + }, + trustedFriendsControlOptions = + request.trustedFriendsControlOptions.map(parseTrustedFriendsControlOptions), + editOptions = request.editOptions.flatMap(_.previousTweetId.map(thrift.EditOptions(_))), + collabControlOptions = request.collabControlOptions.map(parseCollabControlOptions), + additionalFields = additionalFields, + trackingId = trackingId, + noteTweetOptions = request.noteTweetOptions.map(options => + thrift.NoteTweetOptions( + options.noteTweetId, + options.mentionedScreenNames, + options.mentionedUserIds, + options.isExpandable)) + ) + + val stitchPostTweet = + Stitch.callFuture { + TweetyPieDeciderOverrides.ConversationControlUseFeatureSwitchResults.On { + 
postTweet(thriftPostTweetRequest) + } + } + + for { + engagement <- request.engagementRequest + if !request.reply.exists(_.inReplyToTweetId == 0) // no op per go/rb/845242 + engagementType = if (request.reply.isDefined) ReplyEngagement else TweetEngagement + } logTweetPromotedContent(engagement, engagementType, ctx.isDarkRequest) + + stitchPostTweet.flatMap { result: thrift.PostTweetResult => + result.state match { + + case thrift.TweetCreateState.Ok => + val unmentionSuccessCounter = statsReceiver.counter("unmention_info_success") + val unmentionFailuresCounter = statsReceiver.counter("unmention_info_failures") + val unmentionFailuresScope = statsReceiver.scope("unmention_info_failures") + + val unmentionInfoStitch = result.tweet match { + case Some(tweet) => + unmentionInfoRepository(tweet) + .onFailure { t => + unmentionFailuresCounter.incr() + unmentionFailuresScope.counter(Throwables.mkString(t): _*).incr() + } + .onSuccess { _ => + unmentionSuccessCounter.incr() + } + .rescue { + case _ => + Stitch.None + } + case _ => + Stitch.None + } + + val vibeSuccessCounter = statsReceiver.counter("vibe_success") + val vibeFailuresCounter = statsReceiver.counter("vibe_failures") + val vibeFailuresScope = statsReceiver.scope("vibe_failures") + + val vibeStitch = result.tweet match { + case Some(tweet) => + vibeRepository(tweet) + .onSuccess { _ => + vibeSuccessCounter.incr() + } + .onFailure { t => + vibeFailuresCounter.incr() + vibeFailuresScope.counter(Throwables.mkString(t): _*).incr() + } + .rescue { + case _ => + Stitch.None + } + case _ => + Stitch.None + } + + Stitch + .join(unmentionInfoStitch, vibeStitch) + .liftToOption() + .flatMap { prefetchFields => + val r = PrefetchedDataRequest( + tweet = result.tweet.get, + sourceTweet = result.sourceTweet, + quotedTweet = result.quotedTweet, + safetyLevel = safetyLevel, + unmentionInfo = prefetchFields.flatMap(params => params._1), + vibe = prefetchFields.flatMap(params => params._2), + requestContext = 
getWeaverbirdCtx() + ) + + prefetchedDataRepository(r) + .liftToOption() + .map((prefetchedData: Option[PrefetchedDataResponse]) => { + gql.CreateTweetResponseWithSubqueryPrefetchItems( + data = Some(gql.CreateTweetResponse(result.tweet.map(_.id))), + subqueryPrefetchItems = prefetchedData.map(_.value) + ) + }) + } + + case errState => + throw toCreateTweetErr(errState, result.bounce, result.failureReason) + } + } + } + + private[this] def parseTweetCreateGeo(gqlGeo: gql.TweetGeo): Option[thrift.TweetCreateGeo] = { + val coordinates: Option[thrift.GeoCoordinates] = + gqlGeo.coordinates.map { coords => + LatitudeLongitude.of(coords.latitude, coords.longitude) match { + case Return(latlon: LatitudeLongitude) => + thrift.GeoCoordinates( + latitude = latlon.latitudeDegrees, + longitude = latlon.longitudeDegrees, + geoPrecision = latlon.precision, + display = coords.displayCoordinates + ) + case Throw(_) => + throw InvalidCoordinatesErr + } + } + + val geoSearchRequestId = gqlGeo.geoSearchRequestId.map { id => + if (id.isEmpty) { + throw InvalidGeoSearchRequestIdErr + } + thrift.TweetGeoSearchRequestID(id) + } + + if (coordinates.isEmpty && gqlGeo.placeId.isEmpty) { + None + } else { + Some( + thrift.TweetCreateGeo( + coordinates = coordinates, + placeId = gqlGeo.placeId, + geoSearchRequestId = geoSearchRequestId + )) + } + } + + private[this] def parseTweetCreateConversationControl( + gqlCC: gql.TweetConversationControl + ): thrift.TweetCreateConversationControl = + gqlCC.mode match { + case gql.ConversationControlMode.ByInvitation => + ConversationControls.Create.byInvitation() + case gql.ConversationControlMode.Community => + ConversationControls.Create.community() + case gql.ConversationControlMode.EnumUnknownConversationControlMode(_) => + throw ConversationControlNotSupportedErr + } + + private[this] def parseTweetEntityAnnotation( + gqlTweetAnnotation: gql.TweetAnnotation + ): TweetEntityAnnotation = + TweetEntityAnnotation( + gqlTweetAnnotation.groupId, + 
gqlTweetAnnotation.domainId, + gqlTweetAnnotation.entityId + ) + + private[this] def parseCommunityIds( + escherbirdAnnotations: Option[thrift.EscherbirdEntityAnnotations] + ): Seq[Long] = + escherbirdAnnotations + .map(_.entityAnnotations).getOrElse(Nil) + .flatMap { + case CommunityAnnotation(id) => Seq(id) + case _ => Nil + } + + private[this] def parseBatchMode( + gqlBatchComposeMode: gql.BatchComposeMode + ): thrift.BatchComposeMode = { + + gqlBatchComposeMode match { + case gql.BatchComposeMode.BatchFirst => + thrift.BatchComposeMode.BatchFirst + case gql.BatchComposeMode.BatchSubsequent => + thrift.BatchComposeMode.BatchSubsequent + case gql.BatchComposeMode.EnumUnknownBatchComposeMode(_) => + throw InvalidBatchModeParameterErr + } + } + + private[this] def parseTransientContext( + gqlBatchComposeMode: Option[gql.BatchComposeMode], + gqlPeriscope: Option[gql.TweetPeriscopeContext], + twitterUserId: UserId, + ): Option[TransientCreateContext] = { + val batchComposeMode = gqlBatchComposeMode.map(parseBatchMode) + + // Per c.t.fanoutservice.model.Tweet#deviceFollowType, isLive=None and Some(false) are + // equivalent and the creatorId is discarded in both cases. 
+ val periscopeIsLive = gqlPeriscope.map(_.isLive).filter(_ == true) + val periscopeCreatorId = if (periscopeIsLive.isDefined) Some(twitterUserId) else None + + if (batchComposeMode.isDefined || periscopeIsLive.isDefined) { + Some( + thrift.TransientCreateContext( + batchCompose = batchComposeMode, + periscopeIsLive = periscopeIsLive, + periscopeCreatorId = periscopeCreatorId + ) + ) + } else { + None + } + } + + private[this] def parseTrustedFriendsControlOptions( + gqlTrustedFriendsControlOptions: gql.TrustedFriendsControlOptions + ): thrift.TrustedFriendsControlOptions = { + thrift.TrustedFriendsControlOptions( + trustedFriendsListId = gqlTrustedFriendsControlOptions.trustedFriendsListId + ) + } + + private[this] def parseCollabControlOptions( + gqlCollabControlOptions: gql.CollabControlOptions + ): thrift.CollabControlOptions = { + gqlCollabControlOptions.collabControlType match { + case gql.CollabControlType.CollabInvitation => + thrift.CollabControlOptions.CollabInvitation( + thrift.CollabInvitationOptions( + collaboratorUserIds = gqlCollabControlOptions.collaboratorUserIds + ) + ) + case gql.CollabControlType.EnumUnknownCollabControlType(_) => + throw CollabTweetInvalidParamsErr + } + } +} + +object CreateTweetColumn { + val Path = "tweetypie/createTweet.Tweet" + + def toCreateTweetErr( + errState: thrift.TweetCreateState, + bounce: Option[BouncerBounce], + failureReason: Option[String] + ): Err = errState match { + case TextCannotBeBlank => + TweetCannotBeBlankErr + case TextTooLong => + TweetTextTooLongErr + case Duplicate => + DuplicateStatusErr + case MalwareUrl => + MalwareTweetErr + case UserDeactivated | UserSuspended => + // should not occur since this condition is caught by access policy filters + CurrentUserSuspendedErr + case RateLimitExceeded => + RateLimitExceededErr + case UrlSpam => + TweetUrlSpamErr + case Spam | UserReadonly => + TweetSpammerErr + case SpamCaptcha => + CaptchaChallengeErr + case SafetyRateLimitExceeded => + 
SafetyRateLimitExceededErr + case Bounce if bounce.isDefined => + accessDeniedByBouncerErr(bounce.get) + case MentionLimitExceeded => + MentionLimitExceededErr + case UrlLimitExceeded => + UrlLimitExceededErr + case HashtagLimitExceeded => + HashtagLimitExceededErr + case CashtagLimitExceeded => + CashtagLimitExceededErr + case HashtagLengthLimitExceeded => + HashtagLengthLimitExceededErr + case TooManyAttachmentTypes => + TooManyAttachmentTypesErr + case InvalidUrl => + InvalidUrlErr + case DisabledByIpiPolicy => + failureReason + .map(tweetEngagementLimitedErr) + .getOrElse(GenericTweetCreateErr) + case InvalidAdditionalField => + failureReason + .map(invalidAdditionalFieldWithReasonErr) + .getOrElse(InvalidAdditionalFieldErr) + // InvalidImage has been deprecated by tweetypie. Use InvalidMedia instead. + case InvalidMedia | InvalidImage | MediaNotFound => + invalidMediaErr(failureReason) + case InReplyToTweetNotFound => + InReplyToTweetNotFoundErr + case InvalidAttachmentUrl => + InvalidAttachmentUrlErr + case ConversationControlNotAllowed => + ConversationControlNotAuthorizedErr + case InvalidConversationControl => + ConversationControlInvalidErr + case ReplyTweetNotAllowed => + ConversationControlReplyRestricted + case ExclusiveTweetEngagementNotAllowed => + ExclusiveTweetEngagementNotAllowedErr + case CommunityReplyTweetNotAllowed => + CommunityReplyTweetNotAllowedErr + case CommunityUserNotAuthorized => + CommunityUserNotAuthorizedErr + case CommunityNotFound => + CommunityNotFoundErr + case SuperFollowsInvalidParams => + SuperFollowInvalidParamsErr + case SuperFollowsCreateNotAuthorized => + SuperFollowCreateNotAuthorizedErr + case CommunityProtectedUserCannotTweet => + CommunityProtectedUserCannotTweetErr + case TrustedFriendsInvalidParams => + TrustedFriendsInvalidParamsErr + case TrustedFriendsEngagementNotAllowed => + TrustedFriendsEngagementNotAllowedErr + case TrustedFriendsCreateNotAllowed => + TrustedFriendsCreateNotAllowedErr + case 
TrustedFriendsQuoteTweetNotAllowed => + TrustedFriendsQuoteTweetNotAllowedErr + case CollabTweetInvalidParams => + CollabTweetInvalidParamsErr + case StaleTweetEngagementNotAllowed => + StaleTweetEngagementNotAllowedErr + case StaleTweetQuoteTweetNotAllowed => + StaleTweetQuoteTweetNotAllowedErr + case FieldEditNotAllowed => + FieldEditNotAllowedErr + case NotEligibleForEdit => + NotEligibleForEditErr + case _ => + GenericTweetCreateErr + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala new file mode 100644 index 000000000..48828d7da --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/DeleteTweetColumn.scala @@ -0,0 +1,81 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.prefetcheddata.PrefetchedDataResponse +import com.twitter.tweetypie.thriftscala.TweetDeleteState +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.{thriftscala => thrift} +import com.twitter.util.Future + +class DeleteTweetColumn( + deleteTweet: thrift.DeleteTweetsRequest => Future[Seq[thrift.DeleteTweetResult]], + getRequestContext: GetRequestContext, +) extends StratoFed.Column(DeleteTweetColumn.Path) + with StratoFed.Execute.StitchWithContext + with StratoFed.HandleDarkRequests { + + override val policy: Policy = 
AccessPolicy.TweetMutationCommonAccessPolicies + + override val isIdempotent: Boolean = true + + override type Arg = gql.DeleteTweetRequest + override type Result = gql.DeleteTweetResponseWithSubqueryPrefetchItems + + override val argConv: Conv[Arg] = ScroogeConv.fromStruct + override val resultConv: Conv[Result] = ScroogeConv.fromStruct + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = + OpMetadata(Some(Production), Some(PlainText("Deletes a tweet by the calling Twitter user."))) + + override def execute(request: Arg, opContext: OpContext): Stitch[Result] = { + val ctx = getRequestContext(opContext) + + val thriftDeleteTweetRequest = thrift.DeleteTweetsRequest( + tweetIds = Seq(request.tweetId), + // byUserId is picked up by the context in tweetypie.deleteTweet, + // but we're passing it in here to be explicit + byUserId = Some(ctx.twitterUserId), + ) + + val stitchDeleteTweet = handleDarkRequest(opContext)( + light = { + Stitch.callFuture(deleteTweet(thriftDeleteTweetRequest)) + }, + // For dark requests, we don't want to send traffic to tweetypie. + // Since the response is the same regardless of the request, we take a no-op + // action instead. + dark = Stitch.value(Seq(thrift.DeleteTweetResult(request.tweetId, TweetDeleteState.Ok))) + ) + + stitchDeleteTweet.map { result: Seq[thrift.DeleteTweetResult] => + result.headOption match { + case Some(thrift.DeleteTweetResult(id, TweetDeleteState.Ok)) => + gql.DeleteTweetResponseWithSubqueryPrefetchItems( + data = Some(gql.DeleteTweetResponse(Some(id))), + // Prefetch data is always NotFound to prevent subqueries from hydrating via weaverbird + // and possibly returning inconsistent results, i.e. a Found tweet. 
+ subqueryPrefetchItems = Some(PrefetchedDataResponse.notFound(id).value) + ) + case Some(thrift.DeleteTweetResult(_, TweetDeleteState.PermissionError)) => + throw ApiErrors.DeletePermissionErr + case _ => + throw ApiErrors.GenericAccessDeniedErr + } + } + } +} + +object DeleteTweetColumn { + val Path = "tweetypie/deleteTweet.Tweet" +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala new file mode 100644 index 000000000..c6b3cf246 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldColumn.scala @@ -0,0 +1,141 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.io.Buf +import com.twitter.scrooge.TFieldBlob +import com.twitter.stitch.Stitch +import com.twitter.strato.access.Access +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllowAll +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.data.Type +import com.twitter.strato.data.Val +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.serialization.MVal +import com.twitter.strato.serialization.Thrift +import com.twitter.strato.util.Strings +import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult +import com.twitter.tweetypie.thriftscala.SetAdditionalFieldsRequest +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetFieldsResultState.Found +import com.twitter.util.Future +import org.apache.thrift.protocol.TField + +/** + * Federated strato column to return tweet fields + * @param federatedFieldsGroup Group to be used for Stitch 
batching. + * This is a function that takes a GroupOptions and returns a FederatedFieldGroup. + * Using a function that accepts a GroupOptions allows for Stitch to handle a new group for distinct GroupOptions. + * @param setAdditionalFields Handler to set additional fields on tweets. + * @param stratoValueType Type to be returned by the strato column. + * @param tfield Tweet thrift field to be stored + * @param pathOverride Optional path to be used in the strato catalog; when absent, the path is derived from tfield via FederatedFieldColumn.makeColumnPath + */ +class FederatedFieldColumn( + federatedFieldsGroup: FederatedFieldGroupBuilder.Type, + setAdditionalFields: SetAdditionalFieldsRequest => Future[Unit], + stratoValueType: Type, + tfield: TField, + pathOverride: Option[String] = None) + extends StratoFed.Column(pathOverride.getOrElse(FederatedFieldColumn.makeColumnPath(tfield))) + with StratoFed.Fetch.StitchWithContext + with StratoFed.Put.Stitch { + + type Key = Long + type View = Unit + type Value = Val.T + + override val keyConv: Conv[Key] = Conv.ofType + override val viewConv: Conv[View] = Conv.ofType + override val valueConv: Conv[Value] = Conv(stratoValueType, identity, identity) + + override val policy: Policy = AllowAll + + /* + * A fetch that proxies GetTweetFieldsColumn.fetch but only requests and + * returns one specific field. 
+ */ + override def fetch(tweetId: Key, view: View, opContext: OpContext): Stitch[Result[Value]] = { + + val twitterUserId: Option[UserId] = Access.getTwitterUserId match { + // Access.getTwitterUserId should return a value when request is made on behalf of a user + // and will not return a value otherwise + case Some(twitterUser) => Some(twitterUser.id) + case None => None + } + + val stitchGroup = federatedFieldsGroup(GroupOptions(twitterUserId)) + + Stitch + .call(FederatedFieldReq(tweetId, tfield.id), stitchGroup).map { + result: GetTweetFieldsResult => + result.tweetResult match { + case Found(f) => + f.tweet.getFieldBlob(tfield.id) match { + case Some(v: TFieldBlob) => + found(blobToVal(v)) + case None => missing + } + case _ => missing + } + } + + } + + /* + * A strato put interface for writing a single additional field to a tweet + */ + override def put(tweetId: Key, value: Val.T): Stitch[Unit] = { + val tweet: Tweet = Tweet(id = tweetId).setField(valToBlob(value)) + val request: SetAdditionalFieldsRequest = SetAdditionalFieldsRequest(tweet) + Stitch.callFuture(setAdditionalFields(request)) + } + + val mval: Thrift.Codec = MVal.codec(stratoValueType).thrift(4) + + def valToBlob(value: Val.T): TFieldBlob = + TFieldBlob(tfield, mval.write[Buf](value, Thrift.compactProto)) + + def blobToVal(thriftFieldBlob: TFieldBlob): Val.T = + mval.read(thriftFieldBlob.content, Thrift.compactProto) + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Production), + description = Some(PlainText(s"A federated column for the field tweet.$stratoValueType")) + ) +} + +object FederatedFieldColumn { + val idAllowlist: Seq[Short] = Seq( + Tweet.CoreDataField.id, + Tweet.LanguageField.id, + Tweet.ConversationMutedField.id + ) + val ID_START = 157 + val ID_END = 32000 + + private val MigrationFields: Seq[Short] = Seq(157) + + def isFederatedField(id: Short) = id >= ID_START && id < ID_END || 
idAllowlist.contains(id) + + def isMigrationFederatedField(tField: TField): Boolean = MigrationFields.contains(tField.id) + + /* federated field column strato configs must conform to this + * path name scheme for tweetypie to pick them up + */ + def makeColumnPath(tField: TField) = { + val columnName = Strings.toCamelCase(tField.name.stripSuffix("id")) + s"tweetypie/fields/${columnName}.Tweet" + } + + def makeV1ColumnPath(tField: TField): String = { + val columnName = Strings.toCamelCase(tField.name.stripSuffix("id")) + s"tweetypie/fields/$columnName-V1.Tweet" + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala new file mode 100644 index 000000000..88b9db624 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldGroup.scala @@ -0,0 +1,88 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.MapGroup +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.federated.columns.FederatedFieldGroupBuilder.allCountFields +import com.twitter.tweetypie.federated.columns.FederatedFieldGroupBuilder.countTweetFields +import com.twitter.tweetypie.thriftscala.GetTweetFieldsOptions +import com.twitter.tweetypie.thriftscala.GetTweetFieldsRequest +import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult +import com.twitter.tweetypie.thriftscala.StatusCounts +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetInclude +import com.twitter.util.Future +import com.twitter.util.Throw +import com.twitter.util.Try + +case class GroupOptions(twitterUserId: Option[UserId]) + +object FederatedFieldGroupBuilder { + type Type = GroupOptions => MapGroup[FederatedFieldReq, GetTweetFieldsResult] + + def apply( + getTweetFieldsHandler: 
GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]] + ): Type = { + FederatedFieldGroup(getTweetFieldsHandler, _) + } + + // The set of non-deprecated count field includes + val allCountFields: Set[TweetInclude] = Set( + TweetInclude.CountsFieldId(StatusCounts.RetweetCountField.id), + TweetInclude.CountsFieldId(StatusCounts.QuoteCountField.id), + TweetInclude.CountsFieldId(StatusCounts.FavoriteCountField.id), + TweetInclude.CountsFieldId(StatusCounts.ReplyCountField.id), + TweetInclude.CountsFieldId(StatusCounts.BookmarkCountField.id), + ) + + // Tweet field includes which contain counts. These are the only fields where count field includes are relevant. + val countTweetFields: Set[TweetInclude] = Set( + TweetInclude.TweetFieldId(Tweet.CountsField.id), + TweetInclude.TweetFieldId(Tweet.PreviousCountsField.id)) +} + +case class FederatedFieldGroup( + getTweetFieldsHandler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]], + options: GroupOptions) + extends MapGroup[FederatedFieldReq, GetTweetFieldsResult] { + override protected def run( + reqs: Seq[FederatedFieldReq] + ): Future[FederatedFieldReq => Try[GetTweetFieldsResult]] = { + + // requesting the field ids of the requested additional field ids in this group + val fieldIncludes: Set[TweetInclude] = reqs.map { req: FederatedFieldReq => + TweetInclude.TweetFieldId(req.fieldId) + }.toSet + + val allIncludes: Set[TweetInclude] = if (fieldIncludes.intersect(countTweetFields).nonEmpty) { + // if counts are being requested we include all count fields by default + // because there is no way to specify them individually with federated fields, + fieldIncludes ++ allCountFields + } else { + fieldIncludes + } + + val gtfOptions = GetTweetFieldsOptions( + tweetIncludes = allIncludes, + forUserId = options.twitterUserId, + // visibility filtering happens at the api layer / tweet top level + // and therefore is not required at individual field level + safetyLevel = Some(SafetyLevel.FilterNone) + ) + 
getTweetFieldsHandler( + GetTweetFieldsRequest( + tweetIds = reqs.map(_.tweetId).distinct, + options = gtfOptions + ) + ).map { + response => + { req => + response.find(_.tweetId == req.tweetId) match { + case Some(result) => Try(result) + case None => + Throw(new NoSuchElementException(s"response not found for tweet: ${req.tweetId}")) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala new file mode 100644 index 000000000..594f46273 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/FederatedFieldReq.scala @@ -0,0 +1,7 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.tweetypie.TweetId + +// Case class to be used for grouping Stitch requests +// for Federated Fields +case class FederatedFieldReq(tweetId: TweetId, fieldId: Short) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala new file mode 100644 index 000000000..f4aaa6e12 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsByUserColumn.scala @@ -0,0 +1,83 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.stitch.Stitch +import com.twitter.strato.access.Access.LdapGroup +import com.twitter.strato.catalog.Fetch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Has +import com.twitter.strato.config.Path +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import 
com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsByUserView +import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsByUserResponse +import com.twitter.tweetypie.{thriftscala => thrift} +import com.twitter.util.Future + +class GetStoredTweetsByUserColumn( + handler: thrift.GetStoredTweetsByUserRequest => Future[thrift.GetStoredTweetsByUserResult]) + extends StratoFed.Column(GetStoredTweetsByUserColumn.Path) + with StratoFed.Fetch.Stitch { + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Production), + description = + Some(PlainText("Fetches hydrated Tweets for a particular User regardless of Tweet state.")) + ) + override val policy: Policy = AnyOf( + Seq( + FromColumns(Set(Path("tweetypie/data-provider/storedTweets.User"))), + Has(LdapGroup("tweetypie-team")) + )) + + override type Key = UserId + override type View = GetStoredTweetsByUserView + override type Value = GetStoredTweetsByUserResponse + + override val keyConv: Conv[Key] = Conv.ofType + override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetStoredTweetsByUserView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetStoredTweetsByUserResponse] + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + val request = thrift.GetStoredTweetsByUserRequest( + userId = key, + options = Some( + thrift.GetStoredTweetsByUserOptions( + bypassVisibilityFiltering = view.bypassVisibilityFiltering, + setForUserId = view.setForUserId, + startTimeMsec = view.startTimeMsec, + endTimeMsec = view.endTimeMsec, + cursor = view.cursor, + startFromOldest = view.startFromOldest, + additionalFieldIds = view.additionalFieldIds + )) + ) + + Stitch + .callFuture(handler(request)) + 
.map { result => + Fetch.Result.found( + GetStoredTweetsByUserResponse( + storedTweets = result.storedTweets, + cursor = result.cursor + )) + } + .rescue { + case _ => Stitch.exception(Err(Err.Internal)) + } + } + +} + +object GetStoredTweetsByUserColumn { + val Path = "tweetypie/internal/getStoredTweets.User" +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala new file mode 100644 index 000000000..20afd87e1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetStoredTweetsColumn.scala @@ -0,0 +1,99 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Stitch +import com.twitter.strato.access.Access.LdapGroup +import com.twitter.strato.catalog.Fetch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AnyOf +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.FromColumns +import com.twitter.strato.config.Has +import com.twitter.strato.config.Path +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.{thriftscala => thrift} +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsView +import com.twitter.tweetypie.thriftscala.federated.GetStoredTweetsResponse +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +class GetStoredTweetsColumn( + getStoredTweets: thrift.GetStoredTweetsRequest => Future[Seq[thrift.GetStoredTweetsResult]]) + extends 
StratoFed.Column(GetStoredTweetsColumn.Path) + with StratoFed.Fetch.Stitch { + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Production), + description = Some(PlainText("Fetches hydrated Tweets regardless of Tweet state.")) + ) + override val policy: Policy = AnyOf( + Seq( + FromColumns( + Set( + Path("tweetypie/data-provider/storedTweets.User"), + Path("note_tweet/data-provider/noteTweetForZipbird.User"))), + Has(LdapGroup("tweetypie-team")) + )) + + override type Key = TweetId + override type View = GetStoredTweetsView + override type Value = GetStoredTweetsResponse + + override val keyConv: Conv[Key] = Conv.ofType + override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetStoredTweetsView] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetStoredTweetsResponse] + + override def fetch(key: Key, view: View): Stitch[Result[Value]] = { + Stitch.call(key, Group(view)) + } + + private case class Group(view: GetStoredTweetsView) + extends MapGroup[TweetId, Fetch.Result[GetStoredTweetsResponse]] { + override protected def run( + keys: Seq[TweetId] + ): Future[TweetId => Try[Result[GetStoredTweetsResponse]]] = { + val options = thrift.GetStoredTweetsOptions( + bypassVisibilityFiltering = view.bypassVisibilityFiltering, + forUserId = view.forUserId, + additionalFieldIds = view.additionalFieldIds + ) + + getStoredTweets(thrift.GetStoredTweetsRequest(keys, Some(options))) + .map(transformAndGroupByTweetId) + .handle { + case _ => + _ => Throw[Result[GetStoredTweetsResponse]](Err(Err.Internal)) + } + } + + private def transformAndGroupByTweetId( + results: Seq[thrift.GetStoredTweetsResult] + ): Map[TweetId, Try[Fetch.Result[GetStoredTweetsResponse]]] = { + results + .map(result => GetStoredTweetsResponse(result.storedTweet)) + .groupBy(_.storedTweet.tweetId) + .map { + case (tweetId, Seq(result)) => (tweetId, Return(Fetch.Result.found(result))) + case (tweetId, 
multipleResults) => + ( + tweetId, + Throw(Err(Err.BadRequest, s"Got ${multipleResults.size} results for $tweetId"))) + } + } + + } +} + +object GetStoredTweetsColumn { + val Path = "tweetypie/internal/getStoredTweets.Tweet" +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala new file mode 100644 index 000000000..2daa9bdb4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/GetTweetFieldsColumn.scala @@ -0,0 +1,172 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.Fetch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.AllowAll +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.client_id.PreferForwardedServiceIdentifierForStrato +import com.twitter.tweetypie.thriftscala.GetTweetFieldsOptions +import com.twitter.tweetypie.thriftscala.GetTweetFieldsRequest +import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult +import com.twitter.tweetypie.thriftscala.TweetVisibilityPolicy +import com.twitter.util.Future +import com.twitter.util.Try + +/** + * Strato federated column implementing GetTweetFields as a Fetch. 
+ */ +class GetTweetFieldsColumn( + handler: GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]], + stats: StatsReceiver) + extends StratoFed.Column(GetTweetFieldsColumn.Path) + with StratoFed.Fetch.StitchWithContext { + + /** + * At this point, this fetch op will reject any requests that specify + * visibilityPolicy other than USER_VISIBLE, so no access control is needed. + */ + override val policy: Policy = AllowAll + + override type Key = TweetId + override type View = GetTweetFieldsOptions + override type Value = GetTweetFieldsResult + + override val keyConv: Conv[Key] = Conv.ofType + override val viewConv: Conv[View] = ScroogeConv.fromStruct[GetTweetFieldsOptions] + override val valueConv: Conv[Value] = ScroogeConv.fromStruct[GetTweetFieldsResult] + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = OpMetadata( + lifecycle = Some(Production), + description = + Some(PlainText("Get of tweets that allows fetching only specific subsets of the data.")), + ) + + val safetyOpContextOnlyCounter = stats.counter("safety_op_context_only") + val safetyOpContextOnlyValueScope = stats.scope("safety_op_context_only_value") + val safetyOpContextOnlyCallerScope = stats.scope("safety_op_context_only_caller") + + val safetyViewOnlyCounter = stats.counter("safety_view_only") + val safetyViewOnlyValueScope = stats.scope("safety_view_only_value") + val safetyViewOnlyCallerScope = stats.scope("safety_view_only_caller") + + val safetyLevelInconsistencyCounter = stats.counter("safety_level_inconsistency") + val safetyLevelInconsistencyValueScope = stats.scope("safety_level_inconsistency_value") + val safetyLevelInconsistencyCallerScope = stats.scope("safety_level_inconsistency_caller") + + override def fetch(key: Key, view: View, ctx: OpContext): Stitch[Result[Value]] = { + compareSafetyLevel(view, ctx) + checkVisibilityPolicyUserVisible(view).flatMap { _ => + Stitch.call(key, Group(view)) + } + } + + /** + * Only allow 
[[TweetVisibilityPolicy.UserVisible]] visibilityPolicy. + * + * This column requires access policy in order to serve requests with visibilityPolicy + * other than [[TweetVisibilityPolicy.UserVisible]]. Before we support access control, + * reject all requests that are not safe. + */ + private def checkVisibilityPolicyUserVisible(view: View): Stitch[Unit] = + view.visibilityPolicy match { + case TweetVisibilityPolicy.UserVisible => Stitch.value(()) + case otherValue => + Stitch.exception( + Err( + Err.BadRequest, + "GetTweetFields does not support access control on Strato yet. " + + s"Hence visibilityPolicy can only take the default ${TweetVisibilityPolicy.UserVisible} value, " + + s"got: ${otherValue}." + )) + } + + /** Compare the SafetyLevels in the View and OpContext */ + private def compareSafetyLevel(view: View, ctx: OpContext): Unit = + (view.safetyLevel, ctx.safetyLevel) match { + case (None, None) => + case (Some(viewSafety), None) => { + safetyViewOnlyCounter.incr() + safetyViewOnlyValueScope.counter(viewSafety.name).incr() + PreferForwardedServiceIdentifierForStrato.serviceIdentifier + .foreach(serviceId => safetyViewOnlyCallerScope.counter(serviceId.toString).incr()) + } + case (None, Some(ctxSafety)) => { + safetyOpContextOnlyCounter.incr() + safetyOpContextOnlyValueScope.counter(ctxSafety.name).incr() + PreferForwardedServiceIdentifierForStrato.serviceIdentifier + .foreach(serviceId => safetyOpContextOnlyCallerScope.counter(serviceId.toString).incr()) + } + case (Some(viewSafety), Some(ctxSafety)) => + def safeStringEquals(a: String, b: String) = + a.toLowerCase().trim().equals(b.toLowerCase().trim()) + if (!safeStringEquals(viewSafety.name, ctxSafety.name)) { + safetyLevelInconsistencyCounter.incr() + safetyLevelInconsistencyValueScope.counter(viewSafety.name + '-' + ctxSafety.name).incr() + PreferForwardedServiceIdentifierForStrato.serviceIdentifier + .foreach(serviceId => + safetyLevelInconsistencyCallerScope.counter(serviceId.toString).incr()) + 
} + } + + /** + * Means of batching of [[GetTweetFieldsColumn]] calls. + * + * Only calls issued against the same instance of [[GetTweetFieldsColumn]] + * are batched as Stitch clusters group objects based on equality, + * and nested case class implicitly captures [[GetTweetFieldsColumn]] reference. + */ + private case class Group(view: GetTweetFieldsOptions) + extends MapGroup[TweetId, Fetch.Result[GetTweetFieldsResult]] { + + /** + * Batches given [[TweetId]] lookups in a single [[GetTweetFieldsRequest]] + * and returns a result mapped by [[TweetId]]. + */ + override protected def run( + keys: Seq[TweetId] + ): Future[TweetId => Try[Fetch.Result[GetTweetFieldsResult]]] = + handler( + GetTweetFieldsRequest( + // Sorting the keys makes for simpler matchers in the tests + // as matching on a Seq needs to be in order. + tweetIds = keys.sorted, + options = view, + )).map(groupByTweetId) + + /** + * Groups given [[GetTweetFieldsResult]] objects by [[TweetId]] and returns the mapping. + */ + private def groupByTweetId( + allResults: Seq[GetTweetFieldsResult] + ): TweetId => Try[Fetch.Result[GetTweetFieldsResult]] = { + allResults + .groupBy(_.tweetId) + .mapValues { + case Seq(result) => Try(Fetch.Result.found(result)) + case manyResults => + Try { + throw Err( + Err.Dependency, + s"Expected one result per tweet ID, got ${manyResults.length}") + } + } + } + } +} + +object GetTweetFieldsColumn { + val Path = "tweetypie/getTweetFields.Tweet" +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala new file mode 100644 index 000000000..d1e00821a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/HydrationOptions.scala @@ -0,0 +1,22 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.tweetypie.{thriftscala => thrift} + +object HydrationOptions { + + def 
writePathHydrationOptions( + cardsPlatformKey: Option[String] + ) = + thrift.WritePathHydrationOptions( + // The GraphQL API extracts or "lifts" the ApiTweet.card reference field from the + // ApiTweet.card.url returned by Tweetypie. Tweetypie's card hydration business logic + // selects the single correct Card URL by first making Expandodo.getCards2 requests for + // the Tweet's cardReference, or all of the Tweet's URL entities in cases where Tweet + // does not have a stored cardReference, and then selecting the last of the hydrated + // cards returned by Expandodo. + includeCards = true, + cardsPlatformKey = cardsPlatformKey, + // The GraphQL API only supports quoted tweet results formatted per go/simplequotedtweet. + simpleQuotedTweet = true, + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala new file mode 100644 index 000000000..0030bcd40 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TrackingId.scala @@ -0,0 +1,29 @@ +package com.twitter.tweetypie.federated +package columns + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.tweetypie.StatsReceiver +import com.twitter.util.logging.Logger + +object TrackingId { + private[this] val log = Logger(getClass) + + def parse(s: String, statsReceiver: StatsReceiver = NullStatsReceiver): Option[Long] = { + val trackingStats = statsReceiver.scope("tracking_id_parser") + + val parsedCountCounter = trackingStats.scope("parsed").counter("count") + val parseFailedCounter = trackingStats.scope("parse_failed").counter("count") + Option(s).map(_.trim).filter(_.nonEmpty).flatMap { idStr => + try { + val id = java.lang.Long.parseLong(idStr, 16) + parsedCountCounter.incr() + Some(id) + } catch { + case _: NumberFormatException => + parseFailedCounter.incr() + log.warn(s"invalid tracking ID: '$s'") + None + } + } + } +} 
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala new file mode 100644 index 000000000..74bd0569d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/TweetypieContactInfo.scala @@ -0,0 +1,11 @@ +package com.twitter.tweetypie.federated.columns + +import com.twitter.strato.config.ContactInfo + +object TweetypieContactInfo + extends ContactInfo( + contactEmail = "", + ldapGroup = "", + jiraProject = "", + slackRoomId = "" + ) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala new file mode 100644 index 000000000..489285986 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns/UnretweetColumn.scala @@ -0,0 +1,69 @@ +package com.twitter.tweetypie +package federated.columns + +import com.twitter.stitch.Stitch +import com.twitter.strato.catalog.OpMetadata +import com.twitter.strato.config.ContactInfo +import com.twitter.strato.config.Policy +import com.twitter.strato.data.Conv +import com.twitter.strato.data.Description.PlainText +import com.twitter.strato.data.Lifecycle.Production +import com.twitter.strato.fed.StratoFed +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.thrift.ScroogeConv +import com.twitter.tweetypie.federated.context.GetRequestContext +import com.twitter.tweetypie.federated.context.RequestContext +import com.twitter.tweetypie.thriftscala.{graphql => gql} +import com.twitter.tweetypie.{thriftscala => thrift} + +class UnretweetColumn( + unretweet: thrift.UnretweetRequest => Future[thrift.UnretweetResult], + getRequestContext: GetRequestContext, +) extends StratoFed.Column(UnretweetColumn.Path) + with StratoFed.Execute.StitchWithContext + 
with StratoFed.HandleDarkRequests { + + override val policy: Policy = AccessPolicy.TweetMutationCommonAccessPolicies + + // It's acceptable to retry or reapply an unretweet operation, + // as multiple calls result in the same end state. + override val isIdempotent: Boolean = true + + override type Arg = gql.UnretweetRequest + override type Result = gql.UnretweetResponseWithSubqueryPrefetchItems + + override val argConv: Conv[Arg] = ScroogeConv.fromStruct + override val resultConv: Conv[Result] = ScroogeConv.fromStruct + + override val contactInfo: ContactInfo = TweetypieContactInfo + override val metadata: OpMetadata = + OpMetadata( + Some(Production), + Some(PlainText("Removes any retweets by the calling user of the given source tweet."))) + + override def execute(gqlRequest: Arg, opContext: OpContext): Stitch[Result] = { + val ctx: RequestContext = getRequestContext(opContext) + val req = thrift.UnretweetRequest( + ctx.twitterUserId, + gqlRequest.sourceTweetId, + ) + + val stitchUnretweet = handleDarkRequest(opContext)( + light = Stitch.callFuture(unretweet(req)), + // For dark requests, we don't want to send traffic to tweetypie. + // Since the response is the same regardless of the request, we take a no-op + // action instead. 
+ dark = Stitch.value(thrift.UnretweetResult(state = thrift.TweetDeleteState.Ok)) + ) + + stitchUnretweet.map { _ => + gql.UnretweetResponseWithSubqueryPrefetchItems( + data = Some(gql.UnretweetResponse(Some(gqlRequest.sourceTweetId))) + ) + } + } +} + +object UnretweetColumn { + val Path = "tweetypie/unretweet.Tweet" +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD new file mode 100644 index 000000000..942c66697 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/BUILD @@ -0,0 +1,27 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "finatra-internal/tfe", + "passbird/bitfields-thrift/src/main/thrift:thrift-scala", + "src/scala/com/twitter/common/ip_address_utils", + "src/thrift/com/twitter/context:feature-context-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/ip_address_utils:ip-address-utils-thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "strato/src/main/scala/com/twitter/strato/access", + "strato/src/main/scala/com/twitter/strato/config", + "strato/src/main/scala/com/twitter/strato/context", + "strato/src/main/scala/com/twitter/strato/data", + "strato/src/main/scala/com/twitter/strato/opcontext", + "strato/src/main/scala/com/twitter/strato/response", + "strato/src/main/scala/com/twitter/strato/thrift", + "strato/src/main/thrift/com/twitter/strato/context:thrift-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "twitter-context/src/main/scala", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala new file mode 100644 index 000000000..170ba3c5c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context/RequestContext.scala @@ -0,0 +1,131 @@ +package com.twitter.tweetypie +package federated.context + +import com.twitter.common.ip_address_utils.ClientIpAddressUtils +import com.twitter.context.thriftscala.Viewer +import com.twitter.context.TwitterContext +import com.twitter.finagle.core.util.InetAddressUtil +import com.twitter.passbird.bitfield.clientprivileges.thriftscala.{Constants => ClientAppPrivileges} +import com.twitter.finatra.tfe.HttpHeaderNames +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.strato.access.Access.ClientApplicationPrivilege +import com.twitter.strato.access.Access +import com.twitter.strato.access.ClientApplicationPrivilegeVariant +import com.twitter.strato.context.StratoContext +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.response.Err +import com.twitter.weaverbird.common.GetPlatformKey + +/** + * [[RequestContext]] exists to avoid wiring the federated column + * implementations directly to the request data that is derived from the + * contextual environment. Columns should not directly reference + * TwitterContext, StratoContext, strato.access.Access, HTTP headers, etc. + * Each column operation operates on two input parameters: a request (i.e. + * a column operation's Arg) and a [[RequestContext]]. 
+ */ +private[federated] case class RequestContext( + clientApplicationId: Option[AppId] = None, + deviceSource: Option[String] = None, + knownDeviceToken: Option[KnownDeviceToken] = None, + remoteHost: Option[String] = None, + twitterUserId: UserId, + contributorId: Option[UserId] = None, + isDarkRequest: Boolean = false, + hasPrivilegeNullcastingAccess: Boolean = false, + hasPrivilegePromotedTweetsInTimeline: Boolean = false, + sessionHash: Option[String] = None, + cardsPlatformKey: Option[String] = None, + safetyLevel: Option[SafetyLevel] = None, +) { + def isContributorRequest = contributorId.exists(_ != twitterUserId) +} + +/** + * Provides a single place to derive request data from the contextual + * environment. Defined as a sealed class (vs an object) to allow mocking + * in unit tests. + */ +private[federated] sealed class GetRequestContext() { + // Bring Tweetypie permitted TwitterContext into scope + private[this] val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + /** + * When TwitterUserIdNotDefined is thrown, it's likely that the column + * access control configuration lacks `AllowTwitterUserId` or other + * Policy that ensures the caller is authenticated. 
+ */ + private[federated] val TwitterUserIdNotDefined = + Err(Err.Authentication, "User authentication is required for this operation.") + + private[this] val SessionHashHeaderName = "x-tfe-session-hash" + private[this] def hasClientApplicationPrivilege(id: Int): Boolean = + Access.getPrincipals.contains( + ClientApplicationPrivilege( + ClientApplicationPrivilegeVariant + .byId(id.toShort).get)) + + private[this] def getRequestHeader(headerName: String): Option[String] = + StratoContext + .current() + .propagatedHeaders + .flatMap(_.get(headerName)) + + def apply(opContext: OpContext): RequestContext = { + val twitterUserId = Access.getTwitterUserId match { + // Access.getTwitterUserId should return a value as long as the column + // policy includes AllowTwitterUserId, which guarantees the presence of + // the value. + case Some(twitterUser) => twitterUser.id + case None => throw TwitterUserIdNotDefined + } + + // contributorId should only be defined when the authenticated user differs + // from the "Twitter user" + val contributorId = + Access.getAuthenticatedTwitterUserId.map(_.id).filter(_ != twitterUserId) + + val twitterContext = TwitterContext().getOrElse(Viewer()) + + val deviceSource = twitterContext.clientApplicationId.map("oauth:" + _) + + // Ported from StatusesUpdateController#getBirdherdOptions and + // BirdherdOption.UserIp(request.clientHost) + val remoteHost: Option[String] = + getRequestHeader(HttpHeaderNames.X_TWITTER_AUDIT_IP_THRIFT.toLowerCase) // use the new header + .flatMap(ClientIpAddressUtils.decodeClientIpAddress(_)) + .flatMap(ClientIpAddressUtils.getString(_)) + .orElse( + getRequestHeader( + HttpHeaderNames.X_TWITTER_AUDIT_IP.toLowerCase + ) // fallback to old way before migration is completed + .map(h => InetAddressUtil.getByName(h.trim).getHostAddress) + ) + + val isDarkRequest = opContext.darkRequest.isDefined + + val sessionHash = getRequestHeader(SessionHashHeaderName) + + val cardsPlatformKey = 
twitterContext.clientApplicationId.map(GetPlatformKey(_)) + + val safetyLevel = opContext.safetyLevel + + RequestContext( + clientApplicationId = twitterContext.clientApplicationId, + deviceSource = deviceSource, + knownDeviceToken = twitterContext.knownDeviceToken, + remoteHost = remoteHost, + twitterUserId = twitterUserId, + contributorId = contributorId, + isDarkRequest = isDarkRequest, + hasPrivilegeNullcastingAccess = + hasClientApplicationPrivilege(ClientAppPrivileges.NULLCASTING_ACCESS), + hasPrivilegePromotedTweetsInTimeline = + hasClientApplicationPrivilege(ClientAppPrivileges.PROMOTED_TWEETS_IN_TIMELINE), + sessionHash = sessionHash, + cardsPlatformKey = cardsPlatformKey, + safetyLevel = safetyLevel, + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD new file mode 100644 index 000000000..06a2a8c10 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/BUILD @@ -0,0 +1,32 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-core/src/main", + "tweetypie/servo/util/src/main/scala", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:safety-result-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-compat/src/main/scala/com/twitter/stitch/compat", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "stitch/stitch-gizmoduck", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + 
"strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/rpc", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/common", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/common", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/converters/tweet", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/hydrators", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/mappers", + "weaverbird/common/src/main/scala/com/twitter/weaverbird/repositories", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala new file mode 100644 index 000000000..d829955db --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata/PrefetchedDataRepository.scala @@ -0,0 +1,166 @@ +package com.twitter.tweetypie +package federated +package prefetcheddata + +import com.twitter.consumer_privacy.mention_controls.thriftscala.UnmentionInfo +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.QueryFields +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.strato.graphql.thriftscala.CacheMissStrategy +import com.twitter.strato.graphql.thriftscala.PrefetchedData +import com.twitter.strato.graphql.thriftscala.TweetResult +import 
com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Throwables +import com.twitter.vibes.thriftscala.VibeV2 +import com.twitter.weaverbird.common.GetRequestContext +import com.twitter.weaverbird.common.PerTOOAppCallerStats +import com.twitter.weaverbird.common.RequestContext +import com.twitter.weaverbird.converters.tweet.WeaverbirdEntitySetMutations +import com.twitter.weaverbird.converters.tweet.WeaverbirdTweetMutations +import com.twitter.weaverbird.hydrators._ +import com.twitter.weaverbird.mappers.ApiTweetPrefetchedMapper +import com.twitter.weaverbird.repositories.UserRepository +import com.twitter.weaverbird.converters.common.EntityRenderingOptions + +private[federated] final case class PrefetchedDataRequest( + tweet: Tweet, + sourceTweet: Option[Tweet], + quotedTweet: Option[Tweet], + unmentionInfo: Option[UnmentionInfo] = None, + vibe: Option[VibeV2] = None, + safetyLevel: SafetyLevel, + requestContext: RequestContext) + +private[federated] final case class PrefetchedDataResponse(value: PrefetchedData) + +private[federated] object PrefetchedDataResponse { + // For NotFound, there is no subsequent result or quoted_tweet_results field, so both + // settings are false here. These deciders will be removed post migration. 
+ private[this] val prefetchedMapper = new ApiTweetPrefetchedMapper( + skipTweetResultPrefetchItem = () => false + ) + def notFound(tweetId: Long): PrefetchedDataResponse = + PrefetchedDataResponse( + value = prefetchedMapper.getPrefetchedData( + tweetId = tweetId, + apiTweet = None, + tweetResult = None + ) + ) +} + +private[federated] object PrefetchedDataRepository { + def apply( + thriftTweetToApiTweet: ThriftTweetToApiTweet, + prefetchedMapper: ApiTweetPrefetchedMapper, + statsReceiver: StatsReceiver, + ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = + (request: PrefetchedDataRequest) => { + val thriftTweetToApiTweetRequest = ThriftTweetToApiTweetRequest( + tweet = request.tweet, + sourceTweet = request.sourceTweet, + quotedTweet = request.quotedTweet, + // For Tweet writes, filteredReason will always be None. + filteredReason = None, + safetyLevel = request.safetyLevel, + requestContext = request.requestContext, + entityRenderingOptions = EntityRenderingOptions() + ) + + val successCounter = statsReceiver.counter("success") + val failuresCounter = statsReceiver.counter("failures") + val failuresScope = statsReceiver.scope("failures") + + thriftTweetToApiTweet + .arrow(thriftTweetToApiTweetRequest) + .onSuccess(_ => successCounter.incr()) + .onFailure { t => + failuresCounter.incr() + failuresScope.counter(Throwables.mkString(t): _*).incr() + } + .map((resp: ThriftTweetToApiTweetResponse) => { + val prefetchedData: PrefetchedData = prefetchedMapper.getPrefetchedData( + tweetId = request.tweet.id, + apiTweet = Some(resp.apiTweet), + // since ApiTweet was hydrated, we can fabricate a TweetResult.Tweet + tweetResult = Some(TweetResult.Tweet(request.tweet.id)), + unmentionInfo = request.unmentionInfo, + editControl = request.tweet.editControl, + previousCounts = request.tweet.previousCounts, + vibe = request.vibe, + editPerspective = request.tweet.editPerspective, + noteTweet = request.tweet.noteTweet + ) + + // Notify GraphQL API to not attempt
hydration for missing + // ApiTweet/TweetResult fields. This is only needed on the + // Tweet write path since the newly created Tweet may not + // be fully persisted yet in tbird Manhattan. + val shortCircuitedPrefetchedData = prefetchedData.copy( + onCacheMiss = CacheMissStrategy.ShortCircuitExisting + ) + + PrefetchedDataResponse(shortCircuitedPrefetchedData) + }) + } +} + +private[federated] object PrefetchedDataRepositoryBuilder { + def apply( + getUserResultsById: Gizmoduck.GetById, + statsReceiver: StatsReceiver + ): PrefetchedDataRequest => Stitch[PrefetchedDataResponse] = { + val repoStats = statsReceiver.scope("repositories") + + case class GetUserResultById( + queryFields: Set[QueryFields], + lookupContext: LookupContext, + ) extends SeqGroup[UserId, UserResult] { + override def run(keys: Seq[UserId]): Future[Seq[Try[UserResult]]] = + LegacySeqGroup.liftToSeqTry(getUserResultsById((lookupContext, keys, queryFields))) + + override def maxSize: Int = 100 + } + + val stitchGetUserResultById: UserRepository.GetUserResultById = + (userId: UserId, queryFields: Set[QueryFields], lookupContext: LookupContext) => + Stitch.call(userId, GetUserResultById(queryFields, lookupContext)) + + val userRepository = new UserRepository(stitchGetUserResultById, repoStats) + + // Note, this is weaverbird.common.GetRequestContext + val getRequestContext = new GetRequestContext() + + // TwiggyUserHydrator is needed to hydrate TwiggyUsers for CWC and misc. 
logic + val twiggyUserHydrator = new TwiggyUserHydrator(userRepository, getRequestContext) + + val weaverbirdMutations = new WeaverbirdTweetMutations( + new WeaverbirdEntitySetMutations( + new PerTOOAppCallerStats(statsReceiver, getRequestContext) + ) + ) + + val prefetchedMapper = new ApiTweetPrefetchedMapper( + // do not skip this in mutation path as we depend on it + skipTweetResultPrefetchItem = () => false + ) + + val thriftTweetToApiTweet: ThriftTweetToApiTweet = + new FoundThriftTweetToApiTweet( + statsReceiver, + twiggyUserHydrator, + weaverbirdMutations + ) + PrefetchedDataRepository( + thriftTweetToApiTweet, + prefetchedMapper, + repoStats.scope("prefetched_data_repo") + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD new file mode 100644 index 000000000..f0ed3efd0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "ads-common/loggingclient/src/main/scala", + "src/scala/com/twitter/ads/internal/pcl/service", + "src/scala/com/twitter/ads/internal/pcl/strato_adaptor", + "src/thrift/com/twitter/ads/adserver:ads_shared_types-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/ads/internal/pcl:promoted_content_input-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "strato/src/main/scala/com/twitter/strato/server/context", + "twitter-context/src/main/scala", + "util/util-stats/src/main/scala/com/twitter/finagle/stats", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala new file mode 100644 index 000000000..f3a285d65 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent/TweetPromotedContentLogger.scala @@ -0,0 +1,40 @@ +package com.twitter.tweetypie +package federated +package promotedcontent + +import com.twitter.ads.callback.thriftscala.EngagementRequest +import com.twitter.ads.internal.pcl.service.CallbackPromotedContentLogger +import com.twitter.ads.internal.pcl.strato_adaptor.PromotedContentInputProvider +import com.twitter.ads.internal.pcl.thriftscala.PromotedContentInput +import com.twitter.adserver.thriftscala.EngagementType +import com.twitter.util.Future + +object TweetPromotedContentLogger { + sealed abstract class TweetEngagementType(val engagementType: EngagementType) + case object TweetEngagement extends TweetEngagementType(EngagementType.Send) + case object ReplyEngagement extends TweetEngagementType(EngagementType.Reply) + case object RetweetEngagement extends TweetEngagementType(EngagementType.Retweet) + + type Type = (EngagementRequest, TweetEngagementType, Boolean) => Future[Unit] + + private[this] val TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def apply(callbackPromotedContentLogger: CallbackPromotedContentLogger): Type = + ( + engagementRequest: EngagementRequest, + tweetEngagementType: TweetEngagementType, + isDark: Boolean + ) => { + val pci: PromotedContentInput = + PromotedContentInputProvider(TwitterContext, engagementRequest) + + // The real logging is fire-and-forget, so we can create the Future and ignore returning it. 
+ Future.when(!isDark) { + callbackPromotedContentLogger.logNonTrendEngagement( + pci, + tweetEngagementType.engagementType, + pci.impressionId) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD new file mode 100644 index 000000000..0bf98375c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/BUILD @@ -0,0 +1,43 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-base-http/src/main", + "finatra-internal/api11/src/main/scala/com/twitter/finatra/api11:errors", + "geo/model/src/main/scala/com/twitter/geo/model", + "passbird/bitfields-thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util/src/main/scala", + "tweetypie/servo/util/src/main/scala:exception", + "src/scala/com/twitter/accounts/util:safety-meta", + "src/thrift/com/twitter/ads/adserver:ad_engagement_details-scala", + "src/thrift/com/twitter/ads/adserver:preroll_metadata-scala", + "src/thrift/com/twitter/ads/callback:engagement_request-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet_service_graphql-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:graphql-scala", + "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql-scala", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/context", + "strato/src/main/scala/com/twitter/strato/fed", + 
"strato/src/main/scala/com/twitter/strato/response", + "strato/src/main/scala/com/twitter/strato/test/config/bouncer", + "strato/src/main/scala/com/twitter/strato/thrift", + "strato/src/main/thrift/com/twitter/strato/context:thrift-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/columns", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/context", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/prefetcheddata", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/promotedcontent", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service", + "tweetypie/common/src/scala/com/twitter/tweetypie/decider/overrides", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala new file mode 100644 index 000000000..a020bdd3e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/federated/warmups/StratoCatalogWarmups.scala @@ -0,0 +1,140 @@ +package com.twitter.tweetypie +package federated +package warmups + +import com.twitter.context.TwitterContext +import com.twitter.context.thriftscala.Viewer +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.strato.access.Access +import com.twitter.strato.access.Access.AccessToken +import com.twitter.strato.access.Access.AuthenticatedTwitterUserId +import com.twitter.strato.access.Access.AuthenticatedTwitterUserNotSuspended +import com.twitter.strato.access.Access.TwitterUserId +import com.twitter.strato.access.Access.TwitterUserNotSuspended +import com.twitter.strato.catalog.Ops +import com.twitter.strato.client.StaticClient +import 
com.twitter.strato.context.StratoContext +import com.twitter.strato.opcontext.DarkRequest +import com.twitter.strato.opcontext.OpContext +import com.twitter.strato.test.config.bouncer.TestPrincipals +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.tweetypie.federated.columns.CreateRetweetColumn +import com.twitter.tweetypie.federated.columns.CreateTweetColumn +import com.twitter.tweetypie.federated.columns.DeleteTweetColumn +import com.twitter.tweetypie.federated.columns.UnretweetColumn +import com.twitter.tweetypie.service.WarmupQueriesSettings +import com.twitter.tweetypie.thriftscala.graphql._ +import com.twitter.util.logging.Logger +import com.twitter.util.Future +import com.twitter.util.Stopwatch + +object StratoCatalogWarmups { + private[this] val log = Logger(getClass) + + // Performs warmup queries, failing after 30 seconds + def warmup( + warmupSettings: WarmupQueriesSettings, + catalog: PartialFunction[String, Ops] + ): Future[Unit] = { + val elapsed = Stopwatch.start() + // note: we need to supply bouncer principals here, because the + // columns are gated by a bouncer policy + Access + .withPrincipals(WarmupPrincipals) { + StratoContext.withOpContext(WarmupOpContext) { + TwitterContext.let(viewer = WarmupViewer) { + warmupSettings.clientId.asCurrent { + Stitch.run(executeDarkly(catalog)) + } + } + } + } + .onSuccess { _ => log.info("warmup completed in %s".format(elapsed())) } + .onFailure { t => log.error("could not complete warmup queries before startup.", t) } + } + + private val WarmupTwitterUserId = 0L + + private val WarmupPrincipals = Set( + TestPrincipals.normalStratoBouncerAccessPrincipal, + AuthenticatedTwitterUserId(WarmupTwitterUserId), + TwitterUserId(WarmupTwitterUserId), + TwitterUserNotSuspended, + AuthenticatedTwitterUserNotSuspended, + AccessToken(isWritable = true) + ) + + private[this] val RwebClientId = 0L + + private[this] val WarmupViewer = Viewer( + userId = Some(WarmupTwitterUserId), + 
authenticatedUserId = Some(WarmupTwitterUserId), + clientApplicationId = Some(RwebClientId), + ) + + private[this] val WarmupOpContext = + OpContext + .safetyLevel(SafetyLevel.TweetWritesApi.name) + .copy(darkRequest = Some(DarkRequest())) + .toThrift() + + private[this] val EllenOscarSelfie = 440322224407314432L + + private[this] val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + private[this] def executeDarkly(catalog: PartialFunction[String, Ops]): Stitch[Unit] = { + val stratoClient = new StaticClient(catalog) + val tweetCreator = + stratoClient.executer[CreateTweetRequest, CreateTweetResponseWithSubqueryPrefetchItems]( + CreateTweetColumn.Path) + + val tweetDeletor = + stratoClient + .executer[DeleteTweetRequest, DeleteTweetResponseWithSubqueryPrefetchItems]( + DeleteTweetColumn.Path) + + val retweetCreator = + stratoClient + .executer[CreateRetweetRequest, CreateRetweetResponseWithSubqueryPrefetchItems]( + CreateRetweetColumn.Path) + + val unretweetor = + stratoClient + .executer[UnretweetRequest, UnretweetResponseWithSubqueryPrefetchItems]( + UnretweetColumn.Path) + + val stitchCreateTweet = + tweetCreator + .execute(CreateTweetRequest("getting warmer")) + .onSuccess(_ => log.info(s"${CreateTweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${CreateTweetColumn.Path} warmup fail: $e")) + + val stitchDeleteTweet = + tweetDeletor + .execute(DeleteTweetRequest(-1L)) + .onSuccess(_ => log.info(s"${DeleteTweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${DeleteTweetColumn.Path} warmup fail: $e")) + + val stitchCreateRetweet = + retweetCreator + .execute(CreateRetweetRequest(EllenOscarSelfie)) + .onSuccess(_ => log.info(s"${CreateRetweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${CreateRetweetColumn.Path} warmup fail: $e")) + + val stitchUnretweet = + unretweetor + .execute(UnretweetRequest(EllenOscarSelfie)) + .onSuccess(_ => 
log.info(s"${UnretweetColumn.Path} warmup success")) + .onFailure(e => log.info(s"${UnretweetColumn.Path} warmup fail: $e")) + + Stitch + .join( + stitchCreateTweet, + stitchDeleteTweet, + stitchCreateRetweet, + stitchUnretweet, + ).unit + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala new file mode 100644 index 000000000..b9c3c8616 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/AttachmentBuilder.scala @@ -0,0 +1,185 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetutil.DmDeepLink +import com.twitter.tweetutil.TweetPermalink +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.DeviceSource +import com.twitter.tweetypie.thriftscala.QuotedTweet +import com.twitter.tweetypie.thriftscala.ShortenedUrl +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetCreateState + +case class AttachmentBuilderRequest( + tweetId: TweetId, + user: User, + mediaUploadIds: Option[Seq[Long]], + cardReference: Option[CardReference], + attachmentUrl: Option[String], + remoteHost: Option[String], + darkTraffic: Boolean, + deviceSource: DeviceSource) { + val ctx: ValidationContext = ValidationContext( + user = user, + mediaUploadIds = mediaUploadIds, + cardReference = cardReference + ) + val passThroughResponse: AttachmentBuilderResult = + AttachmentBuilderResult(attachmentUrl = attachmentUrl, validationContext = ctx) +} + +case class ValidationContext( + user: User, + mediaUploadIds: 
Option[Seq[Long]], + cardReference: Option[CardReference]) + +case class AttachmentBuilderResult( + attachmentUrl: Option[String] = None, + quotedTweet: Option[QuotedTweet] = None, + extraChars: Int = 0, + validationContext: ValidationContext) + +object AttachmentBuilder { + + private[this] val log = Logger(getClass) + private[this] val attachmentCountLogger = Logger( + "com.twitter.tweetypie.handler.CreateAttachmentCount" + ) + + type Type = FutureArrow[AttachmentBuilderRequest, AttachmentBuilderResult] + type ValidationType = FutureEffect[AttachmentBuilderResult] + + def validateAttachmentUrl(attachmentUrl: Option[String]): Unit.type = + attachmentUrl match { + case None => Unit + case Some(TweetPermalink(_, _)) => Unit + case Some(DmDeepLink(_)) => Unit + case _ => throw TweetCreateFailure.State(TweetCreateState.InvalidAttachmentUrl) + } + + def validateAttachments( + stats: StatsReceiver, + validateCardRef: Gate[Option[String]] + ): AttachmentBuilder.ValidationType = + FutureEffect { result: AttachmentBuilderResult => + validateAttachmentUrl(result.attachmentUrl) + + val ctx = result.validationContext + + val cardRef = ctx.cardReference.filter { + case CardReferenceUriExtractor(NonTombstone(_)) => true + case _ => false + } + + if (result.quotedTweet.isDefined && cardRef.isEmpty) { + Future.Unit + } else { + val attachmentCount = + Seq( + ctx.mediaUploadIds, + result.attachmentUrl, + result.quotedTweet + ).count(_.nonEmpty) + + val userAgent = TwitterContext().flatMap(_.userAgent) + if (attachmentCount + cardRef.count(_ => true) > 1) { + attachmentCountLogger.warn( + s"Too many attachment types on tweet create from user: ${ctx.user.id}, " + + s"agent: '${userAgent}', media: ${ctx.mediaUploadIds}, " + + s"attachmentUrl: ${result.attachmentUrl}, cardRef: $cardRef" + ) + stats.counter("too_many_attachment_types_with_cardref").incr() + } + Future.when(attachmentCount + cardRef.count(_ => validateCardRef(userAgent)) > 1) { + 
Future.exception(TweetCreateFailure.State(TweetCreateState.TooManyAttachmentTypes)) + } + } + } + + private val queryInclude = TweetQuery.Include(Set(Tweet.CoreDataField.id)) + + private val queryOptions = TweetQuery.Options(include = queryInclude) + + def buildUrlShortenerCtx(request: AttachmentBuilderRequest): UrlShortener.Context = + UrlShortener.Context( + tweetId = request.tweetId, + userId = request.user.id, + createdAt = SnowflakeId(request.tweetId).time, + userProtected = request.user.safety.get.isProtected, + clientAppId = request.deviceSource.clientAppId, + remoteHost = request.remoteHost, + dark = request.darkTraffic + ) + + def asQuotedTweet(tweet: Tweet, shortenedUrl: ShortenedUrl): QuotedTweet = + getShare(tweet) match { + case None => QuotedTweet(tweet.id, getUserId(tweet), Some(shortenedUrl)) + case Some(share) => QuotedTweet(share.sourceStatusId, share.sourceUserId, Some(shortenedUrl)) + } + + def tweetPermalink(request: AttachmentBuilderRequest): Option[TweetPermalink] = + request.attachmentUrl.collectFirst { + // prevent tweet-quoting cycles + case TweetPermalink(screenName, quotedTweetId) if request.tweetId > quotedTweetId => + TweetPermalink(screenName, quotedTweetId) + } + + def apply( + tweetRepo: TweetRepository.Optional, + urlShortener: UrlShortener.Type, + validateAttachments: AttachmentBuilder.ValidationType, + stats: StatsReceiver, + denyNonTweetPermalinks: Gate[Unit] = Gate.False + ): Type = { + val tweetGetter = TweetRepository.tweetGetter(tweetRepo, queryOptions) + val attachmentNotPermalinkCounter = stats.counter("attachment_url_not_tweet_permalink") + val quotedTweetFoundCounter = stats.counter("quoted_tweet_found") + val quotedTweetNotFoundCounter = stats.counter("quoted_tweet_not_found") + + def buildAttachmentResult(request: AttachmentBuilderRequest) = + tweetPermalink(request) match { + case Some(qtPermalink) => + tweetGetter(qtPermalink.tweetId).flatMap { + case Some(tweet) => + quotedTweetFoundCounter.incr() + val ctx = 
buildUrlShortenerCtx(request) + urlShortener((qtPermalink.url, ctx)).map { shortenedUrl => + AttachmentBuilderResult( + quotedTweet = Some(asQuotedTweet(tweet, shortenedUrl)), + extraChars = shortenedUrl.shortUrl.length + 1, + validationContext = request.ctx + ) + } + case None => + quotedTweetNotFoundCounter.incr() + log.warn( + s"unable to extract quote tweet from attachment builder request: $request" + ) + if (denyNonTweetPermalinks()) { + throw TweetCreateFailure.State( + TweetCreateState.SourceTweetNotFound, + Some(s"quoted tweet is not found from given permalink: $qtPermalink") + ) + } else { + Future.value(request.passThroughResponse) + } + } + case _ => + attachmentNotPermalinkCounter.incr() + Future.value(request.passThroughResponse) + } + + FutureArrow { request => + for { + result <- buildAttachmentResult(request) + () <- validateAttachments(result) + } yield result + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD new file mode 100644 index 000000000..2475b5f1a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/BUILD @@ -0,0 +1,88 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/joda-time", + "3rdparty/jvm/org/apache/thrift:libthrift", + "3rdparty/jvm/org/geotools:gt-referencing", + "3rdparty/jvm/org/locationtech/spatial4j", + "compliance/user-consent/src/main/scala/com/twitter/userconsent/compliance/birthdate", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "diffshow", + "eventbus/client", + "featureswitches/featureswitches-core/src/main/scala", + "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", + "flock-client", + "flock-client/src/main/thrift:thrift-scala", + 
"geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/primitives", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "gizmoduck/common/src/main/scala/com/twitter/gizmoduck/util:scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/scrooge/scala", + "scrooge-internal/scrooge-schema/src/main/scala/com/twitter/scrooge/schema/tree", + "scrooge-internal/src/main/scala/com/twitter/scrooge_internal/linter/known_annotations", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/util", + "snowflake:id", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/botmaker:botmaker-scala", + "src/thrift/com/twitter/bouncer:bounce-action-thrift-scala", + "src/thrift/com/twitter/context:testing-signals-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-type-thrift-scala", + "src/thrift/com/twitter/relevance/feature_store:feature_store-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/servo:servo-exception-scala", + "src/thrift/com/twitter/spam/features:safety-meta-data-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + 
"tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "tco-util", + "tweet-util/src/main/scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/jiminy/tweetypie", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats", + "visibility/common/src/main/scala/com/twitter/visibility/common", + "visibility/lib/src/main/scala/com/twitter/visibility/builder", + "visibility/lib/src/main/scala/com/twitter/visibility/generators", + "visibility/lib/src/main/scala/com/twitter/visibility/models", + "visibility/writer/src/main/scala/com/twitter/visibility/writer", + 
"visibility/writer/src/main/scala/com/twitter/visibility/writer/interfaces/tweets", + "visibility/writer/src/main/scala/com/twitter/visibility/writer/models", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala new file mode 100644 index 000000000..5a04c611f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardReferenceValidationHandler.scala @@ -0,0 +1,74 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.expandodo.thriftscala.AttachmentEligibilityResponses +import com.twitter.expandodo.{thriftscala => expandodo} +import com.twitter.tweetypie.backends.Expandodo +import com.twitter.twittertext.Extractor +import scala.util.control.NoStackTrace +import scala.util.control.NonFatal +import java.net.URI + +object CardReferenceValidationFailedException extends Exception with NoStackTrace + +object CardReferenceValidationHandler { + type Type = FutureArrow[(UserId, CardUri), CardUri] + + def apply(checkEligibility: Expandodo.CheckAttachmentEligibility): Type = { + def validateAttachmentForUser(userId: UserId, cardUri: CardUri): Future[CardUri] = { + val request = Seq(expandodo.AttachmentEligibilityRequest(cardUri, userId)) + checkEligibility(request) + .flatMap(validatedCardUri) + .rescue { + case NonFatal(_) => Future.exception(CardReferenceValidationFailedException) + } + } + + FutureArrow { + case (userId, cardUri) => + if (shouldSkipValidation(cardUri)) { + Future.value(cardUri) + } else { + validateAttachmentForUser(userId, cardUri) + } + } + } + + private[this] def validatedCardUri(responses: AttachmentEligibilityResponses) = { + responses.results.headOption match { + case Some( + expandodo.AttachmentEligibilityResult + .Success(expandodo.ValidCardUri(validatedCardUri)) + ) => + Future.value(validatedCardUri) + case _ => + 
Future.exception(CardReferenceValidationFailedException) + } + } + + // We're not changing state between calls, so it's safe to share among threads + private[this] val extractor = { + val extractor = new Extractor + extractor.setExtractURLWithoutProtocol(false) + extractor + } + + // Card References with these URIs don't need validation since cards referenced by URIs in these + // schemes are public and hence not subject to restrictions. + private[handler] val isWhitelistedSchema = Set("http", "https", "tombstone") + + // NOTE: http://www.ietf.org/rfc/rfc2396.txt + private[this] def hasWhitelistedScheme(cardUri: CardUri) = + Try(new URI(cardUri)).toOption + .map(_.getScheme) + .exists(isWhitelistedSchema) + + // Even though URI spec is technically is a superset of http:// and https:// URLs, we have to + // resort to using a Regex based parser here as a fallback because many URLs found in the wild + // have unescaped components that would fail java.net.URI parsing, yet are still considered acceptable. 
+ private[this] def isTwitterUrlEntity(cardUri: CardUri) = + extractor.extractURLs(cardUri).size == 1 + + private[this] def shouldSkipValidation(cardUri: CardUri) = + hasWhitelistedScheme(cardUri) || isTwitterUrlEntity(cardUri) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala new file mode 100644 index 000000000..da483cef5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CardUsersFinder.scala @@ -0,0 +1,52 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.Tombstone +import com.twitter.tweetypie.repository.CardUsersRepository +import com.twitter.tweetypie.repository.CardUsersRepository.Context +import com.twitter.tweetypie.thriftscala.CardReference + +/** + * Finds a set of UserId that may be mentioned when replying to a tweet that has a card. + * + * Replies created without 'auto_populate_reply_metadata' include both 'site' and 'author' users to + * have a more exhaustive list of mentions to match against. This is needed because iOS and Android + * have had different implementations client-side for years. + */ +object CardUsersFinder { + + case class Request( + cardReference: Option[CardReference], + urls: Seq[String], + perspectiveUserId: UserId) { + val uris: Seq[String] = cardReference match { + case Some(CardReferenceUriExtractor(cardUri)) => + cardUri match { + case NonTombstone(uri) => Seq(uri) + case Tombstone => Nil + } + case _ => urls + } + + val context: CardUsersRepository.Context = Context(perspectiveUserId) + } + + type Type = Request => Stitch[Set[UserId]] + + /** + * From a card-related arguments in [[Request]] select the set of user ids associated with the + * card. 
+ * + * Note that this uses the same "which card do I use?" logic from Card2Hydrator which + * prioritizes CardReferenceUri and then falls back to the last resolvable (non-None) url entity. + */ + def apply(cardUserRepo: CardUsersRepository.Type): Type = + request => + Stitch + .traverse(request.uris) { uri => cardUserRepo(uri, request.context) } + // select the last, non-None Set of users ids + .map(r => r.flatten.reverse.headOption.getOrElse(Set.empty)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala new file mode 100644 index 000000000..058bcbce5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CollabControlBuilder.scala @@ -0,0 +1,109 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.CollabControl +import com.twitter.tweetypie.thriftscala.CollabControlOptions +import com.twitter.tweetypie.thriftscala.CollabInvitation +import com.twitter.tweetypie.thriftscala.CollabInvitationOptions +import com.twitter.tweetypie.thriftscala.CollabInvitationStatus +import com.twitter.tweetypie.thriftscala.CollabTweet +import com.twitter.tweetypie.thriftscala.CollabTweetOptions +import com.twitter.tweetypie.thriftscala.Communities +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl +import com.twitter.tweetypie.thriftscala.InvitedCollaborator +import com.twitter.tweetypie.thriftscala.TrustedFriendsControl +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.CollabTweetInvalidParams +import com.twitter.tweetypie.util.CommunityUtil + +object CollabControlBuilder { + type Type = Request => Future[Option[CollabControl]] + + case class Request( + collabControlOptions: Option[CollabControlOptions], + replyResult: 
Option[ReplyBuilder.Result], + communities: Option[Communities], + trustedFriendsControl: Option[TrustedFriendsControl], + conversationControl: Option[TweetCreateConversationControl], + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId) + + def apply(): Type = { request => + val collabControl = convertToCollabControl(request.collabControlOptions, request.userId) + + validateCollabControlParams( + collabControl, + request.replyResult, + request.communities, + request.trustedFriendsControl, + request.conversationControl, + request.exclusiveTweetControl, + request.userId + ) map { _ => collabControl } + } + + def convertToCollabControl( + collabTweetOptions: Option[CollabControlOptions], + authorId: UserId + ): Option[CollabControl] = { + collabTweetOptions flatMap { + case CollabControlOptions.CollabInvitation( + collabInvitationOptions: CollabInvitationOptions) => + Some( + CollabControl.CollabInvitation( + CollabInvitation( + invitedCollaborators = collabInvitationOptions.collaboratorUserIds.map(userId => { + InvitedCollaborator( + collaboratorUserId = userId, + collabInvitationStatus = + if (userId == authorId) + CollabInvitationStatus.Accepted + else CollabInvitationStatus.Pending + ) + }) + ) + ) + ) + case CollabControlOptions.CollabTweet(collabTweetOptions: CollabTweetOptions) => + Some( + CollabControl.CollabTweet( + CollabTweet( + collaboratorUserIds = collabTweetOptions.collaboratorUserIds + ) + ) + ) + case _ => None + } + } + + def validateCollabControlParams( + collabControl: Option[CollabControl], + replyResult: Option[ReplyBuilder.Result], + communities: Option[Communities], + trustedFriendsControl: Option[TrustedFriendsControl], + conversationControl: Option[TweetCreateConversationControl], + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId + ): Future[Unit] = { + val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) + + collabControl match { + case Some(_: CollabControl) + if 
(isInReplyToTweet || + CommunityUtil.hasCommunity(communities) || + exclusiveTweetControl.isDefined || + trustedFriendsControl.isDefined || + conversationControl.isDefined) => + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case Some(CollabControl.CollabInvitation(collab_invitation)) + if collab_invitation.invitedCollaborators.head.collaboratorUserId != userId => + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case Some(CollabControl.CollabTweet(collab_tweet)) + if collab_tweet.collaboratorUserIds.head != userId => + Future.exception(TweetCreateFailure.State(CollabTweetInvalidParams)) + case _ => + Future.Unit + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala new file mode 100644 index 000000000..220a6e1dd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/CommunitiesValidator.scala @@ -0,0 +1,40 @@ +package com.twitter.tweetypie.handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.servo.util.Gate +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.Communities +import com.twitter.tweetypie.thriftscala.TweetCreateState.CommunityProtectedUserCannotTweet +import com.twitter.tweetypie.util.CommunityUtil + +object CommunitiesValidator { + case class Request( + matchedResults: Option[FeatureSwitchResults], + isProtected: Boolean, + community: Option[Communities]) + + type Type = Request => Future[Unit] + + val CommunityProtectedCanCreateTweet = "communities_protected_community_tweet_creation_enabled" + + val communityProtectedCanCreateTweetGate: Gate[Request] = Gate { request: Request => + request.matchedResults + .flatMap(_.getBoolean(CommunityProtectedCanCreateTweet, shouldLogImpression = true)) + .contains(false) + } + + 
def apply(): Type = + (request: Request) => { + // Order is important: the feature-switch gate is checked only when the + // request is both protected & community so that the FS experiment measurements + // are based only on data from requests that are subject to rejection by this validator. + if (request.isProtected && + CommunityUtil.hasCommunity(request.community) && + communityProtectedCanCreateTweetGate(request)) { + Future.exception(TweetCreateFailure.State(CommunityProtectedUserCannotTweet)) + } else { + Future.Unit + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala new file mode 100644 index 000000000..6eeea01f9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ConversationControlBuilder.scala @@ -0,0 +1,272 @@ +package com.twitter.tweetypie.handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie._ +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.UserIdentityRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.ConversationControlNotAllowed +import com.twitter.tweetypie.thriftscala.TweetCreateState.InvalidConversationControl +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.util.logging.Logging + +/** + * Process request parameters into a ConversationControl value. 
+ */ +object ConversationControlBuilder extends Logging { + type Type = Request => Stitch[Option[ConversationControl]] + + type ScreenName = String + + /** + * The fields necessary to create a [[ConversationControl]]. + * + * This is a trait rather than a case class to avoid running the + * code to extract the mentions in the cases where handling the + * request doesn't need to use them (the common case where + * tweetCreateConversationControl is None). + */ + trait Request { + def tweetCreateConversationControl: Option[TweetCreateConversationControl] + def tweetAuthorId: UserId + def mentionedUserScreenNames: Set[String] + + def noteTweetMentionedUserIds: Option[Set[Long]] + } + + object Request { + + /** + * Extract the data necessary to create a [[ConversationControl]] + * for a new [[Tweet]]. This is intended for use when creating + * Tweets. It must be called after the Tweet has had its entities + * extracted. + */ + def fromTweet( + tweet: Tweet, + tweetCreateConversationControl: Option[TweetCreateConversationControl], + noteTweetMentionedUserIdsList: Option[Seq[Long]] + ): Request = { + val cctl = tweetCreateConversationControl + new Request { + def tweetCreateConversationControl: Option[TweetCreateConversationControl] = cctl + def mentionedUserScreenNames: Set[ScreenName] = + tweet.mentions + // Enforce that the Tweet's mentions have already been + // extracted from the text. (Mentions will be None if they + // have not yet been extracted.) + .getOrElse( + throw new RuntimeException( + "Mentions must be extracted before applying ConversationControls")) + .map(_.screenName) + .toSet + + def tweetAuthorId: UserId = tweet.coreData.get.userId + def noteTweetMentionedUserIds: Option[Set[Long]] = + noteTweetMentionedUserIdsList.map(_.toSet) + } + } + } + + /** + * Create a ConversationControlBuilder that looks up user ids for + * screen names using the specified UserIdentityRepository. 
+ */ + def fromUserIdentityRepo( + statsReceiver: StatsReceiver, + userIdentityRepo: UserIdentityRepository.Type + ): Request => Stitch[Option[ConversationControl]] = + ConversationControlBuilder( + getUserId = screenName => userIdentityRepo(UserKey.byScreenName(screenName)).map(_.id), + statsReceiver = statsReceiver + ) + + /** + * Extract the inviteViaMention value which does not exist on the TweetCreateConversationControl + * itself but does exist on the structures it unions. + */ + def inviteViaMention(tccc: TweetCreateConversationControl): Boolean = + tccc match { + case TweetCreateConversationControl.ByInvitation(c) => c.inviteViaMention.contains(true) + case TweetCreateConversationControl.Community(c) => c.inviteViaMention.contains(true) + case TweetCreateConversationControl.Followers(c) => c.inviteViaMention.contains(true) + case _ => false + } + + /** + * Translates the TweetCreateConversationControl into + * ConversationControl using the context from the rest of the tweet + * creation. For the most part, this is just a direct translation, + * plus filling in the contextual user ids (mentioned users and tweet + * author). + */ + def apply( + statsReceiver: StatsReceiver, + getUserId: ScreenName => Stitch[UserId] + ): Request => Stitch[Option[ConversationControl]] = { + val userIdLookupsCounter = statsReceiver.counter("user_id_lookups") + val conversationControlPresentCounter = statsReceiver.counter("conversation_control_present") + val conversationControlInviteViaMentionPresentCounter = + statsReceiver.counter("conversation_control_invite_via_mention_present") + val failureCounter = statsReceiver.counter("failures") + + // Get the user ids for these screen names. Any users who do not + // exist will be silently dropped. 
+ def getExistingUserIds( + screenNames: Set[ScreenName], + mentionedUserIds: Option[Set[Long]] + ): Stitch[Set[UserId]] = { + mentionedUserIds match { + case Some(userIds) => Stitch.value(userIds) + case _ => + Stitch + .traverse(screenNames.toSeq) { screenName => + getUserId(screenName).liftNotFoundToOption + .ensure(userIdLookupsCounter.incr()) + } + .map(userIdOptions => userIdOptions.flatten.toSet) + } + } + + // This is broken out just to make it syntactically nicer to add + // the stats handling + def process(request: Request): Stitch[Option[ConversationControl]] = + request.tweetCreateConversationControl match { + case None => Stitch.None + case Some(cctl) => + cctl match { + case TweetCreateConversationControl.ByInvitation(byInvitationControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.byInvitation( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + byInvitationControl.inviteViaMention + ) + ) + + case TweetCreateConversationControl.Community(communityControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.community( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + communityControl.inviteViaMention + ) + ) + case TweetCreateConversationControl.Followers(followersControl) => + for { + invitedUserIds <- getExistingUserIds( + request.mentionedUserScreenNames, + request.noteTweetMentionedUserIds) + } yield Some( + ConversationControls.followers( + invitedUserIds = invitedUserIds.toSeq.filterNot(_ == request.tweetAuthorId), + conversationTweetAuthorId = request.tweetAuthorId, + followersControl.inviteViaMention + ) + ) + // This should only ever happen if a new 
value is added to the + // union and we don't update this code. + case TweetCreateConversationControl.UnknownUnionField(fld) => + throw new RuntimeException(s"Unexpected TweetCreateConversationControl: $fld") + } + } + + (request: Request) => { + // Wrap in Stitch to encapsulate any exceptions that happen + // before making a Stitch call inside of process. + Stitch(process(request)).flatten.respond { response => + // If we count this before doing the work, and the stats are + // collected before the RPC completes, then any failures + // will get counted in a different minute than the request + // that caused it. + request.tweetCreateConversationControl.foreach { cc => + conversationControlPresentCounter.incr() + if (inviteViaMention(cc)) conversationControlInviteViaMentionPresentCounter.incr() + } + + response.onFailure { e => + error(message = "Failed to create conversation control", cause = e) + // Don't bother counting individual exceptions, because + // the cost of keeping those stats is probably not worth + // the convenience of not having to look in the logs. + failureCounter.incr() + } + } + } + } + + /** + * Validates if a conversation control request is allowed by feature switches + * and is only requested on a root tweet. 
+ */ + object Validate { + case class Request( + matchedResults: Option[FeatureSwitchResults], + conversationControl: Option[TweetCreateConversationControl], + inReplyToTweetId: Option[TweetId]) + + type Type = FutureEffect[Request] + + val ExInvalidConversationControl = TweetCreateFailure.State(InvalidConversationControl) + val ExConversationControlNotAllowed = TweetCreateFailure.State(ConversationControlNotAllowed) + val ConversationControlStatusUpdateEnabledKey = "conversation_control_status_update_enabled" + val ConversationControlFollowersEnabledKey = "conversation_control_my_followers_enabled" + + def apply( + useFeatureSwitchResults: Gate[Unit], + statsReceiver: StatsReceiver + ): Type = request => { + def fsDenied(fsKey: String): Boolean = { + val featureEnabledOpt: Option[Boolean] = + // Do not log impressions, which would interfere with shared client experiment data. + request.matchedResults.flatMap(_.getBoolean(fsKey, shouldLogImpression = false)) + val fsEnabled = featureEnabledOpt.contains(true) + if (!fsEnabled) { + statsReceiver.counter(s"check_conversation_control/unauthorized/fs/$fsKey").incr() + } + !fsEnabled + } + + val isCcRequest: Boolean = request.conversationControl.isDefined + + val isCcInvalidParams = isCcRequest && { + val isRootTweet = request.inReplyToTweetId.isEmpty + if (!isRootTweet) { + statsReceiver.counter("check_conversation_control/invalid").incr() + } + !isRootTweet + } + + val isCcDeniedByFs = isCcRequest && { + val isFollower = request.conversationControl.exists { + case _: TweetCreateConversationControl.Followers => true + case _ => false + } + + fsDenied(ConversationControlStatusUpdateEnabledKey) || + (isFollower && fsDenied(ConversationControlFollowersEnabledKey)) + } + + if (isCcDeniedByFs && useFeatureSwitchResults()) { + Future.exception(ExConversationControlNotAllowed) + } else if (isCcInvalidParams) { + Future.exception(ExInvalidConversationControl) + } else { + Future.Unit + } + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala new file mode 100644 index 000000000..c6b1fd0e9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteAdditionalFieldsBuilder.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.repository.UserQueryOptions +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository.UserVisibility +import com.twitter.tweetypie.store.AsyncDeleteAdditionalFields +import com.twitter.tweetypie.store.DeleteAdditionalFields +import com.twitter.tweetypie.store.TweetStoreEventOrRetry +import com.twitter.tweetypie.thriftscala.AsyncDeleteAdditionalFieldsRequest +import com.twitter.tweetypie.thriftscala.DeleteAdditionalFieldsRequest + +object DeleteAdditionalFieldsBuilder { + type Type = DeleteAdditionalFieldsRequest => Future[Seq[DeleteAdditionalFields.Event]] + + val tweetQueryOptions = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + def apply(tweetRepo: TweetRepository.Type): Type = { + def getTweet(tweetId: TweetId) = + Stitch.run( + tweetRepo(tweetId, tweetQueryOptions) + .rescue(HandlerError.translateNotFoundToClientError(tweetId)) + ) + + request => { + Future.collect( + request.tweetIds.map { tweetId => + getTweet(tweetId).map { tweet => + DeleteAdditionalFields.Event( + tweetId = tweetId, + fieldIds = request.fieldIds, + userId = getUserId(tweet), + timestamp = Time.now + ) + } + } + ) + } + } +} + +object AsyncDeleteAdditionalFieldsBuilder { + type Type = AsyncDeleteAdditionalFieldsRequest => Future[ + 
TweetStoreEventOrRetry[AsyncDeleteAdditionalFields.Event] + ] + + val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + + def apply(userRepo: UserRepository.Type): Type = { + def getUser(userId: UserId): Future[User] = + Stitch.run( + userRepo(UserKey.byId(userId), userQueryOpts) + .rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(userId)) } + ) + + request => + getUser(request.userId).map { user => + AsyncDeleteAdditionalFields.Event.fromAsyncRequest(request, user) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala new file mode 100644 index 000000000..34b588a17 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DeleteLocationDataHandler.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.eventbus.client.EventBusPublisher +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.GeoScrubEventStore.GetGeoScrubTimestamp +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.DeleteLocationDataRequest + +/** + * Initiates the process of removing the geo information from a user's + * tweets. + */ +object DeleteLocationDataHandler { + type Type = DeleteLocationDataRequest => Future[Unit] + + def apply( + getLastScrubTime: GetGeoScrubTimestamp, + scribe: DeleteLocationData => Future[Unit], + eventbus: EventBusPublisher[DeleteLocationData] + ): Type = + request => { + // Attempt to bound the time range of the tweets that need to be + // scrubbed by finding the most recent scrub time on record. This + // is an optimization that prevents scrubbing already-scrubbed + // tweets, so it is OK if the value that we find is occasionally + // stale or if the lookup fails. 
Primarily, this is intended to + // protect against intentional abuse by enqueueing multiple + // delete_location_data events that have to traverse a very long + // timeline. + Stitch + .run(getLastScrubTime(request.userId)) + // If there is no timestamp or the lookup failed, continue with + // an unchanged request. + .handle { case _ => None } + .flatMap { lastScrubTime => + // Due to clock skew, it's possible for the last scrub + // timestamp to be larger than the timestamp from the request, + // but we ignore that so that we keep a faithful record of + // user requests. The execution of such events will end up a + // no-op. + val event = + DeleteLocationData( + userId = request.userId, + timestampMs = Time.now.inMilliseconds, + lastTimestampMs = lastScrubTime.map(_.inMilliseconds) + ) + + Future.join( + Seq( + // Scribe the event so that we can reprocess events if + // there is a bug or operational issue that causes some + // events to be lost. + scribe(event), + // The actual deletion process is handled by the TweetyPie + // geoscrub daemon. 
+ eventbus.publish(event) + ) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala new file mode 100644 index 000000000..168dde9c6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/DuplicateTweetFinder.scala @@ -0,0 +1,254 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.backends.TimelineService +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.ConversationControlByInvitation +import com.twitter.tweetypie.thriftscala.ConversationControlCommunity +import com.twitter.tweetypie.thriftscala.ConversationControlFollowers +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.NoteTweetOptions +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import com.twitter.tweetypie.thriftscala.TweetCreateConversationControl +import com.twitter.tweetypie.util.ConversationControls +import com.twitter.tweetypie.util.EditControlUtil +import com.twitter.util.Time + +/** + * Used at tweet creation time to determine whether the tweet creation + * request should be considered a duplicate of an existing tweet. + */ +object DuplicateTweetFinder { + + /** + * Return the id of a tweet, if any, that is found to be a duplicate of + * this request. 
+ */ + type Type = RequestInfo => Future[Option[TweetId]] + + final case class Settings( + // The number of tweets that are loaded from the user's timeline + // for the heuristic duplicate check + numTweetsToCheck: Int, + // The oldest that a tweet can be to still be considered a + // duplicate by the heuristic duplicate check + maxDuplicateAge: Duration) + + // Takes a ConversationControl from a Tweet and converts to the equivalent + // TweetCreateConversationControl. Note: this is a lossy conversion because the + // ConversationControl contains additional data from the Tweet. + def toTweetCreateConversationControl( + conversationControl: ConversationControl + ): TweetCreateConversationControl = + conversationControl match { + case ConversationControl.ByInvitation( + ConversationControlByInvitation(_, _, inviteViaMention)) => + ConversationControls.Create.byInvitation(inviteViaMention) + case ConversationControl.Community(ConversationControlCommunity(_, _, inviteViaMention)) => + ConversationControls.Create.community(inviteViaMention) + case ConversationControl.Followers(ConversationControlFollowers(_, _, inviteViaMention)) => + ConversationControls.Create.followers(inviteViaMention) + case _ => throw new IllegalArgumentException + } + + /** + * The parts of the request that we need in order to perform + * duplicate detection. 
+ */ + final case class RequestInfo( + userId: UserId, + isNarrowcast: Boolean, + isNullcast: Boolean, + text: String, + replyToTweetId: Option[TweetId], + mediaUploadIds: Seq[MediaId], + cardReference: Option[CardReference], + conversationControl: Option[TweetCreateConversationControl], + underlyingCreativesContainer: Option[CreativesContainerId], + editOptions: Option[EditOptions] = None, + noteTweetOptions: Option[NoteTweetOptions] = None) { + + def isDuplicateOf(tweet: Tweet, oldestAcceptableTimestamp: Time): Boolean = { + val createdAt = getTimestamp(tweet) + val isDuplicateText = text == getText(tweet) + val isDuplicateReplyToTweetId = replyToTweetId == getReply(tweet).flatMap(_.inReplyToStatusId) + val isDuplicateMedia = getMedia(tweet).map(_.mediaId) == mediaUploadIds + val isDuplicateCardReference = getCardReference(tweet) == cardReference + val isDuplicateConversationControl = + tweet.conversationControl.map(toTweetCreateConversationControl) == conversationControl + val isDuplicateConversationContainerId = { + tweet.underlyingCreativesContainerId == underlyingCreativesContainer + } + + val isDuplicateIfEditRequest = if (editOptions.isDefined) { + // We do not count an incoming edit request as creating a duplicate tweet if: + // 1) The tweet that is considered a duplicate is a previous version of this tweet OR + // 2) The tweet that is considered a duplicate is otherwise stale. 
+ val tweetEditChain = tweet.editControl match { + case Some(EditControl.Initial(initial)) => + initial.editTweetIds + case Some(EditControl.Edit(edit)) => + edit.editControlInitial.map(_.editTweetIds).getOrElse(Nil) + case _ => Nil + } + val tweetIsAPreviousVersion = + editOptions.map(_.previousTweetId).exists(tweetEditChain.contains) + + val tweetIsStale = EditControlUtil.isLatestEdit(tweet.editControl, tweet.id) match { + case Return(false) => true + case _ => false + } + + !(tweetIsStale || tweetIsAPreviousVersion) + } else { + // If not an edit request, this condition is true as duplication checking is not blocked + true + } + + // Note that this does not prevent you from tweeting the same + // image twice with different text, or the same text twice with + // different images, because if you upload the same media twice, + // we will store two copies of it, each with a different media + // URL and thus different t.co URL, and since the text that + // we're checking here has that t.co URL added to it already, it + // is necessarily different. + // + // We shouldn't have to check the user id or whether it's a + // retweet, because we loaded the tweets from the user's + // (non-retweet) timelines, but it doesn't hurt and protects + // against possible future changes. + (oldestAcceptableTimestamp <= createdAt) && + getShare(tweet).isEmpty && + (getUserId(tweet) == userId) && + isDuplicateText && + isDuplicateReplyToTweetId && + isDuplicateMedia && + isDuplicateCardReference && + isDuplicateConversationControl && + isDuplicateConversationContainerId && + isDuplicateIfEditRequest && + noteTweetOptions.isEmpty // Skip duplicate checks for NoteTweets + } + } + + object RequestInfo { + + /** + * Extract the information relevant to the DuplicateTweetFinder + * from the PostTweetRequest. 
+ */ + def fromPostTweetRequest(req: PostTweetRequest, processedText: String): RequestInfo = + RequestInfo( + userId = req.userId, + isNarrowcast = req.narrowcast.nonEmpty, + isNullcast = req.nullcast, + text = processedText, + replyToTweetId = req.inReplyToTweetId, + mediaUploadIds = req.mediaUploadIds.getOrElse[Seq[MediaId]](Seq.empty), + cardReference = req.additionalFields.flatMap(_.cardReference), + conversationControl = req.conversationControl, + underlyingCreativesContainer = req.underlyingCreativesContainerId, + editOptions = req.editOptions, + noteTweetOptions = req.noteTweetOptions + ) + } + + /** + * Encapsulates the external interactions that we need to do for + * duplicate checking. + */ + trait TweetSource { + def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] + def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] + def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[TweetId]] + } + + object TweetSource { + + /** + * Use the provided services to access tweets. 
+ */ + def fromServices( + tweetRepo: TweetRepository.Optional, + getStatusTimeline: TimelineService.GetStatusTimeline + ): TweetSource = + new TweetSource { + // only fields needed by RequestInfo.isDuplicateOf() + private[this] val tweetQueryOption = + TweetQuery.Options( + TweetQuery.Include( + tweetFields = Set( + Tweet.CoreDataField.id, + Tweet.MediaField.id, + Tweet.ConversationControlField.id, + Tweet.EditControlField.id + ), + pastedMedia = true + ) + ) + + private[this] def loadTimeline(query: tls.TimelineQuery): Future[Seq[Long]] = + getStatusTimeline(Seq(query)).map(_.head.entries.map(_.statusId)) + + override def loadUserTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = + loadTimeline( + tls.TimelineQuery( + timelineType = tls.TimelineType.User, + timelineId = userId, + maxCount = maxCount.toShort + ) + ) + + override def loadNarrowcastTimelineIds(userId: UserId, maxCount: Int): Future[Seq[Long]] = + loadTimeline( + tls.TimelineQuery( + timelineType = tls.TimelineType.Narrowcasted, + timelineId = userId, + maxCount = maxCount.toShort + ) + ) + + override def loadTweets(tweetIds: Seq[TweetId]): Future[Seq[Tweet]] = + if (tweetIds.isEmpty) { + Future.value(Seq[Tweet]()) + } else { + Stitch + .run( + Stitch.traverse(tweetIds) { tweetId => tweetRepo(tweetId, tweetQueryOption) } + ) + .map(_.flatten) + } + } + } + + def apply(settings: Settings, tweetSource: TweetSource): Type = { reqInfo => + if (reqInfo.isNullcast) { + // iff nullcast, we bypass duplication logic altogether + Future.None + } else { + val oldestAcceptableTimestamp = Time.now - settings.maxDuplicateAge + val userTweetIdsFut = + tweetSource.loadUserTimelineIds(reqInfo.userId, settings.numTweetsToCheck) + + // Check the narrowcast timeline iff this is a narrowcasted tweet + val narrowcastTweetIdsFut = + if (reqInfo.isNarrowcast) { + tweetSource.loadNarrowcastTimelineIds(reqInfo.userId, settings.numTweetsToCheck) + } else { + Future.value(Seq.empty) + } + + for { + userTweetIds 
<- userTweetIdsFut + narrowcastTweetIds <- narrowcastTweetIdsFut + candidateTweets <- tweetSource.loadTweets(userTweetIds ++ narrowcastTweetIds) + } yield candidateTweets.find(reqInfo.isDuplicateOf(_, oldestAcceptableTimestamp)).map(_.id) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala new file mode 100644 index 000000000..d3baa0ae0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditControlBuilder.scala @@ -0,0 +1,361 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.gizmoduck.util.UserUtil +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.Card2Repository +import com.twitter.tweetypie.repository.StratoPromotedTweetRepository +import com.twitter.tweetypie.repository.StratoSubscriptionVerificationRepository +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UrlCard2Key +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.TweetCreateState +import com.twitter.tweetypie.util.EditControlUtil._ +import com.twitter.tweetypie.thriftscala.CardReference +import com.twitter.tweetypie.thriftscala.EditControlInitial +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.EditControlUtil +import com.twitter.util.Future + +object EditControlBuilder { + type Type = Request => Future[Option[EditControl]] + + val editTweetCountStat = "edit_tweet_count" + val editControlQueryOptions = 
TweetQuery.Options( + TweetQuery.Include(Set(Tweet.CoreDataField.id, Tweet.EditControlField.id)) + ) + val TweetEditCreationEnabledKey = "tweet_edit_creation_enabled" + val TweetEditCreationEnabledForTwitterBlueKey = "tweet_edit_creation_enabled_for_twitter_blue" + + val pollCardNames: Set[String] = Set( + "poll2choice_text_only", + "poll3choice_text_only", + "poll4choice_text_only", + "poll2choice_image", + "poll3choice_image", + "poll4choice_image", + "poll2choice_video", + "poll3choice_video", + "poll4choice_video", + ) + + /** Used just for checking card name for poll check in case cards platform key not provided. */ + val defaultCardsPlatformKey = "iPhone-13" + + /** + * Do we assume a Tweet has a poll (which makes it not editable) when it has a card + * that could be a poll, and it cannot be resolved at create. + */ + val isPollCardAssumption = true + + val tweetEditSubscriptionResource = "feature/tweet_edit" + + val log: Logger = Logger(getClass) + + case class Request( + postTweetRequest: PostTweetRequest, + tweet: Tweet, + matchedResults: Option[FeatureSwitchResults]) { + def editOptions: Option[EditOptions] = postTweetRequest.editOptions + + def authorId: UserId = postTweetRequest.userId + + def createdAt: Time = Time.fromMilliseconds(tweet.coreData.get.createdAtSecs * 1000L) + + def tweetId: TweetId = tweet.id + + def cardReference: Option[CardReference] = + postTweetRequest.additionalFields.flatMap(_.cardReference) + + def cardsPlatformKey: Option[String] = + postTweetRequest.hydrationOptions.flatMap(_.cardsPlatformKey) + } + + def apply( + tweetRepo: TweetRepository.Type, + card2Repo: Card2Repository.Type, + promotedTweetRepo: StratoPromotedTweetRepository.Type, + subscriptionVerificationRepo: StratoSubscriptionVerificationRepository.Type, + disablePromotedTweetEdit: Gate[Unit], + checkTwitterBlueSubscription: Gate[Unit], + setEditWindowToSixtyMinutes: Gate[Unit], + stats: StatsReceiver + ): Type = { + + // Nullcast tweets not allowed, except if the 
tweet has a community annotation + def isNullcastedButNotCommunityTweet(request: Request): Boolean = { + + val isNullcasted: Boolean = request.tweet.coreData.get.nullcast + + val communityIds: Option[Seq[CommunityId]] = + request.postTweetRequest.additionalFields + .flatMap(CommunityAnnotation.additionalFieldsToCommunityIDs) + + isNullcasted && !(communityIds.exists(_.nonEmpty)) + } + + def isSuperFollow(tweet: Tweet): Boolean = tweet.exclusiveTweetControl.isDefined + + def isCollabTweet(tweet: Tweet): Boolean = tweet.collabControl.isDefined + + def isReplyToTweet(tweet: Tweet): Boolean = + getReply(tweet).flatMap(_.inReplyToStatusId).isDefined + + // When card is tombstone, tweet is not considered a poll, and therefore can be edit eligible. + val cardReferenceUriIsTombstone = stats.counter("edit_control_builder_card_tombstoned") + // We check whether tweets are polls since these are not edit eligible. + // If we are not sure due to lookup failure, we take an `isPollCardAssumption`. + def isPoll( + card2Repo: Card2Repository.Type, + cardReference: CardReference, + cardsPlatformKey: String, + ): Stitch[Boolean] = { + if (cardReference.cardUri == "tombstone://card") { + cardReferenceUriIsTombstone.incr() + Stitch.value(false) + } else { + val key = UrlCard2Key(cardReference.cardUri) + // `allowNonTcoUrls = true` This allows us to check if non-tco urls (e.g. apple.com) have a card + // at this point in tweet builder urls can be in their original form and not tcoified. 
+ val options = Card2RequestOptions( + platformKey = cardsPlatformKey, + allowNonTcoUrls = true + ) + card2Repo(key, options) + .map(card2 => pollCardNames.contains(card2.name)) + } + } + + def isFeatureSwitchEnabled(matchedResults: Option[FeatureSwitchResults], key: String): Boolean = + matchedResults.flatMap(_.getBoolean(key, shouldLogImpression = false)).contains(true) + + def wrapInitial(initial: EditControlInitial): Option[EditControl.Initial] = + Some(EditControl.Initial(initial = initial)) + + // Checks for validity of an edit are implemented as procedures + // that throw an error in case a check fails. This composes way better than + // returning a Try/Future/Stitch because: + // 1. We do not need to decide which of the aforementioned containers to use. + // 2. The checks as below compose with callbacks in all the aforementioned containers. + + val editRequestOutsideOfAllowlist = stats.counter("edit_control_builder_rejected", "allowlist") + + // This method uses two feature switches: + // - TweetEditCreationEnabledKey authorizes the user to edit tweets directly + // - TweetEditCreationEnabledForTwitterBlueKey authorizes the user to edit tweets if they have + // a Twitter Blue subscription + // + // Test users are always authorized to edit tweets. + def checkUserEligibility( + authorId: UserId, + matchedResults: Option[FeatureSwitchResults] + ): Stitch[Unit] = { + val isTestUser = UserUtil.isTestUserId(authorId) + val authorizedWithoutTwitterBlue = + isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledKey) + + if (isTestUser || authorizedWithoutTwitterBlue) { + // If the editing user is a test user or is authorized by the non-Twitter Blue feature + // switch, allow editing. + Stitch.Done + } else { + // Otherwise, check if they're authorized by the Twitter Blue feature switch and if they're + // subscribed to Twitter Blue. 
+ val authorizedWithTwitterBlue: Stitch[Boolean] = + if (checkTwitterBlueSubscription() && + isFeatureSwitchEnabled(matchedResults, TweetEditCreationEnabledForTwitterBlueKey)) { + subscriptionVerificationRepo(authorId, tweetEditSubscriptionResource) + } else Stitch.value(false) + + authorizedWithTwitterBlue.flatMap { authorized => + if (!authorized) { + log.error(s"User ${authorId} unauthorized to edit") + editRequestOutsideOfAllowlist.incr() + Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) + } else Stitch.Done + } + } + } + + val editRequestByNonAuthor = stats.counter("edit_control_builder_rejected", "not_author") + def checkAuthor( + authorId: UserId, + previousTweetAuthorId: UserId + ): Unit = { + if (authorId != previousTweetAuthorId) { + editRequestByNonAuthor.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthor) + } + } + + val tweetEditForStaleTweet = stats.counter("edit_control_builder_rejected", "stale") + def checkLatestEdit( + previousTweetId: TweetId, + initial: EditControlInitial, + ): Unit = { + if (previousTweetId != initial.editTweetIds.last) { + tweetEditForStaleTweet.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTweetNotLatestVersion) + } + } + + val tweetEditForLimitReached = stats.counter("edit_control_builder_rejected", "edits_limit") + def checkEditsRemaining(initial: EditControlInitial): Unit = { + initial.editsRemaining match { + case Some(number) if number > 0 => // OK + case _ => + tweetEditForLimitReached.incr() + throw TweetCreateFailure.State(TweetCreateState.EditCountLimitReached) + } + } + + val editTweetExpired = stats.counter("edit_control_builder_rejected", "expired") + val editTweetExpiredNoEditControl = + stats.counter("edit_control_builder_rejected", "expired", "no_edit_control") + def checkEditTimeWindow(initial: EditControlInitial): Unit = { + initial.editableUntilMsecs match { + case Some(millis) if Time.now < Time.fromMilliseconds(millis) => 
// OK + case Some(_) => + editTweetExpired.incr() + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + case editable => + editTweetExpired.incr() + if (editable.isEmpty) { + editTweetExpiredNoEditControl.incr() + } + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + } + } + + val tweetEditNotEligible = stats.counter("edit_control_builder_rejected", "not_eligible") + def checkIsEditEligible(initial: EditControlInitial): Unit = { + initial.isEditEligible match { + case Some(true) => // OK + case _ => + tweetEditNotEligible.incr() + throw TweetCreateFailure.State(TweetCreateState.NotEligibleForEdit) + } + } + + val editControlInitialMissing = + stats.counter("edit_control_builder_rejected", "initial_missing") + def findEditControlInitial(previousTweet: Tweet): EditControlInitial = { + previousTweet.editControl match { + case Some(EditControl.Initial(initial)) => initial + case Some(EditControl.Edit(edit)) => + edit.editControlInitial.getOrElse { + editControlInitialMissing.incr() + throw new IllegalStateException( + "Encountered edit tweet with missing editControlInitial.") + } + case _ => + throw TweetCreateFailure.State(TweetCreateState.EditTimeLimitReached) + } + } + + val editPromotedTweet = stats.counter("tweet_edit_for_promoted_tweet") + def checkPromotedTweet( + previousTweetId: TweetId, + promotedTweetRepo: StratoPromotedTweetRepository.Type, + disablePromotedTweetEdit: Gate[Unit] + ): Stitch[Unit] = { + if (disablePromotedTweetEdit()) { + promotedTweetRepo(previousTweetId).flatMap { + case false => + Stitch.Done + case true => + editPromotedTweet.incr() + Stitch.exception(TweetCreateFailure.State(TweetCreateState.EditTweetUserNotAuthorized)) + } + } else { + Stitch.Done + } + } + + // Each time edit is made, count how many versions a tweet already has. + // Value should be always between 1 and 4. 
+ val editTweetCount = 0 + .to(EditControlUtil.maxTweetEditsAllowed) + .map(i => i -> stats.counter("edit_control_builder_edits_count", i.toString)) + .toMap + // Overall counter and failures of card resolution for poll lookups. Needed because polls are not editable. + val pollCardResolutionTotal = stats.counter("edit_control_builder_card_resolution", "total") + val pollCardResolutionFailure = + stats.counter("edit_control_builder_card_resolution", "failures") + // Edit of initial tweet requested, and all edit checks successful. + val initialEditTweet = stats.counter("edit_control_builder_initial_edit") + request => + Stitch.run { + request.editOptions match { + case None => + val editControl = + makeEditControlInitial( + tweetId = request.tweetId, + createdAt = request.createdAt, + setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes + ).initial.copy( + isEditEligible = Some( + !isNullcastedButNotCommunityTweet(request) + && !isSuperFollow(request.tweet) + && !isCollabTweet(request.tweet) + && !isReplyToTweet(request.tweet) + ), + ) + (editControl.isEditEligible, request.cardReference) match { + case (Some(true), Some(reference)) => + pollCardResolutionTotal.incr() + isPoll( + card2Repo = card2Repo, + cardReference = reference, + cardsPlatformKey = request.cardsPlatformKey.getOrElse(defaultCardsPlatformKey), + ).rescue { + // Revert to the assumed value if card cannot be resolved. 
+ case _ => + pollCardResolutionFailure.incr() + Stitch.value(isPollCardAssumption) + } + .map { tweetIsAPoll => + wrapInitial(editControl.copy(isEditEligible = Some(!tweetIsAPoll))) + } + case _ => Stitch.value(wrapInitial(editControl)) + } + case Some(editOptions) => + for { + (previousTweet, _, _) <- Stitch.join( + tweetRepo(editOptions.previousTweetId, editControlQueryOptions), + checkPromotedTweet( + editOptions.previousTweetId, + promotedTweetRepo, + disablePromotedTweetEdit), + checkUserEligibility( + authorId = request.authorId, + matchedResults = request.matchedResults) + ) + } yield { + val initial = findEditControlInitial(previousTweet) + checkAuthor( + authorId = request.authorId, + previousTweetAuthorId = getUserId(previousTweet)) + editTweetCount + .get(initial.editTweetIds.size) + .orElse(editTweetCount.get(EditControlUtil.maxTweetEditsAllowed)) + .foreach(counter => counter.incr()) + checkLatestEdit(previousTweet.id, initial) + checkEditsRemaining(initial) + checkEditTimeWindow(initial) + checkIsEditEligible(initial) + if (initial.editTweetIds == Seq(previousTweet.id)) { + initialEditTweet.incr() + } + Some(editControlEdit(initialTweetId = initial.editTweetIds.head)) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala new file mode 100644 index 000000000..0177996ec --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EditValidator.scala @@ -0,0 +1,137 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeDefBuilder +import com.twitter.scrooge.schema.scrooge.scala.CompiledScroogeValueExtractor +import com.twitter.scrooge.schema.tree.DefinitionTraversal +import com.twitter.scrooge.schema.tree.FieldPath +import com.twitter.scrooge.schema.{ThriftDefinitions => DEF} +import 
com.twitter.scrooge_internal.linter.known_annotations.AllowedAnnotationKeys.TweetEditAllowed +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.TweetQuery.Options +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.ConversationControl +import com.twitter.tweetypie.thriftscala.TweetCreateState.FieldEditNotAllowed +import com.twitter.tweetypie.thriftscala.TweetCreateState.InitialTweetNotFound +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.util.Future +import com.twitter.util.logging.Logger + +/** + * This object constructs a validator `(Tweet, Option[EditOptions]) => Future[Unit]` which + * takes a new edit tweet and its edit options and performs some validations. Specifically, it + * + * 1) ensures that no uneditable fields were edited. Uneditable fields are marked + * on the tweet.thrift using the thrift annotation "tweetEditAllowed=false". + * By default, fields with no annotation are treated as editable. + * + * 2) ensures that the conversationControl field (which is editable) remains the + * same type, e.g. a ConversationControl.ByInvitation doesn't change to a + * ConversationControl.Community. + * + * If either of these validations fails, the validator fails with a `FieldEditNotAllowed` + * tweet create state. + */ +object EditValidator { + type Type = (Tweet, Option[EditOptions]) => Future[Unit] + + val log: Logger = Logger(getClass) + + // An object that describes the tweet thrift, used to walk a tweet object looking + // for annotated fields. + val TweetDef = CompiledScroogeDefBuilder.build[Tweet].asInstanceOf[DEF.StructDef] + + // Collect the `FieldPath` for any nested tweet field with a `TweetEditAllowed` annotation + // that is set to false. These are the fields that this validator ensures cannot be edited. 
+ val uneditableFieldPaths: Seq[FieldPath] = { + DefinitionTraversal().collect(TweetDef) { + case (d: DEF.FieldDef, path) if (d.annotations.get(TweetEditAllowed).contains("false")) => + path + } + } + + // Tweet query options which include + // - any top level tweet field which either is an uneditable field, or contains an uneditable + // subfield. + // - the conversationControl field + // These fields must be present on the initial tweet in order for us to compare them against the + // edit tweet. + val previousTweetQueryOptions = { + // A set of the top level field ids for each (potentially nested) uneditable field. + val topLevelUneditableTweetFields = uneditableFieldPaths.map(_.ids.head).toSet + Options( + TweetQuery.Include( + tweetFields = topLevelUneditableTweetFields + Tweet.ConversationControlField.id + )) + } + + def validateUneditableFields(previousTweet: Tweet, editTweet: Tweet): Unit = { + // Collect uneditable fields that were edited + val invalidEditedFields = uneditableFieldPaths.flatMap { fieldPath => + val previousValue = + FieldPath.lensGet(CompiledScroogeValueExtractor, previousTweet, fieldPath) + val editValue = FieldPath.lensGet(CompiledScroogeValueExtractor, editTweet, fieldPath) + + if (previousValue != editValue) { + Some(fieldPath.toString) + } else { + None + } + } + + if (invalidEditedFields.nonEmpty) { + // If any inequalities are found, log them and throw an exception. 
+ val msg = "uneditable fields were edited: " + invalidEditedFields.mkString(",") + log.error(msg) + throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) + } + } + + def validateConversationControl( + previous: Option[ConversationControl], + edit: Option[ConversationControl] + ): Unit = { + import ConversationControl.ByInvitation + import ConversationControl.Community + import ConversationControl.Followers + + (previous, edit) match { + case (None, None) => () + case (Some(ByInvitation(_)), Some(ByInvitation(_))) => () + case (Some(Community(_)), Some(Community(_))) => () + case (Some(Followers(_)), Some(Followers(_))) => () + case (_, _) => + val msg = "conversationControl type was edited" + log.error(msg) + throw TweetCreateFailure.State(FieldEditNotAllowed, Some(msg)) + } + } + + def apply(tweetRepo: TweetRepository.Optional): Type = { (tweet, editOptions) => + Stitch.run( + editOptions match { + case Some(EditOptions(previousTweetId)) => { + // Query for the previous tweet so that we can compare the + // fields between the two tweets. + tweetRepo(previousTweetId, previousTweetQueryOptions).map { + case Some(previousTweet) => + validateUneditableFields(previousTweet, tweet) + validateConversationControl( + previousTweet.conversationControl, + tweet.conversationControl) + case _ => + // If the previous tweet is not found we cannot perform validations that + // compare tweet fields and we have to fail tweet creation. + throw TweetCreateFailure.State(InitialTweetNotFound) + } + } + // This is the case where this isn't an edit tweet (since editOptions = None) + // Since this tweet is not an edit there are no fields to validate. 
+ case _ => Stitch.Unit + } + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala new file mode 100644 index 000000000..64441439b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/EraseUserTweetsHandler.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.stats.Stat +import com.twitter.flockdb.client._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.thriftscala._ + +trait EraseUserTweetsHandler { + + val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] + + val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] +} + +/** + * This library allows you to erase all of a user's tweets. It's used to clean up + * tweets after a user deletes their account. + */ +object EraseUserTweetsHandler { + + /** + * Build a handler whose asyncEraseUserTweetsRequest arrow, when called, deletes one page's worth of tweets at the + * specified flock cursor. When the page of tweets has been deleted, another asyncEraseUserTweets + * request is made with the updated cursor location so that the next page of tweets can be processed. 
+ */ + def apply( + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + deleteTweet: FutureEffect[(TweetId, UserId)], + asyncEraseUserTweets: FutureArrow[AsyncEraseUserTweetsRequest, Unit], + stats: StatsReceiver, + sleep: () => Future[Unit] = () => Future.Unit + ): EraseUserTweetsHandler = + new EraseUserTweetsHandler { + val latencyStat: Stat = stats.stat("latency_ms") + val deletedTweetsStat: Stat = stats.stat("tweets_deleted_for_erased_user") + + val selectUserTweets: AsyncEraseUserTweetsRequest => Select[StatusGraph] = + (request: AsyncEraseUserTweetsRequest) => + UserTimelineGraph + .from(request.userId) + .withCursor(Cursor(request.flockCursor)) + + // For a provided list of tweetIds, delete each one sequentially, sleeping between each call + // This is a rate limiting mechanism to slow down deletions. + def deletePage(page: PageResult[Long], expectedUserId: UserId): Future[Unit] = + page.entries.foldLeft(Future.Unit) { (previousFuture, nextId) => + for { + _ <- previousFuture + _ <- sleep() + _ <- deleteTweet((nextId, expectedUserId)) + } yield () + } + + /** + * If we aren't on the last page, make another EraseUserTweets request to delete + * the next page of tweets + */ + val nextRequestOrEnd: (AsyncEraseUserTweetsRequest, PageResult[Long]) => Future[Unit] = + (request: AsyncEraseUserTweetsRequest, page: PageResult[Long]) => + if (page.nextCursor.isEnd) { + latencyStat.add(Time.fromMilliseconds(request.startTimestamp).untilNow.inMillis) + deletedTweetsStat.add(request.tweetCount + page.entries.size) + Future.Unit + } else { + asyncEraseUserTweets( + request.copy( + flockCursor = page.nextCursor.value, + tweetCount = request.tweetCount + page.entries.size + ) + ) + } + + override val eraseUserTweetsRequest: FutureArrow[EraseUserTweetsRequest, Unit] = + FutureArrow { request => + asyncEraseUserTweets( + AsyncEraseUserTweetsRequest( + userId = request.userId, + flockCursor = Cursor.start.value, + startTimestamp = Time.now.inMillis, + tweetCount 
= 0L + ) + ) + } + + override val asyncEraseUserTweetsRequest: FutureArrow[AsyncEraseUserTweetsRequest, Unit] = + FutureArrow { request => + for { + _ <- sleep() + + // get one page of tweets + page <- selectPage(selectUserTweets(request)) + + // delete tweets + _ <- deletePage(page, request.userId) + + // make call to delete the next page of tweets + _ <- nextRequestOrEnd(request, page) + } yield () + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala new file mode 100644 index 000000000..19cbbded0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GeoBuilder.scala @@ -0,0 +1,137 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.stats.Counter +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.PlaceKey +import com.twitter.tweetypie.repository.PlaceRepository +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ + +object GeoStats { + val topTenCountryCodes: Set[PlaceLanguage] = + Set("US", "JP", "GB", "ID", "BR", "SA", "TR", "MX", "ES", "CA") + + def apply(stats: StatsReceiver): Effect[Option[Place]] = { + val totalCount = stats.counter("total") + val notFoundCount = stats.counter("not_found") + val countryStats: Map[String, Counter] = + topTenCountryCodes.map(cc => cc -> stats.scope("with_country_code").counter(cc)).toMap + + val placeTypeStats: Map[PlaceType, Counter] = + Map( + PlaceType.Admin -> stats.counter("admin"), + PlaceType.City -> stats.counter("city"), + PlaceType.Country -> stats.counter("country"), + PlaceType.Neighborhood -> stats.counter("neighborhood"), + PlaceType.Poi -> stats.counter("poi"), + PlaceType.Unknown -> stats.counter("unknown") + ) + + Effect.fromPartial { + case Some(place) => { + totalCount.incr() + placeTypeStats(place.`type`).incr() + place.countryCode.foreach(cc => 
countryStats.get(cc).foreach(_.incr())) + } + case None => notFoundCount.incr() + } + } +} + +object GeoBuilder { + case class Request(createGeo: TweetCreateGeo, userGeoEnabled: Boolean, language: String) + + case class Result(geoCoordinates: Option[GeoCoordinates], placeId: Option[PlaceId]) + + type Type = FutureArrow[Request, Result] + + def apply(placeRepo: PlaceRepository.Type, rgc: ReverseGeocoder, stats: StatsReceiver): Type = { + val exceptionCounters = ExceptionCounter(stats) + + def ignoreFailures[A](future: Future[Option[A]]): Future[Option[A]] = + exceptionCounters(future).handle { case _ => None } + + def isValidPlaceId(placeId: String) = PlaceIdRegex.pattern.matcher(placeId).matches + + def isValidLatLon(latitude: Double, longitude: Double): Boolean = + latitude >= -90.0 && latitude <= 90.0 && + longitude >= -180.0 && longitude <= 180.0 && + // some clients send (0.0, 0.0) for unknown reasons, but this is highly unlikely to be + // valid and should be treated as if no coordinates were sent. if a place Id is provided, + // that will still be used. + (latitude != 0.0 || longitude != 0.0) + + // Count the number of times we erase geo information based on user preferences. + val geoErasedCounter = stats.counter("geo_erased") + // Count the number of times we override a user's preferences and add geo anyway. 
+ val geoOverriddenCounter = stats.counter("geo_overridden") + + val geoScope = stats.scope("create_geotagged_tweet") + + // Counter for geo tweets with neither lat lon nor place id data + val noGeoCounter = geoScope.counter("no_geo_info") + val invalidCoordinates = geoScope.counter("invalid_coordinates") + val inValidPlaceId = geoScope.counter("invalid_place_id") + val latlonStatsEffect = GeoStats(geoScope.scope("from_latlon")) + val placeIdStatsEffect = GeoStats(geoScope.scope("from_place_id")) + + def validateCoordinates(coords: GeoCoordinates): Option[GeoCoordinates] = + if (isValidLatLon(coords.latitude, coords.longitude)) Some(coords) + else { + invalidCoordinates.incr() + None + } + + def validatePlaceId(placeId: String): Option[String] = + if (isValidPlaceId(placeId)) Some(placeId) + else { + inValidPlaceId.incr() + None + } + + def getPlaceByRGC(coordinates: GeoCoordinates, language: String): Future[Option[Place]] = + ignoreFailures( + rgc((coordinates, language)).onSuccess(latlonStatsEffect) + ) + + def getPlaceById(placeId: String, language: String): Future[Option[Place]] = + ignoreFailures( + Stitch + .run(placeRepo(PlaceKey(placeId, language)).liftNotFoundToOption) + .onSuccess(placeIdStatsEffect) + ) + + FutureArrow[Request, Result] { request => + val createGeo = request.createGeo + val allowGeo = createGeo.overrideUserGeoSetting || request.userGeoEnabled + val overrideGeo = createGeo.overrideUserGeoSetting && !request.userGeoEnabled + + if (createGeo.placeId.isEmpty && createGeo.coordinates.isEmpty) { + noGeoCounter.incr() + Future.value(Result(None, None)) + } else if (!allowGeo) { + // Record that we had geo information but had to erase it based on user preferences. 
+ geoErasedCounter.incr() + Future.value(Result(None, None)) + } else { + if (overrideGeo) geoOverriddenCounter.incr() + + // treat invalid coordinates the same as no coordinates + val validatedCoordinates = createGeo.coordinates.flatMap(validateCoordinates) + val validatedPlaceId = createGeo.placeId.flatMap(validatePlaceId) + + for { + place <- (createGeo.placeId, validatedPlaceId, validatedCoordinates) match { + // if the request contains an invalid place id, we want to return None for the + // place instead of reverse-geocoding the coordinates + case (Some(_), None, _) => Future.None + case (_, Some(placeId), _) => getPlaceById(placeId, request.language) + case (_, _, Some(coords)) => getPlaceByRGC(coords, request.language) + case _ => Future.None + } + } yield Result(validatedCoordinates, place.map(_.id)) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala new file mode 100644 index 000000000..b74acf94d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetDeletedTweetsHandler.scala @@ -0,0 +1,119 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.InternalServerError +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.DeleteState +import com.twitter.tweetypie.storage.DeletedTweetResponse +import com.twitter.tweetypie.storage.RateLimited +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.thriftscala._ + +/** + * Allow access to raw, unhydrated deleted tweet fields from storage backends (currently Manhattan) + */ +object GetDeletedTweetsHandler { + + type Type = FutureArrow[GetDeletedTweetsRequest,
Seq[GetDeletedTweetResult]] + type TweetsExist = Seq[TweetId] => Stitch[Set[TweetId]] + + def processTweetResponse(response: Try[GetTweet.Response]): Stitch[Option[Tweet]] = { + import GetTweet.Response._ + + response match { + case Return(Found(tweet)) => Stitch.value(Some(tweet)) + case Return(Deleted | NotFound | BounceDeleted(_)) => Stitch.None + case Throw(_: RateLimited) => Stitch.exception(OverCapacity("manhattan")) + case Throw(exception) => Stitch.exception(exception) + } + } + + def convertDeletedTweetResponse( + r: DeletedTweetResponse, + extantIds: Set[TweetId] + ): GetDeletedTweetResult = { + val id = r.tweetId + if (extantIds.contains(id) || r.deleteState == DeleteState.NotDeleted) { + GetDeletedTweetResult(id, DeletedTweetState.NotDeleted) + } else { + r.overallResponse match { + case TweetResponseCode.Success => + GetDeletedTweetResult(id, convertState(r.deleteState), r.tweet) + case TweetResponseCode.OverCapacity => throw OverCapacity("manhattan") + case _ => + throw InternalServerError( + s"Unhandled response ${r.overallResponse} from getDeletedTweets for tweet $id" + ) + } + } + } + + def convertState(d: DeleteState): DeletedTweetState = d match { + case DeleteState.NotFound => DeletedTweetState.NotFound + case DeleteState.NotDeleted => DeletedTweetState.NotDeleted + case DeleteState.SoftDeleted => DeletedTweetState.SoftDeleted + // Callers of this endpoint treat BounceDeleted tweets the same as SoftDeleted + case DeleteState.BounceDeleted => DeletedTweetState.SoftDeleted + case DeleteState.HardDeleted => DeletedTweetState.HardDeleted + } + + /** + * Converts [[TweetStorageClient.GetTweet]] into a function that returns extant tweet ids from + * the original list. This method is used to check underlying storage against cache, preferring + * cache if a tweet exists there.
+ */ + def tweetsExist(getTweet: TweetStorageClient.GetTweet): TweetsExist = + (tweetIds: Seq[TweetId]) => + for { + response <- Stitch.traverse(tweetIds) { tweetId => getTweet(tweetId).liftToTry } + tweets <- Stitch.collect(response.map(processTweetResponse)) + } yield tweets.flatten.map(_.id).toSet.filter(tweetIds.contains) + + def apply( + getDeletedTweets: TweetStorageClient.GetDeletedTweets, + tweetsExist: TweetsExist, + stats: StatsReceiver + ): Type = { + + val notFound = stats.counter("not_found") + val notDeleted = stats.counter("not_deleted") + val softDeleted = stats.counter("soft_deleted") + val hardDeleted = stats.counter("hard_deleted") + val unknown = stats.counter("unknown") + + def trackState(results: Seq[GetDeletedTweetResult]): Unit = + results.foreach { r => + r.state match { + case DeletedTweetState.NotFound => notFound.incr() + case DeletedTweetState.NotDeleted => notDeleted.incr() + case DeletedTweetState.SoftDeleted => softDeleted.incr() + case DeletedTweetState.HardDeleted => hardDeleted.incr() + case _ => unknown.incr() + } + } + + FutureArrow { request => + Stitch.run { + Stitch + .join( + getDeletedTweets(request.tweetIds), + tweetsExist(request.tweetIds) + ) + .map { + case (deletedTweetResponses, extantIds) => + val responseIds = deletedTweetResponses.map(_.tweetId) + assert( + responseIds == request.tweetIds, + s"getDeletedTweets response does not match order of request: Request ids " + + s"(${request.tweetIds.mkString(", ")}) != response ids (${responseIds + .mkString(", ")})" + ) + deletedTweetResponses.map { r => convertDeletedTweetResponse(r, extantIds) } + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala new file mode 100644 index 000000000..c9b096e0f --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsByUserHandler.scala @@ -0,0 +1,188 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.flockdb.client.Cursor +import com.twitter.flockdb.client.PageResult +import com.twitter.flockdb.client.Select +import com.twitter.flockdb.client.StatusGraph +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.flockdb.client.thriftscala.EdgeState +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserResult +import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest + +object GetStoredTweetsByUserHandler { + type Type = FutureArrow[GetStoredTweetsByUserRequest, GetStoredTweetsByUserResult] + + def apply( + getStoredTweetsHandler: GetStoredTweetsHandler.Type, + getStoredTweet: TweetStorageClient.GetStoredTweet, + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + maxPages: Int + ): Type = { + FutureArrow { request => + val options = request.options.getOrElse(GetStoredTweetsByUserOptions()) + + val startTimeMsec: Long = options.startTimeMsec.getOrElse(0L) + val endTimeMsec: Long = options.endTimeMsec.getOrElse(Time.now.inMillis) + val cursor = options.cursor.map(Cursor(_)).getOrElse { + if (options.startFromOldest) Cursor.lowest else Cursor.highest + } + + getNextTweetIdsInTimeRange( + request.userId, + startTimeMsec, + endTimeMsec, + cursor, + selectPage, + getStoredTweet, + maxPages, + numTries = 0 + ).flatMap { + case (tweetIds, cursor) => + val getStoredTweetsRequest = toGetStoredTweetsRequest(tweetIds, request.userId, 
options) + + getStoredTweetsHandler(getStoredTweetsRequest) + .map { getStoredTweetsResults => + GetStoredTweetsByUserResult( + storedTweets = getStoredTweetsResults.map(_.storedTweet), + cursor = if (cursor.isEnd) None else Some(cursor.value) + ) + } + } + } + } + + private def toGetStoredTweetsRequest( + tweetIds: Seq[TweetId], + userId: UserId, + getStoredTweetsByUserOptions: GetStoredTweetsByUserOptions + ): GetStoredTweetsRequest = { + + val options: GetStoredTweetsOptions = GetStoredTweetsOptions( + bypassVisibilityFiltering = getStoredTweetsByUserOptions.bypassVisibilityFiltering, + forUserId = if (getStoredTweetsByUserOptions.setForUserId) Some(userId) else None, + additionalFieldIds = getStoredTweetsByUserOptions.additionalFieldIds + ) + + GetStoredTweetsRequest( + tweetIds = tweetIds, + options = Some(options) + ) + } + + private def getNextTweetIdsInTimeRange( + userId: UserId, + startTimeMsec: Long, + endTimeMsec: Long, + cursor: Cursor, + selectPage: FutureArrow[Select[StatusGraph], PageResult[Long]], + getStoredTweet: TweetStorageClient.GetStoredTweet, + maxPages: Int, + numTries: Int + ): Future[(Seq[TweetId], Cursor)] = { + val select = Select( + sourceId = userId, + graph = UserTimelineGraph, + stateIds = + Some(Seq(EdgeState.Archived.value, EdgeState.Positive.value, EdgeState.Removed.value)) + ).withCursor(cursor) + + def inTimeRange(timestamp: Long): Boolean = + timestamp >= startTimeMsec && timestamp <= endTimeMsec + def pastTimeRange(timestamps: Seq[Long]) = { + if (cursor.isAscending) { + timestamps.max > endTimeMsec + } else { + timestamps.min < startTimeMsec + } + } + + val pageResultFuture: Future[PageResult[Long]] = selectPage(select) + + pageResultFuture.flatMap { pageResult => + val groupedIds = pageResult.entries.groupBy(SnowflakeId.isSnowflakeId) + val nextCursor = if (cursor.isAscending) pageResult.previousCursor else pageResult.nextCursor + + // Timestamps for the creation of Tweets with snowflake IDs can be calculated from the IDs + 
// themselves. + val snowflakeIdsTimestamps: Seq[(Long, Long)] = groupedIds.getOrElse(true, Seq()).map { id => + val snowflakeTimeMillis = SnowflakeId.unixTimeMillisFromId(id) + (id, snowflakeTimeMillis) + } + + // For non-snowflake Tweets, we need to fetch the Tweet data from Manhattan to see when the + // Tweet was created. createdAtSecs is in seconds, so it is scaled to milliseconds here. + val nonSnowflakeIdsTimestamps: Future[Seq[(Long, Long)]] = Stitch.run( + Stitch + .traverse(groupedIds.getOrElse(false, Seq()))(getStoredTweet) + .map { + _.flatMap { + case GetStoredTweet.Response.FoundAny(tweet, _, _, _, _) => { + if (tweet.coreData.exists(_.createdAtSecs > 0)) { + Some((tweet.id, tweet.coreData.get.createdAtSecs * 1000L)) + } else None + } + case _ => None + } + }) + + nonSnowflakeIdsTimestamps.flatMap { nonSnowflakeList => + val allTweetIdsAndTimestamps = snowflakeIdsTimestamps ++ nonSnowflakeList + val filteredTweetIds = allTweetIdsAndTimestamps + .filter { + case (_, ts) => inTimeRange(ts) + } + .map(_._1) + + if (nextCursor.isEnd) { + // We've considered the last Tweet for this User. There are no more Tweets to return. + Future.value((filteredTweetIds, Cursor.end)) + } else if (allTweetIdsAndTimestamps.nonEmpty && + pastTimeRange(allTweetIdsAndTimestamps.map(_._2))) { + // At least one Tweet returned from Tflock has a timestamp past our time range, i.e. + // greater than the end time (if we're fetching in an ascending order) or lower than the + // start time (if we're fetching in a descending order). There is no point in looking at + // any more Tweets from this User as they'll all be outside the time range. + Future.value((filteredTweetIds, Cursor.end)) + } else if (filteredTweetIds.isEmpty) { + // We're here because one of two things happened: + // 1. allTweetIdsAndTimestamps is empty: Either Tflock has returned an empty page of Tweets + // or we weren't able to fetch timestamps for any of the Tweets Tflock returned. In this + // case, we fetch the next page of Tweets. + // 2.
allTweetIdsAndTimestamps is non-empty but filteredTweetIds is empty: The current page + // has no Tweets inside the requested time range. We fetch the next page of Tweets and + // try again. + // If we hit the limit for the maximum number of pages from tflock to be requested, we + // return an empty list of Tweets with the cursor for the caller to try again. + + if (numTries == maxPages) { + Future.value((filteredTweetIds, nextCursor)) + } else { + getNextTweetIdsInTimeRange( + userId = userId, + startTimeMsec = startTimeMsec, + endTimeMsec = endTimeMsec, + cursor = nextCursor, + selectPage = selectPage, + getStoredTweet = getStoredTweet, + maxPages = maxPages, + numTries = numTries + 1 + ) + } + } else { + // filteredTweetIds is non-empty: There are some Tweets in this page that are within the + // requested time range, and we aren't out of the time range yet. We return the Tweets we + // have and set the cursor forward for the next request. + Future.value((filteredTweetIds, nextCursor)) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala new file mode 100644 index 000000000..ab8bfb4ad --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetStoredTweetsHandler.scala @@ -0,0 +1,161 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.StoredTweetResult._ +import com.twitter.tweetypie.core.StoredTweetResult +import com.twitter.tweetypie.core.TweetResult +import com.twitter.tweetypie.FieldId +import com.twitter.tweetypie.FutureArrow +import com.twitter.tweetypie.repository.CacheControl +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetResultRepository +import com.twitter.tweetypie.thriftscala.{BounceDeleted => BounceDeletedState} +import 
com.twitter.tweetypie.thriftscala.{ForceAdded => ForceAddedState} +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsOptions +import com.twitter.tweetypie.thriftscala.GetStoredTweetsResult +import com.twitter.tweetypie.thriftscala.{HardDeleted => HardDeletedState} +import com.twitter.tweetypie.thriftscala.{NotFound => NotFoundState} +import com.twitter.tweetypie.thriftscala.{SoftDeleted => SoftDeletedState} +import com.twitter.tweetypie.thriftscala.StatusCounts +import com.twitter.tweetypie.thriftscala.StoredTweetError +import com.twitter.tweetypie.thriftscala.StoredTweetInfo +import com.twitter.tweetypie.thriftscala.StoredTweetState +import com.twitter.tweetypie.thriftscala.{Undeleted => UndeletedState} + +object GetStoredTweetsHandler { + type Type = FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] + + def apply(tweetRepo: TweetResultRepository.Type): Type = { + FutureArrow[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] { request => + val requestOptions: GetStoredTweetsOptions = + request.options.getOrElse(GetStoredTweetsOptions()) + val queryOptions = toTweetQueryOptions(requestOptions) + + val result = Stitch + .traverse(request.tweetIds) { tweetId => + tweetRepo(tweetId, queryOptions) + .map(toStoredTweetInfo) + .map(GetStoredTweetsResult(_)) + .handle { + case _ => + GetStoredTweetsResult( + StoredTweetInfo( + tweetId = tweetId, + errors = Seq(StoredTweetError.FailedFetch) + ) + ) + } + } + + Stitch.run(result) + } + } + + private def toTweetQueryOptions(options: GetStoredTweetsOptions): TweetQuery.Options = { + val countsFields: Set[FieldId] = Set( + StatusCounts.FavoriteCountField.id, + StatusCounts.ReplyCountField.id, + StatusCounts.RetweetCountField.id, + StatusCounts.QuoteCountField.id + ) + + TweetQuery.Options( + include = GetTweetsHandler.BaseInclude.also( + tweetFields = Set(Tweet.CountsField.id) ++ options.additionalFieldIds, + countsFields = countsFields + ), 
+ cacheControl = CacheControl.NoCache, + enforceVisibilityFiltering = !options.bypassVisibilityFiltering, + forUserId = options.forUserId, + requireSourceTweet = false, + fetchStoredTweets = true + ) + } + + private def toStoredTweetInfo(tweetResult: TweetResult): StoredTweetInfo = { + def translateErrors(errors: Seq[StoredTweetResult.Error]): Seq[StoredTweetError] = { + errors.map { + case StoredTweetResult.Error.Corrupt => StoredTweetError.Corrupt + case StoredTweetResult.Error.FieldsMissingOrInvalid => + StoredTweetError.FieldsMissingOrInvalid + case StoredTweetResult.Error.ScrubbedFieldsPresent => StoredTweetError.ScrubbedFieldsPresent + case StoredTweetResult.Error.ShouldBeHardDeleted => StoredTweetError.ShouldBeHardDeleted + } + } + + val tweetData = tweetResult.value + + tweetData.storedTweetResult match { + case Some(storedTweetResult) => { + val (tweet, storedTweetState, errors) = storedTweetResult match { + case Present(errors, _) => (Some(tweetData.tweet), None, translateErrors(errors)) + case HardDeleted(softDeletedAtMsec, hardDeletedAtMsec) => + ( + Some(tweetData.tweet), + Some( + StoredTweetState.HardDeleted( + HardDeletedState(softDeletedAtMsec, hardDeletedAtMsec))), + Seq() + ) + case SoftDeleted(softDeletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.SoftDeleted(SoftDeletedState(softDeletedAtMsec))), + translateErrors(errors) + ) + case BounceDeleted(deletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.BounceDeleted(BounceDeletedState(deletedAtMsec))), + translateErrors(errors) + ) + case Undeleted(undeletedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.Undeleted(UndeletedState(undeletedAtMsec))), + translateErrors(errors) + ) + case ForceAdded(addedAtMsec, errors, _) => + ( + Some(tweetData.tweet), + Some(StoredTweetState.ForceAdded(ForceAddedState(addedAtMsec))), + translateErrors(errors) + ) + case Failed(errors) => (None, None, translateErrors(errors)) + 
case NotFound => (None, Some(StoredTweetState.NotFound(NotFoundState())), Seq()) + } + + StoredTweetInfo( + tweetId = tweetData.tweet.id, + tweet = tweet.map(sanitizeNullMediaFields), + storedTweetState = storedTweetState, + errors = errors + ) + } + + case None => + StoredTweetInfo( + tweetId = tweetData.tweet.id, + tweet = Some(sanitizeNullMediaFields(tweetData.tweet)) + ) + } + } + + private def sanitizeNullMediaFields(tweet: Tweet): Tweet = { + // Some media fields are initialized as `null` at the storage layer. + // If the Tweet is meant to be hard deleted, or is not hydrated for + // some other reason but the media entities still exist, we sanitize + // these fields to allow serialization. + tweet.copy(media = tweet.media.map(_.map { mediaEntity => + mediaEntity.copy( + url = Option(mediaEntity.url).getOrElse(""), + mediaUrl = Option(mediaEntity.mediaUrl).getOrElse(""), + mediaUrlHttps = Option(mediaEntity.mediaUrlHttps).getOrElse(""), + displayUrl = Option(mediaEntity.displayUrl).getOrElse(""), + expandedUrl = Option(mediaEntity.expandedUrl).getOrElse(""), + ) + })) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala new file mode 100644 index 000000000..4100a76dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetCountsHandler.scala @@ -0,0 +1,44 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweetCounts` endpoint. 
+ */ +object GetTweetCountsHandler { + type Type = FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]] + + def apply(repo: TweetCountsRepository.Type): Type = { + + def idToResult(id: TweetId, req: GetTweetCountsRequest): Stitch[GetTweetCountsResult] = + Stitch + .join( + // .liftToOption() converts any failures to None result + if (req.includeRetweetCount) repo(RetweetsKey(id)).liftToOption() else Stitch.None, + if (req.includeReplyCount) repo(RepliesKey(id)).liftToOption() else Stitch.None, + if (req.includeFavoriteCount) repo(FavsKey(id)).liftToOption() else Stitch.None, + if (req.includeQuoteCount) repo(QuotesKey(id)).liftToOption() else Stitch.None, + if (req.includeBookmarkCount) repo(BookmarksKey(id)).liftToOption() else Stitch.None + ).map { + case (retweetCount, replyCount, favoriteCount, quoteCount, bookmarkCount) => + GetTweetCountsResult( + tweetId = id, + retweetCount = retweetCount, + replyCount = replyCount, + favoriteCount = favoriteCount, + quoteCount = quoteCount, + bookmarkCount = bookmarkCount + ) + } + + FutureArrow[GetTweetCountsRequest, Seq[GetTweetCountsResult]] { request => + Stitch.run( + Stitch.traverse(request.tweetIds)(idToResult(_, request)) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala new file mode 100644 index 000000000..55ab6cb18 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetFieldsHandler.scala @@ -0,0 +1,395 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.container.thriftscala.MaterializeAsTweetFieldsRequest +import com.twitter.context.TestingSignalsContext +import com.twitter.servo.util.FutureArrow +import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import 
com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.DeletedTweetVisibilityRepository +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala.TweetFieldsResultState +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweetFields` endpoint. + */ +object GetTweetFieldsHandler { + type Type = GetTweetFieldsRequest => Future[Seq[GetTweetFieldsResult]] + + def apply( + tweetRepo: TweetResultRepository.Type, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Type = { + FutureArrow[GetTweetFieldsRequest, Seq[GetTweetFieldsResult]] { request => + val queryOptions = toTweetQueryOptions(request.options) + + Stitch.run( + Stitch.traverse(request.tweetIds) { id => + tweetRepo(id, queryOptions).liftToTry.flatMap { tweetResult => + toGetTweetFieldsResult( + id, + tweetResult, + request.options, + deletedTweetVisibilityRepo, + containerAsGetTweetFieldsResultRepo, + stats, + shouldMaterializeContainers + ) + } + } + ) + } + } + + /** + * Converts a `GetTweetFieldsOptions` into an internal `TweetQuery.Options`. 
+ */ + def toTweetQueryOptions(options: GetTweetFieldsOptions): TweetQuery.Options = { + val includes = options.tweetIncludes + val shouldSkipCache = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty + val cacheControl = + if (shouldSkipCache) CacheControl.NoCache + else if (options.doNotCache) CacheControl.ReadOnlyCache + else CacheControl.ReadWriteCache + + TweetQuery.Options( + include = TweetQuery + .Include( + tweetFields = includes.collect { + case TweetInclude.TweetFieldId(id) => id + case TweetInclude.CountsFieldId(_) => Tweet.CountsField.id + case TweetInclude.MediaEntityFieldId(_) => Tweet.MediaField.id + }.toSet, + countsFields = includes.collect { case TweetInclude.CountsFieldId(id) => id }.toSet, + mediaFields = includes.collect { case TweetInclude.MediaEntityFieldId(id) => id }.toSet, + quotedTweet = options.includeQuotedTweet, + pastedMedia = true + ).also( + /** + * Always fetching underlying creatives container id. see + * [[hydrateCreativeContainerBackedTweet]] for more detail. 
+ */ + tweetFields = Seq(Tweet.UnderlyingCreativesContainerIdField.id) + ), + cacheControl = cacheControl, + enforceVisibilityFiltering = options.visibilityPolicy == TweetVisibilityPolicy.UserVisible, + safetyLevel = options.safetyLevel.getOrElse(SafetyLevel.FilterNone), + forUserId = options.forUserId, + languageTag = options.languageTag.getOrElse("en"), + cardsPlatformKey = options.cardsPlatformKey, + extensionsArgs = options.extensionsArgs, + forExternalConsumption = true, + simpleQuotedTweet = options.simpleQuotedTweet + ) + } + + def toGetTweetFieldsResult( + tweetId: TweetId, + res: Try[TweetResult], + options: GetTweetFieldsOptions, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + containerAsGetTweetFieldsResultRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + val measureRacyReads: TweetId => Unit = trackLossyReadsAfterWrite( + stats.stat("racy_reads", "get_tweet_fields"), + Duration.fromSeconds(3) + ) + + res match { + case Throw(NotFound) => + measureRacyReads(tweetId) + Stitch.value(GetTweetFieldsResult(tweetId, NotFoundResultState)) + + case Throw(ex) => + val resultStateStitch = failureResultState(ex) match { + case notFoundResultState @ TweetFieldsResultState.NotFound(_) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(withVisibilityFilteredReason(notFoundResultState, _)) + case res => Stitch.value(res) + } + resultStateStitch.map(res => GetTweetFieldsResult(tweetId, res)) + case Return(r) => + toTweetFieldsResult( + r, + options, + deletedTweetVisibilityRepo, + containerAsGetTweetFieldsResultRepo, + stats, + shouldMaterializeContainers + ).flatMap { getTweetFieldsResult => + hydrateCreativeContainerBackedTweet( + r.value.tweet.underlyingCreativesContainerId, + 
getTweetFieldsResult, + options, + containerAsGetTweetFieldsResultRepo, + tweetId, + stats, + shouldMaterializeContainers + ) + } + } + } + + private def failureResultState(ex: Throwable): TweetFieldsResultState = + ex match { + case FilteredState.Unavailable.TweetDeleted => DeletedResultState + case FilteredState.Unavailable.BounceDeleted => BounceDeletedResultState + case FilteredState.Unavailable.SourceTweetNotFound(d) => notFoundResultState(deleted = d) + case FilteredState.Unavailable.Author.NotFound => NotFoundResultState + case fs: FilteredState.HasFilteredReason => toFilteredState(fs.filteredReason) + case OverCapacity(_) => toFailedState(overcapacity = true, None) + case _ => toFailedState(overcapacity = false, Some(ex.toString)) + } + + private val NotFoundResultState = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + + private val DeletedResultState = TweetFieldsResultState.NotFound( + TweetFieldsResultNotFound(deleted = true) + ) + + private val BounceDeletedResultState = TweetFieldsResultState.NotFound( + TweetFieldsResultNotFound(deleted = true, bounceDeleted = true) + ) + + def notFoundResultState(deleted: Boolean): TweetFieldsResultState.NotFound = + if (deleted) DeletedResultState else NotFoundResultState + + private def toFailedState( + overcapacity: Boolean, + message: Option[String] + ): TweetFieldsResultState = + TweetFieldsResultState.Failed(TweetFieldsResultFailed(overcapacity, message)) + + private def toFilteredState(reason: FilteredReason): TweetFieldsResultState = + TweetFieldsResultState.Filtered( + TweetFieldsResultFiltered(reason = reason) + ) + + /** + * Converts a `TweetResult` into a `GetTweetFieldsResult`. For retweets, missing or filtered source + * tweets cause the retweet to be treated as missing or filtered. 
+ */ + private def toTweetFieldsResult( + tweetResult: TweetResult, + options: GetTweetFieldsOptions, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + val primaryResultState = toTweetFieldsResultState(tweetResult, options) + val quotedResultStateStitch = primaryResultState match { + case TweetFieldsResultState.Found(_) if options.includeQuotedTweet => + val tweetData = tweetResult.value.sourceTweetResult + .getOrElse(tweetResult) + .value + tweetData.quotedTweetResult + .map { + case QuotedTweetResult.NotFound => Stitch.value(NotFoundResultState) + case QuotedTweetResult.Filtered(state) => + val resultState = failureResultState(state) + + (tweetData.tweet.quotedTweet, resultState) match { + //When QT exists => contribute VF filtered reason to result state + case (Some(qt), notFoundResultState @ TweetFieldsResultState.NotFound(_)) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + state, + qt.tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = true + ) + ).map(withVisibilityFilteredReason(notFoundResultState, _)) + //When QT is absent => result state without filtered reason + case _ => Stitch.value(resultState) + } + case QuotedTweetResult.Found(res) => + Stitch + .value(toTweetFieldsResultState(res, options)) + .flatMap { resultState => + hydrateCreativeContainerBackedTweet( + creativesContainerId = res.value.tweet.underlyingCreativesContainerId, + originalGetTweetFieldsResult = GetTweetFieldsResult( + tweetId = res.value.tweet.id, + tweetResult = resultState, + ), + getTweetFieldsRequestOptions = options, + creativesContainerRepo = creativesContainerRepo, + res.value.tweet.id, + stats, + shouldMaterializeContainers + ) + } + .map(_.tweetResult) + } + //Quoted tweet result not requested + 
case _ => None + } + + quotedResultStateStitch + .map(qtStitch => qtStitch.map(Some(_))) + .getOrElse(Stitch.None) + .map(qtResult => + GetTweetFieldsResult( + tweetId = tweetResult.value.tweet.id, + tweetResult = primaryResultState, + quotedTweetResult = qtResult + )) + } + + /** + * @return a copy of resultState with filtered reason when @param filteredReasonOpt is present + */ + private def withVisibilityFilteredReason( + resultState: TweetFieldsResultState.NotFound, + filteredReasonOpt: Option[FilteredReason] + ): TweetFieldsResultState.NotFound = { + filteredReasonOpt match { + case Some(fs) => + resultState.copy( + notFound = resultState.notFound.copy( + filteredReason = Some(fs) + )) + case _ => resultState + } + } + + private def toTweetFieldsResultState( + tweetResult: TweetResult, + options: GetTweetFieldsOptions + ): TweetFieldsResultState = { + val tweetData = tweetResult.value + val suppressReason = tweetData.suppress.map(_.filteredReason) + val tweetFailedFields = tweetResult.state.failedFields + val sourceTweetFailedFields = + tweetData.sourceTweetResult.map(_.state.failedFields).getOrElse(Set()) + val sourceTweetOpt = tweetData.sourceTweetResult.map(_.value.tweet) + val sourceTweetSuppressReason = + tweetData.sourceTweetResult.flatMap(_.value.suppress.map(_.filteredReason)) + val isTweetPartial = tweetFailedFields.nonEmpty || sourceTweetFailedFields.nonEmpty + + val tweetFoundResult = tweetData.sourceTweetResult match { + case None => + // if `sourceTweetResult` is empty, this isn't a retweet + TweetFieldsResultFound( + tweet = tweetData.tweet, + suppressReason = suppressReason + ) + case Some(r) => + // if the source tweet result state is Found, merge that into the primary result + TweetFieldsResultFound( + tweet = tweetData.tweet, + retweetedTweet = sourceTweetOpt.filter(_ => options.includeRetweetedTweet), + suppressReason = suppressReason.orElse(sourceTweetSuppressReason) + ) + } + + if (isTweetPartial) { + TweetFieldsResultState.Failed( + 
TweetFieldsResultFailed( + overCapacity = false, + message = Some( + "Failed to load: " + (tweetFailedFields ++ sourceTweetFailedFields).mkString(", ")), + partial = Some( + TweetFieldsPartial( + found = tweetFoundResult, + missingFields = tweetFailedFields, + sourceTweetMissingFields = sourceTweetFailedFields + ) + ) + ) + ) + } else { + TweetFieldsResultState.Found( + tweetFoundResult + ) + } + } + + /** + * if tweet data is backed by creatives container, it'll be hydrated from creatives + * container service. + */ + private def hydrateCreativeContainerBackedTweet( + creativesContainerId: Option[Long], + originalGetTweetFieldsResult: GetTweetFieldsResult, + getTweetFieldsRequestOptions: GetTweetFieldsOptions, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetFieldsType, + tweetId: Long, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetFieldsResult] = { + // creatives container backed tweet stats + val ccTweetMaterialized = stats.scope("creatives_container", "get_tweet_fields") + val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests") + val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success") + val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed") + val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered") + + ( + creativesContainerId, + originalGetTweetFieldsResult.tweetResult, + getTweetFieldsRequestOptions.disableTweetMaterialization, + shouldMaterializeContainers() + ) match { + // 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence. + // 2. if the frontend tweet is suppressed for any reason, respect that and skip this hydration.
+ // (this logic can be revisited and improved further) + case (None, _, _, _) => + Stitch.value(originalGetTweetFieldsResult) + case (Some(_), _, _, false) => + ccTweetMaterializeFiltered.counter("decider_suppressed").incr() + Stitch.value { + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + ) + } + case (Some(containerId), TweetFieldsResultState.Found(_), false, _) => + ccTweetMaterializeRequests.incr() + val materializationRequest = + MaterializeAsTweetFieldsRequest(containerId, tweetId, Some(originalGetTweetFieldsResult)) + creativesContainerRepo( + materializationRequest, + getTweetFieldsRequestOptions + ).onSuccess(_ => ccTweetMaterializeSuccess.incr()) + .onFailure(_ => ccTweetMaterializeFailed.incr()) + .handle { + case ex => + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = failureResultState(ex) + ) + } + case (Some(_), _, true, _) => + ccTweetMaterializeFiltered.counter("suppressed").incr() + Stitch.value( + GetTweetFieldsResult( + tweetId = tweetId, + tweetResult = TweetFieldsResultState.NotFound(TweetFieldsResultNotFound()) + ) + ) + case (Some(_), state, _, _) => + ccTweetMaterializeFiltered.counter(state.getClass.getName).incr() + Stitch.value(originalGetTweetFieldsResult) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala new file mode 100644 index 000000000..f0f144dd5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/GetTweetsHandler.scala @@ -0,0 +1,415 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.container.thriftscala.MaterializeAsTweetRequest +import com.twitter.context.TestingSignalsContext +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.servo.util.FutureArrow 
+import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Handler for the `getTweets` endpoint. + */ +object GetTweetsHandler { + type Type = FutureArrow[GetTweetsRequest, Seq[GetTweetResult]] + + /** + * A `TweetQuery.Include` instance with options set as the default base options + * for the `getTweets` endpoint. + */ + val BaseInclude: TweetQuery.Include = + TweetQuery.Include( + tweetFields = Set( + Tweet.CoreDataField.id, + Tweet.UrlsField.id, + Tweet.MentionsField.id, + Tweet.MediaField.id, + Tweet.HashtagsField.id, + Tweet.CashtagsField.id, + Tweet.TakedownCountryCodesField.id, + Tweet.TakedownReasonsField.id, + Tweet.DeviceSourceField.id, + Tweet.LanguageField.id, + Tweet.ContributorField.id, + Tweet.QuotedTweetField.id, + Tweet.UnderlyingCreativesContainerIdField.id, + ), + pastedMedia = true + ) + + def apply( + tweetRepo: TweetResultRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Type = { + FutureArrow[GetTweetsRequest, Seq[GetTweetResult]] { request => + val requestOptions = request.options.getOrElse(GetTweetOptions()) + + val invalidAdditionalFields = + requestOptions.additionalFieldIds.filter(!AdditionalFields.isAdditionalFieldId(_)) + + if (invalidAdditionalFields.nonEmpty) { + Future.exception( + ClientError( + ClientErrorCause.BadRequest, + "Requested additional fields contain invalid field id " + + s"${invalidAdditionalFields.mkString(", ")}. Additional fields ids must be greater than 100." 
+ ) + ) + } else { + val opts = toTweetQueryOptions(requestOptions) + val measureRacyReads: TweetId => Unit = trackLossyReadsAfterWrite( + stats.stat("racy_reads", "get_tweets"), + Duration.fromSeconds(3) + ) + + Stitch.run( + Stitch.traverse(request.tweetIds) { id => + tweetRepo(id, opts).liftToTry + .flatMap { + case Throw(NotFound) => + measureRacyReads(id) + + Stitch.value(GetTweetResult(id, StatusState.NotFound)) + case Throw(ex) => + failureResult(deletedTweetVisibilityRepo, id, requestOptions, ex) + case Return(r) => + toGetTweetResult( + deletedTweetVisibilityRepo, + creativesContainerRepo, + requestOptions, + tweetResult = r, + includeSourceTweet = requestOptions.includeSourceTweet, + includeQuotedTweet = requestOptions.includeQuotedTweet, + stats, + shouldMaterializeContainers + ) + }.flatMap { getTweetResult => + // check if tweet data is backed by creatives container and needs to be hydrated from creatives + // container service. + hydrateCreativeContainerBackedTweet( + getTweetResult, + requestOptions, + creativesContainerRepo, + stats, + shouldMaterializeContainers + ) + } + } + ) + } + } + } + + def toTweetQueryOptions(options: GetTweetOptions): TweetQuery.Options = { + val shouldSkipCache = TestingSignalsContext().flatMap(_.simulateBackPressure).nonEmpty + val cacheControl = + if (shouldSkipCache) CacheControl.NoCache + else if (options.doNotCache) CacheControl.ReadOnlyCache + else CacheControl.ReadWriteCache + + val countsFields = toCountsFields(options) + val mediaFields = toMediaFields(options) + + TweetQuery.Options( + include = BaseInclude.also( + tweetFields = toTweetFields(options, countsFields), + countsFields = countsFields, + mediaFields = mediaFields, + quotedTweet = Some(options.includeQuotedTweet) + ), + cacheControl = cacheControl, + cardsPlatformKey = options.cardsPlatformKey, + excludeReported = options.excludeReported, + enforceVisibilityFiltering = !options.bypassVisibilityFiltering, + safetyLevel = 
options.safetyLevel.getOrElse(SafetyLevel.FilterDefault), + forUserId = options.forUserId, + languageTag = options.languageTag, + extensionsArgs = options.extensionsArgs, + forExternalConsumption = true, + simpleQuotedTweet = options.simpleQuotedTweet + ) + } + + private def toTweetFields(opts: GetTweetOptions, countsFields: Set[FieldId]): Set[FieldId] = { + val bldr = Set.newBuilder[FieldId] + + bldr ++= opts.additionalFieldIds + + if (opts.includePlaces) bldr += Tweet.PlaceField.id + if (opts.forUserId.nonEmpty) { + if (opts.includePerspectivals) bldr += Tweet.PerspectiveField.id + if (opts.includeConversationMuted) bldr += Tweet.ConversationMutedField.id + } + if (opts.includeCards && opts.cardsPlatformKey.isEmpty) bldr += Tweet.CardsField.id + if (opts.includeCards && opts.cardsPlatformKey.nonEmpty) bldr += Tweet.Card2Field.id + if (opts.includeProfileGeoEnrichment) bldr += Tweet.ProfileGeoEnrichmentField.id + + if (countsFields.nonEmpty) bldr += Tweet.CountsField.id + + if (opts.includeCardUri) bldr += Tweet.CardReferenceField.id + + bldr.result() + } + + private def toCountsFields(opts: GetTweetOptions): Set[FieldId] = { + val bldr = Set.newBuilder[FieldId] + + if (opts.includeRetweetCount) bldr += StatusCounts.RetweetCountField.id + if (opts.includeReplyCount) bldr += StatusCounts.ReplyCountField.id + if (opts.includeFavoriteCount) bldr += StatusCounts.FavoriteCountField.id + if (opts.includeQuoteCount) bldr += StatusCounts.QuoteCountField.id + + bldr.result() + } + + private def toMediaFields(opts: GetTweetOptions): Set[FieldId] = { + if (opts.includeMediaAdditionalMetadata) + Set(MediaEntity.AdditionalMetadataField.id) + else + Set.empty + } + + /** + * Converts a `TweetResult` into a `GetTweetResult`. 
+ */ + def toGetTweetResult( + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + options: GetTweetOptions, + tweetResult: TweetResult, + includeSourceTweet: Boolean, + includeQuotedTweet: Boolean, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetResult] = { + val tweetData = tweetResult.value + + // only include missing fields if non empty + def asMissingFields(set: Set[FieldByPath]): Option[Set[FieldByPath]] = + if (set.isEmpty) None else Some(set) + + val missingFields = asMissingFields(tweetResult.state.failedFields) + + val sourceTweetResult = + tweetData.sourceTweetResult + .filter(_ => includeSourceTweet) + + val sourceTweetData = tweetData.sourceTweetResult + .getOrElse(tweetResult) + .value + val quotedTweetResult: Option[QuotedTweetResult] = sourceTweetData.quotedTweetResult + .filter(_ => includeQuotedTweet) + + val qtFilteredReasonStitch = + ((sourceTweetData.tweet.quotedTweet, quotedTweetResult) match { + case (Some(quotedTweet), Some(QuotedTweetResult.Filtered(filteredState))) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + filteredState, + quotedTweet.tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = true + ) + ) + case _ => Stitch.None + }) + //Use quotedTweetResult filtered reason when VF filtered reason is not present + .map(fsOpt => fsOpt.orElse(quotedTweetResult.flatMap(_.filteredReason))) + + val suppress = tweetData.suppress.orElse(tweetData.sourceTweetResult.flatMap(_.value.suppress)) + + val quotedTweetStitch: Stitch[Option[Tweet]] = + quotedTweetResult match { + // check if quote tweet is backed by creatives container and needs to be hydrated from creatives + // container service. 
For details, see go/creatives-containers-tdd + case Some(QuotedTweetResult.Found(tweetResult)) => + hydrateCreativeContainerBackedTweet( + originalGetTweetResult = GetTweetResult( + tweetId = tweetResult.value.tweet.id, + tweetState = StatusState.Found, + tweet = Some(tweetResult.value.tweet) + ), + getTweetRequestOptions = options, + creativesContainerRepo = creativesContainerRepo, + stats = stats, + shouldMaterializeContainers + ).map(_.tweet) + case _ => + Stitch.value( + quotedTweetResult + .flatMap(_.toOption) + .map(_.value.tweet) + ) + } + + Stitch.join(qtFilteredReasonStitch, quotedTweetStitch).map { + case (qtFilteredReason, quotedTweet) => + GetTweetResult( + tweetId = tweetData.tweet.id, + tweetState = + if (suppress.nonEmpty) StatusState.Suppress + else if (missingFields.nonEmpty) StatusState.Partial + else StatusState.Found, + tweet = Some(tweetData.tweet), + missingFields = missingFields, + filteredReason = suppress.map(_.filteredReason), + sourceTweet = sourceTweetResult.map(_.value.tweet), + sourceTweetMissingFields = sourceTweetResult + .map(_.state.failedFields) + .flatMap(asMissingFields), + quotedTweet = quotedTweet, + quotedTweetMissingFields = quotedTweetResult + .flatMap(_.toOption) + .map(_.state.failedFields) + .flatMap(asMissingFields), + quotedTweetFilteredReason = qtFilteredReason + ) + } + } + + private[this] val AuthorAccountIsInactive = FilteredReason.AuthorAccountIsInactive(true) + + def failureResult( + deletedTweetVisibilityRepo: DeletedTweetVisibilityRepository.Type, + tweetId: TweetId, + options: GetTweetOptions, + ex: Throwable + ): Stitch[GetTweetResult] = { + def deletedState(deleted: Boolean, statusState: StatusState) = + if (deleted && options.enableDeletedState) { + statusState + } else { + StatusState.NotFound + } + + ex match { + case FilteredState.Unavailable.Author.Deactivated => + Stitch.value(GetTweetResult(tweetId, StatusState.DeactivatedUser)) + case FilteredState.Unavailable.Author.NotFound => +
Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case FilteredState.Unavailable.Author.Offboarded => + Stitch.value( + GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(AuthorAccountIsInactive))) + case FilteredState.Unavailable.Author.Suspended => + Stitch.value(GetTweetResult(tweetId, StatusState.SuspendedUser)) + case FilteredState.Unavailable.Author.Protected => + Stitch.value(GetTweetResult(tweetId, StatusState.ProtectedUser)) + case FilteredState.Unavailable.Author.Unsafe => + Stitch.value(GetTweetResult(tweetId, StatusState.Drop)) + //Handle delete state with optional FilteredReason + case FilteredState.Unavailable.TweetDeleted => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(deleted = true, StatusState.Deleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + + case FilteredState.Unavailable.BounceDeleted => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(deleted = true, StatusState.BounceDeleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + + case FilteredState.Unavailable.SourceTweetNotFound(d) => + deletedTweetVisibilityRepo( + DeletedTweetVisibilityRepository.VisibilityRequest( + ex, + tweetId, + options.safetyLevel, + options.forUserId, + isInnerQuotedTweet = false + ) + ).map(filteredReasonOpt => { + val deleteState = deletedState(d, StatusState.Deleted) + GetTweetResult(tweetId, deleteState, filteredReason = filteredReasonOpt) + }) + case FilteredState.Unavailable.Reported => + Stitch.value(GetTweetResult(tweetId, StatusState.ReportedTweet)) + case fs: 
FilteredState.HasFilteredReason => + Stitch.value( + GetTweetResult(tweetId, StatusState.Drop, filteredReason = Some(fs.filteredReason))) + case OverCapacity(_) => Stitch.value(GetTweetResult(tweetId, StatusState.OverCapacity)) + case _ => Stitch.value(GetTweetResult(tweetId, StatusState.Failed)) + } + } + + private def hydrateCreativeContainerBackedTweet( + originalGetTweetResult: GetTweetResult, + getTweetRequestOptions: GetTweetOptions, + creativesContainerRepo: CreativesContainerMaterializationRepository.GetTweetType, + stats: StatsReceiver, + shouldMaterializeContainers: Gate[Unit] + ): Stitch[GetTweetResult] = { + // creatives container backed tweet stats + val ccTweetMaterialized = stats.scope("creatives_container", "get_tweets") + val ccTweetMaterializeFiltered = ccTweetMaterialized.scope("filtered") + val ccTweetMaterializeSuccess = ccTweetMaterialized.counter("success") + val ccTweetMaterializeFailed = ccTweetMaterialized.counter("failed") + val ccTweetMaterializeRequests = ccTweetMaterialized.counter("requests") + + val tweetId = originalGetTweetResult.tweetId + val tweetState = originalGetTweetResult.tweetState + val underlyingCreativesContainerId = + originalGetTweetResult.tweet.flatMap(_.underlyingCreativesContainerId) + ( + tweetState, + underlyingCreativesContainerId, + getTweetRequestOptions.disableTweetMaterialization, + shouldMaterializeContainers() + ) match { + // 1. creatives container backed tweet is determined by `underlyingCreativesContainerId` field presence. + // 2. if the frontend tweet is suppressed for any reason, respect that and skip this hydration.
+ // (this logic can be revisited and improved further) + case (_, None, _, _) => + Stitch.value(originalGetTweetResult) + case (_, Some(_), _, false) => + ccTweetMaterializeFiltered.counter("decider_suppressed").incr() + Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case (StatusState.Found, Some(containerId), false, _) => + ccTweetMaterializeRequests.incr() + val materializationRequest = + MaterializeAsTweetRequest(containerId, tweetId, Some(originalGetTweetResult)) + creativesContainerRepo( + materializationRequest, + Some(getTweetRequestOptions) + ).onSuccess(_ => ccTweetMaterializeSuccess.incr()) + .onFailure(_ => ccTweetMaterializeFailed.incr()) + .handle { + case _ => GetTweetResult(tweetId, StatusState.Failed) + } + case (_, Some(_), true, _) => + ccTweetMaterializeFiltered.counter("suppressed").incr() + Stitch.value(GetTweetResult(tweetId, StatusState.NotFound)) + case (state, Some(_), _, _) => + ccTweetMaterializeFiltered.counter(state.name).incr() + Stitch.value(originalGetTweetResult) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala new file mode 100644 index 000000000..6ec0fc611 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/HandlerError.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState.Unavailable._ + +private[tweetypie] object HandlerError { + + def translateNotFoundToClientError[U](tweetId: TweetId): PartialFunction[Throwable, Stitch[U]] = { + case NotFound => + Stitch.exception(HandlerError.tweetNotFound(tweetId)) + case TweetDeleted | BounceDeleted => + 
Stitch.exception(HandlerError.tweetNotFound(tweetId, true)) + case SourceTweetNotFound(deleted) => + Stitch.exception(HandlerError.tweetNotFound(tweetId, deleted)) + } + + def tweetNotFound(tweetId: TweetId, deleted: Boolean = false): ClientError = + ClientError( + ClientErrorCause.BadRequest, + s"tweet ${if (deleted) "deleted" else "not found"}: $tweetId" + ) + + def userNotFound(userId: UserId): ClientError = + ClientError(ClientErrorCause.BadRequest, s"user not found: $userId") + + def tweetNotFoundException(tweetId: TweetId): Future[Nothing] = + Future.exception(tweetNotFound(tweetId)) + + def userNotFoundException(userId: UserId): Future[Nothing] = + Future.exception(userNotFound(userId)) + + def getRequired[A, B]( + optionFutureArrow: FutureArrow[A, Option[B]], + notFound: A => Future[B] + ): FutureArrow[A, B] = + FutureArrow(key => + optionFutureArrow(key).flatMap { + case Some(x) => Future.value(x) + case None => notFound(key) + }) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala new file mode 100644 index 000000000..560c51304 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/MediaBuilder.scala @@ -0,0 +1,176 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tco_util.TcoSlug +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.Offset + +object CreateMediaTco { + import UpstreamFailure._ + + case class Request( + tweetId: TweetId, + userId: UserId, 
+ userScreenName: String, + isProtected: Boolean, + createdAt: Time, + isVideo: Boolean, + dark: Boolean) + + type Type = FutureArrow[Request, Media.MediaTco] + + def apply(urlShortener: UrlShortener.Type): Type = + FutureArrow[Request, Media.MediaTco] { req => + val expandedUrl = MediaUrl.Permalink(req.userScreenName, req.tweetId, req.isVideo) + val shortenCtx = + UrlShortener.Context( + userId = req.userId, + userProtected = req.isProtected, + tweetId = req.tweetId, + createdAt = req.createdAt, + dark = req.dark + ) + + urlShortener((expandedUrl, shortenCtx)) + .flatMap { metadata => + metadata.shortUrl match { + case TcoSlug(slug) => + Future.value( + Media.MediaTco( + expandedUrl, + metadata.shortUrl, + MediaUrl.Display.fromTcoSlug(slug) + ) + ) + + case _ => + // should never get here, since shortened urls from talon + // always start with "http://t.co/", just in case... + Future.exception(MediaShortenUrlMalformedFailure) + } + } + .rescue { + case UrlShortener.InvalidUrlError => + // should never get here, since media expandedUrl should always be a valid + // input to talon. 
+ Future.exception(MediaExpandedUrlNotValidFailure) + } + } +} + +object MediaBuilder { + private val log = Logger(getClass) + + case class Request( + mediaUploadIds: Seq[MediaId], + text: String, + tweetId: TweetId, + userId: UserId, + userScreenName: String, + isProtected: Boolean, + createdAt: Time, + dark: Boolean = false, + productMetadata: Option[Map[MediaId, UserDefinedProductMetadata]] = None) + + case class Result(updatedText: String, mediaEntities: Seq[MediaEntity], mediaKeys: Seq[MediaKey]) + + type Type = FutureArrow[Request, Result] + + def apply( + processMedia: MediaClient.ProcessMedia, + createMediaTco: CreateMediaTco.Type, + stats: StatsReceiver + ): Type = + FutureArrow[Request, Result] { + case Request( + mediaUploadIds, + text, + tweetId, + userId, + screenName, + isProtected, + createdAt, + dark, + productMetadata + ) => + for { + mediaKeys <- processMedia( + ProcessMediaRequest( + mediaUploadIds, + userId, + tweetId, + isProtected, + productMetadata + ) + ) + mediaTco <- createMediaTco( + CreateMediaTco.Request( + tweetId, + userId, + screenName, + isProtected, + createdAt, + mediaKeys.exists(MediaKeyClassifier.isVideo(_)), + dark + ) + ) + } yield produceResult(text, mediaTco, isProtected, mediaKeys) + }.countExceptions( + ExceptionCounter(stats) + ) + .onFailure[Request] { (req, ex) => log.info(req.toString, ex) } + .translateExceptions { + case e: MediaExceptions.MediaClientException => + TweetCreateFailure.State(TweetCreateState.InvalidMedia, Some(e.getMessage)) + } + + def produceResult( + text: String, + mediaTco: Media.MediaTco, + userIsProtected: Boolean, + mediaKeys: Seq[MediaKey] + ): Result = { + + val newText = + if (text == "") mediaTco.url + else text + " " + mediaTco.url + + val to = Offset.CodePoint.length(newText) + val from = to - Offset.CodePoint.length(mediaTco.url) + + val mediaEntities = + mediaKeys.map { mediaKey => + MediaEntity( + mediaKey = Some(mediaKey), + fromIndex = from.toShort, + toIndex = to.toShort, + url = 
mediaTco.url, + displayUrl = mediaTco.displayUrl, + expandedUrl = mediaTco.expandedUrl, + mediaId = mediaKey.mediaId, + mediaPath = "", // to be hydrated + mediaUrl = null, // to be hydrated + mediaUrlHttps = null, // to be hydrated + nsfw = false, // deprecated + sizes = Set( + MediaSize( + sizeType = MediaSizeType.Orig, + resizeMethod = MediaResizeMethod.Fit, + deprecatedContentType = MediaKeyUtil.contentType(mediaKey), + width = -1, // to be hydrated + height = -1 // to be hydrated + ) + ) + ) + } + + Result(newText, mediaEntities, mediaKeys) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala new file mode 100644 index 000000000..2ee6d1063 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/PostTweet.scala @@ -0,0 +1,395 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.context.thriftscala.FeatureContext +import com.twitter.tweetypie.backends.LimiterService +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.store.InsertTweet +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.TweetCreationLock.{Key => TweetCreationLockKey} + +object PostTweet { + type Type[R] = FutureArrow[R, PostTweetResult] + + /** + * A type-class to abstract over tweet creation requests. 
+ */ + trait RequestView[R] { + def isDark(req: R): Boolean + def sourceTweetId(req: R): Option[TweetId] + def options(req: R): Option[WritePathHydrationOptions] + def userId(req: R): UserId + def uniquenessId(req: R): Option[Long] + def returnSuccessOnDuplicate(req: R): Boolean + def returnDuplicateTweet(req: R): Boolean = + returnSuccessOnDuplicate(req) || uniquenessId(req).nonEmpty + def lockKey(req: R): TweetCreationLockKey + def geo(req: R): Option[TweetCreateGeo] + def featureContext(req: R): Option[FeatureContext] + def additionalContext(req: R): Option[collection.Map[TweetCreateContextKey, String]] + def transientContext(req: R): Option[TransientCreateContext] + def additionalFields(req: R): Option[Tweet] + def duplicateState: TweetCreateState + def scope: String + def isNullcast(req: R): Boolean + def creativesContainerId(req: R): Option[CreativesContainerId] + def noteTweetMentionedUserIds(req: R): Option[Seq[Long]] + } + + /** + * An implementation of `RequestView` for `PostTweetRequest`. 
+ */ + implicit object PostTweetRequestView extends RequestView[PostTweetRequest] { + def isDark(req: PostTweetRequest): Boolean = req.dark + def sourceTweetId(req: PostTweetRequest): None.type = None + def options(req: PostTweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions + def userId(req: PostTweetRequest): UserId = req.userId + def uniquenessId(req: PostTweetRequest): Option[Long] = req.uniquenessId + def returnSuccessOnDuplicate(req: PostTweetRequest) = false + def lockKey(req: PostTweetRequest): TweetCreationLockKey = TweetCreationLockKey.byRequest(req) + def geo(req: PostTweetRequest): Option[TweetCreateGeo] = req.geo + def featureContext(req: PostTweetRequest): Option[FeatureContext] = req.featureContext + def additionalContext( + req: PostTweetRequest + ): Option[collection.Map[TweetCreateContextKey, String]] = req.additionalContext + def transientContext(req: PostTweetRequest): Option[TransientCreateContext] = + req.transientContext + def additionalFields(req: PostTweetRequest): Option[Tweet] = req.additionalFields + def duplicateState: TweetCreateState.Duplicate.type = TweetCreateState.Duplicate + def scope = "tweet" + def isNullcast(req: PostTweetRequest): Boolean = req.nullcast + def creativesContainerId(req: PostTweetRequest): Option[CreativesContainerId] = + req.underlyingCreativesContainerId + def noteTweetMentionedUserIds(req: PostTweetRequest): Option[Seq[Long]] = + req.noteTweetOptions match { + case Some(noteTweetOptions) => noteTweetOptions.mentionedUserIds + case _ => None + } + } + + /** + * An implementation of `RequestView` for `RetweetRequest`. 
+ */ + implicit object RetweetRequestView extends RequestView[RetweetRequest] { + def isDark(req: RetweetRequest): Boolean = req.dark + def sourceTweetId(req: RetweetRequest): None.type = None + def options(req: RetweetRequest): Option[WritePathHydrationOptions] = req.hydrationOptions + def userId(req: RetweetRequest): UserId = req.userId + def uniquenessId(req: RetweetRequest): Option[Long] = req.uniquenessId + def returnSuccessOnDuplicate(req: RetweetRequest): Boolean = req.returnSuccessOnDuplicate + def lockKey(req: RetweetRequest): TweetCreationLockKey = + req.uniquenessId match { + case Some(id) => TweetCreationLockKey.byUniquenessId(req.userId, id) + case None => TweetCreationLockKey.bySourceTweetId(req.userId, req.sourceStatusId) + } + def geo(req: RetweetRequest): None.type = None + def featureContext(req: RetweetRequest): Option[FeatureContext] = req.featureContext + def additionalContext(req: RetweetRequest): None.type = None + def transientContext(req: RetweetRequest): None.type = None + def additionalFields(req: RetweetRequest): Option[Tweet] = req.additionalFields + def duplicateState: TweetCreateState.AlreadyRetweeted.type = TweetCreateState.AlreadyRetweeted + def scope = "retweet" + def isNullcast(req: RetweetRequest): Boolean = req.nullcast + def creativesContainerId(req: RetweetRequest): Option[CreativesContainerId] = None + def noteTweetMentionedUserIds(req: RetweetRequest): Option[Seq[Long]] = None + } + + /** + * A `Filter` is used to decorate a `FutureArrow` that has a known return type + * and an input type for which there is a `RequestView` type-class instance. + */ + trait Filter[Res] { self => + type T[Req] = FutureArrow[Req, Res] + + /** + * Wraps a base arrow with additional behavior. + */ + def apply[Req: RequestView](base: T[Req]): T[Req] + + /** + * Composes two filters. The resulting filter itself composes FutureArrows.
/**
 * This filter attempts to prevent some race-condition related duplicate tweet creations,
 * via use of a `TweetCreationLock`. When a duplicate is detected, this filter can synthesize
 * a successful `PostTweetResult` if applicable, or return the appropriate coded response.
 *
 * The resulting arrow runs the wrapped `base` arrow under the creation lock keyed by
 * `RequestView.lockKey`, and rescues the lock's duplicate / in-progress signals as well as
 * `TweetCreateFailure.AlreadyRetweeted`.
 */
object DuplicateHandler {
  def apply(
    tweetCreationLock: TweetCreationLock,
    getTweets: GetTweetsHandler.Type,
    stats: StatsReceiver
  ): Filter[PostTweetResult] =
    new Filter[PostTweetResult] {
      def apply[R: RequestView](base: T[R]): T[R] = {
        val view = implicitly[RequestView[R]]
        // Counters are scoped by the request kind (`view.scope`).
        val notFoundCount = stats.counter(view.scope, "not_found")
        val foundCounter = stats.counter(view.scope, "found")

        // `rec` hands the arrow a reference to itself (`self`) so that the stale-lock
        // branch below can retry the whole request.
        FutureArrow.rec[R, PostTweetResult] { self => req =>
          val duplicateKey = view.lockKey(req)

          // attempts to find the duplicate tweet.
          //
          // if `returnDupTweet` is true and we find the tweet, then we return a
          // successful `PostTweetResult` with that tweet.  if we don't find the
          // tweet, we fail with an `InternalServerError`.
          //
          // if `returnDupTweet` is false and we find the tweet, then we return
          // the appropriate duplicate state.  if we don't find the tweet, then
          // we unlock the duplicate key and try again.
          def duplicate(tweetId: TweetId, returnDupTweet: Boolean) =
            findDuplicate(tweetId, req).flatMap {
              case Some(postTweetResult) =>
                foundCounter.incr()
                if (returnDupTweet) Future.value(postTweetResult)
                else Future.value(PostTweetResult(state = view.duplicateState))

              case None =>
                notFoundCount.incr()
                if (returnDupTweet) {
                  // If we failed to load the tweet, but we know that it
                  // should exist, then return an InternalServerError, so that
                  // the client treats it as a failed tweet creation req.
                  Future.exception(
                    InternalServerError("Failed to load duplicate existing tweet: " + tweetId)
                  )
                } else {
                  // Assume the lock is stale if we can't load the tweet. It's
                  // possible that the lock is not stale, but the tweet is not
                  // yet available, which requires that it not be present in
                  // cache and not yet available from the backend. This means
                  // that the failure mode is to allow tweeting if we can't
                  // determine the state, but it should be rare that we can't
                  // determine it.
                  tweetCreationLock.unlock(duplicateKey).before(self(req))
                }
            }

          tweetCreationLock(duplicateKey, view.isDark(req), view.isNullcast(req)) {
            base(req)
          }.rescue {
            // NOTE(review): this branch always reports `TweetCreateState.Duplicate`, not
            // `view.duplicateState`, so retweets also get `Duplicate` here -- confirm that
            // this is intended.
            case TweetCreationInProgress =>
              Future.value(PostTweetResult(state = TweetCreateState.Duplicate))

            // if tweetCreationLock detected a duplicate, look up the duplicate
            // and return the appropriate result
            case DuplicateTweetCreation(tweetId) =>
              duplicate(tweetId, view.returnDuplicateTweet(req))

            // it's possible that tweetCreationLock didn't find a duplicate for a
            // retweet attempt, but `RetweetBuilder` did.
            case TweetCreateFailure.AlreadyRetweeted(tweetId) if view.returnDuplicateTweet(req) =>
              duplicate(tweetId, true)
          }
        }
      }

      /**
       * Loads the already-created tweet `tweetId` on behalf of the requesting user and, if
       * it is found, converts the read result into a successful `PostTweetResult`. Yields
       * `None` when the tweet state is anything other than `StatusState.Found`.
       */
      private def findDuplicate[R: RequestView](
        tweetId: TweetId,
        req: R
      ): Future[Option[PostTweetResult]] = {
        val view = implicitly[RequestView[R]]
        val readRequest =
          GetTweetsRequest(
            tweetIds = Seq(tweetId),
            // Assume that the defaults are OK for all of the hydration
            // options except the ones that are explicitly set in the
            // req.
            options = Some(
              GetTweetOptions(
                forUserId = Some(view.userId(req)),
                includePerspectivals = true,
                includeCards = view.options(req).exists(_.includeCards),
                cardsPlatformKey = view.options(req).flatMap(_.cardsPlatformKey)
              )
            )
          )

        // NOTE(review): only a single-element response is matched; any other response
        // shape would surface as a `MatchError` -- presumably `getTweets` returns exactly
        // one result per requested id; verify.
        getTweets(readRequest).map {
          case Seq(result) =>
            if (result.tweetState == StatusState.Found) {
              // If the tweet was successfully found, then convert the
              // read result into a successful write result.
              Some(
                PostTweetResult(
                  TweetCreateState.Ok,
                  result.tweet,
                  // if the retweet is really old, the retweet perspective might no longer
                  // be available, but we want to maintain the invariant that the `postRetweet`
                  // endpoint always returns a source tweet with the correct perspective.
                  result.sourceTweet.map { srcTweet =>
                    TweetLenses.perspective
                      .update(_.map(_.copy(retweeted = true, retweetId = Some(tweetId))))
                      .apply(srcTweet)
                  },
                  result.quotedTweet
                )
              )
            } else {
              None
            }
        }
      }
    }
}
/**
 * A `Filter` that counts every failure of the wrapped arrow other than a
 * `TweetCreateFailure`. Counts are recorded under the request kind's scope with
 * `scopeSuffix` appended.
 */
object CountFailures {
  def apply[Res](stats: StatsReceiver, scopeSuffix: String = "_builder"): Filter[Res] =
    new Filter[Res] {
      def apply[R: RequestView](base: T[R]): T[R] = {
        val scopeName = implicitly[RequestView[R]].scope + scopeSuffix
        val countException = ExceptionCounter(stats.scope(scopeName))
        base.onFailure {
          case (_, thrown) if !thrown.isInstanceOf[TweetCreateFailure] => countException(thrown)
          case _ =>
        }
      }
    }
}

/**
 * A `Filter` that writes the request and the failure to a dedicated logger whenever the
 * wrapped arrow fails.
 */
object LogFailures extends Filter[PostTweetResult] {
  private[this] val failedTweetCreationsLogger = Logger(
    "com.twitter.tweetypie.FailedTweetCreations"
  )

  def apply[R: RequestView](base: T[R]): T[R] =
    FutureArrow[R, PostTweetResult] { req =>
      val attempt = base(req)
      attempt.onFailure { failure =>
        failedTweetCreationsLogger.info(s"request: $req\nfailure: $failure")
      }
    }
}

/**
 * A `Filter` that turns a failed `TweetCreateFailure` into the equivalent
 * `PostTweetResult`; any other failure is passed through untouched.
 */
object RescueTweetCreateFailure extends Filter[PostTweetResult] {
  private[this] val asResult: PartialFunction[Throwable, Future[PostTweetResult]] = {
    case failure: TweetCreateFailure => Future.value(failure.toPostTweetResult)
  }

  def apply[R: RequestView](base: T[R]): T[R] =
    FutureArrow[R, PostTweetResult](req => base(req).rescue(asResult))
}
/**
 * Builds a base handler for `PostTweetRequest` and `RetweetRequest`. The handler
 * calls an underlying tweet builder, creates an `InsertTweet.Event`, hydrates
 * that, passes it to `tweetStore` (unless the event is dark), and then converts
 * it to a `PostTweetResult`.
 */
object Handler {
  def apply[R: RequestView](
    tweetBuilder: FutureArrow[R, TweetBuilderResult],
    hydrateInsertEvent: FutureArrow[InsertTweet.Event, InsertTweet.Event],
    tweetStore: InsertTweet.Store,
  ): Type[R] =
    FutureArrow { req =>
      tweetBuilder(req).flatMap { built =>
        hydrateInsertEvent(toInsertTweetEvent(req, built)).flatMap { hydrated =>
          // Dark events are hydrated but never written to the store.
          Future
            .when(!hydrated.dark)(tweetStore.insertTweet(hydrated))
            .map(_ => toPostTweetResult(hydrated))
        }
      }
    }

  /**
   * Combines a request with its `TweetBuilderResult` to produce the `InsertTweet.Event`
   * describing the new tweet. The event is dark when either the request is dark or the
   * builder reported a silent failure.
   */
  def toInsertTweetEvent[R: RequestView](
    req: R,
    bldrRes: TweetBuilderResult
  ): InsertTweet.Event = {
    val requestView = implicitly[RequestView[R]]
    val searchRequestId =
      requestView
        .geo(req)
        .flatMap(_.geoSearchRequestId)
        .map(id => GeoSearchRequestId(requestID = id.id))

    InsertTweet.Event(
      tweet = bldrRes.tweet,
      user = bldrRes.user,
      sourceTweet = bldrRes.sourceTweet,
      sourceUser = bldrRes.sourceUser,
      parentUserId = bldrRes.parentUserId,
      timestamp = bldrRes.createdAt,
      dark = requestView.isDark(req) || bldrRes.isSilentFail,
      hydrateOptions = requestView.options(req).getOrElse(WritePathHydrationOptions()),
      featureContext = requestView.featureContext(req),
      initialTweetUpdateRequest = bldrRes.initialTweetUpdateRequest,
      geoSearchRequestId = searchRequestId,
      additionalContext = requestView.additionalContext(req),
      transientContext = requestView.transientContext(req),
      noteTweetMentionedUserIds = requestView.noteTweetMentionedUserIds(req)
    )
  }

  /**
   * Wraps the tweet, source tweet and quoted tweet of an `InsertTweet.Event` in a
   * `PostTweetResult` whose state is `TweetCreateState.Ok`.
   */
  def toPostTweetResult(event: InsertTweet.Event): PostTweetResult = {
    val createdTweet = Some(event.tweet)
    PostTweetResult(
      TweetCreateState.Ok,
      createdTweet,
      sourceTweet = event.sourceTweet,
      quotedTweet = event.quotedTweet
    )
  }
}
/**
 * Builds the `QuotedTweetDelete.Event` for a `QuotedTweetDeleteRequest`.
 *
 * The quoting tweet is looked up through the supplied repository; when the lookup yields
 * no tweet the result is `None`.
 */
object QuotedTweetDeleteEventBuilder {
  type Type = QuotedTweetDeleteRequest => Future[Option[QuotedTweetDelete.Event]]

  val queryOptions: TweetQuery.Options =
    TweetQuery.Options(GetTweetsHandler.BaseInclude)

  // Assembles the event from the request ids plus the author of the loaded quoting tweet.
  // The timestamp is taken at the moment the event is built.
  private def toEvent(
    request: QuotedTweetDeleteRequest,
    quotingTweet: Tweet
  ): QuotedTweetDelete.Event =
    QuotedTweetDelete.Event(
      quotingTweetId = request.quotingTweetId,
      quotingUserId = getUserId(quotingTweet),
      quotedTweetId = request.quotedTweetId,
      quotedUserId = request.quotedUserId,
      timestamp = Time.now
    )

  def apply(tweetRepo: TweetRepository.Optional): Type = { request =>
    val quotingTweetLookup = tweetRepo(request.quotingTweetId, queryOptions)
    Stitch.run(quotingTweetLookup.map(_.map(toEvent(request, _))))
  }
}
/**
 * Arrows for consulting the limiter service on behalf of a `(userId, dark)` pair.
 * Dark requests bypass the limiter in both arrows.
 */
object RateLimitChecker {
  type Dark = Boolean
  type GetRemaining = FutureArrow[(UserId, Dark), Int]
  type Validate = FutureArrow[(UserId, Dark), Unit]

  /**
   * Yields how many media tags may still be added: the limiter's remaining count capped
   * at `maxMediaTags`. Dark requests, and requests for which the limiter call fails, get
   * `maxMediaTags`.
   */
  def getMaxMediaTags(minRemaining: LimiterService.MinRemaining, maxMediaTags: Int): GetRemaining =
    FutureArrow {
      case (_, true) =>
        Future.value(maxMediaTags)

      case (userId, _) =>
        val contributorUserId = getContributor(userId).map(_.userId)
        minRemaining(userId, contributorUserId)
          .map(remaining => remaining.min(maxMediaTags))
          .handle { case _ => maxMediaTags }
    }

  /**
   * Fails with `TweetCreateFailure.State(RateLimitExceeded)` when the limiter reports that
   * nothing remains. The check is skipped entirely for dark requests and while
   * `rateLimitEnabled` returns false.
   */
  def validate(
    hasRemaining: LimiterService.HasRemaining,
    featureStats: StatsReceiver,
    rateLimitEnabled: () => Boolean
  ): Validate = {
    val exceededCounter = featureStats.counter("exceeded")
    val checkedCounter = featureStats.counter("checked")

    FutureArrow {
      case (_, dark) if dark || !rateLimitEnabled() =>
        Future.Unit

      case (userId, _) =>
        checkedCounter.incr()
        val contributorUserId = getContributor(userId).map(_.userId)
        hasRemaining(userId, contributorUserId).map { allowed =>
          if (!allowed) {
            exceededCounter.incr()
            throw TweetCreateFailure.State(RateLimitExceeded)
          }
        }
    }
  }
}
/**
 * Holds the fields that legacy and simplified replies have in common.
 */
case class BaseResult(
  reply: Reply,
  conversationId: Option[ConversationId],
  selfThreadMetadata: Option[SelfThreadMetadata],
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {
  // Completes a Result by adding the fields on which legacy and simplified replies differ.
  def toResult(
    tweetText: String,
    directedAtMetadata: DirectedAtUserMetadata,
    visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  ): Result =
    Result(
      reply = reply,
      tweetText = tweetText,
      directedAtMetadata = directedAtMetadata,
      conversationId = conversationId,
      selfThreadMetadata = selfThreadMetadata,
      visibleStart = visibleStart,
      community = community,
      exclusiveTweetControl = exclusiveTweetControl,
      trustedFriendsControl = trustedFriendsControl,
      editControl = editControl
    )
}

/**
 * @param reply              the Reply object to include in the tweet.
 * @param tweetText          updated tweet text which may include prepended at-mentions, trimmed
 * @param directedAtMetadata see DirectedAtHydrator for usage.
 * @param conversationId     conversation id to assign to the tweet.
 * @param selfThreadMetadata returns the result of `SelfThreadBuilder`
 * @param visibleStart       offset into `tweetText` separating hideable at-mentions from the
 *                           visible text.
 */
case class Result(
  reply: Reply,
  tweetText: String,
  directedAtMetadata: DirectedAtUserMetadata,
  conversationId: Option[ConversationId] = None,
  selfThreadMetadata: Option[SelfThreadMetadata] = None,
  visibleStart: Offset.CodePoint = Offset.CodePoint(0),
  community: Option[Communities] = None,
  exclusiveTweetControl: Option[ExclusiveTweetControl] = None,
  trustedFriendsControl: Option[TrustedFriendsControl] = None,
  editControl: Option[EditControl] = None) {

  /**
   * @param finalText final tweet text after any server-side additions.
   * @return true iff the final tweet text consists exclusively of a hidden reply mention
   *         prefix. In that case the reply has no content of its own and tweet creation
   *         should fail.
   */
  def replyTextIsEmpty(finalText: String): Boolean = {
    // Code-point length of the text this Result was built with, i.e. before server-side
    // additions such as media or quoted-tweet URLs.
    val builtLength = Offset.CodePoint.length(tweetText)

    // The hidden prefix must span the whole built text, and the server-side additions must
    // have left the length unchanged.
    builtLength == visibleStart && builtLength == Offset.CodePoint.length(finalText)
  }
}
/**
 * Captures the in-reply-to tweet, its author, and if the user is attempting to reply to a
 * retweet, then that retweet and its author.
 *
 * Note that `photoTaggedUsers` is computed when the instance is constructed, so construction
 * throws `InvalidUserException` if a media tag on the source tweet lacks a user id or
 * screen name.
 */
private case class ReplySource(
  srcTweet: Tweet,
  srcUser: User,
  retweet: Option[Tweet] = None,
  rtUser: Option[User] = None) {
  // Users tagged in the source tweet's media; empty when the tweet has no media tags.
  private val photoTaggedUsers: Seq[User] =
    srcTweet.mediaTags
      .map(_.tagMap.values.flatten)
      .getOrElse(Nil)
      .map(toUser)
      .toSeq

  // Requires both the user id and the screen name to be present on the tag.
  private def toUser(mt: MediaTag): User =
    mt match {
      case MediaTag(_, Some(id), Some(screenName), _) => User(id, screenName)
      case _ => throw InvalidUserException
    }

  // Requires the user id to be present on the mention.
  private def toUser(e: MentionEntity): User =
    e match {
      case MentionEntity(_, _, screenName, Some(id), _, _) => User(id, screenName)
      case _ => throw InvalidUserException
    }

  private def toUser(d: DirectedAtUser) = User(d.userId, d.screenName)

  /**
   * Runs `cardUsersFinder` for the source tweet's card reference and URLs, from the
   * perspective of `authorUser`.
   */
  def allCardUsers(authorUser: User, cardUsersFinder: CardUsersFinder.Type): Future[Set[UserId]] =
    Stitch.run(
      cardUsersFinder(
        CardUsersFinder.Request(
          cardReference = getCardReference(srcTweet),
          urls = getUrls(srcTweet).map(_.url),
          perspectiveUserId = authorUser.id
        )
      )
    )

  // Users mentioned in the source tweet; throws `InvalidUserException` if a mention has no
  // user id.
  def srcTweetMentionedUsers: Seq[User] = getMentions(srcTweet).map(toUser)

  /**
   * Strategy for one kind of reply: which user (if any) the reply is directed at, which
   * user (if any) must appear in the text, and how the implicit mention prefix is built.
   */
  private trait ReplyType {

    val allExcludedUserIds: Set[UserId]

    def directedAt: Option[User]
    def requiredTextMention: Option[User]

    def isExcluded(u: User): Boolean = allExcludedUserIds.contains(u.id)

    /**
     * Builds a space-separated string of `@screenName` mentions: the required mention
     * first (if any), followed by `otherMentions`, skipping excluded users and repeated
     * user ids.
     */
    def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = {
      // Seeding `seen` with the excluded ids makes the filter below drop them, as well as
      // any id that has already been emitted (`add` returns false for a known id).
      val seen = new mutable.HashSet[UserId]
      seen ++= allExcludedUserIds
      // Never exclude the required mention
      seen --= requiredTextMention.map(_.id)

      (requiredTextMention.toSeq ++ otherMentions)
        .filter(u => seen.add(u.id))
        // Always leave room for the required mention, even when `maxImplicits` is 0.
        .take(maxImplicits.max(requiredTextMention.size))
        .map(u => s"@${u.screenName}")
        .mkString(" ")
    }
  }

  // Reply to the author's own tweet (outside of batch-subsequent mode).
  private case class SelfReply(
    allExcludedUserIds: Set[UserId],
    enableTweetToNarrowcasting: Boolean)
      extends ReplyType {

    private def srcTweetDirectedAt: Option[User] = getDirectedAtUser(srcTweet).map(toUser)

    // The first non-excluded candidate out of the retweet author and the source tweet's
    // directed-at user; None when narrowcasting is disabled.
    override def directedAt: Option[User] =
      if (!enableTweetToNarrowcasting) None
      else Seq.concat(rtUser, srcTweetDirectedAt).find(!isExcluded(_))

    override def requiredTextMention: Option[User] =
      // Make sure the directedAt user is in the text to avoid confusion
      directedAt
  }

  // Self-reply made in `BatchSubsequent` mode: no directed-at user and an empty prefix.
  private case class BatchSubsequentReply(allExcludedUserIds: Set[UserId]) extends ReplyType {

    override def directedAt: Option[User] = None

    override def requiredTextMention: Option[User] = None

    override def buildPrefix(otherMentions: Seq[User], maxImplicits: Int): String = ""
  }

  // Reply to somebody else's tweet.
  private case class RegularReply(
    allExcludedUserIds: Set[UserId],
    enableTweetToNarrowcasting: Boolean)
      extends ReplyType {

    // The source tweet's author, unless excluded or narrowcasting is disabled.
    override def directedAt: Option[User] =
      Some(srcUser)
        .filterNot(isExcluded)
        .filter(_ => enableTweetToNarrowcasting)

    override def requiredTextMention: Option[User] =
      // Include the source tweet's author as a mention in the reply, even if the reply is not
      // narrowcasted to that user.  All non-self-reply tweets require this mention.
      Some(srcUser)
  }

  /**
   * Computes an implicit mention prefix to add to the tweet text as well as any directed-at user.
   *
   * The first implicit mention is the source-tweet's author unless the reply is a self-reply, in
   * which case it inherits the DirectedAtUser from the source tweet, though the current author is
   * never added.  This mention, if it exists, is the only mention that may be used to direct-at a
   * user and is the user that ends up in DirectedAtUserMetadata.  If the user replied to a
   * retweet and the reply doesn't explicitly mention the retweet author, then the retweet author
   * will be next, followed by source tweet mentions and source tweet photo-tagged users.
   *
   * Users in `excludedUsers` originate from the PostTweetRequest and are filtered out of any
   * non-leading mention.
   *
   * Note on maxImplicits:
   * This method returns at most 'maxImplicits' mentions unless 'maxImplicits' is 0 and a
   * directed-at mention is required, in which case it returns 1.  If this happens the reply may
   * fail downstream validation checks (e.g. TweetBuilder).  With 280 visible character limit it's
   * theoretically possible to explicitly mention 93 users (280 / 3) but this bug shouldn't really
   * be an issue because:
   * 1.) Most replies don't have 50 explicit mentions
   * 2.) TOO-clients have switched to batchMode=Subsequent for self-replies which disable
   *     source tweet's directed-at user inheritance
   * 3.) Requests rarely are rejected due to mention_limit_exceeded
   * If this becomes a problem we could reopen the mention limit discussion, specifically if the
   * backend should allow 51 while the explicit limit remains at 50.
   *
   * Note on batchMode:
   * Implicit mention prefix will be empty string if batchMode is BatchSubsequent. This is to
   * support batch composer.
   *
   * @return the mention prefix (possibly empty) and the directed-at user, if any.
   */
  def implicitMentionPrefixAndDAU(
    maxImplicits: Int,
    excludedUsers: Seq[User],
    author: User,
    enableTweetToNarrowcasting: Boolean,
    batchMode: Option[BatchComposeMode]
  ): (String, Option[User]) = {
    // The author is always excluded, in addition to the caller-supplied users.
    def allExcludedUserIds =
      (excludedUsers ++ Seq(author)).map(_.id).toSet

    val replyType =
      if (author.id == srcUser.id) {
        if (batchMode.contains(BatchComposeMode.BatchSubsequent)) {
          BatchSubsequentReply(allExcludedUserIds)
        } else {
          SelfReply(allExcludedUserIds, enableTweetToNarrowcasting)
        }
      } else {
        RegularReply(allExcludedUserIds, enableTweetToNarrowcasting)
      }

    val prefix =
      replyType.buildPrefix(
        otherMentions = List.concat(rtUser, srcTweetMentionedUsers, photoTaggedUsers),
        maxImplicits = maxImplicits
      )

    (prefix, replyType.directedAt)
  }

  /**
   * Finds the longest possible prefix of whitespace separated @-mentions, restricted to
   * @-mentions that are derived from the reply chain.
   *
   * @param text             the tweet text to scan.
   * @param cardUsers        additional users whose mentions are allowed in the prefix.
   * @param explicitMentions the mention entities extracted from `text`.
   * @return the code-point offset in `text` at which the scan stopped.
   */
  def hideablePrefix(
    text: String,
    cardUsers: Seq[User],
    explicitMentions: Seq[Extractor.Entity]
  ): Offset.CodePoint = {
    // Lower-cased screen names, so the comparison below is case-insensitive.
    val allowedMentions =
      (srcTweetMentionedUsers.toSet + srcUser ++ rtUser.toSet ++ photoTaggedUsers ++ cardUsers)
        .map(_.screenName.toLowerCase)
    val len = Offset.CodeUnit.length(text)

    // To allow 'NO-BREAK SPACE' (U+00A0) in the prefix need .isSpaceChar
    def isWhitespace(c: Char) = c.isWhitespace || c.isSpaceChar

    // Advances past any whitespace starting at `offset`.
    @tailrec
    def skipWs(offset: Offset.CodeUnit): Offset.CodeUnit =
      if (offset == len || !isWhitespace(text.charAt(offset.toInt))) offset
      else skipWs(offset.incr)

    // Consumes allowed mentions for as long as each one starts exactly at `offset`.
    @tailrec
    def go(offset: Offset.CodeUnit, mentions: Stream[Extractor.Entity]): Offset.CodeUnit =
      if (offset == len) offset
      else {
        mentions match {
          // if we are at the next mention, and it is allowed, skip past and recurse
          case next #:: tail if next.getStart == offset.toInt =>
            if (!allowedMentions.contains(next.getValue.toLowerCase)) offset
            else go(skipWs(Offset.CodeUnit(next.getEnd)), tail)
          // we found non-mention text
          case _ => offset
        }
      }

    go(Offset.CodeUnit(0), explicitMentions.toStream).toCodePoint(text)
  }
}
/**
 * A builder that generates reply from `inReplyToTweetId` or tweet text
 *
 * There are two kinds of "reply":
 * 1. reply to tweet, which is generated from `inReplyToTweetId`.
 *
 * A valid reply-to-tweet satisfies the following conditions:
 * 1). the tweet that is in-reply-to exists (and is visible to the user creating the tweet)
 * 2). the author of the in-reply-to tweet is mentioned anywhere in the tweet, or
 *     this is a tweet that is in reply to the author's own tweet
 *
 * 2. reply to user, is generated when the tweet text starts with @user_name.  This is only
 * attempted if PostTweetRequest.enableTweetToNarrowcasting is true (default).
 *
 * @param maxMentions upper bound on mentions; the number of implicit mentions generated in
 *                    simplified mode is `maxMentions` minus the number of explicit mentions
 *                    in the text, floored at 0.
 */
def apply(
  userIdentityRepo: UserIdentityRepository.Type,
  tweetRepo: TweetRepository.Optional,
  replyCardUsersFinder: CardUsersFinder.Type,
  selfThreadBuilder: SelfThreadBuilder,
  relationshipRepo: RelationshipRepository.Type,
  unmentionedEntitiesRepo: UnmentionedEntitiesRepository.Type,
  enableRemoveUnmentionedImplicits: Gate[Unit],
  stats: StatsReceiver,
  maxMentions: Int
): Type = {
  val exceptionCounters = ExceptionCounter(stats)
  val modeScope = stats.scope("mode")
  val compatModeCounter = modeScope.counter("compat")
  val simpleModeCounter = modeScope.counter("simple")

  // Looks up a single user; a not-found result becomes None.
  def getUser(key: UserKey): Future[Option[User]] =
    Stitch.run(
      userIdentityRepo(key)
        .map(ident => User(ident.id, ident.screenName))
        .liftNotFoundToOption
    )

  // Looks up several users by id; ids that are not found are silently dropped.
  def getUsers(userIds: Seq[UserId]): Future[Seq[ReplyBuilder.User]] =
    Stitch.run(
      Stitch
        .traverse(userIds)(id => userIdentityRepo(UserKey(id)).liftNotFoundToOption)
        .map(_.flatten)
        .map { identities => identities.map { ident => User(ident.id, ident.screenName) } }
    )

  // Tweet fields read by this builder, plus whatever `selfThreadBuilder` requires.
  val tweetQueryIncludes =
    TweetQuery.Include(
      tweetFields = Set(
        Tweet.CoreDataField.id,
        Tweet.CardReferenceField.id,
        Tweet.CommunitiesField.id,
        Tweet.MediaTagsField.id,
        Tweet.MentionsField.id,
        Tweet.UrlsField.id,
        Tweet.EditControlField.id
      ) ++ selfThreadBuilder.requiredReplySourceFields.map(_.id)
    )

  def tweetQueryOptions(forUserId: UserId) =
    TweetQuery.Options(
      tweetQueryIncludes,
      forUserId = Some(forUserId),
      enforceVisibilityFiltering = true
    )

  // Loads a tweet from the perspective of `forUserId`, with visibility filtering enforced.
  def getTweet(tweetId: TweetId, forUserId: UserId): Future[Option[Tweet]] =
    Stitch.run(tweetRepo(tweetId, tweetQueryOptions(forUserId)))

  // Fails with `InReplyToTweetNotFound` when the in-reply-to user blocks the author.
  def checkBlockRelationship(authorId: UserId, result: Result): Future[Unit] = {
    val inReplyToBlocksTweeter =
      RelationshipKey.blocks(
        sourceId = result.reply.inReplyToUserId,
        destinationId = authorId
      )

    Stitch.run(relationshipRepo(inReplyToBlocksTweeter)).flatMap {
      case true => Future.exception(InReplyToTweetNotFound)
      case false => Future.Unit
    }
  }

  // Fails with `Spam.DisabledByIpiFailure` when the spam result is `DisabledByIpiPolicy`.
  def checkIPIPolicy(request: Request, reply: Reply): Future[Unit] = {
    if (request.spamResult == Spam.DisabledByIpiPolicy) {
      Future.exception(Spam.DisabledByIpiFailure(reply.inReplyToScreenName))
    } else {
      Future.Unit
    }
  }

  // When the gate is enabled, asks `unmentionedEntitiesRepo` about the source tweet's
  // mentioned and directed-at users within its conversation. Yields an empty result when
  // the gate is off, when there is no conversation id or nothing to check, when the lookup
  // fails, or when it returns None.
  def getUnmentionedUsers(replySource: ReplySource): Future[Seq[UserId]] = {
    if (enableRemoveUnmentionedImplicits()) {
      val srcDirectedAt = replySource.srcTweet.directedAtUserMetadata.flatMap(_.userId)
      val srcTweetMentions = replySource.srcTweet.mentions.getOrElse(Nil).flatMap(_.userId)
      val idsToCheck = srcTweetMentions ++ srcDirectedAt

      val conversationId = replySource.srcTweet.coreData.flatMap(_.conversationId)
      conversationId match {
        case Some(cid) if idsToCheck.nonEmpty =>
          // NOTE(review): the counter is looked up by name on every call; consider
          // hoisting it next to the other counters above.
          stats.counter("unmentioned_implicits_check").incr()
          Stitch
            .run(unmentionedEntitiesRepo(cid, idsToCheck)).liftToTry.map {
              case Return(Some(unmentionedUserIds)) =>
                unmentionedUserIds
              case _ => Seq[UserId]()
            }
        case _ => Future.Nil

      }
    } else {
      Future.Nil
    }
  }

  /**
   * Constructs a `ReplySource` for the given `tweetId`, which captures the source tweet to be
   * replied to, its author, and if `tweetId` is for a retweet of the source tweet, then also
   * that retweet and its author.  If the source tweet (or a retweet of it), or a corresponding
   * author, can't be found or isn't visible to the replier, then `InReplyToTweetNotFound` is
   * thrown.
   */
  def getReplySource(tweetId: TweetId, forUserId: UserId): Future[ReplySource] =
    for {
      tweet <- getTweet(tweetId, forUserId).flatMap {
        case None => Future.exception(InReplyToTweetNotFound)
        case Some(t) => Future.value(t)
      }

      user <- getUser(UserKey(getUserId(tweet))).flatMap {
        case None => Future.exception(InReplyToTweetNotFound)
        case Some(u) => Future.value(u)
      }

      res <- getShare(tweet) match {
        case None => Future.value(ReplySource(tweet, user))
        case Some(share) =>
          // if the user is replying to a retweet, find the retweet source tweet,
          // then update with the retweet and author.
          getReplySource(share.sourceStatusId, forUserId)
            .map(_.copy(retweet = Some(tweet), rtUser = Some(user)))
      }
    } yield res

  /**
   * Computes a `Result` for the reply-to-tweet case.  If `inReplyToTweetId` is for a retweet,
   * the reply will be computed against the source tweet.  If `prependImplicitMentions` is true
   * and source tweet can't be found or isn't visible to replier, then this method will return
   * an `InReplyToTweetNotFound` failure.  If `prependImplicitMentions` is false, then the reply
   * text must either mention the source tweet user, or it must be a reply to self; if both of
   * those conditions fail, then `None` is returned.
   */
  def makeReplyToTweet(
    inReplyToTweetId: TweetId,
    text: String,
    author: User,
    prependImplicitMentions: Boolean,
    enableTweetToNarrowcasting: Boolean,
    excludeUserIds: Seq[UserId],
    batchMode: Option[BatchComposeMode]
  ): Future[Option[Result]] = {
    val explicitMentions: Seq[Extractor.Entity] =
      extractor.extractMentionedScreennamesWithIndices(text).asScala.toSeq
    val mentionedScreenNames =
      explicitMentions.map(_.getValue.toLowerCase).toSet

    /**
     * If `prependImplicitMentions` is true, or the reply author is the same as the in-reply-to
     * author, then the reply text doesn't have to mention the in-reply-to author.  Otherwise,
     * check that the text contains a mention of the in-reply-to author.
     */
    def isValidReplyTo(inReplyToUser: User): Boolean =
      prependImplicitMentions ||
        (inReplyToUser.id == author.id) ||
        mentionedScreenNames.contains(inReplyToUser.screenName.toLowerCase)

    getReplySource(inReplyToTweetId, author.id)
      .flatMap { replySrc =>
        val baseResult = BaseResult(
          reply = replyToUser(replySrc.srcUser, Some(replySrc.srcTweet.id)),
          conversationId = getConversationId(replySrc.srcTweet),
          selfThreadMetadata = selfThreadBuilder.build(author.id, replySrc.srcTweet),
          community = replySrc.srcTweet.communities,
          // Reply tweets retain the same exclusive
          // tweet controls as the tweet being replied to.
          exclusiveTweetControl = replySrc.srcTweet.exclusiveTweetControl,
          trustedFriendsControl = replySrc.srcTweet.trustedFriendsControl,
          editControl = replySrc.srcTweet.editControl
        )

        if (isValidReplyTo(replySrc.srcUser)) {
          if (prependImplicitMentions) {

            // Simplified Replies mode - prepend server-side generated prefix to passed in text
            simpleModeCounter.incr()
            // remove the in-reply-to tweet author from the excluded users, in-reply-to tweet author will always be a directedAtUser
            val filteredExcludedIds =
              excludeUserIds.filterNot(uid => uid == TweetLenses.userId(replySrc.srcTweet))
            for {
              unmentionedUserIds <- getUnmentionedUsers(replySrc)
              excludedUsers <- getUsers(filteredExcludedIds ++ unmentionedUserIds)
              (prefix, directedAtUser) = replySrc.implicitMentionPrefixAndDAU(
                maxImplicits = math.max(0, maxMentions - explicitMentions.size),
                excludedUsers = excludedUsers,
                author = author,
                enableTweetToNarrowcasting = enableTweetToNarrowcasting,
                batchMode = batchMode
              )
            } yield {
              // prefix or text (or both) can be empty strings.  Add " " separator and adjust
              // prefix length only when both prefix and text are non-empty.
              val textChunks = Seq(prefix, text).map(_.trim).filter(_.nonEmpty)
              val tweetText = textChunks.mkString(" ")
              val visibleStart =
                if (textChunks.size == 2) {
                  Offset.CodePoint.length(prefix + " ")
                } else {
                  Offset.CodePoint.length(prefix)
                }

              Some(
                baseResult.toResult(
                  tweetText = tweetText,
                  directedAtMetadata = DirectedAtUserMetadata(directedAtUser.map(_.id)),
                  visibleStart = visibleStart
                )
              )
            }
          } else {
            // Backwards-compatibility mode - walk from beginning of text until find visibleStart
            compatModeCounter.incr()
            for {
              cardUserIds <- replySrc.allCardUsers(author, replyCardUsersFinder)
              cardUsers <- getUsers(cardUserIds.toSeq)
              optUserIdentity <- extractReplyToUser(text)
              directedAtUserId = optUserIdentity.map(_.id).filter(_ => enableTweetToNarrowcasting)
            } yield {
              Some(
                baseResult.toResult(
                  tweetText = text,
                  directedAtMetadata = DirectedAtUserMetadata(directedAtUserId),
                  visibleStart = replySrc.hideablePrefix(text, cardUsers, explicitMentions),
                )
              )
            }
          }
        } else {
          Future.None
        }
      }
      .handle {
        // if `getReplySource` throws this exception, but we aren't computing implicit
        // mentions, then we fall back to the reply-to-user case instead of reply-to-tweet
        case InReplyToTweetNotFound if !prependImplicitMentions => None
      }
  }

  // Reply-to-user case: the reply targets the user named at the start of the text, if that
  // user can be resolved.
  def makeReplyToUser(text: String): Future[Option[Result]] =
    extractReplyToUser(text).map(_.map { user =>
      Result(replyToUser(user), text, DirectedAtUserMetadata(Some(user.id)))
    })

  // Resolves the screen name returned by `extractReplyScreenname`; that call may return
  // null, hence the `Option(...)` wrapper.
  def extractReplyToUser(text: String): Future[Option[User]] =
    Option(extractor.extractReplyScreenname(text)) match {
      case None => Future.None
      case Some(screenName) => getUser(UserKey(screenName))
    }

  FutureArrow[Request, Option[Result]] { request =>
    exceptionCounters {
      // Non-positive in-reply-to ids are treated the same as an absent id.
      (request.inReplyToTweetId.filter(_ > 0) match {
        case None =>
          Future.None

        case Some(tweetId) =>
          makeReplyToTweet(
            tweetId,
            request.tweetText,
            User(request.authorId, request.authorScreenName),
            request.prependImplicitMentions,
            request.enableTweetToNarrowcasting,
            request.excludeUserIds,
            request.batchMode
          )
      }).flatMap {
        case Some(r) =>
          // Ensure that the author of this reply is not blocked by
          // the user who they are replying to.
          checkBlockRelationship(request.authorId, r)
            .before(checkIPIPolicy(request, r.reply))
            .before(Future.value(Some(r)))

        case None if request.enableTweetToNarrowcasting =>
          // We don't check the block relationship when the tweet is
          // not part of a conversation (which is to say, we allow
          // directed-at tweets from a blocked user.) These tweets
          // will not cause notifications for the blocking user,
          // despite the presence of the reply struct.
          makeReplyToUser(request.tweetText)

        case None =>
          Future.None
      }
    }
  }
}
+  /** Role name that `isSGSTestRole` looks for to recognise SGS test users. */
+  val SGSTestRole = "socialgraph"
+
+  val log: Logger = Logger(getClass)
+
+  /**
+   * Composes retweet text using the screen name taken from `sourceUser.profile`.
+   *
+   * The profile must be present: `profile.get` throws if it is empty. The builder below
+   * checks this in `validateSourceUserRetweetable` before composing the text.
+   */
+  def composeRetweetText(text: String, sourceUser: User): String =
+    composeRetweetText(text, sourceUser.profile.get.screenName)
+
+  /**
+   * Composes retweet text by prepending "RT @" + `screenName` + ": " to `text` and passing
+   * the result through `Truncator.truncateForRetweet`.
+   */
+  def composeRetweetText(text: String, screenName: String): String =
+    Truncator.truncateForRetweet("RT @" + screenName + ": " + text)
+
+  // We do not want to allow community tweets to be retweeted.
+  def validateNotCommunityTweet(sourceTweet: Tweet): Future[Unit] =
+    if (CommunityUtil.hasCommunity(sourceTweet.communities)) {
+      Future.exception(TweetCreateFailure.State(TweetCreateState.CommunityRetweetNotAllowed))
+    } else {
+      Future.Unit
+    }
+
+  // We do not want to allow Trusted Friends tweets to be retweeted.
+  // Only the presence of the control matters, so its value is not bound.
+  def validateNotTrustedFriendsTweet(sourceTweet: Tweet): Future[Unit] =
+    sourceTweet.trustedFriendsControl match {
+      case Some(_) =>
+        Future.exception(TweetCreateFailure.State(TweetCreateState.TrustedFriendsRetweetNotAllowed))
+      case None =>
+        Future.Unit
+    }
+
+  // We do not want to allow retweet of a stale version of a tweet in an edit chain.
+ def validateStaleTweet(sourceTweet: Tweet): Future[Unit] = { + if (!EditControlUtil.isLatestEdit(sourceTweet.editControl, sourceTweet.id).getOrElse(true)) { + Future.exception(TweetCreateFailure.State(TweetCreateState.StaleTweetRetweetNotAllowed)) + } else { + // the source tweet does not have any edit control or the source tweet is the latest tweet + Future.Unit + } + } + + /** + * Builds the RetweetBuilder + */ + def apply( + validateRequest: RetweetRequest => Future[Unit], + tweetIdGenerator: TweetIdGenerator, + tweetRepo: TweetRepository.Type, + userRepo: UserRepository.Type, + tflock: TFlockClient, + deviceSourceRepo: DeviceSourceRepository.Type, + validateUpdateRateLimit: RateLimitChecker.Validate, + spamChecker: Spam.Checker[RetweetSpamRequest] = Spam.DoNotCheckSpam, + updateUserCounts: (User, Tweet) => Future[User], + superFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type, + unretweetEdits: TweetDeletePathHandler.UnretweetEdits, + setEditWindowToSixtyMinutes: Gate[Unit] + ): RetweetBuilder.Type = { + val entityExtactor = EntityExtractor.mutationAll.endo + + val sourceTweetRepo: SourceTweetRequest => Stitch[Tweet] = + req => { + tweetRepo( + req.tweetId, + WritePathQueryOptions.retweetSourceTweet(req.user, req.hydrateOptions) + ).rescue { + case _: FilteredState => Stitch.NotFound + } + .rescue { + convertRepoExceptions(TweetCreateState.SourceTweetNotFound, TweetLookupFailure(_)) + } + } + + val getUser = userLookup(userRepo) + val getSourceUser = sourceUserLookup(userRepo) + val getDeviceSource = deviceSourceLookup(deviceSourceRepo) + + /** + * We exempt SGS test users from the check to get them through Block v2 testing. 
+ */ + def isSGSTestRole(user: User): Boolean = + user.roles.exists { roles => roles.roles.contains(SGSTestRole) } + + def validateCanRetweet( + user: User, + sourceUser: User, + sourceTweet: Tweet, + request: RetweetRequest + ): Future[Unit] = + Future + .join( + validateNotCommunityTweet(sourceTweet), + validateNotTrustedFriendsTweet(sourceTweet), + validateSourceUserRetweetable(user, sourceUser), + validateStaleTweet(sourceTweet), + Future.when(!request.dark) { + if (request.returnSuccessOnDuplicate) + failWithRetweetIdIfAlreadyRetweeted(user, sourceTweet) + else + validateNotAlreadyRetweeted(user, sourceTweet) + } + ) + .unit + + def validateSourceUserRetweetable(user: User, sourceUser: User): Future[Unit] = + if (sourceUser.profile.isEmpty) + Future.exception(UserProfileEmptyException) + else if (sourceUser.safety.isEmpty) + Future.exception(UserSafetyEmptyException) + else if (sourceUser.view.isEmpty) + Future.exception(UserViewEmptyException) + else if (user.id != sourceUser.id && sourceUser.safety.get.isProtected) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetProtectedTweet)) + else if (sourceUser.safety.get.deactivated) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetDeactivatedUser)) + else if (sourceUser.safety.get.suspended) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetSuspendedUser)) + else if (sourceUser.view.get.blockedBy && !isSGSTestRole(user)) + Future.exception(TweetCreateFailure.State(TweetCreateState.CannotRetweetBlockingUser)) + else if (sourceUser.profile.get.screenName.isEmpty) + Future.exception( + TweetCreateFailure.State(TweetCreateState.CannotRetweetUserWithoutScreenName) + ) + else + Future.Unit + + def tflockGraphContains( + graph: StatusGraph, + fromId: Long, + toId: Long, + dir: Direction + ): Future[Boolean] = + tflock.contains(graph, fromId, toId, dir).rescue { + case ex: OverCapacity => Future.exception(ex) + case ex => 
Future.exception(TFlockLookupFailure(ex)) + } + + def getRetweetIdFromTflock(sourceTweetId: TweetId, userId: UserId): Future[Option[Long]] = + tflock + .selectAll( + Select( + sourceId = sourceTweetId, + graph = RetweetsGraph, + direction = Forward + ).intersect( + Select( + sourceId = userId, + graph = UserTimelineGraph, + direction = Forward + ) + ) + ) + .map(_.headOption) + + def validateNotAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] = + // use the perspective object from TLS if available, otherwise, check with tflock + (sourceTweet.perspective match { + case Some(perspective) => + Future.value(perspective.retweeted) + case None => + // we have to query the RetweetSourceGraph in the Reverse order because + // it is only defined in that direction, instead of bi-directionally + tflockGraphContains(RetweetSourceGraph, user.id, sourceTweet.id, Reverse) + }).flatMap { + case true => + Future.exception(TweetCreateFailure.State(TweetCreateState.AlreadyRetweeted)) + case false => Future.Unit + } + + def failWithRetweetIdIfAlreadyRetweeted(user: User, sourceTweet: Tweet): Future[Unit] = + // use the perspective object from TLS if available, otherwise, check with tflock + (sourceTweet.perspective.flatMap(_.retweetId) match { + case Some(tweetId) => Future.value(Some(tweetId)) + case None => + getRetweetIdFromTflock(sourceTweet.id, user.id) + }).flatMap { + case None => Future.Unit + case Some(tweetId) => + Future.exception(TweetCreateFailure.AlreadyRetweeted(tweetId)) + } + + def validateContributor(contributorIdOpt: Option[UserId]): Future[Unit] = + if (contributorIdOpt.isDefined) + Future.exception(TweetCreateFailure.State(TweetCreateState.ContributorNotSupported)) + else + Future.Unit + + case class RetweetSource(sourceTweet: Tweet, parentUserId: UserId) + + /** + * Recursively follows a retweet chain to the root source tweet. Also returns user id from the + * first walked tweet as the 'parentUserId'. 
+ * In practice, the depth of the chain should never be greater than 2 because + * share.sourceStatusId should always reference the root (unlike share.parentStatusId). + */ + def findRetweetSource( + tweetId: TweetId, + forUser: User, + hydrateOptions: WritePathHydrationOptions + ): Future[RetweetSource] = + Stitch + .run(sourceTweetRepo(SourceTweetRequest(tweetId, forUser, hydrateOptions))) + .flatMap { tweet => + getShare(tweet) match { + case None => Future.value(RetweetSource(tweet, getUserId(tweet))) + case Some(share) => + findRetweetSource(share.sourceStatusId, forUser, hydrateOptions) + .map(_.copy(parentUserId = getUserId(tweet))) + } + } + + FutureArrow { request => + for { + () <- validateRequest(request) + userFuture = Stitch.run(getUser(request.userId)) + tweetIdFuture = tweetIdGenerator() + devsrcFuture = Stitch.run(getDeviceSource(request.createdVia)) + user <- userFuture + tweetId <- tweetIdFuture + devsrc <- devsrcFuture + rtSource <- findRetweetSource( + request.sourceStatusId, + user, + request.hydrationOptions.getOrElse(WritePathHydrationOptions(simpleQuotedTweet = true)) + ) + sourceTweet = rtSource.sourceTweet + sourceUser <- Stitch.run(getSourceUser(getUserId(sourceTweet), request.userId)) + + // We want to confirm that a user is actually allowed to + // retweet an Exclusive Tweet (only available to super followers) + () <- StratoSuperFollowRelationsRepository.Validate( + sourceTweet.exclusiveTweetControl, + user.id, + superFollowRelationsRepo) + + () <- validateUser(user) + () <- validateUpdateRateLimit((user.id, request.dark)) + () <- validateContributor(request.contributorUserId) + () <- validateCanRetweet(user, sourceUser, sourceTweet, request) + () <- unretweetEdits(sourceTweet.editControl, sourceTweet.id, user.id) + + spamRequest = RetweetSpamRequest( + retweetId = tweetId, + sourceUserId = getUserId(sourceTweet), + sourceTweetId = sourceTweet.id, + sourceTweetText = getText(sourceTweet), + sourceUserName = 
sourceUser.profile.map(_.screenName), + safetyMetaData = request.safetyMetaData + ) + + spamResult <- spamChecker(spamRequest) + + safety = user.safety.get + + share = Share( + sourceStatusId = sourceTweet.id, + sourceUserId = sourceUser.id, + parentStatusId = request.sourceStatusId + ) + + retweetText = composeRetweetText(getText(sourceTweet), sourceUser) + createdAt = SnowflakeId(tweetId).time + + coreData = TweetCoreData( + userId = request.userId, + text = retweetText, + createdAtSecs = createdAt.inSeconds, + createdVia = devsrc.internalName, + share = Some(share), + hasTakedown = safety.hasTakedown, + trackingId = request.trackingId, + nsfwUser = safety.nsfwUser, + nsfwAdmin = safety.nsfwAdmin, + narrowcast = request.narrowcast, + nullcast = request.nullcast + ) + + retweet = Tweet( + id = tweetId, + coreData = Some(coreData), + contributor = getContributor(request.userId), + editControl = Some( + EditControl.Initial( + EditControlUtil + .makeEditControlInitial( + tweetId = tweetId, + createdAt = createdAt, + setEditWindowToSixtyMinutes = setEditWindowToSixtyMinutes + ) + .initial + .copy(isEditEligible = Some(false)) + ) + ), + ) + + retweetWithEntities = entityExtactor(retweet) + retweetWithAdditionalFields = setAdditionalFields( + retweetWithEntities, + request.additionalFields + ) + // update the perspective and counts fields of the source tweet to reflect the effects + // of the user performing a retweet, even though those effects haven't happened yet. 
+ updatedSourceTweet = sourceTweet.copy( + perspective = sourceTweet.perspective.map { + _.copy(retweeted = true, retweetId = Some(retweet.id)) + }, + counts = sourceTweet.counts.map { c => c.copy(retweetCount = c.retweetCount.map(_ + 1)) } + ) + + user <- updateUserCounts(user, retweetWithAdditionalFields) + } yield { + TweetBuilderResult( + tweet = retweetWithAdditionalFields, + user = user, + createdAt = createdAt, + sourceTweet = Some(updatedSourceTweet), + sourceUser = Some(sourceUser), + parentUserId = Some(rtSource.parentUserId), + isSilentFail = spamResult == Spam.SilentFail + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala new file mode 100644 index 000000000..8a675a8ce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ReverseGeocoder.scala @@ -0,0 +1,78 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.geoduck.backend.hydration.thriftscala.HydrationContext +import com.twitter.geoduck.common.thriftscala.Constants +import com.twitter.geoduck.common.thriftscala.PlaceQuery +import com.twitter.geoduck.common.thriftscala.PlaceQueryFields +import com.twitter.geoduck.service.common.clientmodules.GeoduckGeohashLocate +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.geoduck.util.primitives.LatLon +import com.twitter.geoduck.util.primitives.{Geohash => GDGeohash} +import com.twitter.geoduck.util.primitives.{Place => GDPlace} +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.repository.GeoduckPlaceConverter +import com.twitter.tweetypie.{thriftscala => TP} + +object ReverseGeocoder { + val log: Logger = Logger(getClass) + + private def validatingRGC(rgc: ReverseGeocoder): ReverseGeocoder = + FutureArrow { + case (coords: TP.GeoCoordinates, language: PlaceLanguage) => + if (LatLon.isValid(coords.latitude, 
coords.longitude)) + rgc((coords, language)) + else + Future.None + } + + /** + * create a Geo backed ReverseGeocoder + */ + def fromGeoduck(geohashLocate: GeoduckGeohashLocate): ReverseGeocoder = + validatingRGC( + FutureArrow { + case (geo: TP.GeoCoordinates, language: PlaceLanguage) => + if (log.isDebugEnabled) { + log.debug("RGC'ing " + geo.toString() + " with geoduck") + } + + val hydrationContext = + HydrationContext( + placeFields = Set[PlaceQueryFields]( + PlaceQueryFields.PlaceNames + ) + ) + + val gh = GDGeohash(LatLon(lat = geo.latitude, lon = geo.longitude)) + val placeQuery = PlaceQuery(placeTypes = Some(Constants.ConsumerPlaceTypes)) + + geohashLocate + .locateGeohashes(Seq(gh.toThrift), placeQuery, hydrationContext) + .onFailure { case ex => log.warn("failed to rgc " + geo.toString(), ex) } + .map { + (resp: Seq[Try[LocationResponse]]) => + resp.headOption.flatMap { + case Throw(ex) => + log.warn("rgc failed for coords: " + geo.toString(), ex) + None + case Return(locationResponse) => + GDPlace.tryLocationResponse(locationResponse) match { + case Throw(ex) => + log + .warn("rgc failed in response handling for coords: " + geo.toString(), ex) + None + case Return(tplaces) => + GDPlace.pickConsumerLocation(tplaces).map { place: GDPlace => + if (log.isDebugEnabled) { + log.debug("successfully rgc'd " + geo + " to " + place.id) + } + GeoduckPlaceConverter(language, place) + } + } + + } + } + } + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala new file mode 100644 index 000000000..3c7a78fd9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowRetweetSpamChecker.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.tracing.Trace +import com.twitter.service.gen.scarecrow.thriftscala.Retweet +import 
com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.spam.features.thriftscala.SafetyMetaData +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.RetweetSpamCheckRepository +import com.twitter.tweetypie.thriftscala.TweetCreateState + +case class RetweetSpamRequest( + retweetId: TweetId, + sourceUserId: UserId, + sourceTweetId: TweetId, + sourceTweetText: String, + sourceUserName: Option[String], + safetyMetaData: Option[SafetyMetaData]) + +/** + * Use the Scarecrow service as the spam checker for retweets. + */ +object ScarecrowRetweetSpamChecker { + val log: Logger = Logger(getClass) + + def requestToScarecrowRetweet(req: RetweetSpamRequest): Retweet = + Retweet( + id = req.retweetId, + sourceUserId = req.sourceUserId, + text = req.sourceTweetText, + sourceTweetId = req.sourceTweetId, + safetyMetaData = req.safetyMetaData + ) + + def apply( + stats: StatsReceiver, + repo: RetweetSpamCheckRepository.Type + ): Spam.Checker[RetweetSpamRequest] = { + + def handler(request: RetweetSpamRequest): Spam.Checker[TieredAction] = + Spam.handleScarecrowResult(stats) { + case (TieredActionResult.NotSpam, _, _) => Spam.AllowFuture + case (TieredActionResult.SilentFail, _, _) => Spam.SilentFailFuture + case (TieredActionResult.UrlSpam, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.UrlSpam, denyMessage)) + case (TieredActionResult.Deny, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.Spam, denyMessage)) + case (TieredActionResult.DenyByIpiPolicy, _, denyMessage) => + Future.exception(Spam.DisabledByIpiFailure(request.sourceUserName, denyMessage)) + case (TieredActionResult.RateLimit, _, denyMessage) => + Future.exception( + TweetCreateFailure.State(TweetCreateState.SafetyRateLimitExceeded, denyMessage)) + case (TieredActionResult.Bounce, 
Some(b), _) => + Future.exception(TweetCreateFailure.Bounced(b)) + } + + req => { + Trace.record("com.twitter.tweetypie.ScarecrowRetweetSpamChecker.retweetId=" + req.retweetId) + Stitch.run(repo(requestToScarecrowRetweet(req))).flatMap(handler(req)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala new file mode 100644 index 000000000..5db66c4dc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScarecrowTweetSpamChecker.scala @@ -0,0 +1,106 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.finagle.tracing.Trace +import com.twitter.relevance.feature_store.thriftscala.FeatureData +import com.twitter.relevance.feature_store.thriftscala.FeatureValue +import com.twitter.service.gen.scarecrow.thriftscala.TieredAction +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.service.gen.scarecrow.thriftscala.TweetContext +import com.twitter.service.gen.scarecrow.thriftscala.TweetNew +import com.twitter.spam.features.thriftscala.SafetyMetaData +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.handler.Spam.Checker +import com.twitter.tweetypie.repository.TweetSpamCheckRepository +import com.twitter.tweetypie.thriftscala.TweetCreateState +import com.twitter.tweetypie.thriftscala.TweetMediaTags + +case class TweetSpamRequest( + tweetId: TweetId, + userId: UserId, + text: String, + mediaTags: Option[TweetMediaTags], + safetyMetaData: Option[SafetyMetaData], + inReplyToTweetId: Option[TweetId], + quotedTweetId: Option[TweetId], + quotedTweetUserId: Option[UserId]) + +/** + * Use the Scarecrow service as the spam checker for tweets. 
+ */ +object ScarecrowTweetSpamChecker { + val log: Logger = Logger(getClass) + + private def requestToScarecrowTweet(req: TweetSpamRequest): TweetNew = { + // compile additional input features for the spam check + val mediaTaggedUserIds = { + val mediaTags = req.mediaTags.getOrElse(TweetMediaTags()) + mediaTags.tagMap.values.flatten.flatMap(_.userId).toSet + } + + val additionalInputFeatures = { + val mediaTaggedUserFeatures = if (mediaTaggedUserIds.nonEmpty) { + Seq( + "mediaTaggedUsers" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds))), + "victimIds" -> FeatureData(Some(FeatureValue.LongSetValue(mediaTaggedUserIds))) + ) + } else { + Seq.empty + } + + val quotedTweetIdFeature = req.quotedTweetId.map { quotedTweetId => + "quotedTweetId" -> FeatureData(Some(FeatureValue.LongValue(quotedTweetId))) + } + + val quotedTweetUserIdFeature = req.quotedTweetUserId.map { quotedTweetUserId => + "quotedTweetUserId" -> FeatureData(Some(FeatureValue.LongValue(quotedTweetUserId))) + } + + val featureMap = + (mediaTaggedUserFeatures ++ quotedTweetIdFeature ++ quotedTweetUserIdFeature).toMap + + if (featureMap.nonEmpty) Some(featureMap) else None + } + + TweetNew( + id = req.tweetId, + userId = req.userId, + text = req.text, + additionalInputFeatures = additionalInputFeatures, + safetyMetaData = req.safetyMetaData, + inReplyToStatusId = req.inReplyToTweetId + ) + } + + private def tieredActionHandler(stats: StatsReceiver): Checker[TieredAction] = + Spam.handleScarecrowResult(stats) { + case (TieredActionResult.NotSpam, _, _) => Spam.AllowFuture + case (TieredActionResult.SilentFail, _, _) => Spam.SilentFailFuture + case (TieredActionResult.DenyByIpiPolicy, _, _) => Spam.DisabledByIpiPolicyFuture + case (TieredActionResult.UrlSpam, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.UrlSpam, denyMessage)) + case (TieredActionResult.Deny, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.Spam, denyMessage)) 
+ case (TieredActionResult.Captcha, _, denyMessage) => + Future.exception(TweetCreateFailure.State(TweetCreateState.SpamCaptcha, denyMessage)) + case (TieredActionResult.RateLimit, _, denyMessage) => + Future.exception( + TweetCreateFailure.State(TweetCreateState.SafetyRateLimitExceeded, denyMessage)) + case (TieredActionResult.Bounce, Some(b), _) => + Future.exception(TweetCreateFailure.Bounced(b)) + } + + def fromSpamCheckRepository( + stats: StatsReceiver, + repo: TweetSpamCheckRepository.Type + ): Spam.Checker[TweetSpamRequest] = { + val handler = tieredActionHandler(stats) + req => { + Trace.record("com.twitter.tweetypie.ScarecrowTweetSpamChecker.userId=" + req.userId) + Stitch.run(repo(requestToScarecrowTweet(req), TweetContext.Creation)).flatMap { resp => + handler(resp.tieredAction) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala new file mode 100644 index 000000000..77c3b2bb3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/ScrubGeoEventBuilder.scala @@ -0,0 +1,72 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.ScrubGeo +import com.twitter.tweetypie.store.ScrubGeoUpdateUserTimestamp +import com.twitter.tweetypie.thriftscala.DeleteLocationData +import com.twitter.tweetypie.thriftscala.GeoScrub + +/** + * Create the appropriate ScrubGeo.Event for a GeoScrub request. 
+ */ +object ScrubGeoEventBuilder { + val userQueryOptions: UserQueryOptions = + UserQueryOptions( + Set(UserField.Safety, UserField.Roles), + UserVisibility.All + ) + + private def userLoader( + stats: StatsReceiver, + userRepo: UserRepository.Optional + ): UserId => Future[Option[User]] = { + val userNotFoundCounter = stats.counter("user_not_found") + userId => + Stitch.run( + userRepo(UserKey(userId), userQueryOptions) + .onSuccess(userOpt => if (userOpt.isEmpty) userNotFoundCounter.incr()) + ) + } + + object UpdateUserTimestamp { + type Type = DeleteLocationData => Future[ScrubGeoUpdateUserTimestamp.Event] + + def apply( + stats: StatsReceiver, + userRepo: UserRepository.Optional, + ): Type = { + val timestampDiffStat = stats.stat("now_delta_ms") + val loadUser = userLoader(stats, userRepo) + request: DeleteLocationData => + loadUser(request.userId).map { userOpt => + // delta between users requesting deletion and the time we publish to TweetEvents + timestampDiffStat.add((Time.now.inMillis - request.timestampMs).toFloat) + ScrubGeoUpdateUserTimestamp.Event( + userId = request.userId, + timestamp = Time.fromMilliseconds(request.timestampMs), + optUser = userOpt + ) + } + } + } + + object ScrubTweets { + type Type = GeoScrub => Future[ScrubGeo.Event] + + def apply(stats: StatsReceiver, userRepo: UserRepository.Optional): Type = { + val loadUser = userLoader(stats, userRepo) + geoScrub => + loadUser(geoScrub.userId).map { userOpt => + ScrubGeo.Event( + tweetIdSet = geoScrub.statusIds.toSet, + userId = geoScrub.userId, + enqueueMax = geoScrub.hosebirdEnqueue, + optUser = userOpt, + timestamp = Time.now + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala new file mode 100644 index 000000000..adc2c5739 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SelfThreadBuilder.scala @@ -0,0 
+1,119 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.thriftscala.Reply +import com.twitter.tweetypie.thriftscala.SelfThreadMetadata +import org.apache.thrift.protocol.TField + +trait SelfThreadBuilder { + def requiredReplySourceFields: Set[TField] = + Set( + Tweet.CoreDataField, // for Reply and ConversationId + Tweet.SelfThreadMetadataField // for continuing existing self-threads + ) + + def build(authorUserId: UserId, replySourceTweet: Tweet): Option[SelfThreadMetadata] +} + +/** + * SelfThreadBuilder is used to build metadata for self-threads (tweetstorms). + * + * This builder is invoked from ReplyBuilder on tweets that pass in a inReplyToStatusId and create + * a Reply. The invocation is done inside ReplyBuilder as ReplyBuilder has already loaded the + * "reply source tweet" which has all the information needed to determine the self-thread metadata. + * + * Note that Tweet.SelfThreadMetadata schema supports representing two types of self-threads: + * 1. root self-thread : self-thread that begins alone and does not start with replying to another + * tweet. This self-thread has a self-thread ID equal to the conversation ID. + * 2. reply self-thread : self-thread that begins as a reply to another user's tweet. + * This self-thread has a self-thread ID equal to the first tweet in the + * current self-reply chain which will not equal the conversation ID. + * + * Currently only type #1 "root self-thread" is handled. + */ +object SelfThreadBuilder { + + def apply(stats: StatsReceiver): SelfThreadBuilder = { + // We want to keep open the possibility for differentiation between root + // self-threads (current functionality) and reply self-threads (possible + // future functionality). + val rootThreadStats = stats.scope("root_thread") + + // A tweet becomes a root of a self-thread only after the first self-reply + // is created. 
root_thread/start is incr()d during the write-path of the + // self-reply tweet, when it is known that the first/root tweet has not + // yet been assigned a SelfThreadMetadata. The write-path of the second + // tweet does not add the SelfThreadMetadata to the first tweet - that + // happens asynchronously by the SelfThreadDaemon. + val rootThreadStartCounter = rootThreadStats.counter("start") + + // root_thread/continue provides visibility into the frequency of + // continuation tweets off leaf tweets in a tweet storm. Also incr()d in + // the special case of a reply to the root tweet, which does not yet have a + // SelfThreadMetadata(isLeaf=true). + val rootThreadContinueCounter = rootThreadStats.counter("continue") + + // root_thread/branch provides visibility into how frequently self-threads + // get branched - that is, when the author self-replies to a non-leaf tweet + // in an existing thread. Knowing the frequency of branching will help us + // determine the priority of accounting for branching in various + // tweet-delete use cases. Currently we do not fix up the root tweet's + // SelfThreadMetadata when its reply tweets are deleted. 
+ val rootThreadBranchCounter = rootThreadStats.counter("branch") + + def observeSelfThreadMetrics(replySourceSTM: Option[SelfThreadMetadata]): Unit = { + replySourceSTM match { + case Some(SelfThreadMetadata(_, isLeaf)) => + if (isLeaf) rootThreadContinueCounter.incr() + else rootThreadBranchCounter.incr() + case None => + rootThreadStartCounter.incr() + } + } + + new SelfThreadBuilder { + + override def build( + authorUserId: UserId, + replySourceTweet: Tweet + ): Option[SelfThreadMetadata] = { + // the "reply source tweet"'s author must match the current author + if (getUserId(replySourceTweet) == authorUserId) { + val replySourceSTM = getSelfThreadMetadata(replySourceTweet) + + observeSelfThreadMetrics(replySourceSTM) + + // determine if replySourceTweet stands alone (non-reply) + getReply(replySourceTweet) match { + case None | Some(Reply(None, _, _)) => + // 'replySourceTweet' started a new self-thread that stands alone + // which happens when there's no Reply or the Reply does not have + // inReplyToStatusId (directed-at user) + + // requiredReplySourceFields requires coreData and conversationId + // is required so this would have previously thrown an exception + // in ReplyBuilder if the read was partial + val convoId = replySourceTweet.coreData.get.conversationId.get + Some(SelfThreadMetadata(id = convoId, isLeaf = true)) + + case _ => + // 'replySourceTweet' was also a reply-to-tweet, so continue any + // self-thread by inheriting any SelfThreadMetadata it has + // (though always setting isLeaf to true) + replySourceSTM.map(_.copy(isLeaf = true)) + } + } else { + // Replying to a different user currently never creates a self-thread + // as all self-threads must start at the root (and match conversation + // ID). 
+ // + // In the future replying to a different user *might* be part of a + // self-thread but we wouldn't mark it as such until the *next* tweet + // is created (at which time the self_thread daemon goes back and + // marks the first tweet as in the self-thread. + None + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala new file mode 100644 index 000000000..423543d8f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetAdditionalFieldsBuilder.scala @@ -0,0 +1,61 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.repository.UserKey +import com.twitter.tweetypie.repository.UserQueryOptions +import com.twitter.tweetypie.repository.UserRepository +import com.twitter.tweetypie.repository.UserVisibility +import com.twitter.tweetypie.store.AsyncSetAdditionalFields +import com.twitter.tweetypie.store.SetAdditionalFields +import com.twitter.tweetypie.store.TweetStoreEventOrRetry +import com.twitter.tweetypie.thriftscala.AsyncSetAdditionalFieldsRequest +import com.twitter.tweetypie.thriftscala.SetAdditionalFieldsRequest + +object SetAdditionalFieldsBuilder { + type Type = SetAdditionalFieldsRequest => Future[SetAdditionalFields.Event] + + val tweetOptions: TweetQuery.Options = TweetQuery.Options(include = GetTweetsHandler.BaseInclude) + + def apply(tweetRepo: TweetRepository.Type): Type = { + def getTweet(tweetId: TweetId) = + Stitch.run( + tweetRepo(tweetId, tweetOptions) + .rescue(HandlerError.translateNotFoundToClientError(tweetId)) + ) + + request => { + getTweet(request.additionalFields.id).map { tweet => + SetAdditionalFields.Event( + additionalFields = 
request.additionalFields,
+          userId = getUserId(tweet),
+          timestamp = Time.now
+        )
+      }
+    }
+  }
+}
+
+/** Builds the async store event for a request after loading the request's user. */
+object AsyncSetAdditionalFieldsBuilder {
+  type Type = AsyncSetAdditionalFieldsRequest => Future[
+    TweetStoreEventOrRetry[AsyncSetAdditionalFields.Event]
+  ]
+
+  val userQueryOpts: UserQueryOptions = UserQueryOptions(Set(UserField.Safety), UserVisibility.All)
+
+  def apply(userRepo: UserRepository.Type): Type = {
+    // Resolve the user, turning a missing user into HandlerError.userNotFound.
+    def lookupUser(id: UserId): Future[User] = {
+      val found = userRepo(UserKey.byId(id), userQueryOpts)
+      Stitch.run(found.rescue { case NotFound => Stitch.exception(HandlerError.userNotFound(id)) })
+    }
+
+    asyncRequest =>
+      for (user <- lookupUser(asyncRequest.userId))
+        yield AsyncSetAdditionalFields.Event.fromAsyncRequest(asyncRequest, user)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala
new file mode 100644
index 000000000..48dc91014
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/SetRetweetVisibilityHandler.scala
@@ -0,0 +1,45 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.tweetypie.store.SetRetweetVisibility
+import com.twitter.tweetypie.thriftscala.SetRetweetVisibilityRequest
+import com.twitter.tweetypie.thriftscala.Share
+import com.twitter.tweetypie.thriftscala.Tweet
+
+/**
+ * Create a [[SetRetweetVisibility.Event]] from a [[SetRetweetVisibilityRequest]] and then
+ * pipe the event to [[store.SetRetweetVisibility]]. The event contains the information
+ * to determine if a retweet should be included in its source tweet's retweet count.
+ *
+ * Showing/hiding a retweet count is done by calling TFlock to modify an edge's state between
+ * `Positive` <--> `Archived` in the RetweetsGraph(6) and modifying the count in cache directly.
+ */
+object SetRetweetVisibilityHandler {
+  type Type = SetRetweetVisibilityRequest => Future[Unit]
+
+  def apply(
+    tweetGetter: TweetId => Future[Option[Tweet]],
+    setRetweetVisibilityStore: SetRetweetVisibility.Event => Future[Unit]
+  ): Type =
+    req =>
+      // flatMap, not map: the store's Future must be returned rather than discarded.
+      tweetGetter(req.retweetId).flatMap {
+        case Some(retweet) =>
+          getShare(retweet)
+            .map { share: Share =>
+              setRetweetVisibilityStore(
+                SetRetweetVisibility.Event(
+                  retweetId = req.retweetId,
+                  visible = req.visible,
+                  srcId = share.sourceStatusId,
+                  retweetUserId = getUserId(retweet),
+                  srcTweetUserId = share.sourceUserId,
+                  timestamp = Time.now
+                )
+              )
+            }
+            .getOrElse(Future.Unit) // no source id: no known source tweet to update
+        // Retweet not found (e.g. deleted): no-op so we never accidentally undelete it.
+        case None => Future.Unit
+      }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala
new file mode 100644
index 000000000..088f9b8a9
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/Spam.scala
@@ -0,0 +1,99 @@
+package com.twitter.tweetypie
+package handler
+
+import com.twitter.botmaker.thriftscala.BotMakerResponse
+import com.twitter.bouncer.thriftscala.Bounce
+import com.twitter.finagle.tracing.Trace
+import com.twitter.relevance.feature_store.thriftscala.FeatureData
+import com.twitter.relevance.feature_store.thriftscala.FeatureValue.StrValue
+import com.twitter.service.gen.scarecrow.thriftscala.TieredAction
+import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult
+import com.twitter.tweetypie.core.TweetCreateFailure
+import com.twitter.tweetypie.thriftscala.TweetCreateState
+
+object Spam {
+  sealed trait Result
+  case object Allow extends Result
+  case object SilentFail extends Result
+  case object DisabledByIpiPolicy extends Result
+
+  val
AllowFuture: Future[Allow.type] = Future.value(Allow) + val SilentFailFuture: Future[SilentFail.type] = Future.value(SilentFail) + val DisabledByIpiPolicyFuture: Future[DisabledByIpiPolicy.type] = + Future.value(DisabledByIpiPolicy) + + def DisabledByIpiFailure( + userName: Option[String], + customDenyMessage: Option[String] = None + ): TweetCreateFailure.State = { + val errorMsg = (customDenyMessage, userName) match { + case (Some(denyMessage), _) => denyMessage + case (_, Some(name)) => s"Some actions on this ${name} Tweet have been disabled by Twitter." + case _ => "Some actions on this Tweet have been disabled by Twitter." + } + TweetCreateFailure.State(TweetCreateState.DisabledByIpiPolicy, Some(errorMsg)) + } + + type Checker[T] = T => Future[Result] + + /** + * Dummy spam checker that always allows requests. + */ + val DoNotCheckSpam: Checker[AnyRef] = _ => AllowFuture + + def gated[T](gate: Gate[Unit])(checker: Checker[T]): Checker[T] = + req => if (gate()) checker(req) else AllowFuture + + def selected[T](gate: Gate[Unit])(ifTrue: Checker[T], ifFalse: Checker[T]): Checker[T] = + req => gate.select(ifTrue, ifFalse)()(req) + + def withEffect[T](check: Checker[T], effect: T => Unit): T => Future[Result] = { t: T => + effect(t) + check(t) + } + + /** + * Wrapper that implicitly allows retweet or tweet creation when spam + * checking fails. + */ + def allowOnException[T](checker: Checker[T]): Checker[T] = + req => + checker(req).rescue { + case e: TweetCreateFailure => Future.exception(e) + case _ => AllowFuture + } + + /** + * Handler for scarecrow result to be used by a Checker. 
+ */ + def handleScarecrowResult( + stats: StatsReceiver + )( + handler: PartialFunction[(TieredActionResult, Option[Bounce], Option[String]), Future[Result]] + ): Checker[TieredAction] = + result => { + stats.scope("scarecrow_result").counter(result.resultCode.name).incr() + Trace.record("com.twitter.tweetypie.Spam.scarecrow_result=" + result.resultCode.name) + /* + * A bot can return a custom DenyMessage + * + * If it does, we substitute this for the 'message' in the ValidationError. + */ + val customDenyMessage: Option[String] = for { + botMakeResponse: BotMakerResponse <- result.botMakerResponse + outputFeatures <- botMakeResponse.outputFeatures + denyMessageFeature: FeatureData <- outputFeatures.get("DenyMessage") + denyMessageFeatureValue <- denyMessageFeature.featureValue + denyMessage <- denyMessageFeatureValue match { + case stringValue: StrValue => + Some(stringValue.strValue) + case _ => + None + } + } yield denyMessage + handler.applyOrElse( + (result.resultCode, result.bounce, customDenyMessage), + withEffect(DoNotCheckSpam, (_: AnyRef) => stats.counter("unexpected_result").incr()) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala new file mode 100644 index 000000000..e729e3cce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TakedownHandler.scala @@ -0,0 +1,76 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.takedown.util.TakedownReasons._ +import com.twitter.tweetypie.store.Takedown +import com.twitter.tweetypie.thriftscala.TakedownRequest +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.util.Takedowns + +/** + * This handler processes TakedownRequest objects sent to Tweetypie's takedown endpoint. 
+ * The request object specifies which takedown countries are being added and which are + * being removed. It also includes side effect flags for setting the tweet's has_takedown + * bit, scribing to Guano, and enqueuing to EventBus. For more information about inputs + * to the takedown endpoint, see the TakedownRequest documentation in the thrift definition. + */ +object TakedownHandler { + type Type = FutureArrow[TakedownRequest, Unit] + + def apply( + getTweet: FutureArrow[TweetId, Tweet], + getUser: FutureArrow[UserId, User], + writeTakedown: FutureEffect[Takedown.Event] + ): Type = { + FutureArrow { request => + for { + tweet <- getTweet(request.tweetId) + user <- getUser(getUserId(tweet)) + userHasTakedowns = user.takedowns.map(userTakedownsToReasons).exists(_.nonEmpty) + + existingTweetReasons = Takedowns.fromTweet(tweet).reasons + + reasonsToRemove = (request.countriesToRemove.map(countryCodeToReason) ++ + request.reasonsToRemove.map(normalizeReason)).distinct.sortBy(_.toString) + + reasonsToAdd = (request.countriesToAdd.map(countryCodeToReason) ++ + request.reasonsToAdd.map(normalizeReason)).distinct.sortBy(_.toString) + + updatedTweetTakedowns = + (existingTweetReasons ++ reasonsToAdd) + .filterNot(reasonsToRemove.contains) + .toSeq + .sortBy(_.toString) + + (cs, rs) = Takedowns.partitionReasons(updatedTweetTakedowns) + + updatedTweet = Lens.setAll( + tweet, + // these fields are cached on the Tweet in CachingTweetStore and written in + // ManhattanTweetStore + TweetLenses.hasTakedown -> (updatedTweetTakedowns.nonEmpty || userHasTakedowns), + TweetLenses.tweetypieOnlyTakedownCountryCodes -> Some(cs).filter(_.nonEmpty), + TweetLenses.tweetypieOnlyTakedownReasons -> Some(rs).filter(_.nonEmpty) + ) + + _ <- writeTakedown.when(tweet != updatedTweet) { + Takedown.Event( + tweet = updatedTweet, + timestamp = Time.now, + user = Some(user), + takedownReasons = updatedTweetTakedowns, + reasonsToAdd = reasonsToAdd, + reasonsToRemove = reasonsToRemove, + auditNote = 
request.auditNote, + host = request.host, + byUserId = request.byUserId, + eventbusEnqueue = request.eventbusEnqueue, + scribeForAudit = request.scribeForAudit, + updateCodesAndReasons = true + ) + } + } yield () + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala new file mode 100644 index 000000000..98bb33064 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetBuilder.scala @@ -0,0 +1,1180 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.gizmoduck.thriftscala.AccessPolicy +import com.twitter.gizmoduck.thriftscala.LabelValue +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields._ +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilder +import com.twitter.tweetypie.jiminy.tweetypie.NudgeBuilderRequest +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.repository.StratoCommunityAccessRepository.CommunityAccess +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil.DeviceSourceParser +import com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import com.twitter.tweetypie.tweettext._ +import com.twitter.tweetypie.util.CommunityAnnotation +import com.twitter.tweetypie.util.CommunityUtil +import com.twitter.twittertext.Regex.{VALID_URL => UrlPattern} +import com.twitter.twittertext.TwitterTextParser + +case class TweetBuilderResult( + tweet: 
Tweet, + user: User, + createdAt: Time, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + parentUserId: Option[UserId] = None, + isSilentFail: Boolean = false, + geoSearchRequestId: Option[GeoSearchRequestId] = None, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None) + +object TweetBuilder { + import GizmoduckUserCountsUpdatingStore.isUserTweet + import PostTweet._ + import Preprocessor._ + import TweetCreateState.{Spam => CreateStateSpam, _} + import TweetText._ + import UpstreamFailure._ + + type Type = FutureArrow[PostTweetRequest, TweetBuilderResult] + + val log: Logger = Logger(getClass) + + private[this] val _unitMutation = Future.value(Mutation.unit[Any]) + def MutationUnitFuture[T]: Future[Mutation[T]] = _unitMutation.asInstanceOf[Future[Mutation[T]]] + + case class MissingConversationId(inReplyToTweetId: TweetId) extends RuntimeException + + case class TextVisibility( + visibleTextRange: Option[TextRange], + totalTextDisplayLength: Offset.DisplayUnit, + visibleText: String) { + val isExtendedTweet: Boolean = totalTextDisplayLength.toInt > OriginalMaxDisplayLength + + /** + * Going forward we will be moving away from quoted-tweets urls in tweet text, but we + * have a backwards-compat layer in Tweetypie which adds the QT url to text to provide + * support for all clients to read in a backwards-compatible way until they upgrade. + * + * Tweets can become extended as their display length can go beyond 140 + * after adding the QT short url. Therefore, we are adding below function + * to account for legacy formatting during read-time and generate a self-permalink. 
+ */ + def isExtendedWithExtraChars(extraChars: Int): Boolean = + totalTextDisplayLength.toInt > (OriginalMaxDisplayLength - extraChars) + } + + /** Max number of users that can be tagged on a single tweet */ + val MaxMediaTagCount = 10 + + val MobileWebApp = "oauth:49152" + val M2App = "oauth:3033294" + val M5App = "oauth:3033300" + + val TestRateLimitUserRole = "stresstest" + + /** + * The fields to fetch for the user creating the tweet. + */ + val userFields: Set[UserField] = + Set( + UserField.Profile, + UserField.ProfileDesign, + UserField.Account, + UserField.Safety, + UserField.Counts, + UserField.Roles, + UserField.UrlEntities, + UserField.Labels + ) + + /** + * The fields to fetch for the user of the source tweet in a retweet. + */ + val sourceUserFields: Set[UserField] = + userFields + UserField.View + + /** + * Converts repository exceptions into an API-compatible exception type + */ + def convertRepoExceptions[A]( + notFoundState: TweetCreateState, + failureHandler: Throwable => Throwable + ): PartialFunction[Throwable, Stitch[A]] = { + // stitch.NotFound is converted to the supplied TweetCreateState, wrapped in TweetCreateFailure + case NotFound => Stitch.exception(TweetCreateFailure.State(notFoundState)) + // OverCapacity exceptions should not be translated and should bubble up to the top + case ex: OverCapacity => Stitch.exception(ex) + // Other exceptions are wrapped in the supplied failureHandler + case ex => Stitch.exception(failureHandler(ex)) + } + + /** + * Adapts a UserRepository to a Repository for looking up a single user and that + * fails with an appropriate TweetCreateFailure if the user is not found. 
+   */
+  def userLookup(userRepo: UserRepository.Type): UserId => Stitch[User] = {
+    val query = UserQueryOptions(queryFields = userFields, visibility = UserVisibility.All)
+    val onFailure = convertRepoExceptions[User](UserNotFound, UserLookupFailure(_))
+
+    // stitch.NotFound becomes TweetCreateFailure.State(UserNotFound) via convertRepoExceptions
+    id => userRepo(UserKey(id), query).rescue(onFailure)
+  }
+
+  /**
+   * Like userLookup, but for the user of the source tweet in a retweet: fetches
+   * sourceUserFields with forUserId set and fails with SourceUserNotFound if missing.
+   */
+  def sourceUserLookup(userRepo: UserRepository.Type): (UserId, UserId) => Stitch[User] = {
+    val query = UserQueryOptions(queryFields = sourceUserFields, visibility = UserVisibility.All)
+    val onFailure = convertRepoExceptions[User](SourceUserNotFound, UserLookupFailure(_))
+
+    (sourceUserId, forUserId) =>
+      userRepo(UserKey(sourceUserId), query.copy(forUserId = Some(forUserId))).rescue(onFailure)
+  }
+
+  /**
+   * Strips fields that TweetBuilder/RetweetBuilder load on the user but that must not reach the
+   * async-insert actions (such as hosebird).
+   *
+   * Currently that is the perspectival `view`, loaded relative to the user creating the tweet.
+   */
+  def scrubUserInAsyncInserts: User => User =
+    _.copy(view = None)
+
+  /**
+   * Counterpart of scrubUserInAsyncInserts for the source user loaded via
+   * TweetBuilder/RetweetBuilder: drops whatever must not be carried into the async-insert
+   * actions (such as hosebird).
+   *
+   * That again covers perspectival fields loaded relative to the user creating the tweet.
+   */
+  def scrubSourceUserInAsyncInserts: User => User =
+    // no source-user-specific scrubbing yet, so delegate to the plain user scrubber
+    scrubUserInAsyncInserts
+
+  /**
+   * Any fields that are loaded on the source tweet via RetweetBuilder, but which should not be
+   * included on the source tweetypie in the async-insert actions (such as hosebird) should
+   * be removed here.
+ * + * This will include perspectival fields that were loaded relative to the user creating the tweet. + */ + def scrubSourceTweetInAsyncInserts: Tweet => Tweet = + tweet => tweet.copy(perspective = None, cards = None, card2 = None) + + /** + * Adapts a DeviceSource to a Repository for looking up a single device-source and that + * fails with an appropriate TweetCreateFailure if not found. + */ + def deviceSourceLookup(devSrcRepo: DeviceSourceRepository.Type): DeviceSourceRepository.Type = + appIdStr => { + val result: Stitch[DeviceSource] = + if (DeviceSourceParser.isValid(appIdStr)) { + devSrcRepo(appIdStr) + } else { + Stitch.exception(NotFound) + } + + result.rescue(convertRepoExceptions(DeviceSourceNotFound, DeviceSourceLookupFailure(_))) + } + + /** + * Checks: + * - that we have all the user fields we need + * - that the user is active + * - that they are not a frictionless follower account + */ + def validateUser(user: User): Future[Unit] = + if (user.safety.isEmpty) + Future.exception(UserSafetyEmptyException) + else if (user.profile.isEmpty) + Future.exception(UserProfileEmptyException) + else if (user.safety.get.deactivated) + Future.exception(TweetCreateFailure.State(UserDeactivated)) + else if (user.safety.get.suspended) + Future.exception(TweetCreateFailure.State(UserSuspended)) + else if (user.labels.exists(_.labels.exists(_.labelValue == LabelValue.ReadOnly))) + Future.exception(TweetCreateFailure.State(CreateStateSpam)) + else if (user.userType == UserType.Frictionless) + Future.exception(TweetCreateFailure.State(UserNotFound)) + else if (user.userType == UserType.Soft) + Future.exception(TweetCreateFailure.State(UserNotFound)) + else if (user.safety.get.accessPolicy == AccessPolicy.BounceAll || + user.safety.get.accessPolicy == AccessPolicy.BounceAllPublicWrites) + Future.exception(TweetCreateFailure.State(UserReadonly)) + else + Future.Unit + + def validateCommunityReply( + communities: Option[Communities], + replyResult: 
Option[ReplyBuilder.Result] + ): Future[Unit] = { + + if (replyResult.flatMap(_.reply.inReplyToStatusId).nonEmpty) { + val rootCommunities = replyResult.flatMap(_.community) + val rootCommunityIds = CommunityUtil.communityIds(rootCommunities) + val replyCommunityIds = CommunityUtil.communityIds(communities) + + if (rootCommunityIds == replyCommunityIds) { + Future.Unit + } else { + Future.exception(TweetCreateFailure.State(CommunityReplyTweetNotAllowed)) + } + } else { + Future.Unit + } + } + + // Project requirements do not allow exclusive tweets to be replies. + // All exclusive tweets must be root tweets. + def validateExclusiveTweetNotReplies( + exclusiveTweetControls: Option[ExclusiveTweetControl], + replyResult: Option[ReplyBuilder.Result] + ): Future[Unit] = { + val isInReplyToTweet = replyResult.exists(_.reply.inReplyToStatusId.isDefined) + if (exclusiveTweetControls.isDefined && isInReplyToTweet) { + Future.exception(TweetCreateFailure.State(SuperFollowsInvalidParams)) + } else { + Future.Unit + } + } + + // Invalid parameters for Exclusive Tweets: + // - Community field set # Tweets can not be both at the same time. 
+  def validateExclusiveTweetParams(
+    exclusiveTweetControls: Option[ExclusiveTweetControl],
+    communities: Option[Communities]
+  ): Future[Unit] = {
+    // A tweet cannot be both exclusive and a community tweet.
+    val isExclusiveCommunityTweet =
+      exclusiveTweetControls.nonEmpty && CommunityUtil.hasCommunity(communities)
+    if (!isExclusiveCommunityTweet) Future.Unit
+    else Future.exception(TweetCreateFailure.State(SuperFollowsInvalidParams))
+  }
+
+  def validateTrustedFriendsNotReplies(
+    trustedFriendsControl: Option[TrustedFriendsControl],
+    replyResult: Option[ReplyBuilder.Result]
+  ): Future[Unit] = {
+    // A trusted-friends tweet is rejected when it is a reply to a tweet, i.e. when the
+    // reply carries an inReplyToStatusId.
+    val repliesToTweet = replyResult.flatMap(_.reply.inReplyToStatusId).nonEmpty
+
+    if (trustedFriendsControl.isEmpty || !repliesToTweet) Future.Unit
+    else Future.exception(TweetCreateFailure.State(TrustedFriendsInvalidParams))
+  }
+
+  def validateTrustedFriendsParams(
+    trustedFriendsControl: Option[TrustedFriendsControl],
+    conversationControl: Option[TweetCreateConversationControl],
+    communities: Option[Communities],
+    exclusiveTweetControl: Option[ExclusiveTweetControl]
+  ): Future[Unit] = {
+    // Trusted friends excludes conversation controls, communities and exclusive tweets.
+    def conflicts =
+      conversationControl.isDefined || CommunityUtil.hasCommunity(communities) ||
+        exclusiveTweetControl.isDefined
+    if (trustedFriendsControl.isDefined && conflicts)
+      Future.exception(TweetCreateFailure.State(TrustedFriendsInvalidParams))
+    else Future.Unit
+  }
+
+  /**
+   * Checks the weighted tweet text length using twitter-text, as used by clients.
+   * This should ensure that any tweet the client deems valid will also be deemed
+   * valid by Tweetypie.
+   */
+  def prevalidateTextLength(text: String, stats: StatsReceiver): Future[Unit] = {
+    val twitterTextConfig = TwitterTextParser.TWITTER_TEXT_DEFAULT_CONFIG
+    val twitterTextResult = TwitterTextParser.parseTweet(text, twitterTextConfig)
+    val textTooLong = !twitterTextResult.isValid && text.length > 0
+
+    Future.when(textTooLong) {
+      val weightedLength = twitterTextResult.weightedLength
+      log.debug(
+        s"Weighted length too long.
weightedLength: $weightedLength" + + s", Tweet text: '${diffshow.show(text)}'" + ) + stats.counter("check_weighted_length/text_too_long").incr() + Future.exception(TweetCreateFailure.State(TextTooLong)) + } + } + + /** + * Checks that the tweet text is neither blank nor too long. + */ + def validateTextLength( + text: String, + visibleText: String, + replyResult: Option[ReplyBuilder.Result], + stats: StatsReceiver + ): Future[Unit] = { + val utf8Length = Offset.Utf8.length(text) + + def visibleTextTooLong = + Offset.DisplayUnit.length(visibleText) > Offset.DisplayUnit(MaxVisibleWeightedEmojiLength) + + def utf8LengthTooLong = + utf8Length > Offset.Utf8(MaxUtf8Length) + + if (isBlank(text)) { + stats.counter("validate_text_length/text_cannot_be_blank").incr() + Future.exception(TweetCreateFailure.State(TextCannotBeBlank)) + } else if (replyResult.exists(_.replyTextIsEmpty(text))) { + stats.counter("validate_text_length/reply_text_cannot_be_blank").incr() + Future.exception(TweetCreateFailure.State(TextCannotBeBlank)) + } else if (visibleTextTooLong) { + // Final check that visible text does not exceed MaxVisibleWeightedEmojiLength + // characters. + // prevalidateTextLength() does some portion of validation as well, most notably + // weighted length on raw, unescaped text. + stats.counter("validate_text_length/text_too_long.visible_length_explicit").incr() + log.debug( + s"Explicit MaxVisibleWeightedLength visible length check failed. 
" + + s"visibleText: '${diffshow.show(visibleText)}' and " + + s"total text: '${diffshow.show(text)}'" + ) + Future.exception(TweetCreateFailure.State(TextTooLong)) + } else if (utf8LengthTooLong) { + stats.counter("validate_text_length/text_too_long.utf8_length").incr() + Future.exception(TweetCreateFailure.State(TextTooLong)) + } else { + stats.stat("validate_text_length/utf8_length").add(utf8Length.toInt) + Future.Unit + } + } + + def getTextVisibility( + text: String, + replyResult: Option[ReplyBuilder.Result], + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + attachmentUrl: Option[String] + ): TextVisibility = { + val totalTextLength = Offset.CodePoint.length(text) + val totalTextDisplayLength = Offset.DisplayUnit.length(text) + + /** + * visibleEnd for multiple scenarios: + * + * normal tweet + media - fromIndex of mediaEntity (hydrated from last media permalink) + * quote tweet + media - fromIndex of mediaEntity + * replies + media - fromIndex of mediaEntity + * normal quote tweet - total text length (visible text range will be None) + * tweets with other attachments (DM deep links) + * fromIndex of the last URL entity + */ + val visibleEnd = mediaEntities.headOption + .map(_.fromIndex) + .orElse(attachmentUrl.flatMap(_ => urlEntities.lastOption).map(_.fromIndex)) + .map(from => (from - 1).max(0)) // for whitespace, unless there is none + .map(Offset.CodePoint(_)) + .getOrElse(totalTextLength) + + val visibleStart = replyResult match { + case Some(rr) => rr.visibleStart.min(visibleEnd) + case None => Offset.CodePoint(0) + } + + if (visibleStart.toInt == 0 && visibleEnd == totalTextLength) { + TextVisibility( + visibleTextRange = None, + totalTextDisplayLength = totalTextDisplayLength, + visibleText = text + ) + } else { + val charFrom = visibleStart.toCodeUnit(text) + val charTo = charFrom.offsetByCodePoints(text, visibleEnd - visibleStart) + val visibleText = text.substring(charFrom.toInt, charTo.toInt) + + TextVisibility( + 
visibleTextRange = Some(TextRange(visibleStart.toInt, visibleEnd.toInt)),
+        totalTextDisplayLength = totalTextDisplayLength,
+        visibleText = visibleText
+      )
+    }
+  }
+
+  def isValidHashtag(entity: HashtagEntity): Boolean =
+    TweetText.codePointLength(entity.text) <= TweetText.MaxHashtagLength
+
+  /**
+   * Rejects tweets with more mentions, URLs, hashtags or cashtags than TweetText allows, or
+   * with any hashtag longer than TweetText.MaxHashtagLength (see isValidHashtag).
+   */
+  def validateEntities(tweet: Tweet): Future[Unit] = {
+    val hashtags = getHashtags(tweet)
+    // the first limit that is exceeded, checked in a fixed order, decides the failure state
+    val violation: Option[TweetCreateState] =
+      if (getMentions(tweet).length > TweetText.MaxMentions) Some(MentionLimitExceeded)
+      else if (getUrls(tweet).length > TweetText.MaxUrls) Some(UrlLimitExceeded)
+      else if (hashtags.length > TweetText.MaxHashtags) Some(HashtagLimitExceeded)
+      else if (getCashtags(tweet).length > TweetText.MaxCashtags) Some(CashtagLimitExceeded)
+      else if (!hashtags.forall(isValidHashtag)) Some(HashtagLengthLimitExceeded)
+      else None
+
+    violation.fold(Future.Unit)(state => Future.exception(TweetCreateFailure.State(state)))
+  }
+
+  /**
+   * Returns the user as it should look once the tweet exists, with counts bumped accordingly.
+   */
+  def updateUserCounts(hasMedia: Tweet => Boolean): (User, Tweet) => Future[User] =
+    (user: User, tweet: Tweet) => {
+      // Only tweets that count as user tweets move the counters; hasMedia is only
+      // consulted for those.
+      val (tweetsDelta, mediaTweetsDelta) =
+        if (!isUserTweet(tweet)) (0, 0)
+        else (1, if (hasMedia(tweet)) 1 else 0)
+
+      val updatedCounts = user.counts.map { current =>
+        current.copy(
+          tweets = current.tweets + tweetsDelta,
+          mediaTweets = current.mediaTweets.map(_ + mediaTweetsDelta)
+        )
+      }
+
+      Future.value(user.copy(counts = updatedCounts))
+    }
+
+  def validateAdditionalFields[R](implicit view: RequestView[R]): FutureEffect[R] =
+    FutureEffect[R] { req =>
+      view
+        .additionalFields(req)
+        .map(tweet =>
+          unsettableAdditionalFieldIds(tweet) ++ rejectedAdditionalFieldIds(tweet)) match {
+        case Some(unsettableFieldIds)
if unsettableFieldIds.nonEmpty => + Future.exception( + TweetCreateFailure.State( + InvalidAdditionalField, + Some(unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds)) + ) + ) + case _ => Future.Unit + } + } + + def validateTweetMediaTags( + stats: StatsReceiver, + getUserMediaTagRateLimit: RateLimitChecker.GetRemaining, + userRepo: UserRepository.Optional + ): (Tweet, Boolean) => Future[Mutation[Tweet]] = { + val userRepoWithStats: UserRepository.Optional = + (userKey, queryOptions) => + userRepo(userKey, queryOptions).liftToTry.map { + case Return(res @ Some(_)) => + stats.counter("found").incr() + res + case Return(None) => + stats.counter("not_found").incr() + None + case Throw(_) => + stats.counter("failed").incr() + None + } + + (tweet: Tweet, dark: Boolean) => { + val mediaTags = getMediaTagMap(tweet) + + if (mediaTags.isEmpty) { + MutationUnitFuture + } else { + getUserMediaTagRateLimit((getUserId(tweet), dark)).flatMap { remainingMediaTagCount => + val maxMediaTagCount = math.min(remainingMediaTagCount, MaxMediaTagCount) + + val taggedUserIds = + mediaTags.values.flatten.toSeq.collect { + case MediaTag(MediaTagType.User, Some(userId), _, _) => userId + }.distinct + + val droppedTagCount = taggedUserIds.size - maxMediaTagCount + if (droppedTagCount > 0) stats.counter("over_limit_tags").incr(droppedTagCount) + + val userQueryOpts = + UserQueryOptions( + queryFields = Set(UserField.MediaView), + visibility = UserVisibility.MediaTaggable, + forUserId = Some(getUserId(tweet)) + ) + + val keys = taggedUserIds.take(maxMediaTagCount).map(UserKey.byId) + val keyOpts = keys.map((_, userQueryOpts)) + + Stitch.run { + Stitch + .traverse(keyOpts)(userRepoWithStats.tupled) + .map(_.flatten) + .map { users => + val userMap = users.map(u => u.id -> u).toMap + val mediaTagsMutation = + Mutation[Seq[MediaTag]] { mediaTags => + val validMediaTags = + mediaTags.filter { + case MediaTag(MediaTagType.User, Some(userId), _, _) => + 
userMap.get(userId).exists(_.mediaView.exists(_.canMediaTag)) + case _ => false + } + val invalidCount = mediaTags.size - validMediaTags.size + + if (invalidCount != 0) { + stats.counter("invalid").incr(invalidCount) + Some(validMediaTags) + } else { + None + } + } + TweetLenses.mediaTagMap.mutation(mediaTagsMutation.liftMapValues) + } + } + } + } + } + } + + def validateCommunityMembership( + communityMembershipRepository: StratoCommunityMembershipRepository.Type, + communityAccessRepository: StratoCommunityAccessRepository.Type, + communities: Option[Communities] + ): Future[Unit] = + communities match { + case Some(Communities(Seq(communityId))) => + Stitch + .run { + communityMembershipRepository(communityId).flatMap { + case true => Stitch.value(None) + case false => + communityAccessRepository(communityId).map { + case Some(CommunityAccess.Public) | Some(CommunityAccess.Closed) => + Some(TweetCreateState.CommunityUserNotAuthorized) + case Some(CommunityAccess.Private) | None => + Some(TweetCreateState.CommunityNotFound) + } + } + }.flatMap { + case None => + Future.Done + case Some(tweetCreateState) => + Future.exception(TweetCreateFailure.State(tweetCreateState)) + } + case Some(Communities(communities)) if communities.length > 1 => + // Not allowed to specify more than one community ID. + Future.exception(TweetCreateFailure.State(TweetCreateState.InvalidAdditionalField)) + case _ => Future.Done + } + + private[this] val CardUriSchemeRegex = "(?i)^(?:card|tombstone):".r + + /** + * Is the given String a URI that is allowed as a card reference + * without a matching URL in the text? 
+ */ + def hasCardsUriScheme(uri: String): Boolean = + CardUriSchemeRegex.findPrefixMatchOf(uri).isDefined + + // Failures used by cardReferenceBuilder below. Each one is an + // InvalidAdditionalField state with a message naming the check that failed. + val InvalidAdditionalFieldEmptyUrlEntities: TweetCreateFailure.State = + TweetCreateFailure.State( + TweetCreateState.InvalidAdditionalField, + Some("url entities are empty") + ) + + val InvalidAdditionalFieldNonMatchingUrlAndShortUrl: TweetCreateFailure.State = + TweetCreateFailure.State( + TweetCreateState.InvalidAdditionalField, + Some("non-matching url and short url") + ) + + val InvalidAdditionalFieldInvalidUri: TweetCreateFailure.State = + TweetCreateFailure.State( + TweetCreateState.InvalidAdditionalField, + Some("invalid URI") + ) + + val InvalidAdditionalFieldInvalidCardUri: TweetCreateFailure.State = + TweetCreateFailure.State( + TweetCreateState.InvalidAdditionalField, + Some("invalid card URI") + ) + + /** + * Takes the tweet and the URL shortener context and yields the Mutation + * that updates the tweet's card reference. + */ + type CardReferenceBuilder = + (Tweet, UrlShortener.Context) => Future[Mutation[Tweet]] + + /** + * Resolves the tweet's card reference to a card URI and validates it. + * + * - No card reference: the no-op MutationUnitFuture. + * - URI with a `card:` or `tombstone:` scheme: used as-is. + * - URI matching UrlPattern: must correspond to one of the tweet's URL + * entities, either through the entity's expanded URL or, failing that, + * through the short URL that `urlShortener` returns for it. The matching + * short URL becomes the card URI. A tweet without URL entities fails with + * InvalidAdditionalFieldEmptyUrlEntities, and a URL without a matching + * entity fails with InvalidAdditionalFieldNonMatchingUrlAndShortUrl. + * - Any other URI fails with InvalidAdditionalFieldInvalidUri. + * + * The card URI is then passed to `cardReferenceValidator` together with the + * tweet's user id; a CardReferenceValidationFailedException is reported as + * InvalidAdditionalFieldInvalidCardUri. The resulting Mutation stores the + * validated URI in the tweet's card reference. + */ + def cardReferenceBuilder( + cardReferenceValidator: CardReferenceValidationHandler.Type, + urlShortener: UrlShortener.Type + ): CardReferenceBuilder = + (tweet, urlShortenerCtx) => { + getCardReference(tweet) match { + case Some(CardReference(uri)) => + for { + cardUri <- + if (hasCardsUriScheme(uri)) { + // This is an explicit card reference that does not + // need a corresponding URL in the text. + Future.value(uri) + } else if (UrlPattern.matcher(uri).matches) { + // The card reference is being used to specify which URL + // card to show. We need to verify that the URL is + // actually in the tweet text, or it can be effectively + // used to bypass the tweet length limit. + val urlEntities = getUrls(tweet) + + if (urlEntities.isEmpty) { + // Fail fast if there can't possibly be a matching URL entity + Future.exception(InvalidAdditionalFieldEmptyUrlEntities) + } else { + // Look for the URL in the expanded URL entities. If + // it is present, then map it to the t.co shortened + // version of the URL. 
+ urlEntities + .collectFirst { + case urlEntity if urlEntity.expanded.exists(_ == uri) => + Future.value(urlEntity.url) + } + .getOrElse { + // The URL may have been altered when it was + // returned from Talon, such as expanding a pasted + // t.co link. In this case, we t.co-ize the link and + // make sure that the corresponding t.co is present + // as a URL entity. + urlShortener((uri, urlShortenerCtx)).flatMap { shortened => + if (urlEntities.exists(_.url == shortened.shortUrl)) { + Future.value(shortened.shortUrl) + } else { + Future.exception(InvalidAdditionalFieldNonMatchingUrlAndShortUrl) + } + } + } + } + } else { + Future.exception(InvalidAdditionalFieldInvalidUri) + } + + validatedCardUri <- cardReferenceValidator((getUserId(tweet), cardUri)).rescue { + case CardReferenceValidationFailedException => + Future.exception(InvalidAdditionalFieldInvalidCardUri) + } + } yield { + TweetLenses.cardReference.mutation( + Mutation[CardReference] { cardReference => + Some(cardReference.copy(cardUri = validatedCardUri)) + }.checkEq.liftOption + ) + } + + case None => + MutationUnitFuture + } + } + + /** + * Builds the final clean-up step for a tweet: media-tag validation and + * card-reference resolution are both started for the tweet and joined, and + * the two resulting mutations are combined with `also` and applied to it. + */ + def filterInvalidData( + validateTweetMediaTags: (Tweet, Boolean) => Future[Mutation[Tweet]], + cardReferenceBuilder: CardReferenceBuilder + ): (Tweet, PostTweetRequest, UrlShortener.Context) => Future[Tweet] = + (tweet: Tweet, request: PostTweetRequest, urlShortenerCtx: UrlShortener.Context) => { + Future + .join( + validateTweetMediaTags(tweet, request.dark), + cardReferenceBuilder(tweet, urlShortenerCtx) + ) + .map { + case (mediaMutation, cardRefMutation) => + mediaMutation.also(cardRefMutation).endo(tweet) + } + } + + /** + * Wires the validators, repositories and sub-builders into the TweetBuilder: + * a FutureArrow that turns a PostTweetRequest into a TweetBuilderResult (or a + * failed Future when any step rejects the request). + */ + def apply( + stats: StatsReceiver, + validateRequest: PostTweetRequest => Future[Unit], + validateEdit: EditValidator.Type, + validateUser: User => Future[Unit] = TweetBuilder.validateUser, + validateUpdateRateLimit: RateLimitChecker.Validate, + tweetIdGenerator: TweetIdGenerator, + userRepo: UserRepository.Type, + deviceSourceRepo: 
DeviceSourceRepository.Type, + communityMembershipRepo: StratoCommunityMembershipRepository.Type, + communityAccessRepo: StratoCommunityAccessRepository.Type, + urlShortener: UrlShortener.Type, + urlEntityBuilder: UrlEntityBuilder.Type, + geoBuilder: GeoBuilder.Type, + replyBuilder: ReplyBuilder.Type, + mediaBuilder: MediaBuilder.Type, + attachmentBuilder: AttachmentBuilder.Type, + duplicateTweetFinder: DuplicateTweetFinder.Type, + spamChecker: Spam.Checker[TweetSpamRequest], + filterInvalidData: (Tweet, PostTweetRequest, UrlShortener.Context) => Future[Tweet], + updateUserCounts: (User, Tweet) => Future[User], + validateConversationControl: ConversationControlBuilder.Validate.Type, + conversationControlBuilder: ConversationControlBuilder.Type, + validateTweetWrite: TweetWriteValidator.Type, + nudgeBuilder: NudgeBuilder.Type, + communitiesValidator: CommunitiesValidator.Type, + collabControlBuilder: CollabControlBuilder.Type, + editControlBuilder: EditControlBuilder.Type, + featureSwitches: FeatureSwitches + ): TweetBuilder.Type = { + val entityExtractor = EntityExtractor.mutationWithoutUrls.endo + val getUser = userLookup(userRepo) + val getDeviceSource = deviceSourceLookup(deviceSourceRepo) + + // create a t.co permalink for a given tweetId + val permalinkShortener = (tweetId: TweetId, ctx: UrlShortener.Context) => + urlShortener((s"https://twitter.com/i/web/status/$tweetId", ctx)).rescue { + // propagate OverCapacity + case e: OverCapacity => Future.exception(e) + // convert any other failure into UrlShorteningFailure + case e => Future.exception(UrlShorteningFailure(e)) + } + + // the geo search request id carried by the request's geo options, if any + def extractGeoSearchRequestId(tweetGeoOpt: Option[TweetCreateGeo]): Option[GeoSearchRequestId] = + for { + tweetGeo <- tweetGeoOpt + geoSearchRequestId <- tweetGeo.geoSearchRequestId + } yield GeoSearchRequestId(geoSearchRequestId.id) + + // Feature switch results for this user; None when there is no + // TwitterContext viewer or no recipient can be built from it. + def featureSwitchResults(user: User, stats: StatsReceiver): Option[FeatureSwitchResults] = + TwitterContext() + .flatMap { viewer => + 
UserViewerRecipient(user, viewer, stats) + }.map { recipient => + featureSwitches.matchRecipient(recipient) + } + + FutureArrow { request => + for { + () <- validateRequest(request) + + // allocate the tweet id and load the user and device source together; + // a failed id allocation is reported as SnowflakeFailure + (tweetId, user, devsrc) <- Future.join( + tweetIdGenerator().rescue { case t => Future.exception(SnowflakeFailure(t)) }, + Stitch.run(getUser(request.userId)), + Stitch.run(getDeviceSource(request.createdVia)) + ) + + () <- validateUser(user) + () <- validateUpdateRateLimit((user.id, request.dark)) + + // Feature Switch results are calculated once and shared between multiple builders + matchedResults = featureSwitchResults(user, stats) + + () <- validateConversationControl( + ConversationControlBuilder.Validate.Request( + matchedResults = matchedResults, + conversationControl = request.conversationControl, + inReplyToTweetId = request.inReplyToTweetId + ) + ) + + // strip illegal chars, normalize newlines, collapse blank lines, etc. + text = preprocessText(request.text) + + () <- prevalidateTextLength(text, stats) + + attachmentResult <- attachmentBuilder( + AttachmentBuilderRequest( + tweetId = tweetId, + user = user, + mediaUploadIds = request.mediaUploadIds, + cardReference = request.additionalFields.flatMap(_.cardReference), + attachmentUrl = request.attachmentUrl, + remoteHost = request.remoteHost, + darkTraffic = request.dark, + deviceSource = devsrc + ) + ) + + // updated text with appended attachment url, if any. 
+ text <- Future.value( + attachmentResult.attachmentUrl match { + case None => text + case Some(url) => s"$text $url" + } + ) + + spamResult <- spamChecker( + TweetSpamRequest( + tweetId = tweetId, + userId = request.userId, + text = text, + mediaTags = request.additionalFields.flatMap(_.mediaTags), + safetyMetaData = request.safetyMetaData, + inReplyToTweetId = request.inReplyToTweetId, + quotedTweetId = attachmentResult.quotedTweet.map(_.tweetId), + quotedTweetUserId = attachmentResult.quotedTweet.map(_.userId) + ) + ) + + // NOTE(review): `.get` assumes the user lookup always populates `safety` + // (and `profile` below) - confirm against the fields requested by userLookup. + // The creation time is derived from the snowflake tweet id. + safety = user.safety.get + createdAt = SnowflakeId(tweetId).time + + urlShortenerCtx = UrlShortener.Context( + tweetId = tweetId, + userId = user.id, + createdAt = createdAt, + userProtected = safety.isProtected, + clientAppId = devsrc.clientAppId, + remoteHost = request.remoteHost, + dark = request.dark + ) + + replyRequest = ReplyBuilder.Request( + authorId = request.userId, + authorScreenName = user.profile.map(_.screenName).get, + inReplyToTweetId = request.inReplyToTweetId, + tweetText = text, + prependImplicitMentions = request.autoPopulateReplyMetadata, + enableTweetToNarrowcasting = request.enableTweetToNarrowcasting, + excludeUserIds = request.excludeReplyUserIds.getOrElse(Nil), + spamResult = spamResult, + batchMode = request.transientContext.flatMap(_.batchCompose) + ) + + replyResult <- replyBuilder(replyRequest) + replyOpt = replyResult.map(_.reply) + + replyConversationId <- replyResult match { + case Some(r) if r.reply.inReplyToStatusId.nonEmpty => + r.conversationId match { + case None => + // Throw this specific exception to make it easier to + // count how often we hit this corner case. + Future.exception(MissingConversationId(r.reply.inReplyToStatusId.get)) + case conversationIdOpt => Future.value(conversationIdOpt) + } + case _ => Future.value(None) + } + + // Validate that the current user can reply to this conversation, based on + // the conversation's ConversationControl. 
+ // Note: currently we only validate conversation controls access on replies, + // therefore we use the conversationId from the inReplyToStatus. + // Validate that the exclusive tweet control option is only used by allowed users. + () <- validateTweetWrite( + TweetWriteValidator.Request( + replyConversationId, + request.userId, + request.exclusiveTweetControlOptions, + replyResult.flatMap(_.exclusiveTweetControl), + request.trustedFriendsControlOptions, + replyResult.flatMap(_.trustedFriendsControl), + attachmentResult.quotedTweet, + replyResult.flatMap(_.reply.inReplyToStatusId), + replyResult.flatMap(_.editControl), + request.editOptions + ) + ) + + convoId = replyConversationId match { + case Some(replyConvoId) => replyConvoId + case None => + // This is a root tweet, so the tweet id is the conversation id. + tweetId + } + + () <- nudgeBuilder( + NudgeBuilderRequest( + text = text, + inReplyToTweetId = replyOpt.flatMap(_.inReplyToStatusId), + conversationId = if (convoId == tweetId) None else Some(convoId), + hasQuotedTweet = attachmentResult.quotedTweet.nonEmpty, + nudgeOptions = request.nudgeOptions, + tweetId = Some(tweetId), + ) + ) + + // updated text with implicit reply mentions inserted, if any + text <- Future.value( + replyResult.map(_.tweetText).getOrElse(text) + ) + + // updated text with urls replaced with t.cos + ((text, urlEntities), (geoCoords, placeIdOpt)) <- Future.join( + urlEntityBuilder((text, urlShortenerCtx)) + .map { + case (text, urlEntities) => + UrlEntityBuilder.updateTextAndUrls(text, urlEntities)(partialHtmlEncode) + }, + if (request.geo.isEmpty) + Future.value((None, None)) + else + geoBuilder( + GeoBuilder.Request( + request.geo.get, + user.account.map(_.geoEnabled).getOrElse(false), + user.account.map(_.language).getOrElse("en") + ) + ).map(r => (r.geoCoordinates, r.placeId)) + ) + + // updated text with trailing media url + MediaBuilder.Result(text, mediaEntities, mediaKeys) <- + request.mediaUploadIds.getOrElse(Nil) match { + 
case Nil => Future.value(MediaBuilder.Result(text, Nil, Nil)) + case ids => + mediaBuilder( + MediaBuilder.Request( + mediaUploadIds = ids, + text = text, + tweetId = tweetId, + userId = user.id, + userScreenName = user.profile.get.screenName, + isProtected = user.safety.get.isProtected, + createdAt = createdAt, + dark = request.dark, + productMetadata = request.mediaMetadata.map(_.toMap) + ) + ) + } + + // duplicate detection is skipped for dark requests + () <- Future.when(!request.dark) { + val reqInfo = + DuplicateTweetFinder.RequestInfo.fromPostTweetRequest(request, text) + + duplicateTweetFinder(reqInfo).flatMap { + case None => Future.Unit + case Some(duplicateId) => + log.debug(s"timeline_duplicate_check_failed:$duplicateId") + Future.exception(TweetCreateFailure.State(TweetCreateState.Duplicate)) + } + } + + textVisibility = getTextVisibility( + text = text, + replyResult = replyResult, + urlEntities = urlEntities, + mediaEntities = mediaEntities, + attachmentUrl = attachmentResult.attachmentUrl + ) + + () <- validateTextLength( + text = text, + visibleText = textVisibility.visibleText, + replyResult = replyResult, + stats = stats + ) + + communities = + request.additionalFields + .flatMap(CommunityAnnotation.additionalFieldsToCommunityIDs) + .map(ids => Communities(communityIds = ids)) + + rootExclusiveControls = request.exclusiveTweetControlOptions.map { _ => + ExclusiveTweetControl(request.userId) + } + + () <- validateExclusiveTweetNotReplies(rootExclusiveControls, replyResult) + () <- validateExclusiveTweetParams(rootExclusiveControls, communities) + + replyExclusiveControls = replyResult.flatMap(_.exclusiveTweetControl) + + // The userId is pulled off of the request rather than being supplied + // via the ExclusiveTweetControlOptions because additional fields + // can be set by clients to contain any value they want. + // This could include userIds that don't match their actual userId. + // Only one of replyResult or request.exclusiveTweetControlOptions will be defined. 
+ exclusiveTweetControl = replyExclusiveControls.orElse(rootExclusiveControls) + + rootTrustedFriendsControl = request.trustedFriendsControlOptions.map { options => + TrustedFriendsControl(options.trustedFriendsListId) + } + + () <- validateTrustedFriendsNotReplies(rootTrustedFriendsControl, replyResult) + () <- validateTrustedFriendsParams( + rootTrustedFriendsControl, + request.conversationControl, + communities, + exclusiveTweetControl + ) + + replyTrustedFriendsControl = replyResult.flatMap(_.trustedFriendsControl) + + trustedFriendsControl = replyTrustedFriendsControl.orElse(rootTrustedFriendsControl) + + collabControl <- collabControlBuilder( + CollabControlBuilder.Request( + collabControlOptions = request.collabControlOptions, + replyResult = replyResult, + communities = communities, + trustedFriendsControl = trustedFriendsControl, + conversationControl = request.conversationControl, + exclusiveTweetControl = exclusiveTweetControl, + userId = request.userId + )) + + isCollabInvitation = collabControl.isDefined && (collabControl.get match { + case CollabControl.CollabInvitation(_: CollabInvitation) => true + case _ => false + }) + + coreData = TweetCoreData( + userId = request.userId, + text = text, + createdAtSecs = createdAt.inSeconds, + createdVia = devsrc.internalName, + reply = replyOpt, + hasTakedown = safety.hasTakedown, + // We want to nullcast community tweets and CollabInvitations. + // This will disable tweet fanout to followers' home timelines, + // and filter the tweets from appearing from the tweeter's profile + // or search results for the tweeter's tweets. 
+ nullcast = + request.nullcast || CommunityUtil.hasCommunity(communities) || isCollabInvitation, + narrowcast = request.narrowcast, + nsfwUser = request.possiblySensitive.getOrElse(safety.nsfwUser), + nsfwAdmin = safety.nsfwAdmin, + trackingId = request.trackingId, + placeId = placeIdOpt, + coordinates = geoCoords, + conversationId = Some(convoId), + // Set hasMedia to true if we know that there is media, + // and leave it unknown if not, so that it will be + // correctly set for pasted media. + hasMedia = if (mediaEntities.nonEmpty) Some(true) else None + ) + + tweet = Tweet( + id = tweetId, + coreData = Some(coreData), + urls = Some(urlEntities), + media = Some(mediaEntities), + mediaKeys = if (mediaKeys.nonEmpty) Some(mediaKeys) else None, + contributor = getContributor(request.userId), + visibleTextRange = textVisibility.visibleTextRange, + selfThreadMetadata = replyResult.flatMap(_.selfThreadMetadata), + directedAtUserMetadata = replyResult.map(_.directedAtMetadata), + composerSource = request.composerSource, + quotedTweet = attachmentResult.quotedTweet, + exclusiveTweetControl = exclusiveTweetControl, + trustedFriendsControl = trustedFriendsControl, + collabControl = collabControl, + noteTweet = request.noteTweetOptions.map(options => + NoteTweet(options.noteTweetId, options.isExpandable)) + ) + + editControl <- editControlBuilder( + EditControlBuilder.Request( + postTweetRequest = request, + tweet = tweet, + matchedResults = matchedResults + ) + ) + + tweet <- Future.value(tweet.copy(editControl = editControl)) + + // extract entities other than urls (EntityExtractor.mutationWithoutUrls) + tweet <- Future.value(entityExtractor(tweet)) + + () <- validateEntities(tweet) + + tweet <- { + val cctlRequest = + ConversationControlBuilder.Request.fromTweet( + tweet, + request.conversationControl, + request.noteTweetOptions.flatMap(_.mentionedUserIds)) + Stitch.run(conversationControlBuilder(cctlRequest)).map { conversationControl => + tweet.copy(conversationControl = conversationControl) + } + } + + tweet <- Future.value( + 
setAdditionalFields(tweet, request.additionalFields) + ) + () <- validateCommunityMembership(communityMembershipRepo, communityAccessRepo, communities) + () <- validateCommunityReply(communities, replyResult) + () <- communitiesValidator( + CommunitiesValidator.Request(matchedResults, safety.isProtected, communities)) + + tweet <- Future.value(tweet.copy(communities = communities)) + + tweet <- Future.value( + tweet.copy(underlyingCreativesContainerId = request.underlyingCreativesContainerId) + ) + + // For certain tweets we want to write a self-permalink which is used to generate modified + // tweet text for legacy clients that contains a link. NOTE: this permalink is for + // the tweet being created - we also create permalinks for related tweets further down + // e.g. if this tweet is an edit, we might create a permalink for the initial tweet as well + tweet <- { + val isBeyond140 = textVisibility.isExtendedWithExtraChars(attachmentResult.extraChars) + val isEditTweet = request.editOptions.isDefined + val isMixedMedia = Media.isMixedMedia(mediaEntities) + val isNoteTweet = request.noteTweetOptions.isDefined + + if (isBeyond140 || isEditTweet || isMixedMedia || isNoteTweet) + permalinkShortener(tweetId, urlShortenerCtx) + .map { selfPermalink => + tweet.copy( + selfPermalink = Some(selfPermalink), + extendedTweetMetadata = Some(ExtendedTweetMetadataBuilder(tweet, selfPermalink)) + ) + } + else { + Future.value(tweet) + } + } + + // When an edit tweet is created we have to update some information on the + // initial tweet, this object stores info about those updates for use + // in the tweet insert store. + // We update the editControl for each edit tweet and for the first edit tweet + // we update the self permalink. 
+ initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] <- editControl match { + case Some(EditControl.Edit(edit)) => + // Identifies the first edit of an initial tweet + val isFirstEdit = + request.editOptions.map(_.previousTweetId).contains(edit.initialTweetId) + + // A potential self permalink for the initial tweet of the tweet being created + val selfPermalinkForInitial: Future[Option[ShortenedUrl]] = + if (isFirstEdit) { + // `tweet` is the first edit of an initial tweet, which means + // we need to write a self permalink. We create it here in + // TweetBuilder and pass it through to the tweet store to + // be written to the initial tweet. + permalinkShortener(edit.initialTweetId, urlShortenerCtx).map(Some(_)) + } else { + Future.value(None) + } + + selfPermalinkForInitial.map { link => + Some( + InitialTweetUpdateRequest( + initialTweetId = edit.initialTweetId, + editTweetId = tweet.id, + selfPermalink = link + )) + } + + // This is not an edit; it is the initial tweet, so there are no initial + // tweet updates + case _ => Future.value(None) + } + + tweet <- filterInvalidData(tweet, request, urlShortenerCtx) + + () <- validateEdit(tweet, request.editOptions) + + user <- updateUserCounts(user, tweet) + + } yield { + TweetBuilderResult( + tweet, + user, + createdAt, + isSilentFail = spamResult == Spam.SilentFail, + geoSearchRequestId = extractGeoSearchRequestId(request.geo), + initialTweetUpdateRequest = initialTweetUpdateRequest + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetCreationLock.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetCreationLock.scala new file mode 100644 index 000000000..a530e95a2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetCreationLock.scala @@ -0,0 +1,402 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.cache.Cache +import com.twitter.servo.util.Scribe +import 
com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.PostTweetResult +import com.twitter.tweetypie.util.TweetCreationLock.Key +import com.twitter.tweetypie.util.TweetCreationLock.State +import com.twitter.util.Base64Long +import scala.util.Random +import scala.util.control.NoStackTrace +import scala.util.control.NonFatal + +/** + * This exception is returned from TweetCreationLock if there is an + * in-progress cache entry for this key. It is possible that the key + * exists because the key was not properly cleaned up, but it's + * impossible to differentiate between these cases. We resolve this by + * returning TweetCreationInProgress and having a (relatively) short TTL + * on the cache entry so that the client and/or user may retry. + */ +case object TweetCreationInProgress extends Exception with NoStackTrace + +/** + * Thrown when the TweetCreationLock discovers that there is already + * a tweet with the specified uniqueness id. + */ +case class DuplicateTweetCreation(tweetId: TweetId) extends Exception with NoStackTrace + +/** + * Guards tweet creation so that the same key is not inserted twice. + * + * `apply` runs `insert` under the lock for `key`. `insert` is passed by + * name, so it is only evaluated if and when the implementation runs it. + * The `dark` and `nullcast` flags let an implementation skip locking (the + * cache-based implementation in this file does). `unlock` removes the lock + * state held for `key`. + */ +trait TweetCreationLock { + def apply( + key: Key, + dark: Boolean, + nullcast: Boolean + )( + insert: => Future[PostTweetResult] + ): Future[PostTweetResult] + def unlock(key: Key): Future[Unit] +} + +object CacheBasedTweetCreationLock { + + /** + * Indicates that setting the lock value failed because the state of + * that key in the cache has been changed (by another process or + * cache eviction). + */ + case object UnexpectedCacheState extends Exception with NoStackTrace + + /** + * Thrown when the process of updating the lock cache failed more + * than the allowed number of times. 
+ */ + case class RetriesExhausted(failures: Seq[Exception]) extends Exception with NoStackTrace + + /** + * Whether a failed cache operation should be attempted again. The three + * outcomes of the lock protocol itself (creation in progress, duplicate + * creation, retries exhausted) are final; every other exception is retried. + */ + def shouldRetry(e: Exception): Boolean = + e match { + case TweetCreationInProgress => false + case _: DuplicateTweetCreation => false + case _: RetriesExhausted => false + case _ => true + } + + /** + * Picks the TTL for a lock entry from its state: AlreadyCreated entries get + * `longTtl`, every other state gets `shortTtl`. + */ + def ttlChooser(shortTtl: Duration, longTtl: Duration): (Key, State) => Duration = + (_, state) => + state match { + case _: State.AlreadyCreated => longTtl + case _ => shortTtl + } + + /** + * The log format is tab-separated (base 64 tweet_id, base 64 + * uniqueness_id). It's logged this way in order to minimize the + * storage requirement and to make it easy to analyze. Each log line + * should be 24 bytes, including newline. + * + * Only the tweet id is encoded here; the uniqueness id is written as it + * was received (presumably already base 64 - TODO confirm). + */ + val formatUniquenessLogEntry: ((String, TweetId)) => String = { + case (uniquenessId, tweetId) => Base64Long.toBase64(tweetId) + "\t" + uniquenessId + } + + /** + * Scribe the uniqueness id paired with the tweet id so that we can + * track the rate of failures of the uniqueness id check by + * detecting multiple tweets created with the same uniqueness id. + * + * Scribe to a test category because we only need to keep this + * information around for long enough to find any duplicates. + */ + val ScribeUniquenessId: FutureEffect[(String, TweetId)] = + Scribe("test_tweetypie_uniqueness_id") contramap formatUniquenessLogEntry + + private[this] val UniquenessIdLog = Logger("com.twitter.tweetypie.handler.UniquenessId") + + /** + * Log the uniqueness ids to a standard logger (for use when it's + * not production traffic). + */ + val LogUniquenessId: FutureEffect[(String, TweetId)] = FutureEffect[(String, TweetId)] { rec => + UniquenessIdLog.info(formatUniquenessLogEntry(rec)) + Future.Unit + } + + private val log = Logger(getClass) +} + +/** + * This class adds locking around Tweet creation, to prevent creating + * duplicate tweets when two identical requests arrive simultaneously. 
+ * A lock is created in cache using the user id and a hash of the tweet text + * in the case of tweets, or the source_status_id in the case of retweets. + * If another process attempts to lock for the same user and hash, the request + * fails as a duplicate. The lock lasts for 10 seconds if it is not deleted. + * Given the hard timeout of 5 seconds on all requests, it should never take + * us longer than 5 seconds to create a request, but we've observed times of up + * to 10 seconds to create statuses for some of our more popular users. + * + * When a request with a uniqueness id is successful, the id of the + * created tweet will be stored in the cache so that subsequent + * requests can retrieve the originally-created tweet rather than + * duplicating creation or getting an exception. + * + * @param cache holds the lock state for each tweet creation key. + * @param maxTries number of failed attempts after which a cache update + * loop gives up with RetriesExhausted. + * @param stats receives the event counters and age stats recorded here. + * @param logUniquenessId invoked with (uniqueness id, tweet id) after a + * successful create for a key that has a uniqueness id. + */ +class CacheBasedTweetCreationLock( + cache: Cache[Key, State], + maxTries: Int, + stats: StatsReceiver, + logUniquenessId: FutureEffect[(String, TweetId)]) + extends TweetCreationLock { + import CacheBasedTweetCreationLock._ + + private[this] val eventCounters = stats.scope("event") + + /** + * Debug-log a named lock event for this key and count it under the + * `event` stats scope. + */ + private[this] def event(k: Key, name: String): Unit = { + log.debug(s"$name:$k") + eventCounters.counter(name).incr() + } + + /** + * Runs `action`, running it again after each retryable exception (see + * `shouldRetry`). Once `maxTries` failures have accumulated the loop fails + * with RetriesExhausted, which carries the failures in the order in which + * they occurred. Exceptions that are not retryable propagate unchanged. + */ + private[this] def retryLoop[A](action: => Future[A]): Future[A] = { + def go(failures: List[Exception]): Future[A] = + if (failures.length >= maxTries) { + Future.exception(RetriesExhausted(failures.reverse)) + } else { + action.rescue { + case e: Exception if shouldRetry(e) => go(e :: failures) + } + } + + go(Nil) + } + + private[this] val lockerExceptions = ExceptionCounter(stats) + + /** + * Obtain the lock for creating a tweet. If this method completes + * without throwing an exception, then the lock value was + * successfully set in cache, which indicates a high probability + * that this is the only process that is attempting to create this + * tweet. (The uncertainty comes from the possibility of lock + * entries missing from the cache.) 
+ * + * @return the timestamp stored in the in-progress lock entry for this + * request. + * + * @throws TweetCreationInProgress if there is another process + * trying to create this tweet. + * + * @throws DuplicateTweetCreation if a tweet has already been + * created for a duplicate request. The exception has the id of + * the created tweet. + * + * @throws RetriesExhausted if obtaining the lock failed more than + * the requisite number of times. + */ + private[this] def obtainLock(k: Key, token: Long): Future[Time] = retryLoop { + val lockTime = Time.now + + // Get the current state for this key. + cache + .getWithChecksum(Seq(k)) + .flatMap(initialStateKvr => Future.const(initialStateKvr(k))) + .flatMap { + case None => + // Nothing in cache for this key + cache + .add(k, State.InProgress(token, lockTime)) + .flatMap { + case true => Future.value(lockTime) + case false => Future.exception(UnexpectedCacheState) + } + case Some((Throw(e), _)) => + Future.exception(e) + case Some((Return(st), cs)) => + st match { + case State.Unlocked => + // There is an Unlocked entry for this key, which + // implies that a previous attempt was cleaned up. + cache + .checkAndSet(k, State.InProgress(token, lockTime), cs) + .flatMap { + case true => Future.value(lockTime) + case false => Future.exception(UnexpectedCacheState) + } + case State.InProgress(cachedToken, creationStartedTimestamp) => + if (cachedToken == token) { + // There is an in-progress entry for *this process*. This + // can happen on a retry if the `add` actually succeeds + // but the future fails. The retry can return the result + // of the add that we previously tried. + Future.value(creationStartedTimestamp) + } else { + // There is an in-progress entry for *a different + // process*. This implies that there is another tweet + // creation in progress for *this tweet*. 
+ val tweetCreationAge = Time.now - creationStartedTimestamp + k.uniquenessId.foreach { id => + log.info( + "Found an in-progress tweet creation for uniqueness id %s %s ago" + .format(id, tweetCreationAge) + ) + } + stats.stat("in_progress_age_ms").add(tweetCreationAge.inMilliseconds) + Future.exception(TweetCreationInProgress) + } + case State.AlreadyCreated(tweetId, creationStartedTimestamp) => + // Another process successfully created a tweet for this + // key. + val tweetCreationAge = Time.now - creationStartedTimestamp + stats.stat("already_created_age_ms").add(tweetCreationAge.inMilliseconds) + Future.exception(DuplicateTweetCreation(tweetId)) + } + } + } + + /** + * Attempt to remove this process' lock entry from the cache. This + * is done by writing a short-lived tombstone, so that we can ensure + * that we only overwrite the entry if it is still an entry for this + * process instead of another process' entry. + * + * The returned Future does not fail: a failed cleanup (including + * exhausted retries) is counted as `cleanup_attempt_failed` and swallowed. + */ + private[this] def cleanupLoop(k: Key, token: Long): Future[Unit] = + retryLoop { + // Instead of deleting the value, we attempt to write Unlocked, + // because we only want to delete it if it was the value that we + // wrote ourselves, and there is no delete call that is + // conditional on the extant value. + cache + .getWithChecksum(Seq(k)) + .flatMap(kvr => Future.const(kvr(k))) + .flatMap { + case None => + // Nothing in the cache for this tweet creation, so cleanup + // is successful. + Future.Unit + + case Some((tryV, cs)) => + // If we failed trying to deserialize the value, then we + // want to let the error bubble up, because there is no good + // recovery procedure, since we can't tell whether the entry + // is ours. + Future.const(tryV).flatMap { + case State.InProgress(presentToken, _) => + if (presentToken == token) { + // This is *our* in-progress marker, so we want to + // overwrite it with the tombstone. 
If checkAndSet + // returns false, that's OK, because that means + // someone else overwrote the value, and we don't have + // to clean it up anymore. + cache.checkAndSet(k, State.Unlocked, cs).unit + } else { + // Indicates that another request has overwritten our + // state before we cleaned it up. This should only + // happen when our token was cleared from cache and + // another process started a duplicate create. This + // should be very infrequent. We count it just to be + // sure. + event(k, "other_attempt_in_progress") + Future.Unit + } + + case _ => + // Cleanup has succeeded, because we are not responsible + // for the cache entry anymore. + Future.Unit + } + } + }.onSuccess { _ => event(k, "cleanup_attempt_succeeded") } + .handle { + case _ => event(k, "cleanup_attempt_failed") + } + + /** + * Mark that a tweet has been successfully created. Subsequent calls + * to `apply` with this key will receive a DuplicateTweetCreation + * exception with the specified id. + */ + private[this] def creationComplete(k: Key, tweetId: TweetId, lockTime: Time): Future[Unit] = + // Unconditionally set the state because regardless of the + // value present, we know that we want to transition to the + // AlreadyCreated state for this key. + retryLoop(cache.set(k, State.AlreadyCreated(tweetId, lockTime))) + .onSuccess(_ => event(k, "mark_created_succeeded")) + .onFailure { case _ => event(k, "mark_created_failed") } + // If this fails, it's OK for the request to complete + // successfully, because it's more harmful to create the tweet + // and return failure than it is to complete it successfully, + // but fail to honor the uniqueness id next time. 
+ .handle { case NonFatal(_) => } + + /** + * Runs `create` while holding the lock for `k`. A random token identifies + * the lock entry written by this attempt. + * + * Running out of retries while obtaining the lock does not block the + * create; any other lock failure (for example creation in progress or + * duplicate creation) fails the call without evaluating `create`. + * + * A result that carries a tweet marks the key as AlreadyCreated. Any other + * outcome (an exception, or a result without a tweet) releases the lock + * and is returned unchanged. + */ + private[this] def createWithLock( + k: Key, + create: => Future[PostTweetResult] + ): Future[PostTweetResult] = { + val token = Random.nextLong + event(k, "lock_attempted") + + obtainLock(k, token) + .onSuccess { _ => event(k, "lock_obtained") } + .handle { + // If we run out of retries when trying to get the lock, then + // just go ahead with tweet creation. We should keep an eye on + // how frequently this happens, because this means that the + // only sign that this is happening will be duplicate tweet + // creations. + case RetriesExhausted(failures) => + event(k, "lock_failure_ignored") + // Treat this as the time that we obtained the lock. + Time.now + } + .onFailure { + case e => lockerExceptions(e) + } + .flatMap { lockTime => + create.transform { + case r @ Return(PostTweetResult(_, Some(tweet), _, _, _, _, _)) => + event(k, "create_succeeded") + + k.uniquenessId.foreach { u => logUniquenessId((u, tweet.id)) } + + // Update the lock entry to remember the id of the tweet we + // created and extend the TTL. + creationComplete(k, tweet.id, lockTime).before(Future.const(r)) + case other => + other match { + case Throw(e) => + log.debug(s"Tweet creation failed for key $k", e) + case Return(r) => + log.debug(s"Tweet creation failed for key $k, so unlocking: $r") + } + + event(k, "create_failed") + + // Attempt to clean up the lock after the failed create. + cleanupLoop(k, token).before(Future.const(other)) + } + } + } + + /** + * Make a best-effort attempt at removing the duplicate cache entry + * for this key. If this fails, it is not catastrophic. The worst-case + * behavior should be that the user has to wait for the short TTL to + * elapse before tweeting succeeds. + */ + def unlock(k: Key): Future[Unit] = + retryLoop(cache.delete(k).unit).onSuccess(_ => event(k, "deleted")) + + /** + * Prevent duplicate tweet creation. 
+ * + * Ensures that no more than one tweet creation for the same key is + * happening at the same time. If `create` fails, then the lock entry + * for the key is released (overwritten with an Unlocked tombstone). If it + * succeeds, then the key will be retained. + * + * Dark and nullcast requests bypass the lock entirely and just run + * `create`. + * + * @throws DuplicateTweetCreation if a tweet has already been + * created by a previous request. The exception has the id of the + * created tweet. + * + * @throws TweetCreationInProgress. See the documentation above. + */ + def apply( + k: Key, + isDark: Boolean, + nullcast: Boolean + )( + create: => Future[PostTweetResult] + ): Future[PostTweetResult] = + if (isDark) { + event(k, "dark_create") + create + } else if (nullcast) { + event(k, "nullcast_create") + create + } else { + createWithLock(k, create).onFailure { + // Another process is creating this same tweet (or has already + // created it) + case TweetCreationInProgress => + event(k, "tweet_creation_in_progress") + case _: DuplicateTweetCreation => + event(k, "tweet_already_created") + case _ => + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetDeletePathHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetDeletePathHandler.scala new file mode 100644 index 000000000..e1052a887 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetDeletePathHandler.scala @@ -0,0 +1,811 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.conversions.DurationOps.RichDuration +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.servo.util.FutureArrow +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.stitch.NotFound +import com.twitter.timelineservice.thriftscala.PerspectiveResult +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import 
com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Time +import com.twitter.util.Try +import Try._ +import com.twitter.spam.rtf.thriftscala.SafetyLabelType +import com.twitter.tweetypie.backends.TimelineService.GetPerspectives +import com.twitter.tweetypie.util.EditControlUtil +import scala.util.control.NoStackTrace + +case class CascadedDeleteNotAvailable(retweetId: TweetId) extends Exception with NoStackTrace { + override def getMessage: String = + s"""|Cascaded delete tweet failed because tweet $retweetId + |is not present in cache or manhattan.""".stripMargin +} + +object TweetDeletePathHandler { + + type DeleteTweets = + (DeleteTweetsRequest, Boolean) => Future[Seq[DeleteTweetResult]] + + type UnretweetEdits = (Option[EditControl], TweetId, UserId) => Future[Unit] + + /** The information from a deleteTweet request that can be inspected by a deleteTweets validator */ + case class DeleteTweetsContext( + byUserId: Option[UserId], + authenticatedUserId: Option[UserId], + tweetAuthorId: UserId, + users: Map[UserId, User], + isUserErasure: Boolean, + expectedErasureUserId: Option[UserId], + tweetIsBounced: Boolean, + isBounceDelete: Boolean) + + /** Provides reason a tweet deletion was allowed */ + sealed trait DeleteAuthorization { def byUserId: Option[UserId] } + + case class AuthorizedByTweetOwner(userId: UserId) extends DeleteAuthorization { + def byUserId: Option[UserId] = Some(userId) + } + case class AuthorizedByTweetContributor(contributorUserId: UserId) extends DeleteAuthorization { + def byUserId: Option[UserId] = Some(contributorUserId) + } + case class AuthorizedByAdmin(adminUserId: UserId) extends DeleteAuthorization { + def byUserId: Option[UserId] = Some(adminUserId) + } + case object AuthorizedByErasure extends DeleteAuthorization { + def byUserId: None.type = None + } + + // Type for a method that receives all the relevant information about a proposed internal tweet + // deletion and can return 
Future.exception to cancel the delete due to a validation error or + // return a [[DeleteAuthorization]] specifying the reason the deletion is allowed. + type ValidateDeleteTweets = FutureArrow[DeleteTweetsContext, DeleteAuthorization] + + val userFieldsForDelete: Set[UserField] = + Set(UserField.Account, UserField.Profile, UserField.Roles, UserField.Safety) + + val userQueryOptions: UserQueryOptions = + UserQueryOptions( + userFieldsForDelete, + UserVisibility.All + ) + + // user_agent property originates from the client so truncate to a reasonable length + val MaxUserAgentLength = 1000 + + // Age under which we treat not found tweets in + // cascaded_delete_tweet as a temporary condition (the most likely + // explanation being that the tweet has not yet been + // replicated). Tweets older than this we assume are due to + // *permanently* inconsistent data, either spurious edges in tflock or + // tweets that are not loadable from Manhattan. + val MaxCascadedDeleteTweetTemporaryInconsistencyAge: Duration = + 10.minutes +} + +trait TweetDeletePathHandler { + import TweetDeletePathHandler.ValidateDeleteTweets + + def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] + + def deleteTweets( + request: DeleteTweetsRequest, + isUnretweetEdits: Boolean = false, + ): Future[Seq[DeleteTweetResult]] + + def internalDeleteTweets( + request: DeleteTweetsRequest, + byUserId: Option[UserId], + authenticatedUserId: Option[UserId], + validate: ValidateDeleteTweets, + isUnretweetEdits: Boolean = false + ): Future[Seq[DeleteTweetResult]] + + def unretweetEdits( + optEditControl: Option[EditControl], + excludedTweetId: TweetId, + byUserId: UserId + ): Future[Unit] +} + +/** + * Implementation of TweetDeletePathHandler + */ +class DefaultTweetDeletePathHandler( + stats: StatsReceiver, + tweetResultRepo: TweetResultRepository.Type, + userRepo: UserRepository.Optional, + stratoSafetyLabelsRepo: StratoSafetyLabelsRepository.Type, + lastQuoteOfQuoterRepo: 
LastQuoteOfQuoterRepository.Type, + tweetStore: TotalTweetStore, + getPerspectives: GetPerspectives) + extends TweetDeletePathHandler { + import TweetDeletePathHandler._ + + val tweetRepo: TweetRepository.Type = TweetRepository.fromTweetResult(tweetResultRepo) + + // attempt to delete tweets was made by someone other than the tweet owner or an admin user + object DeleteTweetsPermissionException extends Exception with NoStackTrace + object ExpectedUserIdMismatchException extends Exception with NoStackTrace + + private[this] val log = Logger("com.twitter.tweetypie.store.TweetDeletions") + + private[this] val cascadeEditDelete = stats.scope("cascade_edit_delete") + private[this] val cascadeEditDeletesEnqueued = cascadeEditDelete.counter("enqueued") + private[this] val cascadeEditDeleteTweets = cascadeEditDelete.counter("tweets") + private[this] val cascadeEditDeleteFailures = cascadeEditDelete.counter("failures") + + private[this] val cascadedDeleteTweet = stats.scope("cascaded_delete_tweet") + private[this] val cascadedDeleteTweetFailures = cascadedDeleteTweet.counter("failures") + private[this] val cascadedDeleteTweetSourceMatch = cascadedDeleteTweet.counter("source_match") + private[this] val cascadedDeleteTweetSourceMismatch = + cascadedDeleteTweet.counter("source_mismatch") + private[this] val cascadedDeleteTweetTweetNotFound = + cascadedDeleteTweet.counter("tweet_not_found") + private[this] val cascadedDeleteTweetTweetNotFoundAge = + cascadedDeleteTweet.stat("tweet_not_found_age") + private[this] val cascadedDeleteTweetUserNotFound = cascadedDeleteTweet.counter("user_not_found") + + private[this] val deleteTweets = stats.scope("delete_tweets") + private[this] val deleteTweetsAuth = deleteTweets.scope("per_tweet_auth") + private[this] val deleteTweetsAuthAttempts = deleteTweetsAuth.counter("attempts") + private[this] val deleteTweetsAuthFailures = deleteTweetsAuth.counter("failures") + private[this] val deleteTweetsAuthSuccessAdmin = 
deleteTweetsAuth.counter("success_admin") + private[this] val deleteTweetsAuthSuccessByUser = deleteTweetsAuth.counter("success_by_user") + private[this] val deleteTweetsTweets = deleteTweets.counter("tweets") + private[this] val deleteTweetsFailures = deleteTweets.counter("failures") + private[this] val deleteTweetsTweetNotFound = deleteTweets.counter("tweet_not_found") + private[this] val deleteTweetsUserNotFound = deleteTweets.counter("user_not_found") + private[this] val userIdMismatchInTweetDelete = + deleteTweets.counter("expected_actual_user_id_mismatch") + private[this] val bounceDeleteFlagNotSet = + deleteTweets.counter("bounce_delete_flag_not_set") + + private[this] def getUser(userId: UserId): Future[Option[User]] = + Stitch.run(userRepo(UserKey(userId), userQueryOptions)) + + private[this] def getUsersForDeleteTweets(userIds: Seq[UserId]): Future[Map[UserId, User]] = + Stitch.run( + Stitch + .traverse(userIds) { userId => + userRepo(UserKey(userId), userQueryOptions).map { + case Some(u) => Some(userId -> u) + case None => deleteTweetsUserNotFound.incr(); None + } + } + .map(_.flatten.toMap) + ) + + private[this] def getTweet(tweetId: TweetId): Future[Tweet] = + Stitch.run(tweetRepo(tweetId, WritePathQueryOptions.deleteTweetsWithoutEditControl)) + + private[this] def getSingleDeletedTweet( + id: TweetId, + isCascadedEditTweetDeletion: Boolean = false + ): Stitch[Option[TweetData]] = { + val opts = if (isCascadedEditTweetDeletion) { + // Disable edit control hydration if this is cascade delete of edits. + // When edit control is hydrated, the tweet will actually be considered already deleted. + WritePathQueryOptions.deleteTweetsWithoutEditControl + } else { + WritePathQueryOptions.deleteTweets + } + tweetResultRepo(id, opts) + .map(_.value) + .liftToOption { + // We treat the request the same whether the tweet never + // existed or is in one of the already-deleted states by + // just filtering out those tweets. 
Any tweets that we + // return should be deleted. If the tweet has been + // bounce-deleted, we never want to soft-delete it, and + // vice versa. + case NotFound | FilteredState.Unavailable.TweetDeleted | + FilteredState.Unavailable.BounceDeleted => + true + } + } + + private[this] def getTweetsForDeleteTweets( + ids: Seq[TweetId], + isCascadedEditTweetDeletion: Boolean + ): Future[Map[TweetId, TweetData]] = + Stitch + .run { + Stitch.traverse(ids) { id => + getSingleDeletedTweet(id, isCascadedEditTweetDeletion) + .map { + // When deleting a tweet that has been edited, we want to instead delete the initial version. + // Because the initial tweet will be hydrated in every request, if it is deleted, later + // revisions will be hidden, and cleaned up asynchronously by TP Daemons + + // However, we don't need to do a second lookup if it's already the original tweet + // or if we're doing a cascading edit tweet delete (deleting the entire tweet history) + case Some(tweetData) + if EditControlUtil.isInitialTweet(tweetData.tweet) || + isCascadedEditTweetDeletion => + Stitch.value(Some(tweetData)) + case Some(tweetData) => + getSingleDeletedTweet(EditControlUtil.getInitialTweetId(tweetData.tweet)) + case None => + Stitch.value(None) + // We need to preserve the input tweetId, and the initial TweetData + }.flatten.map(tweetData => (id, tweetData)) + } + } + .map(_.collect { case (tweetId, Some(tweetData)) => (tweetId, tweetData) }.toMap) + + private[this] def getStratoBounceStatuses( + ids: Seq[Long], + isUserErasure: Boolean, + isCascadedEditedTweetDeletion: Boolean + ): Future[Map[TweetId, Boolean]] = { + // Don't load bounce label for user erasure tweet deletion. + // User Erasure deletions cause unnecessary spikes of traffic + // to Strato when we read the bounce label that we don't use. 
+ + // We also want to always delete a bounced tweet if the rest of the + // edit chain is being deleted in a cascaded edit tweet delete + if (isUserErasure || isCascadedEditedTweetDeletion) { + Future.value(ids.map(id => id -> false).toMap) + } else { + Stitch.run( + Stitch + .traverse(ids) { id => + stratoSafetyLabelsRepo(id, SafetyLabelType.Bounce).map { label => + id -> label.isDefined + } + } + .map(_.toMap) + ) + } + } + + /** A suspended/deactivated user can't delete tweets */ + private[this] def userNotSuspendedOrDeactivated(user: User): Try[User] = + user.safety match { + case None => Throw(UpstreamFailure.UserSafetyEmptyException) + case Some(safety) if safety.deactivated => + Throw( + AccessDenied( + s"User deactivated userId: ${user.id}", + errorCause = Some(AccessDeniedCause.UserDeactivated) + ) + ) + case Some(safety) if safety.suspended => + Throw( + AccessDenied( + s"User suspended userId: ${user.id}", + errorCause = Some(AccessDeniedCause.UserSuspended) + ) + ) + case _ => Return(user) + } + + /** + * Ensure that byUser has permission to delete tweet either by virtue of owning the tweet or being + * an admin user. Returns the reason as a DeleteAuthorization or else throws an Exception if not + * authorized. 
+ */ + private[this] def userAuthorizedToDeleteTweet( + byUser: User, + optAuthenticatedUserId: Option[UserId], + tweetAuthorId: UserId + ): Try[DeleteAuthorization] = { + + def hasAdminPrivilege = + byUser.roles.exists(_.rights.contains("delete_user_tweets")) + + deleteTweetsAuthAttempts.incr() + if (byUser.id == tweetAuthorId) { + deleteTweetsAuthSuccessByUser.incr() + optAuthenticatedUserId match { + case Some(uid) => + Return(AuthorizedByTweetContributor(uid)) + case None => + Return(AuthorizedByTweetOwner(byUser.id)) + } + } else if (optAuthenticatedUserId.isEmpty && hasAdminPrivilege) { // contributor may not assume admin role + deleteTweetsAuthSuccessAdmin.incr() + Return(AuthorizedByAdmin(byUser.id)) + } else { + deleteTweetsAuthFailures.incr() + Throw(DeleteTweetsPermissionException) + } + } + + /** + * The expected user id is the id provided on the DeleteTweetsRequest that indicates which user + * owns the tweets they want to delete. The actualUserId is the actual userId on the tweet we are about to delete. + * We check to ensure they are the same as a safety check against accidental deletion of tweets either from user mistakes + * or from corrupted data (e.g. bad tflock edges). + */ + private[this] def expectedUserIdMatchesActualUserId( + expectedUserId: UserId, + actualUserId: UserId + ): Try[Unit] = + if (expectedUserId == actualUserId) { + Return.Unit + } else { + userIdMismatchInTweetDelete.incr() + Throw(ExpectedUserIdMismatchException) + } + + /** + * Validation for the normal public tweet delete case, the user must be found and must + * not be suspended or deactivated. 
+ */ + val validateTweetsForPublicDelete: ValidateDeleteTweets = FutureArrow { + ctx: DeleteTweetsContext => + Future.const( + for { + + // byUserId must be present + byUserId <- ctx.byUserId.orThrow( + ClientError(ClientErrorCause.BadRequest, "Missing byUserId") + ) + + // the byUser must be found + byUserOpt = ctx.users.get(byUserId) + byUser <- byUserOpt.orThrow( + ClientError(ClientErrorCause.BadRequest, s"User $byUserId not found") + ) + + _ <- userNotSuspendedOrDeactivated(byUser) + + _ <- validateBounceConditions( + ctx.tweetIsBounced, + ctx.isBounceDelete + ) + + // if there's a contributor, make sure the user is found and not suspended or deactivated + _ <- + ctx.authenticatedUserId + .map { uid => + ctx.users.get(uid) match { + case None => + Throw(ClientError(ClientErrorCause.BadRequest, s"Contributor $uid not found")) + case Some(authUser) => + userNotSuspendedOrDeactivated(authUser) + } + } + .getOrElse(Return.Unit) + + // if the expected user id is present, make sure it matches the user id on the tweet + _ <- + ctx.expectedErasureUserId + .map { expectedUserId => + expectedUserIdMatchesActualUserId(expectedUserId, ctx.tweetAuthorId) + } + .getOrElse(Return.Unit) + + // User must own the tweet or be an admin + deleteAuth <- userAuthorizedToDeleteTweet( + byUser, + ctx.authenticatedUserId, + ctx.tweetAuthorId + ) + } yield deleteAuth + ) + } + + private def validateBounceConditions( + tweetIsBounced: Boolean, + isBounceDelete: Boolean + ): Try[Unit] = { + if (tweetIsBounced && !isBounceDelete) { + bounceDeleteFlagNotSet.incr() + Throw(ClientError(ClientErrorCause.BadRequest, "Cannot normal delete a Bounced Tweet")) + } else { + Return.Unit + } + } + + /** + * Validation for the user erasure case. User may be missing. 
+ */ + val validateTweetsForUserErasureDaemon: ValidateDeleteTweets = FutureArrow { + ctx: DeleteTweetsContext => + Future + .const( + for { + expectedUserId <- ctx.expectedErasureUserId.orThrow( + ClientError( + ClientErrorCause.BadRequest, + "expectedUserId is required for DeleteTweetRequests" + ) + ) + + // It's critical to always check that the userId on the tweet we want to delete matches the + // userId on the erasure request. This prevents us from accidentally deleting tweets not owned by the + // erased user, even if tflock serves us bad data. + validationResult <- expectedUserIdMatchesActualUserId(expectedUserId, ctx.tweetAuthorId) + } yield validationResult + ) + .map(_ => AuthorizedByErasure) + } + + /** + * Fill in missing values of AuditDeleteTweet with values from TwitterContext. + */ + def enrichMissingFromTwitterContext(orig: AuditDeleteTweet): AuditDeleteTweet = { + val viewer = TwitterContext() + orig.copy( + host = orig.host.orElse(viewer.flatMap(_.auditIp)), + clientApplicationId = orig.clientApplicationId.orElse(viewer.flatMap(_.clientApplicationId)), + userAgent = orig.userAgent.orElse(viewer.flatMap(_.userAgent)).map(_.take(MaxUserAgentLength)) + ) + } + + /** + * core delete tweets implementation. + * + * The [[deleteTweets]] method wraps this method and provides validation required + * for a public endpoint. 
+ */ + override def internalDeleteTweets( + request: DeleteTweetsRequest, + byUserId: Option[UserId], + authenticatedUserId: Option[UserId], + validate: ValidateDeleteTweets, + isUnretweetEdits: Boolean = false + ): Future[Seq[DeleteTweetResult]] = { + + val auditDeleteTweet = + enrichMissingFromTwitterContext(request.auditPassthrough.getOrElse(AuditDeleteTweet())) + deleteTweetsTweets.incr(request.tweetIds.size) + for { + tweetDataMap <- getTweetsForDeleteTweets( + request.tweetIds, + request.cascadedEditedTweetDeletion.getOrElse(false) + ) + + userIds: Seq[UserId] = (tweetDataMap.values.map { td => + getUserId(td.tweet) + } ++ byUserId ++ authenticatedUserId).toSeq.distinct + + users <- getUsersForDeleteTweets(userIds) + + stratoBounceStatuses <- getStratoBounceStatuses( + tweetDataMap.keys.toSeq, + request.isUserErasure, + request.cascadedEditedTweetDeletion.getOrElse(false)) + + results <- Future.collect { + request.tweetIds.map { tweetId => + tweetDataMap.get(tweetId) match { + // already deleted, so nothing to do + case None => + deleteTweetsTweetNotFound.incr() + Future.value(DeleteTweetResult(tweetId, TweetDeleteState.Ok)) + case Some(tweetData) => + val tweet: Tweet = tweetData.tweet + val tweetIsBounced = stratoBounceStatuses(tweetId) + val optSourceTweet: Option[Tweet] = tweetData.sourceTweetResult.map(_.value.tweet) + + val validation: Future[(Boolean, DeleteAuthorization)] = for { + isLastQuoteOfQuoter <- isFinalQuoteOfQuoter(tweet) + deleteAuth <- validate( + DeleteTweetsContext( + byUserId = byUserId, + authenticatedUserId = authenticatedUserId, + tweetAuthorId = getUserId(tweet), + users = users, + isUserErasure = request.isUserErasure, + expectedErasureUserId = request.expectedUserId, + tweetIsBounced = tweetIsBounced, + isBounceDelete = request.isBounceDelete + ) + ) + _ <- optSourceTweet match { + case Some(sourceTweet) if !isUnretweetEdits => + // If this is a retweet and this deletion was not triggered by + // unretweetEdits, unretweet edits of 
the source Tweet + // before deleting the retweet. + // + // deleteAuth will always contain a byUserId except for erasure deletion, + // in which case the retweets will be deleted individually. + deleteAuth.byUserId match { + case Some(userId) => + unretweetEdits(sourceTweet.editControl, sourceTweet.id, userId) + case None => Future.Unit + } + case _ => Future.Unit + } + } yield { + (isLastQuoteOfQuoter, deleteAuth) + } + + validation + .flatMap { + case (isLastQuoteOfQuoter: Boolean, deleteAuth: DeleteAuthorization) => + val isAdminDelete = deleteAuth match { + case AuthorizedByAdmin(_) => true + case _ => false + } + + val event = + DeleteTweet.Event( + tweet = tweet, + timestamp = Time.now, + user = users.get(getUserId(tweet)), + byUserId = deleteAuth.byUserId, + auditPassthrough = Some(auditDeleteTweet), + isUserErasure = request.isUserErasure, + isBounceDelete = request.isBounceDelete && tweetIsBounced, + isLastQuoteOfQuoter = isLastQuoteOfQuoter, + isAdminDelete = isAdminDelete + ) + val numberOfEdits: Int = tweet.editControl + .collect { + case EditControl.Initial(initial) => + initial.editTweetIds.count(_ != tweet.id) + } + .getOrElse(0) + cascadeEditDeletesEnqueued.incr(numberOfEdits) + tweetStore + .deleteTweet(event) + .map(_ => DeleteTweetResult(tweetId, TweetDeleteState.Ok)) + } + .onFailure { _ => + deleteTweetsFailures.incr() + } + .handle { + case ExpectedUserIdMismatchException => + DeleteTweetResult(tweetId, TweetDeleteState.ExpectedUserIdMismatch) + case DeleteTweetsPermissionException => + DeleteTweetResult(tweetId, TweetDeleteState.PermissionError) + } + } + } + } + } yield results + } + + private def isFinalQuoteOfQuoter(tweet: Tweet): Future[Boolean] = { + tweet.quotedTweet match { + case Some(qt) => + Stitch.run { + lastQuoteOfQuoterRepo + .apply(qt.tweetId, getUserId(tweet)) + .liftToTry + .map(_.getOrElse(false)) + } + case None => Future(false) + } + } + + /** + * Validations for the public deleteTweets endpoint. 
+ * - ensures that the byUserId user can be found and is in the correct user state + * - ensures that the tweet is being deleted by the tweet's owner, or by an admin + * If there is a validation error, a failed Future (Future.exception) is returned + * + * If the delete request is part of a user erasure, validations are relaxed (the User is allowed to be missing). + */ + val deleteTweetsValidator: ValidateDeleteTweets = + FutureArrow { context => + if (context.isUserErasure) { + validateTweetsForUserErasureDaemon(context) + } else { + validateTweetsForPublicDelete(context) + } + } + + override def deleteTweets( + request: DeleteTweetsRequest, + isUnretweetEdits: Boolean = false, + ): Future[Seq[DeleteTweetResult]] = { + + // For comparison testing we only want to compare the DeleteTweetsRequests that are generated + // in DeleteTweets path and not the call that comes from the Unretweet path + val context = TwitterContext() + internalDeleteTweets( + request, + byUserId = request.byUserId.orElse(context.flatMap(_.userId)), + context.flatMap(_.authenticatedUserId), + deleteTweetsValidator, + isUnretweetEdits + ) + } + + // Cascade delete tweet is the logic for removing tweets that are detached + // from their dependency which has been deleted. They are already filtered + // out from serving, so this operation reconciles storage with the view + // presented by Tweetypie. + // This RPC call is delegated from daemons or batch jobs. Currently there + // are two use-cases when this call is issued: + // * Deleting detached retweets after the source tweet was deleted. + // This is done through RetweetsDeletion daemon and the + // CleanupDetachedRetweets job. + // * Deleting edits of an initial tweet that has been deleted. + // This is done by CascadedEditedTweetDelete daemon. + // Note that, when serving the original delete request for an edit, + // only the initial tweet is deleted, which makes all edits hidden. 
+ override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = { + val contextViewer = TwitterContext() + getTweet(request.tweetId) + .transform { + case Throw( + FilteredState.Unavailable.TweetDeleted | FilteredState.Unavailable.BounceDeleted) => + // The retweet or edit was already deleted via some other mechanism + Future.Unit + + case Throw(NotFound) => + cascadedDeleteTweetTweetNotFound.incr() + val recentlyCreated = + if (SnowflakeId.isSnowflakeId(request.tweetId)) { + val age = Time.now - SnowflakeId(request.tweetId).time + cascadedDeleteTweetTweetNotFoundAge.add(age.inMilliseconds) + age < MaxCascadedDeleteTweetTemporaryInconsistencyAge + } else { + false + } + + if (recentlyCreated) { + // Treat the NotFound as a temporary condition, most + // likely due to replication lag. + Future.exception(CascadedDeleteNotAvailable(request.tweetId)) + } else { + // Treat the NotFound as a permanent inconsistency, either + // spurious edges in tflock or invalid data in Manhattan. This + // was happening a few times an hour during the time that we + // were not treating it specially. For now, we will just log that + // it happened, but in the longer term, it would be good + // to collect this data and repair the corruption. 
+ log.warn( + Seq( + "cascaded_delete_tweet_old_not_found", + request.tweetId, + request.cascadedFromTweetId + ).mkString("\t") + ) + Future.Done + } + + // Any other FilteredStates should not be thrown because of + // the options that we used to load the tweet, so we will just + // let them bubble up as an internal server error + case Throw(other) => + Future.exception(other) + + case Return(tweet) => + Future + .join( + isFinalQuoteOfQuoter(tweet), + getUser(getUserId(tweet)) + ) + .flatMap { + case (isLastQuoteOfQuoter, user) => + if (user.isEmpty) { + cascadedDeleteTweetUserNotFound.incr() + } + val tweetSourceId = getShare(tweet).map(_.sourceStatusId) + val initialEditId = tweet.editControl.collect { + case EditControl.Edit(edit) => edit.initialTweetId + } + if (initialEditId.contains(request.cascadedFromTweetId)) { + cascadeEditDeleteTweets.incr() + } + if (tweetSourceId.contains(request.cascadedFromTweetId) + || initialEditId.contains(request.cascadedFromTweetId)) { + cascadedDeleteTweetSourceMatch.incr() + val deleteEvent = + DeleteTweet.Event( + tweet = tweet, + timestamp = Time.now, + user = user, + byUserId = contextViewer.flatMap(_.userId), + cascadedFromTweetId = Some(request.cascadedFromTweetId), + auditPassthrough = request.auditPassthrough, + isUserErasure = false, + // cascaded deletes of retweets or edits have not been through a bouncer flow, + // so are not considered to be "bounce deleted". 
+ isBounceDelete = false, + isLastQuoteOfQuoter = isLastQuoteOfQuoter, + isAdminDelete = false + ) + tweetStore + .deleteTweet(deleteEvent) + .onFailure { _ => + if (initialEditId.contains(request.cascadedFromTweetId)) { + cascadeEditDeleteFailures.incr() + } + } + } else { + cascadedDeleteTweetSourceMismatch.incr() + log.warn( + Seq( + "cascaded_from_tweet_id_source_mismatch", + request.tweetId, + request.cascadedFromTweetId, + tweetSourceId.orElse(initialEditId).getOrElse("-") + ).mkString("\t") + ) + Future.Done + } + } + } + .onFailure(_ => cascadedDeleteTweetFailures.incr()) + } + + // Given a list of edit Tweet ids and a user id, find the ids of that user's retweets of those edit Tweets + private def editTweetIdRetweetsFromUser( + editTweetIds: Seq[TweetId], + byUserId: UserId + ): Future[Seq[TweetId]] = { + if (editTweetIds.isEmpty) { + Future.value(Seq()) + } else { + getPerspectives( + Seq(tls.PerspectiveQuery(byUserId, editTweetIds)) + ).map { res: Seq[PerspectiveResult] => + res.headOption.toSeq + .flatMap(_.perspectives.flatMap(_.retweetId)) + } + } + } + + /* This function is called from three places - + * 1. When Tweetypie gets a request to retweet the latest version of an edit chain, all the + * previous revisions should be unretweeted. + * i.e. On Retweet of the latest tweet - unretweets all the previous revisions for this user. + * - create A + * - retweet A'(retweet of A) + * - create edit B(edit of A) + * - retweet B' => Deletes A' + * + * 2. When Tweetypie gets an unretweet request for a source tweet that is an edit tweet, all + * the versions of the edit chain are unretweeted. + * i.e. On unretweet of any version in the edit chain - unretweets all the revisions for this user + * - create A + * - retweet A' + * - create B + * - unretweet B => Deletes A' (& also any B' if it existed) + * + * 3. When Tweetypie gets a delete request for a retweet, say A1. 
& if A happens to be the source + * tweet for A1 & if A is an edit tweet, then the entire edit chain should be unretweeted & not + * just A. i.e. On delete of a retweet - unretweet all the revisions for this user. + * - create A + * - retweet A' + * - create B + * - delete A' => Deletes A' (& also any B' if it existed) + * + * The following function has two failure scenarios - + * i. when it fails to get perspectives of any of the edit tweets. + * ii. when the deletion of any of the retweets of these edits fails. + * + * In either of these scenarios, we fail the entire request & the error bubbles up to the top. + * Note: The above unretweet of edits only happens for the current user. + * In normal circumstances, a maximum of one Tweet in the edit chain will have been retweeted, + * but we don't know which one it was. Additionally, there may be circumstances where + * unretweet failed, and we end up with multiple versions retweeted. For these reasons, + * we always unretweet all the revisions (except for `excludedTweetId`). + * This is a no-op if none of these versions have been retweeted. 
+ * */ + override def unretweetEdits( + optEditControl: Option[EditControl], + excludedTweetId: TweetId, + byUserId: UserId + ): Future[Unit] = { + + val editTweetIds: Seq[TweetId] = + EditControlUtil.getEditTweetIds(optEditControl).get().filter(_ != excludedTweetId) + + (editTweetIdRetweetsFromUser(editTweetIds, byUserId).flatMap { tweetIds => + if (tweetIds.nonEmpty) { + deleteTweets( + DeleteTweetsRequest(tweetIds = tweetIds, byUserId = Some(byUserId)), + isUnretweetEdits = true + ) + } else { + Future.Nil + } + }).unit + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetWriteValidator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetWriteValidator.scala new file mode 100644 index 000000000..2164b8a84 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/TweetWriteValidator.scala @@ -0,0 +1,118 @@ +package com.twitter.tweetypie.handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.repository.ConversationControlRepository +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControlOptions +import com.twitter.tweetypie.thriftscala.QuotedTweet +import com.twitter.tweetypie.thriftscala.TrustedFriendsControl +import com.twitter.tweetypie.thriftscala.TrustedFriendsControlOptions +import com.twitter.tweetypie.thriftscala.TweetCreateState +import com.twitter.tweetypie.FutureEffect +import com.twitter.tweetypie.Gate +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditOptions +import com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementLibrary +import com.twitter.visibility.writer.interfaces.tweets.TweetWriteEnforcementRequest +import 
com.twitter.visibility.writer.models.ActorContext +import com.twitter.visibility.writer.Allow +import com.twitter.visibility.writer.Deny +import com.twitter.visibility.writer.DenyExclusiveTweetReply +import com.twitter.visibility.writer.DenyStaleTweetQuoteTweet +import com.twitter.visibility.writer.DenyStaleTweetReply +import com.twitter.visibility.writer.DenySuperFollowsCreate +import com.twitter.visibility.writer.DenyTrustedFriendsCreate +import com.twitter.visibility.writer.DenyTrustedFriendsQuoteTweet +import com.twitter.visibility.writer.DenyTrustedFriendsReply + +object TweetWriteValidator { + case class Request( + conversationId: Option[TweetId], + userId: UserId, + exclusiveTweetControlOptions: Option[ExclusiveTweetControlOptions], + replyToExclusiveTweetControl: Option[ExclusiveTweetControl], + trustedFriendsControlOptions: Option[TrustedFriendsControlOptions], + inReplyToTrustedFriendsControl: Option[TrustedFriendsControl], + quotedTweetOpt: Option[QuotedTweet], + inReplyToTweetId: Option[TweetId], + inReplyToEditControl: Option[EditControl], + editOptions: Option[EditOptions]) + + type Type = FutureEffect[Request] + + def apply( + convoCtlRepo: ConversationControlRepository.Type, + tweetWriteEnforcementLibrary: TweetWriteEnforcementLibrary, + enableExclusiveTweetControlValidation: Gate[Unit], + enableTrustedFriendsControlValidation: Gate[Unit], + enableStaleTweetValidation: Gate[Unit] + ): FutureEffect[Request] = + FutureEffect[Request] { request => + // We are creating up an empty TweetQuery.Options here so we can use the default + // CacheControl value and avoid hard coding it here. 
+ val queryOptions = TweetQuery.Options(TweetQuery.Include()) + Stitch.run { + for { + convoCtl <- request.conversationId match { + case Some(convoId) => + convoCtlRepo( + convoId, + queryOptions.cacheControl + ) + case None => + Stitch.value(None) + } + + result <- tweetWriteEnforcementLibrary( + TweetWriteEnforcementRequest( + rootConversationControl = convoCtl, + convoId = request.conversationId, + exclusiveTweetControlOptions = request.exclusiveTweetControlOptions, + replyToExclusiveTweetControl = request.replyToExclusiveTweetControl, + trustedFriendsControlOptions = request.trustedFriendsControlOptions, + inReplyToTrustedFriendsControl = request.inReplyToTrustedFriendsControl, + quotedTweetOpt = request.quotedTweetOpt, + actorContext = ActorContext(request.userId), + inReplyToTweetId = request.inReplyToTweetId, + inReplyToEditControl = request.inReplyToEditControl, + editOptions = request.editOptions + ), + enableExclusiveTweetControlValidation = enableExclusiveTweetControlValidation, + enableTrustedFriendsControlValidation = enableTrustedFriendsControlValidation, + enableStaleTweetValidation = enableStaleTweetValidation + ) + _ <- result match { + case Allow => + Stitch.Done + case Deny => + Stitch.exception(TweetCreateFailure.State(TweetCreateState.ReplyTweetNotAllowed)) + case DenyExclusiveTweetReply => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.ExclusiveTweetEngagementNotAllowed)) + case DenySuperFollowsCreate => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.SuperFollowsCreateNotAuthorized)) + case DenyTrustedFriendsReply => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.TrustedFriendsEngagementNotAllowed)) + case DenyTrustedFriendsCreate => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.TrustedFriendsCreateNotAllowed)) + case DenyTrustedFriendsQuoteTweet => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.TrustedFriendsQuoteTweetNotAllowed)) + case DenyStaleTweetReply 
=> + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.StaleTweetEngagementNotAllowed)) + case DenyStaleTweetQuoteTweet => + Stitch.exception( + TweetCreateFailure.State(TweetCreateState.StaleTweetQuoteTweetNotAllowed)) + } + } yield () + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/U13ValidationUtil.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/U13ValidationUtil.scala new file mode 100644 index 000000000..1b4d46de1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/U13ValidationUtil.scala @@ -0,0 +1,21 @@ +package com.twitter.tweetypie.handler + +import com.twitter.compliance.userconsent.compliance.birthdate.GlobalBirthdateUtil +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.tweetypie.thriftscala.DeletedTweet +import org.joda.time.DateTime + +/* + * As part of GDPR U13 work, we want to block tweets created from when a user + * was < 13 from being restored. + */ + +private[handler] object U13ValidationUtil { + def wasTweetCreatedBeforeUserTurned13(user: User, deletedTweet: DeletedTweet): Boolean = + deletedTweet.createdAtSecs match { + case None => + throw NoCreatedAtTimeException + case Some(createdAtSecs) => + GlobalBirthdateUtil.isUnderSomeAge(13, new DateTime(createdAtSecs * 1000L), user) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UndeleteTweetHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UndeleteTweetHandler.scala new file mode 100644 index 000000000..c24590298 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UndeleteTweetHandler.scala @@ -0,0 +1,215 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.TweetHydrationError +import 
com.twitter.tweetypie.repository.ParentUserIdRepository +import com.twitter.tweetypie.storage.TweetStorageClient.Undelete +import com.twitter.tweetypie.storage.DeleteState +import com.twitter.tweetypie.storage.DeletedTweetResponse +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.store.UndeleteTweet +import com.twitter.tweetypie.thriftscala.UndeleteTweetState.{Success => TweetypieSuccess, _} +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import scala.util.control.NoStackTrace + +trait UndeleteException extends Exception with NoStackTrace + +/** + * Exceptions we return to the user, things that we don't expect to ever happen unless there is a + * problem with the underlying data in Manhattan or a bug in [[com.twitter.tweetypie.storage.TweetStorageClient]] + */ +object NoDeletedAtTimeException extends UndeleteException +object NoCreatedAtTimeException extends UndeleteException +object NoStatusWithSuccessException extends UndeleteException +object NoUserIdWithTweetException extends UndeleteException +object NoDeletedTweetException extends UndeleteException +object SoftDeleteUserIdNotFoundException extends UndeleteException + +/** + * represents a problem that we choose to return to the user as a response state + * rather than as an exception. 
+ */ +case class ResponseException(state: UndeleteTweetState) extends Exception with NoStackTrace { + def toResponse: UndeleteTweetResponse = UndeleteTweetResponse(state = state) +} + +private[this] object SoftDeleteExpiredException extends ResponseException(SoftDeleteExpired) +private[this] object BounceDeleteException extends ResponseException(TweetIsBounceDeleted) +private[this] object SourceTweetNotFoundException extends ResponseException(SourceTweetNotFound) +private[this] object SourceUserNotFoundException extends ResponseException(SourceUserNotFound) +private[this] object TweetExistsException extends ResponseException(TweetAlreadyExists) +private[this] object TweetNotFoundException extends ResponseException(TweetNotFound) +private[this] object U13TweetException extends ResponseException(TweetIsU13Tweet) +private[this] object UserNotFoundException extends ResponseException(UserNotFound) + +/** + * Undelete Notes: + * + * If request.force is set to true, then the undelete will take place even if the undeleted tweet + * is already present in Manhattan. This is useful if a tweet was recently restored to the backend, + * but the async actions portion of the undelete failed and you want to retry them. + * + * Before undeleting the tweet we check if it's a retweet, in which case we require that the sourceTweet + * and sourceUser exist. + * + * Tweets can only be undeleted for N days where N is the number of days before tweets marked with + * the soft_delete_state flag are deleted permanently by the cleanup job + * + */ +object UndeleteTweetHandler { + + type Type = FutureArrow[UndeleteTweetRequest, UndeleteTweetResponse] + + /** Extract an optional value inside a future or throw if it's missing. 
*/ + def required[T](option: Future[Option[T]], ex: => Exception): Future[T] = + option.flatMap { + case None => Future.exception(ex) + case Some(i) => Future.value(i) + } + + def apply( + undelete: TweetStorageClient.Undelete, + tweetExists: FutureArrow[TweetId, Boolean], + getUser: FutureArrow[UserId, Option[User]], + getDeletedTweets: TweetStorageClient.GetDeletedTweets, + parentUserIdRepo: ParentUserIdRepository.Type, + save: FutureArrow[UndeleteTweet.Event, Tweet] + ): Type = { + + def getParentUserId(tweet: Tweet): Future[Option[UserId]] = + Stitch.run { + parentUserIdRepo(tweet) + .handle { + case ParentUserIdRepository.ParentTweetNotFound(id) => None + } + } + + val entityExtractor = EntityExtractor.mutationAll.endo + + val getDeletedTweet: Long => Future[DeletedTweetResponse] = + id => Stitch.run(getDeletedTweets(Seq(id)).map(_.head)) + + def getRequiredUser(userId: Option[UserId]): Future[User] = + userId match { + case None => Future.exception(SoftDeleteUserIdNotFoundException) + case Some(id) => required(getUser(id), UserNotFoundException) + } + + def getValidatedDeletedTweet( + tweetId: TweetId, + allowNotDeleted: Boolean + ): Future[DeletedTweet] = { + import DeleteState._ + val deletedTweet = getDeletedTweet(tweetId).map { response => + response.deleteState match { + case SoftDeleted => response.tweet + // BounceDeleted tweets violated Twitter Rules and may not be undeleted + case BounceDeleted => throw BounceDeleteException + case HardDeleted => throw SoftDeleteExpiredException + case NotDeleted => if (allowNotDeleted) response.tweet else throw TweetExistsException + case NotFound => throw TweetNotFoundException + } + } + + required(deletedTweet, NoDeletedTweetException) + } + + /** + * Fetch the source tweet's user for a deleted share + */ + def getSourceUser(share: Option[DeletedTweetShare]): Future[Option[User]] = + share match { + case None => Future.value(None) + case Some(s) => required(getUser(s.sourceUserId), 
SourceUserNotFoundException).map(Some(_)) + } + + /** + * Ensure that the undelete response contains all the required information to continue with + * the tweetypie undelete. + */ + def validateUndeleteResponse(response: Undelete.Response, force: Boolean): Future[Tweet] = + Future { + (response.code, response.tweet) match { + case (Undelete.UndeleteResponseCode.NotCreated, _) => throw TweetNotFoundException + case (Undelete.UndeleteResponseCode.BackupNotFound, _) => throw SoftDeleteExpiredException + case (Undelete.UndeleteResponseCode.Success, None) => throw NoStatusWithSuccessException + case (Undelete.UndeleteResponseCode.Success, Some(tweet)) => + // archivedAtMillis is required on the response unless force is present + // or the tweet is a retweet. retweets have no favs or retweets to clean up + // of their own so the original deleted at time is not needed + if (response.archivedAtMillis.isEmpty && !force && !isRetweet(tweet)) + throw NoDeletedAtTimeException + else + tweet + case (code, _) => throw new Exception(s"Unknown UndeleteResponseCode $code") + } + } + + def enforceU13Compliance(user: User, deletedTweet: DeletedTweet): Future[Unit] = + Future.when(U13ValidationUtil.wasTweetCreatedBeforeUserTurned13(user, deletedTweet)) { + throw U13TweetException + } + + /** + * Fetch required data and perform before/after validations for undelete. + * If everything looks good with the undelete, kick off the tweetypie undelete + * event. + */ + FutureArrow { request => + val hydrationOptions = request.hydrationOptions.getOrElse(WritePathHydrationOptions()) + val force = request.force.getOrElse(false) + val tweetId = request.tweetId + + (for { + // we must be able to query the tweet from the soft delete table + deletedTweet <- getValidatedDeletedTweet(tweetId, allowNotDeleted = force) + + // we always require the user + user <- getRequiredUser(deletedTweet.userId) + + // Make sure we're not restoring any u13 tweets. 
+ () <- enforceU13Compliance(user, deletedTweet) + + // if a retweet, then sourceUser is required; sourceTweet will be hydrated in save() + sourceUser <- getSourceUser(deletedTweet.share) + + // validations passed, perform the undelete. + undeleteResponse <- Stitch.run(undelete(tweetId)) + + // validate the response + tweet <- validateUndeleteResponse(undeleteResponse, force) + + // Extract entities from tweet text + tweetWithEntities = entityExtractor(tweet) + + // If a retweet, get user id of parent retweet + parentUserId <- getParentUserId(tweet) + + // undeletion was successful, hydrate the tweet and + // kick off tweetypie async undelete actions + hydratedTweet <- save( + UndeleteTweet.Event( + tweet = tweetWithEntities, + user = user, + timestamp = Time.now, + hydrateOptions = hydrationOptions, + deletedAt = undeleteResponse.archivedAtMillis.map(Time.fromMilliseconds), + sourceUser = sourceUser, + parentUserId = parentUserId + ) + ) + } yield { + UndeleteTweetResponse(TweetypieSuccess, Some(hydratedTweet)) + }).handle { + case TweetHydrationError(_, Some(FilteredState.Unavailable.SourceTweetNotFound(_))) => + SourceTweetNotFoundException.toResponse + case ex: ResponseException => + ex.toResponse + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UnretweetHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UnretweetHandler.scala new file mode 100644 index 000000000..4747ff0ea --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UnretweetHandler.scala @@ -0,0 +1,65 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala._ +import com.twitter.timelineservice.{thriftscala => tls} +import 
com.twitter.tweetypie.backends.TimelineService.GetPerspectives + +object UnretweetHandler { + + type Type = UnretweetRequest => Future[UnretweetResult] + + def apply( + deleteTweets: TweetDeletePathHandler.DeleteTweets, + getPerspectives: GetPerspectives, + unretweetEdits: TweetDeletePathHandler.UnretweetEdits, + tweetRepo: TweetRepository.Type, + ): Type = { request: UnretweetRequest => + val handleEdits = getSourceTweet(request.sourceTweetId, tweetRepo).liftToTry.flatMap { + case Return(sourceTweet) => + // If we're able to fetch the source Tweet, unretweet all its other versions + unretweetEdits(sourceTweet.editControl, request.sourceTweetId, request.userId) + case Throw(_) => Future.Done + } + + handleEdits.flatMap(_ => unretweetSourceTweet(request, deleteTweets, getPerspectives)) + } + + def unretweetSourceTweet( + request: UnretweetRequest, + deleteTweets: TweetDeletePathHandler.DeleteTweets, + getPerspectives: GetPerspectives, + ): Future[UnretweetResult] = + getPerspectives( + Seq(tls.PerspectiveQuery(request.userId, Seq(request.sourceTweetId))) + ).map { results => results.head.perspectives.headOption.flatMap(_.retweetId) } + .flatMap { + case Some(id) => + deleteTweets( + DeleteTweetsRequest(tweetIds = Seq(id), byUserId = Some(request.userId)), + false + ).map(_.head).map { deleteTweetResult => + UnretweetResult(Some(deleteTweetResult.tweetId), deleteTweetResult.state) + } + case None => Future.value(UnretweetResult(None, TweetDeleteState.Ok)) + } + + def getSourceTweet( + sourceTweetId: TweetId, + tweetRepo: TweetRepository.Type + ): Future[Tweet] = { + val options: TweetQuery.Options = TweetQuery + .Options(include = TweetQuery.Include(tweetFields = Set(Tweet.EditControlField.id))) + + Stitch.run { + tweetRepo(sourceTweetId, options).rescue { + case _: FilteredState => Stitch.NotFound + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UpdatePossiblySensitiveTweetHandler.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UpdatePossiblySensitiveTweetHandler.scala new file mode 100644 index 000000000..875edb63c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UpdatePossiblySensitiveTweetHandler.scala @@ -0,0 +1,46 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tweetypie.store.UpdatePossiblySensitiveTweet +import com.twitter.tweetypie.thriftscala.UpdatePossiblySensitiveTweetRequest +import com.twitter.tweetypie.util.TweetLenses + +object UpdatePossiblySensitiveTweetHandler { + type Type = FutureArrow[UpdatePossiblySensitiveTweetRequest, Unit] + + def apply( + tweetGetter: FutureArrow[TweetId, Tweet], + userGetter: FutureArrow[UserId, User], + updatePossiblySensitiveTweetStore: FutureEffect[UpdatePossiblySensitiveTweet.Event] + ): Type = + FutureArrow { request => + val nsfwAdminMutation = Mutation[Boolean](_ => request.nsfwAdmin).checkEq + val nsfwUserMutation = Mutation[Boolean](_ => request.nsfwUser).checkEq + val tweetMutation = + TweetLenses.nsfwAdmin + .mutation(nsfwAdminMutation) + .also(TweetLenses.nsfwUser.mutation(nsfwUserMutation)) + + for { + originalTweet <- tweetGetter(request.tweetId) + _ <- tweetMutation(originalTweet) match { + case None => Future.Unit + case Some(mutatedTweet) => + userGetter(getUserId(originalTweet)) + .map { user => + UpdatePossiblySensitiveTweet.Event( + tweet = mutatedTweet, + user = user, + timestamp = Time.now, + byUserId = request.byUserId, + nsfwAdminChange = nsfwAdminMutation(TweetLenses.nsfwAdmin.get(originalTweet)), + nsfwUserChange = nsfwUserMutation(TweetLenses.nsfwUser.get(originalTweet)), + note = request.note, + host = request.host + ) + } + .flatMap(updatePossiblySensitiveTweetStore) + } + } yield () + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlEntityBuilder.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlEntityBuilder.scala new file mode 100644 
index 000000000..5de0fa625 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlEntityBuilder.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.tco_util.TcoUrl +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala.entities.EntityExtractor +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.IndexConverter +import com.twitter.tweetypie.tweettext.Offset +import com.twitter.tweetypie.tweettext.Preprocessor._ + +object UrlEntityBuilder { + import UpstreamFailure.UrlShorteningFailure + import UrlShortener.Context + + /** + * Extracts URLs from the given tweet text, shortens them, and returns an updated tweet + * text that contains the shortened URLs, along with the generated `UrlEntity`s. + */ + type Type = FutureArrow[(String, Context), (String, Seq[UrlEntity])] + + def fromShortener(shortener: UrlShortener.Type): Type = + FutureArrow { + case (text, ctx) => + Future + .collect(EntityExtractor.extractAllUrls(text).map(shortenEntity(shortener, _, ctx))) + .map(_.flatMap(_.toSeq)) + .map(updateTextAndUrls(text, _)(replaceInvisiblesWithWhitespace)) + } + + /** + * Update a url entity with tco-ed url + * + * @param entity a url entity with long url in the `url` field + * @param ctx additional data needed to build the shortener request + * @return an updated url entity with tco-ed url in the `url` field, + * and long url in the `expanded` field + */ + private def shortenEntity( + shortener: UrlShortener.Type, + entity: UrlEntity, + ctx: Context + ): Future[Option[UrlEntity]] = + shortener((TcoUrl.normalizeProtocol(entity.url), ctx)) + .map { urlData => + Some( + entity.copy( + url = urlData.shortUrl, + expanded = Some(urlData.longUrl), + display = Some(urlData.displayText) + ) + ) + } + .rescue { + // fail tweets with invalid urls + case UrlShortener.InvalidUrlError => +
Future.exception(TweetCreateFailure.State(TweetCreateState.InvalidUrl)) + // fail tweets with malware urls + case UrlShortener.MalwareUrlError => + Future.exception(TweetCreateFailure.State(TweetCreateState.MalwareUrl)) + // propagate OverCapacity + case e @ OverCapacity(_) => Future.exception(e) + // convert any other failure into UrlShorteningFailure + case e => Future.exception(UrlShorteningFailure(e)) + } + + /** + * Applies a text-modification function to all parts of the text not found within a UrlEntity, + * and then updates all the UrlEntity indices as necessary. + */ + def updateTextAndUrls( + text: String, + urlEntities: Seq[UrlEntity] + )( + textMod: String => String + ): (String, Seq[UrlEntity]) = { + var offsetInText = Offset.CodePoint(0) + var offsetInNewText = Offset.CodePoint(0) + val newText = new StringBuilder + val newUrlEntities = Seq.newBuilder[UrlEntity] + val indexConverter = new IndexConverter(text) + + urlEntities.foreach { e => + val nonUrl = textMod(indexConverter.substringByCodePoints(offsetInText.toInt, e.fromIndex)) + newText.append(nonUrl) + newText.append(e.url) + offsetInText = Offset.CodePoint(e.toIndex.toInt) + + val urlFrom = offsetInNewText + Offset.CodePoint.length(nonUrl) + val urlTo = urlFrom + Offset.CodePoint.length(e.url) + val newEntity = + e.copy(fromIndex = urlFrom.toShort, toIndex = urlTo.toShort) + + newUrlEntities += newEntity + offsetInNewText = urlTo + } + + newText.append(textMod(indexConverter.substringByCodePoints(offsetInText.toInt))) + + (newText.toString, newUrlEntities.result()) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlShortener.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlShortener.scala new file mode 100644 index 000000000..bdf939da7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UrlShortener.scala @@ -0,0 +1,106 @@ +package com.twitter.tweetypie +package handler + +import 
com.twitter.service.talon.thriftscala._ +import com.twitter.servo.util.FutureArrow +import com.twitter.tco_util.DisplayUrl +import com.twitter.tco_util.TcoUrl +import com.twitter.tweetypie.backends.Talon +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.store.Guano +import com.twitter.tweetypie.thriftscala.ShortenedUrl +import scala.util.control.NoStackTrace + +object UrlShortener { + type Type = FutureArrow[(String, Context), ShortenedUrl] + + case class Context( + tweetId: TweetId, + userId: UserId, + createdAt: Time, + userProtected: Boolean, + clientAppId: Option[Long] = None, + remoteHost: Option[String] = None, + dark: Boolean = false) + + object MalwareUrlError extends Exception with NoStackTrace + object InvalidUrlError extends Exception with NoStackTrace + + /** + * Returns a new UrlShortener that checks the response from the underlying shortener + * and, if the request is not dark but fails with a MalwareUrlError, scribes request + * info to guano. + */ + def scribeMalware(guano: Guano)(underlying: Type): Type = + FutureArrow { + case (longUrl, ctx) => + underlying((longUrl, ctx)).onFailure { + case MalwareUrlError if !ctx.dark => + guano.scribeMalwareAttempt( + Guano.MalwareAttempt( + longUrl, + ctx.userId, + ctx.clientAppId, + ctx.remoteHost + ) + ) + case _ => + } + } + + def fromTalon(talonShorten: Talon.Shorten): Type = { + val log = Logger(getClass) + + FutureArrow { + case (longUrl, ctx) => + val request = + ShortenRequest( + userId = ctx.userId, + longUrl = longUrl, + auditMsg = "tweetypie", + directMessage = Some(false), + protectedAccount = Some(ctx.userProtected), + maxShortUrlLength = None, + tweetData = Some(TweetData(ctx.tweetId, ctx.createdAt.inMilliseconds)), + trafficType = + if (ctx.dark) ShortenTrafficType.Testing + else ShortenTrafficType.Production + ) + + talonShorten(request).flatMap { res => + res.responseCode match { + case ResponseCode.Ok => + if (res.malwareStatus == MalwareStatus.UrlBlocked) { +
Future.exception(MalwareUrlError) + } else { + val shortUrl = + res.fullShortUrl.getOrElse { + // fall back to fromSlug if talon response does not have the full short url + // Could be replaced with an exception once the initial integration on production + // is done + TcoUrl.fromSlug(res.shortUrl, TcoUrl.isHttps(res.longUrl)) + } + + Future.value( + ShortenedUrl( + shortUrl = shortUrl, + longUrl = res.longUrl, + displayText = DisplayUrl(shortUrl, Some(res.longUrl), true) + ) + ) + } + + case ResponseCode.BadInput => + log.warn(s"Talon rejected URL that Extractor thought was fine: $longUrl") + Future.exception(InvalidUrlError) + + // we shouldn't see other ResponseCodes, because Talon.Shorten translates them to + // exceptions, but we have this catch-all just in case. + case resCode => + log.warn(s"Unexpected response code $resCode for '$longUrl'") + Future.exception(OverCapacity("talon")) + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala new file mode 100644 index 000000000..1410525d5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/UserTakedownHandler.scala @@ -0,0 +1,79 @@ +package com.twitter.tweetypie +package handler + +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.store.Takedown +import com.twitter.tweetypie.thriftscala.DataError +import com.twitter.tweetypie.thriftscala.DataErrorCause +import com.twitter.tweetypie.thriftscala.SetTweetUserTakedownRequest + +trait UserTakedownHandler { + val setTweetUserTakedownRequest: FutureArrow[SetTweetUserTakedownRequest, Unit] +} + +/** + * This handler processes SetTweetUserTakedownRequest objects sent to Tweetypie's + * setTweetUserTakedown endpoint. 
These requests originate from tweetypie daemon and the + * request object specifies the user ID of the user who is being modified, and a boolean value + * to indicate whether takedown is being added or removed. + * + * If takedown is being added, the hasTakedown bit is set on all of the user's tweets. + * If takedown is being removed, we can't automatically unset the hasTakedown bit on all tweets + * since some of the tweets might have tweet-specific takedowns, in which case the hasTakedown bit + * needs to remain set. Instead, we flush the user's tweets from cache, and let the repairer + * unset the bit when hydrating tweets where the bit is set but no user or tweet + * takedown country codes are present. + */ +object UserTakedownHandler { + type Type = FutureArrow[SetTweetUserTakedownRequest, Unit] + + def takedownEvent(userHasTakedown: Boolean): Tweet => Option[Takedown.Event] = + tweet => { + val tweetHasTakedown = + TweetLenses.tweetypieOnlyTakedownCountryCodes(tweet).exists(_.nonEmpty) || + TweetLenses.tweetypieOnlyTakedownReasons(tweet).exists(_.nonEmpty) + val updatedHasTakedown = userHasTakedown || tweetHasTakedown + if (updatedHasTakedown == TweetLenses.hasTakedown(tweet)) + None + else + Some( + Takedown.Event( + tweet = TweetLenses.hasTakedown.set(tweet, updatedHasTakedown), + timestamp = Time.now, + eventbusEnqueue = false, + scribeForAudit = false, + updateCodesAndReasons = false + ) + ) + } + + def setHasTakedown( + tweetTakedown: FutureEffect[Takedown.Event], + userHasTakedown: Boolean + ): FutureEffect[Seq[Tweet]] = + tweetTakedown.contramapOption(takedownEvent(userHasTakedown)).liftSeq + + def verifyTweetUserId(expectedUserId: Option[UserId], tweet: Tweet): Unit = { + val tweetUserId: UserId = getUserId(tweet) + val tweetId: Long = tweet.id + expectedUserId.filter(_ != tweetUserId).foreach { u => + throw DataError( + message = + s"SetTweetUserTakedownRequest userId $u does not match userId $tweetUserId for Tweet: $tweetId", + errorCause = 
Some(DataErrorCause.UserTweetRelationship), + ) + } + } + + def apply( + getTweet: FutureArrow[TweetId, Option[Tweet]], + tweetTakedown: FutureEffect[Takedown.Event], + ): Type = + FutureArrow { request => + for { + tweet <- getTweet(request.tweetId) + _ = tweet.foreach(t => verifyTweetUserId(request.userId, t)) + _ <- setHasTakedown(tweetTakedown, request.hasTakedown)(tweet.toSeq) + } yield () + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala new file mode 100644 index 000000000..5ef7573f2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/WritePathQueryOptions.scala @@ -0,0 +1,153 @@ +package com.twitter.tweetypie.handler + +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.tweetypie.repository.CacheControl +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.StatusCounts +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.tweetypie.thriftscala.WritePathHydrationOptions + +object WritePathQueryOptions { + + /** + * Base TweetQuery.Include for all hydration options. + */ + val BaseInclude: TweetQuery.Include = + GetTweetsHandler.BaseInclude.also( + tweetFields = Set( + Tweet.CardReferenceField.id, + Tweet.MediaTagsField.id, + Tweet.SelfPermalinkField.id, + Tweet.ExtendedTweetMetadataField.id, + Tweet.VisibleTextRangeField.id, + Tweet.NsfaHighRecallLabelField.id, + Tweet.CommunitiesField.id, + Tweet.ExclusiveTweetControlField.id, + Tweet.TrustedFriendsControlField.id, + Tweet.CollabControlField.id, + Tweet.EditControlField.id, + Tweet.EditPerspectiveField.id, + Tweet.NoteTweetField.id + ) + ) + + /** + * Base TweetQuery.Include for all creation-related hydrations. 
+ */ + val BaseCreateInclude: TweetQuery.Include = + BaseInclude + .also( + tweetFields = Set( + Tweet.PlaceField.id, + Tweet.ProfileGeoEnrichmentField.id, + Tweet.SelfThreadMetadataField.id + ), + mediaFields = Set(MediaEntity.AdditionalMetadataField.id), + quotedTweet = Some(true), + pastedMedia = Some(true) + ) + + /** + * Base TweetQuery.Include for all deletion-related hydrations. + */ + val BaseDeleteInclude: TweetQuery.Include = BaseInclude + .also(tweetFields = + Set(Tweet.BounceLabelField.id, Tweet.ConversationControlField.id, Tweet.EditControlField.id)) + + val AllCounts: Set[Short] = StatusCounts.fieldInfos.map(_.tfield.id).toSet + + def insert( + cause: TweetQuery.Cause, + user: User, + options: WritePathHydrationOptions, + isEditControlEdit: Boolean + ): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = false, + // include counts if tweet edit, otherwise false + includeCounts = isEditControlEdit, + cause = cause, + forUser = user, + // Do not perform any filtering when we are hydrating the tweet we are creating + safetyLevel = SafetyLevel.FilterNone + ) + + def retweetSourceTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = true, + includeCounts = true, + cause = TweetQuery.Cause.Read, + forUser = user, + // If Scarecrow is down, we may proceed with creating a RT. The safetyLevel is necessary + // so that the inner tweet's count is not sent in the TweetCreateEvent we send + // to EventBus. If this were emitted, live pipeline would publish counts to the clients.
+ safetyLevel = SafetyLevel.TweetWritesApi + ) + + def quotedTweet(user: User, options: WritePathHydrationOptions): TweetQuery.Options = + createOptions( + writePathHydrationOptions = options, + includePerspective = true, + includeCounts = true, + cause = TweetQuery.Cause.Read, + forUser = user, + // We pass in the safetyLevel so that the inner tweet's counts are excluded + // from the TweetCreateEvent we send to EventBus. If this were emitted, + // live pipeline would publish counts to the clients. + safetyLevel = SafetyLevel.TweetWritesApi + ) + + private def condSet[A](cond: Boolean, item: A): Set[A] = + if (cond) Set(item) else Set.empty + + private def createOptions( + writePathHydrationOptions: WritePathHydrationOptions, + includePerspective: Boolean, + includeCounts: Boolean, + cause: TweetQuery.Cause, + forUser: User, + safetyLevel: SafetyLevel, + ): TweetQuery.Options = { + val cardsEnabled: Boolean = writePathHydrationOptions.includeCards + val cardsPlatformKeySpecified: Boolean = writePathHydrationOptions.cardsPlatformKey.nonEmpty + val cardsV1Enabled: Boolean = cardsEnabled && !cardsPlatformKeySpecified + val cardsV2Enabled: Boolean = cardsEnabled && cardsPlatformKeySpecified + + TweetQuery.Options( + include = BaseCreateInclude.also( + tweetFields = + condSet(includePerspective, Tweet.PerspectiveField.id) ++ + condSet(cardsV1Enabled, Tweet.CardsField.id) ++ + condSet(cardsV2Enabled, Tweet.Card2Field.id) ++ + condSet(includeCounts, Tweet.CountsField.id) ++ + // for PreviousCountsField, copy includeCounts state on the write path + condSet(includeCounts, Tweet.PreviousCountsField.id) ++ + // hydrate ConversationControl on Reply Tweet creations so clients can consume + Set(Tweet.ConversationControlField.id), + countsFields = if (includeCounts) AllCounts else Set.empty + ), + cause = cause, + forUserId = Some(forUser.id), + cardsPlatformKey = writePathHydrationOptions.cardsPlatformKey, + languageTag = forUser.account.map(_.language).getOrElse("en"), +
extensionsArgs = writePathHydrationOptions.extensionsArgs, + safetyLevel = safetyLevel, + simpleQuotedTweet = writePathHydrationOptions.simpleQuotedTweet + ) + } + + def deleteTweets: TweetQuery.Options = + TweetQuery.Options( + include = BaseDeleteInclude, + cacheControl = CacheControl.ReadOnlyCache, + extensionsArgs = None, + requireSourceTweet = false // retweet should be deletable even if source tweet missing + ) + + def deleteTweetsWithoutEditControl: TweetQuery.Options = + deleteTweets.copy(enableEditControlHydration = false) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala new file mode 100644 index 000000000..e9d5021a0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/handler/package.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie + +import com.twitter.context.thriftscala.Viewer +import com.twitter.tweetypie.thriftscala._ + +import scala.util.matching.Regex +import com.twitter.context.TwitterContext +import com.twitter.finagle.stats.Stat +import com.twitter.snowflake.id.SnowflakeId + +package object handler { + type PlaceLanguage = String + type TweetIdGenerator = () => Future[TweetId] + type NarrowcastValidator = FutureArrow[Narrowcast, Narrowcast] + type ReverseGeocoder = FutureArrow[(GeoCoordinates, PlaceLanguage), Option[Place]] + type CardUri = String + + // A narrowcast location can be a PlaceId or a US metro code. 
+ type NarrowcastLocation = String + + val PlaceIdRegex: Regex = """(?i)\A[0-9a-fA-F]{16}\Z""".r + + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def getContributor(userId: UserId): Option[Contributor] = { + val viewer = TwitterContext().getOrElse(Viewer()) + viewer.authenticatedUserId.filterNot(_ == userId).map(id => Contributor(id)) + } + + def trackLossyReadsAfterWrite(stat: Stat, windowLength: Duration)(tweetId: TweetId): Unit = { + // If the requested Tweet is NotFound, and the tweet age is less than the defined {{windowLength}} duration, + // then we capture the percentiles of when this request was attempted. + // This is being tracked to understand how lossy the reads are directly after tweet creation. + for { + timestamp <- SnowflakeId.timeFromIdOpt(tweetId) + age = Time.now.since(timestamp) + if age.inMillis <= windowLength.inMillis + } yield stat.add(age.inMillis) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD new file mode 100644 index 000000000..0fb53615d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/BUILD @@ -0,0 +1,58 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "core-app-services/lib:coreservices", + "featureswitches/featureswitches-core:v2", + "featureswitches/featureswitches-core/src/main/scala", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "mediaservices/commons/src/main/thrift:thrift-scala", + "mediaservices/media-util", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + 
"src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/escherbird:tweet-annotation-scala", + "src/thrift/com/twitter/escherbird/common:common-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "stitch/stitch-timelineservice/src/main/scala", + "strato/src/main/scala/com/twitter/strato/access", + "strato/src/main/scala/com/twitter/strato/callcontext", + "tco-util", + "tweet-util", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + 
"util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + "visibility/common/src/main/thrift/com/twitter/visibility:action-scala", + "visibility/results/src/main/scala/com/twitter/visibility/results/counts", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala new file mode 100644 index 000000000..08ad91bc8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/Card2Hydrator.scala @@ -0,0 +1,76 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card2 +import com.twitter.expandodo.thriftscala.Card2RequestOptions +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.CardReferenceUriExtractor +import com.twitter.tweetypie.core.NonTombstone +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object Card2Hydrator { + type Type = ValueHydrator[Option[Card2], Ctx] + + case class Ctx( + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + cardReference: Option[CardReference], + underlyingTweetCtx: TweetCtx, + featureSwitchResults: Option[FeatureSwitchResults]) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.Card2Field) + val hydrationUrlBlockListKey = "card_hydration_blocklist" + + def apply(repo: Card2Repository.Type): ValueHydrator[Option[Card2], Ctx] = + ValueHydrator[Option[Card2], Ctx] { (_, ctx) => + val repoCtx = requestOptions(ctx) + val filterURLs = ctx.featureSwitchResults + .flatMap(_.getStringArray(hydrationUrlBlockListKey, false)) + .getOrElse(Seq()) + + val requests = + ctx.cardReference match { + case Some(CardReferenceUriExtractor(cardUri)) => + cardUri match { + case NonTombstone(uri) if 
!filterURLs.contains(uri) => + Seq((UrlCard2Key(uri), repoCtx)) + case _ => Nil + } + case _ => + ctx.urlEntities + .filterNot(e => e.expanded.exists(filterURLs.contains)) + .map(e => (UrlCard2Key(e.url), repoCtx)) + } + + Stitch + .traverse(requests) { + case (key, opts) => repo(key, opts).liftNotFoundToOption + }.liftToTry.map { + case Return(results) => + results.flatten.lastOption match { + case None => ValueState.UnmodifiedNone + case res => ValueState.modified(res) + } + case Throw(_) => ValueState.partial(None, hydratedField) + } + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.tweetFieldRequested(Tweet.Card2Field) && + ctx.opts.cardsPlatformKey.nonEmpty && + !ctx.isRetweet && + ctx.mediaEntities.isEmpty && + (ctx.cardReference.nonEmpty || ctx.urlEntities.nonEmpty) + } + + private[this] def requestOptions(ctx: Ctx) = + Card2RequestOptions( + platformKey = ctx.opts.cardsPlatformKey.get, + perspectiveUserId = ctx.opts.forUserId, + allowNonTcoUrls = ctx.cardReference.nonEmpty, + languageTag = Some(ctx.opts.languageTag) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala new file mode 100644 index 000000000..4a267bfb6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CardHydrator.scala @@ -0,0 +1,47 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object CardHydrator { + type Type = ValueHydrator[Option[Seq[Card]], Ctx] + + case class Ctx( + urlEntities: Seq[UrlEntity], + mediaEntities: Seq[MediaEntity], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.CardsField) + + private[this] val 
partialResult = ValueState.partial(None, hydratedField) + + def apply(repo: CardRepository.Type): Type = { + def getCards(url: String): Stitch[Seq[Card]] = + repo(url).handle { case NotFound => Nil } + + ValueHydrator[Option[Seq[Card]], Ctx] { (_, ctx) => + val urls = ctx.urlEntities.map(_.url) + + Stitch.traverse(urls)(getCards _).liftToTry.map { + case Return(cards) => + // even though we are hydrating a type of Option[Seq[Card]], we only + // ever return at most one card, and always the last one. + val res = cards.flatten.lastOption.toSeq + if (res.isEmpty) ValueState.UnmodifiedNone + else ValueState.modified(Some(res)) + case _ => partialResult + } + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.tweetFieldRequested(Tweet.CardsField) && + !ctx.isRetweet && + ctx.mediaEntities.isEmpty + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala new file mode 100644 index 000000000..8adee73b3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorHydrator.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object ContributorHydrator { + type Type = ValueHydrator[Option[Contributor], TweetCtx] + + val hydratedField: FieldByPath = fieldByPath(Tweet.ContributorField, Contributor.ScreenNameField) + + def once(h: Type): Type = + TweetHydration.completeOnlyOnce( + hydrationType = HydrationType.Contributor, + hydrator = h + ) + + def apply(repo: UserIdentityRepository.Type): Type = + ValueHydrator[Contributor, TweetCtx] { (curr, _) => + repo(UserKey(curr.userId)).liftToTry.map { + case Return(userIdent) => ValueState.delta(curr, update(curr, userIdent)) + case Throw(NotFound) => ValueState.unmodified(curr) 
+ case Throw(_) => ValueState.partial(curr, hydratedField) + } + }.onlyIf((curr, _) => curr.screenName.isEmpty).liftOption + + /** + * Updates a Contributor using the given user data. + */ + private def update(curr: Contributor, userIdent: UserIdentity): Contributor = + curr.copy( + screenName = Some(userIdent.screenName) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala new file mode 100644 index 000000000..079b90f78 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ContributorVisibilityFilter.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Remove contributor data from tweet if it should not be available to the + * caller. The contributor field is populated in the cached + * [[ContributorHydrator]]. + * + * Contributor data is always available on the write path. It is available on + * the read path for the tweet author (or user authenticated as the tweet + * author in the case of contributors/teams), or if the caller has disabled + * visibility filtering. + * + * The condition for running this filtering hydrator (onlyIf) has been a + * source of confusion. Keep in mind that the condition expresses when to + * *remove* data, not when to return it. 
+ * + * In short, keep data when: + * !reading || requested by author || !(enforce visibility filtering) + * + * Remove data when none of these conditions apply: + * reading && !(requested by author) && enforce visibility filtering + * + */ +object ContributorVisibilityFilter { + type Type = ValueHydrator[Option[Contributor], TweetCtx] + + def apply(): Type = + ValueHydrator + .map[Option[Contributor], TweetCtx] { + case (Some(_), _) => ValueState.modified(None) + case (None, _) => ValueState.unmodified(None) + } + .onlyIf { (_, ctx) => + ctx.opts.cause.reading(ctx.tweetId) && + !ctx.opts.forUserId.contains(ctx.userId) && + ctx.opts.enforceVisibilityFiltering + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala new file mode 100644 index 000000000..55df7e8a7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationControlHydrator.scala @@ -0,0 +1,108 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository.ConversationControlRepository +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.ConversationControl + +private object ReplyTweetConversationControlHydrator { + type Type = ConversationControlHydrator.Type + type Ctx = ConversationControlHydrator.Ctx + + // The conversation control thrift field was added Feb 17th, 2020. + // No conversation before this will have a conversation control field to hydrate. + // We explicitly short circuit to save resources from querying for tweets we + // know do not have conversation control fields set. 
+ val FirstValidDate: Time = Time.fromMilliseconds(1554076800000L) // 2019-04-01T00:00:00Z. NOTE(review): this is earlier than the Feb 17th, 2020 date cited in the comment above; confirm which is intended + + def apply( + repo: ConversationControlRepository.Type, + stats: StatsReceiver + ): Type = { + val exceptionCounter = ExceptionCounter(stats) + + ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) => + repo(ctx.conversationId.get, ctx.opts.cacheControl).liftToTry.map { + case Return(conversationControl) => + ValueState.delta(curr, conversationControl) + case Throw(exception) => { + // In the case where we get an exception, we want to count the + // exception but fail open. + exceptionCounter(exception) + + // Reply Tweet Tweet.ConversationControlField hydration should fail open. + // Ideally we would return ValueState.partial here to notify the Tweetypie caller + // that the requested Tweet.ConversationControlField field was not hydrated. + // We cannot do so because GetTweetFields will return TweetFieldsResultFailed + // for partial results which would fail closed. + ValueState.unmodified(curr) + } + } + }.onlyIf { (_, ctx) => + // This hydrator is specifically for replies so only run when Tweet is a reply + ctx.inReplyToTweetId.isDefined && + // See comment for FirstValidDate + ctx.createdAt > FirstValidDate && + // We need conversation id to get ConversationControl + ctx.conversationId.isDefined && + // Only run if the ConversationControl was requested + ctx.tweetFieldRequested(Tweet.ConversationControlField) + } + } +} + +/** + * ConversationControlHydrator is used to hydrate the conversationControl field. + * For root Tweets, this hydrator just passes through the existing conversationControl. + * For reply Tweets, it loads the conversationControl from the root Tweet of the conversation. + * Only root Tweets in a conversation (i.e. the Tweet pointed to by conversationId) have + * a persisted conversationControl, so we have to hydrate that field for all replies in order + * to know if a Tweet in a conversation can be replied to.
+ */ +object ConversationControlHydrator { + type Type = ValueHydrator[Option[ConversationControl], Ctx] + + case class Ctx(conversationId: Option[ConversationId], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + private def scrubInviteViaMention( + ccOpt: Option[ConversationControl] + ): Option[ConversationControl] = { + ccOpt collect { + case ConversationControl.ByInvitation(byInvitation) => + ConversationControl.ByInvitation(byInvitation.copy(inviteViaMention = None)) + case ConversationControl.Community(community) => + ConversationControl.Community(community.copy(inviteViaMention = None)) + case ConversationControl.Followers(followers) => + ConversationControl.Followers(followers.copy(inviteViaMention = None)) + } + } + + def apply( + repo: ConversationControlRepository.Type, + disableInviteViaMention: Gate[Unit], + stats: StatsReceiver + ): Type = { + val replyTweetConversationControlHydrator = ReplyTweetConversationControlHydrator( + repo, + stats + ) + + ValueHydrator[Option[ConversationControl], Ctx] { (curr, ctx) => + val ccUpdated = if (disableInviteViaMention()) { + scrubInviteViaMention(curr) + } else { + curr + } + + if (ctx.inReplyToTweetId.isEmpty) { + // For non-reply tweets, pass through the existing conversation control + Stitch.value(ValueState.delta(curr, ccUpdated)) + } else { + replyTweetConversationControlHydrator(ccUpdated, ctx) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala new file mode 100644 index 000000000..172ff1746 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationIdHydrator.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * 
Hydrates the conversationId field for any tweet that is a reply to another tweet. + * It uses that other tweet's conversationId. + */ +object ConversationIdHydrator { + type Type = ValueHydrator[Option[ConversationId], TweetCtx] + + val hydratedField: FieldByPath = + fieldByPath(Tweet.CoreDataField, TweetCoreData.ConversationIdField) + + def apply(repo: ConversationIdRepository.Type): Type = + ValueHydrator[Option[ConversationId], TweetCtx] { (_, ctx) => + ctx.inReplyToTweetId match { + case None => + // Not a reply to another tweet, use tweet id as conversation root + Stitch.value(ValueState.modified(Some(ctx.tweetId))) + case Some(parentId) => + // Lookup conversation id from in-reply-to tweet + repo(ConversationIdKey(ctx.tweetId, parentId)).liftToTry.map { + case Return(rootId) => ValueState.modified(Some(rootId)) + case Throw(_) => ValueState.partial(None, hydratedField) + } + } + }.onlyIf((curr, _) => curr.isEmpty) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala new file mode 100644 index 000000000..3f6e6ad7e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ConversationMutedHydrator.scala @@ -0,0 +1,54 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala.FieldByPath + +/** + * Hydrates the `conversationMuted` field of Tweet. `conversationMuted` + * will be true if the conversation that this tweet is part of has been + * muted by the user. This field is perspectival, so the result of this + * hydrator should never be cached. 
+ */ +object ConversationMutedHydrator { + type Type = ValueHydrator[Option[Boolean], Ctx] + + case class Ctx(conversationId: Option[TweetId], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val hydratedField: FieldByPath = fieldByPath(Tweet.ConversationMutedField) + + private[this] val partialResult = ValueState.partial(None, hydratedField) + private[this] val modifiedTrue = ValueState.modified(Some(true)) + private[this] val modifiedFalse = ValueState.modified(Some(false)) + + def apply(repo: ConversationMutedRepository.Type): Type = { + + ValueHydrator[Option[Boolean], Ctx] { (_, ctx) => + (ctx.opts.forUserId, ctx.conversationId) match { + case (Some(userId), Some(convoId)) => + repo(userId, convoId).liftToTry + .map { + case Return(true) => modifiedTrue + case Return(false) => modifiedFalse + case Throw(_) => partialResult + } + case _ => + ValueState.StitchUnmodifiedNone + } + }.onlyIf { (curr, ctx) => + // It is unlikely that this field will already be set, but if, for + // some reason, this hydrator is run on a tweet that already has + // this value set, we will skip the work to check again. + curr.isEmpty && + // We only hydrate this field if it is explicitly requested. At + // the time of this writing, this field is only used for + // displaying UI for toggling the muted state of the relevant + // conversation. + ctx.tweetFieldRequested(Tweet.ConversationMutedField) && + // Retweets are not part of a conversation, so should not be muted. 
+ !ctx.isRetweet + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala new file mode 100644 index 000000000..0e8a9eada --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CopyFromSourceTweet.scala @@ -0,0 +1,229 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.tweetypie.thriftscala._ + +object CopyFromSourceTweet { + + /** + * A `ValueHydrator` that copies and/or merges certain fields from a retweet's source + * tweet into the retweet. + */ + def hydrator: ValueHydrator[TweetData, TweetQuery.Options] = + ValueHydrator.map { (td, _) => + td.sourceTweetResult.map(_.value.tweet) match { + case None => ValueState.unmodified(td) + case Some(src) => ValueState.modified(td.copy(tweet = copy(src, td.tweet))) + } + } + + /** + * Updates `dst` with fields from `src`. This is more complicated than you would think, because: + * + * - the tweet has an extra mention entity due to the "RT @user" prefix; + * - the retweet text may be truncated at the end, and doesn't necessarily contain all of the + * the text from the source tweet. truncation may happen in the middle of entity. + * - the text in the retweet may have a different unicode normalization, which affects + * code point indices. this means entities aren't shifted by a fixed amount equal to + * the RT prefix. + * - url entities, when hydrated, may be converted to media entities; url entities may not + * be hydrated in the retweet, so the source tweet may have a media entity that corresponds + * to an unhydrated url entity in the retweet. + * - there may be multiple media entities that map to a single url entity, because the tweet + * may have multiple photos. 
+ */ + def copy(src: Tweet, dst: Tweet): Tweet = { + val srcCoreData = src.coreData.get + val dstCoreData = dst.coreData.get + + // get the code point index of the end of the text + val max = getText(dst).codePointCount(0, getText(dst).length).toShort + + // get all entities from the source tweet, merged into a single list sorted by fromIndex. + val srcEntities = getWrappedEntities(src) + + // same for the retweet, but drop first @mention, add back later + val dstEntities = getWrappedEntities(dst).drop(1) + + // merge indices from dst into srcEntities. at the end, resort entities back + // to their original ordering. for media entities, order matters to clients. + val mergedEntities = merge(srcEntities, dstEntities, max).sortBy(_.position) + + // extract entities back out by type + val mentions = mergedEntities.collect { case WrappedMentionEntity(e, _) => e } + val hashtags = mergedEntities.collect { case WrappedHashtagEntity(e, _) => e } + val cashtags = mergedEntities.collect { case WrappedCashtagEntity(e, _) => e } + val urls = mergedEntities.collect { case WrappedUrlEntity(e, _) => e } + val media = mergedEntities.collect { case WrappedMediaEntity(e, _) => e } + + // merge the updated entities back into the retweet, adding the RT @mention back in + dst.copy( + coreData = Some( + dstCoreData.copy( + hasMedia = srcCoreData.hasMedia, + hasTakedown = dstCoreData.hasTakedown || srcCoreData.hasTakedown + ) + ), + mentions = Some(getMentions(dst).take(1) ++ mentions), + hashtags = Some(hashtags), + cashtags = Some(cashtags), + urls = Some(urls), + media = Some(media.map(updateSourceStatusId(src.id, getUserId(src)))), + quotedTweet = src.quotedTweet, + card2 = src.card2, + cards = src.cards, + language = src.language, + mediaTags = src.mediaTags, + spamLabel = src.spamLabel, + takedownCountryCodes = + mergeTakedowns(Seq(src, dst).map(TweetLenses.takedownCountryCodes.get): _*), + conversationControl = src.conversationControl, + exclusiveTweetControl = 
src.exclusiveTweetControl + ) + } + + /** + * Merges one or more optional lists of takedowns. If no lists are defined, returns None. + */ + private def mergeTakedowns(takedowns: Option[Seq[CountryCode]]*): Option[Seq[CountryCode]] = + if (takedowns.exists(_.isDefined)) { + Some(takedowns.flatten.flatten.distinct.sorted) + } else { + None + } + + /** + * A retweet should never have media without a source_status_id or source_user_id + */ + private def updateSourceStatusId( + srcTweetId: TweetId, + srcUserId: UserId + ): MediaEntity => MediaEntity = + mediaEntity => + if (mediaEntity.sourceStatusId.nonEmpty) { + // when sourceStatusId is set this indicates the media is "pasted media" so the values + // should already be correct (retweeting won't change sourceStatusId / sourceUserId) + mediaEntity + } else { + mediaEntity.copy( + sourceStatusId = Some(srcTweetId), + sourceUserId = Some(mediaEntity.sourceUserId.getOrElse(srcUserId)) + ) + } + + /** + * Attempts to match up entities from the source tweet with entities from the retweet, + * and to use the source tweet entities but shifted to the retweet entity indices. If an entity + * got truncated at the end of the retweet text, we drop it and any following entities. + */ + private def merge( + srcEntities: List[WrappedEntity], + rtEntities: List[WrappedEntity], + maxIndex: Short + ): List[WrappedEntity] = { + (srcEntities, rtEntities) match { + case (Nil, Nil) => + // successfully matched all entities! + Nil + + case (Nil, _) => + // no more source tweet entities, but we still have remaining retweet entities. + // this can happen if a text truncation turns something invalid like #tag1#tag2 or + // @mention1@mention2 into a valid entity. just drop all the remaining retweet entities. + Nil + + case (_, Nil) => + // no more retweet entities, which means the remaining entities have been truncated. + Nil + + case (srcHead :: srcTail, rtHead :: rtTail) => + // we have more entities from the source tweet and the retweet.
typically, we can + // match these entities because they have the same normalized text, but the retweet + // entity might be truncated, so we allow for a prefix match if the retweet entity + // ends at the end of the tweet. + val possiblyTruncated = rtHead.toIndex == maxIndex - 1 // NOTE(review): never read below; only exact matches are accepted, so the prefix match described above is not performed + val exactMatch = srcHead.normalizedText == rtHead.normalizedText + + if (exactMatch) { + // there could be multiple media entities for the same t.co url, so we need to find + // contiguous groupings of entities that share the same fromIndex. + val rtTail = rtEntities.dropWhile(_.fromIndex == rtHead.fromIndex) // shadows the pattern-bound rtTail + val srcGroup = + srcEntities + .takeWhile(_.fromIndex == srcHead.fromIndex) + .map(_.shift(rtHead.fromIndex, rtHead.toIndex)) + val srcTail = srcEntities.drop(srcGroup.size) // shadows the pattern-bound srcTail + + srcGroup ++ merge(srcTail, rtTail, maxIndex) + } else { + // if we encounter a mismatch, it is most likely because of truncation, + // so we stop here. + Nil + } + } + } + + /** + * Wraps all the entities with the appropriate WrappedEntity subclasses, merges them into + * a single list, and sorts by fromIndex. + */ + private def getWrappedEntities(tweet: Tweet): List[WrappedEntity] = + (getUrls(tweet).zipWithIndex.map { case (e, p) => WrappedUrlEntity(e, p) } ++ + getMedia(tweet).zipWithIndex.map { case (e, p) => WrappedMediaEntity(e, p) } ++ + getMentions(tweet).zipWithIndex.map { case (e, p) => WrappedMentionEntity(e, p) } ++ + getHashtags(tweet).zipWithIndex.map { case (e, p) => WrappedHashtagEntity(e, p) } ++ + getCashtags(tweet).zipWithIndex.map { case (e, p) => WrappedCashtagEntity(e, p) }) + .sortBy(_.fromIndex) + .toList + + /** + * The thrift-entity classes don't share a common entity parent class, so we wrap + * them with a class that allows us to mix entities together into a single list, and + * to provide a generic interface for shifting indices.
+ */ + private sealed abstract class WrappedEntity( + val fromIndex: Short, + val toIndex: Short, + val rawText: String) { + + /** the original position of the entity within the entity group */ + val position: Int + + val normalizedText: String = TweetText.nfcNormalize(rawText).toLowerCase + + def shift(fromIndex: Short, toIndex: Short): WrappedEntity + } + + private case class WrappedUrlEntity(entity: UrlEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) { + override def shift(fromIndex: Short, toIndex: Short): WrappedUrlEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedMediaEntity(entity: MediaEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.url) { + override def shift(fromIndex: Short, toIndex: Short): WrappedMediaEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedMentionEntity(entity: MentionEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.screenName) { + override def shift(fromIndex: Short, toIndex: Short): WrappedMentionEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedHashtagEntity(entity: HashtagEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) { + override def shift(fromIndex: Short, toIndex: Short): WrappedHashtagEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } + + private case class WrappedCashtagEntity(entity: CashtagEntity, position: Int) + extends WrappedEntity(entity.fromIndex, entity.toIndex, entity.text) { + override def shift(fromIndex: Short, toIndex: Short): WrappedCashtagEntity = + copy(entity.copy(fromIndex = fromIndex, toIndex = toIndex)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala new file mode 100644 index 000000000..88d3fca3e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/CreatedAtRepairer.scala @@ -0,0 +1,49 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.conversions.DurationOps._ +import com.twitter.snowflake.id.SnowflakeId + +object CreatedAtRepairer { + // no createdAt value should be less than this (2006-01-01T00:00:00Z, in milliseconds) + val jan_01_2006 = 1136073600000L + + // no non-snowflake createdAt value should be greater than this (2011-01-01T00:00:00Z, in milliseconds) + val jan_01_2011 = 1293840000000L + + // allow createdAt timestamp to be up to this amount off from the snowflake id + // before applying the correction. + val varianceThreshold: Long = 10.minutes.inMilliseconds // a duration in milliseconds, hence Long rather than the MediaId alias +} + +/** + * Detects tweets with bad createdAt timestamps and attempts to fix, if possible + * using the snowflake id. pre-snowflake tweets are left unmodified. + */ +class CreatedAtRepairer(scribe: FutureEffect[String]) extends Mutation[Tweet] { + import CreatedAtRepairer._ + + def apply(tweet: Tweet): Option[Tweet] = { + assert(tweet.coreData.nonEmpty, "tweet core data is missing") + val createdAtMillis = getCreatedAt(tweet) * 1000 // createdAt is kept in seconds (see snowflakeSeconds below) + + if (SnowflakeId.isSnowflakeId(tweet.id)) { + val snowflakeMillis = SnowflakeId(tweet.id).unixTimeMillis.asLong + val diff = (snowflakeMillis - createdAtMillis).abs + + if (diff >= varianceThreshold) { + scribe(tweet.id + "\t" + createdAtMillis) + val snowflakeSeconds = snowflakeMillis / 1000 + Some(TweetLenses.createdAt.set(tweet, snowflakeSeconds)) + } else { + None + } + } else { + // not a snowflake id, hard to repair, so just log it + if (createdAtMillis < jan_01_2006 || createdAtMillis > jan_01_2011) { + scribe(tweet.id + "\t" + createdAtMillis) + } + None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala new file mode 100644 index 000000000..c1a0c5fcd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DeviceSourceHydrator.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.serverutil.DeviceSourceParser +import com.twitter.tweetypie.thriftscala.DeviceSource +import com.twitter.tweetypie.thriftscala.FieldByPath + +object DeviceSourceHydrator { + type Type = ValueHydrator[Option[DeviceSource], TweetCtx] + + // WebOauthId is the created_via value for Macaw-Swift through Woodstar. + // We need to special-case it to return the same device_source as "web", + // since we can't map multiple created_via strings to one device_source. + val WebOauthId: String = s"oauth:${DeviceSourceParser.Web}" + + val hydratedField: FieldByPath = fieldByPath(Tweet.DeviceSourceField) // passed to ValueState.partial when the lookup fails + + private def convertForWeb(createdVia: String) = + if (createdVia == DeviceSourceHydrator.WebOauthId) "web" else createdVia // only the web-oauth value is rewritten; everything else passes through + + def apply(repo: DeviceSourceRepository.Type): Type = + ValueHydrator[Option[DeviceSource], TweetCtx] { (_, ctx) => + val req = convertForWeb(ctx.createdVia) // repo is keyed by the (normalized) created_via string + repo(req).liftToTry.map { + case Return(deviceSource) => ValueState.modified(Some(deviceSource)) + case Throw(NotFound) => ValueState.UnmodifiedNone // lookup reported NotFound: leave the field unset + case Throw(_) => ValueState.partial(None, hydratedField) // any other failure: flag hydratedField via ValueState.partial + } + }.onlyIf((curr, ctx) => curr.isEmpty && ctx.tweetFieldRequested(Tweet.DeviceSourceField)) // runs only when the field is empty and was requested +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala new file mode 100644 index 000000000..a64d91c2e --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/DirectedAtHydrator.scala @@ -0,0 +1,92 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * Hydrates the "directedAtUser" field on the tweet. This hydrator uses one of two paths depending + * on whether DirectedAtUserMetadata is present: + * + * 1. If DirectedAtUserMetadata exists, we use metadata.userId. + * 2. If DirectedAtUserMetadata does not exist, we use the User screenName from the mention starting + * at index 0 if the tweet also has a reply. Creation of a "reply to user" for + * leading @mentions is controlled by PostTweetRequest.enableTweetToNarrowcasting + */ +object DirectedAtHydrator { + type Type = ValueHydrator[Option[DirectedAtUser], Ctx] + + case class Ctx( + mentions: Seq[MentionEntity], + metadata: Option[DirectedAtUserMetadata], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy { + val directedAtScreenName: Option[String] = + mentions.headOption.filter(_.fromIndex == 0).map(_.screenName) + } + + val hydratedField: FieldByPath = + fieldByPath(Tweet.CoreDataField, TweetCoreData.DirectedAtUserField) + + def once(h: Type): Type = + TweetHydration.completeOnlyOnce( + hydrationType = HydrationType.DirectedAt, + hydrator = h + ) + + private val partial = ValueState.partial(None, hydratedField) + + def apply(repo: UserIdentityRepository.Type, stats: StatsReceiver = NullStatsReceiver): Type = { + val withMetadata = stats.counter("with_metadata") + val noScreenName = stats.counter("no_screen_name") + val withoutMetadata = stats.counter("without_metadata") + + ValueHydrator[Option[DirectedAtUser], Ctx] { (_, ctx) => + ctx.metadata match { + case Some(DirectedAtUserMetadata(Some(uid))) => + // 1a. 
new approach of relying exclusively on directed-at metadata if it exists and has a user id + withMetadata.incr() + + repo(UserKey.byId(uid)).liftToTry.map { + case Return(u) => + ValueState.modified(Some(DirectedAtUser(u.id, u.screenName))) + case Throw(NotFound) => + // If user is not found, fallback to directedAtScreenName + ctx.directedAtScreenName + .map { screenName => ValueState.modified(Some(DirectedAtUser(uid, screenName))) } + .getOrElse { + // This should never happen, but let's make sure with a counter + noScreenName.incr() + ValueState.UnmodifiedNone + } + case Throw(_) => partial + } + + case Some(DirectedAtUserMetadata(None)) => + withMetadata.incr() + // 1b. new approach of relying exclusively on directed-at metadata if it exists and has no userId + ValueState.StitchUnmodifiedNone + + case None => + // 2. when DirectedAtUserMetadata not present, look for first leading mention when has reply + withoutMetadata.incr() + + val userKey = ctx.directedAtScreenName + .filter(_ => ctx.isReply) + .map(UserKey.byScreenName) + + val results = userKey.map(repo.apply).getOrElse(Stitch.NotFound) + + results.liftToTry.map { + case Return(u) => ValueState.modified(Some(DirectedAtUser(u.id, u.screenName))) + case Throw(NotFound) => ValueState.UnmodifiedNone + case Throw(_) => partial + } + } + }.onlyIf((curr, _) => curr.isEmpty) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala new file mode 100644 index 000000000..8d3c5d8e2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditControlHydrator.scala @@ -0,0 +1,132 @@ +package com.twitter.tweetypie.hydrator + +import com.twitter.servo.util.Gate +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.StatsReceiver +import com.twitter.tweetypie.Tweet +import 
com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.util.EditControlUtil +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.EditControlInitial +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditGetInitialEditControlException +import com.twitter.tweetypie.util.TweetEditFailure.TweetEditInvalidEditControlException + +/** + * EditControlHydrator is used to hydrate the EditControlEdit arm of the editControl field. + * + * For Tweets without edits and for initial Tweets with subsequent edit(s), this hydrator + * passes through the existing editControl (either None or EditControlInitial). + * + * For edit Tweets, it hydrates the initial Tweet's edit control, set as a field on + * the edit control of the edit Tweet and returns the new edit control. + */ +object EditControlHydrator { + type Type = ValueHydrator[Option[EditControl], TweetCtx] + + val hydratedField: FieldByPath = fieldByPath(Tweet.EditControlField) + + def apply( + repo: TweetRepository.Type, + setEditTimeWindowToSixtyMinutes: Gate[Unit], + stats: StatsReceiver + ): Type = { + val exceptionCounter = ExceptionCounter(stats) + + // Count hydration of edit control for tweets that were written before writing edit control initial. + val noEditControlHydration = stats.counter("noEditControlHydration") + // Count hydration of edit control edit tweets + val editControlEditHydration = stats.counter("editControlEditHydration") + // Count edit control edit hydration which successfully found an edit control initial + val editControlEditHydrationSuccessful = stats.counter("editControlEditHydration", "success") + // Count of initial tweets being hydrated. 
+ val editControlInitialHydration = stats.counter("editControlInitialHydration") + // Count of edits loaded where the ID of edit is not present in the initial tweet + val editTweetIdsMissingAnEdit = stats.counter("editTweetIdsMissingAnEdit") + // Count hydrated tweets where edit control is set, but neither initial nor edit + val unknownUnionVariant = stats.counter("unknownEditControlUnionVariant") + + ValueHydrator[Option[EditControl], TweetCtx] { (curr, ctx) => + curr match { + // Tweet was created before we write edit control - hydrate the value at read. + case None => + noEditControlHydration.incr() + val editControl = EditControlUtil.makeEditControlInitial( + ctx.tweetId, + ctx.createdAt, + setEditTimeWindowToSixtyMinutes) + Stitch.value(ValueState.delta(curr, Some(editControl))) + // Tweet is an initial tweet + case Some(EditControl.Initial(_)) => + editControlInitialHydration.incr() + Stitch.value(ValueState.unmodified(curr)) + + // Tweet is an edited version + case Some(EditControl.Edit(edit)) => + editControlEditHydration.incr() + getInitialTweet(repo, edit.initialTweetId, ctx) + .flatMap(getEditControlInitial(ctx)) + .map { initial: Option[EditControlInitial] => + editControlEditHydrationSuccessful.incr() + + initial.foreach { initialTweet => + // We are able to fetch the initial tweet for this edit but this edit tweet is + // not present in the initial's editTweetIds list + if (!initialTweet.editTweetIds.contains(ctx.tweetId)) { + editTweetIdsMissingAnEdit.incr() + } + } + + val updated = edit.copy(editControlInitial = initial) + ValueState.delta(curr, Some(EditControl.Edit(updated))) + } + .onFailure(exceptionCounter(_)) + case Some(_) => // Unknown union variant + unknownUnionVariant.incr() + Stitch.exception(TweetEditInvalidEditControlException) + } + }.onlyIf { (_, ctx) => ctx.opts.enableEditControlHydration } + } + + def getInitialTweet( + repo: TweetRepository.Type, + initialTweetId: Long, + ctx: TweetCtx, + ): Stitch[Tweet] = { + val options = 
TweetQuery.Options( + include = TweetQuery.Include(Set(Tweet.EditControlField.id)), + cacheControl = ctx.opts.cacheControl, + enforceVisibilityFiltering = false, + safetyLevel = SafetyLevel.FilterNone, + fetchStoredTweets = ctx.opts.fetchStoredTweets + ) + repo(initialTweetId, options) + } + + def getEditControlInitial(ctx: TweetCtx): Tweet => Stitch[Option[EditControlInitial]] = { + initialTweet: Tweet => + initialTweet.editControl match { + case Some(EditControl.Initial(initial)) => + Stitch.value( + if (ctx.opts.cause.writing(ctx.tweetId)) { + // On the write path we hydrate edit control initial + // as if the initial tweet is already updated. + Some(EditControlUtil.plusEdit(initial, ctx.tweetId)) + } else { + Some(initial) + } + ) + case _ if ctx.opts.fetchStoredTweets => + // If the fetchStoredTweets parameter is set to true, it means we're fetching + // and hydrating tweets regardless of state. In this case, if the initial tweet + // doesn't exist, we return None here to ensure we still hydrate and return the + // current edit tweet. + Stitch.None + case _ => Stitch.exception(TweetEditGetInitialEditControlException) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala new file mode 100644 index 000000000..d14dad52c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditHydrator.scala @@ -0,0 +1,63 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.EditState + +/** + * An EditHydrator hydrates a value of type `A`, with a hydration context of type `C`, + * and produces a function that takes a value and context and returns an EditState[A, C] + * (an EditState encapsulates a function that takes a value and returns a new ValueState). 
+ * + * A series of EditHydrators of the same type may be run in parallel via + * `EditHydrator.inParallel`. + */ +class EditHydrator[A, C] private (val run: (A, C) => Stitch[EditState[A]]) { + + /** + * Apply this hydrator to a value, producing an EditState. + */ + def apply(a: A, ctx: C): Stitch[EditState[A]] = run(a, ctx) + + /** + * Convert this EditHydrator to the equivalent ValueHydrator. + */ + def toValueHydrator: ValueHydrator[A, C] = + ValueHydrator[A, C] { (a, ctx) => this.run(a, ctx).map(editState => editState.run(a)) } + + /** + * Runs two EditHydrators in parallel. + */ + def inParallelWith(next: EditHydrator[A, C]): EditHydrator[A, C] = + EditHydrator[A, C] { (x0, ctx) => + Stitch.joinMap(run(x0, ctx), next.run(x0, ctx)) { + case (r1, r2) => r1.andThen(r2) + } + } +} + +object EditHydrator { + + /** + * Create an EditHydrator from a function that returns Stitch[EditState[A]]. + */ + def apply[A, C](f: (A, C) => Stitch[EditState[A]]): EditHydrator[A, C] = + new EditHydrator[A, C](f) + + /** + * Creates a "passthrough" Edit: + * Leaves A unchanged and produces empty HydrationState. + */ + def unit[A, C]: EditHydrator[A, C] = + EditHydrator { (_, _) => Stitch.value(EditState.unit[A]) } + + /** + * Runs several EditHydrators in parallel. 
+ */ + def inParallel[A, C](bs: EditHydrator[A, C]*): EditHydrator[A, C] = + bs match { + case Seq(b) => b + case Seq(b1, b2) => b1.inParallelWith(b2) + case _ => bs.reduceLeftOption(_.inParallelWith(_)).getOrElse(unit[A, C]) // no hydrators => passthrough instead of reduceLeft throwing on an empty Seq + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala new file mode 100644 index 000000000..bc6ed36ef --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EditPerspectiveHydrator.scala @@ -0,0 +1,179 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives.Query +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository.PerspectiveRepository +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.StatusPerspective +import com.twitter.tweetypie.thriftscala.TweetPerspective + +object EditPerspectiveHydrator { + + type Type = ValueHydrator[Option[TweetPerspective], Ctx] + val HydratedField: FieldByPath = fieldByPath(Tweet.EditPerspectiveField) + + case class Ctx( + currentTweetPerspective: Option[StatusPerspective], + editControl: Option[EditControl], + featureSwitchResults: Option[FeatureSwitchResults], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + // Timeline safety levels determine some part of high level traffic + // that we might want to turn off with a decider if edits traffic + // is too big for perspectives to handle. The decider allows us + // to turn down the traffic without the impact on tweet detail. 
+ val TimelinesSafetyLevels: Set[SafetyLevel] = Set( + SafetyLevel.TimelineFollowingActivity, + SafetyLevel.TimelineHome, + SafetyLevel.TimelineConversations, + SafetyLevel.DeprecatedTimelineConnect, + SafetyLevel.TimelineMentions, + SafetyLevel.DeprecatedTimelineActivity, + SafetyLevel.TimelineFavorites, + SafetyLevel.TimelineLists, + SafetyLevel.TimelineInjection, + SafetyLevel.StickersTimeline, + SafetyLevel.LiveVideoTimeline, + SafetyLevel.QuoteTweetTimeline, + SafetyLevel.TimelineHomeLatest, + SafetyLevel.TimelineLikedBy, + SafetyLevel.TimelineRetweetedBy, + SafetyLevel.TimelineBookmark, + SafetyLevel.TimelineMedia, + SafetyLevel.TimelineReactiveBlending, + SafetyLevel.TimelineProfile, + SafetyLevel.TimelineFocalTweet, + SafetyLevel.TimelineHomeRecommendations, + SafetyLevel.NotificationsTimelineDeviceFollow, + SafetyLevel.TimelineConversationsDownranking, + SafetyLevel.TimelineHomeTopicFollowRecommendations, + SafetyLevel.TimelineHomeHydration, + SafetyLevel.FollowedTopicsTimeline, + SafetyLevel.ModeratedTweetsTimeline, + SafetyLevel.TimelineModeratedTweetsHydration, + SafetyLevel.ElevatedQuoteTweetTimeline, + SafetyLevel.TimelineConversationsDownrankingMinimal, + SafetyLevel.BirdwatchNoteTweetsTimeline, + SafetyLevel.TimelineSuperLikedBy, + SafetyLevel.UserScopedTimeline, + SafetyLevel.TweetScopedTimeline, + SafetyLevel.TimelineHomePromotedHydration, + SafetyLevel.NearbyTimeline, + SafetyLevel.TimelineProfileAll, + SafetyLevel.TimelineProfileSuperFollows, + SafetyLevel.SpaceTweetAvatarHomeTimeline, + SafetyLevel.SpaceHomeTimelineUpranking, + SafetyLevel.BlockMuteUsersTimeline, + SafetyLevel.RitoActionedTweetTimeline, + SafetyLevel.TimelineScorer, + SafetyLevel.ArticleTweetTimeline, + SafetyLevel.DesQuoteTweetTimeline, + SafetyLevel.EditHistoryTimeline, + SafetyLevel.DirectMessagesConversationTimeline, + SafetyLevel.DesHomeTimeline, + SafetyLevel.TimelineContentControls, + SafetyLevel.TimelineFavoritesSelfView, + SafetyLevel.TimelineProfileSpaces, + ) + val 
TweetDetailSafetyLevels: Set[SafetyLevel] = Set( + SafetyLevel.TweetDetail, + SafetyLevel.TweetDetailNonToo, + SafetyLevel.TweetDetailWithInjectionsHydration, + SafetyLevel.DesTweetDetail, + ) + + def apply( + repo: PerspectiveRepository.Type, + timelinesGate: Gate[Unit], + tweetDetailsGate: Gate[Unit], + otherSafetyLevelsGate: Gate[Unit], + bookmarksGate: Gate[Long], + stats: StatsReceiver + ): Type = { + + val statsByLevel = + SafetyLevel.list.map { level => + (level, stats.counter("perspective_by_safety_label", level.name, "calls")) + }.toMap + val editsAggregated = stats.counter("edit_perspective", "edits_aggregated") + + ValueHydrator[Option[TweetPerspective], Ctx] { (curr, ctx) => + val safetyLevel = ctx.opts.safetyLevel + val lookupsDecider = + if (TimelinesSafetyLevels.contains(safetyLevel)) timelinesGate + else if (TweetDetailSafetyLevels.contains(safetyLevel)) tweetDetailsGate + else otherSafetyLevelsGate + + val tweetIds: Seq[TweetId] = if (lookupsDecider()) tweetIdsToAggregate(ctx).toSeq else Seq() + statsByLevel + .getOrElse( + safetyLevel, + stats.counter("perspective_by_safety_label", safetyLevel.name, "calls")) + .incr(tweetIds.size) + editsAggregated.incr(tweetIds.size) + + Stitch + .traverse(tweetIds) { id => + repo( + Query( + ctx.opts.forUserId.get, + id, + PerspectiveHydrator.evaluatePerspectiveTypes( + ctx.opts.forUserId.get, + bookmarksGate, + ctx.featureSwitchResults))).liftToTry + }.map { seq => + if (seq.isEmpty) { + val editPerspective = ctx.currentTweetPerspective.map { c => + TweetPerspective( + c.favorited, + c.retweeted, + c.bookmarked + ) + } + ValueState.delta(curr, editPerspective) + } else { + val returns = seq.collect { case Return(r) => r } + val aggregate = Some( + TweetPerspective( + favorited = + returns.exists(_.favorited) || ctx.currentTweetPerspective.exists(_.favorited), + retweeted = + returns.exists(_.retweeted) || ctx.currentTweetPerspective.exists(_.retweeted), + bookmarked = Some( + 
returns.exists(_.bookmarked.contains(true)) || ctx.currentTweetPerspective.exists( + _.bookmarked.contains(true))) + ) + ) + + if (seq.exists(_.isThrow)) { + ValueState.partial(aggregate, HydratedField) + } else { + ValueState.modified(aggregate) + } + } + } + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.opts.forUserId.isDefined && + ctx.tweetFieldRequested(Tweet.EditPerspectiveField) + } + } + + private def tweetIdsToAggregate(ctx: Ctx): Set[TweetId] = { + ctx.editControl + .flatMap { + case EditControl.Initial(initial) => Some(initial) + case EditControl.Edit(edit) => edit.editControlInitial + case _ => None + } + .map(_.editTweetIds.toSet) + .getOrElse(Set()) - ctx.tweetId + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala new file mode 100644 index 000000000..578af57e5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/EscherbirdAnnotationHydrator.scala @@ -0,0 +1,22 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations +import com.twitter.tweetypie.thriftscala.FieldByPath + +object EscherbirdAnnotationHydrator { + type Type = ValueHydrator[Option[EscherbirdEntityAnnotations], Tweet] + + val hydratedField: FieldByPath = fieldByPath(Tweet.EscherbirdEntityAnnotationsField) + + def apply(repo: EscherbirdAnnotationRepository.Type): Type = + ValueHydrator[Option[EscherbirdEntityAnnotations], Tweet] { (curr, tweet) => + repo(tweet).liftToTry.map { + case Return(Some(anns)) => ValueState.modified(Some(anns)) + case Return(None) => ValueState.unmodified(curr) + case Throw(_) => ValueState.partial(curr, hydratedField) + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala new file mode 100644 index 000000000..8931f153c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/FeatureSwitchResultsHydrator.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.context.thriftscala.Viewer +import com.twitter.featureswitches.FSRecipient +import com.twitter.featureswitches.UserAgent +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.mtls.authentication.EmptyServiceIdentifier +import com.twitter.strato.callcontext.CallContext +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.ValueState + +/** + * Hydrate Feature Switch results in TweetData. We can do this once at the + * start of the hydration pipeline so that the rest of the hydrators can + * use the Feature Switch values. 
+ */ +object FeatureSwitchResultsHydrator { + + def apply( + featureSwitchesWithoutExperiments: FeatureSwitches, + clientIdHelper: ClientIdHelper + ): TweetDataValueHydrator = ValueHydrator.map { (td, opts) => + val viewer = TwitterContext().getOrElse(Viewer()) + val recipient = + FSRecipient( + userId = viewer.userId, + clientApplicationId = viewer.clientApplicationId, + userAgent = viewer.userAgent.flatMap(UserAgent(_)), + ).withCustomFields( + "thrift_client_id" -> + clientIdHelper.effectiveClientIdRoot.getOrElse(ClientIdHelper.UnknownClientId), + "forwarded_service_id" -> + CallContext.forwardedServiceIdentifier + .map(_.toString).getOrElse(EmptyServiceIdentifier), + "safety_level" -> opts.safetyLevel.toString, + "client_app_id_is_defined" -> viewer.clientApplicationId.isDefined.toString, + ) + val results = featureSwitchesWithoutExperiments.matchRecipient(recipient) + ValueState.unit(td.copy(featureSwitchResults = Some(results))) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala new file mode 100644 index 000000000..b53c24497 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/GeoScrubHydrator.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +/** + * This hydrator, which is really more of a "repairer", scrubs at read-time geo data + * that should have been scrubbed but wasn't. For any tweet with geo data, it checks + * the last geo-scrub timestamp, if any, for the user, and if the tweet was created before + * that timestamp, it removes the geo data. 
+ */ +object GeoScrubHydrator { + type Data = (Option[GeoCoordinates], Option[PlaceId]) + type Type = ValueHydrator[Data, TweetCtx] + + private[this] val modifiedNoneNoneResult = ValueState.modified((None, None)) + + def apply(repo: GeoScrubTimestampRepository.Type, scribeTweetId: FutureEffect[TweetId]): Type = + ValueHydrator[Data, TweetCtx] { (curr, ctx) => + repo(ctx.userId).liftToTry.map { + case Return(geoScrubTime) if ctx.createdAt <= geoScrubTime => + scribeTweetId(ctx.tweetId) + modifiedNoneNoneResult + + // no-op on failure and no result + case _ => ValueState.unmodified(curr) + } + }.onlyIf { case ((coords, place), _) => coords.nonEmpty || place.nonEmpty } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala new file mode 100644 index 000000000..486a6ee23 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/HasMediaHydrator.scala @@ -0,0 +1,14 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala._ + +object HasMediaHydrator { + type Type = ValueHydrator[Option[Boolean], Tweet] + + def apply(hasMedia: Tweet => Boolean): Type = + ValueHydrator + .map[Option[Boolean], Tweet] { (_, tweet) => ValueState.modified(Some(hasMedia(tweet))) } + .onlyIf((curr, ctx) => curr.isEmpty) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala new file mode 100644 index 000000000..951aa40c9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM1837FilterHydrator.scala @@ -0,0 +1,23 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.coreservices.IM1837 +import com.twitter.tweetypie.core._ +import com.twitter.stitch.Stitch + +object 
IM1837FilterHydrator { + type Type = ValueHydrator[Unit, TweetCtx] + + private val Drop = + Stitch.exception(FilteredState.Unavailable.DropUnspecified) + private val Success = Stitch.value(ValueState.unmodified(())) + + def apply(): Type = + ValueHydrator[Unit, TweetCtx] { (_, ctx) => + val userAgent = TwitterContext().flatMap(_.userAgent) + val userAgentAffected = userAgent.exists(IM1837.isAffectedClient) + val mightCrash = userAgentAffected && IM1837.textMightCrashIOS(ctx.text) + + if (mightCrash) Drop else Success + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala new file mode 100644 index 000000000..16222dec4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM2884FilterHydrator.scala @@ -0,0 +1,27 @@ +package com.twitter.tweetypie.hydrator + +import com.twitter.coreservices.IM2884 +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.ValueState +import com.twitter.stitch.Stitch + +object IM2884FilterHydrator { + type Type = ValueHydrator[Unit, TweetCtx] + + private val Drop = + Stitch.exception(FilteredState.Unavailable.DropUnspecified) + private val Success = Stitch.value(ValueState.unmodified(())) + + def apply(stats: StatsReceiver): Type = { + + val im2884 = new IM2884(stats) + + ValueHydrator[Unit, TweetCtx] { (_, ctx) => + val userAgent = TwitterContext().flatMap(_.userAgent) + val userAgentAffected = userAgent.exists(im2884.isAffectedClient) + val mightCrash = userAgentAffected && im2884.textMightCrashIOS(ctx.text) + if (mightCrash) Drop else Success + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala new file mode 100644 index 
000000000..71ee6139d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/IM3433FilterHydrator.scala @@ -0,0 +1,25 @@ +package com.twitter.tweetypie.hydrator + +import com.twitter.coreservices.IM3433 +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.ValueState + +object IM3433FilterHydrator { + type Type = ValueHydrator[Unit, TweetCtx] + + private val Drop = + Stitch.exception(FilteredState.Unavailable.DropUnspecified) + private val Success = Stitch.value(ValueState.unmodified(())) + + def apply(stats: StatsReceiver): Type = { + + ValueHydrator[Unit, TweetCtx] { (_, ctx) => + val userAgent = TwitterContext().flatMap(_.userAgent) + val userAgentAffected = userAgent.exists(IM3433.isAffectedClient) + val mightCrash = userAgentAffected && IM3433.textMightCrashIOS(ctx.text) + if (mightCrash) Drop else Success + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala new file mode 100644 index 000000000..2a86091b9 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/LanguageHydrator.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object LanguageHydrator { + type Type = ValueHydrator[Option[Language], TweetCtx] + + val hydratedField: FieldByPath = fieldByPath(Tweet.LanguageField) + + private[this] def isApplicable(curr: Option[Language], ctx: TweetCtx) = + ctx.tweetFieldRequested(Tweet.LanguageField) && !ctx.isRetweet && curr.isEmpty + + def apply(repo: LanguageRepository.Type): Type = + ValueHydrator[Option[Language], TweetCtx] { (langOpt, ctx) => + repo(ctx.text).liftToTry.map { + case Return(Some(l)) => 
ValueState.modified(Some(l)) + case Return(None) => ValueState.unmodified(langOpt) + case Throw(_) => ValueState.partial(None, hydratedField) + } + }.onlyIf((curr, ctx) => isApplicable(curr, ctx)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala new file mode 100644 index 000000000..3f3e63fe2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaEntityHydrator.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object MediaEntitiesHydrator { + object Cacheable { + type Ctx = MediaEntityHydrator.Cacheable.Ctx + type Type = ValueHydrator[Seq[MediaEntity], Ctx] + + def once(h: MediaEntityHydrator.Cacheable.Type): Type = + TweetHydration.completeOnlyOnce( + queryFilter = MediaEntityHydrator.queryFilter, + hydrationType = HydrationType.CacheableMedia, + dependsOn = Set(HydrationType.Urls), + hydrator = h.liftSeq + ) + } + + object Uncacheable { + type Ctx = MediaEntityHydrator.Uncacheable.Ctx + type Type = ValueHydrator[Seq[MediaEntity], Ctx] + } +} + +object MediaEntityHydrator { + val hydratedField: FieldByPath = fieldByPath(Tweet.MediaField) + + object Cacheable { + type Type = ValueHydrator[MediaEntity, Ctx] + + case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy + + /** + * Builds a single media-hydrator out of finer-grained hydrators + * only with cacheable information. 
+ */ + def apply(hydrateMediaUrls: Type, hydrateMediaIsProtected: Type): Type = + hydrateMediaUrls.andThen(hydrateMediaIsProtected) + } + + object Uncacheable { + type Type = ValueHydrator[MediaEntity, Ctx] + + case class Ctx(mediaKeys: Option[Seq[MediaKey]], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy { + + def includeMediaEntities: Boolean = tweetFieldRequested(Tweet.MediaField) + def includeAdditionalMetadata: Boolean = + opts.include.mediaFields.contains(MediaEntity.AdditionalMetadataField.id) + } + + /** + * Builds a single media-hydrator out of finer-grained hydrators + * only with uncacheable information. + */ + def apply(hydrateMediaKey: Type, hydrateMediaInfo: Type): Type = + (hydrateMediaKey + .andThen(hydrateMediaInfo)) + .onlyIf((_, ctx) => ctx.includeMediaEntities) + } + + def queryFilter(opts: TweetQuery.Options): Boolean = + opts.include.tweetFields.contains(Tweet.MediaField.id) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala new file mode 100644 index 000000000..86e7d8e1a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaInfoHydrator.scala @@ -0,0 +1,73 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.MediaKeyUtil +import com.twitter.tweetypie.media.MediaMetadataRequest +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ +import java.nio.ByteBuffer + +object MediaInfoHydrator { + type Ctx = MediaEntityHydrator.Uncacheable.Ctx + type Type = MediaEntityHydrator.Uncacheable.Type + + private[this] val log = Logger(getClass) + + def apply(repo: MediaMetadataRepository.Type, stats: StatsReceiver): Type = { + val attributableUserCounter = stats.counter("attributable_user") + + ValueHydrator[MediaEntity, Ctx] { (curr, ctx) 
=> + val request = + toMediaMetadataRequest( + mediaEntity = curr, + tweetId = ctx.tweetId, + extensionsArgs = ctx.opts.extensionsArgs + ) + + request match { + case None => Stitch.value(ValueState.unmodified(curr)) + + case Some(req) => + repo(req).liftToTry.map { + case Return(metadata) => + if (metadata.attributableUserId.nonEmpty) attributableUserCounter.incr() + + ValueState.delta( + curr, + metadata.updateEntity( + mediaEntity = curr, + tweetUserId = ctx.userId, + includeAdditionalMetadata = ctx.includeAdditionalMetadata + ) + ) + + case Throw(ex) if !PartialEntityCleaner.isPartialMedia(curr) => + log.info("Ignored media info repo failure, media entity already hydrated", ex) + ValueState.unmodified(curr) + + case Throw(ex) => + log.error("Media info hydration failed", ex) + ValueState.partial(curr, MediaEntityHydrator.hydratedField) + } + } + } + } + + def toMediaMetadataRequest( + mediaEntity: MediaEntity, + tweetId: TweetId, + extensionsArgs: Option[ByteBuffer] + ): Option[MediaMetadataRequest] = + mediaEntity.isProtected.map { isProtected => + val mediaKey = MediaKeyUtil.get(mediaEntity) + + MediaMetadataRequest( + tweetId = tweetId, + mediaKey = mediaKey, + isProtected = isProtected, + extensionsArgs = extensionsArgs + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala new file mode 100644 index 000000000..9ddfe5851 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaIsProtectedHydrator.scala @@ -0,0 +1,36 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object MediaIsProtectedHydrator { + type Ctx = MediaEntityHydrator.Cacheable.Ctx + type Type = 
MediaEntityHydrator.Cacheable.Type + + val hydratedField: FieldByPath = MediaEntityHydrator.hydratedField + + def apply(repo: UserProtectionRepository.Type): Type = + ValueHydrator[MediaEntity, Ctx] { (curr, ctx) => + val request = UserKey(ctx.userId) + + repo(request).liftToTry.map { + case Return(p) => ValueState.modified(curr.copy(isProtected = Some(p))) + case Throw(NotFound) => ValueState.unmodified(curr) + case Throw(_) => ValueState.partial(curr, hydratedField) + } + }.onlyIf { (curr, ctx) => + // We need to update isProtected for media entities that: + // 1. Do not already have it set. + // 2. Did not come from another tweet. + // + // If the entity does not have an expandedUrl, we can't be sure + // whether the media originated with this tweet. + curr.isProtected.isEmpty && + Media.isOwnMedia(ctx.tweetId, curr) && + curr.expandedUrl != null + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala new file mode 100644 index 000000000..a6e491d61 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaKeyHydrator.scala @@ -0,0 +1,54 @@ +package com.twitter.tweetypie.hydrator + +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.mediaservices.commons.thriftscala._ +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.thriftscala._ + +object MediaKeyHydrator { + type Ctx = MediaEntityHydrator.Uncacheable.Ctx + type Type = MediaEntityHydrator.Uncacheable.Type + + def apply(): Type = + ValueHydrator + .map[MediaEntity, Ctx] { (curr, ctx) => + val mediaKey = infer(ctx.mediaKeys, curr) + ValueState.modified(curr.copy(mediaKey = Some(mediaKey))) + } + .onlyIf((curr, ctx) => curr.mediaKey.isEmpty) + + def infer(mediaKeys: Option[Seq[MediaKey]], mediaEntity: MediaEntity): MediaKey = { + + def inferByMediaId = + mediaKeys + 
.flatMap(_.find(_.mediaId == mediaEntity.mediaId)) + + def contentType = + mediaEntity.sizes.find(_.sizeType == MediaSizeType.Orig).map(_.deprecatedContentType) + + def inferByContentType = + contentType.map { tpe => + val category = + tpe match { + case MediaContentType.VideoMp4 => MediaCategory.TweetGif + case MediaContentType.VideoGeneric => MediaCategory.TweetVideo + case _ => MediaCategory.TweetImage + } + MediaKey(category, mediaEntity.mediaId) + } + + def fail = + throw new IllegalStateException( + s""" + |Can't infer media key. + | mediaKeys:'$mediaKeys' + | mediaEntity:'$mediaEntity' + """.stripMargin + ) + + mediaEntity.mediaKey + .orElse(inferByMediaId) + .orElse(inferByContentType) + .getOrElse(fail) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala new file mode 100644 index 000000000..c2408b634 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaRefsHydrator.scala @@ -0,0 +1,124 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.media_util.GenericMediaKey +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.thriftscala.MediaEntity +import com.twitter.tweetypie.thriftscala.UrlEntity +import com.twitter.tweetypie.media.thriftscala.MediaRef +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepository +import com.twitter.tweetypie.thriftscala.FieldByPath + +/** + * MediaRefsHydrator hydrates the Tweet.mediaRefs field based on stored media keys + * and pasted media. Media keys are available in three ways: + * + * 1. (For old Tweets): in the stored MediaEntity + * 2. (For 2016+ Tweets): in the mediaKeys field + * 3. 
From other Tweets using pasted media + * + * This hydrator combines these three sources into a single field, providing the + * media key and source Tweet information for pasted media. + * + * Long-term we will move this logic to the write path and backfill the field for old Tweets. + */ +object MediaRefsHydrator { + type Type = ValueHydrator[Option[Seq[MediaRef]], Ctx] + + case class Ctx( + media: Seq[MediaEntity], + mediaKeys: Seq[MediaKey], + urlEntities: Seq[UrlEntity], + underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy { + def includePastedMedia: Boolean = opts.include.pastedMedia + } + + val hydratedField: FieldByPath = fieldByPath(Tweet.MediaRefsField) + + def mediaKeyToMediaRef(mediaKey: MediaKey): MediaRef = + MediaRef( + genericMediaKey = GenericMediaKey(mediaKey).toStringKey() + ) + + // Convert a pasted Tweet into a Seq of MediaRef from that Tweet with the correct sourceTweetId and sourceUserId + def pastedTweetToMediaRefs( + tweet: Tweet + ): Seq[MediaRef] = + tweet.mediaRefs.toSeq.flatMap { mediaRefs => + mediaRefs.map( + _.copy( + sourceTweetId = Some(tweet.id), + sourceUserId = Some(getUserId(tweet)) + )) + } + + // Fetch MediaRefs from pasted media Tweet URLs in the Tweet text + def getPastedMediaRefs( + repo: TweetRepository.Optional, + ctx: Ctx, + includePastedMedia: Gate[Unit] + ): Stitch[Seq[MediaRef]] = { + if (includePastedMedia() && ctx.includePastedMedia) { + + // Extract Tweet ids from pasted media permalinks in the Tweet text + val pastedMediaTweetIds: Seq[TweetId] = + PastedMediaHydrator.pastedIdsAndEntities(ctx.tweetId, ctx.urlEntities).map(_._1) + + val opts = TweetQuery.Options( + include = TweetQuery.Include( + tweetFields = Set(Tweet.CoreDataField.id, Tweet.MediaRefsField.id), + pastedMedia = false // don't recursively load pasted media refs + )) + + // Load a Seq of Tweets with pasted media, ignoring any returned with NotFound or a FilteredState + val pastedTweets: Stitch[Seq[Tweet]] = Stitch + .traverse(pastedMediaTweetIds) { 
id => + repo(id, opts) + }.map(_.flatten) + + pastedTweets.map(_.flatMap(pastedTweetToMediaRefs)) + } else { + Stitch.Nil + } + } + + // Make empty Seq None and non-empty Seq Some(Seq(...)) to comply with the thrift field type + def optionalizeSeq(mediaRefs: Seq[MediaRef]): Option[Seq[MediaRef]] = + Some(mediaRefs).filterNot(_.isEmpty) + + def apply( + repo: TweetRepository.Optional, + includePastedMedia: Gate[Unit] + ): Type = { + ValueHydrator[Option[Seq[MediaRef]], Ctx] { (curr, ctx) => + // Fetch mediaRefs from Tweet media + val storedMediaRefs: Seq[MediaRef] = ctx.media.map { mediaEntity => + // Use MediaKeyHydrator.infer to determine the media key from the media entity + val mediaKey = MediaKeyHydrator.infer(Some(ctx.mediaKeys), mediaEntity) + mediaKeyToMediaRef(mediaKey) + } + + // Fetch mediaRefs from pasted media + getPastedMediaRefs(repo, ctx, includePastedMedia).liftToTry.map { + case Return(pastedMediaRefs) => + // Combine the refs from the Tweet's own media and those from pasted media, then limit + // to MaxMediaEntitiesPerTweet. 
+ val limitedRefs = + (storedMediaRefs ++ pastedMediaRefs).take(PastedMediaHydrator.MaxMediaEntitiesPerTweet) + + ValueState.delta(curr, optionalizeSeq(limitedRefs)) + case Throw(_) => + ValueState.partial(optionalizeSeq(storedMediaRefs), hydratedField) + } + + }.onlyIf { (_, ctx) => + ctx.tweetFieldRequested(Tweet.MediaRefsField) || + ctx.opts.safetyLevel != SafetyLevel.FilterNone + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala new file mode 100644 index 000000000..4e3f1bc42 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaTagsHydrator.scala @@ -0,0 +1,103 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object MediaTagsHydrator { + type Type = ValueHydrator[Option[TweetMediaTags], TweetCtx] + + /** + * TweetMediaTags contains a map of MediaId to Seq[MediaTag]. + * The outer traverse maps over each MediaId, while the inner + * traverse maps over each MediaTag. + * + * A MediaTag has four fields: + * + * 1: MediaTagType tag_type + * 2: optional i64 user_id + * 3: optional string screen_name + * 4: optional string name + * + * For each MediaTag, if the tag type is MediaTagType.User and the user id is defined + * (see mediaTagToKey) we look up the tagged user, using the tagging user (the tweet + * author) as the viewer id (this means that visibility rules between the tagged user + * and tagging user are applied). + * + * If we get a taggable user back, we fill in the screen name and name fields. If not, + * we drop the tag. 
+ */ + def apply(repo: UserViewRepository.Type): Type = + ValueHydrator[TweetMediaTags, TweetCtx] { (tags, ctx) => + val mediaTagsByMediaId: Seq[(MediaId, Seq[MediaTag])] = tags.tagMap.toSeq + + Stitch + .traverse(mediaTagsByMediaId) { + case (mediaId, mediaTags) => + Stitch.traverse(mediaTags)(tag => hydrateMediaTag(repo, tag, ctx.userId)).map { + ValueState.sequence(_).map(tags => (mediaId, tags.flatten)) + } + } + .map { + // Reconstruct TweetMediaTags(tagMap: Map[MediaId, SeqMediaTag]) + ValueState.sequence(_).map(s => TweetMediaTags(s.toMap)) + } + }.onlyIf { (_, ctx) => + !ctx.isRetweet && ctx.tweetFieldRequested(Tweet.MediaTagsField) + }.liftOption + + /** + * A function to hydrate a single `MediaTag`. The return type is `Option[MediaTag]` + * because we may return `None` to filter out a `MediaTag` if the tagged user doesn't + * exist or isn't taggable. + */ + private[this] def hydrateMediaTag( + repo: UserViewRepository.Type, + mediaTag: MediaTag, + authorId: UserId + ): Stitch[ValueState[Option[MediaTag]]] = + mediaTagToKey(mediaTag) match { + case None => Stitch.value(ValueState.unmodified(Some(mediaTag))) + case Some(key) => + repo(toRepoQuery(key, authorId)) + .map { + case user if user.mediaView.exists(_.canMediaTag) => + ValueState.modified( + Some( + mediaTag.copy( + userId = Some(user.id), + screenName = user.profile.map(_.screenName), + name = user.profile.map(_.name) + ) + ) + ) + + // if `canMediaTag` is false, drop the tag + case _ => ValueState.modified(None) + } + .handle { + // if user is not found, drop the tag + case NotFound => ValueState.modified(None) + } + } + + private[this] val queryFields: Set[UserField] = Set(UserField.Profile, UserField.MediaView) + + def toRepoQuery(userKey: UserKey, forUserId: UserId): UserViewRepository.Query = + UserViewRepository.Query( + userKey = userKey, + // view is based on tagging user, not tweet viewer + forUserId = Some(forUserId), + visibility = UserVisibility.MediaTaggable, + queryFields = 
queryFields + ) + + private[this] def mediaTagToKey(mediaTag: MediaTag): Option[UserKey] = + mediaTag match { + case MediaTag(MediaTagType.User, Some(taggedUserId), _, _) => Some(UserKey(taggedUserId)) + case _ => None + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala new file mode 100644 index 000000000..0cacf3b74 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MediaUrlFieldsHydrator.scala @@ -0,0 +1,25 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.media.MediaUrl +import com.twitter.tweetypie.thriftscala._ + +object MediaUrlFieldsHydrator { + type Ctx = MediaEntityHydrator.Cacheable.Ctx + type Type = MediaEntityHydrator.Cacheable.Type + + def mediaPermalink(ctx: Ctx): Option[UrlEntity] = + ctx.urlEntities.view.reverse.find(MediaUrl.Permalink.hasTweetId(_, ctx.tweetId)) + + def apply(): Type = + ValueHydrator + .map[MediaEntity, Ctx] { (curr, ctx) => + mediaPermalink(ctx) match { + case None => ValueState.unmodified(curr) + case Some(urlEntity) => ValueState.modified(Media.copyFromUrlEntity(curr, urlEntity)) + } + } + .onlyIf((curr, ctx) => curr.url == null && Media.isOwnMedia(ctx.tweetId, curr)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala new file mode 100644 index 000000000..a1d7c09cd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/MentionEntityHydrator.scala @@ -0,0 +1,47 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import 
com.twitter.tweetypie.thriftscala._ + +object MentionEntitiesHydrator { + type Type = ValueHydrator[Seq[MentionEntity], TweetCtx] + + def once(h: MentionEntityHydrator.Type): Type = + TweetHydration.completeOnlyOnce( + queryFilter = queryFilter, + hydrationType = HydrationType.Mentions, + hydrator = h.liftSeq + ) + + def queryFilter(opts: TweetQuery.Options): Boolean = + opts.include.tweetFields.contains(Tweet.MentionsField.id) +} + +object MentionEntityHydrator { + type Type = ValueHydrator[MentionEntity, TweetCtx] + + val hydratedField: FieldByPath = fieldByPath(Tweet.MentionsField) + + def apply(repo: UserIdentityRepository.Type): Type = + ValueHydrator[MentionEntity, TweetCtx] { (entity, _) => + repo(UserKey(entity.screenName)).liftToTry.map { + case Return(user) => ValueState.delta(entity, update(entity, user)) + case Throw(NotFound) => ValueState.unmodified(entity) + case Throw(_) => ValueState.partial(entity, hydratedField) + } + // only hydrate mention if userId or name is empty + }.onlyIf((entity, _) => entity.userId.isEmpty || entity.name.isEmpty) + + /** + * Updates a MentionEntity using the given user data. + */ + def update(entity: MentionEntity, userIdent: UserIdentity): MentionEntity = + entity.copy( + screenName = userIdent.screenName, + userId = Some(userIdent.id), + name = Some(userIdent.realName) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala new file mode 100644 index 000000000..5babf5b88 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NegativeVisibleTextRangeRepairer.scala @@ -0,0 +1,18 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.thriftscala.TextRange + +/** + * Some tweets with visibleTextRange may have fromIndex > toIndex, in which case set fromIndex + * to toIndex. 
+ */ +object NegativeVisibleTextRangeRepairer { + private val mutation = + Mutation[Option[TextRange]] { + case Some(TextRange(from, to)) if from > to => Some(Some(TextRange(to, to))) + case _ => None + } + + private[tweetypie] val tweetMutation = TweetLenses.visibleTextRange.mutation(mutation) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala new file mode 100644 index 000000000..c7224a8db --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/NoteTweetSuffixHydrator.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.TweetData +import com.twitter.tweetypie.core.ValueState +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala.entities.Implicits._ +import com.twitter.tweetypie.thriftscala.TextRange +import com.twitter.tweetypie.tweettext.Offset +import com.twitter.tweetypie.tweettext.TextModification +import com.twitter.tweetypie.tweettext.TweetText +import com.twitter.tweetypie.util.TweetLenses + +object NoteTweetSuffixHydrator { + + val ELLIPSIS: String = "\u2026" + + private def addTextSuffix(tweet: Tweet): Tweet = { + val originalText = TweetLenses.text(tweet) + val originalTextLength = TweetText.codePointLength(originalText) + + val visibleTextRange: TextRange = + TweetLenses + .visibleTextRange(tweet) + .getOrElse(TextRange(0, originalTextLength)) + + val insertAtCodePoint = Offset.CodePoint(visibleTextRange.toIndex) + + val textModification = TextModification.insertAt( + originalText, + insertAtCodePoint, + ELLIPSIS + ) + + val mediaEntities = TweetLenses.media(tweet) + val urlEntities = TweetLenses.urls(tweet) + + val modifiedText = textModification.updated + val modifiedMediaEntities = textModification.reindexEntities(mediaEntities) + val 
modifiedUrlEntities = textModification.reindexEntities(urlEntities) + val modifiedVisibleTextRange = visibleTextRange.copy(toIndex = + visibleTextRange.toIndex + TweetText.codePointLength(ELLIPSIS)) + + val updatedTweet = + Lens.setAll( + tweet, + TweetLenses.text -> modifiedText, + TweetLenses.urls -> modifiedUrlEntities.sortBy(_.fromIndex), + TweetLenses.media -> modifiedMediaEntities.sortBy(_.fromIndex), + TweetLenses.visibleTextRange -> Some(modifiedVisibleTextRange) + ) + + updatedTweet + } + + def apply(): TweetDataValueHydrator = { + ValueHydrator[TweetData, TweetQuery.Options] { (td, _) => + val updatedTweet = addTextSuffix(td.tweet) + Stitch.value(ValueState.delta(td, td.copy(tweet = updatedTweet))) + }.onlyIf { (td, _) => + td.tweet.noteTweet.isDefined && + td.tweet.noteTweet.flatMap(_.isExpandable).getOrElse(true) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala new file mode 100644 index 000000000..a15e64383 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PartialEntityCleaner.scala @@ -0,0 +1,80 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.thriftscala._ +import scala.collection.Set + +/** + * Removes partial Url, Media, and Mention entities that were not + * fully hydrated. Rather than returning no value or a value with + * incomplete entities on an entity hydration failure, we gracefully + * degrade to just omitting those entities. This step needs to be + * applied in the post-cache filter, so that we don't cache the value + * with missing entities. + * + * A MediaEntity will first be converted back to a UrlEntity if it is only + * partially hydrated. 
If the resulting UrlEntity is itself then only partially + * hydrated, it will get dropped also. + */ +object PartialEntityCleaner { + def apply(stats: StatsReceiver): Mutation[Tweet] = { + val scopedStats = stats.scope("partial_entity_cleaner") + Mutation + .all( + Seq( + TweetLenses.urls.mutation(urls.countMutations(scopedStats.counter("urls"))), + TweetLenses.media.mutation(media.countMutations(scopedStats.counter("media"))), + TweetLenses.mentions.mutation(mentions.countMutations(scopedStats.counter("mentions"))) + ) + ) + .onlyIf(!isRetweet(_)) + } + + private[this] def clean[E](isPartial: E => Boolean) = + Mutation[Seq[E]] { items => + items.partition(isPartial) match { + case (Nil, nonPartial) => None + case (partial, nonPartial) => Some(nonPartial) + } + } + + private[this] val mentions = + clean[MentionEntity](e => e.userId.isEmpty || e.name.isEmpty) + + private[this] val urls = + clean[UrlEntity](e => + isNullOrEmpty(e.url) || isNullOrEmpty(e.expanded) || isNullOrEmpty(e.display)) + + private[this] val media = + Mutation[Seq[MediaEntity]] { mediaEntities => + mediaEntities.partition(isPartialMedia) match { + case (Nil, nonPartial) => None + case (partial, nonPartial) => Some(nonPartial) + } + } + + def isPartialMedia(e: MediaEntity): Boolean = + e.fromIndex < 0 || + e.toIndex <= 0 || + isNullOrEmpty(e.url) || + isNullOrEmpty(e.displayUrl) || + isNullOrEmpty(e.mediaUrl) || + isNullOrEmpty(e.mediaUrlHttps) || + isNullOrEmpty(e.expandedUrl) || + e.mediaInfo.isEmpty || + e.mediaKey.isEmpty || + (MediaKeyClassifier.isImage(MediaKeyUtil.get(e)) && containsInvalidSizeVariant(e.sizes)) + + private[this] val userMentions = + clean[UserMention](e => e.screenName.isEmpty || e.name.isEmpty) + + def isNullOrEmpty(optString: Option[String]): Boolean = + optString.isEmpty || optString.exists(isNullOrEmpty(_)) + + def isNullOrEmpty(str: String): Boolean = str == null || str.isEmpty + + def containsInvalidSizeVariant(sizes: Set[MediaSize]): Boolean = + sizes.exists(size 
=> size.height == 0 || size.width == 0) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala new file mode 100644 index 000000000..769c9bead --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PastedMediaHydrator.scala @@ -0,0 +1,102 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.MediaUrl +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object PastedMediaHydrator { + type Type = ValueHydrator[PastedMedia, Ctx] + + /** + * Ensure that the final tweet has at most 4 media entities. + */ + val MaxMediaEntitiesPerTweet = 4 + + /** + * Enforce visibility rules when hydrating media for a write. + */ + val writeSafetyLevel = SafetyLevel.TweetWritesApi + + case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy { + def includePastedMedia: Boolean = opts.include.pastedMedia + def includeMediaEntities: Boolean = tweetFieldRequested(Tweet.MediaField) + def includeAdditionalMetadata: Boolean = + mediaFieldRequested(MediaEntity.AdditionalMetadataField.id) + def includeMediaTags: Boolean = tweetFieldRequested(Tweet.MediaTagsField) + } + + def getPastedMedia(t: Tweet): PastedMedia = PastedMedia(getMedia(t), Map.empty) + + def apply(repo: PastedMediaRepository.Type): Type = { + def hydrateOneReference( + tweetId: TweetId, + urlEntity: UrlEntity, + repoCtx: PastedMediaRepository.Ctx + ): Stitch[PastedMedia] = + repo(tweetId, repoCtx).liftToTry.map { + case Return(pastedMedia) => pastedMedia.updateEntities(urlEntity) + case _ => PastedMedia.empty + } + + ValueHydrator[PastedMedia, Ctx] { (curr, ctx) => + val repoCtx = asRepoCtx(ctx) + val idsAndEntities = 
pastedIdsAndEntities(ctx.tweetId, ctx.urlEntities) + + val res = Stitch.traverse(idsAndEntities) { + case (tweetId, urlEntity) => + hydrateOneReference(tweetId, urlEntity, repoCtx) + } + + res.liftToTry.map { + case Return(pastedMedias) => + val merged = pastedMedias.foldLeft(curr)(_.merge(_)) + val limited = merged.take(MaxMediaEntitiesPerTweet) + ValueState.delta(curr, limited) + + case Throw(_) => ValueState.unmodified(curr) + } + }.onlyIf { (_, ctx) => + // we only attempt to hydrate pasted media if media is requested + ctx.includePastedMedia && + !ctx.isRetweet && + ctx.includeMediaEntities + } + } + + /** + * Finds url entities for foreign permalinks, and returns a sequence of tuples containing + * the foreign tweet IDs and the associated UrlEntity containing the permalink. If the same + * permalink appears multiple times, only one of the duplicate entities is returned. + */ + def pastedIdsAndEntities( + tweetId: TweetId, + urlEntities: Seq[UrlEntity] + ): Seq[(TweetId, UrlEntity)] = + urlEntities + .foldLeft(Map.empty[TweetId, UrlEntity]) { + case (z, e) => + MediaUrl.Permalink.getTweetId(e).filter(_ != tweetId) match { + case Some(id) if !z.contains(id) => z + (id -> e) + case _ => z + } + } + .toSeq + + def asRepoCtx(ctx: Ctx) = + PastedMediaRepository.Ctx( + ctx.includeMediaEntities, + ctx.includeAdditionalMetadata, + ctx.includeMediaTags, + ctx.opts.extensionsArgs, + if (ctx.opts.cause == TweetQuery.Cause.Insert(ctx.tweetId) || + ctx.opts.cause == TweetQuery.Cause.Undelete(ctx.tweetId)) { + writeSafetyLevel + } else { + ctx.opts.safetyLevel + } + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala new file mode 100644 index 000000000..4a055f5ec --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PerspectiveHydrator.scala @@ -0,0 +1,112 @@ +package com.twitter.tweetypie +package 
hydrator + +import com.twitter.featureswitches.v2.FeatureSwitchResults +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives.Query +import com.twitter.timelineservice.{thriftscala => tls} +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.PerspectiveRepository +import com.twitter.tweetypie.thriftscala.FieldByPath +import com.twitter.tweetypie.thriftscala.StatusPerspective + +object PerspectiveHydrator { + type Type = ValueHydrator[Option[StatusPerspective], Ctx] + val hydratedField: FieldByPath = fieldByPath(Tweet.PerspectiveField) + + case class Ctx(featureSwitchResults: Option[FeatureSwitchResults], underlyingTweetCtx: TweetCtx) + extends TweetCtx.Proxy + + val Types: Set[tls.PerspectiveType] = + Set( + tls.PerspectiveType.Reported, + tls.PerspectiveType.Favorited, + tls.PerspectiveType.Retweeted, + tls.PerspectiveType.Bookmarked + ) + + val TypesWithoutBookmarked: Set[tls.PerspectiveType] = + Set( + tls.PerspectiveType.Reported, + tls.PerspectiveType.Favorited, + tls.PerspectiveType.Retweeted + ) + + private[this] val partialResult = ValueState.partial(None, hydratedField) + + val bookmarksPerspectiveHydrationEnabledKey = "bookmarks_perspective_hydration_enabled" + + def evaluatePerspectiveTypes( + userId: Long, + bookmarksPerspectiveDecider: Gate[Long], + featureSwitchResults: Option[FeatureSwitchResults] + ): Set[tls.PerspectiveType] = { + if (bookmarksPerspectiveDecider(userId) || + featureSwitchResults + .flatMap(_.getBoolean(bookmarksPerspectiveHydrationEnabledKey, false)) + .getOrElse(false)) + Types + else + TypesWithoutBookmarked + } + + def apply( + repo: PerspectiveRepository.Type, + shouldHydrateBookmarksPerspective: Gate[Long], + stats: StatsReceiver + ): Type = { + val statsByLevel = + SafetyLevel.list.map(level => (level, stats.counter(level.name, "calls"))).toMap + + ValueHydrator[Option[StatusPerspective], Ctx] { 
(_, ctx) => + val res: Stitch[tls.TimelineEntryPerspective] = if (ctx.isRetweet) { + Stitch.value( + tls.TimelineEntryPerspective( + favorited = false, + retweetId = None, + retweeted = false, + reported = false, + bookmarked = None + ) + ) + } else { + statsByLevel + .getOrElse(ctx.opts.safetyLevel, stats.counter(ctx.opts.safetyLevel.name, "calls")) + .incr() + + repo( + Query( + userId = ctx.opts.forUserId.get, + tweetId = ctx.tweetId, + types = evaluatePerspectiveTypes( + ctx.opts.forUserId.get, + shouldHydrateBookmarksPerspective, + ctx.featureSwitchResults) + )) + } + + res.liftToTry.map { + case Return(perspective) => + ValueState.modified( + Some( + StatusPerspective( + userId = ctx.opts.forUserId.get, + favorited = perspective.favorited, + retweeted = perspective.retweeted, + retweetId = perspective.retweetId, + reported = perspective.reported, + bookmarked = perspective.bookmarked + ) + ) + ) + case _ => partialResult + } + + }.onlyIf { (curr, ctx) => + curr.isEmpty && + ctx.opts.forUserId.nonEmpty && + (ctx.tweetFieldRequested(Tweet.PerspectiveField) || ctx.opts.excludeReported) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala new file mode 100644 index 000000000..186619df8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/PlaceHydrator.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ + +object PlaceHydrator { + type Type = ValueHydrator[Option[Place], TweetCtx] + + val HydratedField: FieldByPath = fieldByPath(Tweet.PlaceField) + + def apply(repo: PlaceRepository.Type): Type = + ValueHydrator[Option[Place], TweetCtx] { (_, ctx) => + val key = PlaceKey(ctx.placeId.get, ctx.opts.languageTag) + 
/*
 * A constructor for a ValueHydrator that hydrates `previous_counts`
 * information. Previous counts are applied to edit tweets: they are the
 * summation of all the status_counts in an edit chain up to, but not
 * including, the tweet being hydrated.
 */
object PreviousTweetCountsHydrator {

  case class Ctx(
    editControl: Option[EditControl],
    featureSwitchResults: Option[FeatureSwitchResults],
    underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  type Type = ValueHydrator[Option[StatusCounts], Ctx]

  val hydratedField: FieldByPath = fieldByPath(Tweet.PreviousCountsField)

  /*
   * Params:
   *  tweetId: The tweet being hydrated.
   *  editTweetIds: The sorted list of all edits in an edit chain.
   *
   * Returns: tweetIds in an edit chain from the initial tweet up to but not including
   *  the tweet being hydrated (`tweetId`). The result relies on `editTweetIds` being
   *  sorted ascending, because scanning stops at the first id that is not smaller
   *  than `tweetId`.
   */
  def previousTweetIds(tweetId: TweetId, editTweetIds: Seq[TweetId]): Seq[TweetId] =
    editTweetIds.takeWhile(_ < tweetId)

  /*
   * An addition operation for Option[Long]. A missing operand is ignored, so the
   * result is None only when both operands are None.
   */
  def sumOptions(A: Option[Long], B: Option[Long]): Option[Long] =
    (A, B) match {
      case (Some(a), Some(b)) => Some(a + b)
      case _ => A.orElse(B)
    }

  /* An addition operation for StatusCounts: sums the five count fields pairwise. */
  def sumStatusCounts(A: StatusCounts, B: StatusCounts): StatusCounts =
    StatusCounts(
      retweetCount = sumOptions(A.retweetCount, B.retweetCount),
      replyCount = sumOptions(A.replyCount, B.replyCount),
      favoriteCount = sumOptions(A.favoriteCount, B.favoriteCount),
      quoteCount = sumOptions(A.quoteCount, B.quoteCount),
      bookmarkCount = sumOptions(A.bookmarkCount, B.bookmarkCount)
    )

  def apply(repo: TweetCountsRepository.Type, shouldHydrateBookmarksCount: Gate[Long]): Type = {

    /*
     * The count fields requested for this context. Shared by the hydrator body and
     * its `onlyIf` guard so both always apply exactly the same filter.
     */
    def requestedCountsFields(ctx: Ctx): Set[FieldId] =
      TweetCountsHydrator.filterRequestedCounts(
        ctx.opts.forUserId.getOrElse(ctx.userId),
        ctx.opts.include.countsFields,
        shouldHydrateBookmarksCount,
        ctx.featureSwitchResults
      )

    /*
     * Get a StatusCount representing the summed engagements of all previous
     * StatusCounts in an edit chain. Only `countsFields` that are specifically requested
     * are included in the aggregate StatusCount, otherwise those fields are None.
     */
    def getPreviousEngagementCounts(
      tweetId: TweetId,
      editTweetIds: Seq[TweetId],
      countsFields: Set[FieldId]
    ): Stitch[ValueState[StatusCounts]] = {
      val editTweetIdList = previousTweetIds(tweetId, editTweetIds)

      // StatusCounts for each edit tweet revision
      val statusCountsPerEditVersion: Stitch[Seq[ValueState[StatusCounts]]] =
        Stitch.collect(editTweetIdList.map { editTweetId =>
          // Which tweet count keys to request, as indicated by the tweet options.
          val keys: Seq[TweetCountKey] =
            TweetCountsHydrator.toKeys(editTweetId, countsFields, None)

          // A separate StatusCounts for each count field, for `editTweetId`
          // e.g. Seq(StatusCounts(retweetCounts=5L), StatusCounts(favCounts=6L))
          val statusCountsPerCountField: Stitch[Seq[ValueState[StatusCounts]]] =
            Stitch.collect(keys.map(key => TweetCountsHydrator.statusCountsRepo(key, repo)))

          // Reduce the per-field counts into a single StatusCounts for `editTweetId`
          statusCountsPerCountField.map { vs =>
            // NOTE: This StatusCounts reduction uses different logic than
            // `sumStatusCounts`. This reduction takes the latest value for a field
            // instead of summing the fields.
            ValueState.sequence(vs).map(TweetCountsHydrator.reduceStatusCounts)
          }
        })

      // Sum together the StatusCounts for each edit tweet revision into a single Status Count
      statusCountsPerEditVersion.map { vs =>
        ValueState.sequence(vs).map { statusCounts =>
          // Reduce a list of StatusCounts into a single StatusCount by summing their fields.
          // `reduce` throws on an empty sequence, which happens when no id in
          // `editTweetIds` precedes `tweetId`; an empty StatusCounts (every count
          // None) is the sum of nothing, so fall back to it instead of failing.
          statusCounts.reduceOption(sumStatusCounts).getOrElse(StatusCounts())
        }
      }
    }

    ValueHydrator[Option[StatusCounts], Ctx] { (inputStatusCounts, ctx) =>
      val countsFields: Set[FieldId] = requestedCountsFields(ctx)

      ctx.editControl match {
        case Some(EditControl.Edit(edit)) =>
          edit.editControlInitial match {
            case Some(initial) =>
              val previousStatusCounts: Stitch[ValueState[StatusCounts]] =
                getPreviousEngagementCounts(ctx.tweetId, initial.editTweetIds, countsFields)

              // Add the new aggregated StatusCount to the TweetData and return it
              previousStatusCounts.map { valueState =>
                valueState.map { statusCounts => Some(statusCounts) }
              }
            case None =>
              // EditControlInitial is not hydrated within EditControlEdit
              // This means we cannot provide aggregated previous counts, we will
              // fail open and return the input data unchanged.
              Stitch.value(ValueState.partial(inputStatusCounts, hydratedField))
          }

        case _ =>
          // If the tweet has an EditControlInitial (it is the first Tweet in the edit
          // chain) or has no EditControl at all (it could be an old Tweet from before
          // Edit Controls existed), then the previous counts are set to None.
          Stitch.value(ValueState.unit(None))
      }
    }.onlyIf { (_, ctx: Ctx) =>
      // only run if a counts field was requested; note this runs on both the read
      // and the write path
      requestedCountsFields(ctx).nonEmpty
    }
  }
}
/**
 * Decides whether the inner (quoted) tweet of a quote tweet must be withheld.
 *
 * The decision is delegated to the QuotedTweetVisibilityRepository, which is given
 * the outer tweet and its author, the inner tweet and its author, the viewer and the
 * safety level. Typical relationships being enforced:
 *
 *  - the inner author blocks the outer author (e.g. @jack blocks @trollmaster);
 *  - the inner author is otherwise not visible to the outer author
 *    (e.g. @h4x0r quoting @protectedUser);
 *  - the viewer blocks the inner author (e.g. @viewer blocks @jack).
 *
 * {{{
 *   @viewer
 *   +------------------------------+
 *   | @outerAuthor                 | <-- OUTER QUOTE TWEET
 *   | lol u can't spell twitter    |
 *   | +--------------------------+ |
 *   | | @innerAuthor             | <---- INNER QUOTED TWEET
 *   | | just setting up my twttr | |
 *   | +--------------------------+ |
 *   +------------------------------+
 * }}}
 */
object QuoteTweetVisibilityHydrator {
  type Type = ValueHydrator[Option[FilteredState.Unavailable], TweetCtx]

  def apply(repo: QuotedTweetVisibilityRepository.Type): QuoteTweetVisibilityHydrator.Type = {
    val checkVisibility =
      ValueHydrator[Option[FilteredState.Unavailable], TweetCtx] { (_, ctx) =>
        // Safe: the guard below only lets tweets with a quotedTweet through.
        val quoted: QuotedTweet = ctx.quotedTweet.get
        val visibilityRequest = QuotedTweetVisibilityRepository.Request(
          outerTweetId = ctx.tweetId,
          outerAuthorId = ctx.userId,
          innerTweetId = quoted.tweetId,
          innerAuthorId = quoted.userId,
          viewerId = ctx.opts.forUserId,
          safetyLevel = ctx.opts.safetyLevel
        )

        repo(visibilityRequest).liftToTry.map {
          case Return(Some(unavailable: FilteredState.Unavailable)) =>
            ValueState.modified(Some(unavailable))

          // Any other FilteredState lets the quoted tweet be hydrated and filtered
          // independently; a visibility-filtering failure degrades gracefully to
          // "no filtering". Both therefore leave the value untouched.
          case Return(_) | Throw(_) =>
            ValueState.UnmodifiedNone
        }
      }

    checkVisibility.onlyIf { (_, ctx) =>
      !ctx.isRetweet &&
      ctx.tweetFieldRequested(Tweet.QuotedTweetField) &&
      ctx.opts.enforceVisibilityFiltering &&
      ctx.quotedTweet.isDefined
    }
  }
}
/**
 * Loads the tweet referenced by `Tweet.quotedTweet`.
 */
object QuotedTweetHydrator {
  type Type = ValueHydrator[Option[QuotedTweetResult], Ctx]

  case class Ctx(
    quotedTweetFilteredState: Option[FilteredState.Unavailable],
    underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  def apply(repo: TweetResultRepository.Type): Type = {
    val loadQuotedTweet =
      ValueHydrator[Option[QuotedTweetResult], Ctx] { (_, ctx) =>
        ctx.quotedTweet match {
          case None =>
            // No quoted tweet ref: leave the value as None, indicating undefined.
            ValueState.StitchUnmodifiedNone

          case Some(qtRef) =>
            ctx.quotedTweetFilteredState match {
              case Some(filtered) =>
                // The quoted tweet was already judged unavailable; report that
                // verdict instead of loading the tweet.
                Stitch.value(ValueState.modified(Some(QuotedTweetResult.Filtered(filtered))))

              case None =>
                val innerOptions =
                  ctx.opts.copy(
                    // we don't want to recursively load quoted tweets
                    include = ctx.opts.include.copy(quotedTweet = false),
                    // be sure to get a clean version of the tweet
                    scrubUnrequestedFields = true,
                    // TweetVisibilityLibrary filters quoted tweets slightly differently
                    // from other tweets. Specifically, most Interstitial verdicts are
                    // converted to Drops.
                    isInnerQuotedTweet = true
                  )

                repo(qtRef.tweetId, innerOptions).transform { loaded =>
                  Stitch.const {
                    QuotedTweetResult.fromTry(loaded).map(r => ValueState.modified(Some(r)))
                  }
                }
            }
        }
      }

    // Only hydrate when nothing is set yet and the caller asked for quoted tweets.
    loadQuotedTweet.onlyIf((curr, ctx) => curr.isEmpty && ctx.opts.include.quotedTweet)
  }
}
/**
 * Adds QuotedTweet structs to tweets that contain a tweet permalink url at the end of the
 * tweet text. After the introduction of QT + Media, inner tweet permalinks are no longer
 * stored in the outer tweet text, so this hydrator only runs for:
 *
 * - historical quote tweets which have the inner tweet url in the tweet text and url entities.
 * - new quote tweets created with pasted tweet permalinks; going forward we want to persist
 *   the quoted_tweet struct in MH for these tweets.
 */
object QuotedTweetRefHydrator {
  type Type = ValueHydrator[Option[QuotedTweet], Ctx]

  case class Ctx(urlEntities: Seq[UrlEntity], underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy

  val hydratedField: FieldByPath = fieldByPath(Tweet.QuotedTweetField)

  // Result used whenever the quoted-tweet reference could not be determined.
  private val partialResult = ValueState.partial(None, hydratedField)

  val queryOptions: TweetQuery.Options =
    TweetQuery.Options(
      include = TweetQuery.Include(Set(Tweet.CoreDataField.id)),
      // Don't enforce visibility filtering when loading the QuotedTweet struct because it is
      // cacheable. The filtering happens in QuoteTweetVisibilityHydrator.
      enforceVisibilityFiltering = false,
      forUserId = None
    )

  def once(h: Type): Type =
    TweetHydration.completeOnlyOnce(
      queryFilter = queryFilter,
      hydrationType = HydrationType.QuotedTweetRef,
      dependsOn = Set(HydrationType.Urls),
      hydrator = h
    )

  case class UrlHydrationFailed(url: String) extends Exception

  /**
   * Walks the UrlEntity objects from last to first looking for the rightmost
   * TweetPermalink, and returns that entity together with the tweet id it points to.
   * The result is dropped unless the referenced id is older (smaller) than the tweet
   * being hydrated, which prevents tweet-quoting cycles.
   *
   * @throws UrlHydrationFailed if we encounter a partial URL entity before
   * finding a tweet permalink URL.
   */
  def quotedTweetId(ctx: Ctx): Option[(UrlEntity, TweetId)] = {
    // The iterator is lazy, so entities left of the first permalink are never inspected.
    val rightmostPermalink =
      ctx.urlEntities.reverseIterator
        .map { entity: UrlEntity =>
          if (UrlEntityHydrator.hydrationFailed(entity)) throw UrlHydrationFailed(entity.url)
          else (entity, entity.expanded)
        }
        .collectFirst {
          case (entity, Some(TweetPermalink(_, referencedId))) => (entity, referencedId)
        }

    rightmostPermalink.filter { case (_, referencedId) => ctx.tweetId > referencedId }
  }

  def buildShortenedUrl(e: UrlEntity): ShortenedUrl =
    ShortenedUrl(
      shortUrl = e.url,
      // Reading from MH will also default the following to "".
      // QuotedTweetRefUrlsHydrator will hydrate these cases
      longUrl = e.expanded.getOrElse(""),
      displayText = e.display.getOrElse("")
    )

  /**
   * We run this hydrator only if all of the following hold:
   *
   * - the quoted_tweet struct is empty, or it is present but its permalink is not
   * - url entities are present. QT hydration depends on urls - the long term goal
   *   is to rely entirely on the persisted quoted_tweet struct in MH
   * - the requested tweet is not a retweet
   *
   * Hydration steps:
   * - Determine the last tweet permalink from the url entities
   * - Extract the inner tweet id from the permalink
   * - Query the tweet repo with the inner tweet id
   * - Construct the quoted_tweet struct from the hydrated tweet object and last permalink
   */
  def apply(repo: TweetRepository.Type): Type =
    ValueHydrator[Option[QuotedTweet], Ctx] { (_, ctx) =>
      // Wrapping in Stitch captures any exception thrown by quotedTweetId
      Stitch(quotedTweetId(ctx)).liftToTry.flatMap {
        case Throw(_) => Stitch(partialResult)
        case Return(None) => Stitch(ValueState.UnmodifiedNone)
        case Return(Some((permalinkEntity, innerTweetId))) =>
          repo(innerTweetId, queryOptions).liftToTry.map {
            case Return(innerTweet) =>
              ValueState.modified(
                Some(asQuotedTweet(innerTweet, permalinkEntity))
              )
            case Throw(NotFound | _: FilteredState) => ValueState.UnmodifiedNone
            case Throw(_) => partialResult
          }
      }
    }.onlyIf { (curr, ctx) =>
      curr.forall(_.permalink.isEmpty) &&
      !ctx.isRetweet && ctx.urlEntities.nonEmpty
    }

  def queryFilter(opts: TweetQuery.Options): Boolean =
    opts.include.tweetFields(Tweet.QuotedTweetField.id)

  /**
   * We construct Tweet.quoted_tweet from the hydrated inner tweet.
   * Note: if the inner tweet is a Retweet, we populate the quoted_tweet struct from
   * the source tweet.
   */
  def asQuotedTweet(tweet: Tweet, entity: UrlEntity): QuotedTweet = {
    val permalink = Some(buildShortenedUrl(entity))
    getShare(tweet)
      .map(share => QuotedTweet(share.sourceStatusId, share.sourceUserId, permalink))
      .getOrElse(QuotedTweet(tweet.id, getUserId(tweet), permalink))
  }
}
/**
 * Populates the expanded URL and display text of the ShortenedUrl struct that is
 * part of the QuotedTweet metadata. The User Identity repo is used to look up the
 * quoted user's current screen name and build the expanded url from it, instead of
 * relying on URL hydration.
 *
 * Expanded urls contain a mutable screen name and an immutable tweetId. When
 * visiting the link you are always redirected to the link with the correct screen
 * name, so it is okay for cached permalinks to contain screen names that the user
 * has since changed. Keys are auto-refreshed based on the 14-day TTL; a daemon
 * could also flush the keys on screen-name change.
 */
object QuotedTweetRefUrlsHydrator {
  type Type = ValueHydrator[Option[QuotedTweet], TweetCtx]

  /**
   * Return true if longUrl or displayText is not set, or if longUrl equals shortUrl,
   * which is what a prior hydration writes when the user lookup fails (so that
   * hydration is re-attempted).
   */
  def needsHydration(s: ShortenedUrl): Boolean =
    s.longUrl.isEmpty || s.displayText.isEmpty || s.longUrl == s.shortUrl

  def apply(repo: UserIdentityRepository.Type): Type = {
    val hydratePermalink =
      ValueHydrator[QuotedTweet, TweetCtx] { (curr, _) =>
        repo(UserKey(curr.userId)).liftToTry.map { lookup =>
          // the guard below verifies curr.permalink exists pre-hydration
          val shortUrl = curr.permalink.get.shortUrl
          val expandedUrl = lookup match {
            case Return(user) => TweetPermalink(user.screenName, curr.tweetId).httpsUrl
            case Throw(_) => shortUrl // fall back to shortUrl as expandedUrl
          }
          val refreshedPermalink =
            ShortenedUrl(
              shortUrl,
              expandedUrl,
              DisplayUrl.truncateUrl(expandedUrl, true)
            )
          ValueState.delta(curr, curr.copy(permalink = Some(refreshedPermalink)))
        }
      }

    hydratePermalink
      .onlyIf { (curr, ctx) =>
        curr.permalink.exists(needsHydration) &&
        ctx.tweetFieldRequested(Tweet.QuotedTweetField) && !ctx.isRetweet
      }
      .liftOption
  }
}
/**
 * Fills in `Reply.inReplyToScreenName` by looking up the replied-to user in the
 * user identity repository. Only replies whose screen name is still empty are
 * looked up.
 */
object ReplyScreenNameHydrator {
  import TweetLenses.Reply.{inReplyToScreenName => screenNameLens}

  type Type = ValueHydrator[Option[Reply], TweetCtx]

  val hydratedField: FieldByPath =
    fieldByPath(Tweet.CoreDataField, TweetCoreData.ReplyField, Reply.InReplyToScreenNameField)

  def once(h: ValueHydrator[Option[Reply], TweetCtx]): Type =
    TweetHydration.completeOnlyOnce(
      hydrationType = HydrationType.ReplyScreenName,
      hydrator = h
    )

  def apply[C](repo: UserIdentityRepository.Type): ValueHydrator[Option[Reply], C] = {
    val lookupScreenName =
      ValueHydrator[Reply, C] { (reply, _) =>
        repo(UserKey(reply.inReplyToUserId)).liftToTry.map {
          case Return(user) =>
            ValueState.modified(screenNameLens.set(reply, Some(user.screenName)))
          // An unknown user is not an error: keep the reply as it is.
          case Throw(NotFound) => ValueState.unmodified(reply)
          // Any other failure marks the screen-name field as partially hydrated.
          case Throw(_) => ValueState.partial(reply, hydratedField)
        }
      }

    lookupScreenName
      .onlyIf((reply, _) => screenNameLens.get(reply).isEmpty)
      .liftOption
  }
}
/**
 * Fails hydration for tweets the requesting user has reported. It runs only when
 * `excludeReported` is set and a `forUserId` is present.
 */
object ReportedTweetFilter {
  type Type = ValueHydrator[Unit, Ctx]

  object MissingPerspectiveError
      extends TweetHydrationError("Cannot determine reported state because perspective is missing")

  case class Ctx(perspective: Option[StatusPerspective], underlyingTweetCtx: TweetCtx)
      extends TweetCtx.Proxy

  def apply(): Type = {
    val rejectReported =
      ValueHydrator[Unit, Ctx] { (_, ctx) =>
        ctx.perspective match {
          // Without a perspective the reported state is unknown, which is an error.
          case None => Stitch.exception(MissingPerspectiveError)
          case Some(p) if p.reported => Stitch.exception(FilteredState.Unavailable.Reported)
          case Some(_) => ValueState.StitchUnmodifiedUnit
        }
      }

    rejectReported.onlyIf { (_, ctx) =>
      ctx.opts.excludeReported && ctx.opts.forUserId.isDefined
    }
  }
}
/**
 * Retweets should never have their own media, and should never be cached with a media
 * entity. This mutation clears the media of any retweet that carries some and reports
 * no change (None) otherwise.
 */
object RetweetMediaRepairer extends Mutation[Tweet] {
  def apply(tweet: Tweet): Option[Tweet] = {
    val retweetCarriesMedia = isRetweet(tweet) && getMedia(tweet).nonEmpty
    if (retweetCarriesMedia) Some(TweetLenses.media.set(tweet, Nil)) else None
  }
}
object ScrubUncacheable {

  // A mutation that scrubs a tweet for cache: it clears place, counts, deviceSource,
  // perspective, cards, card2, spamLabels and conversationMuted. It reports a change
  // only when at least one of those fields was set.
  val tweetMutation: Mutation[Tweet] =
    Mutation { tweet =>
      val hasUncacheableField =
        tweet.place.isDefined ||
          tweet.counts.isDefined ||
          tweet.deviceSource.isDefined ||
          tweet.perspective.isDefined ||
          tweet.cards.isDefined ||
          tweet.card2.isDefined ||
          tweet.spamLabels.isDefined ||
          tweet.conversationMuted.isDefined

      if (hasUncacheableField) {
        val scrubbed = tweet.copy(
          place = None,
          counts = None,
          deviceSource = None,
          perspective = None,
          cards = None,
          card2 = None,
          spamLabels = None,
          conversationMuted = None
        )
        Some(scrubbed)
      } else {
        None
      }
    }

  // Throws an AssertionError (carrying `message`) when a tweet would be scrubbed.
  def assertNotScrubbed(message: String): Mutation[Tweet] =
    tweetMutation.withEffect(Effect(update => assert(update.isEmpty, message)))
}
/**
 * Loads the source tweet for a retweet
 */
object SourceTweetHydrator {
  type Type = ValueHydrator[Option[TweetResult], TweetCtx]

  def configureOptions(opts: TweetQuery.Options): TweetQuery.Options = {
    // scrubUnrequestedFields is false so that we will have access to additional
    // fields, which will be copied into the retweet.
    // fetchStoredTweets is false because we don't want to fetch and hydrate the
    // source tweet if it is deleted.
    opts.copy(scrubUnrequestedFields = false, fetchStoredTweets = false, isSourceTweet = true)
  }

  // Matches the failures that mean "the source tweet is gone" and extracts whether
  // it was deleted (true) or simply not found (false).
  private object NotFoundException {
    def unapply(t: Throwable): Option[Boolean] =
      PartialFunction.condOpt(t) {
        case NotFound => false
        case TweetDeleted | BounceDeleted => true
      }
  }

  def apply(
    repo: TweetResultRepository.Type,
    stats: StatsReceiver,
    scribeDetachedRetweets: FutureEffect[DetachedRetweet] = FutureEffect.unit
  ): Type = {
    val notFoundCounter = stats.counter("not_found")

    val loadSource =
      ValueHydrator[Option[TweetResult], TweetCtx] { (_, ctx) =>
        ctx.sourceTweetId match {
          case Some(srcTweetId) =>
            repo(srcTweetId, configureOptions(ctx.opts)).liftToTry.flatMap {
              case Return(source) =>
                Stitch.value(ValueState.modified(Some(source)))

              case Throw(NotFoundException(isDeleted)) =>
                // Count and scribe the detached retweet before deciding whether the
                // missing source tweet is fatal for this request.
                notFoundCounter.incr()
                scribeDetachedRetweets(detachedRetweet(srcTweetId, ctx))
                if (ctx.opts.requireSourceTweet) {
                  Stitch.exception(SourceTweetNotFound(isDeleted))
                } else {
                  ValueState.StitchUnmodifiedNone
                }

              case Throw(other) =>
                Stitch.exception(other)
            }

          case None =>
            ValueState.StitchUnmodifiedNone
        }
      }

    loadSource.onlyIf((curr, _) => curr.isEmpty)
  }

  def detachedRetweet(srcTweetId: TweetId, ctx: TweetCtx): DetachedRetweet =
    DetachedRetweet(ctx.tweetId, ctx.userId, srcTweetId)
}
/**
 * Removes superfluous url entities when there is a corresponding MediaEntity for the
 * same url. Two entities correspond when their fromIndex, toIndex and url all match.
 */
object SuperfluousUrlEntityScrubber {
  case class RawEntity(fromIndex: Short, toIndex: Short, url: String)

  object RawEntity {
    def from(e: UrlEntity): RawEntity = RawEntity(e.fromIndex, e.toIndex, e.url)
    def fromUrls(es: Seq[UrlEntity]): Set[RawEntity] = es.map(from(_)).toSet
    def from(e: MediaEntity): RawEntity = RawEntity(e.fromIndex, e.toIndex, e.url)
    def fromMedia(es: Seq[MediaEntity]): Set[RawEntity] = es.map(from(_)).toSet
  }

  val mutation: Mutation[Tweet] =
    Mutation[Tweet] { tweet =>
      val media = getMedia(tweet)
      val urls = getUrls(tweet)

      if (media.isEmpty || urls.isEmpty) {
        // Nothing can overlap, so there is nothing to scrub.
        None
      } else {
        val mediaKeys = RawEntity.fromMedia(media)
        val keptUrls = urls.filterNot(u => mediaKeys.contains(RawEntity.from(u)))

        // Report a change only when at least one url entity was dropped.
        if (keptUrls.size == urls.size) None
        else Some(TweetLenses.urls.set(tweet, keptUrls))
      }
    }
}
com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala.FieldByPath
+import com.twitter.tweetypie.util.Takedowns
+
+/**
+ * Hydrates per-country takedowns, which are the union of:
+ * 1. per-tweet takedowns, from tweetypieOnlyTakedown{CountryCode|Reasons} fields
+ * 2. user takedowns, read from gizmoduck.
+ *
+ * For backwards compatibility this hydrator converts to and from
+ * [[com.twitter.tseng.withholding.thriftscala.TakedownReason]], which is possible because a
+ * taken-down country code can always be represented as a
+ * [[com.twitter.tseng.withholding.thriftscala.UnspecifiedReason]].
+ */
+object TakedownHydrator {
+  type Type = ValueHydrator[Option[Takedowns], Ctx]
+
+  case class Ctx(tweetTakedowns: Takedowns, underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy
+
+  // Tweet fields owned by this hydrator; they are reported as partial if the lookup fails.
+  val hydratedFields: Set[FieldByPath] =
+    Set(fieldByPath(Tweet.TakedownCountryCodesField), fieldByPath(Tweet.TakedownReasonsField))
+
+  // Runs only for tweets that have a takedown and when a takedown field was requested.
+  def apply(repo: UserTakedownRepository.Type): Type =
+    ValueHydrator[Option[Takedowns], Ctx] { (curr, ctx) =>
+      repo(ctx.userId).liftToTry.map {
+        case Throw(_) =>
+          ValueState.partial(curr, hydratedFields)
+        case Return(fromUser) =>
+          // Tweet-level and user-level reasons are merged and de-duplicated.
+          val merged = Seq.concat(ctx.tweetTakedowns.reasons, fromUser).toSet
+          ValueState.delta(curr, Some(Takedowns(merged)))
+      }
+    }.onlyIf { (_, ctx) =>
+      val takedownFieldRequested =
+        ctx.tweetFieldRequested(Tweet.TakedownCountryCodesField) ||
+          ctx.tweetFieldRequested(Tweet.TakedownReasonsField)
+      takedownFieldRequested && ctx.hasTakedown
+    }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala
new file mode 100644
index 000000000..5a5e62c3d
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TextRepairer.scala
@@ -0,0 +1,47 @@
+package com.twitter.tweetypie
+package hydrator
+
+import
com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder
+import com.twitter.tweetypie.tweettext.Preprocessor._
+import com.twitter.tweetypie.tweettext.TextModification
+import com.twitter.tweetypie.thriftscala.entities.Implicits._
+
+object TextRepairer {
+  /**
+   * Builds a mutation that feeds the tweet text to `replace`. When a modification comes back,
+   * the text is updated, the entities and visible text range are re-indexed through it, and
+   * the extended tweet metadata is recomputed; otherwise the mutation yields None.
+   */
+  def apply(replace: String => Option[TextModification]): Mutation[Tweet] =
+    Mutation { tweet =>
+      replace(getText(tweet)).map { mod =>
+        val repaired = tweet.copy(
+          coreData = tweet.coreData.map(core => core.copy(text = mod.updated)),
+          urls = Some(getUrls(tweet).flatMap(mod.reindexEntity(_))),
+          mentions = Some(getMentions(tweet).flatMap(mod.reindexEntity(_))),
+          hashtags = Some(getHashtags(tweet).flatMap(mod.reindexEntity(_))),
+          cashtags = Some(getCashtags(tweet).flatMap(mod.reindexEntity(_))),
+          media = Some(getMedia(tweet).flatMap(mod.reindexEntity(_))),
+          visibleTextRange = tweet.visibleTextRange.flatMap(mod.reindexEntity(_))
+        )
+
+        // Metadata whose truncation index equals the repaired text length is dropped.
+        val extendedMetadata = repaired.selfPermalink
+          .map(permalink => ExtendedTweetMetadataBuilder(repaired, permalink))
+          .filter(_.apiCompatibleTruncationIndex != getText(repaired).length)
+
+        repaired.copy(extendedTweetMetadata = extendedMetadata)
+      }
+    }
+
+  /**
+   * Collapses blank lines in the tweet text, and updates all entity indices.
+   */
+  val BlankLineCollapser: Mutation[Tweet] = TextRepairer(collapseBlankLinesModification _)
+
+  /**
+   * Replaces a special unicode string that crashes the iOS app with '\ufffd'
+   */
+  val CoreTextBugPatcher: Mutation[Tweet] = TextRepairer(replaceCoreTextBugModification _)
+
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala
new file mode 100644
index 000000000..c9c5c71f9
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetAuthorVisibilityHydrator.scala
@@ -0,0 +1,43 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+
+/**
+ * Ensures that the tweet's author and source tweet's author (if retweet) are visible to the
+ * viewing user - ctx.opts.forUserId - when enforceVisibilityFiltering is true.
+ * If either of these users is not visible then hydration fails with a FilteredState.Unavailable.
+ *
+ * Note: blocking relationship is NOT checked here, this means if viewing user `forUserId` is blocked
+ * by either the tweet's author or source tweet's author, this will not filter out the tweet.
+ */
+object TweetAuthorVisibilityHydrator {
+  type Type = ValueHydrator[Unit, TweetCtx]
+
+  def apply(repo: UserVisibilityRepository.Type): Type =
+    ValueHydrator[Unit, TweetCtx] { (_, ctx) =>
+      // The tweet's author first, then the source tweet's author when there is one.
+      val queries = (Seq(ctx.userId) ++ ctx.sourceUserId).map(toRepoQuery(_, ctx))
+
+      Stitch.traverse(queries)(repo.apply).flatMap { responses =>
+        // Only the first filtered state, in query order, is surfaced.
+        val firstUnavailable: Option[FilteredState.Unavailable] = responses.flatten.headOption
+        firstUnavailable match {
+          case None => ValueState.StitchUnmodifiedUnit
+          case Some(unavailable) => Stitch.exception(unavailable)
+        }
+      }
+    }.onlyIf((_, ctx) => ctx.opts.enforceVisibilityFiltering)
+
+  // Builds the visibility query for one author on behalf of the viewer in ctx.opts.forUserId.
+  private def toRepoQuery(userId: UserId, ctx: TweetCtx) =
+    UserVisibilityRepository.Query(
+      UserKey(userId),
+      ctx.opts.forUserId,
+      ctx.tweetId,
+      ctx.isRetweet,
+      ctx.opts.isInnerQuotedTweet,
+      Some(ctx.opts.safetyLevel))
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala
new file mode 100644
index 000000000..17462081a
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCountsHydrator.scala
@@ -0,0 +1,212 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.featureswitches.v2.FeatureSwitchResults
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+import scala.collection.mutable
+
+/**
+ * Hydrates the requested engagement counts (retweets, replies, favorites, quotes, bookmarks)
+ * of a tweet, looking up each count that is still missing in the counts repository.
+ */
+object TweetCountsHydrator {
+  type Type = ValueHydrator[Option[StatusCounts], Ctx]
+
+  case class Ctx(featureSwitchResults: Option[FeatureSwitchResults], underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  val retweetCountField: FieldByPath =
+    fieldByPath(Tweet.CountsField, StatusCounts.RetweetCountField)
+  val replyCountField: FieldByPath = fieldByPath(Tweet.CountsField, StatusCounts.ReplyCountField)
+  val favoriteCountField: FieldByPath =
+    fieldByPath(Tweet.CountsField, StatusCounts.FavoriteCountField)
+  val quoteCountField: FieldByPath = fieldByPath(Tweet.CountsField, StatusCounts.QuoteCountField)
+  val bookmarkCountField: FieldByPath =
+    fieldByPath(Tweet.CountsField, StatusCounts.BookmarkCountField)
+
+  val emptyCounts = StatusCounts()
+
+  // Returned when a single count lookup fails: empty counts, partial for that one field.
+  val retweetCountPartial = ValueState.partial(emptyCounts, retweetCountField)
+  val replyCountPartial = ValueState.partial(emptyCounts, replyCountField)
+  val favoriteCountPartial = ValueState.partial(emptyCounts, favoriteCountField)
+  val quoteCountPartial = ValueState.partial(emptyCounts, quoteCountField)
+  val bookmarkCountPartial = ValueState.partial(emptyCounts, bookmarkCountField)
+
+  // Feature switch that turns on bookmark count hydration (read in filterRequestedCounts).
+  val bookmarksCountHydrationEnabledKey = "bookmarks_count_hydration_enabled"
+
+  /**
+   * Take a Seq of StatusCounts and reduce down to a single StatusCounts; for each field the
+   * last defined value in the Seq wins.
+   * Note: `reduce` here is safe because we are guaranteed to always have at least
+   * one value.
+   */
+  def reduceStatusCounts(counts: Seq[StatusCounts]): StatusCounts =
+    counts.reduce { (a, b) =>
+      StatusCounts(
+        retweetCount = b.retweetCount.orElse(a.retweetCount),
+        replyCount = b.replyCount.orElse(a.replyCount),
+        favoriteCount = b.favoriteCount.orElse(a.favoriteCount),
+        quoteCount = b.quoteCount.orElse(a.quoteCount),
+        bookmarkCount = b.bookmarkCount.orElse(a.bookmarkCount)
+      )
+    }
+
+  /**
+   * Builds the repository keys for the requested count fields that `curr` does not already
+   * hold a value for. Field ids that match none of the known counts are ignored.
+   */
+  def toKeys(
+    tweetId: TweetId,
+    countsFields: Set[FieldId],
+    curr: Option[StatusCounts]
+  ): Seq[TweetCountKey] = {
+    // At most one key is added per requested field, so the request size bounds the buffer.
+    val keys = new mutable.ArrayBuffer[TweetCountKey](countsFields.size)
+
+    countsFields.foreach {
+      case StatusCounts.RetweetCountField.id =>
+        if (curr.flatMap(_.retweetCount).isEmpty)
+          keys += RetweetsKey(tweetId)
+
+      case StatusCounts.ReplyCountField.id =>
+        if (curr.flatMap(_.replyCount).isEmpty)
+          keys += RepliesKey(tweetId)
+
+      case StatusCounts.FavoriteCountField.id =>
+        if (curr.flatMap(_.favoriteCount).isEmpty)
+          keys += FavsKey(tweetId)
+
+      case StatusCounts.QuoteCountField.id =>
+        if (curr.flatMap(_.quoteCount).isEmpty)
+          keys += QuotesKey(tweetId)
+
+      case StatusCounts.BookmarkCountField.id =>
+        if (curr.flatMap(_.bookmarkCount).isEmpty)
+          keys += BookmarksKey(tweetId)
+
+      case _ =>
+    }
+
+    keys
+  }
+
+  /*
+   * Get a StatusCounts object for a specific tweet and specific field (e.g. only fav, or reply etc).
+   * StatusCounts returned from here can be combined with other StatusCounts using
+   * `reduceStatusCounts`. A failed lookup yields the matching `*CountPartial` value rather
+   * than a failed Stitch.
+   */
+  def statusCountsRepo(
+    key: TweetCountKey,
+    repo: TweetCountsRepository.Type
+  ): Stitch[ValueState[StatusCounts]] =
+    repo(key).liftToTry.map {
+      case Return(count) =>
+        ValueState.modified(
+          key match {
+            case _: RetweetsKey => StatusCounts(retweetCount = Some(count))
+            case _: RepliesKey => StatusCounts(replyCount = Some(count))
+            case _: FavsKey => StatusCounts(favoriteCount = Some(count))
+            case _: QuotesKey => StatusCounts(quoteCount = Some(count))
+            case _: BookmarksKey => StatusCounts(bookmarkCount = Some(count))
+          }
+        )
+
+      case Throw(_) =>
+        key match {
+          case _: RetweetsKey => retweetCountPartial
+          case _: RepliesKey => replyCountPartial
+          case _: FavsKey => favoriteCountPartial
+          case _: QuotesKey => quoteCountPartial
+          case _: BookmarksKey => bookmarkCountPartial
+        }
+    }
+
+  /**
+   * Drops the bookmark count from `requestedCounts` unless bookmark hydration is enabled for
+   * `userId`, either through `bookmarkCountsDecider` or through the
+   * `bookmarks_count_hydration_enabled` feature switch. All other fields pass through.
+   */
+  def filterRequestedCounts(
+    userId: UserId,
+    requestedCounts: Set[FieldId],
+    bookmarkCountsDecider: Gate[Long],
+    featureSwitchResults: Option[FeatureSwitchResults]
+  ): Set[FieldId] = {
+    if (requestedCounts.contains(StatusCounts.BookmarkCountField.id))
+      if (bookmarkCountsDecider(userId) ||
+        featureSwitchResults
+          .flatMap(_.getBoolean(bookmarksCountHydrationEnabledKey, false))
+          .getOrElse(false))
+        requestedCounts
+      else
+        requestedCounts.filter(_ != StatusCounts.BookmarkCountField.id)
+    else
+      requestedCounts
+  }
+
+  /**
+   * Builds the hydrator. Retweets are given zero for every requested count without consulting
+   * the repository; other tweets get each requested count that is still missing from `curr`.
+   * Nothing runs when no count field remains requested after filtering.
+   */
+  def apply(repo: TweetCountsRepository.Type, shouldHydrateBookmarksCount: Gate[Long]): Type = {
+
+    val all: Set[FieldId] = StatusCounts.fieldInfos.map(_.tfield.id).toSet
+
+    // Precomputed result for every subset of the known count fields: zero where the field is
+    // in the subset, None elsewhere.
+    val modifiedZero: Map[Set[FieldId], ValueState[Some[StatusCounts]]] = {
+      for (set <- all.subsets) yield {
+        @inline
+        def zeroOrNone(fieldId: FieldId) =
+          if (set.contains(fieldId)) Some(0L) else None
+
+        val statusCounts =
+          StatusCounts(
+            retweetCount = zeroOrNone(StatusCounts.RetweetCountField.id),
+            replyCount = zeroOrNone(StatusCounts.ReplyCountField.id),
+            favoriteCount = zeroOrNone(StatusCounts.FavoriteCountField.id),
+            quoteCount = zeroOrNone(StatusCounts.QuoteCountField.id),
+            bookmarkCount = zeroOrNone(StatusCounts.BookmarkCountField.id)
+          )
+
+        set -> ValueState.modified(Some(statusCounts))
+      }
+    }.toMap
+
+    // Count fields to hydrate for this request; shared by the hydrator body and its guard.
+    def requestedCounts(ctx: Ctx): Set[FieldId] =
+      filterRequestedCounts(
+        ctx.opts.forUserId.getOrElse(ctx.userId),
+        ctx.opts.include.countsFields,
+        shouldHydrateBookmarksCount,
+        ctx.featureSwitchResults
+      )
+
+    ValueHydrator[Option[StatusCounts], Ctx] { (curr, ctx) =>
+      val countsFields: Set[FieldId] = requestedCounts(ctx)
+      if (ctx.isRetweet) {
+        // To avoid a reflection-induced key error where the countsFields can contain a fieldId
+        // that is not in the thrift schema loaded at start, we strip unknown field_ids using
+        // `intersect`
+        Stitch.value(modifiedZero(countsFields.intersect(all)))
+      } else {
+        val keys = toKeys(ctx.tweetId, countsFields, curr)
+
+        Stitch.traverse(keys)(key => statusCountsRepo(key, repo)).map { results =>
+          // always flag modified if starting from None
+          val vs0 = ValueState.success(curr.getOrElse(emptyCounts), curr.isEmpty)
+          val vs = vs0 +: results
+
+          ValueState.sequence(vs).map(reduceStatusCounts).map(Some(_))
+        }
+      }
+    }.onlyIf((_, ctx) => requestedCounts(ctx).nonEmpty)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala
new file mode 100644
index 000000000..5540dc8dc
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetCtx.scala
@@ -0,0 +1,90 @@
+package
com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie
+import com.twitter.tweetypie.core.TweetData
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.thriftscala._
+import org.apache.thrift.protocol.TField
+
+/**
+ * Encapsulates basic, immutable details about a tweet to be hydrated, along with the
+ * `TweetQuery.Options`. Only tweet data that are not affected by hydration should be
+ * exposed here, as a single `TweetCtx` instance should be usable for the entire hydration
+ * of a tweet.
+ */
+trait TweetCtx {
+  def opts: TweetQuery.Options
+
+  def tweetId: TweetId
+  def userId: UserId
+  def text: String
+  def createdAt: Time
+  def createdVia: String
+  def isRetweet: Boolean
+  def isReply: Boolean
+  def isSelfReply: Boolean
+  def sourceUserId: Option[UserId]
+  def sourceTweetId: Option[TweetId]
+  def inReplyToTweetId: Option[TweetId]
+  def geoCoordinates: Option[GeoCoordinates]
+  def placeId: Option[String]
+  def hasTakedown: Boolean
+  def quotedTweet: Option[QuotedTweet]
+  def completedHydrations: Set[HydrationType]
+
+  def isInitialInsert: Boolean = opts.cause.initialInsert(tweetId)
+
+  // Whether the query's include options ask for the given tweet / media field.
+  def tweetFieldRequested(field: TField): Boolean = tweetFieldRequested(field.id)
+  def tweetFieldRequested(fieldId: FieldId): Boolean = opts.include.tweetFields.contains(fieldId)
+  def mediaFieldRequested(field: TField): Boolean = mediaFieldRequested(field.id)
+  def mediaFieldRequested(fieldId: FieldId): Boolean = opts.include.mediaFields.contains(fieldId)
+}
+
+object TweetCtx {
+  def from(td: TweetData, opts: TweetQuery.Options): TweetCtx = FromTweetData(td, opts)
+
+  /** Forwards every abstract [[TweetCtx]] member to `underlyingTweetCtx`. */
+  trait Proxy extends TweetCtx {
+    protected def underlyingTweetCtx: TweetCtx
+    def opts: TweetQuery.Options = underlyingTweetCtx.opts
+    def tweetId: TweetId = underlyingTweetCtx.tweetId
+    def userId: UserId = underlyingTweetCtx.userId
+    def text: String = underlyingTweetCtx.text
+    def createdAt: Time = underlyingTweetCtx.createdAt
+    def createdVia: String = underlyingTweetCtx.createdVia
+    def isRetweet: Boolean = underlyingTweetCtx.isRetweet
+    def isReply: Boolean = underlyingTweetCtx.isReply
+    def isSelfReply: Boolean = underlyingTweetCtx.isSelfReply
+    def sourceUserId: Option[UserId] = underlyingTweetCtx.sourceUserId
+    def sourceTweetId: Option[TweetId] = underlyingTweetCtx.sourceTweetId
+    def inReplyToTweetId: Option[TweetId] = underlyingTweetCtx.inReplyToTweetId
+    def geoCoordinates: Option[GeoCoordinates] = underlyingTweetCtx.geoCoordinates
+    def placeId: Option[String] = underlyingTweetCtx.placeId
+    def hasTakedown: Boolean = underlyingTweetCtx.hasTakedown
+    def completedHydrations: Set[HydrationType] = underlyingTweetCtx.completedHydrations
+    def quotedTweet: Option[QuotedTweet] = underlyingTweetCtx.quotedTweet
+  }
+
+  /** Reads from `td.tweet`, except `completedHydrations`, which comes from `td` itself. */
+  private case class FromTweetData(td: TweetData, opts: TweetQuery.Options) extends TweetCtx {
+    private val tweet = td.tweet
+    def tweetId: TweetId = tweet.id
+    def userId: UserId = getUserId(tweet)
+    def text: String = getText(tweet)
+    def createdAt: Time = getTimestamp(tweet)
+    def createdVia: String = TweetLenses.createdVia.get(tweet)
+    def isRetweet: Boolean = getShare(tweet).isDefined
+    def isSelfReply: Boolean = tweetypie.isSelfReply(tweet)
+    def isReply: Boolean = getReply(tweet).isDefined
+    def sourceUserId: Option[UserId] = getShare(tweet).map(_.sourceUserId)
+    def sourceTweetId: Option[TweetId] = getShare(tweet).map(_.sourceStatusId)
+    def inReplyToTweetId: Option[TweetId] = getReply(tweet).flatMap(_.inReplyToStatusId)
+    def geoCoordinates: Option[GeoCoordinates] = TweetLenses.geoCoordinates.get(tweet)
+    def placeId: Option[String] = TweetLenses.placeId.get(tweet)
+    def hasTakedown: Boolean = TweetLenses.hasTakedown(tweet)
+    def completedHydrations: Set[HydrationType] = td.completedHydrations
+    def quotedTweet: Option[QuotedTweet] = getQuotedTweet(tweet)
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala new file mode 100644 index 000000000..a12295322 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetHydration.scala @@ -0,0 +1,848 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.expandodo.thriftscala.Card +import com.twitter.expandodo.thriftscala.Card2 +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.CachedValueStatus +import com.twitter.servo.cache.LockingCache +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.media.thriftscala.MediaRef +import com.twitter.tweetypie.repository.PastedMedia +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.repository.TweetRepoCachePicker +import com.twitter.tweetypie.repository.TweetResultRepository +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.Takedowns +import com.twitter.util.Return +import com.twitter.util.Throw + +object TweetHydration { + + /** + * Wires up a set of hydrators that include those whose results are cached on the tweet, + * and some whose results are not cached but depend upon the results of the former. 
+ */ + def apply( + hydratorStats: StatsReceiver, + hydrateFeatureSwitchResults: TweetDataValueHydrator, + hydrateMentions: MentionEntitiesHydrator.Type, + hydrateLanguage: LanguageHydrator.Type, + hydrateUrls: UrlEntitiesHydrator.Type, + hydrateQuotedTweetRef: QuotedTweetRefHydrator.Type, + hydrateQuotedTweetRefUrls: QuotedTweetRefUrlsHydrator.Type, + hydrateMediaCacheable: MediaEntitiesHydrator.Cacheable.Type, + hydrateReplyScreenName: ReplyScreenNameHydrator.Type, + hydrateConvoId: ConversationIdHydrator.Type, + hydratePerspective: PerspectiveHydrator.Type, + hydrateEditPerspective: EditPerspectiveHydrator.Type, + hydrateConversationMuted: ConversationMutedHydrator.Type, + hydrateContributor: ContributorHydrator.Type, + hydrateTakedowns: TakedownHydrator.Type, + hydrateDirectedAt: DirectedAtHydrator.Type, + hydrateGeoScrub: GeoScrubHydrator.Type, + hydrateCacheableRepairs: TweetDataValueHydrator, + hydrateMediaUncacheable: MediaEntitiesHydrator.Uncacheable.Type, + hydratePostCacheRepairs: TweetDataValueHydrator, + hydrateTweetLegacyFormat: TweetDataValueHydrator, + hydrateQuoteTweetVisibility: QuoteTweetVisibilityHydrator.Type, + hydrateQuotedTweet: QuotedTweetHydrator.Type, + hydratePastedMedia: PastedMediaHydrator.Type, + hydrateMediaRefs: MediaRefsHydrator.Type, + hydrateMediaTags: MediaTagsHydrator.Type, + hydrateClassicCards: CardHydrator.Type, + hydrateCard2: Card2Hydrator.Type, + hydrateContributorVisibility: ContributorVisibilityFilter.Type, + hydrateHasMedia: HasMediaHydrator.Type, + hydrateTweetCounts: TweetCountsHydrator.Type, + hydratePreviousTweetCounts: PreviousTweetCountsHydrator.Type, + hydratePlace: PlaceHydrator.Type, + hydrateDeviceSource: DeviceSourceHydrator.Type, + hydrateProfileGeo: ProfileGeoHydrator.Type, + hydrateSourceTweet: SourceTweetHydrator.Type, + hydrateIM1837State: IM1837FilterHydrator.Type, + hydrateIM2884State: IM2884FilterHydrator.Type, + hydrateIM3433State: IM3433FilterHydrator.Type, + hydrateTweetAuthorVisibility: 
TweetAuthorVisibilityHydrator.Type, + hydrateReportedTweetVisibility: ReportedTweetFilter.Type, + scrubSuperfluousUrlEntities: TweetDataValueHydrator, + copyFromSourceTweet: TweetDataValueHydrator, + hydrateTweetVisibility: TweetVisibilityHydrator.Type, + hydrateEscherbirdAnnotations: EscherbirdAnnotationHydrator.Type, + hydrateScrubEngagements: ScrubEngagementHydrator.Type, + hydrateConversationControl: ConversationControlHydrator.Type, + hydrateEditControl: EditControlHydrator.Type, + hydrateUnmentionData: UnmentionDataHydrator.Type, + hydrateNoteTweetSuffix: TweetDataValueHydrator + ): TweetDataValueHydrator = { + val scrubCachedTweet: TweetDataValueHydrator = + ValueHydrator + .fromMutation[Tweet, TweetQuery.Options]( + ScrubUncacheable.tweetMutation.countMutations(hydratorStats.counter("scrub_cached_tweet")) + ) + .lensed(TweetData.Lenses.tweet) + .onlyIf((td, opts) => opts.cause.reading(td.tweet.id)) + + // We perform independent hydrations of individual bits of + // data and pack the results into tuples instead of updating + // the tweet for each one in order to avoid making lots of + // copies of the tweet. 
+ + val hydratePrimaryCacheableFields: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val urlsMediaQuoteTweet: Stitch[ + ValueState[(Seq[UrlEntity], Seq[MediaEntity], Option[QuotedTweet])] + ] = + for { + urls <- hydrateUrls(getUrls(tweet), ctx) + (media, quotedTweet) <- Stitch.join( + hydrateMediaCacheable( + getMedia(tweet), + MediaEntityHydrator.Cacheable.Ctx(urls.value, ctx) + ), + for { + qtRef <- hydrateQuotedTweetRef( + tweet.quotedTweet, + QuotedTweetRefHydrator.Ctx(urls.value, ctx) + ) + qtRefWithUrls <- hydrateQuotedTweetRefUrls(qtRef.value, ctx) + } yield { + ValueState(qtRefWithUrls.value, qtRef.state ++ qtRefWithUrls.state) + } + ) + } yield { + ValueState.join(urls, media, quotedTweet) + } + + val conversationId: Stitch[ValueState[Option[ConversationId]]] = + hydrateConvoId(getConversationId(tweet), ctx) + + val mentions: Stitch[ValueState[Seq[MentionEntity]]] = + hydrateMentions(getMentions(tweet), ctx) + + val replyScreenName: Stitch[ValueState[Option[Reply]]] = + hydrateReplyScreenName(getReply(tweet), ctx) + + val directedAt: Stitch[ValueState[Option[DirectedAtUser]]] = + hydrateDirectedAt( + getDirectedAtUser(tweet), + DirectedAtHydrator.Ctx( + mentions = getMentions(tweet), + metadata = tweet.directedAtUserMetadata, + underlyingTweetCtx = ctx + ) + ) + + val language: Stitch[ValueState[Option[Language]]] = + hydrateLanguage(tweet.language, ctx) + + val contributor: Stitch[ValueState[Option[Contributor]]] = + hydrateContributor(tweet.contributor, ctx) + + val geoScrub: Stitch[ValueState[(Option[GeoCoordinates], Option[PlaceId])]] = + hydrateGeoScrub( + (TweetLenses.geoCoordinates(tweet), TweetLenses.placeId(tweet)), + ctx + ) + + Stitch + .joinMap( + urlsMediaQuoteTweet, + conversationId, + mentions, + replyScreenName, + directedAt, + language, + contributor, + geoScrub + )(ValueState.join(_, _, _, _, _, _, _, _)) + .map { values => + if 
(values.state.isEmpty) { + ValueState.unmodified(td) + } else { + values.map { + case ( + (urls, media, quotedTweet), + conversationId, + mentions, + reply, + directedAt, + language, + contributor, + coreGeo + ) => + val (coordinates, placeId) = coreGeo + td.copy( + tweet = tweet.copy( + coreData = tweet.coreData.map( + _.copy( + reply = reply, + conversationId = conversationId, + directedAtUser = directedAt, + coordinates = coordinates, + placeId = placeId + ) + ), + urls = Some(urls), + media = Some(media), + mentions = Some(mentions), + language = language, + quotedTweet = quotedTweet, + contributor = contributor + ) + ) + } + } + } + } + + val assertNotScrubbed: TweetDataValueHydrator = + ValueHydrator.fromMutation[TweetData, TweetQuery.Options]( + ScrubUncacheable + .assertNotScrubbed( + "output of the cacheable tweet hydrator should not require scrubbing" + ) + .lensed(TweetData.Lenses.tweet) + ) + + val hydrateDependentUncacheableFields: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val quotedTweetResult: Stitch[ValueState[Option[QuotedTweetResult]]] = + for { + qtFilterState <- hydrateQuoteTweetVisibility(None, ctx) + quotedTweet <- hydrateQuotedTweet( + td.quotedTweetResult, + QuotedTweetHydrator.Ctx(qtFilterState.value, ctx) + ) + } yield { + ValueState.join(qtFilterState, quotedTweet).map(_._2) + } + + val pastedMedia: Stitch[ValueState[PastedMedia]] = + hydratePastedMedia( + PastedMediaHydrator.getPastedMedia(tweet), + PastedMediaHydrator.Ctx(getUrls(tweet), ctx) + ) + + val mediaTags: Stitch[ValueState[Option[TweetMediaTags]]] = + hydrateMediaTags(tweet.mediaTags, ctx) + + val classicCards: Stitch[ValueState[Option[Seq[Card]]]] = + hydrateClassicCards( + tweet.cards, + CardHydrator.Ctx(getUrls(tweet), getMedia(tweet), ctx) + ) + + val card2: Stitch[ValueState[Option[Card2]]] = + hydrateCard2( + tweet.card2, + Card2Hydrator.Ctx( + getUrls(tweet), + 
getMedia(tweet), + getCardReference(tweet), + ctx, + td.featureSwitchResults + ) + ) + + val contributorVisibility: Stitch[ValueState[Option[Contributor]]] = + hydrateContributorVisibility(tweet.contributor, ctx) + + val takedowns: Stitch[ValueState[Option[Takedowns]]] = + hydrateTakedowns( + None, // None because uncacheable hydrator doesn't depend on previous value + TakedownHydrator.Ctx(Takedowns.fromTweet(tweet), ctx) + ) + + val conversationControl: Stitch[ValueState[Option[ConversationControl]]] = + hydrateConversationControl( + tweet.conversationControl, + ConversationControlHydrator.Ctx(getConversationId(tweet), ctx) + ) + + // PreviousTweetCounts and Perspective hydration depends on tweet.editControl.edit_control_initial + // having been hydrated in EditControlHydrator; thus we are chaining them together. + val editControlWithDependencies: Stitch[ + ValueState[ + ( + Option[EditControl], + Option[StatusPerspective], + Option[StatusCounts], + Option[TweetPerspective] + ) + ] + ] = + for { + (edit, perspective) <- Stitch.join( + hydrateEditControl(tweet.editControl, ctx), + hydratePerspective( + tweet.perspective, + PerspectiveHydrator.Ctx(td.featureSwitchResults, ctx)) + ) + (counts, editPerspective) <- Stitch.join( + hydratePreviousTweetCounts( + tweet.previousCounts, + PreviousTweetCountsHydrator.Ctx(edit.value, td.featureSwitchResults, ctx)), + hydrateEditPerspective( + tweet.editPerspective, + EditPerspectiveHydrator + .Ctx(perspective.value, edit.value, td.featureSwitchResults, ctx)) + ) + } yield { + ValueState.join(edit, perspective, counts, editPerspective) + } + + Stitch + .joinMap( + quotedTweetResult, + pastedMedia, + mediaTags, + classicCards, + card2, + contributorVisibility, + takedowns, + conversationControl, + editControlWithDependencies + )(ValueState.join(_, _, _, _, _, _, _, _, _)) + .map { values => + if (values.state.isEmpty) { + ValueState.unmodified(td) + } else { + values.map { + case ( + quotedTweetResult, + pastedMedia, + 
ownedMediaTags, + cards, + card2, + contributor, + takedowns, + conversationControl, + (editControl, perspective, previousCounts, editPerspective) + ) => + td.copy( + tweet = tweet.copy( + media = Some(pastedMedia.mediaEntities), + mediaTags = pastedMedia.mergeTweetMediaTags(ownedMediaTags), + cards = cards, + card2 = card2, + contributor = contributor, + takedownCountryCodes = takedowns.map(_.countryCodes.toSeq), + takedownReasons = takedowns.map(_.reasons.toSeq), + conversationControl = conversationControl, + editControl = editControl, + previousCounts = previousCounts, + perspective = perspective, + editPerspective = editPerspective, + ), + quotedTweetResult = quotedTweetResult + ) + } + } + } + } + + val hydrateIndependentUncacheableFields: TweetDataEditHydrator = + EditHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + // Group together the results of hydrators that don't perform + // filtering, because we don't care about the precedence of + // exceptions from these hydrators, because the exceptions all + // indicate failures, and picking any failure will be + // fine. (All of the other hydrators might throw filtering + // exceptions, so we need to make sure that we give precedence + // to their failures.) + val hydratorsWithoutFiltering = + Stitch.joinMap( + hydrateTweetCounts(tweet.counts, TweetCountsHydrator.Ctx(td.featureSwitchResults, ctx)), + // Note: Place is cached in memcache, it is just not cached on the Tweet. + hydratePlace(tweet.place, ctx), + hydrateDeviceSource(tweet.deviceSource, ctx), + hydrateProfileGeo(tweet.profileGeoEnrichment, ctx) + )(ValueState.join(_, _, _, _)) + + /** + * Multiple hydrators throw visibility filtering exceptions so specify an order to achieve + * a deterministic hydration result while ensuring that any retweet has a source tweet: + * 1. 
hydrateSourceTweet throws SourceTweetNotFound, this is a detached-retweet so treat + * the retweet hydration as if it were not found + * 2. hydrateTweetAuthorVisibility + * 3. hydrateSourceTweet (other than SourceTweetNotFound already handled above) + * 4. hydrateIM1837State + * 5. hydrateIM2884State + * 6. hydrateIM3433State + * 7. hydratorsWithoutFiltering miscellaneous exceptions (any visibility filtering + * exceptions should win over failure of a hydrator) + */ + val sourceTweetAndTweetAuthorResult = + Stitch + .joinMap( + hydrateSourceTweet(td.sourceTweetResult, ctx).liftToTry, + hydrateTweetAuthorVisibility((), ctx).liftToTry, + hydrateIM1837State((), ctx).liftToTry, + hydrateIM2884State((), ctx).liftToTry, + hydrateIM3433State((), ctx).liftToTry + ) { + case (Throw(t @ FilteredState.Unavailable.SourceTweetNotFound(_)), _, _, _, _) => + Throw(t) + case (_, Throw(t), _, _, _) => Throw(t) // TweetAuthorVisibility + case (Throw(t), _, _, _, _) => Throw(t) // SourceTweet + case (_, _, Throw(t), _, _) => Throw(t) // IM1837State + case (_, _, _, Throw(t), _) => Throw(t) // IM2884State + case (_, _, _, _, Throw(t)) => Throw(t) // IM3433State + case ( + Return(sourceTweetResultValue), + Return(authorVisibilityValue), + Return(im1837Value), + Return(im2884Value), + Return(im3433Value) + ) => + Return( + ValueState + .join( + sourceTweetResultValue, + authorVisibilityValue, + im1837Value, + im2884Value, + im3433Value + ) + ) + }.lowerFromTry + + StitchExceptionPrecedence(sourceTweetAndTweetAuthorResult) + .joinWith(hydratorsWithoutFiltering)(ValueState.join(_, _)) + .toStitch + .map { values => + if (values.state.isEmpty) { + EditState.unit[TweetData] + } else { + EditState[TweetData] { tweetData => + val tweet = tweetData.tweet + values.map { + case ( + (sourceTweetResult, _, _, _, _), + (counts, place, deviceSource, profileGeo) + ) => + tweetData.copy( + tweet = tweet.copy( + counts = counts, + place = place, + deviceSource = deviceSource, + profileGeoEnrichment = 
profileGeo + ), + sourceTweetResult = sourceTweetResult + ) + } + } + } + } + } + + val hydrateUnmentionDataToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweet.andThen(TweetLenses.unmentionData), + (td: TweetData, opts: TweetQuery.Options) => + UnmentionDataHydrator + .Ctx(getConversationId(td.tweet), getMentions(td.tweet), TweetCtx.from(td, opts)), + hydrateUnmentionData + ) + + val hydrateCacheableFields: TweetDataValueHydrator = + ValueHydrator.inSequence( + scrubCachedTweet, + hydratePrimaryCacheableFields, + // Relies on mentions being hydrated in hydratePrimaryCacheableFields + hydrateUnmentionDataToTweetData, + assertNotScrubbed, + hydrateCacheableRepairs + ) + + // The conversation muted hydrator needs the conversation id, + // which comes from the primary cacheable fields, and the media hydrator + // needs the cacheable media entities. + val hydrateUncacheableMedia: TweetDataValueHydrator = + ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = TweetCtx.from(td, opts) + val tweet = td.tweet + + val mediaCtx = + MediaEntityHydrator.Uncacheable.Ctx(td.tweet.mediaKeys, ctx) + + val media: Stitch[ValueState[Option[Seq[MediaEntity]]]] = + hydrateMediaUncacheable.liftOption.apply(td.tweet.media, mediaCtx) + + val conversationMuted: Stitch[ValueState[Option[Boolean]]] = + hydrateConversationMuted( + tweet.conversationMuted, + ConversationMutedHydrator.Ctx(getConversationId(tweet), ctx) + ) + + // MediaRefs need to be hydrated at this phase because they rely on the media field + // on the Tweet, which can get unset by later hydrators. 
+ val mediaRefs: Stitch[ValueState[Option[Seq[MediaRef]]]] = + hydrateMediaRefs( + tweet.mediaRefs, + MediaRefsHydrator.Ctx(getMedia(tweet), getMediaKeys(tweet), getUrls(tweet), ctx) + ) + + Stitch + .joinMap( + media, + conversationMuted, + mediaRefs + )(ValueState.join(_, _, _)) + .map { values => + if (values.state.isEmpty) { + ValueState.unmodified(td) + } else { + val tweet = td.tweet + values.map { + case (media, conversationMuted, mediaRefs) => + td.copy( + tweet = tweet.copy( + media = media, + conversationMuted = conversationMuted, + mediaRefs = mediaRefs + ) + ) + } + } + } + } + + val hydrateHasMediaToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweet.andThen(TweetLenses.hasMedia), + (td: TweetData, opts: TweetQuery.Options) => td.tweet, + hydrateHasMedia + ) + + val hydrateReportedTweetVisibilityToTweetData: TweetDataValueHydrator = { + // Create a TweetDataValueHydrator that calls hydrateReportedTweetVisibility, which + // either throws a FilteredState.Unavailable or returns Unit. 
+ ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) => + val ctx = ReportedTweetFilter.Ctx(td.tweet.perspective, TweetCtx.from(td, opts)) + hydrateReportedTweetVisibility((), ctx).map { _ => + ValueState.unmodified(td) + } + } + } + + val hydrateTweetVisibilityToTweetData: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.suppress, + (td: TweetData, opts: TweetQuery.Options) => + TweetVisibilityHydrator.Ctx(td.tweet, TweetCtx.from(td, opts)), + hydrateTweetVisibility + ) + + val hydrateEscherbirdAnnotationsToTweetAndCachedTweet: TweetDataValueHydrator = + TweetHydration.setOnTweetAndCachedTweet( + TweetLenses.escherbirdEntityAnnotations, + (td: TweetData, _: TweetQuery.Options) => td.tweet, + hydrateEscherbirdAnnotations + ) + + val scrubEngagements: TweetDataValueHydrator = + TweetHydration.setOnTweetData( + TweetData.Lenses.tweetCounts, + (td: TweetData, _: TweetQuery.Options) => ScrubEngagementHydrator.Ctx(td.suppress), + hydrateScrubEngagements + ) + + /** + * This is where we wire up all the separate hydrators into a single [[TweetDataValueHydrator]]. + * + * Each hydrator here is either a [[TweetDataValueHydrator]] or a [[TweetDataEditHydrator]]. + * We use [[EditHydrator]]s for anything that needs to run in parallel ([[ValueHydrator]]s can + * only be run in sequence). 
+ */ + ValueHydrator.inSequence( + // Hydrate FeatureSwitchResults first, so they can be used by other hydrators if needed + hydrateFeatureSwitchResults, + EditHydrator + .inParallel( + ValueHydrator + .inSequence( + // The result of running these hydrators is saved as `cacheableTweetResult` and + // written back to cache via `cacheChangesEffect` in `hydrateRepo` + TweetHydration.captureCacheableTweetResult( + hydrateCacheableFields + ), + // Uncacheable hydrators that depend only on the cacheable fields + hydrateUncacheableMedia, + // clean-up partially hydrated entities before any of the hydrators that look at + // url and media entities run, so that they never see bad entities. + hydratePostCacheRepairs, + // These hydrators are all dependent on each other and/or the previous hydrators + hydrateDependentUncacheableFields, + // Sets `hasMedia`. Comes after PastedMediaHydrator in order to include pasted + // pics as well as other media & urls. + hydrateHasMediaToTweetData + ) + .toEditHydrator, + // These hydrators do not rely on any other hydrators and so can be run in parallel + // with the above hydrators (and with each other) + hydrateIndependentUncacheableFields + ) + .toValueHydrator, + // Depends on reported perspectival having been hydrated in PerspectiveHydrator + hydrateReportedTweetVisibilityToTweetData, + // Remove superfluous urls entities when there is a corresponding MediaEntity for the same url + scrubSuperfluousUrlEntities, + // The copyFromSourceTweet hydrator needs to be located after the hydrators that produce the + // fields to copy. It must be located after PartialEntityCleaner (part of postCacheRepairs), + // which removes failed MediaEntities. It also depends on takedownCountryCodes having been + // hydrated in TakedownHydrator. 
+ copyFromSourceTweet, + // depends on AdditionalFieldsHydrator and CopyFromSourceTweet to copy safety labels + hydrateTweetVisibilityToTweetData, + // for IPI'd tweets, we want to disable tweet engagement counts from being returned + // StatusCounts for replyCount, retweetCount. + // scrubEngagements hydrator must come after tweet visibility hydrator. + // tweet visibility hydrator emits the suppressed FilteredState needed for scrubbing. + scrubEngagements, + // this hydrator runs when writing the current tweet + // Escherbird comes last in order to consume a tweet that's as close as possible + // to the tweet written to tweet_events + hydrateEscherbirdAnnotationsToTweetAndCachedTweet + .onlyIf((td, opts) => opts.cause.writing(td.tweet.id)), + // Add an ellipsis to the end of the text for a Tweet that has a NoteTweet associated. + // This is so that the Tweet is displayed on the home timeline with an ellipsis, letting + // the User know that there's more to see. + hydrateNoteTweetSuffix, + /** + * Post-cache repair of QT text and entities to support rendering on all clients + * Moving this to end of the pipeline to avoid/minimize chance of following hydrators + * depending on modified tweet text or entities. + * When we start persisting shortUrl in MH - permalink won't be empty. therefore, + * we won't run QuotedTweetRefHydrator and just hydrate expanded and display + * using QuotedTweetRefUrlsHydrator. We will use hydrated permalink to repair + * QT text and entities for non-upgraded clients in this step. + * */ + hydrateTweetLegacyFormat + ) + } + + /** + * Returns a new hydrator that takes the produced result, and captures the result value + * in the `cacheableTweetResult` field of the enclosed `TweetData`. 
+   */
+  def captureCacheableTweetResult(h: TweetDataValueHydrator): TweetDataValueHydrator =
+    ValueHydrator[TweetData, TweetQuery.Options] { (tweetData, queryOpts) =>
+      h(tweetData, queryOpts).map { hydrated =>
+        hydrated.map { hydratedData =>
+          // The captured copy is the hydrated ValueState itself, with the state's
+          // completedHydrations folded into its TweetData. This is the copy that is
+          // used when converting to a CachedTweet.
+          val snapshot = hydrated.map(_.addHydrated(hydrated.state.completedHydrations))
+
+          hydratedData.copy(cacheableTweetResult = Some(snapshot))
+        }
+      }
+    }
+
+  /**
+   * Lifts a field-level ValueHydrator into a `TweetDataValueHydrator` that keeps the live
+   * tweet and the captured cacheable tweet in step:
+   *
+   * 1. The hydrator is run against the value that `l` selects on the main tweet
+   * 2. The hydrated value is stored on the main tweet through `l`
+   * 3. The same value is stored on the tweet inside cacheableTweetResult through `l`
+   *
+   * When the hydrator's resulting state is empty, the input TweetData is returned unmodified.
+   */
+  def setOnTweetAndCachedTweet[A, C](
+    l: Lens[Tweet, A],
+    mkCtx: (TweetData, TweetQuery.Options) => C,
+    h: ValueHydrator[A, C]
+  ): TweetDataValueHydrator = {
+    // TweetData -> tweet -> l
+    val liveFieldLens = TweetData.Lenses.tweet.andThen(l)
+
+    // TweetData -> cacheableTweetResult -> tweet
+    val cachedTweet =
+      TweetLenses
+        .requireSome(TweetData.Lenses.cacheableTweetResult)
+        .andThen(TweetResult.Lenses.tweet)
+
+    // TweetData -> cacheableTweetResult -> tweet -> l
+    val cachedFieldLens = cachedTweet.andThen(l)
+
+    ValueHydrator[TweetData, TweetQuery.Options] { (tweetData, queryOpts) =>
+      val current = liveFieldLens.get(tweetData)
+      val hydratorCtx = mkCtx(tweetData, queryOpts)
+
+      h.run(current, hydratorCtx).map { result =>
+        if (result.state.isEmpty) ValueState.unmodified(tweetData)
+        else
+          result.map { hydratedValue =>
+            Lens.setAll(
+              tweetData,
+              liveFieldLens -> hydratedValue,
+              cachedFieldLens -> hydratedValue
+            )
+          }
+      }
+    }
+  }
+
+  /**
+   * Creates a `TweetDataValueHydrator` that hydrates a lensed value, overwriting
+   * the existing value.
+   */
+  def setOnTweetData[A, C](
+    lens: Lens[TweetData, A],
+    mkCtx: (TweetData, TweetQuery.Options) => C,
+    h: ValueHydrator[A, C]
+  ): TweetDataValueHydrator =
+    ValueHydrator[TweetData, TweetQuery.Options] { (tweetData, queryOpts) =>
+      val current = lens.get(tweetData)
+      val hydratorCtx = mkCtx(tweetData, queryOpts)
+
+      h.run(current, hydratorCtx).map { result =>
+        // An empty resulting state hands the input back untouched.
+        if (result.state.isEmpty) ValueState.unmodified(tweetData)
+        else result.map(hydratedValue => lens.set(tweetData, hydratedValue))
+      }
+    }
+
+  /**
+   * Builds the [[Effect]] used to write hydrated tweets back to cache. It is meant to be
+   * applied to the output of a [[TweetDataValueHydrator]].
+   *
+   * For every result it inspects `cacheableTweetResult` and increments exactly one counter:
+   *  - `missing_cacheable_result` when no cacheable result was captured,
+   *  - `cache_error` when the outer result state has `cacheErrorEncountered` set,
+   *  - `updated` when the cacheable result's state is modified (the only case that writes),
+   *  - `unchanged` in every remaining case.
+   */
+  def cacheChanges(
+    cache: LockingCache[TweetId, Cached[TweetData]],
+    stats: StatsReceiver
+  ): Effect[ValueState[TweetData]] = {
+    val written = stats.counter("updated")
+    val untouched = stats.counter("unchanged")
+    val picker = new TweetRepoCachePicker[TweetData](_.cachedAt)
+    val cacheErrors = stats.counter("cache_error")
+    val nothingCaptured = stats.counter("missing_cacheable_result")
+
+    Effect[TweetResult] { result =>
+      // `cacheableTweetResult` never carries cacheErrorEncountered itself, so the flag is
+      // read from the state of the outer tweet result.
+      val sawCacheError = result.state.cacheErrorEncountered
+
+      result.value.cacheableTweetResult match {
+        case None =>
+          nothingCaptured.incr()
+
+        case Some(ValueState(_, _)) if sawCacheError =>
+          cacheErrors.incr()
+
+        case Some(ValueState(captured, capturedState)) if capturedState.modified =>
+          val toCache = captured.addHydrated(capturedState.completedHydrations)
+          val now = Time.now
+          val entry = Cached(Some(toCache), CachedValueStatus.Found, now, Some(now))
+          val handler = LockingCache.PickingHandler(entry, picker)
+
+          written.incr()
+          cache.lockAndSet(toCache.tweet.id, handler)
+
+        case _ =>
+          untouched.incr()
+      }
+    }
+  }
+
+  /**
+   * Guards `hydrator` so that a given `HydrationType` is completed at most once.
+   *
+   * The wrapped hydrator only runs when `queryFilter` accepts `ctx.opts` and `hydrationType`
+   * is not yet listed in `ctx.completedHydrations`. Once it has run, the result's state is
+   * extended with `HydrationState.modified(hydrationType)` provided the result has no failed
+   * fields and every type in `dependsOn` is already in `ctx.completedHydrations`. In all
+   * other cases the result is passed through as-is, so the hydration is not marked complete.
+   */
+  def completeOnlyOnce[A, C <: TweetCtx](
+    queryFilter: TweetQuery.Options => Boolean = _ => true,
+    hydrationType: HydrationType,
+    dependsOn: Set[HydrationType] = Set.empty,
+    hydrator: ValueHydrator[A, C]
+  ): ValueHydrator[A, C] = {
+    val markCompleted = HydrationState.modified(hydrationType)
+
+    def alreadyCompleted(ctx: C): Boolean =
+      ctx.completedHydrations.contains(hydrationType)
+
+    val marking =
+      ValueHydrator[A, C] { (value, ctx) =>
+        hydrator(value, ctx).map { outcome =>
+          val succeeded =
+            outcome.state.failedFields.isEmpty &&
+              dependsOn.forall(ctx.completedHydrations.contains)
+
+          // On a forced rehydration (type already completed) neither the hydrationType nor
+          // the modified flag is added; a failed or dependency-starved run is not marked.
+          if (succeeded && !alreadyCompleted(ctx)) outcome.copy(state = outcome.state ++ markCompleted)
+          else outcome
+        }
+      }
+
+    marking.onlyIf { (_, ctx) =>
+      queryFilter(ctx.opts) && !alreadyCompleted(ctx)
+    }
+  }
+
+  /**
+   * Wraps a `TweetResultRepository.Type` with hydration.
+   *
+   * The caller's `TweetQuery.Options` are expanded with `optionsExpander`, and the expanded
+   * options are handed to both `repo` and `hydrator`. The repository state and the hydrator
+   * state are combined into one tweet result, which is given to `cacheChangesEffect` when
+   * the original options allow writing to cache. Last, the result is scrubbed against the
+   * original (unexpanded) options.
+   */
+  def hydrateRepo(
+    hydrator: TweetDataValueHydrator,
+    cacheChangesEffect: Effect[TweetResult],
+    optionsExpander: TweetQueryOptionsExpander.Type
+  )(
+    repo: TweetResultRepository.Type
+  ): TweetResultRepository.Type =
+    (tweetId: TweetId, originalOpts: TweetQuery.Options) => {
+      val expandedOpts = optionsExpander(originalOpts)
+
+      repo(tweetId, expandedOpts).flatMap { fromRepo =>
+        hydrator(fromRepo.value, expandedOpts).map { hydrated =>
+          val combined = TweetResult(hydrated.value, fromRepo.state ++ hydrated.state)
+
+          if (originalOpts.cacheControl.writeToCache) {
+            cacheChangesEffect(combined)
+          }
+
+          UnrequestedFieldScrubber(originalOpts).scrub(combined)
+        }
+      }
+    }
+
+  /**
+   * A trivial wrapper around a Stitch[_] to provide a `joinWith`
+   * method that lets us choose the precedence of exceptions.
+   *
+   * This wrapper is useful for the case in which it's important that
+   * we specify which of the two exceptions wins (such as visibility
+   * filtering).
+   *
+   * Since this is an [[AnyVal]], using this is no more expensive than
+   * inlining the joinWith method.
+   */
+  // exposed for testing
+  case class StitchExceptionPrecedence[A](toStitch: Stitch[A]) extends AnyVal {
+
+    /**
+     * Concurrently evaluate two Stitch[_] values. This is different
+     * from Stitch.join in that any exception from the expression on
+     * the left hand side will take precedence over an exception on
+     * the right hand side. This means that an exception from the
+     * right-hand side will not short-circuit evaluation, but an
+     * exception on the left-hand side *will* short-circuit. This is
+     * desirable because it allows us to return the failure with as
+     * little latency as possible. (Compare to lifting *both* to Try,
+     * which would force us to wait for both computations to complete
+     * before returning, even if the one with the higher precedence is
+     * already known to be an exception.)
+     */
+    def joinWith[B, C](rhs: Stitch[B])(f: (A, B) => C): StitchExceptionPrecedence[C] =
+      StitchExceptionPrecedence {
+        // Only `rhs` is lifted to Try, so its failure travels through the join as a value
+        // and is turned back into a failed Stitch by `lowerFromTry`.
+        Stitch
+          .joinMap(toStitch, rhs.liftToTry) { (a, tryB) => tryB.map(b => f(a, b)) }
+          .lowerFromTry
+      }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetLegacyFormatter.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetLegacyFormatter.scala
new file mode 100644
index 000000000..adadcefd0
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetLegacyFormatter.scala
@@ -0,0 +1,330 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.media.Media
+import com.twitter.tweetypie.repository.TweetQuery
+import com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder
+import com.twitter.tweetypie.thriftscala.UrlEntity
+import com.twitter.tweetypie.thriftscala._
+import com.twitter.tweetypie.thriftscala.entities.Implicits._
+import com.twitter.tweetypie.tweettext.Offset
+import com.twitter.tweetypie.tweettext.TextModification
+import com.twitter.tweetypie.tweettext.TweetText
+import com.twitter.tweetypie.util.EditControlUtil
+import com.twitter.tweetypie.util.TweetLenses
+
+/**
+ * This hydrator is the backwards-compatibility layer to support QT, Edit Tweets & Mixed Media
+ * Tweets rendering on legacy non-updated clients. Legacy rendering provides a way for every client
+ * to consume these Tweets until the client is upgraded. For Edit and Mixed Media Tweets, the
+ * Tweet's self-permalink is appended to the visible text. For Quoting Tweets, the Quoted Tweet's
+ * permalink is appended to the text. For Tweets that meet multiple criteria for legacy rendering
+ * (e.g. QT containing Mixed Media), only one permalink is appended and the self-permalink takes
+ * precedence.
+ */
+object TweetLegacyFormatter {
+
+  private[this] val log = Logger(getClass)
+
+  import TweetText._
+
+  /**
+   * Selects the quoted tweet's permalink for legacy rendering, if one should be appended.
+   *
+   * The permalink is returned only when all of the following hold: `opts.simpleQuotedTweet`
+   * is false, the tweet is not a retweet, the quoted tweet has a permalink, and that
+   * permalink's short URL appears neither in the tweet text nor among the tweet's URL
+   * entities.
+   *
+   * @param td   tweet data whose tweet is inspected
+   * @param opts query options; supplies `simpleQuotedTweet` and is used to build the `TweetCtx`
+   * @return the quoted tweet's permalink, or `None` when no legacy QT formatting applies
+   */
+  def legacyQtPermalink(
+    td: TweetData,
+    opts: TweetQuery.Options
+  ): Option[ShortenedUrl] = {
+    val tweet = td.tweet
+    val tweetText = TweetLenses.text(tweet)
+    val urls = TweetLenses.urls(tweet)
+    val ctx = TweetCtx.from(td, opts)
+    val qtPermalink: Option[ShortenedUrl] = tweet.quotedTweet.flatMap(_.permalink)
+    val qtShortUrl = qtPermalink.map(_.shortUrl)
+
+    // True when one of the tweet's URL entities already carries `url`.
+    def urlsContains(url: String): Boolean =
+      urls.exists(_.url == url)
+
+    val doLegacyQtFormatting =
+      !opts.simpleQuotedTweet && !ctx.isRetweet &&
+        qtPermalink.isDefined && qtShortUrl.isDefined &&
+        !qtShortUrl.exists(tweetText.contains) &&
+        !qtShortUrl.exists(urlsContains)
+
+    if (doLegacyQtFormatting) qtPermalink else None
+  }
+
+  /**
+   * Selects the tweet's own permalink for legacy rendering, if one should be appended.
+   *
+   * The self-permalink is returned only when it is set, its short URL appears neither in the
+   * tweet text nor among the tweet's URL entities, and `needsLegacyFormatting(td)` is true.
+   * That call is the last operand of the `&&` chain, so it is only evaluated once the other
+   * checks have passed.
+   *
+   * @param td tweet data whose tweet is inspected
+   * @return the self-permalink, or `None` when no legacy self-permalink formatting applies
+   */
+  def legacySelfPermalink(
+    td: TweetData
+  ): Option[ShortenedUrl] = {
+    val tweet = td.tweet
+    val selfPermalink = tweet.selfPermalink
+    val tweetText = TweetLenses.text(tweet)
+    val urls = TweetLenses.urls(tweet)
+    val selfShortUrl = selfPermalink.map(_.shortUrl)
+
+    // True when one of the tweet's URL entities already carries `url`.
+    def urlsContains(url: String): Boolean =
+      urls.exists(_.url == url)
+
+    val doLegacyFormatting =
+      selfPermalink.isDefined && selfShortUrl.isDefined &&
+        !selfShortUrl.exists(tweetText.contains) &&
+        !selfShortUrl.exists(urlsContains) &&
+        needsLegacyFormatting(td)
+
+    if (doLegacyFormatting) selfPermalink else None
+  }
+
+  /** True when the tweet carries media that `Media.isMixedMedia` accepts. */
+  def isMixedMediaTweet(tweet: Tweet): Boolean =
+    tweet.media.exists(Media.isMixedMedia)
+
+  /**
+   * Builds the `UrlEntity` for an appended permalink: `from`/`to` become the entity indices,
+   * the short URL becomes `url`, the long URL `expanded` and the display text `display`.
+   */
+  def buildUrlEntity(from: Short, to: Short, permalink: ShortenedUrl): UrlEntity =
+    UrlEntity(
+      fromIndex = from,
+      toIndex = to,
+      url = permalink.shortUrl,
+      expanded = Some(permalink.longUrl),
+      display = Some(permalink.displayText)
+    )
+
+  /**
+   * A visible text range is valid when `fromIndex <= toIndex <= textLength`. An invalid
+   * range is logged as a warning together with the tweet id.
+   */
+  private[this] def isValidVisibleRange(
+    tweetIdForLogging: TweetId,
+    textRange: TextRange,
+    textLength: Int
+  ) = {
+    val isValid = textRange.fromIndex <= textRange.toIndex && textRange.toIndex <= textLength
+    if (!isValid) {
+      log.warn(s"Tweet $tweetIdForLogging has invalid visibleTextRange: $textRange")
+    }
+    isValid
+  }
+
+  // This function checks if legacy formatting is required for Edit, Mixed Media & Note Tweets.
+  // Calls FeatureSwitches.matchRecipient which is an expensive call,
+  // so caution is taken to call it only once and only when needed.
+  //
+  // Returns true when the tweet is of one of those kinds and the matching consumption
+  // feature switch is not enabled. Missing feature switch results, or a missing value for
+  // the key, count as "not enabled".
+  def needsLegacyFormatting(
+    td: TweetData
+  ): Boolean = {
+    val isEdit = EditControlUtil.isEditTweet(td.tweet)
+    val isMixedMedia = isMixedMediaTweet(td.tweet)
+    val isNoteTweet = td.tweet.noteTweet.isDefined
+
+    if (isEdit || isMixedMedia || isNoteTweet) {
+
+      // These feature switches are disabled unless greater than certain android, ios versions
+      // & all versions of RWEB.
+      val TweetEditConsumptionEnabledKey = "tweet_edit_consumption_enabled"
+      val MixedMediaEnabledKey = "mixed_media_enabled"
+      val NoteTweetConsumptionEnabledKey = "note_tweet_consumption_enabled"
+
+      // Reads a boolean switch without logging an impression; defaults to false.
+      def fsEnabled(fsKey: String): Boolean = {
+        td.featureSwitchResults
+          .flatMap(_.getBoolean(fsKey, shouldLogImpression = false))
+          .getOrElse(false)
+      }
+
+      val tweetEditConsumptionEnabled = fsEnabled(TweetEditConsumptionEnabledKey)
+      val mixedMediaEnabled = fsEnabled(MixedMediaEnabledKey)
+      val noteTweetConsumptionEnabled = fsEnabled(NoteTweetConsumptionEnabledKey)
+
+      (isEdit && !tweetEditConsumptionEnabled) ||
+      (isMixedMedia && !mixedMediaEnabled) ||
+      (isNoteTweet && !noteTweetConsumptionEnabled)
+    } else {
+      false
+    }
+  }
+
+  /**
+   * Given a permalink, inserts its short URL into the tweet text at the end of the visible
+   * text range and updates the URL entities, media entities and visible text range to match.
+   *
+   * @param permalink     permalink to insert (self-permalink or quoted tweet permalink)
+   * @param tweet         the tweet to rewrite
+   * @param statsReceiver receives the `self_permalink_not_found` counter
+   * @return the rewritten tweet. When the rewritten text exceeds `OriginalMaxDisplayLength`,
+   *         extended tweet metadata is attached as well; if that is needed but the tweet has
+   *         no self-permalink, the original `tweet` is returned unchanged.
+   */
+  def updateTextAndURLsAndMedia(
+    permalink: ShortenedUrl,
+    tweet: Tweet,
+    statsReceiver: StatsReceiver
+  ): Tweet = {
+
+    val originalText = TweetLenses.text(tweet)
+    val originalTextLength = codePointLength(originalText)
+
+    // Default the visible range to the whole tweet if the existing visible range is invalid.
+    val visibleRange: TextRange =
+      TweetLenses
+        .visibleTextRange(tweet)
+        .filter((r: TextRange) => isValidVisibleRange(tweet.id, r, originalTextLength))
+        .getOrElse(TextRange(0, originalTextLength))
+
+    val permalinkShortUrl = permalink.shortUrl
+    val insertAtCodePoint = Offset.CodePoint(visibleRange.toIndex)
+
+    /*
+     * Insertion at position 0 implies that the original tweet text has no
+     * visible text, so the resulting text should be only the url without
+     * leading padding.
+     */
+    val padLeft = if (insertAtCodePoint.toInt > 0) " " else ""
+
+    /*
+     * Empty visible text at position 0 implies that the original tweet text
+     * only contains a URL in the hidden suffix area, which would not already
+     * be padded.
+     */
+    val padRight = if (visibleRange == TextRange(0, 0)) " " else ""
+    val paddedShortUrl = s"$padLeft$permalinkShortUrl$padRight"
+
+    val tweetTextModification = TextModification.insertAt(
+      originalText,
+      insertAtCodePoint,
+      paddedShortUrl
+    )
+
+    /*
+     * As we modified tweet text and appended tweet permalink above
+     * we have to correct the url and media entities accordingly as they are
+     * expected to be present in the hidden suffix of text.
+     *
+     * - we compute the new (from, to) indices for the url entity
+     * - build new url entity for quoted tweet permalink or self permalink for Edit/ MM Tweets
+     * - shift url entities which are after visible range end
+     * - shift media entities associated with above url entities
+     */
+    val shortUrlLength = codePointLength(permalinkShortUrl)
+    // The new entity covers only the short URL itself, not the padding around it.
+    val fromIndex = insertAtCodePoint.toInt + codePointLength(padLeft)
+    val toIndex = fromIndex + shortUrlLength
+
+    val tweetUrlEntity = buildUrlEntity(
+      from = fromIndex.toShort,
+      to = toIndex.toShort,
+      permalink = permalink
+    )
+
+    // For a mixed media tweet only the first media entity is kept.
+    val tweetMedia = if (isMixedMediaTweet(tweet)) {
+      TweetLenses.media(tweet).take(1)
+    } else {
+      TweetLenses.media(tweet)
+    }
+
+    val modifiedMedia = tweetTextModification.reindexEntities(tweetMedia)
+    val modifiedUrls =
+      tweetTextModification.reindexEntities(TweetLenses.urls(tweet)) :+ tweetUrlEntity
+    val modifiedText = tweetTextModification.updated
+
+    /*
+     * Visible Text Range computation differs by scenario
+     * == Any Tweet with Media ==
+     * Tweet text has a media url *after* the visible text range
+     * original text:  [visible text] https://t.co/mediaUrl
+     * original range:  ^START  END^
+     *
+     * Append the permalink URL to the *visible text* so non-upgraded clients can see it
+     * modified text:  [visible text https://t.co/permalink] https://t.co/mediaUrl
+     * modified range:  ^START                         END^
+     * visible range expanded, permalink is visible
+     *
+     * == Non-QT Tweet w/o Media ==
+     * original text:  [visible text]
+     * original range: None (default: whole text is visible)
+     *
+     * modified text:  [visible text https://t.co/selfPermalink]
+     * modified range: None (default: whole text is visible)
+     * trailing self permalink will be visible
+     *
+     * == QT w/o Media ==
+     * original text:  [visible text]
+     * original range: None (default: whole text is visible)
+     *
+     * modified text:  [visible text] https://t.co/qtPermalink
+     * modified range:  ^START  END^
+     * trailing QT permalink is *hidden* because legacy clients that process the visible text range know how to display QTs
+     *
+     * == Non-QT Replies w/o media ==
+     * original text:  @user [visible text]
+     * original range:        ^START  END^
+     *
+     * modified text:  @user [visible text https://t.co/selfPermalink]
+     * modified range:        ^START                             END^
+     * visible range expanded, self permalink is visible
+     *
+     * == QT Replies w/o media ==
+     * original text:  @user [visible text]
+     * original range:        ^START  END^
+     *
+     * modified text:  @user [visible text] https://t.co/qtPermalink
+     * modified range:        ^START  END^
+     * visible range remains the same, trailing QT permalink is hidden
+     *
+     */
+
+    val modifiedVisibleTextRange =
+      if (modifiedMedia.nonEmpty ||
+        EditControlUtil.isEditTweet(tweet) ||
+        tweet.noteTweet.isDefined) {
+        // Expand the range by the left padding plus the short URL; padRight is not counted.
+        Some(
+          visibleRange.copy(
+            toIndex = visibleRange.toIndex + codePointLength(padLeft) + shortUrlLength
+          )
+        )
+      } else {
+        Some(visibleRange)
+      }
+
+    val updatedTweet =
+      Lens.setAll(
+        tweet,
+        TweetLenses.text -> modifiedText,
+        TweetLenses.urls -> modifiedUrls.sortBy(_.fromIndex),
+        TweetLenses.media -> modifiedMedia.sortBy(_.fromIndex),
+        TweetLenses.visibleTextRange -> modifiedVisibleTextRange
+      )
+
+    /**
+     * compute extended tweet metadata when text length > 140
+     * and apply the final lens to return a modified tweet
+     */
+    val totalDisplayLength = displayLength(modifiedText)
+    if (totalDisplayLength > OriginalMaxDisplayLength) {
+      updatedTweet.selfPermalink match {
+        case Some(permalink) =>
+          val extendedTweetMetadata = ExtendedTweetMetadataBuilder(updatedTweet, permalink)
+          updatedTweet.copy(
+            extendedTweetMetadata = Some(extendedTweetMetadata)
+          )
+        case None =>
+          /**
+           * This case shouldn't happen as TweetBuilder currently populates
+           * selfPermalink for extended tweets. In QT + Media, we will
+           * use AttachmentBuilder to store selfPermalink during writes,
+           * if text display length is going to exceed 140 after QT url append.
+           */
+          log.error(
+            s"Failed to compute extended metadata for tweet: ${tweet.id} with " +
+              s"display length: ${totalDisplayLength}, as self-permalink is empty."
+          )
+          statsReceiver.counter("self_permalink_not_found").incr()
+          // Falls back to the original, unmodified tweet rather than `updatedTweet`.
+          tweet
+      }
+    } else {
+      updatedTweet
+    }
+  }
+
+  /**
+   * Builds the legacy-format hydrator. It picks the self-permalink when legacy formatting
+   * requires it, otherwise the quoted tweet permalink. When a permalink is picked, the
+   * tweet is rewritten with `updateTextAndURLsAndMedia` and returned as a
+   * `ValueState.delta` against the input; when none is picked, the input is returned
+   * unmodified.
+   *
+   * @param statsReceiver passed through to `updateTextAndURLsAndMedia`
+   */
+  def apply(
+    statsReceiver: StatsReceiver
+  ): TweetDataValueHydrator = {
+    ValueHydrator[TweetData, TweetQuery.Options] { (td, opts) =>
+      // Prefer any required self permalink rendering over QT permalink rendering because a
+      // client that doesn't understand the attributes of the Tweet (i.e. Edit, Mixed
+      // Media) won't be able to render the Tweet properly at all, regardless of whether
+      // it's a QT. By preferring a visible self-permalink, the viewer is linked to an
+      // RWeb view of the Tweet which can fully display all of its features.
+      val permalink: Option[ShortenedUrl] =
+        legacySelfPermalink(td)
+          .orElse(legacyQtPermalink(td, opts))
+
+      permalink match {
+        case Some(permalink) =>
+          val updatedTweet = updateTextAndURLsAndMedia(permalink, td.tweet, statsReceiver)
+          Stitch(ValueState.delta(td, td.copy(tweet = updatedTweet)))
+        case _ =>
+          Stitch(ValueState.unmodified(td))
+      }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetQueryOptionsExpander.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetQueryOptionsExpander.scala
new file mode 100644
index 000000000..732b9c752
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetQueryOptionsExpander.scala
@@ -0,0 +1,144 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.repository.TweetQuery
+
+/**
+ * An instance of `TweetQueryOptionsExpander.Type` can be used to take a `TweetQuery.Options`
+ * instance provided by a user, and expand the set of options included to take into account
+ * dependencies between fields and options.
+ */
+object TweetQueryOptionsExpander {
+  import TweetQuery._
+
+  /**
+   * Used by AdditionalFieldsHydrator, this function type can filter out or inject fieldIds to
+   * request from Manhattan per tweet.
+   */
+  type Type = Options => Options
+
+  /**
+   * The identity TweetQueryOptionsExpander, which passes through fieldIds unchanged.
+   */
+  val unit: TweetQueryOptionsExpander.Type = identity
+
+  /** A predicate over an `Include`; two selectors can be OR-ed together with `||`. */
+  case class Selector(f: Include => Boolean) {
+    def apply(i: Include): Boolean = f(i)
+
+    def ||(other: Selector) = Selector(include => f(include) || other.f(include))
+  }
+
+  /** Selector that fires when `fieldId` is among the requested tweet fields. */
+  private def selectTweetField(fieldId: FieldId): Selector =
+    Selector(include => include.tweetFields.contains(fieldId))
+
+  /**
+   * Direct dependencies: when the selector on the left matches the requested `Include`,
+   * the `Include` on the right has to be requested as well.
+   */
+  private val firstOrderDependencies: Seq[(Selector, Include)] =
+    Seq(
+      selectTweetField(Tweet.MediaField.id) ->
+        Include(tweetFields = Set(Tweet.UrlsField.id, Tweet.MediaKeysField.id)),
+      selectTweetField(Tweet.QuotedTweetField.id) ->
+        Include(tweetFields = Set(Tweet.UrlsField.id)),
+      selectTweetField(Tweet.MediaRefsField.id) ->
+        Include(tweetFields = Set(Tweet.UrlsField.id, Tweet.MediaKeysField.id)),
+      selectTweetField(Tweet.CardsField.id) ->
+        Include(tweetFields = Set(Tweet.UrlsField.id)),
+      selectTweetField(Tweet.Card2Field.id) ->
+        Include(tweetFields = Set(Tweet.UrlsField.id, Tweet.CardReferenceField.id)),
+      selectTweetField(Tweet.CoreDataField.id) ->
+        Include(tweetFields = Set(Tweet.DirectedAtUserMetadataField.id)),
+      selectTweetField(Tweet.SelfThreadInfoField.id) ->
+        Include(tweetFields = Set(Tweet.CoreDataField.id)),
+      (selectTweetField(Tweet.TakedownCountryCodesField.id) ||
+        selectTweetField(Tweet.TakedownReasonsField.id)) ->
+        Include(
+          tweetFields = Set(
+            Tweet.TweetypieOnlyTakedownCountryCodesField.id,
+            Tweet.TweetypieOnlyTakedownReasonsField.id
+          )
+        ),
+      selectTweetField(Tweet.EditPerspectiveField.id) ->
+        Include(tweetFields = Set(Tweet.PerspectiveField.id)),
+      Selector(_.quotedTweet) ->
+        Include(tweetFields = Set(Tweet.QuotedTweetField.id)),
+      // any requested count field pulls in the Tweet.counts field
+      Selector(_.countsFields.nonEmpty) ->
+        Include(tweetFields = Set(Tweet.CountsField.id)),
+      // any requested media field pulls in the Tweet.media field
+      Selector(_.mediaFields.nonEmpty) ->
+        Include(tweetFields = Set(Tweet.MediaField.id)),
+      selectTweetField(Tweet.UnmentionDataField.id) ->
+        Include(tweetFields = Set(Tweet.MentionsField.id)),
+    )
+
+  /** The first-order table with every right-hand side expanded transitively. */
+  private val allDependencies =
+    firstOrderDependencies.map { dependency =>
+      dependency._1 -> transitiveExpand(dependency._2)
+    }
+
+  /**
+   * Folds over the first-order table: each selector that matches the accumulated `Include`
+   * adds its implied `Include` together with that `Include`'s own transitive expansion.
+   */
+  private def transitiveExpand(inc: Include): Include =
+    firstOrderDependencies.foldLeft(inc) { (acc, dependency) =>
+      val (selector, implied) = dependency
+      if (selector(acc)) acc ++ implied ++ transitiveExpand(implied)
+      else acc
+    }
+
+  /**
+   * Chains several TweetQueryOptionsExpander into one, feeding the output of each into
+   * the next, in argument order.
+   */
+  def sequentially(updaters: TweetQueryOptionsExpander.Type*): TweetQueryOptionsExpander.Type =
+    initial => updaters.foldLeft(initial)((current, updater) => updater(current))
+
+  /**
+   * Adds to `TweetQuery.Options.include` every field that the requested fields depend on
+   * for correct hydration. Selectors are always tested against the originally requested
+   * `include`, never against the partially expanded one.
+   */
+  def expandDependencies: TweetQueryOptionsExpander.Type =
+    options => {
+      val requested = options.include
+      val expanded =
+        allDependencies.foldLeft(requested) { (acc, dependency) =>
+          val (selector, implied) = dependency
+          if (selector(requested)) acc ++ implied
+          else acc
+        }
+
+      options.copy(include = expanded)
+    }
+
+  /**
+   * Adds 'fields' to the tweetFields to load whenever the gate is true; otherwise the
+   * options are returned as they came in. The gate is consulted on every invocation.
+   */
+  def gatedTweetFieldUpdater(
+    gate: Gate[Unit],
+    fields: Seq[FieldId]
+  ): TweetQueryOptionsExpander.Type =
+    options =>
+      if (!gate()) options
+      else options.copy(include = options.include.also(tweetFields = fields))
+
+  /**
+   * Uses a `ThreadLocal` to remember the last expansion performed, and to reuse the
+   * previous result if the input value is the same. This is useful to avoid repeatedly
+   * computing the expansion of the same input when multiple tweets are queried together
+   * with the same options.
+   */
+  def threadLocalMemoize(expander: Type): Type = {
+    // Per-thread slot holding the most recent (input options, expanded options) pair.
+    val memo: ThreadLocal[Option[(Options, Options)]] =
+      new ThreadLocal[Option[(Options, Options)]] {
+        override def initialValue(): None.type = None
+      }
+
+    options =>
+      memo.get() match {
+        // Backquoted `options` matches only when the remembered input equals the argument.
+        case Some((`options`, res)) => res
+        case _ =>
+          val res = expander(options)
+          memo.set(Some((options, res)))
+          res
+      }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetVisibilityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetVisibilityHydrator.scala
new file mode 100644
index 000000000..9d05fbf8e
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/TweetVisibilityHydrator.scala
@@ -0,0 +1,66 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.stitch.Stitch
+import com.twitter.tweetypie.core._
+import com.twitter.tweetypie.repository._
+import com.twitter.tweetypie.util.CommunityUtil
+
+/**
+ * Hydrates the `FilteredState.Suppress` slot of a tweet by querying a
+ * `TweetVisibilityRepository`. A `FilteredState.Unavailable` answer is thrown rather than
+ * returned as a value.
+ */
+object TweetVisibilityHydrator {
+  type Type = ValueHydrator[Option[FilteredState.Suppress], Ctx]
+
+  /** Hydration context: the tweet being checked plus the underlying `TweetCtx`. */
+  case class Ctx(tweet: Tweet, underlyingTweetCtx: TweetCtx) extends TweetCtx.Proxy
+
+  /**
+   * @param repo           visibility repository queried once per hydration
+   * @param failClosedInVF gate consulted when the repository call fails; when it is true
+   *                       the failure is propagated instead of being swallowed
+   * @param stats          one counter per outcome is recorded under the `outcome` scope
+   */
+  def apply(
+    repo: TweetVisibilityRepository.Type,
+    failClosedInVF: Gate[Unit],
+    stats: StatsReceiver
+  ): Type = {
+    val outcomeScope = stats.scope("outcome")
+    val unavailable = outcomeScope.counter("unavailable")
+    val suppress = outcomeScope.counter("suppress")
+    val allow = outcomeScope.counter("allow")
+    val failClosed = outcomeScope.counter("fail_closed")
+    val communityFailClosed = outcomeScope.counter("community_fail_closed")
+    val failOpen = outcomeScope.counter("fail_open")
+
+    ValueHydrator[Option[FilteredState.Suppress], Ctx] { (curr, ctx) =>
+      val request = TweetVisibilityRepository.Request(
+        tweet = ctx.tweet,
+        viewerId = ctx.opts.forUserId,
+        safetyLevel = ctx.opts.safetyLevel,
+        isInnerQuotedTweet = ctx.opts.isInnerQuotedTweet,
+        isRetweet = ctx.isRetweet,
+        hydrateConversationControl = ctx.tweetFieldRequested(Tweet.ConversationControlField),
+        isSourceTweet = ctx.opts.isSourceTweet
+      )
+
+      // The repo result is lifted to Try so that failures can be handled case by case below.
+      // The cases are order-sensitive: the gate is checked before the community special case.
+      repo(request).liftToTry.flatMap {
+        // If FilteredState.Unavailable is returned from repo then throw it
+        case Return(Some(fs: FilteredState.Unavailable)) =>
+          unavailable.incr()
+          Stitch.exception(fs)
+        // If FilteredState.Suppress is returned from repo then return it
+        case Return(Some(fs: FilteredState.Suppress)) =>
+          suppress.incr()
+          Stitch.value(ValueState.modified(Some(fs)))
+        // If None is returned from repo then return unmodified
+        case Return(None) =>
+          allow.incr()
+          ValueState.StitchUnmodifiedNone
+        // Propagate thrown exceptions if fail closed
+        case Throw(e) if failClosedInVF() =>
+          failClosed.incr()
+          Stitch.exception(e)
+        // Community tweets are special cased to fail closed to avoid
+        // leaking tweets expected to be private to a community.
+        case Throw(e) if CommunityUtil.hasCommunity(request.tweet.communities) =>
+          communityFailClosed.incr()
+          Stitch.exception(e)
+        // Any other failure fails open: the current value is kept unmodified.
+        case Throw(_) =>
+          failOpen.incr()
+          Stitch.value(ValueState.unmodified(curr))
+      }
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala
new file mode 100644
index 000000000..dd6b1ee91
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnmentionDataHydrator.scala
@@ -0,0 +1,28 @@
+package com.twitter.tweetypie
+package hydrator
+
+import com.twitter.tweetypie.core.ValueState
+import com.twitter.tweetypie.thriftscala.MentionEntity
+import com.twitter.tweetypie.unmentions.thriftscala.UnmentionData
+
+object UnmentionDataHydrator {
+  type Type = ValueHydrator[Option[UnmentionData], Ctx]
+
+  case class Ctx(
+    conversationId: Option[TweetId],
+    mentions: Seq[MentionEntity],
+    underlyingTweetCtx: TweetCtx)
+      extends TweetCtx.Proxy
+
+  def apply(): Type = {
+    ValueHydrator.map[Option[UnmentionData], Ctx] {
(_, ctx) => + val mentionedUserIds: Seq[UserId] = ctx.mentions.flatMap(_.userId) + + ValueState.modified( + Some(UnmentionData(ctx.conversationId, Option(mentionedUserIds).filter(_.nonEmpty))) + ) + } + }.onlyIf { (_, ctx) => + ctx.tweetFieldRequested(Tweet.UnmentionDataField) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala new file mode 100644 index 000000000..1f69b7ecd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UnrequestedFieldScrubber.scala @@ -0,0 +1,211 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala._ + +/** + * A hydrator that scrubs tweet fields that weren't requested. Those fields might be + * present because they were previously requested and were cached with the tweet. 
+ */ +trait UnrequestedFieldScrubber { + def scrub(tweetResult: TweetResult): TweetResult + def scrub(tweetData: TweetData): TweetData + def scrub(tweet: Tweet): Tweet +} + +object UnrequestedFieldScrubber { + def apply(options: TweetQuery.Options): UnrequestedFieldScrubber = + if (!options.scrubUnrequestedFields) NullScrubber + else new ScrubberImpl(options.include) + + private object NullScrubber extends UnrequestedFieldScrubber { + def scrub(tweetResult: TweetResult): TweetResult = tweetResult + def scrub(tweetData: TweetData): TweetData = tweetData + def scrub(tweet: Tweet): Tweet = tweet + } + + class ScrubberImpl(i: TweetQuery.Include) extends UnrequestedFieldScrubber { + def scrub(tweetResult: TweetResult): TweetResult = + tweetResult.map(scrub(_)) + + def scrub(tweetData: TweetData): TweetData = + tweetData.copy( + tweet = scrub(tweetData.tweet), + sourceTweetResult = tweetData.sourceTweetResult.map(scrub(_)), + quotedTweetResult = + if (!i.quotedTweet) None + else tweetData.quotedTweetResult.map(qtr => qtr.map(scrub)) + ) + + def scrub(tweet: Tweet): Tweet = { + val tweet2 = scrubKnownFields(tweet) + + val unhandledFields = i.tweetFields -- AdditionalFields.CompiledFieldIds + + if (unhandledFields.isEmpty) { + tweet2 + } else { + tweet2.unsetFields(unhandledFields) + } + } + + def scrubKnownFields(tweet: Tweet): Tweet = { + @inline + def filter[A](fieldId: FieldId, value: Option[A]): Option[A] = + if (i.tweetFields.contains(fieldId)) value else None + + tweet.copy( + coreData = filter(Tweet.CoreDataField.id, tweet.coreData), + urls = filter(Tweet.UrlsField.id, tweet.urls), + mentions = filter(Tweet.MentionsField.id, tweet.mentions), + hashtags = filter(Tweet.HashtagsField.id, tweet.hashtags), + cashtags = filter(Tweet.CashtagsField.id, tweet.cashtags), + media = filter(Tweet.MediaField.id, tweet.media), + place = filter(Tweet.PlaceField.id, tweet.place), + quotedTweet = filter(Tweet.QuotedTweetField.id, tweet.quotedTweet), + takedownCountryCodes = + 
filter(Tweet.TakedownCountryCodesField.id, tweet.takedownCountryCodes), + counts = filter(Tweet.CountsField.id, tweet.counts.map(scrub)), + deviceSource = filter(Tweet.DeviceSourceField.id, tweet.deviceSource), + perspective = filter(Tweet.PerspectiveField.id, tweet.perspective), + cards = filter(Tweet.CardsField.id, tweet.cards), + card2 = filter(Tweet.Card2Field.id, tweet.card2), + language = filter(Tweet.LanguageField.id, tweet.language), + spamLabels = None, // unused + contributor = filter(Tweet.ContributorField.id, tweet.contributor), + profileGeoEnrichment = + filter(Tweet.ProfileGeoEnrichmentField.id, tweet.profileGeoEnrichment), + conversationMuted = filter(Tweet.ConversationMutedField.id, tweet.conversationMuted), + takedownReasons = filter(Tweet.TakedownReasonsField.id, tweet.takedownReasons), + selfThreadInfo = filter(Tweet.SelfThreadInfoField.id, tweet.selfThreadInfo), + // additional fields + mediaTags = filter(Tweet.MediaTagsField.id, tweet.mediaTags), + schedulingInfo = filter(Tweet.SchedulingInfoField.id, tweet.schedulingInfo), + bindingValues = filter(Tweet.BindingValuesField.id, tweet.bindingValues), + replyAddresses = None, // unused + obsoleteTwitterSuggestInfo = None, // unused + escherbirdEntityAnnotations = + filter(Tweet.EscherbirdEntityAnnotationsField.id, tweet.escherbirdEntityAnnotations), + spamLabel = filter(Tweet.SpamLabelField.id, tweet.spamLabel), + abusiveLabel = filter(Tweet.AbusiveLabelField.id, tweet.abusiveLabel), + lowQualityLabel = filter(Tweet.LowQualityLabelField.id, tweet.lowQualityLabel), + nsfwHighPrecisionLabel = + filter(Tweet.NsfwHighPrecisionLabelField.id, tweet.nsfwHighPrecisionLabel), + nsfwHighRecallLabel = filter(Tweet.NsfwHighRecallLabelField.id, tweet.nsfwHighRecallLabel), + abusiveHighRecallLabel = + filter(Tweet.AbusiveHighRecallLabelField.id, tweet.abusiveHighRecallLabel), + lowQualityHighRecallLabel = + filter(Tweet.LowQualityHighRecallLabelField.id, tweet.lowQualityHighRecallLabel), + personaNonGrataLabel 
= + filter(Tweet.PersonaNonGrataLabelField.id, tweet.personaNonGrataLabel), + recommendationsLowQualityLabel = filter( + Tweet.RecommendationsLowQualityLabelField.id, + tweet.recommendationsLowQualityLabel + ), + experimentationLabel = + filter(Tweet.ExperimentationLabelField.id, tweet.experimentationLabel), + tweetLocationInfo = filter(Tweet.TweetLocationInfoField.id, tweet.tweetLocationInfo), + cardReference = filter(Tweet.CardReferenceField.id, tweet.cardReference), + supplementalLanguage = + filter(Tweet.SupplementalLanguageField.id, tweet.supplementalLanguage), + selfPermalink = filter(Tweet.SelfPermalinkField.id, tweet.selfPermalink), + extendedTweetMetadata = + filter(Tweet.ExtendedTweetMetadataField.id, tweet.extendedTweetMetadata), + communities = filter(Tweet.CommunitiesField.id, tweet.communities), + visibleTextRange = filter(Tweet.VisibleTextRangeField.id, tweet.visibleTextRange), + spamHighRecallLabel = filter(Tweet.SpamHighRecallLabelField.id, tweet.spamHighRecallLabel), + duplicateContentLabel = + filter(Tweet.DuplicateContentLabelField.id, tweet.duplicateContentLabel), + liveLowQualityLabel = filter(Tweet.LiveLowQualityLabelField.id, tweet.liveLowQualityLabel), + nsfaHighRecallLabel = filter(Tweet.NsfaHighRecallLabelField.id, tweet.nsfaHighRecallLabel), + pdnaLabel = filter(Tweet.PdnaLabelField.id, tweet.pdnaLabel), + searchBlacklistLabel = + filter(Tweet.SearchBlacklistLabelField.id, tweet.searchBlacklistLabel), + lowQualityMentionLabel = + filter(Tweet.LowQualityMentionLabelField.id, tweet.lowQualityMentionLabel), + bystanderAbusiveLabel = + filter(Tweet.BystanderAbusiveLabelField.id, tweet.bystanderAbusiveLabel), + automationHighRecallLabel = + filter(Tweet.AutomationHighRecallLabelField.id, tweet.automationHighRecallLabel), + goreAndViolenceLabel = + filter(Tweet.GoreAndViolenceLabelField.id, tweet.goreAndViolenceLabel), + untrustedUrlLabel = filter(Tweet.UntrustedUrlLabelField.id, tweet.untrustedUrlLabel), + goreAndViolenceHighRecallLabel = 
filter( + Tweet.GoreAndViolenceHighRecallLabelField.id, + tweet.goreAndViolenceHighRecallLabel + ), + nsfwVideoLabel = filter(Tweet.NsfwVideoLabelField.id, tweet.nsfwVideoLabel), + nsfwNearPerfectLabel = + filter(Tweet.NsfwNearPerfectLabelField.id, tweet.nsfwNearPerfectLabel), + automationLabel = filter(Tweet.AutomationLabelField.id, tweet.automationLabel), + nsfwCardImageLabel = filter(Tweet.NsfwCardImageLabelField.id, tweet.nsfwCardImageLabel), + duplicateMentionLabel = + filter(Tweet.DuplicateMentionLabelField.id, tweet.duplicateMentionLabel), + bounceLabel = filter(Tweet.BounceLabelField.id, tweet.bounceLabel), + selfThreadMetadata = filter(Tweet.SelfThreadMetadataField.id, tweet.selfThreadMetadata), + composerSource = filter(Tweet.ComposerSourceField.id, tweet.composerSource), + editControl = filter(Tweet.EditControlField.id, tweet.editControl), + developerBuiltCardId = filter( + Tweet.DeveloperBuiltCardIdField.id, + tweet.developerBuiltCardId + ), + creativeEntityEnrichmentsForTweet = filter( + Tweet.CreativeEntityEnrichmentsForTweetField.id, + tweet.creativeEntityEnrichmentsForTweet + ), + previousCounts = filter(Tweet.PreviousCountsField.id, tweet.previousCounts), + mediaRefs = filter(Tweet.MediaRefsField.id, tweet.mediaRefs), + isCreativesContainerBackendTweet = filter( + Tweet.IsCreativesContainerBackendTweetField.id, + tweet.isCreativesContainerBackendTweet), + editPerspective = filter(Tweet.EditPerspectiveField.id, tweet.editPerspective), + noteTweet = filter(Tweet.NoteTweetField.id, tweet.noteTweet), + + // tweetypie-internal metadata + directedAtUserMetadata = + filter(Tweet.DirectedAtUserMetadataField.id, tweet.directedAtUserMetadata), + tweetypieOnlyTakedownReasons = + filter(Tweet.TweetypieOnlyTakedownReasonsField.id, tweet.tweetypieOnlyTakedownReasons), + mediaKeys = filter(Tweet.MediaKeysField.id, tweet.mediaKeys), + tweetypieOnlyTakedownCountryCodes = filter( + Tweet.TweetypieOnlyTakedownCountryCodesField.id, + 
tweet.tweetypieOnlyTakedownCountryCodes + ), + underlyingCreativesContainerId = filter( + Tweet.UnderlyingCreativesContainerIdField.id, + tweet.underlyingCreativesContainerId), + unmentionData = filter(Tweet.UnmentionDataField.id, tweet.unmentionData), + blockingUnmentions = filter(Tweet.BlockingUnmentionsField.id, tweet.blockingUnmentions), + settingsUnmentions = filter(Tweet.SettingsUnmentionsField.id, tweet.settingsUnmentions) + ) + } + + def scrub(counts: StatusCounts): StatusCounts = { + @inline + def filter[A](fieldId: FieldId, value: Option[A]): Option[A] = + if (i.countsFields.contains(fieldId)) value else None + + StatusCounts( + replyCount = filter(StatusCounts.ReplyCountField.id, counts.replyCount), + favoriteCount = filter(StatusCounts.FavoriteCountField.id, counts.favoriteCount), + retweetCount = filter(StatusCounts.RetweetCountField.id, counts.retweetCount), + quoteCount = filter(StatusCounts.QuoteCountField.id, counts.quoteCount), + bookmarkCount = filter(StatusCounts.BookmarkCountField.id, counts.bookmarkCount) + ) + } + + def scrub(media: MediaEntity): MediaEntity = { + @inline + def filter[A](fieldId: FieldId, value: Option[A]): Option[A] = + if (i.mediaFields.contains(fieldId)) value else None + + media.copy( + additionalMetadata = + filter(MediaEntity.AdditionalMetadataField.id, media.additionalMetadata) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala new file mode 100644 index 000000000..9ffdf0139 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/UrlEntityHydrator.scala @@ -0,0 +1,122 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tco_util.DisplayUrl +import com.twitter.tco_util.InvalidUrlException +import com.twitter.tco_util.TcoSlug +import 
com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.thriftscala._ +import scala.util.control.NonFatal + +object UrlEntitiesHydrator { + type Type = ValueHydrator[Seq[UrlEntity], TweetCtx] + + def once(h: ValueHydrator[UrlEntity, TweetCtx]): Type = + TweetHydration.completeOnlyOnce( + queryFilter = queryFilter, + hydrationType = HydrationType.Urls, + hydrator = h.liftSeq + ) + + def queryFilter(opts: TweetQuery.Options): Boolean = + opts.include.tweetFields.contains(Tweet.UrlsField.id) +} + +/** + * Hydrates UrlEntities. If there is a failure to hydrate an entity, the entity is left + * unhydrated, so that we can try again later. The PartialEntityCleaner will remove + * the partial entity before returning to clients. + */ +object UrlEntityHydrator { + + /** + * a function type that takes a shorten-url and an expanded-url, and generates a + * "display url" (which isn't really a url). this may fail if the expanded-url + * can't be parsed as a valid url, in which case None is returned. + */ + type Truncator = (String, String) => Option[String] + + val hydratedField: FieldByPath = fieldByPath(Tweet.UrlsField) + val log: Logger = Logger(getClass) + + def apply(repo: UrlRepository.Type, stats: StatsReceiver): ValueHydrator[UrlEntity, TweetCtx] = { + val toDisplayUrl = truncator(stats) + + ValueHydrator[UrlEntity, TweetCtx] { (curr, _) => + val slug = getTcoSlug(curr) + + val result: Stitch[Option[Try[ExpandedUrl]]] = Stitch.collect(slug.map(repo(_).liftToTry)) + + result.map { + case Some(Return(expandedUrl)) => + ValueState.modified(update(curr, expandedUrl, toDisplayUrl)) + + case None => + ValueState.unmodified(curr) + + case Some(Throw(NotFound)) => + // If the UrlEntity contains an invalid t.co slug that can't be resolved, + // leave the entity unhydrated, to be removed later by the PartialEntityCleaner. + // We don't consider this a partial because the input is invalid and is not + // expected to succeed. 
+ ValueState.unmodified(curr) + + case Some(Throw(_)) => + // On failure, use the t.co link as the expanded url so that it is still clickable, + // but also still flag the failure + ValueState.partial( + update(curr, ExpandedUrl(curr.url), toDisplayUrl), + hydratedField + ) + } + }.onlyIf((curr, ctx) => !ctx.isRetweet && isUnhydrated(curr)) + } + + /** + * a UrlEntity needs hydration if the expanded url is either unset or set to the + * shortened url . + */ + def isUnhydrated(entity: UrlEntity): Boolean = + entity.expanded.isEmpty || hydrationFailed(entity) + + /** + * Did the hydration of this URL entity fail? + */ + def hydrationFailed(entity: UrlEntity): Boolean = + entity.expanded.contains(entity.url) + + def update(entity: UrlEntity, expandedUrl: ExpandedUrl, toDisplayUrl: Truncator): UrlEntity = + entity.copy( + expanded = Some(expandedUrl.text), + display = toDisplayUrl(entity.url, expandedUrl.text) + ) + + def getTcoSlug(entity: UrlEntity): Option[UrlSlug] = + TcoSlug.unapply(entity.url).map(UrlSlug(_)) + + def truncator(stats: StatsReceiver): Truncator = { + val truncationStats = stats.scope("truncations") + val truncationsCounter = truncationStats.counter("count") + val truncationExceptionsCounter = truncationStats.counter("exceptions") + + (shortUrl, expandedUrl) => + try { + truncationsCounter.incr() + Some(DisplayUrl(shortUrl, Some(expandedUrl), true)) + } catch { + case NonFatal(ex) => + truncationExceptionsCounter.incr() + truncationStats.counter(ex.getClass.getName).incr() + ex match { + case InvalidUrlException(_) => + log.warn(s"failed to truncate: `$shortUrl` / `$expandedUrl`") + case _ => + log.warn(s"failed to truncate: `$shortUrl` / `$expandedUrl`", ex) + } + None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ValueHydrator.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ValueHydrator.scala new file mode 100644 index 000000000..0504d7429 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/ValueHydrator.scala @@ -0,0 +1,200 @@ +package com.twitter.tweetypie +package hydrator + +import com.twitter.servo.util.ExceptionCounter +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.EditState +import com.twitter.tweetypie.core.ValueState +import com.twitter.util.Try + +/** + * A ValueHydrator hydrates a value of type `A`, with a hydration context of type `C`, + * and produces a value of type ValueState[A] (ValueState encapsulates the value and + * its associated HydrationState). + * + * Because ValueHydrators take a value and produce a new value, they can easily be run + * in sequence, but not in parallel. To run hydrators in parallel, see [[EditHydrator]]. + * + * A series of ValueHydrators of the same type may be run in sequence via + * `ValueHydrator.inSequence`. + * + */ +class ValueHydrator[A, C] private (val run: (A, C) => Stitch[ValueState[A]]) { + + /** + * Apply this hydrator to a value, producing a ValueState. + */ + def apply(a: A, ctx: C): Stitch[ValueState[A]] = run(a, ctx) + + /** + * Apply with an empty context: only used in tests. + */ + def apply(a: A)(implicit ev: Unit <:< C): Stitch[ValueState[A]] = + apply(a, ev(())) + + /** + * Convert this ValueHydrator to the equivalent EditHydrator. + */ + def toEditHydrator: EditHydrator[A, C] = + EditHydrator[A, C] { (a, ctx) => this.run(a, ctx).map(value => EditState(_ => value)) } + + /** + * Chains two ValueHydrators in sequence. + */ + def andThen(next: ValueHydrator[A, C]): ValueHydrator[A, C] = + ValueHydrator[A, C] { (x0, ctx) => + for { + r1 <- run(x0, ctx) + r2 <- next.run(r1.value, ctx) + } yield { + ValueState(r2.value, r1.state ++ r2.state) + } + } + + /** + * Executes this ValueHydrator conditionally based on a Gate. + */ + def ifEnabled(gate: Gate[Unit]): ValueHydrator[A, C] = + onlyIf((_, _) => gate()) + + /** + * Executes this ValueHydrator conditionally based on a boolean function. 
+ */ + def onlyIf(cond: (A, C) => Boolean): ValueHydrator[A, C] = + ValueHydrator { (a, c) => + if (cond(a, c)) { + run(a, c) + } else { + Stitch.value(ValueState.unit(a)) + } + } + + /** + * Converts a ValueHydrator of input type `A` to input type `Option[A]`. + */ + def liftOption: ValueHydrator[Option[A], C] = + liftOption(None) + + /** + * Converts a ValueHydrator of input type `A` to input type `Option[A]` with a + * default input value. + */ + def liftOption(default: A): ValueHydrator[Option[A], C] = + liftOption(Some(default)) + + private def liftOption(default: Option[A]): ValueHydrator[Option[A], C] = { + val none = Stitch.value(ValueState.unit(None)) + + ValueHydrator[Option[A], C] { (a, ctx) => + a.orElse(default) match { + case Some(a) => this.run(a, ctx).map(s => s.map(Some.apply)) + case None => none + } + } + } + + /** + * Converts a ValueHydrator of input type `A` to input type `Seq[A]`. + */ + def liftSeq: ValueHydrator[Seq[A], C] = + ValueHydrator[Seq[A], C] { (as, ctx) => + Stitch.traverse(as)(a => run(a, ctx)).map(rs => ValueState.sequence[A](rs)) + } + + /** + * Produces a new ValueHydrator that collects stats on the hydration. 
+ */ + def observe( + stats: StatsReceiver, + mkExceptionCounter: (StatsReceiver, String) => ExceptionCounter = (stats, scope) => + new ExceptionCounter(stats, scope) + ): ValueHydrator[A, C] = { + val callCounter = stats.counter("calls") + val noopCounter = stats.counter("noop") + val modifiedCounter = stats.counter("modified") + val partialCounter = stats.counter("partial") + val completedCounter = stats.counter("completed") + + val exceptionCounter = mkExceptionCounter(stats, "failures") + + ValueHydrator[A, C] { (a, ctx) => + this.run(a, ctx).respond { + case Return(ValueState(_, state)) => + callCounter.incr() + + if (state.isEmpty) { + noopCounter.incr() + } else { + if (state.modified) modifiedCounter.incr() + if (state.failedFields.nonEmpty) partialCounter.incr() + if (state.completedHydrations.nonEmpty) completedCounter.incr() + } + case Throw(ex) => + callCounter.incr() + exceptionCounter(ex) + } + } + } + + /** + * Produces a new ValueHydrator that uses a lens to extract the value to hydrate, + * using this hydrator, and then to put the updated value back in the enclosing struct. + */ + def lensed[B](lens: Lens[B, A]): ValueHydrator[B, C] = + ValueHydrator[B, C] { (b, ctx) => + this.run(lens.get(b), ctx).map { + case ValueState(value, state) => + ValueState(lens.set(b, value), state) + } + } +} + +object ValueHydrator { + + /** + * Create a ValueHydrator from a function that returns Stitch[ValueState[A]] + */ + def apply[A, C](f: (A, C) => Stitch[ValueState[A]]): ValueHydrator[A, C] = + new ValueHydrator[A, C](f) + + /** + * Produces a ValueState instance with the given value and an empty HydrationState + */ + def unit[A, C]: ValueHydrator[A, C] = + ValueHydrator { (a, _) => Stitch.value(ValueState.unit(a)) } + + /** + * Runs several ValueHydrators in sequence. 
+ */ + def inSequence[A, C](bs: ValueHydrator[A, C]*): ValueHydrator[A, C] = + bs match { + case Seq(b) => b + case Seq(b1, b2) => b1.andThen(b2) + case _ => bs.reduceLeft(_.andThen(_)) + } + + /** + * Creates a `ValueHydrator` from a Mutation. If the mutation returns None (indicating + * no change) the hydrator will return a ValueState.unmodified with the input value; + * otherwise, it will return a ValueState.modified with the mutated value. + * If the mutation throws an exception, it will be caught and lifted to Stitch.exception. + */ + def fromMutation[A, C](mutation: Mutation[A]): ValueHydrator[A, C] = + ValueHydrator[A, C] { (input, _) => + Stitch.const( + Try { + mutation(input) match { + case None => ValueState.unmodified(input) + case Some(output) => ValueState.modified(output) + } + } + ) + } + + /** + * Creates a Hydrator from a non-`Stitch` producing function. If the function throws + * an error it will be caught and converted to a Throw. + */ + def map[A, C](f: (A, C) => ValueState[A]): ValueHydrator[A, C] = + ValueHydrator[A, C] { (a, ctx) => Stitch.const(Try(f(a, ctx))) } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/package.scala new file mode 100644 index 000000000..0542cf4f5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/hydrator/package.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie + +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.TweetQuery +import com.twitter.tweetypie.thriftscala.FieldByPath +import org.apache.thrift.protocol.TField +import com.twitter.context.TwitterContext + +package object hydrator { + type TweetDataValueHydrator = ValueHydrator[TweetData, TweetQuery.Options] + type TweetDataEditHydrator = EditHydrator[TweetData, TweetQuery.Options] + + def fieldByPath(fields: TField*): FieldByPath = FieldByPath(fields.map(_.id)) + + val TwitterContext:
TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD new file mode 100644 index 000000000..dc5edd30e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/BUILD @@ -0,0 +1,21 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "image-fetcher-service/thrift/src/main/thrift:thrift-scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "mediaservices/mediainfo-server/thrift/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + "user-image-service/thrift/src/main/thrift:thrift-scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala new file mode 100644 index 000000000..c33ed5e66 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaClient.scala @@ -0,0 +1,288 @@ +package com.twitter.tweetypie +package media + +import com.twitter.mediainfo.server.{thriftscala => mis} +import com.twitter.mediaservices.commons.mediainformation.thriftscala.UserDefinedProductMetadata +import com.twitter.mediaservices.commons.photurkey.thriftscala.PrivacyType +import 
com.twitter.mediaservices.commons.servercommon.thriftscala.{ServerError => CommonServerError} +import com.twitter.mediaservices.commons.thriftscala.ProductKey +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.servo.util.FutureArrow +import com.twitter.thumbingbird.{thriftscala => ifs} +import com.twitter.tweetypie.backends.MediaInfoService +import com.twitter.tweetypie.backends.UserImageService +import com.twitter.tweetypie.core.UpstreamFailure +import com.twitter.user_image_service.{thriftscala => uis} +import com.twitter.user_image_service.thriftscala.MediaUpdateAction +import com.twitter.user_image_service.thriftscala.MediaUpdateAction.Delete +import com.twitter.user_image_service.thriftscala.MediaUpdateAction.Undelete +import java.nio.ByteBuffer +import scala.util.control.NoStackTrace + +/** + * The MediaClient trait encapsulates the various operations we make to the different media services + * backends. + */ +trait MediaClient { + import MediaClient._ + + /** + * On tweet creation, if the tweet contains media upload ids, we call this operation to process + * that media and get back metadata about the media. + */ + def processMedia: ProcessMedia + + /** + * On the read path, when hydrating a MediaEntity, we call this operation to get metadata + * about existing media. + */ + def getMediaMetadata: GetMediaMetadata + + def deleteMedia: DeleteMedia + + def undeleteMedia: UndeleteMedia +} + +/** + * Request type for the MediaClient.updateMedia operation. 
+ */ +private case class UpdateMediaRequest( + mediaKey: MediaKey, + action: MediaUpdateAction, + tweetId: TweetId) + +case class DeleteMediaRequest(mediaKey: MediaKey, tweetId: TweetId) { + private[media] def toUpdateMediaRequest = UpdateMediaRequest(mediaKey, Delete, tweetId) +} + +case class UndeleteMediaRequest(mediaKey: MediaKey, tweetId: TweetId) { + private[media] def toUpdateMediaRequest = UpdateMediaRequest(mediaKey, Undelete, tweetId) +} + +/** + * Request type for the MediaClient.processMedia operation. + */ +case class ProcessMediaRequest( + mediaIds: Seq[MediaId], + userId: UserId, + tweetId: TweetId, + isProtected: Boolean, + productMetadata: Option[Map[MediaId, UserDefinedProductMetadata]]) { + private[media] def toProcessTweetMediaRequest = + uis.ProcessTweetMediaRequest(mediaIds, userId, tweetId) + + private[media] def toUpdateProductMetadataRequests(mediaKeys: Seq[MediaKey]) = + productMetadata match { + case None => Seq() + case Some(map) => + mediaKeys.flatMap { mediaKey => + map.get(mediaKey.mediaId).map { metadata => + uis.UpdateProductMetadataRequest(ProductKey(tweetId.toString, mediaKey), metadata) + } + } + } +} + +/** + * Request type for the MediaClient.getMediaMetadata operation. + */ +case class MediaMetadataRequest( + mediaKey: MediaKey, + tweetId: TweetId, + isProtected: Boolean, + extensionsArgs: Option[ByteBuffer]) { + private[media] def privacyType = MediaClient.toPrivacyType(isProtected) + + /** + * For debugging purposes, make a copy of the byte buffer at object + * creation time, so that we can inspect the original buffer if there + * is an error. + * + * Once we have found the problem, this method should be removed.
+ */ + val savedExtensionArgs: Option[ByteBuffer] = + extensionsArgs.map { buf => + val b = buf.asReadOnlyBuffer() + val ary = new Array[Byte](b.remaining) + b.get(ary) + ByteBuffer.wrap(ary) + } + + private[media] def toGetTweetMediaInfoRequest = + mis.GetTweetMediaInfoRequest( + mediaKey = mediaKey, + tweetId = Some(tweetId), + privacyType = privacyType, + stratoExtensionsArgs = extensionsArgs + ) +} + +object MediaClient { + import MediaExceptions._ + + /** + * Operation type for processing uploaded media during tweet creation. + */ + type ProcessMedia = FutureArrow[ProcessMediaRequest, Seq[MediaKey]] + + /** + * Operation type for deleting and undeleting a tweet's media. + */ + private[media] type UpdateMedia = FutureArrow[UpdateMediaRequest, Unit] + + type UndeleteMedia = FutureArrow[UndeleteMediaRequest, Unit] + + type DeleteMedia = FutureArrow[DeleteMediaRequest, Unit] + + /** + * Operation type for getting media metadata for existing media during tweet reads. + */ + type GetMediaMetadata = FutureArrow[MediaMetadataRequest, MediaMetadata] + + /** + * Builds an UpdateMedia FutureArrow using UserImageService endpoints. + */ + private[media] object UpdateMedia { + def apply(updateTweetMedia: UserImageService.UpdateTweetMedia): UpdateMedia = + FutureArrow[UpdateMediaRequest, Unit] { r => + updateTweetMedia(uis.UpdateTweetMediaRequest(r.mediaKey, r.action, Some(r.tweetId))).unit + }.translateExceptions(handleMediaExceptions) + } + + /** + * Builds a ProcessMedia FutureArrow using UserImageService endpoints.
+ */ + object ProcessMedia { + + def apply( + updateProductMetadata: UserImageService.UpdateProductMetadata, + processTweetMedia: UserImageService.ProcessTweetMedia + ): ProcessMedia = { + + val updateProductMetadataSeq = updateProductMetadata.liftSeq + + FutureArrow[ProcessMediaRequest, Seq[MediaKey]] { req => + for { + mediaKeys <- processTweetMedia(req.toProcessTweetMediaRequest).map(_.mediaKeys) + _ <- updateProductMetadataSeq(req.toUpdateProductMetadataRequests(mediaKeys)) + } yield { + sortKeysByIds(req.mediaIds, mediaKeys) + } + }.translateExceptions(handleMediaExceptions) + } + + /** + * Sort the mediaKeys Seq based on the media id ordering specified by the + * caller's request mediaIds Seq. + */ + private def sortKeysByIds(mediaIds: Seq[MediaId], mediaKeys: Seq[MediaKey]): Seq[MediaKey] = { + val idToKeyMap = mediaKeys.map(key => (key.mediaId, key)).toMap + mediaIds.flatMap(idToKeyMap.get) + } + } + + /** + * Builds a GetMediaMetadata FutureArrow using MediaInfoService endpoints. + */ + object GetMediaMetadata { + + private[this] val log = Logger(getClass) + + def apply(getTweetMediaInfo: MediaInfoService.GetTweetMediaInfo): GetMediaMetadata = + FutureArrow[MediaMetadataRequest, MediaMetadata] { req => + getTweetMediaInfo(req.toGetTweetMediaInfoRequest).map { res => + MediaMetadata( + res.mediaKey, + res.assetUrlHttps, + res.sizes.toSet, + res.mediaInfo, + res.additionalMetadata.flatMap(_.productMetadata), + res.stratoExtensionsReply, + res.additionalMetadata + ) + } + }.translateExceptions(handleMediaExceptions) + } + + private[media] def toPrivacyType(isProtected: Boolean): PrivacyType = + if (isProtected) PrivacyType.Protected else PrivacyType.Public + + /** + * Constructs an implementation of the MediaClient interface using backend instances. 
+ */ + def fromBackends( + userImageService: UserImageService, + mediaInfoService: MediaInfoService + ): MediaClient = + new MediaClient { + + val getMediaMetadata = + GetMediaMetadata( + getTweetMediaInfo = mediaInfoService.getTweetMediaInfo + ) + + val processMedia = + ProcessMedia( + userImageService.updateProductMetadata, + userImageService.processTweetMedia + ) + + private val updateMedia = + UpdateMedia( + userImageService.updateTweetMedia + ) + + val deleteMedia: FutureArrow[DeleteMediaRequest, Unit] = + FutureArrow[DeleteMediaRequest, Unit](r => updateMedia(r.toUpdateMediaRequest)) + + val undeleteMedia: FutureArrow[UndeleteMediaRequest, Unit] = + FutureArrow[UndeleteMediaRequest, Unit](r => updateMedia(r.toUpdateMediaRequest)) + } +} + +/** + * Exceptions from the various media services backends that indicate bad requests (validation + * failures) are converted to a MediaClientException. Exceptions that indicate a server + * error are converted to a UpstreamFailure.MediaServiceServerError. + * + * MediaNotFound: Given media id does not exist. It could have been expired + * BadMedia: Given media is corrupted and can not be processed. + * InvalidMedia: Given media has failed to pass one or more validations (size, dimensions, type etc.) + * BadRequest Request is bad, but reason not available + */ +object MediaExceptions { + import UpstreamFailure.MediaServiceServerError + + // Extends NoStackTrace because the circumstances in which the + // exceptions are generated don't yield useful stack traces + // (e.g. you can't tell from the stack trace anything about what + // backend call was being made.) 
+ abstract class MediaClientException(message: String) extends Exception(message) with NoStackTrace + + class MediaNotFound(message: String) extends MediaClientException(message) + class BadMedia(message: String) extends MediaClientException(message) + class InvalidMedia(message: String) extends MediaClientException(message) + class BadRequest(message: String) extends MediaClientException(message) + + // translations from various media service errors into MediaExceptions + // A bad request whose reason is absent or not listed below becomes a generic BadRequest; + // inputs that match none of the cases are left undefined by this partial function. + val handleMediaExceptions: PartialFunction[Any, Exception] = { + case uis.BadRequest(msg, reason) => + reason match { + case Some(uis.BadRequestReason.MediaNotFound) => new MediaNotFound(msg) + case Some(uis.BadRequestReason.BadMedia) => new BadMedia(msg) + case Some(uis.BadRequestReason.InvalidMedia) => new InvalidMedia(msg) + case _ => new BadRequest(msg) + } + case ifs.BadRequest(msg, reason) => + reason match { + case Some(ifs.BadRequestReason.NotFound) => new MediaNotFound(msg) + case _ => new BadRequest(msg) + } + case mis.BadRequest(msg, reason) => + reason match { + case Some(mis.BadRequestReason.MediaNotFound) => new MediaNotFound(msg) + case _ => new BadRequest(msg) + } + case ex: CommonServerError => MediaServiceServerError(ex) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyClassifier.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyClassifier.scala new file mode 100644 index 000000000..013bd0dea --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyClassifier.scala @@ -0,0 +1,25 @@ +package com.twitter.tweetypie.media + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.thriftscala.MediaCategory + +/** + * Classifiers that test a MediaKey's media category; each can be used as a predicate + * or as a pattern extractor. + */ +object MediaKeyClassifier { + + /** + * Matches media keys whose category is one of `categories`. + */ + class Classifier(categories: Set[MediaCategory]) { + + /** + * True when the key's media category is in `categories`. + */ + def apply(mediaKey: MediaKey): Boolean = + categories.contains(mediaKey.mediaCategory) + + /** + * Extractor form of `apply`: yields the key itself when it matches, None otherwise. + */ + def unapply(mediaKey: MediaKey): Option[MediaKey] = + 
apply(mediaKey) match { + case false => None + case true => Some(mediaKey) + } + } + + // Video covers both the TweetVideo and AmplifyVideo categories. + val isImage: Classifier = new Classifier(Set(MediaCategory.TweetImage)) + val isGif: Classifier = new Classifier(Set(MediaCategory.TweetGif)) + val isVideo: Classifier = new Classifier( + Set(MediaCategory.TweetVideo, MediaCategory.AmplifyVideo) + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyUtil.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyUtil.scala new file mode 100644 index 000000000..6a62e1d3d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaKeyUtil.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie.media + +import com.twitter.mediaservices.commons.thriftscala._ +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tweetypie.thriftscala.MediaEntity + +object MediaKeyUtil { + + /** + * Returns the entity's media key, throwing IllegalStateException when it is not set. + */ + def get(mediaEntity: MediaEntity): MediaKey = + mediaEntity.mediaKey.getOrElse { + throw new IllegalStateException("""Media key undefined. 
This state is unexpected, the media + |key should be set by the tweet creation for new tweets + |and by `MediaKeyHydrator` for legacy tweets.""".stripMargin) + } + + /** + * Maps the key's media category to a content type. Gifs map to VideoMp4, and both video + * categories map to VideoGeneric. Any other category throws NotImplementedError. + */ + def contentType(mediaKey: MediaKey): MediaContentType = + mediaKey.mediaCategory match { + case MediaCategory.TweetImage => MediaContentType.ImageJpeg + case MediaCategory.TweetGif => MediaContentType.VideoMp4 + case MediaCategory.TweetVideo => MediaContentType.VideoGeneric + case MediaCategory.AmplifyVideo => MediaContentType.VideoGeneric + case mediaCats => throw new NotImplementedError(mediaCats.toString) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaMetadata.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaMetadata.scala new file mode 100644 index 000000000..135ec014d --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/media/MediaMetadata.scala @@ -0,0 +1,58 @@ +package com.twitter.tweetypie +package media + +import com.twitter.mediaservices.commons.mediainformation.{thriftscala => mic} +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.mediaservices.commons.tweetmedia.thriftscala._ +import com.twitter.tweetypie.thriftscala._ +import java.nio.ByteBuffer + +/** + * MediaMetadata encapsulates the metadata about tweet media that we receive from + * the various media services backends on tweet create or on tweet read. This data, + * combined with data stored on the tweet, is sufficient to hydrate tweet media entities. 
+ */ +case class MediaMetadata( + mediaKey: MediaKey, + assetUrlHttps: String, + sizes: Set[MediaSize], + mediaInfo: MediaInfo, + productMetadata: Option[mic.UserDefinedProductMetadata] = None, + extensionsReply: Option[ByteBuffer] = None, + additionalMetadata: Option[mic.AdditionalMetadata] = None) { + /** + * The http form of the asset url, derived from assetUrlHttps. + */ + def assetUrlHttp: String = MediaUrl.httpsToHttp(assetUrlHttps) + + /** + * The attributable user id from the ownership info of additionalMetadata, if any. + */ + def attributableUserId: Option[UserId] = + additionalMetadata.flatMap(_.ownershipInfo).flatMap(_.attributableUserId) + + /** + * Returns a copy of `mediaEntity` populated from this metadata. + * + * additionalMetadata is copied only when `includeAdditionalMetadata` is true, and + * sourceUserId is set only when the attributable user differs from `tweetUserId`. + * Fails the `require` check (IllegalArgumentException) when the entity's mediaId does + * not match this metadata's media key. + */ + def updateEntity( + mediaEntity: MediaEntity, + tweetUserId: UserId, + includeAdditionalMetadata: Boolean + ): MediaEntity = { + // Abort if we accidentally try to replace the media. This + // indicates a logic error that caused mismatched media info. + // This could be internal or external to TweetyPie. + require( + mediaEntity.mediaId == mediaKey.mediaId, + "Tried to update media with mediaId=%s with mediaInfo.mediaId=%s" + .format(mediaEntity.mediaId, mediaKey.mediaId) + ) + + mediaEntity.copy( + mediaUrl = assetUrlHttp, + mediaUrlHttps = assetUrlHttps, + sizes = sizes, + mediaInfo = Some(mediaInfo), + extensionsReply = extensionsReply, + // the following two fields are deprecated and will be removed soon + nsfw = false, + mediaPath = MediaUrl.mediaPathFromUrl(assetUrlHttps), + metadata = productMetadata, + additionalMetadata = additionalMetadata.filter(_ => includeAdditionalMetadata), + // MIS allows media to be shared among authorized users so add in sourceUserId if it doesn't + // match the current tweet's userId. 
+ sourceUserId = attributableUserId.filter(_ != tweetUserId) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/package.scala new file mode 100644 index 000000000..c2f836e97 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/package.scala @@ -0,0 +1,114 @@ +package com.twitter + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.thriftscala._ +import com.twitter.gizmoduck.thriftscala.QueryFields + +package object tweetypie { + // common imports that many classes need, will probably expand this list in the future. + type Logger = com.twitter.util.logging.Logger + val Logger: com.twitter.util.logging.Logger.type = com.twitter.util.logging.Logger + type StatsReceiver = com.twitter.finagle.stats.StatsReceiver + val TweetLenses: com.twitter.tweetypie.util.TweetLenses.type = + com.twitter.tweetypie.util.TweetLenses + + type Future[A] = com.twitter.util.Future[A] + val Future: com.twitter.util.Future.type = com.twitter.util.Future + + type Duration = com.twitter.util.Duration + val Duration: com.twitter.util.Duration.type = com.twitter.util.Duration + + type Time = com.twitter.util.Time + val Time: com.twitter.util.Time.type = com.twitter.util.Time + + type Try[A] = com.twitter.util.Try[A] + val Try: com.twitter.util.Try.type = com.twitter.util.Try + + type Throw[A] = com.twitter.util.Throw[A] + val Throw: com.twitter.util.Throw.type = com.twitter.util.Throw + + type Return[A] = com.twitter.util.Return[A] + val Return: com.twitter.util.Return.type = com.twitter.util.Return + + type Gate[T] = com.twitter.servo.util.Gate[T] + val Gate: com.twitter.servo.util.Gate.type = com.twitter.servo.util.Gate + + type Effect[A] = com.twitter.servo.util.Effect[A] + val Effect: com.twitter.servo.util.Effect.type = com.twitter.servo.util.Effect + + type FutureArrow[A, B] = 
com.twitter.servo.util.FutureArrow[A, B] + val FutureArrow: com.twitter.servo.util.FutureArrow.type = com.twitter.servo.util.FutureArrow + + type FutureEffect[A] = com.twitter.servo.util.FutureEffect[A] + val FutureEffect: com.twitter.servo.util.FutureEffect.type = com.twitter.servo.util.FutureEffect + + type Lens[A, B] = com.twitter.servo.data.Lens[A, B] + val Lens: com.twitter.servo.data.Lens.type = com.twitter.servo.data.Lens + + type Mutation[A] = com.twitter.servo.data.Mutation[A] + val Mutation: com.twitter.servo.data.Mutation.type = com.twitter.servo.data.Mutation + + type User = com.twitter.gizmoduck.thriftscala.User + val User: com.twitter.gizmoduck.thriftscala.User.type = com.twitter.gizmoduck.thriftscala.User + type Safety = com.twitter.gizmoduck.thriftscala.Safety + val Safety: com.twitter.gizmoduck.thriftscala.Safety.type = + com.twitter.gizmoduck.thriftscala.Safety + type UserField = com.twitter.gizmoduck.thriftscala.QueryFields + val UserField: QueryFields.type = com.twitter.gizmoduck.thriftscala.QueryFields + + type Tweet = thriftscala.Tweet + val Tweet: com.twitter.tweetypie.thriftscala.Tweet.type = thriftscala.Tweet + + type ThriftTweetService = TweetServiceInternal.MethodPerEndpoint + + // Primitive aliases that name the domain meaning of ids, counts and codes. + type TweetId = Long + type UserId = Long + type MediaId = Long + type AppId = Long + type KnownDeviceToken = String + type ConversationId = Long + type CommunityId = Long + type PlaceId = String + type FieldId = Short + type Count = Long + type CountryCode = String // ISO 3166-1-alpha-2 + type CreativesContainerId = Long + + /** + * True when the tweet has a place id or geo coordinates. + */ + def hasGeo(tweet: Tweet): Boolean = + TweetLenses.placeId.get(tweet).nonEmpty || + TweetLenses.geoCoordinates.get(tweet).nonEmpty + + // Field accessors that read the tweet through the corresponding TweetLenses lens. + def getUserId(tweet: Tweet): UserId = TweetLenses.userId.get(tweet) + def getText(tweet: Tweet): String = TweetLenses.text.get(tweet) + def getCreatedAt(tweet: Tweet): Long = TweetLenses.createdAt.get(tweet) + def getCreatedVia(tweet: Tweet): String = TweetLenses.createdVia.get(tweet) + def getReply(tweet: 
Tweet): Option[Reply] = TweetLenses.reply.get(tweet) + def getDirectedAtUser(tweet: Tweet): Option[DirectedAtUser] = + TweetLenses.directedAtUser.get(tweet) + def getShare(tweet: Tweet): Option[Share] = TweetLenses.share.get(tweet) + def getQuotedTweet(tweet: Tweet): Option[QuotedTweet] = TweetLenses.quotedTweet.get(tweet) + def getUrls(tweet: Tweet): Seq[UrlEntity] = TweetLenses.urls.get(tweet) + def getMedia(tweet: Tweet): Seq[MediaEntity] = TweetLenses.media.get(tweet) + def getMediaKeys(tweet: Tweet): Seq[MediaKey] = TweetLenses.mediaKeys.get(tweet) + def getMentions(tweet: Tweet): Seq[MentionEntity] = TweetLenses.mentions.get(tweet) + def getCashtags(tweet: Tweet): Seq[CashtagEntity] = TweetLenses.cashtags.get(tweet) + def getHashtags(tweet: Tweet): Seq[HashtagEntity] = TweetLenses.hashtags.get(tweet) + def getMediaTagMap(tweet: Tweet): Map[MediaId, Seq[MediaTag]] = TweetLenses.mediaTagMap.get(tweet) + /** + * True when the tweet's core data carries a share. + */ + def isRetweet(tweet: Tweet): Boolean = tweet.coreData.flatMap(_.share).nonEmpty + /** + * True when the reply targets a status id and is addressed to `authorUserId`. + */ + def isSelfReply(authorUserId: UserId, r: Reply): Boolean = + r.inReplyToStatusId.isDefined && (r.inReplyToUserId == authorUserId) + /** + * True when the tweet has a reply that is a self-reply by the tweet's own author. + */ + def isSelfReply(tweet: Tweet): Boolean = { + getReply(tweet).exists { r => isSelfReply(getUserId(tweet), r) } + } + def getConversationId(tweet: Tweet): Option[TweetId] = TweetLenses.conversationId.get(tweet) + def getSelfThreadMetadata(tweet: Tweet): Option[SelfThreadMetadata] = + TweetLenses.selfThreadMetadata.get(tweet) + def getCardReference(tweet: Tweet): Option[CardReference] = TweetLenses.cardReference.get(tweet) + def getEscherbirdAnnotations(tweet: Tweet): Option[EscherbirdEntityAnnotations] = + TweetLenses.escherbirdEntityAnnotations.get(tweet) + def getCommunities(tweet: Tweet): Option[Communities] = TweetLenses.communities.get(tweet) + /** + * The tweet's timestamp: taken from the id when it is a snowflake id, otherwise from the + * createdAt field interpreted as seconds. + * + * NOTE(review): createdAt is narrowed with toInt, so a value above Int.MaxValue seconds + * would wrap. Presumably unreachable for non-snowflake tweets; confirm. + */ + def getTimestamp(tweet: Tweet): Time = + if (SnowflakeId.isSnowflakeId(tweet.id)) SnowflakeId(tweet.id).time + else Time.fromSeconds(getCreatedAt(tweet).toInt) +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD new file mode 100644 index 000000000..a57db5f55 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/BUILD @@ -0,0 +1,82 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/fasterxml/jackson/module:jackson-module-scala", + "3rdparty/jvm/com/ibm/icu:icu4j", + "3rdparty/jvm/com/twitter/bijection:core", + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/com/twitter/bijection:thrift", + "3rdparty/jvm/org/apache/thrift:libthrift", + "audience-rewards/thrift/src/main/thrift:thrift-scala", + "creatives-container/thrift/src/main/thrift:creatives-container-service-scala", + "featureswitches/featureswitches-core/src/main/scala:recipient", + "featureswitches/featureswitches-core/src/main/scala:useragent", + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/primitives", + "geoduck/util/src/main/scala/com/twitter/geoduck/util/service", + "passbird/thrift-only/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/json", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "src/java/com/twitter/common/text/language:language-identifier", + "src/java/com/twitter/common/text/pipeline", + "src/scala/com/twitter/search/blender/services/strato", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/consumer_privacy/mention_controls:thrift-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/dataproducts:enrichments_profilegeo-scala", + 
"src/thrift/com/twitter/dataproducts:service-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/escherbird/common:common-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/service/talon/gen:thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-label-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/spam/rtf:tweet-rtf-event-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-compat/src/main/scala/com/twitter/stitch/compat", + "stitch/stitch-core", + "stitch/stitch-timelineservice", + "strato/src/main/scala/com/twitter/strato/catalog", + "strato/src/main/scala/com/twitter/strato/client", + "strato/src/main/scala/com/twitter/strato/data", + "strato/src/main/scala/com/twitter/strato/thrift", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/media", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context/src/main/scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + "vibes/src/main/thrift/com/twitter/vibes:vibes-scala", + "visibility/common/src/main/scala/com/twitter/visibility/common/tflock", + "visibility/common/src/main/scala/com/twitter/visibility/common/user_result", + "visibility/common/src/main/thrift/com/twitter/visibility:action-scala", + "visibility/lib:tweets", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala new file mode 100644 index 000000000..fd1ad5fd3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CacheStitch.scala @@ -0,0 +1,87 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.repository._ +import com.twitter.stitch.Stitch +import com.twitter.util.Try + +object CacheStitch { + + /** + * Cacheable defines a function that takes a cache query and a Try value, + * and returns what should be written to cache, as a Option[StitchLockingCache.Val]. + * + * None signifies that this value should not be written to cache. + * + * Val can be one of Found[V], NotFound, and Deleted. The function will determine what kinds + * of values and exceptions (captured in the Try) correspond to which kind of cached values. 
+ */ + type Cacheable[Q, V] = (Q, Try[V]) => Option[StitchLockingCache.Val[V]] + + // Cache successful values as Found, stitch.NotFound as NotFound, and don't cache other exceptions + def cacheFoundAndNotFound[K, V]: CacheStitch.Cacheable[K, V] = + (_, t: Try[V]) => + t match { + // Write successful values as Found + case Return(v) => Some(StitchLockingCache.Val.Found[V](v)) + + // Write stitch.NotFound as NotFound + case Throw(com.twitter.stitch.NotFound) => Some(StitchLockingCache.Val.NotFound) + + // Don't write other exceptions back to cache + case _ => None + } +} + +/** + * A caching wrapper around `repo`, itself usable as a `Q => Stitch[V]`. + * + * Each query is mapped to a cache key with `queryToKey` and looked up in `cache`; `handler` + * turns the cache result into an action, and `cacheable` decides what a read-through + * result writes back to cache. + */ +case class CacheStitch[Q, K, V]( + repo: Q => Stitch[V], + cache: StitchLockingCache[K, V], + queryToKey: Q => K, + handler: CachedResult.Handler[K, V], + cacheable: CacheStitch.Cacheable[Q, V]) + extends (Q => Stitch[V]) { + import com.twitter.servo.repository.CachedResultAction._ + + // Reads the key from cache; a failed read is captured as CachedResult.Failed + // instead of failing the Stitch. + private[this] def getFromCache(key: K): Stitch[CachedResult[K, V]] = { + cache + .get(key) + .handle { + case t => CachedResult.Failed(key, t) + } + } + + // Queries `repo` and, when `cacheable` returns a value for the outcome, writes it to cache. + // Exposed for testing + private[repository] def readThrough(query: Q): Stitch[V] = + repo(query).liftToTry.applyEffect { value: Try[V] => + cacheable(query, value) match { + case Some(v) => + // cacheable returned Some of a StitchLockingCache.Val to cache + // + // This is async to ensure that we don't wait for the cache + // update to complete before returning. This also ignores + // any exceptions from setting the value. 
+ Stitch.async(cache.lockAndSet(queryToKey(query), v)) + case None => + // cacheable returned None so don't cache + Stitch.Unit + } + }.lowerFromTry + + // Carries out the action chosen by the handler. HandleAsDoNotCache queries `repo` + // directly, without the cache write that readThrough performs. + private[this] def handle(query: Q, action: CachedResultAction[V]): Stitch[V] = + action match { + case HandleAsFound(value) => Stitch(value) + case HandleAsMiss => readThrough(query) + case HandleAsDoNotCache => repo(query) + case HandleAsFailed(t) => Stitch.exception(t) + case HandleAsNotFound => Stitch.NotFound + case t: TransformSubAction[V] => handle(query, t.action).map(t.f) + case SoftExpiration(subAction) => + // Starts a readThrough via Stitch.async, then continues with the sub-action. + Stitch + .async(readThrough(query)) + .flatMap { _ => handle(query, subAction) } + } + + // Looks the query's key up in cache and handles the result as directed by `handler`. + override def apply(query: Q): Stitch[V] = + getFromCache(queryToKey(query)) + .flatMap { result: CachedResult[K, V] => handle(query, handler(result)) } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CachingTweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CachingTweetRepository.scala new file mode 100644 index 000000000..0ebf12998 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CachingTweetRepository.scala @@ -0,0 +1,329 @@ +package com.twitter.tweetypie +package repository + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.finagle.tracing.Trace +import com.twitter.servo.cache._ +import com.twitter.servo.repository._ +import com.twitter.servo.util.Transformer +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.FilteredState.Unavailable.BounceDeleted +import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.CachedBounceDeleted.isBounceDeleted +import 
com.twitter.tweetypie.repository.CachedBounceDeleted.toBounceDeletedTweetResult +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.util.Base64Long + +/** + * Cache key for a tweet, built from the cache version and the base64-encoded tweet id. + */ +case class TweetKey(cacheVersion: Int, id: TweetId) + extends ScopedCacheKey("t", "t", cacheVersion, Base64Long.toBase64(id)) + +/** + * Builds TweetKeys for a fixed cache version from tweet ids, tweets, or cached tweets. + */ +case class TweetKeyFactory(cacheVersion: Int) { + val fromId: TweetId => TweetKey = (id: TweetId) => TweetKey(cacheVersion, id) + val fromTweet: Tweet => TweetKey = (tweet: Tweet) => fromId(tweet.id) + val fromCachedTweet: CachedTweet => TweetKey = (ms: CachedTweet) => fromTweet(ms.tweet) +} + +// Helper methods for working with cached bounce-deleted tweets, +// grouped together here to keep the definitions of "bounce +// deleted" in one place. +object CachedBounceDeleted { + // CachedTweet for use in CachingTweetStore + def toBounceDeletedCachedTweet(tweetId: TweetId): CachedTweet = + CachedTweet( + tweet = Tweet(id = tweetId), + isBounceDeleted = Some(true) + ) + + def isBounceDeleted(cached: Cached[CachedTweet]): Boolean = + cached.status == CachedValueStatus.Found && + cached.value.flatMap(_.isBounceDeleted).contains(true) + + // TweetResult for use in CachingTweetRepository + def toBounceDeletedTweetResult(tweetId: TweetId): TweetResult = + TweetResult( + TweetData( + tweet = Tweet(id = tweetId), + isBounceDeleted = true + ) + ) + + def isBounceDeleted(tweetResult: TweetResult): Boolean = + tweetResult.value.isBounceDeleted +} + +/** + * Adapts a cache of TweetData into a cache of TweetResult by unwrapping the result's value + * on the way in and wrapping the data in a TweetResult on the way out. Keys are unchanged. + */ +object TweetResultCache { + def apply( + tweetDataCache: Cache[TweetId, Cached[TweetData]] + ): Cache[TweetId, Cached[TweetResult]] = { + val transformer: Transformer[Cached[TweetResult], Cached[TweetData]] = + new Transformer[Cached[TweetResult], Cached[TweetData]] { + def to(cached: Cached[TweetResult]) = + Return(cached.map(_.value)) + + def from(cached: Cached[TweetData]) = + Return(cached.map(TweetResult(_))) + } + + new KeyValueTransformingCache( + tweetDataCache, + transformer, + identity + ) + } +} + +/** + * Adapts a cache of CachedTweet keyed by TweetKey into a cache of TweetData keyed by TweetId, + * using `tweetKeyFactory` to map ids to keys. + */ +object TweetDataCache { + def 
apply( + cachedTweetCache: Cache[TweetKey, Cached[CachedTweet]], + tweetKeyFactory: TweetId => TweetKey + ): Cache[TweetId, Cached[TweetData]] = { + val transformer: Transformer[Cached[TweetData], Cached[CachedTweet]] = + new Transformer[Cached[TweetData], Cached[CachedTweet]] { + def to(cached: Cached[TweetData]) = + Return(cached.map(_.toCachedTweet)) + + def from(cached: Cached[CachedTweet]) = + Return(cached.map(c => TweetData.fromCachedTweet(c, cached.cachedAt))) + } + + new KeyValueTransformingCache( + cachedTweetCache, + transformer, + tweetKeyFactory + ) + } +} + +object TombstoneTtl { + import CachedResult._ + + /** + * A ttl calculator that always returns `ttl`. + */ + def fixed(ttl: Duration): CachedNotFound[TweetId] => Duration = + _ => ttl + + /** + * A simple ttl calculator that is set to `min` if the age is less than `from`, + * then linearly interpolated between `min` and `max` when the age is between `from` and `to`, + * and then equal to `max` if the age is greater than `to`. + * + * The age is the time between the creation time encoded in the snowflake id and the + * moment the tombstone was cached. + */ + def linear( + min: Duration, + max: Duration, + from: Duration, + to: Duration + ): CachedNotFound[TweetId] => Duration = { + // Slope of the interpolation. When `from` equals `to` this divides by zero, but the + // interpolating branch below cannot be reached in that case. + val rate = (max - min).inMilliseconds / (to - from).inMilliseconds.toDouble + cached => { + if (SnowflakeId.isSnowflakeId(cached.key)) { + val age = cached.cachedAt - SnowflakeId(cached.key).time + if (age <= from) min + else if (age >= to) max + else min + (age - from) * rate + } else { + // When it's not a snowflake id, cache it for the maximum time. 
+ max + } + } + } + + /** + * Checks if the given `cached` value is an expired tombstone + */ + def isExpired( + tombstoneTtl: CachedNotFound[TweetId] => Duration, + cached: CachedNotFound[TweetId] + ): Boolean = + Time.now - cached.cachedAt > tombstoneTtl(cached) +} + +object CachingTweetRepository { + import CachedResult._ + import CachedResultAction._ + + val failuresLog: Logger = Logger("com.twitter.tweetypie.repository.CachingTweetRepoFailures") + + /** + * Wraps `underlying` with a cache keyed by tweet id. Queries whose options have + * cacheControl.readFromCache unset bypass the cache and go straight to `underlying`. + */ + def apply( + cache: LockingCache[TweetId, Cached[TweetResult]], + tombstoneTtl: CachedNotFound[TweetId] => Duration, + stats: StatsReceiver, + clientIdHelper: ClientIdHelper, + logCacheExceptions: Gate[Unit] = Gate.False, + )( + underlying: TweetResultRepository.Type + ): TweetResultRepository.Type = { + val cachingRepo: ((TweetId, TweetQuery.Options)) => Stitch[TweetResult] = + CacheStitch[(TweetId, TweetQuery.Options), TweetId, TweetResult]( + repo = underlying.tupled, + cache = StitchLockingCache( + underlying = cache, + picker = new TweetRepoCachePicker[TweetResult](_.value.cachedAt) + ), + queryToKey = _._1, // extract tweet id from (TweetId, TweetQuery.Options) + handler = mkHandler(tombstoneTtl, stats, logCacheExceptions, clientIdHelper), + cacheable = cacheable + ) + + (tweetId, options) => + if (options.cacheControl.readFromCache) { + cachingRepo((tweetId, options)) + } else { + underlying(tweetId, options) + } + } + + /** + * Decides what a repository result writes back to cache. Nothing is written when the + * query's cacheControl.writeToCache is unset. + */ + val cacheable: CacheStitch.Cacheable[(TweetId, TweetQuery.Options), TweetResult] = { + case ((tweetId, options), tweetResult) => + if (!options.cacheControl.writeToCache) { + None + } else { + tweetResult match { + // Write stitch.NotFound as a NotFound cache entry + case Throw(com.twitter.stitch.NotFound) => + Some(StitchLockingCache.Val.NotFound) + + // Write FilteredState.TweetDeleted as a Deleted cache entry + case Throw(TweetDeleted) => + Some(StitchLockingCache.Val.Deleted) + + // Write BounceDeleted as a Found cache entry, with the CachedTweet.isBounceDeleted flag. 
+ // servo.cache.thriftscala.CachedValueStatus.Deleted tombstones do not allow for storing + // app-defined metadata. + case Throw(BounceDeleted) => + Some(StitchLockingCache.Val.Found(toBounceDeletedTweetResult(tweetId))) + + // Regular found tweets are not written to cache here - instead the cacheable result is + // written to cache via TweetHydration.cacheChanges + case Return(_: TweetResult) => None + + // Don't write other exceptions back to cache + case _ => None + } + } + } + + object LogLens { + private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + + /** + * Logs `data` together with the default request fields as a single JSON object at + * info level. + */ + def logMessage(logger: Logger, clientIdHelper: ClientIdHelper, data: (String, Any)*): Unit = { + val allData = data ++ defaultData(clientIdHelper) + val msg = mapper.writeValueAsString(Map(allData: _*)) + logger.info(msg) + } + + // Client id, trace id and viewer fields read from the current TwitterContext. + private def defaultData(clientIdHelper: ClientIdHelper): Seq[(String, Any)] = { + val viewer = TwitterContext() + Seq( + "client_id" -> clientIdHelper.effectiveClientId, + "trace_id" -> Trace.id.traceId.toString, + "audit_ip" -> viewer.flatMap(_.auditIp), + "application_id" -> viewer.flatMap(_.clientApplicationId), + "user_agent" -> viewer.flatMap(_.userAgent), + "authenticated_user_id" -> viewer.flatMap(_.authenticatedUserId) + ) + } + } + + /** + * Builds the handler that maps each kind of cache read result to a CachedResultAction, + * incrementing one counter under `stats` per outcome. Cache read failures are logged to + * failuresLog when the `logCacheExceptions` gate is open. + */ + def mkHandler( + tombstoneTtl: CachedNotFound[TweetId] => Duration, + stats: StatsReceiver, + logCacheExceptions: Gate[Unit], + clientIdHelper: ClientIdHelper, + ): Handler[TweetId, TweetResult] = { + val baseHandler = defaultHandler[TweetId, TweetResult] + val cacheErrorState = HydrationState(modified = false, cacheErrorEncountered = true) + val cachedFoundCounter = stats.counter("cached_found") + val notFoundCounter = stats.counter("not_found") + val cachedNotFoundAsNotFoundCounter = stats.counter("cached_not_found_as_not_found") + val cachedNotFoundAsMissCounter = stats.counter("cached_not_found_as_miss") + val cachedDeletedCounter = stats.counter("cached_deleted") + val cachedBounceDeletedCounter = 
stats.counter("cached_bounce_deleted") + val failedCounter = stats.counter("failed") + val otherCounter = stats.counter("other") + + { + case res @ CachedFound(_, tweetResult, _, _) => + if (isBounceDeleted(tweetResult)) { + cachedBounceDeletedCounter.incr() + HandleAsFailed(FilteredState.Unavailable.BounceDeleted) + } else { + cachedFoundCounter.incr() + baseHandler(res) + } + + case res @ NotFound(_) => + notFoundCounter.incr() + baseHandler(res) + + // expires NotFound tombstones if old enough + case cached @ CachedNotFound(_, _, _) => + if (TombstoneTtl.isExpired(tombstoneTtl, cached)) { + cachedNotFoundAsMissCounter.incr() + HandleAsMiss + } else { + cachedNotFoundAsNotFoundCounter.incr() + HandleAsNotFound + } + + case CachedDeleted(_, _, _) => + cachedDeletedCounter.incr() + HandleAsFailed(FilteredState.Unavailable.TweetDeleted) + + // don't attempt to write back to cache on a cache read failure + case Failed(k, t) => + // After result is found, mark it with cacheErrorEncountered + failedCounter.incr() + + if (logCacheExceptions()) { + LogLens.logMessage( + failuresLog, + clientIdHelper, + "type" -> "cache_failed", + "tweet_id" -> k, + "throwable" -> t.getClass.getName + ) + } + + TransformSubAction[TweetResult](HandleAsDoNotCache, _.mapState(_ ++ cacheErrorState)) + + case res => + otherCounter.incr() + baseHandler(res) + } + + } +} + +/** + * A LockingCache.Picker for use with CachingTweetRepository which prevents overwriting values in + * cache that are newer than the value previously read from cache. + */ +class TweetRepoCachePicker[T](cachedAt: T => Option[Time]) extends LockingCache.Picker[Cached[T]] { + private val newestPicker = new PreferNewestCached[T] + + override def apply(newValue: Cached[T], oldValue: Cached[T]): Option[Cached[T]] = { + oldValue.status match { + // never overwrite a `Deleted` tombstone via read-through. 
+ case CachedValueStatus.Deleted => None + + // only overwrite a `Found` value with an update based off of that same cache entry. + case CachedValueStatus.Found => + newValue.value.flatMap(cachedAt) match { + // if prevCachedAt is the same as oldValue.cachedAt, then the value in cache hasn't changed + case Some(prevCachedAt) if prevCachedAt == oldValue.cachedAt => Some(newValue) + // otherwise, the value in cache has changed since we read it, so don't overwrite + case _ => None + } + + // we may hit an expired/older tombstone, which should be safe to overwrite with a fresh + // tombstone of a new value returned from Manhattan. + case CachedValueStatus.NotFound => newestPicker(newValue, oldValue) + + // we shouldn't see any other CachedValueStatus, but if we do, play it safe and don't + // overwrite (it will be as if the read that triggered this never happened) + case _ => None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/Card2Repository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/Card2Repository.scala new file mode 100644 index 000000000..9b6f4b154 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/Card2Repository.scala @@ -0,0 +1,56 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.expandodo.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Expandodo + +/** + * A key identifying a card2 lookup; each kind of key knows how to build its Card2Request. + */ +sealed trait Card2Key { + def toCard2Request: Card2Request +} + +/** + * Looks a card up by url. + */ +final case class UrlCard2Key(url: String) extends Card2Key { + override def toCard2Request: Card2Request = + Card2Request(`type` = Card2RequestType.ByUrl, url = Some(url)) +} + +/** + * Looks a card up by immediate values for the given tweet id. + */ +final case class ImmediateValuesCard2Key(values: Seq[Card2ImmediateValue], tweetId: TweetId) + extends Card2Key { + override def toCard2Request: Card2Request = + Card2Request( + `type` = Card2RequestType.ByImmediateValues, + 
immediateValues = Some(values), + statusId = Some(tweetId) + ) +} + +object Card2Repository { + type Type = (Card2Key, Card2RequestOptions) => Stitch[Card2] + + def apply(getCards2: Expandodo.GetCards2, maxRequestSize: Int): Type = { + case class RequestGroup(opts: Card2RequestOptions) extends SeqGroup[Card2Key, Option[Card2]] { + override def run(keys: Seq[Card2Key]): Future[Seq[Try[Option[Card2]]]] = + LegacySeqGroup.liftToSeqTry( + getCards2((keys.map(_.toCard2Request), opts)).map { res => + res.responsesCode match { + case Card2ResponsesCode.Ok => + res.responses.map(_.card) + + case _ => + // treat all other failure cases as card-not-found + Seq.fill(keys.size)(None) + } + } + ) + + override def maxSize: Int = maxRequestSize + } + + (card2Key, opts) => + Stitch + .call(card2Key, RequestGroup(opts)) + .lowerFromOption() + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala new file mode 100644 index 000000000..b420b5814 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardRepository.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.expandodo.thriftscala._ +import com.twitter.stitch.MapGroup +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Expandodo + +object CardRepository { + type Type = String => Stitch[Seq[Card]] + + def apply(getCards: Expandodo.GetCards, maxRequestSize: Int): Type = { + object RequestGroup extends MapGroup[String, Seq[Card]] { + override def run(urls: Seq[String]): Future[String => Try[Seq[Card]]] = + getCards(urls.toSet).map { responseMap => url => + responseMap.get(url) match { + case None => Throw(NotFound) + case Some(r) => Return(r.cards.getOrElse(Nil)) + } + } + + override def maxSize: Int = maxRequestSize + } + + url => Stitch.call(url, RequestGroup) + } 
+} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala new file mode 100644 index 000000000..3cf546bb7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CardUsersRepository.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.expandodo.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Expandodo + +object CardUsersRepository { + type CardUri = String + type Type = (CardUri, Context) => Stitch[Option[Set[UserId]]] + + case class Context(perspectiveUserId: UserId) extends AnyVal + + case class GetUsersGroup(perspectiveId: UserId, getCardUsers: Expandodo.GetCardUsers) + extends SeqGroup[CardUri, GetCardUsersResponse] { + protected override def run(keys: Seq[CardUri]): Future[Seq[Try[GetCardUsersResponse]]] = + LegacySeqGroup.liftToSeqTry( + getCardUsers( + GetCardUsersRequests( + requests = keys.map(k => GetCardUsersRequest(k)), + perspectiveUserId = Some(perspectiveId) + ) + ).map(_.responses) + ) + } + + def apply(getCardUsers: Expandodo.GetCardUsers): Type = + (cardUri, ctx) => + Stitch.call(cardUri, GetUsersGroup(ctx.perspectiveUserId, getCardUsers)).map { resp => + val authorUserIds = resp.authorUserIds.map(_.toSet) + val siteUserIds = resp.siteUserIds.map(_.toSet) + + if (authorUserIds.isEmpty) { + siteUserIds + } else if (siteUserIds.isEmpty) { + authorUserIds + } else { + Some(authorUserIds.get ++ siteUserIds.get) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala new file mode 100644 index 000000000..64052b116 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationControlRepository.scala @@ -0,0 +1,51 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted +import com.twitter.tweetypie.thriftscala.ConversationControl + +/** + * This repository loads up the conversation control values for a tweet which controls who can reply + * to a tweet. Because the conversation control values are stored on the root tweet of a conversation, + * we need to make sure that the code is able to load the data from the root tweet. To ensure this, + * no visibility filtering options are set on the query to load the root tweet fields. + * + * If visibility filtering was enabled, and the root tweet was filtered for the requesting user, + * then the conversation control data would not be returned and enforcement would effectively be + * side-stepped. + */ +object ConversationControlRepository { + private[this] val log = Logger(getClass) + type Type = (TweetId, CacheControl) => Stitch[Option[ConversationControl]] + + def apply(repo: TweetRepository.Type, stats: StatsReceiver): Type = + (conversationId: TweetId, cacheControl: CacheControl) => { + val options = TweetQuery.Options( + include = TweetQuery.Include(Set(Tweet.ConversationControlField.id)), + // We want the root tweet of a conversation that we're looking up to be + // cached with the same policy as the tweet we're looking up. 
+ cacheControl = cacheControl, + enforceVisibilityFiltering = false, + safetyLevel = SafetyLevel.FilterNone + ) + + repo(conversationId, options) + .map(rootTweet => rootTweet.conversationControl) + .handle { + // We don't know of any case where tweets would return NotFound, but for + // pragmatic reasons, we're opening the conversation for replies + // in case a bug causing tweets to be NotFound exists. + case NotFound => + stats.counter("tweet_not_found").incr() + None + // If no root tweet is found, the reply has no conversation controls + // this is by design, deleting the root tweet "opens" the conversation + case TweetDeleted => + stats.counter("tweet_deleted").incr() + None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala new file mode 100644 index 000000000..b9a9b26ad --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationIdRepository.scala @@ -0,0 +1,95 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup + +case class ConversationIdKey(tweetId: TweetId, parentId: TweetId) + +object ConversationIdRepository { + type Type = ConversationIdKey => Stitch[TweetId] + + def apply(multiSelectOne: Iterable[Select[StatusGraph]] => Future[Seq[Option[Long]]]): Type = + key => Stitch.call(key, Group(multiSelectOne)) + + private case class Group( + multiSelectOne: Iterable[Select[StatusGraph]] => Future[Seq[Option[Long]]]) + extends SeqGroup[ConversationIdKey, TweetId] { + + private[this] def getConversationIds( + keys: Seq[ConversationIdKey], + getLookupId: ConversationIdKey => TweetId + ): Future[Map[ConversationIdKey, TweetId]] = { + val distinctIds = keys.map(getLookupId).distinct + val tflockQueries =
distinctIds.map(ConversationGraph.to) + if (tflockQueries.isEmpty) { + Future.value(Map[ConversationIdKey, TweetId]()) + } else { + multiSelectOne(tflockQueries).map { results => + // first, we need to match up the distinct ids requested with the corresponding result + val resultMap = + distinctIds + .zip(results) + .collect { + case (id, Some(conversationId)) => id -> conversationId + } + .toMap + + // then we need to map keys into the above map + keys.flatMap { key => resultMap.get(getLookupId(key)).map(key -> _) }.toMap + } + } + } + + /** + * Returns a key-value result that maps keys to the tweet's conversation IDs. + * + * Example: + * Tweet B is a reply to tweet A with conversation ID c. + * We want to get B's conversation ID. Then, for the request + * + * ConversationIdRequest(B.id, A.id) + * + * our key-value result's "found" map will contain a pair (B.id -> c). + */ + protected override def run(keys: Seq[ConversationIdKey]): Future[Seq[Try[TweetId]]] = + LegacySeqGroup.liftToSeqTry( + for { + // Try to get the conversation IDs for the parent tweets + convIdsFromParent <- getConversationIds(keys, _.parentId) + + // Collect the tweet IDs whose parents' conversation IDs couldn't be found. + // We assume that happened in one of two cases: + // * for a tweet whose parent has been deleted + // * for a tweet whose parent is the root of a conversation + // Note: In either case, we will try to look up the conversation ID of the tweet whose parent + // couldn't be found. If that can't be found either, we will eventually return the parent ID. + tweetsWhoseParentsDontHaveConvoIds = keys.toSet -- convIdsFromParent.keys + + // Collect the conversation IDs for the tweets whose parents have not been found, now using the + // tweets' own IDs. + convIdsFromTweet <- + getConversationIds(tweetsWhoseParentsDontHaveConvoIds.toSeq, _.tweetId) + + // Combine the by-parent-ID and by-tweet-ID results. 
+ convIdMap = convIdsFromParent ++ convIdsFromTweet + + // Assign conversation IDs to all not-found tweet IDs. + // A tweet might not have received a conversation ID if + // * the parent of the tweet is the root of the conversation, and we are in the write path + // for creating the tweet. In that case, the conversation ID should be the tweet's parent + // ID. + // * it had been created before TFlock started handling conversation IDs. In that case, the + // conversation ID will just point to the parent tweet so that we can have a conversation of + // at least two tweets. + // So in both cases, we want to return the tweet's parent ID. + } yield { + keys.map { + case k @ ConversationIdKey(_, p) => convIdMap.getOrElse(k, p) + } + } + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala new file mode 100644 index 000000000..16e08f46c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ConversationMutedRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch + +object ConversationMutedRepository { + + /** + * Same type as com.twitter.stitch.timelineservice.TimelineService.GetConversationMuted but + * without using Arrow.
+ */ + type Type = (UserId, TweetId) => Stitch[Boolean] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala new file mode 100644 index 000000000..d74c1c185 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/CreativesContainerMaterializationRepository.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie.repository + +import com.twitter.container.thriftscala.MaterializeAsTweetFieldsRequest +import com.twitter.container.thriftscala.MaterializeAsTweetRequest +import com.twitter.container.{thriftscala => ccs} +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.Return +import com.twitter.tweetypie.{thriftscala => tp} +import com.twitter.tweetypie.backends +import com.twitter.tweetypie.thriftscala.GetTweetFieldsResult +import com.twitter.tweetypie.thriftscala.GetTweetResult +import com.twitter.util.Future +import com.twitter.util.Try + +/** + * A tweet is treated specially when [[tp.Tweet.underlyingCreativesContainerId]] is present: + * tweetypie will delegate hydration of such a tweet to the creatives container service.
+ */ +object CreativesContainerMaterializationRepository { + + type GetTweetType = + (ccs.MaterializeAsTweetRequest, Option[tp.GetTweetOptions]) => Stitch[tp.GetTweetResult] + + type GetTweetFieldsType = + ( + ccs.MaterializeAsTweetFieldsRequest, + tp.GetTweetFieldsOptions + ) => Stitch[tp.GetTweetFieldsResult] + + def apply( + materializeAsTweet: backends.CreativesContainerService.MaterializeAsTweet + ): GetTweetType = { + case class RequestGroup(opts: Option[tp.GetTweetOptions]) + extends SeqGroup[ccs.MaterializeAsTweetRequest, tp.GetTweetResult] { + override protected def run( + keys: Seq[MaterializeAsTweetRequest] + ): Future[Seq[Try[GetTweetResult]]] = + materializeAsTweet(ccs.MaterializeAsTweetRequests(keys, opts)).map { + res: Seq[GetTweetResult] => res.map(Return(_)) + } + } + + (request, options) => Stitch.call(request, RequestGroup(options)) + } + + def materializeAsTweetFields( + materializeAsTweetFields: backends.CreativesContainerService.MaterializeAsTweetFields + ): GetTweetFieldsType = { + case class RequestGroup(opts: tp.GetTweetFieldsOptions) + extends SeqGroup[ccs.MaterializeAsTweetFieldsRequest, tp.GetTweetFieldsResult] { + override protected def run( + keys: Seq[MaterializeAsTweetFieldsRequest] + ): Future[Seq[Try[GetTweetFieldsResult]]] = + materializeAsTweetFields(ccs.MaterializeAsTweetFieldsRequests(keys, opts)).map { + res: Seq[GetTweetFieldsResult] => res.map(Return(_)) + } + } + + (request, options) => Stitch.call(request, RequestGroup(options)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala new file mode 100644 index 000000000..711e603c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeletedTweetVisibilityRepository.scala @@ -0,0 +1,84 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.core.FilteredState.HasFilteredReason +import com.twitter.tweetypie.core.FilteredState.Unavailable.BounceDeleted +import com.twitter.tweetypie.core.FilteredState.Unavailable.SourceTweetNotFound +import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted +import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredStateUnavailable +import com.twitter.visibility.interfaces.tweets.DeletedTweetVisibilityLibrary +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.TweetDeleteReason +import com.twitter.visibility.models.TweetDeleteReason.TweetDeleteReason +import com.twitter.visibility.models.ViewerContext + +/** + * Generate FilteredReason for tweet entities in the following delete states: + * com.twitter.tweetypie.core.FilteredState.Unavailable + * - SourceTweetNotFound(true) + * - TweetDeleted + * - BounceDeleted + * + * Callers of this repository should be ready to handle empty response (Stitch.None) + * from the underlying VF library when: + * 1. the tweet should NOT be filtered for the given safety level + * 2. the tweet is not relevant content to be handled by the library + */ +object DeletedTweetVisibilityRepository { + type Type = VisibilityRequest => Stitch[Option[FilteredReason]] + + case class VisibilityRequest( + filteredState: Throwable, + tweetId: TweetId, + safetyLevel: Option[ThriftSafetyLevel], + viewerId: Option[Long], + isInnerQuotedTweet: Boolean) + + def apply( + visibilityLibrary: DeletedTweetVisibilityLibrary.Type + ): Type = { request => + toVisibilityTweetDeleteState(request.filteredState, request.isInnerQuotedTweet) + .map { deleteReason => + val safetyLevel = SafetyLevel.fromThrift( + request.safetyLevel.getOrElse(ThriftSafetyLevel.FilterDefault) + ) +
val isRetweet = request.filteredState == SourceTweetNotFound(true) + visibilityLibrary( + DeletedTweetVisibilityLibrary.Request( + request.tweetId, + safetyLevel, + ViewerContext.fromContextWithViewerIdFallback(request.viewerId), + deleteReason, + isRetweet, + request.isInnerQuotedTweet + ) + ).map(toFilteredStateUnavailable) + .map { + //Accept FilteredReason + case Some(fs) if fs.isInstanceOf[HasFilteredReason] => + Some(fs.asInstanceOf[HasFilteredReason].filteredReason) + case _ => None + } + } + .getOrElse(Stitch.None) + } + + /** + * @return map an error from tweet hydration to a VF model TweetDeleteReason, + * None when the error is not related to delete state tweets. + */ + private def toVisibilityTweetDeleteState( + tweetDeleteState: Throwable, + isInnerQuotedTweet: Boolean + ): Option[TweetDeleteReason] = { + tweetDeleteState match { + case TweetDeleted => Some(TweetDeleteReason.Deleted) + case BounceDeleted => Some(TweetDeleteReason.BounceDeleted) + case SourceTweetNotFound(true) if !isInnerQuotedTweet => Some(TweetDeleteReason.Deleted) + case _ => None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala new file mode 100644 index 000000000..f88513458 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/DeviceSourceRepository.scala @@ -0,0 +1,75 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.passbird.clientapplication.thriftscala.ClientApplication +import com.twitter.passbird.clientapplication.thriftscala.GetClientApplicationsResponse +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.stitch.MapGroup +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.thriftscala.DeviceSource + +// converts the device source parameter value to lower-case, to make the cached +// key 
case-insensitive +case class DeviceSourceKey(param: String) extends ScopedCacheKey("t", "ds", 1, param.toLowerCase) + +object DeviceSourceRepository { + type Type = String => Stitch[DeviceSource] + + type GetClientApplications = FutureArrow[Seq[Long], GetClientApplicationsResponse] + + val DefaultUrl = "https://help.twitter.com/en/using-twitter/how-to-tweet#source-labels" + + def formatUrl(name: String, url: String): String = s"""<a href="$url">$name</a>""" + + /** + * Construct an html a tag from the client application + * name and url for the display field because some + * clients depend on this. + */ + def deviceSourceDisplay( + name: String, + urlOpt: Option[String] + ): String = + urlOpt match { + case Some(url) => formatUrl(name = name, url = url) // data sanitized by passbird + case None => + formatUrl(name = name, url = DefaultUrl) // data sanitized by passbird + } + + def toDeviceSource(app: ClientApplication): DeviceSource = + DeviceSource( + // The id field used to represent the id of a row + // in the now deprecated device_sources mysql table.
+ id = 0L, + parameter = "oauth:" + app.id, + internalName = "oauth:" + app.id, + name = app.name, + url = app.url.getOrElse(""), + display = deviceSourceDisplay(app.name, app.url), + clientAppId = Some(app.id) + ) + + def apply( + parseAppId: String => Option[Long], + getClientApplications: GetClientApplications + ): DeviceSourceRepository.Type = { + val getClientApplicationsGroup = new MapGroup[Long, DeviceSource] { + def run(ids: Seq[Long]): Future[Long => Try[DeviceSource]] = + getClientApplications(ids).map { response => id => + response.found.get(id) match { + case Some(app) => Return(toDeviceSource(app)) + case None => Throw(NotFound) + } + } + } + + appIdStr => + parseAppId(appIdStr) match { + case Some(appId) => + Stitch.call(appId, getClientApplicationsGroup) + case None => + Stitch.exception(NotFound) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala new file mode 100644 index 000000000..57857c386 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/EscherbirdAnnotationRepository.scala @@ -0,0 +1,23 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Escherbird +import com.twitter.tweetypie.thriftscala.EscherbirdEntityAnnotations + +object EscherbirdAnnotationRepository { + type Type = Tweet => Stitch[Option[EscherbirdEntityAnnotations]] + + def apply(annotate: Escherbird.Annotate): Type = + // use a `SeqGroup` to group the future-calls together, even though they can be + // executed independently, in order to help keep hydration between different tweets + // in sync, to improve batching in hydrators which occur later in the pipeline. 
+ tweet => + Stitch + .call(tweet, LegacySeqGroup(annotate.liftSeq)) + .map { annotations => + if (annotations.isEmpty) None + else Some(EscherbirdEntityAnnotations(annotations)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala new file mode 100644 index 000000000..476790b60 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoScrubTimestampRepository.scala @@ -0,0 +1,16 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.stitch.Stitch +import com.twitter.util.Base64Long + +case class GeoScrubTimestampKey(userId: UserId) + extends ScopedCacheKey("t", "gs", 1, Base64Long.toBase64(userId)) + +object GeoScrubTimestampRepository { + type Type = UserId => Stitch[Time] + + def apply(getLastGeoScrubTime: UserId => Stitch[Option[Time]]): Type = + userId => getLastGeoScrubTime(userId).lowerFromOption() +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala new file mode 100644 index 000000000..483f3f73f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/GeoduckPlaceRepository.scala @@ -0,0 +1,132 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.geoduck.common.{thriftscala => Geoduck} +import com.twitter.geoduck.service.thriftscala.GeoContext +import com.twitter.geoduck.service.thriftscala.Key +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.geoduck.util.service.GeoduckLocate +import com.twitter.geoduck.util.service.LocationResponseExtractors +import com.twitter.geoduck.util.{primitives => GDPrimitive} +import com.twitter.stitch.NotFound +import 
com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.{thriftscala => TP} + +object GeoduckPlaceConverter { + + def LocationResponseToTPPlace(lang: String, lr: LocationResponse): Option[TP.Place] = + GDPrimitive.Place + .fromLocationResponse(lr) + .headOption + .map(apply(lang, _)) + + def convertPlaceType(pt: Geoduck.PlaceType): TP.PlaceType = pt match { + case Geoduck.PlaceType.Unknown => TP.PlaceType.Unknown + case Geoduck.PlaceType.Country => TP.PlaceType.Country + case Geoduck.PlaceType.Admin => TP.PlaceType.Admin + case Geoduck.PlaceType.City => TP.PlaceType.City + case Geoduck.PlaceType.Neighborhood => TP.PlaceType.Neighborhood + case Geoduck.PlaceType.Poi => TP.PlaceType.Poi + case Geoduck.PlaceType.ZipCode => TP.PlaceType.Admin + case Geoduck.PlaceType.Metro => TP.PlaceType.Admin + case Geoduck.PlaceType.Admin0 => TP.PlaceType.Admin + case Geoduck.PlaceType.Admin1 => TP.PlaceType.Admin + case _ => + throw new IllegalStateException(s"Invalid place type: $pt") + } + + def convertPlaceName(gd: Geoduck.PlaceName): TP.PlaceName = + TP.PlaceName( + name = gd.name, + language = gd.language.getOrElse("en"), + `type` = convertPlaceNameType(gd.nameType), + preferred = gd.preferred + ) + + def convertPlaceNameType(pt: Geoduck.PlaceNameType): TP.PlaceNameType = pt match { + case Geoduck.PlaceNameType.Normal => TP.PlaceNameType.Normal + case Geoduck.PlaceNameType.Abbreviation => TP.PlaceNameType.Abbreviation + case Geoduck.PlaceNameType.Synonym => TP.PlaceNameType.Synonym + case _ => + throw new IllegalStateException(s"Invalid place name type: $pt") + } + + def convertAttributes(attrs: collection.Set[Geoduck.PlaceAttribute]): Map[String, String] = + attrs.map(attr => attr.key -> attr.value.getOrElse("")).toMap + + def convertBoundingBox(geom: GDPrimitive.Geometry): Seq[TP.GeoCoordinates] = + geom.coordinates.map { coord => + TP.GeoCoordinates( + latitude = coord.lat, + longitude = coord.lon + ) + } + + def 
apply(queryLang: String, geoplace: GDPrimitive.Place): TP.Place = { + val bestname = geoplace.bestName(queryLang).getOrElse(geoplace.hexId) + TP.Place( + id = geoplace.hexId, + `type` = convertPlaceType(geoplace.placeType), + name = bestname, + fullName = geoplace.fullName(queryLang).getOrElse(bestname), + attributes = convertAttributes(geoplace.attributes), + boundingBox = geoplace.boundingBox.map(convertBoundingBox), + countryCode = geoplace.countryCode, + containers = Some(geoplace.cone.map(_.hexId).toSet + geoplace.hexId), + countryName = geoplace.countryName(queryLang) + ) + } + + def convertGDKey(key: Key, lang: String): PlaceKey = { + val Key.PlaceId(pid) = key + PlaceKey("%016x".format(pid), lang) + } +} + +object GeoduckPlaceRepository { + val context: GeoContext = + GeoContext( + placeFields = Set( + Geoduck.PlaceQueryFields.Attributes, + Geoduck.PlaceQueryFields.BoundingBox, + Geoduck.PlaceQueryFields.PlaceNames, + Geoduck.PlaceQueryFields.Cone + ), + placeTypes = Set( + Geoduck.PlaceType.Country, + Geoduck.PlaceType.Admin0, + Geoduck.PlaceType.Admin1, + Geoduck.PlaceType.City, + Geoduck.PlaceType.Neighborhood + ), + includeCountryCode = true, + hydrateCone = true + ) + + def apply(geoduck: GeoduckLocate): PlaceRepository.Type = { + val geoduckGroup = LegacySeqGroup((ids: Seq[Key.PlaceId]) => geoduck(context, ids)) + + placeKey => + val placeId = + try { + Stitch.value( + Key.PlaceId(java.lang.Long.parseUnsignedLong(placeKey.placeId, 16)) + ) + } catch { + case _: NumberFormatException => Stitch.exception(NotFound) + } + + placeId + .flatMap(id => Stitch.call(id, geoduckGroup)) + .rescue { case LocationResponseExtractors.Failure(ex) => Stitch.exception(ex) } + .map { resp => + GDPrimitive.Place + .fromLocationResponse(resp) + .headOption + .map(GeoduckPlaceConverter(placeKey.language, _)) + } + .lowerFromOption() + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala new file mode 100644 index 000000000..9c853b85c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/LastQuoteOfQuoterRepository.scala @@ -0,0 +1,24 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client.QuoteTweetsIndexGraph +import com.twitter.flockdb.client.TFlockClient +import com.twitter.flockdb.client.UserTimelineGraph +import com.twitter.stitch.Stitch + +object LastQuoteOfQuoterRepository { + type Type = (TweetId, UserId) => Stitch[Boolean] + + def apply( + tflockReadClient: TFlockClient + ): Type = + (tweetId, userId) => { + // Select the tweets authored by userId quoting tweetId. + // By intersecting the tweet quotes with this user's tweets. + val quotesFromQuotingUser = QuoteTweetsIndexGraph + .from(tweetId) + .intersect(UserTimelineGraph.from(userId)) + + Stitch.callFuture(tflockReadClient.selectAll(quotesFromQuotingUser).map(_.size <= 1)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala new file mode 100644 index 000000000..dd87a2e99 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ManhattanTweetRepository.scala @@ -0,0 +1,147 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.storage.TweetStorageClient.GetStoredTweet +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage._ +import scala.util.control.NoStackTrace + +case class StorageGetTweetFailure(tweetId: TweetId, 
underlying: Throwable) + extends Exception(s"tweetId=$tweetId", underlying) + with NoStackTrace + +object ManhattanTweetRepository { + private[this] val logger = Logger(getClass) + + def apply( + getTweet: TweetStorageClient.GetTweet, + getStoredTweet: TweetStorageClient.GetStoredTweet, + shortCircuitLikelyPartialTweetReads: Gate[Duration], + statsReceiver: StatsReceiver, + clientIdHelper: ClientIdHelper, + ): TweetResultRepository.Type = { + def likelyAvailable(tweetId: TweetId): Boolean = + if (SnowflakeId.isSnowflakeId(tweetId)) { + val tweetAge: Duration = Time.now.since(SnowflakeId(tweetId).time) + !shortCircuitLikelyPartialTweetReads(tweetAge) + } else { + true // Not a snowflake id, so should definitely be available + } + + val likelyPartialTweetReadsCounter = statsReceiver.counter("likely_partial_tweet_reads") + + (tweetId, options) => + if (!likelyAvailable(tweetId)) { + likelyPartialTweetReadsCounter.incr() + val currentClient = + clientIdHelper.effectiveClientId.getOrElse(ClientIdHelper.UnknownClientId) + logger.debug(s"likely_partial_tweet_read $tweetId $currentClient") + Stitch.exception(NotFound) + } else if (options.fetchStoredTweets) { + getStoredTweet(tweetId).liftToTry.flatMap(handleGetStoredTweetResponse(tweetId, _)) + } else { + getTweet(tweetId).liftToTry.flatMap(handleGetTweetResponse(tweetId, _)) + } + } + + private def handleGetTweetResponse( + tweetId: tweetypie.TweetId, + response: Try[GetTweet.Response] + ): Stitch[TweetResult] = { + response match { + case Return(GetTweet.Response.Found(tweet)) => + Stitch.value(TweetResult(TweetData(tweet = tweet), HydrationState.modified)) + case Return(GetTweet.Response.NotFound) => + Stitch.exception(NotFound) + case Return(GetTweet.Response.Deleted) => + Stitch.exception(FilteredState.Unavailable.TweetDeleted) + case Return(_: GetTweet.Response.BounceDeleted) => + Stitch.exception(FilteredState.Unavailable.BounceDeleted) + case Throw(_: storage.RateLimited) => + 
Stitch.exception(OverCapacity(s"Storage overcapacity, tweetId=$tweetId")) + case Throw(e) => + Stitch.exception(StorageGetTweetFailure(tweetId, e)) + } + } + + private def handleGetStoredTweetResponse( + tweetId: tweetypie.TweetId, + response: Try[GetStoredTweet.Response] + ): Stitch[TweetResult] = { + def translateErrors( + getStoredTweetErrs: Seq[GetStoredTweet.Error] + ): Seq[StoredTweetResult.Error] = { + getStoredTweetErrs.map { + case GetStoredTweet.Error.TweetIsCorrupt => StoredTweetResult.Error.Corrupt + case GetStoredTweet.Error.ScrubbedFieldsPresent => + StoredTweetResult.Error.ScrubbedFieldsPresent + case GetStoredTweet.Error.TweetFieldsMissingOrInvalid => + StoredTweetResult.Error.FieldsMissingOrInvalid + case GetStoredTweet.Error.TweetShouldBeHardDeleted => + StoredTweetResult.Error.ShouldBeHardDeleted + } + } + + def toTweetResult( + tweet: Tweet, + state: Option[TweetStateRecord], + errors: Seq[GetStoredTweet.Error] + ): TweetResult = { + val translatedErrors = translateErrors(errors) + val canHydrate: Boolean = + !translatedErrors.contains(StoredTweetResult.Error.Corrupt) && + !translatedErrors.contains(StoredTweetResult.Error.FieldsMissingOrInvalid) + + val storedTweetResult = state match { + case None => StoredTweetResult.Present(translatedErrors, canHydrate) + case Some(TweetStateRecord.HardDeleted(_, softDeletedAtMsec, hardDeletedAtMsec)) => + StoredTweetResult.HardDeleted(softDeletedAtMsec, hardDeletedAtMsec) + case Some(TweetStateRecord.SoftDeleted(_, softDeletedAtMsec)) => + StoredTweetResult.SoftDeleted(softDeletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.BounceDeleted(_, deletedAtMsec)) => + StoredTweetResult.BounceDeleted(deletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.Undeleted(_, undeletedAtMsec)) => + StoredTweetResult.Undeleted(undeletedAtMsec, translatedErrors, canHydrate) + case Some(TweetStateRecord.ForceAdded(_, addedAtMsec)) => + StoredTweetResult.ForceAdded(addedAtMsec, 
translatedErrors, canHydrate) + } + + TweetResult( + TweetData(tweet = tweet, storedTweetResult = Some(storedTweetResult)), + HydrationState.modified) + } + + val tweetResult = response match { + case Return(GetStoredTweet.Response.FoundAny(tweet, state, _, _, errors)) => + toTweetResult(tweet, state, errors) + case Return(GetStoredTweet.Response.Failed(tweetId, _, _, _, errors)) => + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.Failed(translateErrors(errors)))) + TweetResult(tweetData, HydrationState.modified) + case Return(GetStoredTweet.Response.HardDeleted(tweetId, state, _, _)) => + toTweetResult(Tweet(tweetId), state, Seq()) + case Return(GetStoredTweet.Response.NotFound(tweetId)) => { + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.NotFound) + ) + TweetResult(tweetData, HydrationState.modified) + } + case _ => { + val tweetData = TweetData( + tweet = Tweet(tweetId), + storedTweetResult = Some(StoredTweetResult.Failed(Seq(StoredTweetResult.Error.Corrupt)))) + TweetResult(tweetData, HydrationState.modified) + } + } + + Stitch.value(tweetResult) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala new file mode 100644 index 000000000..f9aa5a832 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/MediaMetadataRepository.scala @@ -0,0 +1,22 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.media.MediaMetadata +import com.twitter.tweetypie.media.MediaMetadataRequest + +object MediaMetadataRepository { + type Type = MediaMetadataRequest => Stitch[MediaMetadata] + + def apply(getMediaMetadata: FutureArrow[MediaMetadataRequest, MediaMetadata]): Type = { + // use an 
`SeqGroup` to group the future-calls together, even though they can be + // executed independently, in order to help keep hydration between different tweets + // in sync, to improve batching in hydrators which occur later in the pipeline. + val requestGroup = SeqGroup[MediaMetadataRequest, MediaMetadata] { + requests: Seq[MediaMetadataRequest] => + Future.collect(requests.map(r => getMediaMetadata(r).liftToTry)) + } + mediaReq => Stitch.call(mediaReq, requestGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala new file mode 100644 index 000000000..8c7092a53 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ParentUserIdRepository.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState.Unavailable.BounceDeleted +import com.twitter.tweetypie.core.FilteredState.Unavailable.SourceTweetNotFound +import com.twitter.tweetypie.core.FilteredState.Unavailable.TweetDeleted + +object ParentUserIdRepository { + type Type = Tweet => Stitch[Option[UserId]] + + case class ParentTweetNotFound(tweetId: TweetId) extends Exception + + def apply(tweetRepo: TweetRepository.Type): Type = { + val options = TweetQuery.Options(TweetQuery.Include(Set(Tweet.CoreDataField.id))) + + tweet => + getShare(tweet) match { + case Some(share) if share.sourceStatusId == share.parentStatusId => + Stitch.value(Some(share.sourceUserId)) + case Some(share) => + tweetRepo(share.parentStatusId, options) + .map(tweet => Some(getUserId(tweet))) + .rescue { + case NotFound | TweetDeleted | BounceDeleted | SourceTweetNotFound(_) => + Stitch.exception(ParentTweetNotFound(share.parentStatusId)) + } + case None => + Stitch.None + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala new file mode 100644 index 000000000..dd21e4ec1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PastedMediaRepository.scala @@ -0,0 +1,129 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.media.Media +import com.twitter.tweetypie.media.MediaUrl +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.util.MediaId +import java.nio.ByteBuffer + +case class PastedMedia(mediaEntities: Seq[MediaEntity], mediaTags: Map[MediaId, Seq[MediaTag]]) { + + /** + * Updates the copied media entities to have the same indices as the given UrlEntity. + */ + def updateEntities(urlEntity: UrlEntity): PastedMedia = + if (mediaEntities.isEmpty) this + else copy(mediaEntities = mediaEntities.map(Media.copyFromUrlEntity(_, urlEntity))) + + def merge(that: PastedMedia): PastedMedia = + PastedMedia( + mediaEntities = this.mediaEntities ++ that.mediaEntities, + mediaTags = this.mediaTags ++ that.mediaTags + ) + + /** + * Return a new PastedMedia that contains only the first maxMediaEntities media entities + */ + def take(maxMediaEntities: Int): PastedMedia = { + val entities = this.mediaEntities.take(maxMediaEntities) + val mediaIds = entities.map(_.mediaId) + val pastedTags = mediaTags.filterKeys { id => mediaIds.contains(id) } + + PastedMedia( + mediaEntities = entities, + mediaTags = pastedTags + ) + } + + def mergeTweetMediaTags(ownedTags: Option[TweetMediaTags]): Option[TweetMediaTags] = { + val merged = ownedTags.map(_.tagMap).getOrElse(Map.empty) ++ mediaTags + if (merged.nonEmpty) { + Some(TweetMediaTags(merged)) + } else { + None + } + } +} + +object PastedMedia { + import 
MediaUrl.Permalink.hasTweetId + + val empty: PastedMedia = PastedMedia(Nil, Map.empty) + + /** + * @param tweet: the tweet whose media URL was pasted. + * + * @return the media that should be copied to a tweet that has a + * link to the media in this tweet, along with its protection + * status. The returned media entities will have sourceStatusId + * and sourceUserId set appropriately for inclusion in a different + * tweet. + */ + def getMediaEntities(tweet: Tweet): Seq[MediaEntity] = + getMedia(tweet).collect { + case mediaEntity if hasTweetId(mediaEntity, tweet.id) => + setSource(mediaEntity, tweet.id, getUserId(tweet)) + } + + def setSource(mediaEntity: MediaEntity, tweetId: TweetId, userId: TweetId): MediaEntity = + mediaEntity.copy( + sourceStatusId = Some(tweetId), + sourceUserId = Some(mediaEntity.sourceUserId.getOrElse(userId)) + ) +} + +object PastedMediaRepository { + type Type = (TweetId, Ctx) => Stitch[PastedMedia] + + case class Ctx( + includeMediaEntities: Boolean, + includeAdditionalMetadata: Boolean, + includeMediaTags: Boolean, + extensionsArgs: Option[ByteBuffer], + safetyLevel: SafetyLevel) { + def asTweetQueryOptions: TweetQuery.Options = + TweetQuery.Options( + enforceVisibilityFiltering = true, + extensionsArgs = extensionsArgs, + safetyLevel = safetyLevel, + include = TweetQuery.Include( + tweetFields = + Set(Tweet.CoreDataField.id) ++ + (if (includeMediaEntities) Set(Tweet.MediaField.id) else Set.empty) ++ + (if (includeMediaTags) Set(Tweet.MediaTagsField.id) else Set.empty), + mediaFields = if (includeMediaEntities && includeAdditionalMetadata) { + Set(MediaEntity.AdditionalMetadataField.id) + } else { + Set.empty + }, + // don't recursively load pasted media + pastedMedia = false + ) + ) + } + + /** + * A Repository of PastedMedia fetched from other tweets. We query the tweet with + * default global visibility filtering enabled, so we won't see entities for users that + * are protected, deactivated, suspended, etc. 
+ */ + def apply(tweetRepo: TweetRepository.Type): Type = + (tweetId, ctx) => + tweetRepo(tweetId, ctx.asTweetQueryOptions) + .flatMap { t => + val entities = PastedMedia.getMediaEntities(t) + if (entities.nonEmpty) { + Stitch.value(PastedMedia(entities, getMediaTagMap(t))) + } else { + Stitch.NotFound + } + } + .rescue { + // drop filtered tweets + case _: FilteredState => Stitch.NotFound + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala new file mode 100644 index 000000000..26525ab6c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PenguinLanguageRepository.scala @@ -0,0 +1,53 @@ +package com.twitter.tweetypie +package repository + +import com.ibm.icu.util.ULocale +import com.twitter.common.text.pipeline.TwitterLanguageIdentifier +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.repository.LanguageRepository.Text +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.FuturePool +import com.twitter.util.logging.Logger + +object LanguageRepository { + type Type = Text => Stitch[Option[Language]] + type Text = String +} + +object PenguinLanguageRepository { + private val identifier = new TwitterLanguageIdentifier.Builder().buildForTweet() + private val log = Logger(getClass) + + def isRightToLeft(lang: String): Boolean = + new ULocale(lang).getCharacterOrientation == "right-to-left" + + def apply(futurePool: FuturePool): LanguageRepository.Type = { + val identifyOne = + FutureArrow[Text, Option[Language]] { text => + futurePool { + try { + Some(identifier.identify(text)) + } catch { + case e: IllegalArgumentException => + val userId = TwitterContext().map(_.userId) + val encodedText = com.twitter.util.Base64StringEncoder.encode(text.getBytes) + log.info(s"${e.getMessage} : USER ID - $userId : 
TEXT - $encodedText") + None + } + }.map { + case Some(langWithScore) => + val lang = langWithScore.getLocale.getLanguage + Some( + Language( + language = lang, + rightToLeft = isRightToLeft(lang), + confidence = langWithScore.getScore + )) + case None => None + } + } + + text => Stitch.call(text, LegacySeqGroup(identifyOne.liftSeq)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala new file mode 100644 index 000000000..ac609097a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PerspectiveRepository.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.stitch.timelineservice.TimelineService.GetPerspectives +import com.twitter.timelineservice.thriftscala.TimelineEntryPerspective + +object PerspectiveRepository { + + /** + * Same type as com.twitter.stitch.timelineservice.TimelineService.GetPerspectives but without + * using Arrow. 
+ */ + type Type = GetPerspectives.Query => Stitch[TimelineEntryPerspective] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala new file mode 100644 index 000000000..8219eb350 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/PlaceRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.thriftscala.Place + +case class PlaceKey(placeId: PlaceId, language: String) + extends ScopedCacheKey("t", "geo", 1, placeId, language) + +object PlaceRepository { + type Type = PlaceKey => Stitch[Place] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala new file mode 100644 index 000000000..6968c71c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/ProfileGeoRepository.scala @@ -0,0 +1,66 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.dataproducts.enrichments.thriftscala._ +import com.twitter.gizmoduck.thriftscala.UserResponseState._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.GnipEnricherator +import com.twitter.tweetypie.thriftscala.GeoCoordinates + +case class ProfileGeoKey(tweetId: TweetId, userId: Option[UserId], coords: Option[GeoCoordinates]) { + def key: TweetData = + TweetData( + tweetId = tweetId, + userId = userId, + coordinates = coords.map(ProfileGeoRepository.convertGeo) + ) +} + +object ProfileGeoRepository { + type Type = ProfileGeoKey => Stitch[ProfileGeoEnrichment] + + case class UnexpectedState(state: EnrichmentHydrationState) 
extends Exception(state.name) + + def convertGeo(coords: GeoCoordinates): TweetyPieGeoCoordinates = + TweetyPieGeoCoordinates( + latitude = coords.latitude, + longitude = coords.longitude, + geoPrecision = coords.geoPrecision, + display = coords.display + ) + + def apply(hydrateProfileGeo: GnipEnricherator.HydrateProfileGeo): Type = { + import EnrichmentHydrationState._ + + val emptyEnrichmentStitch = Stitch.value(ProfileGeoEnrichment()) + + val profileGeoGroup = SeqGroup[TweetData, ProfileGeoResponse] { keys: Seq[TweetData] => + // Gnip ignores writePath and treats all requests as reads + LegacySeqGroup.liftToSeqTry( + hydrateProfileGeo(ProfileGeoRequest(requests = keys, writePath = false)) + ) + } + + (geoKey: ProfileGeoKey) => + Stitch + .call(geoKey.key, profileGeoGroup) + .flatMap { + case ProfileGeoResponse(_, Success, Some(enrichment), _) => + Stitch.value(enrichment) + case ProfileGeoResponse(_, Success, None, _) => + // when state is Success enrichment should always be Some, but default to be safe + emptyEnrichmentStitch + case ProfileGeoResponse( + _, + UserLookupError, + _, + Some(DeactivatedUser | SuspendedUser | NotFound) + ) => + emptyEnrichmentStitch + case r => + Stitch.exception(UnexpectedState(r.state)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala new file mode 100644 index 000000000..ed8116476 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuotedTweetVisibilityRepository.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredState +import 
com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.QuotedTweetVisibilityRequest +import com.twitter.visibility.interfaces.tweets.TweetAndAuthor +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.ViewerContext + +/** + * This repository handles visibility filtering of inner quoted tweets + * based on relationships between the inner and outer tweets. This is + * additive to independent visibility filtering of the inner tweet. + */ +object QuotedTweetVisibilityRepository { + type Type = Request => Stitch[Option[FilteredState]] + + case class Request( + outerTweetId: TweetId, + outerAuthorId: UserId, + innerTweetId: TweetId, + innerAuthorId: UserId, + viewerId: Option[UserId], + safetyLevel: ThriftSafetyLevel) + + def apply( + quotedTweetVisibilityLibrary: QuotedTweetVisibilityLibrary.Type, + visibilityDeciderGates: VisibilityDeciderGates, + ): QuotedTweetVisibilityRepository.Type = { request: Request => + quotedTweetVisibilityLibrary( + QuotedTweetVisibilityRequest( + quotedTweet = TweetAndAuthor(request.innerTweetId, request.innerAuthorId), + outerTweet = TweetAndAuthor(request.outerTweetId, request.outerAuthorId), + ViewerContext.fromContextWithViewerIdFallback(request.viewerId), + safetyLevel = SafetyLevel.fromThrift(request.safetyLevel) + ) + ).map(visibilityResult => + toFilteredState( + visibilityResult = visibilityResult, + disableLegacyInterstitialFilteredReason = + visibilityDeciderGates.disableLegacyInterstitialFilteredReason())) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala new file mode 100644 index 000000000..7de373848 --- /dev/null +++ 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/QuoterHasAlreadyQuotedRepository.scala @@ -0,0 +1,15 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client.QuotersGraph +import com.twitter.flockdb.client.TFlockClient +import com.twitter.stitch.Stitch + +object QuoterHasAlreadyQuotedRepository { + type Type = (TweetId, UserId) => Stitch[Boolean] + + def apply( + tflockReadClient: TFlockClient + ): Type = + (tweetId, userId) => Stitch.callFuture(tflockReadClient.contains(QuotersGraph, tweetId, userId)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala new file mode 100644 index 000000000..9b6304b4a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RelationshipRepository.scala @@ -0,0 +1,53 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.servo.util.FutureArrow +import com.twitter.socialgraph.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup + +object RelationshipKey { + def blocks(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Blocking) + + def follows(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Following) + + def mutes(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.Muting) + + def reported(sourceId: UserId, destinationId: UserId): RelationshipKey = + RelationshipKey(sourceId, destinationId, RelationshipType.ReportedAsSpam) +} + +case class RelationshipKey( + sourceId: UserId, + destinationId: UserId, + relationship: RelationshipType) { + def asExistsRequest: ExistsRequest = + ExistsRequest( + source 
= sourceId, + target = destinationId, + relationships = Seq(Relationship(relationship)) + ) +} + +object RelationshipRepository { + type Type = RelationshipKey => Stitch[Boolean] + + def apply( + exists: FutureArrow[(Seq[ExistsRequest], Option[RequestContext]), Seq[ExistsResult]], + maxRequestSize: Int + ): Type = { + val relationshipGroup: SeqGroup[RelationshipKey, Boolean] = + new SeqGroup[RelationshipKey, Boolean] { + override def run(keys: Seq[RelationshipKey]): Future[Seq[Try[Boolean]]] = + LegacySeqGroup.liftToSeqTry( + exists((keys.map(_.asExistsRequest), None)).map(_.map(_.exists))) + override val maxSize: Int = maxRequestSize + } + + relationshipKey => Stitch.call(relationshipKey, relationshipGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala new file mode 100644 index 000000000..610f3f3c4 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/RetweetSpamCheckRepository.scala @@ -0,0 +1,13 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Scarecrow + +object RetweetSpamCheckRepository { + type Type = scarecrow.Retweet => Stitch[scarecrow.TieredAction] + + def apply(checkRetweet: Scarecrow.CheckRetweet): Type = + retweet => Stitch.callFuture(checkRetweet(retweet)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala new file mode 100644 index 000000000..7808d465f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StitchLockingCache.scala @@ -0,0 +1,161 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.servo.cache.{CachedValueStatus => Status, LockingCache => KVLockingCache, _} +import com.twitter.servo.repository.{CachedResult => Result} +import com.twitter.stitch.MapGroup +import com.twitter.stitch.Group +import com.twitter.stitch.Stitch +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Time +import com.twitter.util.Try + +/** + * Adapts a key-value locking cache to Arrow and + * normalizes the results to `CachedResult`. + */ +trait StitchLockingCache[K, V] { + val get: K => Stitch[Result[K, V]] + val lockAndSet: (K, StitchLockingCache.Val[V]) => Stitch[Unit] + val delete: K => Stitch[Boolean] +} + +object StitchLockingCache { + + /** + * Value intended to be written back to cache using lockAndSet. + * + * Note that only a subset of CachedValueStatus values are eligible for writing: + * Found, NotFound, and Deleted + */ + sealed trait Val[+V] + object Val { + case class Found[V](value: V) extends Val[V] + case object NotFound extends Val[Nothing] + case object Deleted extends Val[Nothing] + } + + /** + * A Group for batching get requests to a [[KVLockingCache]]. 
+ */ + private case class GetGroup[K, V](cache: KVLockingCache[K, Cached[V]], override val maxSize: Int) + extends MapGroup[K, Result[K, V]] { + + private[this] def cachedToResult(key: K, cached: Cached[V]): Try[Result[K, V]] = + cached.status match { + case Status.NotFound => Return(Result.CachedNotFound(key, cached.cachedAt)) + case Status.Deleted => Return(Result.CachedDeleted(key, cached.cachedAt)) + case Status.SerializationFailed => Return(Result.SerializationFailed(key)) + case Status.DeserializationFailed => Return(Result.DeserializationFailed(key)) + case Status.Evicted => Return(Result.NotFound(key)) + case Status.DoNotCache => Return(Result.DoNotCache(key, cached.doNotCacheUntil)) + case Status.Found => + cached.value match { + case None => Return(Result.NotFound(key)) + case Some(value) => Return(Result.CachedFound(key, value, cached.cachedAt)) + } + case _ => Throw(new UnsupportedOperationException) + } + + override protected def run(keys: Seq[K]): Future[K => Try[Result[K, V]]] = + cache.get(keys).map { (result: KeyValueResult[K, Cached[V]]) => key => + result.found.get(key) match { + case Some(cached) => cachedToResult(key, cached) + case None => + result.failed.get(key) match { + case Some(t) => Return(Result.Failed(key, t)) + case None => Return(Result.NotFound(key)) + } + } + } + } + + /** + * Used in the implementation of LockAndSetGroup. This is just a + * glorified tuple with special equality semantics where calls with + * the same key will compare equal. MapGroup will use this as a key + * in a Map, which will prevent duplicate lockAndSet calls with the + * same key. We don't care which one we use + */ + private class LockAndSetCall[K, V](val key: K, val value: V) { + override def equals(other: Any): Boolean = + other match { + case call: LockAndSetCall[_, _] => call.key == key + case _ => false + } + + override def hashCode(): Int = key.hashCode + } + + /** + * A Group for `lockAndSet` calls to a [[KVLockingCache]]. 
This is + * necessary to avoid writing back a key multiple times if it is + * appears more than once in a batch. LockAndSetCall considers two + * calls equal even if the values differ because multiple lockAndSet + * calls for the same key will eventually result in only one being + * chosen by the cache anyway, and this avoids conflicting + * lockAndSet calls. + * + * For example, consider a tweet that mentions @jack twice + * when @jack is not in cache. That will result in two queries to + * load @jack, which will be deduped by the Group when the repo is + * called. Despite the fact that it is loaded only once, each of the + * two loads is oblivious to the other, so each of them attempts to + * write the value back to cache, resulting in two `lockAndSet` + * calls for @jack, so we have to dedupe them again. + */ + private case class LockAndSetGroup[K, V]( + cache: KVLockingCache[K, V], + picker: KVLockingCache.Picker[V]) + extends MapGroup[LockAndSetCall[K, V], Option[V]] { + + override def run( + calls: Seq[LockAndSetCall[K, V]] + ): Future[LockAndSetCall[K, V] => Try[Option[V]]] = + Future + .collect { + calls.map { call => + // This is masked to prevent interrupts to the overall + // request from interrupting writes back to cache. 
+ cache + .lockAndSet(call.key, KVLockingCache.PickingHandler(call.value, picker)) + .masked + .liftToTry + } + } + .map(responses => calls.zip(responses).toMap) + } + + def apply[K, V]( + underlying: KVLockingCache[K, Cached[V]], + picker: KVLockingCache.Picker[Cached[V]], + maxRequestSize: Int = Int.MaxValue + ): StitchLockingCache[K, V] = + new StitchLockingCache[K, V] { + override val get: K => Stitch[Result[K, V]] = { + val group: Group[K, Result[K, V]] = GetGroup(underlying, maxRequestSize) + + (key: K) => Stitch.call(key, group) + } + + override val lockAndSet: (K, Val[V]) => Stitch[Unit] = { + val group = LockAndSetGroup(underlying, picker) + + (key: K, value: Val[V]) => { + val now = Time.now + val cached: Cached[V] = + value match { + case Val.Found(v) => Cached[V](Some(v), Status.Found, now, Some(now)) + case Val.NotFound => Cached[V](None, Status.NotFound, now, Some(now)) + case Val.Deleted => Cached[V](None, Status.Deleted, now, Some(now)) + } + + Stitch.call(new LockAndSetCall(key, cached), group).unit + } + } + + override val delete: K => Stitch[Boolean] = + (key: K) => Stitch.callFuture(underlying.delete(key)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala new file mode 100644 index 000000000..2658446a3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityAccessRepository.scala @@ -0,0 +1,26 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.CommunityId +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +object StratoCommunityAccessRepository { + type Type = CommunityId => Stitch[Option[CommunityAccess]] + + sealed trait CommunityAccess + object CommunityAccess { + case object Public extends CommunityAccess + case 
object Closed extends CommunityAccess + case object Private extends CommunityAccess + } + + val column = "communities/access.Community" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[CommunityId, Unit, CommunityAccess] = + client.fetcher[CommunityId, CommunityAccess](column) + + communityId => fetcher.fetch(communityId).map(_.v) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala new file mode 100644 index 000000000..cfeb070c1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoCommunityMembershipRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.CommunityId +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +object StratoCommunityMembershipRepository { + type Type = CommunityId => Stitch[Boolean] + + val column = "communities/isMember.Community" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[CommunityId, Unit, Boolean] = + client.fetcher[CommunityId, Boolean](column) + + communityId => fetcher.fetch(communityId).map(_.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala new file mode 100644 index 000000000..8c510e533 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoPromotedTweetRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.tweetypie.TweetId +import com.twitter.strato.client.{Client => StratoClient} + +object 
StratoPromotedTweetRepository { + type Type = TweetId => Stitch[Boolean] + + val column = "tweetypie/isPromoted.Tweet" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[TweetId, Unit, Boolean] = + client.fetcher[TweetId, Boolean](column) + + tweetId => fetcher.fetch(tweetId).map(f => f.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala new file mode 100644 index 000000000..68f537fce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSafetyLabelsRepository.scala @@ -0,0 +1,49 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.search.blender.services.strato.UserSearchSafetySettings +import com.twitter.spam.rtf.thriftscala.SafetyLabel +import com.twitter.spam.rtf.thriftscala.SafetyLabelMap +import com.twitter.spam.rtf.thriftscala.SafetyLabelType +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.visibility.common.UserSearchSafetySource + +object StratoSafetyLabelsRepository { + type Type = (TweetId, SafetyLabelType) => Stitch[Option[SafetyLabel]] + + def apply(client: StratoClient): Type = { + val safetyLabelMapRepo = StratoSafetyLabelMapRepository(client) + + (tweetId, safetyLabelType) => + safetyLabelMapRepo(tweetId).map( + _.flatMap(_.labels).flatMap(_.get(safetyLabelType)) + ) + } +} + +object StratoSafetyLabelMapRepository { + type Type = TweetId => Stitch[Option[SafetyLabelMap]] + + val column = "visibility/baseTweetSafetyLabelMap" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[TweetId, Unit, SafetyLabelMap] = + client.fetcher[TweetId, SafetyLabelMap](column) + + tweetId => fetcher.fetch(tweetId).map(_.v) + } +} 
+ +object StratoUserSearchSafetySourceRepository { + type Type = UserId => Stitch[UserSearchSafetySettings] + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Unit, UserSearchSafetySettings] = + client.fetcher[UserId, UserSearchSafetySettings](UserSearchSafetySource.Column) + + userId => fetcher.fetch(userId).map(_.v.getOrElse(UserSearchSafetySource.DefaultSetting)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala new file mode 100644 index 000000000..1fb825c6b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSubscriptionVerificationRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.tweetypie.UserId +import com.twitter.strato.client.{Client => StratoClient} + +object StratoSubscriptionVerificationRepository { + type Type = (UserId, String) => Stitch[Boolean] + + val column = "subscription-services/subscription-verification/cacheProtectedHasAccess.User" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Seq[String], Seq[String]] = + client.fetcher[UserId, Seq[String], Seq[String]](column) + + (userId, resource) => fetcher.fetch(userId, Seq(resource)).map(f => f.v.nonEmpty) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala new file mode 100644 index 000000000..e86352c37 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowEligibleRepository.scala @@ -0,0 +1,19 @@ +package com.twitter.tweetypie.repository + +import 
com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.UserId + +object StratoSuperFollowEligibleRepository { + type Type = UserId => Stitch[Boolean] + + val column = "audiencerewards/audienceRewardsService/getSuperFollowEligibility.User" + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[UserId, Unit, Boolean] = + client.fetcher[UserId, Boolean](column) + + userId => fetcher.fetch(userId).map(_.v.getOrElse(false)) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala new file mode 100644 index 000000000..e6fa65268 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/StratoSuperFollowRelationsRepository.scala @@ -0,0 +1,60 @@ +package com.twitter.tweetypie.repository + +import com.twitter.audience_rewards.thriftscala.HasSuperFollowingRelationshipRequest +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.core.TweetCreateFailure +import com.twitter.tweetypie.thriftscala.ExclusiveTweetControl +import com.twitter.tweetypie.thriftscala.TweetCreateState + +object StratoSuperFollowRelationsRepository { + type Type = (UserId, UserId) => Stitch[Boolean] + + def apply(client: StratoClient): Type = { + + val column = "audiencerewards/superFollows/hasSuperFollowingRelationshipV2" + + val fetcher: Fetcher[HasSuperFollowingRelationshipRequest, Unit, Boolean] = + client.fetcher[HasSuperFollowingRelationshipRequest, Boolean](column) + + (authorId, viewerId) => { + // Owner of an exclusive tweet chain can respond to their own + // tweets / replies, despite not super 
following themselves + if (authorId == viewerId) { + Stitch.True + } else { + val key = HasSuperFollowingRelationshipRequest(authorId, viewerId) + // The default relation for this column is "missing", aka None. + // This needs to be mapped to false since Super Follows are a sparse relation. + fetcher.fetch(key).map(_.v.getOrElse(false)) + } + } + } + + object Validate { + def apply( + exclusiveTweetControl: Option[ExclusiveTweetControl], + userId: UserId, + superFollowRelationsRepo: StratoSuperFollowRelationsRepository.Type + ): Future[Unit] = { + Stitch + .run { + exclusiveTweetControl.map(_.conversationAuthorId) match { + // Don't do exclusive tweet validation on non exclusive tweets. + case None => + Stitch.value(true) + + case Some(convoAuthorId) => + superFollowRelationsRepo(userId, convoAuthorId) + } + }.map { + case true => Future.Unit + case false => + Future.exception(TweetCreateFailure.State(TweetCreateState.SourceTweetNotFound)) + }.flatten + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala new file mode 100644 index 000000000..82bbd2930 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetCountsRepository.scala @@ -0,0 +1,59 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.flockdb.client._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup + +sealed trait TweetCountKey { + // The flockdb Select used to calculate the count from TFlock + def toSelect: Select[StatusGraph] + + // The Tweet id for this count + def tweetId: TweetId + + // com.twitter.servo.cache.MemcacheCache calls toString to turn this key into a cache key + def toString: String +} + +case class RetweetsKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = 
RetweetsGraph.from(tweetId) + override lazy val toString: String = "cnts:rt:" + tweetId +} + +case class RepliesKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = RepliesToTweetsGraph.from(tweetId) + override lazy val toString: String = "cnts:re:" + tweetId +} + +case class FavsKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = FavoritesGraph.to(tweetId) + override lazy val toString: String = "cnts:fv:" + tweetId +} + +case class QuotesKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = QuotersGraph.from(tweetId) + override lazy val toString: String = "cnts:qt:" + tweetId +} + +case class BookmarksKey(tweetId: TweetId) extends TweetCountKey { + lazy val toSelect: Select[StatusGraph] = BookmarksGraph.to(tweetId) + override lazy val toString: String = "cnts:bm:" + tweetId +} + +object TweetCountsRepository { + type Type = TweetCountKey => Stitch[Count] + + def apply(tflock: TFlockClient, maxRequestSize: Int): Type = { + object RequestGroup extends SeqGroup[TweetCountKey, Count] { + override def run(keys: Seq[TweetCountKey]): Future[Seq[Try[Count]]] = { // all keys of a batch go out in one multiCount call + val selects = MultiSelect[StatusGraph]() ++= keys.map(_.toSelect) + LegacySeqGroup.liftToSeqTry(tflock.multiCount(selects).map(counts => counts.map(_.toLong))) + } + override val maxSize: Int = maxRequestSize + } + + key => Stitch.call(key, RequestGroup) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala new file mode 100644 index 000000000..efbd5b61f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetQuery.scala @@ -0,0 +1,147 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import java.nio.ByteBuffer + +object TweetQuery { + + /** + * Parent trait that indicates what 
triggered the tweet query. + */ + sealed trait Cause { + import Cause._ + + /** + * Is the tweet query hydrating the specified tweet for the purposes of a write? + */ + def writing(tweetId: TweetId): Boolean = + this match { + case w: Write if w.tweetId == tweetId => true + case _ => false + } + + /** + * Is the tweet query performing a regular read for any tweet? If the cause is + * a write on a different tweet, then any other tweet that is read in support of the write + * is considered a normal read, and is subject to read-path hydration. + */ + def reading(tweetId: TweetId): Boolean = + !writing(tweetId) + + /** + * Are we performing an insert after create on the specified tweet? An undelete operation + * performs an insert, but is not considered an initial insert. + */ + def initialInsert(tweetId: TweetId): Boolean = + this match { + case Insert(`tweetId`) => true + case _ => false + } + } + + object Cause { + case object Read extends Cause + trait Write extends Cause { + val tweetId: TweetId + } + case class Insert(tweetId: TweetId) extends Write + case class Undelete(tweetId: TweetId) extends Write + } + + /** + * Options for TweetQuery. + * + * @param include indicates which optionally hydrated fields on each tweet should be + * hydrated and included. + * @param enforceVisibilityFiltering whether Tweetypie visibility hydrators should be run to + * filter protected tweets, blocked quote tweets, contributor data, etc. This does not affect + * Visibility Library (http://go/vf) based filtering. + * @param cause indicates what triggered the read: a normal read, or a write operation. + * @param forExternalConsumption when true, the tweet is being read for rendering to an external + * client such as the iPhone Twitter app and is subject to being Dropped to prevent serving + * "bad" text to clients that might crash their OS. When false, the tweet is being read for internal + * non-client purposes and should never be Dropped. 
+ * @param isInnerQuotedTweet Set by [[com.twitter.tweetypie.hydrator.QuotedTweetHydrator]], + * to be used by [[com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary]] + * so VisibilityFiltering library can execute Interstitial logic on inner quoted tweets. + * @param fetchStoredTweets Set by GetStoredTweetsHandler. If set to true, the Manhattan storage + * layer will fetch and construct Tweets regardless of what state they're in. + */ + case class Options( + include: TweetQuery.Include, + cacheControl: CacheControl = CacheControl.ReadWriteCache, + cardsPlatformKey: Option[String] = None, + excludeReported: Boolean = false, + enforceVisibilityFiltering: Boolean = false, + safetyLevel: SafetyLevel = SafetyLevel.FilterNone, + forUserId: Option[UserId] = None, + languageTag: String = "en", + extensionsArgs: Option[ByteBuffer] = None, + cause: Cause = Cause.Read, + scrubUnrequestedFields: Boolean = true, + requireSourceTweet: Boolean = true, + forExternalConsumption: Boolean = false, + simpleQuotedTweet: Boolean = false, + isInnerQuotedTweet: Boolean = false, + fetchStoredTweets: Boolean = false, + isSourceTweet: Boolean = false, + enableEditControlHydration: Boolean = true) + + case class Include( + tweetFields: Set[FieldId] = Set.empty, + countsFields: Set[FieldId] = Set.empty, + mediaFields: Set[FieldId] = Set.empty, + quotedTweet: Boolean = false, + pastedMedia: Boolean = false) { + + /** + * Accumulates additional (rather than replaces) field ids. 
+ */ + def also( + tweetFields: Traversable[FieldId] = Nil, + countsFields: Traversable[FieldId] = Nil, + mediaFields: Traversable[FieldId] = Nil, + quotedTweet: Option[Boolean] = None, + pastedMedia: Option[Boolean] = None + ): Include = + copy( + tweetFields = this.tweetFields ++ tweetFields, + countsFields = this.countsFields ++ countsFields, + mediaFields = this.mediaFields ++ mediaFields, + quotedTweet = quotedTweet.getOrElse(this.quotedTweet), + pastedMedia = pastedMedia.getOrElse(this.pastedMedia) + ) + + /** + * Removes field ids. + */ + def exclude( + tweetFields: Traversable[FieldId] = Nil, + countsFields: Traversable[FieldId] = Nil, + mediaFields: Traversable[FieldId] = Nil + ): Include = + copy( + tweetFields = this.tweetFields -- tweetFields, + countsFields = this.countsFields -- countsFields, + mediaFields = this.mediaFields -- mediaFields + ) + + def ++(that: Include): Include = + copy( + tweetFields = this.tweetFields ++ that.tweetFields, + countsFields = this.countsFields ++ that.countsFields, + mediaFields = this.mediaFields ++ that.mediaFields, + quotedTweet = this.quotedTweet || that.quotedTweet, + pastedMedia = this.pastedMedia || that.pastedMedia + ) + } +} + +sealed case class CacheControl(writeToCache: Boolean, readFromCache: Boolean) + +object CacheControl { + val NoCache: CacheControl = CacheControl(false, false) + val ReadOnlyCache: CacheControl = CacheControl(false, true) + val ReadWriteCache: CacheControl = CacheControl(true, true) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala new file mode 100644 index 000000000..f0f24fafa --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetRepository.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import 
com.twitter.tweetypie.core._ + +object TweetRepository { + type Type = (TweetId, TweetQuery.Options) => Stitch[Tweet] + type Optional = (TweetId, TweetQuery.Options) => Stitch[Option[Tweet]] + + def tweetGetter(repo: Optional, opts: TweetQuery.Options): FutureArrow[TweetId, Option[Tweet]] = + FutureArrow(tweetId => Stitch.run(repo(tweetId, opts))) + + def tweetGetter(repo: Optional): FutureArrow[(TweetId, TweetQuery.Options), Option[Tweet]] = + FutureArrow { case (tweetId, opts) => Stitch.run(repo(tweetId, opts)) } + + /** + * Converts a `TweetResultRepository.Type`-typed repo to an `TweetRepository.Type`-typed repo. + */ + def fromTweetResult(repo: TweetResultRepository.Type): Type = + (tweetId, options) => repo(tweetId, options).map(_.value.tweet) + + /** + * Converts a `Type`-typed repo to an `Optional`-typed + * repo, where NotFound or filtered tweets are returned as `None`. + */ + def optional(repo: Type): Optional = + (tweetId, options) => + repo(tweetId, options).liftToOption { case NotFound | (_: FilteredState) => true } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala new file mode 100644 index 000000000..2e8f50ffd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetResultRepository.scala @@ -0,0 +1,17 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.core.TweetResult + +object TweetResultRepository { + type Type = (TweetId, TweetQuery.Options) => Stitch[TweetResult] + + /** + * Short-circuits the request of invalid tweet ids (`<= 0`) by immediately throwing `NotFound`. 
+ */ + def shortCircuitInvalidIds(repo: Type): Type = { + case (tweetId, _) if tweetId <= 0 => Stitch.NotFound + case (tweetId, options) => repo(tweetId, options) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala new file mode 100644 index 000000000..98b3f5e47 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetSpamCheckRepository.scala @@ -0,0 +1,14 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.gen.scarecrow.{thriftscala => scarecrow} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.backends.Scarecrow + +object TweetSpamCheckRepository { + + type Type = (scarecrow.TweetNew, scarecrow.TweetContext) => Stitch[scarecrow.CheckTweetResponse] + + def apply(checkTweet: Scarecrow.CheckTweet2): Type = + (tweetNew, tweetContext) => Stitch.callFuture(checkTweet((tweetNew, tweetContext))) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala new file mode 100644 index 000000000..f0017b2fd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/TweetVisibilityRepository.scala @@ -0,0 +1,123 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.logging.Logger +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.repository.VisibilityResultToFilteredState.toFilteredState +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.visibility.configapi.configs.VisibilityDeciderGates +import com.twitter.visibility.interfaces.tweets.TweetVisibilityLibrary +import 
com.twitter.visibility.interfaces.tweets.TweetVisibilityRequest +import com.twitter.visibility.models.SafetyLevel.DeprecatedSafetyLevel +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.ViewerContext + +/** + * This repository handles visibility filtering of tweets + * + * i.e. deciding whether to drop/suppress tweets based on viewer + * and safety level for instance. Rules in VF library can be thought as: + * + * (SafetyLevel)(Viewer, Content, Features) => Action + * + * SafetyLevel represents the product context in which the Viewer is + * requesting to view the Content. Example: TimelineHome, TweetDetail, + * Recommendations, Notifications + * + * Content here is mainly tweets (can be users, notifications, cards etc) + * + * Features might include safety labels and other metadata of a Tweet, + * flags set on a User (including the Viewer), relationships between Users + * (e.g. block, follow), relationships between Users and Content + * (e.g. reported for spam) + * + * We initialize VisibilityLibrary using UserSource and UserRelationshipSource: + * Stitch interfaces that provide methods to retrieve user and relationship + * information in Gizmoduck and SocialGraph repositories, respectively. + * This user and relationship info along with Tweet labels, provide necessary + * features to take a filtering decision. + * + * Actions supported in Tweetypie right now are Drop and Suppress. + * In the future, we might want to surface other granular actions such as + * Tombstone and Downrank which are supported in VF lib. + * + * The TweetVisibilityRepository has the following format: + * + * Request(Tweet, Option[SafetyLevel], Option[UserId]) => Stitch[Option[FilteredState]] + * + * SafetyLevel is plumbed from the tweet query options. + * + * In addition to the latency stats and rpc counts from VF library, we also capture + * unsupported and deprecated safety level stats here to inform the relevant clients. 
+ * + * go/visibilityfiltering, go/visibilityfilteringdocs + * + */ +object TweetVisibilityRepository { + type Type = Request => Stitch[Option[FilteredState]] + + case class Request( + tweet: Tweet, + viewerId: Option[UserId], + safetyLevel: ThriftSafetyLevel, + isInnerQuotedTweet: Boolean, + isRetweet: Boolean, + hydrateConversationControl: Boolean, + isSourceTweet: Boolean) + + def apply( + visibilityLibrary: TweetVisibilityLibrary.Type, + visibilityDeciderGates: VisibilityDeciderGates, + log: Logger, + statsReceiver: StatsReceiver + ): TweetVisibilityRepository.Type = { + + val noTweetRulesCounter = statsReceiver.counter("no_tweet_rules_requests") + val deprecatedScope = statsReceiver.scope("deprecated_safety_level") + + request: Request => + SafetyLevel.fromThrift(request.safetyLevel) match { + case DeprecatedSafetyLevel => + deprecatedScope.counter(request.safetyLevel.name.toLowerCase()).incr() + log.warning("Deprecated SafetyLevel (%s) requested".format(request.safetyLevel.name)) + Stitch.None + case safetyLevel: SafetyLevel => + if (!TweetVisibilityLibrary.hasTweetRules(safetyLevel)) { + noTweetRulesCounter.incr() + Stitch.None + } else { + visibilityLibrary( + TweetVisibilityRequest( + tweet = request.tweet, + safetyLevel = safetyLevel, + viewerContext = ViewerContext.fromContextWithViewerIdFallback(request.viewerId), + isInnerQuotedTweet = request.isInnerQuotedTweet, + isRetweet = request.isRetweet, + hydrateConversationControl = request.hydrateConversationControl, + isSourceTweet = request.isSourceTweet + ) + ).map(visibilityResult => + toFilteredState( + visibilityResult = visibilityResult, + disableLegacyInterstitialFilteredReason = + visibilityDeciderGates.disableLegacyInterstitialFilteredReason())) + } + } + } + + /** + * We can skip visibility filtering when any of the following is true: + * + * - SafetyLevel is deprecated + * - SafetyLevel has no tweet rules + */ + def canSkipVisibilityFiltering(thriftSafetyLevel: ThriftSafetyLevel): Boolean = + 
SafetyLevel.fromThrift(thriftSafetyLevel) match { + case DeprecatedSafetyLevel => + true + case safetyLevel: SafetyLevel => + !TweetVisibilityLibrary.hasTweetRules(safetyLevel) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala new file mode 100644 index 000000000..c7165f95b --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionInfoRepository.scala @@ -0,0 +1,39 @@ +package com.twitter.tweetypie.repository + +import com.twitter.consumer_privacy.mention_controls.thriftscala.UnmentionInfo +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.strato.thrift.ScroogeConvImplicits._ + +object UnmentionInfoRepository { + type Type = Tweet => Stitch[Option[UnmentionInfo]] + + val column = "consumer-privacy/mentions-management/unmentionInfoFromTweet" + case class UnmentionInfoView(asViewer: Option[Long]) + + /** + * Creates a function that extracts user fields from a tweet and checks + * if the extracted users have been unmentioned from the tweet's associated conversation. 
+ * This function enables the prefetch caching of UnmentionInfo used by graphql during createTweet + * events and mirrors the logic found in the unmentionInfo Strato column found + * here: http://go/unmentionInfo.strato + * @param client Strato client + * @return + */ + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[Tweet, UnmentionInfoView, UnmentionInfo] = + client.fetcher[Tweet, UnmentionInfoView, UnmentionInfo](column) + + tweet => + tweet.coreData.flatMap(_.conversationId) match { + case Some(conversationId) => + val viewerUserId = TwitterContext().flatMap(_.userId) + fetcher + .fetch(tweet, UnmentionInfoView(viewerUserId)) + .map(_.v) + case _ => Stitch.None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala new file mode 100644 index 000000000..ea02cbdd3 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UnmentionedEntitiesRepository.scala @@ -0,0 +1,28 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import com.twitter.strato.client.{Client => StratoClient} + +/** + * Repository for fetching UserIds that have unmentioned themselves from a conversation. 
+ */ +object UnmentionedEntitiesRepository { + type Type = (ConversationId, Seq[UserId]) => Stitch[Option[Seq[UserId]]] + + val column = "consumer-privacy/mentions-management/getUnmentionedUsersFromConversation" + case class GetUnmentionView(userIds: Option[Seq[Long]]) + + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[Long, GetUnmentionView, Seq[Long]] = + client.fetcher[Long, GetUnmentionView, Seq[Long]](column) + + (conversationId, userIds) => + if (userIds.nonEmpty) { + fetcher.fetch(conversationId, GetUnmentionView(Some(userIds))).map(_.v) + } else { + Stitch.None + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala new file mode 100644 index 000000000..b2bf53bac --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UrlRepository.scala @@ -0,0 +1,69 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.service.talon.thriftscala._ +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Talon +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.core.OverCapacity + +case class UrlSlug(text: String) extends AnyVal +case class ExpandedUrl(text: String) extends AnyVal + +object UrlRepository { + type Type = UrlSlug => Stitch[ExpandedUrl] + + /** + * Builds a UrlRepository from a Talon.Expand arrow. 
+ */ + def apply( + talonExpand: Talon.Expand, + tweetypieClientId: String, + statsReceiver: StatsReceiver, + clientIdHelper: ClientIdHelper, + ): Type = { + val observedTalonExpand: Talon.Expand = + talonExpand + .trackOutcome(statsReceiver, _ => clientIdHelper.effectiveClientId.getOrElse("unknown")) + + val expandGroup = SeqGroup[ExpandRequest, Try[ExpandResponse]] { requests => + LegacySeqGroup.liftToSeqTry( + Future.collect(requests.map(r => observedTalonExpand(r).liftToTry))) + } + + slug => + val request = toExpandRequest(slug, auditMessage(tweetypieClientId, clientIdHelper)) + + Stitch + .call(request, expandGroup) + .lowerFromTry + .flatMap(toExpandedUrl(slug, _)) + } + + def auditMessage(tweetypieClientId: String, clientIdHelper: ClientIdHelper): String = { + tweetypieClientId + clientIdHelper.effectiveClientId.mkString(":", "", "") + } + + def toExpandRequest(slug: UrlSlug, auditMessage: String): ExpandRequest = + ExpandRequest(userId = 0, shortUrl = slug.text, fromUser = false, auditMsg = Some(auditMessage)) + + def toExpandedUrl(slug: UrlSlug, res: ExpandResponse): Stitch[ExpandedUrl] = + res.responseCode match { + case ResponseCode.Ok => + // use Option(res.longUrl) because res.longUrl can be null + Option(res.longUrl) match { + case None => Stitch.NotFound + case Some(longUrl) => Stitch.value(ExpandedUrl(longUrl)) + } + + case ResponseCode.BadInput => + Stitch.NotFound + + // we shouldn't see other ResponseCodes, because Talon.Expand translates them to + // exceptions, but we have this catch-all just in case. 
+ case _ => + Stitch.exception(OverCapacity("talon")) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala new file mode 100644 index 000000000..204b86cef --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserInfoRepository.scala @@ -0,0 +1,138 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.gizmoduck.thriftscala.UserResponseState +import com.twitter.spam.rtf.thriftscala.{SafetyLevel => ThriftSafetyLevel} +import com.twitter.stitch.NotFound +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.core._ +import com.twitter.tweetypie.thriftscala.UserIdentity +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityLibrary +import com.twitter.visibility.interfaces.tweets.UserUnavailableStateVisibilityRequest +import com.twitter.visibility.models.SafetyLevel +import com.twitter.visibility.models.UserUnavailableStateEnum +import com.twitter.visibility.models.ViewerContext +import com.twitter.visibility.thriftscala.UserVisibilityResult + +/** + * Some types of user (e.g. frictionless users) may not + * have profiles, so a missing UserIdentity may mean that the user + * does not exist, or that the user does not have a profile. 
+ */ +object UserIdentityRepository { + type Type = UserKey => Stitch[UserIdentity] + + def apply(repo: UserRepository.Type): Type = { key => + val opts = UserQueryOptions(Set(UserField.Profile), UserVisibility.Mentionable) + repo(key, opts) + .map { user => + user.profile.map { profile => + UserIdentity( + id = user.id, + screenName = profile.screenName, + realName = profile.name + ) + } + } + .lowerFromOption() + } +} + +object UserProtectionRepository { + type Type = UserKey => Stitch[Boolean] + + def apply(repo: UserRepository.Type): Type = { + val opts = UserQueryOptions(Set(UserField.Safety), UserVisibility.All) + + userKey => + repo(userKey, opts) + .map(user => user.safety.map(_.isProtected)) + .lowerFromOption() + } +} + +/** + * Query Gizmoduck to check if a user `forUserId` can see user `userKey`. + * If forUserId is Some(), this will also check protected relationship, + * if it's None, it will check others as per UserVisibility.Visible policy in + * UserRepository.scala. If forUserId is None, this doesn't verify any + * relationships, visibility is determined based solely on user's + * properties (eg. 
deactivated, suspended, etc) + */ +object UserVisibilityRepository { + type Type = Query => Stitch[Option[FilteredState.Unavailable]] + + case class Query( + userKey: UserKey, + forUserId: Option[UserId], + tweetId: TweetId, + isRetweet: Boolean, + isInnerQuotedTweet: Boolean, + safetyLevel: Option[ThriftSafetyLevel]) + + def apply( + repo: UserRepository.Type, + userUnavailableAuthorStateVisibilityLibrary: UserUnavailableStateVisibilityLibrary.Type + ): Type = + query => { + repo( + query.userKey, + UserQueryOptions( + Set(), + UserVisibility.Visible, + forUserId = query.forUserId, + filteredAsFailure = true, + safetyLevel = query.safetyLevel + ) + ) + // We don't actually care about the response here (User's data), only whether + // it was filtered or not + .map { case _ => None } + .rescue { + case fs: FilteredState.Unavailable => Stitch.value(Some(fs)) + case UserFilteredFailure(state, reason) => + userUnavailableAuthorStateVisibilityLibrary + .apply( + UserUnavailableStateVisibilityRequest( + query.safetyLevel + .map(SafetyLevel.fromThrift).getOrElse(SafetyLevel.FilterDefault), + query.tweetId, + ViewerContext.fromContextWithViewerIdFallback(query.forUserId), + toUserUnavailableState(state, reason), + query.isRetweet, + query.isInnerQuotedTweet + ) + ).map(VisibilityResultToFilteredState.toFilteredStateUnavailable) + case NotFound => Stitch.value(Some(FilteredState.Unavailable.Author.NotFound)) + } + } + + def toUserUnavailableState( + userResponseState: UserResponseState, + userVisibilityResult: Option[UserVisibilityResult] + ): UserUnavailableStateEnum = { + (userResponseState, userVisibilityResult) match { + case (UserResponseState.DeactivatedUser, _) => UserUnavailableStateEnum.Deactivated + case (UserResponseState.OffboardedUser, _) => UserUnavailableStateEnum.Offboarded + case (UserResponseState.ErasedUser, _) => UserUnavailableStateEnum.Erased + case (UserResponseState.SuspendedUser, _) => UserUnavailableStateEnum.Suspended + case 
(UserResponseState.ProtectedUser, _) => UserUnavailableStateEnum.Protected + case (_, Some(result)) => UserUnavailableStateEnum.Filtered(result) + case _ => UserUnavailableStateEnum.Unavailable + } + } +} + +object UserViewRepository { + type Type = Query => Stitch[User] + + case class Query( + userKey: UserKey, + forUserId: Option[UserId], + visibility: UserVisibility, + queryFields: Set[UserField] = Set(UserField.View)) + + def apply(repo: UserRepository.Type): UserViewRepository.Type = + query => + repo(query.userKey, UserQueryOptions(query.queryFields, query.visibility, query.forUserId)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala new file mode 100644 index 000000000..ca80d9503 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserRepository.scala @@ -0,0 +1,285 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.UserResponseState +import com.twitter.gizmoduck.thriftscala.UserResult +import com.twitter.servo.cache.ScopedCacheKey +import com.twitter.servo.json.syntax._ +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.NotFound +import com.twitter.stitch.SeqGroup +import com.twitter.stitch.Stitch +import com.twitter.stitch.compat.LegacySeqGroup +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.core._ +import com.twitter.util.Base64Long.toBase64 +import com.twitter.util.logging.Logger +import com.twitter.visibility.thriftscala.UserVisibilityResult +import scala.util.control.NoStackTrace + +sealed trait UserKey + +object UserKey { + def byId(userId: UserId): UserKey = UserIdKey(userId) + def byScreenName(screenName: String): UserKey = ScreenNameKey.toLowerCase(screenName) + def apply(userId: UserId): UserKey = UserIdKey(userId) + 
def apply(screenName: String): UserKey = ScreenNameKey.toLowerCase(screenName) +} + +case class UserIdKey(userId: UserId) + extends ScopedCacheKey("t", "usr", 1, "id", toBase64(userId)) + with UserKey + +object ScreenNameKey { + def toLowerCase(screenName: String): ScreenNameKey = ScreenNameKey(screenName.toLowerCase) +} + +/** + * Use UserKey.apply(String) instead of ScreenNameKey(String) to construct a key, + * as it will down-case the screen-name to better utilize the user cache. + */ +case class ScreenNameKey private (screenName: String) + extends ScopedCacheKey("t", "usr", 1, "sn", screenName) + with UserKey + +/** + * A set of flags, used in UserQuery, which control whether to include or filter out + * users in various non-standard states. + */ +case class UserVisibility( + filterProtected: Boolean, + filterSuspended: Boolean, + filterDeactivated: Boolean, + filterOffboardedAndErased: Boolean, + filterNoScreenName: Boolean, + filterPeriscope: Boolean, + filterSoft: Boolean) + +object UserVisibility { + + /** + * No filtering, can see every user that gizmoduck can return. + */ + val All: UserVisibility = UserVisibility( + filterProtected = false, + filterSuspended = false, + filterDeactivated = false, + filterOffboardedAndErased = false, + filterNoScreenName = false, + filterPeriscope = false, + filterSoft = false + ) + + /** + * Only includes users that would be visible to a non-logged in user, + * or a logged in user where the following graph is checked for + * protected users. + * + * no-screen-name, soft, and periscope users are visible, but not + * mentionable. 
+ */ + val Visible: UserVisibility = UserVisibility( + filterProtected = true, + filterSuspended = true, + filterDeactivated = true, + filterOffboardedAndErased = true, + filterNoScreenName = false, + filterPeriscope = false, + filterSoft = false + ) + + val MediaTaggable: UserVisibility = UserVisibility( + filterProtected = false, + filterSuspended = true, + filterDeactivated = true, + filterOffboardedAndErased = true, + filterNoScreenName = true, + filterPeriscope = true, + filterSoft = true + ) + + /** + * Includes all mentionable users (filters offboarded/erased/no-screen-name/periscope/soft users) + */ + val Mentionable: UserVisibility = UserVisibility( + filterProtected = false, + filterSuspended = false, + filterDeactivated = false, + filterOffboardedAndErased = true, + filterNoScreenName = true, + filterPeriscope = true, + filterSoft = true + ) +} + +/** + * The `visibility` field includes a set of flags that indicate whether users in + * various non-standard states should be included in the `found` results, or filtered + * out. By default, "filtered out" means to treat them as `notFound`, but if `filteredAsFailure` + * is true, then the filtered users will be indicated in a [[UserFilteredFailure]] result. 
+ */ +case class UserQueryOptions( + queryFields: Set[UserField] = Set.empty, + visibility: UserVisibility, + forUserId: Option[UserId] = None, + filteredAsFailure: Boolean = false, + safetyLevel: Option[SafetyLevel] = None) { + def toLookupContext: LookupContext = + LookupContext( + includeFailed = true, + forUserId = forUserId, + includeProtected = !visibility.filterProtected, + includeSuspended = !visibility.filterSuspended, + includeDeactivated = !visibility.filterDeactivated, + includeErased = !visibility.filterOffboardedAndErased, + includeNoScreenNameUsers = !visibility.filterNoScreenName, + includePeriscopeUsers = !visibility.filterPeriscope, + includeSoftUsers = !visibility.filterSoft, + includeOffboarded = !visibility.filterOffboardedAndErased, + safetyLevel = safetyLevel + ) +} + +case class UserLookupFailure(message: String, state: UserResponseState) extends RuntimeException { + override def getMessage(): String = + s"$message: responseState = $state" +} + +/** + * Indicates a failure due to the user being filtered. 
+ * + * @see [[GizmoduckUserRepository.FilteredStates]] + */ +case class UserFilteredFailure(state: UserResponseState, reason: Option[UserVisibilityResult]) + extends Exception + with NoStackTrace + +object UserRepository { + type Type = (UserKey, UserQueryOptions) => Stitch[User] + type Optional = (UserKey, UserQueryOptions) => Stitch[Option[User]] + + def optional(repo: Type): Optional = + (userKey, queryOptions) => repo(userKey, queryOptions).liftNotFoundToOption + + def userGetter( + userRepo: UserRepository.Optional, + opts: UserQueryOptions + ): UserKey => Future[Option[User]] = + userKey => Stitch.run(userRepo(userKey, opts)) +} + +object GizmoduckUserRepository { + private[this] val log = Logger(getClass) + + def apply( + getById: Gizmoduck.GetById, + getByScreenName: Gizmoduck.GetByScreenName, + maxRequestSize: Int = Int.MaxValue + ): UserRepository.Type = { + case class GetBy[K]( + opts: UserQueryOptions, + get: ((LookupContext, Seq[K], Set[UserField])) => Future[Seq[UserResult]]) + extends SeqGroup[K, UserResult] { + override def run(keys: Seq[K]): Future[Seq[Try[UserResult]]] = + LegacySeqGroup.liftToSeqTry(get((opts.toLookupContext, keys, opts.queryFields))) + override def maxSize: Int = maxRequestSize + } + + (key, opts) => { + val result = + key match { + case UserIdKey(id) => Stitch.call(id, GetBy(opts, getById)) + case ScreenNameKey(sn) => Stitch.call(sn, GetBy(opts, getByScreenName)) + } + + result.flatMap(r => Stitch.const(toTryUser(r, opts.filteredAsFailure))) + } + } + + private def toTryUser( + userResult: UserResult, + filteredAsFailure: Boolean + ): Try[User] = + userResult.responseState match { + case s if s.forall(SuccessStates.contains(_)) => + userResult.user match { + case Some(u) => + Return(u) + + case None => + log.warn( + s"User expected to be present, but not found in:\n${userResult.prettyPrint}" + ) + // This should never happen, but if it does, treat it as the + // user being returned as NotFound. 
+ Throw(NotFound) + } + + case Some(s) if NotFoundStates.contains(s) => + Throw(NotFound) + + case Some(s) if FilteredStates.contains(s) => + Throw(if (filteredAsFailure) UserFilteredFailure(s, userResult.unsafeReason) else NotFound) + + case Some(UserResponseState.Failed) => + def lookupFailure(msg: String) = + UserLookupFailure(msg, UserResponseState.Failed) + + Throw { + userResult.failureReason + .map { reason => + reason.internalServerError + .orElse { + reason.overCapacity.map { e => + // Convert Gizmoduck OverCapacity to Tweetypie + // OverCapacity exception, explaining that it was + // propagated from Gizmoduck. + OverCapacity(s"gizmoduck over capacity: ${e.message}") + } + } + .orElse(reason.unexpectedException.map(lookupFailure)) + .getOrElse(lookupFailure("failureReason empty")) + } + .getOrElse(lookupFailure("failureReason missing")) + } + + case Some(unexpected) => + Throw(UserLookupFailure("Unexpected response state", unexpected)) + } + + /** + * States that we expect to correspond to a user being returned. + */ + val SuccessStates: Set[UserResponseState] = + Set[UserResponseState]( + UserResponseState.Found, + UserResponseState.Partial + ) + + /** + * States that always correspond to a NotFound response. + */ + val NotFoundStates: Set[UserResponseState] = + Set[UserResponseState]( + UserResponseState.NotFound, + // These are really filtered out, but we treat them as not found + // since we don't have analogous filtering states for tweets. 
+ UserResponseState.PeriscopeUser, + UserResponseState.SoftUser, + UserResponseState.NoScreenNameUser + ) + + /** + * Response states that correspond to a FilteredState + */ + val FilteredStates: Set[UserResponseState] = + Set( + UserResponseState.DeactivatedUser, + UserResponseState.OffboardedUser, + UserResponseState.ErasedUser, + UserResponseState.SuspendedUser, + UserResponseState.ProtectedUser, + UserResponseState.UnsafeUser + ) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserTakedownRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserTakedownRepository.scala new file mode 100644 index 000000000..488c2cca6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserTakedownRepository.scala @@ -0,0 +1,26 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.stitch.Stitch +import com.twitter.takedown.util.TakedownReasons +import com.twitter.tseng.withholding.thriftscala.TakedownReason + +/** + * Query TakedownReason objects from gizmoduck + * + * No backfill job has been completed so there may exist users that have a takedown + * country_code without a corresponding UnspecifiedReason takedown_reason. Therefore, + * read from both fields and merge into a set of TakedownReason, translating raw takedown + * country_code into TakedownReason.UnspecifiedReason(country_code). 
+ */ +object UserTakedownRepository { + type Type = UserId => Stitch[Set[TakedownReason]] + + val userQueryOptions: UserQueryOptions = + UserQueryOptions(Set(UserField.Takedowns), UserVisibility.All) + + def apply(userRepo: UserRepository.Type): UserTakedownRepository.Type = + userId => + userRepo(UserKey(userId = userId), userQueryOptions) + .map(_.takedowns.map(TakedownReasons.userTakedownsToReasons).getOrElse(Set.empty)) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserViewerRecipient.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserViewerRecipient.scala new file mode 100644 index 000000000..1dd2b0e92 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/UserViewerRecipient.scala @@ -0,0 +1,78 @@ +package com.twitter.tweetypie +package repository + +import com.twitter.context.thriftscala.Viewer +import com.twitter.featureswitches.Recipient +import com.twitter.featureswitches.TOOClient +import com.twitter.featureswitches.UserAgent +import com.twitter.tweetypie.StatsReceiver +import com.twitter.tweetypie.User +import com.twitter.tweetypie.UserId +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.repository.UserViewerRecipient.UserIdMismatchException + +/** + * Provides a Recipient backed by a Gizmoduck User and TwitterContext Viewer for + * use in FeatureSwitch validation. + */ +object UserViewerRecipient { + object UserIdMismatchException extends Exception + + def apply(user: User, viewer: Viewer, stats: StatsReceiver): Option[Recipient] = { + // This is a workaround for thrift API clients that allow users to Tweet on behalf + // of other Twitter users. This is similar to go/contributors, however some platforms + // have enabled workflows that don't use the go/contributors auth platform, and + // therefore the TwitterContext Viewer isn't set up correctly for contributor requests. 
+ if (viewer.userId.contains(user.id)) { + Some(new UserViewerRecipient(user, viewer)) + } else { + val mismatchScope = stats.scope(s"user_viewer_mismatch") + ClientIdHelper.default.effectiveClientIdRoot.foreach { clientId => + mismatchScope.scope("client").counter(clientId).incr() + } + mismatchScope.counter("total").incr() + None + } + } +} + +class UserViewerRecipient( + user: User, + viewer: Viewer) + extends Recipient { + + if (!viewer.userId.contains(user.id)) { + throw UserIdMismatchException + } + + override def userId: Option[UserId] = viewer.userId + + override def userRoles: Option[Set[String]] = user.roles.map(_.roles.toSet) + + override def deviceId: Option[String] = viewer.deviceId + + override def guestId: Option[Long] = viewer.guestId + + override def languageCode: Option[String] = viewer.requestLanguageCode + + override def signupCountryCode: Option[String] = user.safety.flatMap(_.signupCountryCode) + + override def countryCode: Option[String] = viewer.requestCountryCode + + override def userAgent: Option[UserAgent] = viewer.userAgent.flatMap(UserAgent(_)) + + override def isManifest: Boolean = false + + override def isVerified: Option[Boolean] = user.safety.map(_.verified) + + override def clientApplicationId: Option[Long] = viewer.clientApplicationId + + @Deprecated + override def isTwoffice: Option[Boolean] = None + + @Deprecated + override def tooClient: Option[TOOClient] = None + + @Deprecated + override def highWaterMark: Option[Long] = None +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VibeRepository.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VibeRepository.scala new file mode 100644 index 000000000..780773942 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VibeRepository.scala @@ -0,0 +1,30 @@ +package com.twitter.tweetypie.repository + +import com.twitter.stitch.Stitch +import com.twitter.strato.client.Fetcher +import 
com.twitter.strato.client.{Client => StratoClient} +import com.twitter.tweetypie.thriftscala.Tweet +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.vibes.thriftscala.VibeV2 + +object VibeRepository { + type Type = Tweet => Stitch[Option[VibeV2]] + + val column = "vibes/vibe.Tweet" + case class VibeView(viewerId: Option[Long]) + + /** + * Creates a function that applies the vibes/vibe.Tweet strato column fetch on the given + * Tweet. Strato column source: go/vibe.strato + * @param client Strato client + * @return + */ + def apply(client: StratoClient): Type = { + val fetcher: Fetcher[Long, VibeView, VibeV2] = + client.fetcher[Long, VibeView, VibeV2](column) + tweet => + fetcher + .fetch(tweet.id, VibeView(None)) + .map(_.v) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VisibilityResultToFilteredState.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VisibilityResultToFilteredState.scala new file mode 100644 index 000000000..4eec0613f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/VisibilityResultToFilteredState.scala @@ -0,0 +1,209 @@ +package com.twitter.tweetypie.repository + +import com.twitter.spam.rtf.thriftscala.FilteredReason +import com.twitter.spam.rtf.thriftscala.KeywordMatch +import com.twitter.spam.rtf.thriftscala.SafetyResult +import com.twitter.tweetypie.core.FilteredState +import com.twitter.tweetypie.core.FilteredState.Suppress +import com.twitter.tweetypie.core.FilteredState.Unavailable +import com.twitter.visibility.builder.VisibilityResult +import com.twitter.visibility.common.user_result.UserVisibilityResultHelper +import com.twitter.visibility.rules.Reason._ +import com.twitter.visibility.rules._ +import com.twitter.visibility.{thriftscala => vfthrift} + +object VisibilityResultToFilteredState { + def toFilteredStateUnavailable( + visibilityResult: VisibilityResult + ): Option[FilteredState.Unavailable] = { + val 
dropSafetyResult = Some( + Unavailable.Drop(FilteredReason.SafetyResult(visibilityResult.getSafetyResult)) + ) + + visibilityResult.verdict match { + case Drop(ExclusiveTweet, _) => + dropSafetyResult + + case Drop(NsfwViewerIsUnderage | NsfwViewerHasNoStatedAge | NsfwLoggedOut, _) => + dropSafetyResult + + case Drop(TrustedFriendsTweet, _) => + dropSafetyResult + + case _: LocalizedTombstone => dropSafetyResult + + case Drop(StaleTweet, _) => dropSafetyResult + + // legacy drop actions + case dropAction: Drop => unavailableFromDropAction(dropAction) + + // not an unavailable state that can be mapped + case _ => None + } + } + + def toFilteredState( + visibilityResult: VisibilityResult, + disableLegacyInterstitialFilteredReason: Boolean + ): Option[FilteredState] = { + val suppressSafetyResult = Some( + Suppress(FilteredReason.SafetyResult(visibilityResult.getSafetyResult)) + ) + val dropSafetyResult = Some( + Unavailable.Drop(FilteredReason.SafetyResult(visibilityResult.getSafetyResult)) + ) + + visibilityResult.verdict match { + case _: Appealable => suppressSafetyResult + + case _: Preview => suppressSafetyResult + + case _: InterstitialLimitedEngagements => suppressSafetyResult + + case _: EmergencyDynamicInterstitial => suppressSafetyResult + + case _: SoftIntervention => suppressSafetyResult + + case _: LimitedEngagements => suppressSafetyResult + + case _: TweetInterstitial => suppressSafetyResult + + case _: TweetVisibilityNudge => suppressSafetyResult + + case Interstitial( + ViewerBlocksAuthor | ViewerReportedAuthor | ViewerReportedTweet | ViewerMutesAuthor | + ViewerHardMutedAuthor | MutedKeyword | InterstitialDevelopmentOnly | HatefulConduct | + AbusiveBehavior, + _, + _) if disableLegacyInterstitialFilteredReason => + suppressSafetyResult + + case Interstitial( + ViewerBlocksAuthor | ViewerReportedAuthor | ViewerReportedTweet | + InterstitialDevelopmentOnly, + _, + _) => + suppressSafetyResult + + case _: ComplianceTweetNotice => suppressSafetyResult + 
+ case Drop(ExclusiveTweet, _) => + dropSafetyResult + + case Drop(NsfwViewerIsUnderage | NsfwViewerHasNoStatedAge | NsfwLoggedOut, _) => + dropSafetyResult + + case Drop(TrustedFriendsTweet, _) => + dropSafetyResult + + case Drop(StaleTweet, _) => dropSafetyResult + + case _: LocalizedTombstone => dropSafetyResult + + case _: Avoid => suppressSafetyResult + + // legacy drop actions + case dropAction: Drop => unavailableFromDropAction(dropAction) + + // legacy suppress actions + case action => suppressFromVisibilityAction(action, !disableLegacyInterstitialFilteredReason) + } + } + + def toFilteredState( + userVisibilityResult: Option[vfthrift.UserVisibilityResult] + ): FilteredState.Unavailable = + userVisibilityResult + .collect { + case blockedUser if UserVisibilityResultHelper.isDropAuthorBlocksViewer(blockedUser) => + Unavailable.Drop(FilteredReason.AuthorBlockViewer(true)) + + /** + * Reuse states for author visibility issues from the [[UserRepository]] for consistency with + * other logic for handling the same types of author visibility filtering. 
+ */ + case protectedUser if UserVisibilityResultHelper.isDropProtectedAuthor(protectedUser) => + Unavailable.Author.Protected + case suspendedUser if UserVisibilityResultHelper.isDropSuspendedAuthor(suspendedUser) => + Unavailable.Author.Suspended + case nsfwUser if UserVisibilityResultHelper.isDropNsfwAuthor(nsfwUser) => + Unavailable.Drop(FilteredReason.ContainNsfwMedia(true)) + case mutedByViewer if UserVisibilityResultHelper.isDropViewerMutesAuthor(mutedByViewer) => + Unavailable.Drop(FilteredReason.ViewerMutesAuthor(true)) + case blockedByViewer + if UserVisibilityResultHelper.isDropViewerBlocksAuthor(blockedByViewer) => + Unavailable.Drop( + FilteredReason.SafetyResult( + SafetyResult( + None, + vfthrift.Action.Drop( + vfthrift.Drop(Some(vfthrift.DropReason.ViewerBlocksAuthor(true))) + )))) + } + .getOrElse(FilteredState.Unavailable.Drop(FilteredReason.UnspecifiedReason(true))) + + private def unavailableFromDropAction(dropAction: Drop): Option[FilteredState.Unavailable] = + dropAction match { + case Drop(AuthorBlocksViewer, _) => + Some(Unavailable.Drop(FilteredReason.AuthorBlockViewer(true))) + case Drop(Unspecified, _) => + Some(Unavailable.Drop(FilteredReason.UnspecifiedReason(true))) + case Drop(MutedKeyword, _) => + Some(Unavailable.Drop(FilteredReason.TweetMatchesViewerMutedKeyword(KeywordMatch("")))) + case Drop(ViewerMutesAuthor, _) => + Some(Unavailable.Drop(FilteredReason.ViewerMutesAuthor(true))) + case Drop(Nsfw, _) => + Some(Unavailable.Drop(FilteredReason.ContainNsfwMedia(true))) + case Drop(NsfwMedia, _) => + Some(Unavailable.Drop(FilteredReason.ContainNsfwMedia(true))) + case Drop(PossiblyUndesirable, _) => + Some(Unavailable.Drop(FilteredReason.PossiblyUndesirable(true))) + case Drop(Bounce, _) => + Some(Unavailable.Drop(FilteredReason.TweetIsBounced(true))) + + /** + * Reuse states for author visibility issues from the [[UserRepository]] for consistency with + * other logic for handling the same types of author visibility filtering. 
+ */ + case Drop(ProtectedAuthor, _) => + Some(Unavailable.Author.Protected) + case Drop(SuspendedAuthor, _) => + Some(Unavailable.Author.Suspended) + case Drop(OffboardedAuthor, _) => + Some(Unavailable.Author.Offboarded) + case Drop(DeactivatedAuthor, _) => + Some(Unavailable.Author.Deactivated) + case Drop(ErasedAuthor, _) => + Some(Unavailable.Author.Deactivated) + case _: Drop => + Some(Unavailable.Drop(FilteredReason.UnspecifiedReason(true))) + } + + private def suppressFromVisibilityAction( + action: Action, + enableLegacyFilteredReason: Boolean + ): Option[FilteredState.Suppress] = + action match { + case interstitial: Interstitial => + interstitial.reason match { + case MutedKeyword if enableLegacyFilteredReason => + Some(Suppress(FilteredReason.TweetMatchesViewerMutedKeyword(KeywordMatch("")))) + case ViewerMutesAuthor if enableLegacyFilteredReason => + Some(Suppress(FilteredReason.ViewerMutesAuthor(true))) + case ViewerHardMutedAuthor if enableLegacyFilteredReason => + Some(Suppress(FilteredReason.ViewerMutesAuthor(true))) + // Interstitial tweets are considered suppressed by Tweetypie. For + // legacy behavior reasons, these tweets should be dropped when + // appearing as a quoted tweet via a call to getTweets. 
+ case Nsfw => + Some(Suppress(FilteredReason.ContainNsfwMedia(true))) + case NsfwMedia => + Some(Suppress(FilteredReason.ContainNsfwMedia(true))) + case PossiblyUndesirable => + Some(Suppress(FilteredReason.PossiblyUndesirable(true))) + case _ => + Some(Suppress(FilteredReason.PossiblyUndesirable(true))) + } + case _ => None + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/package.scala new file mode 100644 index 000000000..5aa38d1e2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/repository/package.scala @@ -0,0 +1,8 @@ +package com.twitter.tweetypie + +import com.twitter.context.TwitterContext +package object repository { + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityService.scala new file mode 100644 index 000000000..c6480d546 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityService.scala @@ -0,0 +1,25 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.finagle.Service +import com.twitter.util.Activity +import com.twitter.util.Future + +/** + * Transforms an `Activity` that contains a `Service` into a `Service`. + * The implementation guarantees that the service is rebuilt only when the + * activity changes, not on every request. 
+ */ +object ActivityService { + + def apply[Req, Rep](activity: Activity[Service[Req, Rep]]): Service[Req, Rep] = { + + val serviceEvent = + ActivityUtil.strict(activity).values.map(_.get) + + new Service[Req, Rep] { + + def apply(req: Req): Future[Rep] = + serviceEvent.toFuture.flatMap(_.apply(req)) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityUtil.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityUtil.scala new file mode 100644 index 000000000..2ee6d9bd5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/ActivityUtil.scala @@ -0,0 +1,23 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.util.Activity +import com.twitter.util.Closable +import com.twitter.util.Var +import com.twitter.util.Witness + +object ActivityUtil { + + /** + * Makes the composition strict up to the point where it is called. + * Compositions based on the returned activity will have + * the default lazy behavior. 
+ */ + def strict[T](activity: Activity[T]): Activity[T] = { + val state = Var(Activity.Pending: Activity.State[T]) + val event = activity.states + + Closable.closeOnCollect(event.register(Witness(state)), state) + + new Activity(state) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BUILD b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BUILD new file mode 100644 index 000000000..c660ac645 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BUILD @@ -0,0 +1,23 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/inject:guice", + "finagle/finagle-core/src/main", + "finagle/finagle-memcached/src/main/scala", + "scrooge/scrooge-core", + "tweetypie/servo/util", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala/entities", + "tweetypie/common/src/scala/com/twitter/tweetypie/tweettext", + "twitter-config/yaml", + "util/util-hashing/src/main/scala", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BoringStackTrace.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BoringStackTrace.scala new file mode 100644 index 000000000..d9e57213a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/BoringStackTrace.scala @@ -0,0 +1,43 @@ +package com.twitter.tweetypie.serverutil + +import com.twitter.finagle.ChannelException +import com.twitter.finagle.TimeoutException +import com.twitter.scrooge.ThriftException +import 
java.net.SocketException +import java.nio.channels.CancelledKeyException +import java.nio.channels.ClosedChannelException +import java.util.concurrent.CancellationException +import java.util.concurrent.{TimeoutException => JTimeoutException} +import org.apache.thrift.TApplicationException +import scala.util.control.NoStackTrace + +object BoringStackTrace { + + /** + * These exceptions are boring because they are expected to + * occasionally (or even regularly) happen during normal operation + * of the service. The intention is to make it easier to debug + * problems by making interesting exceptions easier to see. + * + * The best way to mark an exception as boring is to extend from + * NoStackTrace, since that is a good indication that we don't care + * about the details. + */ + def isBoring(t: Throwable): Boolean = + t match { + case _: NoStackTrace => true + case _: TimeoutException => true + case _: CancellationException => true + case _: JTimeoutException => true + case _: ChannelException => true + case _: SocketException => true + case _: ClosedChannelException => true + case _: CancelledKeyException => true + case _: ThriftException => true + // DeadlineExceededExceptions are propagated as: + // org.apache.thrift.TApplicationException: Internal error processing issue3: 'com.twitter.finagle.service.DeadlineFilter$DeadlineExceededException: exceeded request deadline of 100.milliseconds by 4.milliseconds. Deadline expired at 2020-08-27 17:07:46 +0000 and now it is 2020-08-27 17:07:46 +0000.' 
+ case e: TApplicationException => + e.getMessage != null && e.getMessage.contains("DeadlineExceededException") + case _ => false + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/CaffeineMemcacheClient.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/CaffeineMemcacheClient.scala new file mode 100644 index 000000000..f898c53fc --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil/CaffeineMemcacheClient.scala @@ -0,0 +1,174 @@ +package com.twitter.tweetypie.serverutil + +import com.github.benmanes.caffeine.cache.stats.CacheStats +import com.github.benmanes.caffeine.cache.stats.StatsCounter +import com.github.benmanes.caffeine.cache.AsyncCacheLoader +import com.github.benmanes.caffeine.cache.AsyncLoadingCache +import com.github.benmanes.caffeine.cache.Caffeine +import com.twitter.finagle.memcached.protocol.Value +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.memcached.GetResult +import com.twitter.finagle.memcached.ProxyClient +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.{Promise => TwitterPromise} +import com.twitter.util.logging.Logger +import java.util.concurrent.TimeUnit.NANOSECONDS +import java.util.concurrent.CompletableFuture +import java.util.concurrent.Executor +import java.util.concurrent.TimeUnit +import java.util.function.BiConsumer +import java.util.function.Supplier +import java.lang +import java.util +import scala.collection.JavaConverters._ + +object CaffeineMemcacheClient { + val logger: Logger = Logger(getClass) + + /** + * Helper method to convert between Java 8's CompletableFuture and Twitter's Future. 
/**
 * A memcache [[ProxyClient]] that places a small in-process Caffeine cache in front of
 * `proxyClient` for `getResult` lookups.
 *
 * Entries are loaded through `proxyClient.getResult`, expire `ttl` after they are written and
 * become eligible for a refresh after three quarters of `ttl`.
 *
 * @param proxyClient the client that actually talks to memcached
 * @param maximumSize maximum number of keys kept in the in-process cache
 * @param ttl         how long a loaded entry (hit, miss or failure) is kept
 * @param stats       receiver for the hit/miss/load/eviction metrics reported by Caffeine
 */
class CaffeineMemcacheClient(
  override val proxyClient: Client,
  val maximumSize: Int = 1000,
  val ttl: Duration = Duration.fromSeconds(10),
  stats: StatsReceiver = NullStatsReceiver)
    extends ProxyClient {
  import CaffeineMemcacheClient._

  /**
   * Bridges Caffeine's `StatsCounter` callbacks onto the supplied `StatsReceiver`.
   */
  private[this] object Stats extends StatsCounter {
    private val hits = stats.counter("hits")
    private val miss = stats.counter("misses")
    private val totalLoadTime = stats.stat("loads")
    private val loadSuccess = stats.counter("loads-success")
    private val loadFailure = stats.counter("loads-failure")
    private val eviction = stats.counter("evictions")
    private val evictionWeight = stats.counter("evictions-weight")

    override def recordHits(i: Int): Unit = hits.incr(i)
    override def recordMisses(i: Int): Unit = miss.incr(i)

    // Load times arrive in nanoseconds; the stat is recorded in milliseconds.
    override def recordLoadSuccess(l: Long): Unit = {
      loadSuccess.incr()
      totalLoadTime.add(NANOSECONDS.toMillis(l))
    }

    override def recordLoadFailure(l: Long): Unit = {
      loadFailure.incr()
      totalLoadTime.add(NANOSECONDS.toMillis(l))
    }

    override def recordEviction(): Unit = recordEviction(1)
    override def recordEviction(weight: Int): Unit = {
      eviction.incr()
      evictionWeight.incr(weight)
    }

    /**
     * We are currently not using this method, so it always reports an all-zero snapshot.
     */
    override def snapshot(): CacheStats = {
      new CacheStats(0, 0, 0, 0, 0, 0, 0)
    }
  }

  /**
   * Loads cache entries by delegating to `proxyClient.getResult`.
   */
  private[this] object MemcachedAsyncCacheLoader extends AsyncCacheLoader[String, GetResult] {
    private[this] val EmptyMisses: Set[String] = Set.empty
    private[this] val EmptyFailures: Map[String, Throwable] = Map.empty
    private[this] val EmptyHits: Map[String, Value] = Map.empty

    /**
     * Single-key load, implemented as a one-element bulk load followed by a lookup of `key` in
     * the resulting map.
     */
    override def asyncLoad(key: String, executor: Executor): CompletableFuture[GetResult] = {
      val f = new util.function.Function[util.Map[String, GetResult], GetResult] {
        override def apply(r: util.Map[String, GetResult]): GetResult = r.get(key)
      }
      asyncLoadAll(Seq(key).asJava, executor).thenApply(f)
    }

    /**
     * Converts response from multi-key to single key. Memcache returns the result
     * in one struct that contains all the hits, misses and exceptions. Caffeine
     * requires a map from a key to the result, so we do that conversion here.
     */
    override def asyncLoadAll(
      keys: lang.Iterable[_ <: String],
      executor: Executor
    ): CompletableFuture[util.Map[String, GetResult]] = {
      val result = new CompletableFuture[util.Map[String, GetResult]]()
      proxyClient.getResult(keys.asScala).respond {
        case Return(r) =>
          val map = new util.HashMap[String, GetResult]()
          r.hits.foreach {
            case (key, value) =>
              map.put(
                key,
                r.copy(hits = Map(key -> value), misses = EmptyMisses, failures = EmptyFailures)
              )
          }
          r.misses.foreach { key =>
            map.put(key, r.copy(hits = EmptyHits, misses = Set(key), failures = EmptyFailures))
          }
          // We are passing through failures so that we maintain the contract expected by clients.
          // Without passing through the failures, several metrics get lost. Some of these failures
          // might get cached. The cache is short-lived, so we are not worried when it does
          // get cached.
          r.failures.foreach {
            case (key, value) =>
              map.put(
                key,
                r.copy(hits = EmptyHits, misses = EmptyMisses, failures = Map(key -> value))
              )
          }
          result.complete(map)
        case Throw(ex) =>
          logger.warn("Error loading keys from memcached", ex)
          result.completeExceptionally(ex)
      }
      result
    }
  }

  private[this] val cache: AsyncLoadingCache[String, GetResult] =
    Caffeine
      .newBuilder()
      .maximumSize(maximumSize)
      // Refresh at 3/4 of the ttl, expire at the full ttl.
      .refreshAfterWrite(ttl.inMilliseconds * 3 / 4, TimeUnit.MILLISECONDS)
      .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS)
      .recordStats(new Supplier[StatsCounter] {
        override def get(): StatsCounter = Stats
      })
      .buildAsync(MemcachedAsyncCacheLoader)

  /**
   * Looks up `keys` through the in-process cache and merges the per-key results back into a
   * single `GetResult`.
   *
   * @param keys the memcache keys to fetch; may be empty
   * @return the merged hits, misses and failures, or `GetResult.Empty` when the cache yields no
   *         entries at all
   */
  override def getResult(keys: Iterable[String]): Future[GetResult] = {
    val twitterFuture = toTwitterFuture(cache.getAll(keys.asJava))
    twitterFuture
      .map { result =>
        val values = result.values().asScala
        // `reduce` throws UnsupportedOperationException on an empty collection, which is what
        // happens when no keys were requested; fall back to the empty result instead.
        values.reduceOption(_ ++ _).getOrElse(GetResult.Empty)
      }
  }
}
/**
 * Turns a device source string into an OAuth app id. This mapping is
 * necessary when information about a client has to be requested from a
 * service that only knows clients by their oauth ids.
 *
 * A device source is either an explicit "oauth:<number>" app id, or one of
 * the legacy non-oauth client ids such as "web" and "sms" that have been
 * assigned oauth ids retroactively. A string that is neither of those and
 * is not numeric is mapped to the oauth id of twitter.com.
 */
object DeviceSourceParser {

  /**
   * The oauth id of twitter.com, which doubles as the default oauth id for
   * clients that do not have one of their own.
   */
  val Web = 268278L

  /**
   * OAuth app ids of the known legacy device sources.
   */
  val legacyMapping: Map[String, Long] = Map[String, Long](
    ("web", Web),
    ("tweetbutton", 6219130L),
    ("keitai_web", 38366L),
    ("sms", 241256L)
  )

  /**
   * Extracts the numeric id from a string of the form "oauth:<digits>".
   *
   * The "oauth:" prefix is matched case-insensitively and whitespace around
   * the whole value is ignored. The result is None when the string does not
   * have that shape or when the digits do not fit into a Long.
   */
  val parseOAuthAppId: String => Option[Long] = {
    // \p{javaWhitespace} agrees with Character.isWhitespace (unlike \s), which
    // accepts more inputs and is easier to test against.
    val OAuthPattern = """(?i)\p{javaWhitespace}*oauth:(\d+)\p{javaWhitespace}*""".r

    // The pattern only lets digits through, so the sole remaining way for the
    // conversion to fail is a value that is too large for a Long.
    def digitsToLong(digits: String): Option[Long] =
      try Some(digits.toLong)
      catch { case _: NumberFormatException => None }

    (candidate: String) =>
      candidate match {
        case OAuthPattern(digits) => digitsToLong(digits)
        case _ => None
      }
  }

  /**
   * Resolves an explicit oauth id or a legacy identifier, with no fallback
   * for unknown values.
   */
  val parseStrict: String => Option[Long] =
    (candidate: String) =>
      parseOAuthAppId(candidate) match {
        case explicit @ Some(_) => explicit
        case None => legacyMapping.get(candidate)
      }

  /**
   * True when the string is an explicit oauth id or a known legacy identifier.
   */
  val isValid: String => Boolean = (candidate: String) => parseStrict(candidate).nonEmpty

  /**
   * Resolves a device source to an OAuth app id, including the legacy
   * mapping and the twitter.com fallback for unknown non-numeric values.
   */
  val parseAppId: String => Option[Long] = {
    val NumericLike = """-?[0-9]+""".r

    // Values that look numeric are never mapped to the default; the lookup
    // simply fails for them.
    def fallback(candidate: String): Option[Long] =
      candidate match {
        case NumericLike() => None
        case _ => Some(Web)
      }

    (candidate: String) => parseStrict(candidate).orElse(fallback(candidate))
  }
}
// Unchecked exceptions and JVM errors are treated as alertable; every other
// throwable (including null) is not.
def isAlertable(throwable: Throwable): Boolean =
  throwable.isInstanceOf[RuntimeException] || throwable.isInstanceOf[Error]
/**
 * Works out where a tweet's text has to be cut so that it can still be rendered by legacy
 * clients.
 */
object ExtendedTweetMetadataBuilder {
  import TweetText._

  /**
   * @param tweet         the tweet whose text and entities are inspected
   * @param selfPermalink the short url that is appended after the truncated text
   * @return metadata carrying the truncation index as a code point offset
   */
  def apply(tweet: Tweet, selfPermalink: ShortenedUrl): ExtendedTweetMetadata = {

    // (from, to) index pairs of every entity in the given sequence.
    def entityRanges[T: TextEntity](entities: Seq[T]): Seq[(Int, Int)] =
      entities.map { entity =>
        (TextEntity.fromIndex(entity).toInt, TextEntity.toIndex(entity).toInt)
      }

    // Entities are passed to the truncator as atomic units.
    val entityPairs: Seq[(Int, Int)] =
      Seq(
        entityRanges(getUrls(tweet)),
        entityRanges(getMentions(tweet)),
        entityRanges(getMedia(tweet)),
        entityRanges(getHashtags(tweet)),
        entityRanges(getCashtags(tweet))
      ).flatten
    val atomicRanges = Offset.Ranges.fromCodePointPairs(entityPairs)

    val tweetText = getText(tweet)

    // need to leave enough space for ellipsis, space, and self-permalink
    val displayBudget = OriginalMaxDisplayLength - selfPermalink.shortUrl.length - 2

    val truncationPoint =
      Truncator.truncationPoint(
        text = tweetText,
        maxDisplayLength = displayBudget,
        atomicUnits = atomicRanges
      )

    ExtendedTweetMetadata(apiCompatibleTruncationIndex = truncationPoint.codePointOffset.toInt)
  }
}
/**
 * A memcache client for CI test runs that exercise the no-cache scenarios, behaving like a cache
 * whose entries expire instantly. It extends MockClient and overrides every write operation so
 * that none of them reaches the parent implementation; each one completes immediately with a
 * fixed answer.
 */
class NullMemcacheClient extends memcached.MockClient {

  // Fixed answers shared by the overrides below.
  private[this] def accepted: Future[lang.Boolean] = Future.value(true)
  private[this] def rejected: Future[lang.Boolean] = Future.value(false)
  private[this] def noCounter: Future[Option[lang.Long]] = Future.value(None)

  override def set(key: String, flags: Int, expiry: Time, value: Buf): Future[Unit] =
    Future.Done

  override def add(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] =
    accepted

  override def append(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] =
    rejected

  override def prepend(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] =
    rejected

  override def replace(key: String, flags: Int, expiry: Time, value: Buf): Future[lang.Boolean] =
    rejected

  override def checkAndSet(
    key: String,
    flags: Int,
    expiry: Time,
    value: Buf,
    casUnique: Buf
  ): Future[CasResult] =
    Future.value(CasResult.NotFound)

  override def delete(key: String): Future[lang.Boolean] =
    rejected

  override def incr(key: String, delta: Long): Future[Option[lang.Long]] =
    noCounter

  override def decr(key: String, delta: Long): Future[Option[lang.Long]] =
    noCounter
}
/**
 * Extractor that pulls the stored card id out of a CardReference.
 */
object StoredCard {

  private val cardScheme = "card"
  private val cardPrefix = cardScheme + "://"

  /**
   * Inspects the CardReference and, when its cardUri points at a "stored" card, yields the card
   * id. A stored card URI starts with "card://" (case sensitive) and carries a numeric id as its
   * host part. References to any other kind of card produce None.
   *
   * Malformed URIs and non-numeric ids also produce None: the validation is done upstream by
   * the TweetBuilder, so failures here are deliberately swallowed.
   */
  def unapply(cr: CardReference): Option[Long] =
    try {
      Option(cr.cardUri)
        .filter(_.startsWith(cardPrefix))
        .flatMap(raw => Try(new URI(raw)).toOption)
        .filter(parsed => parsed.getScheme == cardScheme && parsed.getHost != null)
        // toLong throws NumberFormatException for a non-numeric host; handled below.
        .map(_.getHost.toLong)
    } catch {
      case NonFatal(_) => None
    }
}
+ * + * Fields are: + * - Tweet id + * - Timestamp: + * If the tweet id is a snowflake id, this is an offset since tweet creation. + * If it is not a snowflake id, then this is a Unix epoch time in + * milliseconds. (The idea is that for most tweets, this encoding will make + * it easier to see the interval between events and whether it occured soon + * after tweet creation.) + * - Cache action ("set", "add", "replace", "cas", "delete") + * - Base64-encoded Cached[CachedTweet] struct + */ + def toLogMessage: String = { + val builder = new java.lang.StringBuilder + val timestampOffset = + if (SnowflakeId.isSnowflakeId(tweetId)) { + SnowflakeId(tweetId).unixTimeMillis.asLong + } else { + 0 + } + builder + .append(tweetId) + .append('\t') + .append(timestamp.inMilliseconds - timestampOffset) + .append('\t') + .append(action) + .append('\t') + value.foreach { ct => + // When logging, we end up serializing the value twice, once for the + // cache write and once for the logging. This is suboptimal, but the + // assumption is that we only do this for a small fraction of cache + // writes, so it should be ok. The reason that this is necessary is + // because we want to do the filtering on the deserialized value, so + // the serialized value is not available at the level that we are + // doing the filtering. 
/**
 * A cache wrapper that reports successful writes to a log hook.
 *
 * Implementations decide which writes are of interest (`select` / `selectKey`) and how they are
 * recorded (`log`). A write is only reported after the underlying cache operation has succeeded
 * and, for operations that return a flag, only when that flag is true.
 */
trait WriteLoggingCache[K, V] extends CacheWrapper[K, V] {
  // Use getClass so we can see which implementation is actually failing.
  private[this] lazy val logFailureLogger = Logger(getClass)

  def selectKey(k: K): Boolean
  def select(k: K, v: V): Boolean
  def log(action: String, k: K, v: Option[V]): Unit

  /**
   * Calls `log`, turning any non-fatal failure into an error log line so that a broken logging
   * hook never fails the request that triggered it.
   */
  def safeLog(action: String, k: K, v: Option[V]): Unit =
    try log(action, k, v)
    catch {
      case NonFatal(e) =>
        logFailureLogger.error("Logging cache write", e)
    }

  // In every override the selection function runs before the cache call. The Future is very
  // likely to succeed, so deciding up front lets us skip creating and attaching a callback for
  // writes that would not be logged anyway.

  override def add(k: K, v: V): Future[Boolean] = {
    val shouldLog = select(k, v)
    val outcome = underlyingCache.add(k, v)
    if (shouldLog) outcome.onSuccess { stored => if (stored) safeLog("add", k, Some(v)) }
    else outcome
  }

  override def checkAndSet(k: K, v: V, checksum: Checksum): Future[Boolean] = {
    val shouldLog = select(k, v)
    val outcome = underlyingCache.checkAndSet(k, v, checksum)
    if (shouldLog) outcome.onSuccess { stored => if (stored) safeLog("cas", k, Some(v)) }
    else outcome
  }

  override def set(k: K, v: V): Future[Unit] = {
    val shouldLog = select(k, v)
    val outcome = underlyingCache.set(k, v)
    if (shouldLog) outcome.onSuccess { _ => safeLog("set", k, Some(v)) }
    else outcome
  }

  override def replace(k: K, v: V): Future[Boolean] = {
    val shouldLog = select(k, v)
    val outcome = underlyingCache.replace(k, v)
    if (shouldLog) outcome.onSuccess { stored => if (stored) safeLog("replace", k, Some(v)) }
    else outcome
  }

  override def delete(k: K): Future[Boolean] = {
    val shouldLog = selectKey(k)
    val outcome = underlyingCache.delete(k)
    if (shouldLog) outcome.onSuccess { removed => if (removed) safeLog("delete", k, None) }
    else outcome
  }
}
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD new file mode 100644 index 000000000..1fb3cf249 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/BUILD @@ -0,0 +1,38 @@ +scala_library( + sources = ["*.scala"], + compiler_option_sets = ["fatal_warnings"], + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/twitter/bijection:scrooge", + "3rdparty/jvm/org/apache/thrift:libthrift", + "core-app-services/failed_task:writer", + "core-app-services/lib:coreservices", + "finagle/finagle-core/src/main", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-stats", + "quill/capture", + "quill/core/src/main/thrift:thrift-scala", + "scrooge/scrooge-core/src/main/scala", + "tweetypie/servo/request/src/main/scala", + "tweetypie/servo/util", + "src/thrift/com/twitter/servo:servo-exception-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/handler", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/store", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + "tweetypie/common/src/scala/com/twitter/tweetypie/context", + "tweetypie/common/src/scala/com/twitter/tweetypie/thriftscala", + "twitter-server-internal", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala new file mode 100644 index 000000000..f19245b60 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ClientHandlingTweetService.scala @@ -0,0 +1,524 @@ +/** Copyright 2012 Twitter, Inc. */ +package com.twitter.tweetypie.service + +import com.twitter.coreservices.StratoPublicApiRequestAttributionCounter +import com.twitter.finagle.CancelledRequestException +import com.twitter.finagle.context.Contexts +import com.twitter.finagle.context.Deadline +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.stats.DefaultStatsReceiver +import com.twitter.finagle.stats.Stat +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.util.ExceptionCategorizer +import com.twitter.servo.util.MemoizedExceptionCounterFactory +import com.twitter.tweetypie.Future +import com.twitter.tweetypie.Gate +import com.twitter.tweetypie.Logger +import com.twitter.tweetypie.StatsReceiver +import com.twitter.tweetypie.ThriftTweetService +import com.twitter.tweetypie.TweetId +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.context.TweetypieContext +import com.twitter.tweetypie.core.OverCapacity +import com.twitter.tweetypie.serverutil.ExceptionCounter +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Promise + +/** + * A TweetService that takes care of the handling of requests from + * external services. In particular, this wrapper doesn't have any + * logic for handling requests itself. It just serves as a gateway for + * requests and responses, making sure that the underlying tweet + * service only sees requests it should handle and that the external + * clients get clean responses. 
+ * + * - Ensures that exceptions are propagated cleanly + * - Sheds traffic if necessary + * - Authenticates clients + * - Records stats about clients + * + * For each endpoint, we record both client-specific and total metrics for number of requests, + * successes, exceptions, and latency. The stats names follow the patterns: + * - .//requests + * - .//success + * - .//client_errors + * - .//server_errors + * - .//exceptions + * - .//exceptions/ + * - .///requests + * - .///success + * - .///exceptions + * - .///exceptions/ + */ +class ClientHandlingTweetService( + underlying: ThriftTweetService, + stats: StatsReceiver, + loadShedEligible: Gate[String], + shedReadTrafficVoluntarily: Gate[Unit], + requestAuthorizer: ClientRequestAuthorizer, + getTweetsAuthorizer: MethodAuthorizer[GetTweetsRequest], + getTweetFieldsAuthorizer: MethodAuthorizer[GetTweetFieldsRequest], + requestSizeAuthorizer: MethodAuthorizer[Int], + clientIdHelper: ClientIdHelper) + extends ThriftTweetService { + import RescueExceptions._ + + private val log = Logger("com.twitter.tweetypie.service.TweetService") + + private[this] val Requests = "requests" + private[this] val Success = "success" + private[this] val Latency = "latency_ms" + + private[this] val StratoStatsCounter = new StratoPublicApiRequestAttributionCounter( + DefaultStatsReceiver + ) + private[this] val clientServerCategorizer = + ExceptionCategorizer.simple { + _ match { + case _: ClientError | _: AccessDenied => "client_errors" + case _ => "server_errors" + } + } + + private[this] val preServoExceptionCountersWithClientId = + new MemoizedExceptionCounterFactory(stats) + private[this] val preServoExceptionCounters = + new MemoizedExceptionCounterFactory(stats, categorizer = ExceptionCounter.defaultCategorizer) + private[this] val postServoExceptionCounters = + new MemoizedExceptionCounterFactory(stats, categorizer = clientServerCategorizer) + + private def clientId: String = + 
/**
 * Runs the read `f` unless the calling client is eligible for load shedding and the voluntary
 * shedding gate is open, in which case the request is rejected with the shared over-capacity
 * failure and `f` is never evaluated.
 *
 * Every call records `requestSize` under "loadshed_incoming_requests" and under either the
 * eligible or the ineligible stat; rejected calls also record it under
 * "loadshed_rejected_requests".
 */
private[this] def ifNotOverCapacityRead[T](
  methodStats: StatsReceiver,
  requestSize: Long
)(
  f: => Future[T]
): Future[T] = {
  val eligible = loadShedEligible(clientId)
  // The voluntary gate is only consulted for eligible clients.
  val rejected = eligible && shedReadTrafficVoluntarily()

  def record(statName: String): Unit = methodStats.stat(statName).add(requestSize)

  record("loadshed_incoming_requests")
  record(if (eligible) "loadshed_eligible_requests" else "loadshed_ineligible_requests")

  if (!rejected) {
    f
  } else {
    record("loadshed_rejected_requests")
    futureOverCapacityException
  }
}
+ */ + def maskedWithStats[A](f: Future[A]): Future[A] = { + val p = Promise[A]() + p.setInterruptHandler { + case _: ClientDiscardedRequestException | _: CancelledRequestException => + cancelledCounter.incr() + } + f.proxyTo(p) + p + } + + maskedWithStats( + requestAuthorizer(name, clientIdHelper.effectiveClientId) + .flatMap { _ => + methodStats.counter(Requests).incr() + extraStatPrefix.foreach(p => methodStats.counter(p, Requests).incr()) + clientStats.counter(Requests).incr() + StratoStatsCounter.recordStats(name, "tweets", requestSize.getOrElse(1L)) + + Stat.timeFuture(methodStats.stat(Latency)) { + Stat.timeFuture(clientStats.stat(Latency)) { + maybeTimeFuture(extraStatPrefix.map(p => methodStats.stat(p, Latency))) { + TweetypieContext.Local.trackStats(stats, methodStats, clientStats) + + // Remove the deadline for backend requests when we mask client cancellations so + // that side-effects are applied to all backend services even after client timeouts. + // Wrap and then flatten an extra layer of Future to capture any thrown exceptions. 
/**
 * Stat-name prefixes that bucket a request by the number of tweet ids it asks for:
 * "width_0_9", "width_10_99" or "width_100_plus".
 */
object RequestWidthPrefix {
  private def prefix(width: Int) = {
    val bucket =
      if (width < 10) "0_9"
      else if (width < 100) "10_99"
      else "100_plus"
    "width_" + bucket
  }

  def forGetTweetsRequest(r: GetTweetsRequest): String = prefix(r.tweetIds.size)
  def forGetTweetFieldsRequest(r: GetTweetFieldsRequest): String = prefix(r.tweetIds.size)
}
Future[Seq[GetTweetFieldsResult]] = + trackS( + "get_tweet_fields", + request, + Some(RequestWidthPrefix.forGetTweetFieldsRequest(request)), + Some(request.tweetIds.size) + ) { stats => + getTweetFieldsAuthorizer(request, clientId).flatMap { _ => + ifNotOverCapacityRead(stats, request.tweetIds.length) { + underlying.getTweetFields(request) + } + } + } + + override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] = + track("replicated_get_tweets", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweets(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] = + track("replicated_get_tweet_fields", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweetFields(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = + trackS("get_tweet_counts", request, requestSize = Some(request.tweetIds.size)) { stats => + ifNotOverCapacityRead(stats, request.tweetIds.length) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getTweetCounts(request) + } + } + } + + override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] = + track("replicated_get_tweet_counts", request, requestSize = Some(request.tweetIds.size)) { + underlying.replicatedGetTweetCounts(request).rescue { + case e: Throwable => Future.Unit // do not need deferredrpc to retry on exceptions + } + } + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + track("post_tweet", request, Some(WithMediaPrefix.forPostTweetRequest(request))) { + underlying.postTweet(request) + } + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = + 
track("post_retweet", request) { + underlying.postRetweet(request) + } + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + track("set_additional_fields", request) { + underlying.setAdditionalFields(request) + } + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = + track("delete_additional_fields", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.deleteAdditionalFields(request) + } + } + + override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] = + track("async_set_additional_fields", request) { + underlying.asyncSetAdditionalFields(request) + } + + override def asyncDeleteAdditionalFields( + request: AsyncDeleteAdditionalFieldsRequest + ): Future[Unit] = + track("async_delete_additional_fields", request) { + underlying.asyncDeleteAdditionalFields(request) + } + + override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] = + track("replicated_undelete_tweet2", request) { underlying.replicatedUndeleteTweet2(request) } + + override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] = + track("replicated_insert_tweet2", request) { underlying.replicatedInsertTweet2(request) } + + override def asyncInsert(request: AsyncInsertRequest): Future[Unit] = + track("async_insert", request) { underlying.asyncInsert(request) } + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + track("update_possibly_sensitive_tweet", request) { + underlying.updatePossiblySensitiveTweet(request) + } + + override def asyncUpdatePossiblySensitiveTweet( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + track("async_update_possibly_sensitive_tweet", request) { + underlying.asyncUpdatePossiblySensitiveTweet(request) + } + + override def 
replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] = + track("replicated_update_possibly_sensitive_tweet", tweet) { + underlying.replicatedUpdatePossiblySensitiveTweet(tweet) + } + + override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = + track("undelete_tweet", request) { + underlying.undeleteTweet(request) + } + + override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] = + track("async_undelete_tweet", request) { + underlying.asyncUndeleteTweet(request) + } + + override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = + track("unretweet", request) { + underlying.unretweet(request) + } + + override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = + track("erase_user_tweets", request) { + underlying.eraseUserTweets(request) + } + + override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] = + track("async_erase_user_tweets", request) { + underlying.asyncEraseUserTweets(request) + } + + override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] = + track("async_delete", request) { underlying.asyncDelete(request) } + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = + track("delete_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.deleteTweets(request) + } + } + + override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = + track("cascaded_delete_tweet", request) { underlying.cascadedDeleteTweet(request) } + + override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] = + track("replicated_delete_tweet2", request) { underlying.replicatedDeleteTweet2(request) } + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + track("incr_tweet_fav_count", request) { 
underlying.incrTweetFavCount(request) } + + override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] = + track("async_incr_fav_count", request) { underlying.asyncIncrFavCount(request) } + + override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] = + track("replicated_incr_fav_count", tweetId) { + underlying.replicatedIncrFavCount(tweetId, delta) + } + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + track("incr_tweet_bookmark_count", request) { underlying.incrTweetBookmarkCount(request) } + + override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] = + track("async_incr_bookmark_count", request) { underlying.asyncIncrBookmarkCount(request) } + + override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] = + track("replicated_incr_bookmark_count", tweetId) { + underlying.replicatedIncrBookmarkCount(tweetId, delta) + } + + override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + track("replicated_set_additional_fields", request) { + underlying.replicatedSetAdditionalFields(request) + } + + def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] = { + track("set_retweet_visibility", request) { + underlying.setRetweetVisibility(request) + } + } + + def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] = { + track("async_set_retweet_visibility", request) { + underlying.asyncSetRetweetVisibility(request) + } + } + + override def replicatedSetRetweetVisibility( + request: ReplicatedSetRetweetVisibilityRequest + ): Future[Unit] = + track("replicated_set_retweet_visibility", request) { + underlying.replicatedSetRetweetVisibility(request) + } + + override def replicatedDeleteAdditionalFields( + request: ReplicatedDeleteAdditionalFieldsRequest + ): Future[Unit] = + track("replicated_delete_additional_fields", request) { + 
underlying.replicatedDeleteAdditionalFields(request) + } + + override def replicatedTakedown(tweet: Tweet): Future[Unit] = + track("replicated_takedown", tweet) { underlying.replicatedTakedown(tweet) } + + override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] = + track("scrub_geo_update_user_timestamp", request) { + underlying.scrubGeoUpdateUserTimestamp(request) + } + + override def scrubGeo(request: GeoScrub): Future[Unit] = + track("scrub_geo", request, requestSize = Some(request.statusIds.size)) { + requestSizeAuthorizer(request.statusIds.size, clientId).flatMap { _ => + underlying.scrubGeo(request) + } + } + + override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] = + track("replicated_scrub_geo", tweetIds) { underlying.replicatedScrubGeo(tweetIds) } + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + track("delete_location_data", request) { + underlying.deleteLocationData(request) + } + + override def flush(request: FlushRequest): Future[Unit] = + track("flush", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.flush(request) + } + } + + override def takedown(request: TakedownRequest): Future[Unit] = + track("takedown", request) { underlying.takedown(request) } + + override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] = + track("async_takedown", request) { + underlying.asyncTakedown(request) + } + + override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] = + track("set_tweet_user_takedown", request) { underlying.setTweetUserTakedown(request) } + + override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] = + track("quoted_tweet_delete", request) { + underlying.quotedTweetDelete(request) + } + + override def quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] = + track("quoted_tweet_takedown", request) { 
+ underlying.quotedTweetTakedown(request) + } + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = + track("get_deleted_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getDeletedTweets(request) + } + } + + override def getStoredTweets( + request: GetStoredTweetsRequest + ): Future[Seq[GetStoredTweetsResult]] = { + track("get_stored_tweets", request, requestSize = Some(request.tweetIds.size)) { + requestSizeAuthorizer(request.tweetIds.size, clientId).flatMap { _ => + underlying.getStoredTweets(request) + } + } + } + + override def getStoredTweetsByUser( + request: GetStoredTweetsByUserRequest + ): Future[GetStoredTweetsByUserResult] = { + track("get_stored_tweets_by_user", request) { + underlying.getStoredTweetsByUser(request) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala new file mode 100644 index 000000000..f148fb25a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/DispatchingTweetService.scala @@ -0,0 +1,376 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package service + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.exception.thriftscala.ClientErrorCause +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.handler._ +import com.twitter.tweetypie.store._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +/** + * Implementation of the TweetService which dispatches requests to underlying + * handlers and stores. 
+ */ +class DispatchingTweetService( + asyncDeleteAdditionalFieldsBuilder: AsyncDeleteAdditionalFieldsBuilder.Type, + asyncSetAdditionalFieldsBuilder: AsyncSetAdditionalFieldsBuilder.Type, + deleteAdditionalFieldsBuilder: DeleteAdditionalFieldsBuilder.Type, + deleteLocationDataHandler: DeleteLocationDataHandler.Type, + deletePathHandler: TweetDeletePathHandler, + eraseUserTweetsHandler: EraseUserTweetsHandler, + getDeletedTweetsHandler: GetDeletedTweetsHandler.Type, + getStoredTweetsHandler: GetStoredTweetsHandler.Type, + getStoredTweetsByUserHandler: GetStoredTweetsByUserHandler.Type, + getTweetCountsHandler: GetTweetCountsHandler.Type, + getTweetsHandler: GetTweetsHandler.Type, + getTweetFieldsHandler: GetTweetFieldsHandler.Type, + postTweetHandler: PostTweet.Type[PostTweetRequest], + postRetweetHandler: PostTweet.Type[RetweetRequest], + quotedTweetDeleteBuilder: QuotedTweetDeleteEventBuilder.Type, + quotedTweetTakedownBuilder: QuotedTweetTakedownEventBuilder.Type, + scrubGeoScrubTweetsBuilder: ScrubGeoEventBuilder.ScrubTweets.Type, + scrubGeoUpdateUserTimestampBuilder: ScrubGeoEventBuilder.UpdateUserTimestamp.Type, + setAdditionalFieldsBuilder: SetAdditionalFieldsBuilder.Type, + setRetweetVisibilityHandler: SetRetweetVisibilityHandler.Type, + statsReceiver: StatsReceiver, + takedownHandler: TakedownHandler.Type, + tweetStore: TotalTweetStore, + undeleteTweetHandler: UndeleteTweetHandler.Type, + unretweetHandler: UnretweetHandler.Type, + updatePossiblySensitiveTweetHandler: UpdatePossiblySensitiveTweetHandler.Type, + userTakedownHandler: UserTakedownHandler.Type, + clientIdHelper: ClientIdHelper) + extends ThriftTweetService { + import AdditionalFields._ + + // Incoming reads + + override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = + getTweetsHandler(request) + + override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = + getTweetFieldsHandler(request) + + override def getTweetCounts(request: 
GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = + getTweetCountsHandler(request) + + // Incoming deletes + + override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = + deletePathHandler.cascadedDeleteTweet(request) + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = + deletePathHandler.deleteTweets(request) + + // Incoming writes + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + postTweetHandler(request) + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = + postRetweetHandler(request) + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = { + val setFields = AdditionalFields.nonEmptyAdditionalFieldIds(request.additionalFields) + if (setFields.isEmpty) { + Future.exception( + ClientError( + ClientErrorCause.BadRequest, + s"${SetAdditionalFieldsRequest.AdditionalFieldsField.name} is empty, there must be at least one field to set" + ) + ) + } else { + + unsettableAdditionalFieldIds(request.additionalFields) match { + case Nil => + setAdditionalFieldsBuilder(request).flatMap(tweetStore.setAdditionalFields) + case unsettableFieldIds => + Future.exception( + ClientError( + ClientErrorCause.BadRequest, + unsettableAdditionalFieldIdsErrorMessage(unsettableFieldIds) + ) + ) + } + } + } + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = + if (request.tweetIds.isEmpty || request.fieldIds.isEmpty) { + Future.exception( + ClientError(ClientErrorCause.BadRequest, "request contains empty tweet ids or field ids") + ) + } else if (request.fieldIds.exists(!isAdditionalFieldId(_))) { + Future.exception( + ClientError(ClientErrorCause.BadRequest, "cannot delete non-additional fields") + ) + } else { + deleteAdditionalFieldsBuilder(request).flatMap { events => + Future.join(events.map(tweetStore.deleteAdditionalFields)) + } + } + + override def asyncInsert(request: 
AsyncInsertRequest): Future[Unit] = + AsyncInsertTweet.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncInsertTweet(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncInsertTweet(e) + } + + override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] = + asyncSetAdditionalFieldsBuilder(request).map { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncSetAdditionalFields(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncSetAdditionalFields(e) + } + + /** + * Sets whether a retweet should be included in its source tweet's retweet count. + * + * This is called by our RetweetVisibility daemon when a user enters or exits the + * suspended or read-only state and the visibility of all their retweets needs to + * be modified. + * + * @see [[SetRetweetVisibilityHandler]] for more implementation details + */ + override def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] = + setRetweetVisibilityHandler(request) + + override def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] = + AsyncSetRetweetVisibility.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncSetRetweetVisibility(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncSetRetweetVisibility(e) + } + + /** + * When a tweet has been successfully undeleted from storage in Manhattan this endpoint will + * enqueue requests to the following related endpoints via deferredRPC: + * + * 1. asyncUndeleteTweet: Asynchronously handle aspects of the undelete not required for the response. + * 2. replicatedUndeleteTweet2: Send the undeleted tweet to other clusters for caching. 
+ * + * @see [[UndeleteTweetHandler]] for the core undelete implementation + */ + override def undeleteTweet(request: UndeleteTweetRequest): Future[UndeleteTweetResponse] = + undeleteTweetHandler(request) + + /** + * The async method that undeleteTweet calls to handle notifying other services of the undelete. + * See [[TweetStores.asyncUndeleteTweetStore]] for all the stores that handle this event. + */ + override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] = + AsyncUndeleteTweet.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncUndeleteTweet(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncUndeleteTweet(e) + } + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = + getDeletedTweetsHandler(request) + + /** + * Triggers the deletion of all of a user's tweets. Used by Gizmoduck when erasing a user + * after they have been deactivated for some number of days. + */ + override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] = + eraseUserTweetsHandler.eraseUserTweetsRequest(request) + + override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] = + eraseUserTweetsHandler.asyncEraseUserTweetsRequest(request) + + override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] = + AsyncDeleteTweet.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncDeleteTweet(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncDeleteTweet(e) + } + + /* + * unretweet a tweet. + * + * There are two ways to unretweet: + * - call deleteTweets() with the retweetId + * - call unretweet() with the retweeter userId and sourceTweetId + * + * This is useful if you want to be able to undo a retweet without having to + * keep track of a retweetId + * + * Returns DeleteTweetResult for any deleted retweets. 
+ */ + override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = + unretweetHandler(request) + + override def asyncDeleteAdditionalFields( + request: AsyncDeleteAdditionalFieldsRequest + ): Future[Unit] = + asyncDeleteAdditionalFieldsBuilder(request).map { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncDeleteAdditionalFields(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncDeleteAdditionalFields(e) + } + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + tweetStore.incrFavCount(IncrFavCount.Event(request.tweetId, request.delta, Time.now)) + + override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] = + tweetStore.asyncIncrFavCount(AsyncIncrFavCount.Event(request.tweetId, request.delta, Time.now)) + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + tweetStore.incrBookmarkCount(IncrBookmarkCount.Event(request.tweetId, request.delta, Time.now)) + + override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] = + tweetStore.asyncIncrBookmarkCount( + AsyncIncrBookmarkCount.Event(request.tweetId, request.delta, Time.now)) + + override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] = + scrubGeoUpdateUserTimestampBuilder(request).flatMap(tweetStore.scrubGeoUpdateUserTimestamp) + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + deleteLocationDataHandler(request) + + override def scrubGeo(request: GeoScrub): Future[Unit] = + scrubGeoScrubTweetsBuilder(request).flatMap(tweetStore.scrubGeo) + + override def takedown(request: TakedownRequest): Future[Unit] = + takedownHandler(request) + + override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] = + quotedTweetDeleteBuilder(request).flatMap { + case Some(event) => tweetStore.quotedTweetDelete(event) + case None => Future.Unit + } + + override def 
quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] = + quotedTweetTakedownBuilder(request).flatMap { + case Some(event) => tweetStore.quotedTweetTakedown(event) + case None => Future.Unit + } + + override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] = + AsyncTakedown.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(e) => tweetStore.asyncTakedown(e) + case TweetStoreEventOrRetry.Retry(e) => tweetStore.retryAsyncTakedown(e) + } + + override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] = + userTakedownHandler(request) + + override def asyncUpdatePossiblySensitiveTweet( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): Future[Unit] = { + AsyncUpdatePossiblySensitiveTweet.Event.fromAsyncRequest(request) match { + case TweetStoreEventOrRetry.First(event) => + tweetStore.asyncUpdatePossiblySensitiveTweet(event) + case TweetStoreEventOrRetry.Retry(event) => + tweetStore.retryAsyncUpdatePossiblySensitiveTweet(event) + } + } + + override def flush(request: FlushRequest): Future[Unit] = { + // The logged "previous Tweet" value is intended to be used when interactively debugging an + // issue and an engineer flushes the tweet manually, e.g. from tweetypie.cmdline console. + // Don't log automated flushes originating from tweetypie-daemons to cut down noise. 
+ val logExisting = !clientIdHelper.effectiveClientIdRoot.exists(_ == "tweetypie-daemons") + tweetStore.flush( + Flush.Event(request.tweetIds, request.flushTweets, request.flushCounts, logExisting) + ) + } + + // Incoming replication events + + override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] = + getTweetCounts(request).unit + + override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] = + getTweetFields(request).unit + + override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] = + getTweets(request).unit + + override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] = + tweetStore.replicatedInsertTweet( + ReplicatedInsertTweet + .Event( + request.cachedTweet.tweet, + request.cachedTweet, + request.quoterHasAlreadyQuotedTweet.getOrElse(false), + request.initialTweetUpdateRequest + ) + ) + + override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] = + tweetStore.replicatedDeleteTweet( + ReplicatedDeleteTweet.Event( + tweet = request.tweet, + isErasure = request.isErasure, + isBounceDelete = request.isBounceDelete, + isLastQuoteOfQuoter = request.isLastQuoteOfQuoter.getOrElse(false) + ) + ) + + override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] = + tweetStore.replicatedIncrFavCount(ReplicatedIncrFavCount.Event(tweetId, delta)) + + override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] = + tweetStore.replicatedIncrBookmarkCount(ReplicatedIncrBookmarkCount.Event(tweetId, delta)) + + override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] = + tweetStore.replicatedScrubGeo(ReplicatedScrubGeo.Event(tweetIds)) + + override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = + tweetStore.replicatedSetAdditionalFields( + ReplicatedSetAdditionalFields.Event(request.additionalFields) + ) + + override def 
replicatedSetRetweetVisibility( + request: ReplicatedSetRetweetVisibilityRequest + ): Future[Unit] = + tweetStore.replicatedSetRetweetVisibility( + ReplicatedSetRetweetVisibility.Event(request.srcId, request.visible) + ) + + override def replicatedDeleteAdditionalFields( + request: ReplicatedDeleteAdditionalFieldsRequest + ): Future[Unit] = + Future.join( + request.fieldsMap.map { + case (tweetId, fieldIds) => + tweetStore.replicatedDeleteAdditionalFields( + ReplicatedDeleteAdditionalFields.Event(tweetId, fieldIds) + ) + }.toSeq + ) + + override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] = + tweetStore.replicatedUndeleteTweet( + ReplicatedUndeleteTweet + .Event( + request.cachedTweet.tweet, + request.cachedTweet, + request.quoterHasAlreadyQuotedTweet.getOrElse(false) + )) + + override def replicatedTakedown(tweet: Tweet): Future[Unit] = + tweetStore.replicatedTakedown(ReplicatedTakedown.Event(tweet)) + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + updatePossiblySensitiveTweetHandler(request) + + override def replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] = + tweetStore.replicatedUpdatePossiblySensitiveTweet( + ReplicatedUpdatePossiblySensitiveTweet.Event(tweet) + ) + + override def getStoredTweets( + request: GetStoredTweetsRequest + ): Future[Seq[GetStoredTweetsResult]] = + getStoredTweetsHandler(request) + + override def getStoredTweetsByUser( + request: GetStoredTweetsByUserRequest + ): Future[GetStoredTweetsByUserResult] = + getStoredTweetsByUserHandler(request) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/FailureLoggingTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/FailureLoggingTweetService.scala new file mode 100644 index 000000000..c1dd98151 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/FailureLoggingTweetService.scala @@ -0,0 
+1,76 @@ +package com.twitter.tweetypie +package service + +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.coreservices.failed_task.writer.FailedTaskWriter +import com.twitter.scrooge.ThriftException +import com.twitter.scrooge.ThriftStruct +import com.twitter.scrooge.ThriftStructCodec +import com.twitter.tweetypie.serverutil.BoringStackTrace +import com.twitter.tweetypie.thriftscala._ +import scala.util.control.NoStackTrace + +object FailureLoggingTweetService { + + /** + * Defines the universe of exception types for which we should scribe + * the failure. + */ + private def shouldWrite(t: Throwable): Boolean = + t match { + case _: ThriftException => true + case _: PostTweetFailure => true + case _ => !BoringStackTrace.isBoring(t) + } + + /** + * Holds failure information from a failing PostTweetResult. + * + * FailedTaskWriter logs an exception with the failed request, so we + * need to package up any failure that we want to log into an + * exception. + */ + private class PostTweetFailure(state: TweetCreateState, reason: Option[String]) + extends Exception + with NoStackTrace { + override def toString: String = s"PostTweetFailure($state, $reason)" + } +} + +/** + * Wraps a tweet service with scribing of failed requests in order to + * enable analysis of failures for diagnosing problems. 
+ */ +class FailureLoggingTweetService( + failedTaskWriter: FailedTaskWriter[Array[Byte]], + protected val underlying: ThriftTweetService) + extends TweetServiceProxy { + import FailureLoggingTweetService._ + + private[this] object writers { + private[this] def writer[T <: ThriftStruct]( + name: String, + codec: ThriftStructCodec[T] + ): (T, Throwable) => Future[Unit] = { + val taskWriter = failedTaskWriter(name, BinaryScalaCodec(codec).apply) + + (t, exc) => + Future.when(shouldWrite(exc)) { + taskWriter.writeFailure(t, exc) + } + } + + val postTweet: (PostTweetRequest, Throwable) => Future[Unit] = + writer("post_tweet", PostTweetRequest) + } + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = + underlying.postTweet(request).respond { + // Log requests for states other than OK to enable debugging creation failures + case Return(res) if res.state != TweetCreateState.Ok => + writers.postTweet(request, new PostTweetFailure(res.state, res.failureReason)) + case Throw(exc) => + writers.postTweet(request, exc) + case _ => + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/MethodAuthorizer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/MethodAuthorizer.scala new file mode 100644 index 000000000..8b1d2e1db --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/MethodAuthorizer.scala @@ -0,0 +1,91 @@ +package com.twitter.tweetypie +package service + +/** + * An authorizer for determining if a request to a + * method should be rejected. + * + * This class is in the spirit of servo.request.ClientRequestAuthorizer. + * The difference is ClientRequestAuthorizer only operates + * on two pieces of information, clientId and a method name. + * + * This class can be used to create a more complex authorizer that + * operates on the specifics of a request. e.g, an + * authorizer that disallows certain clients from passing + * certain optional flags. 
+ * + * Note: With some work, ClientRequestAuthorizer could be + * generalized to support cases like this. If we end up making + * more method authorizers it might be worth it to + * go that route. + */ +abstract class MethodAuthorizer[T]() { + def apply(request: T, clientId: String): Future[Unit] + + /** + * Creates a decidered MethodAuthorizer: + * if the decider is off it will execute + * MethodAuthorizer.unit, which always succeeds. + */ + def enabledBy(decider: Gate[Unit]): MethodAuthorizer[T] = + MethodAuthorizer.select(decider, this, MethodAuthorizer.unit) + + /** + * Transform this MethodAuthorizer[T] into a MethodAuthorizer[A] + * by providing a function from A => T + */ + def contramap[A](f: A => T): MethodAuthorizer[A] = + MethodAuthorizer[A] { (request, clientId) => this(f(request), clientId) } +} + +object MethodAuthorizer { + + /** + * @param f an authorization function that returns + * Future.Unit if the request is authorized, and Future.exception() + * if the request is not authorized. + * + * @return An instance of MethodAuthorizer with an apply method + * that delegates to f + */ + def apply[T](f: (T, String) => Future[Unit]): MethodAuthorizer[T] = + new MethodAuthorizer[T]() { + def apply(request: T, clientId: String): Future[Unit] = f(request, clientId) + } + + /** + * @param authorizers A seq of MethodAuthorizers to be + * composed into one. + * @return A MethodAuthorizer that sequentially executes + * all of the authorizers + */ + def all[T](authorizers: Seq[MethodAuthorizer[T]]): MethodAuthorizer[T] = + MethodAuthorizer { (request, clientId) => + authorizers.foldLeft(Future.Unit) { + case (f, authorize) => f.before(authorize(request, clientId)) + } + } + + /** + * @return A MethodAuthorizer that always returns Future.Unit + * Useful if you need to decider off your MethodAuthorizer + * and replace it with one that always passes. 
+ */ + def unit[T]: MethodAuthorizer[T] = MethodAuthorizer { (request, client) => Future.Unit } + + /** + * @return A MethodAuthorizer that switches between two provided + * MethodAuthorizers depending on a decider. + */ + def select[T]( + decider: Gate[Unit], + ifTrue: MethodAuthorizer[T], + ifFalse: MethodAuthorizer[T] + ): MethodAuthorizer[T] = + MethodAuthorizer { (request, client) => + decider.pick( + ifTrue(request, client), + ifFalse(request, client) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ObservedTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ObservedTweetService.scala new file mode 100644 index 000000000..d0337076a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/ObservedTweetService.scala @@ -0,0 +1,422 @@ +package com.twitter.tweetypie +package service + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.servo.util.SynchronizedHashMap +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.service.observer._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.finagle.tracing.Trace + +/** + * Wraps an underlying TweetService, observing requests and results. 
+ */ +class ObservedTweetService( + protected val underlying: ThriftTweetService, + stats: StatsReceiver, + clientIdHelper: ClientIdHelper) + extends TweetServiceProxy { + + private[this] val asyncEventOrRetryScope = stats.scope("async_event_or_retry") + private[this] val deleteFieldsScope = stats.scope("delete_additional_fields") + private[this] val deleteTweetsScope = stats.scope("delete_tweets") + private[this] val getDeletedTweetsScope = stats.scope("get_deleted_tweets") + private[this] val getTweetCountsScope = stats.scope("get_tweet_counts") + private[this] val getTweetsScope = stats.scope("get_tweets") + private[this] val getTweetFieldsScope = stats.scope("get_tweet_fields") + private[this] val postTweetScope = stats.scope("post_tweet") + private[this] val replicatedInsertTweet2Scope = stats.scope("replicated_insert_tweet2") + private[this] val retweetScope = stats.scope("post_retweet") + private[this] val scrubGeoScope = stats.scope("scrub_geo") + private[this] val setFieldsScope = stats.scope("set_additional_fields") + private[this] val setRetweetVisibilityScope = stats.scope("set_retweet_visibility") + private[this] val getStoredTweetsScope = stats.scope("get_stored_tweets") + private[this] val getStoredTweetsByUserScope = stats.scope("get_stored_tweets_by_user") + + private[this] val defaultGetTweetsRequestOptions = GetTweetOptions() + + /** Increments the appropriate write success/failure counter */ + private[this] val observeWriteResult: Effect[Try[_]] = { + withAndWithoutClientId(stats) { (stats, _) => + val successCounter = stats.counter("write_successes") + val failureCounter = stats.counter("write_failures") + val clientErrorCounter = stats.counter("write_client_errors") + Effect[Try[_]] { + case Return(_) => successCounter.incr() + case Throw(ClientError(_, _)) | Throw(AccessDenied(_, _)) => clientErrorCounter.incr() + case Throw(_) => failureCounter.incr() + } + } + } + + /** Increments the tweet_writes counter on future success. 
*/ + private[this] val observeTweetWriteSuccess: Effect[Any] = { + withAndWithoutClientId(stats) { (stats, _) => + val counter = stats.counter("tweet_writes") + Effect[Any] { _ => counter.incr() } + } + } + + private[this] val observeGetTweetsRequest = + withAndWithoutClientId(getTweetsScope) { + GetTweetsObserver.observeRequest + } + + private[this] val observeGetTweetFieldsRequest = + withAndWithoutClientId(getTweetFieldsScope) { + GetTweetFieldsObserver.observeRequest + } + + private[this] val observeGetTweetCountsRequest = + withAndWithoutClientId(getTweetCountsScope) { (s, _) => + GetTweetCountsObserver.observeRequest(s) + } + + private[this] val observeRetweetRequest: Effect[RetweetRequest] = + withAndWithoutClientId(retweetScope) { (s, _) => Observer.observeRetweetRequest(s) } + + private[this] val observeDeleteTweetsRequest = + withAndWithoutClientId(deleteTweetsScope) { (s, _) => Observer.observeDeleteTweetsRequest(s) } + + private[this] val observeSetFieldsRequest: Effect[SetAdditionalFieldsRequest] = + withAndWithoutClientId(setFieldsScope) { (s, _) => Observer.observeSetFieldsRequest(s) } + + private[this] val observeSetRetweetVisibilityRequest: Effect[SetRetweetVisibilityRequest] = + withAndWithoutClientId(setRetweetVisibilityScope) { (s, _) => + Observer.observeSetRetweetVisibilityRequest(s) + } + + private[this] val observeDeleteFieldsRequest: Effect[DeleteAdditionalFieldsRequest] = + withAndWithoutClientId(deleteFieldsScope) { (s, _) => Observer.observeDeleteFieldsRequest(s) } + + private[this] val observePostTweetAdditionals: Effect[Tweet] = + withAndWithoutClientId(postTweetScope) { (s, _) => Observer.observeAdditionalFields(s) } + + private[this] val observePostTweetRequest: Effect[PostTweetRequest] = + withAndWithoutClientId(postTweetScope) { (s, _) => PostTweetObserver.observerRequest(s) } + + private[this] val observeGetTweetResults = + withAndWithoutClientId(getTweetsScope) { + GetTweetsObserver.observeResults + } + + private[this] val 
observeGetTweetFieldsResults: Effect[Seq[GetTweetFieldsResult]] = + GetTweetFieldsObserver.observeResults(getTweetFieldsScope) + + private[this] val observeTweetCountsResults = + GetTweetCountsObserver.observeResults(getTweetCountsScope) + + private[this] val observeScrubGeoRequest = + Observer.observeScrubGeo(scrubGeoScope) + + private[this] val observeRetweetResponse = + PostTweetObserver.observeResults(retweetScope, byClient = false) + + private[this] val observePostTweetResponse = + PostTweetObserver.observeResults(postTweetScope, byClient = false) + + private[this] val observeAsyncInsertRequest = + Observer.observeAsyncInsertRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncSetAdditionalFieldsRequest = + Observer.observeAsyncSetAdditionalFieldsRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncSetRetweetVisibilityRequest = + Observer.observeAsyncSetRetweetVisibilityRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncUndeleteTweetRequest = + Observer.observeAsyncUndeleteTweetRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncDeleteTweetRequest = + Observer.observeAsyncDeleteTweetRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncDeleteAdditionalFieldsRequest = + Observer.observeAsyncDeleteAdditionalFieldsRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncTakedownRequest = + Observer.observeAsyncTakedownRequest(asyncEventOrRetryScope) + + private[this] val observeAsyncUpdatePossiblySensitiveTweetRequest = + Observer.observeAsyncUpdatePossiblySensitiveTweetRequest(asyncEventOrRetryScope) + + private[this] val observedReplicatedInsertTweet2Request = + Observer.observeReplicatedInsertTweetRequest(replicatedInsertTweet2Scope) + + private[this] val observeGetTweetFieldsResultState: Effect[GetTweetFieldsObserver.Type] = + withAndWithoutClientId(getTweetFieldsScope) { (statsReceiver, _) => + GetTweetFieldsObserver.observeExchange(statsReceiver) + } + + private[this] val 
observeGetTweetsResultState: Effect[GetTweetsObserver.Type] = + withAndWithoutClientId(getTweetsScope) { (statsReceiver, _) => + GetTweetsObserver.observeExchange(statsReceiver) + } + + private[this] val observeGetTweetCountsResultState: Effect[GetTweetCountsObserver.Type] = + withAndWithoutClientId(getTweetCountsScope) { (statsReceiver, _) => + GetTweetCountsObserver.observeExchange(statsReceiver) + } + + private[this] val observeGetDeletedTweetsResultState: Effect[GetDeletedTweetsObserver.Type] = + withAndWithoutClientId(getDeletedTweetsScope) { (statsReceiver, _) => + GetDeletedTweetsObserver.observeExchange(statsReceiver) + } + + private[this] val observeGetStoredTweetsRequest: Effect[GetStoredTweetsRequest] = + GetStoredTweetsObserver.observeRequest(getStoredTweetsScope) + + private[this] val observeGetStoredTweetsResult: Effect[Seq[GetStoredTweetsResult]] = + GetStoredTweetsObserver.observeResult(getStoredTweetsScope) + + private[this] val observeGetStoredTweetsResultState: Effect[GetStoredTweetsObserver.Type] = + GetStoredTweetsObserver.observeExchange(getStoredTweetsScope) + + private[this] val observeGetStoredTweetsByUserRequest: Effect[GetStoredTweetsByUserRequest] = + GetStoredTweetsByUserObserver.observeRequest(getStoredTweetsByUserScope) + + private[this] val observeGetStoredTweetsByUserResult: Effect[GetStoredTweetsByUserResult] = + GetStoredTweetsByUserObserver.observeResult(getStoredTweetsByUserScope) + + private[this] val observeGetStoredTweetsByUserResultState: Effect[ + GetStoredTweetsByUserObserver.Type + ] = + GetStoredTweetsByUserObserver.observeExchange(getStoredTweetsByUserScope) + + override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = { + val actualRequest = + if (request.options.nonEmpty) request + else request.copy(options = Some(defaultGetTweetsRequestOptions)) + observeGetTweetsRequest(actualRequest) + Trace.recordBinary("query_width", request.tweetIds.length) + super + .getTweets(request) + 
.onSuccess(observeGetTweetResults) + .respond(response => observeGetTweetsResultState((request, response))) + } + + override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = { + observeGetTweetFieldsRequest(request) + Trace.recordBinary("query_width", request.tweetIds.length) + super + .getTweetFields(request) + .onSuccess(observeGetTweetFieldsResults) + .respond(response => observeGetTweetFieldsResultState((request, response))) + } + + override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = { + observeGetTweetCountsRequest(request) + Trace.recordBinary("query_width", request.tweetIds.length) + super + .getTweetCounts(request) + .onSuccess(observeTweetCountsResults) + .respond(response => observeGetTweetCountsResultState((request, response))) + } + + override def getDeletedTweets( + request: GetDeletedTweetsRequest + ): Future[Seq[GetDeletedTweetResult]] = { + Trace.recordBinary("query_width", request.tweetIds.length) + super + .getDeletedTweets(request) + .respond(response => observeGetDeletedTweetsResultState((request, response))) + } + + override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = { + observePostTweetRequest(request) + request.additionalFields.foreach(observePostTweetAdditionals) + super + .postTweet(request) + .onSuccess(observePostTweetResponse) + .onSuccess(observeTweetWriteSuccess) + .respond(observeWriteResult) + } + + override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = { + observeRetweetRequest(request) + super + .postRetweet(request) + .onSuccess(observeRetweetResponse) + .onSuccess(observeTweetWriteSuccess) + .respond(observeWriteResult) + } + + override def setAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] = { + observeSetFieldsRequest(request) + super + .setAdditionalFields(request) + .respond(observeWriteResult) + } + + override def setRetweetVisibility(request: SetRetweetVisibilityRequest): 
Future[Unit] = { + observeSetRetweetVisibilityRequest(request) + super + .setRetweetVisibility(request) + .respond(observeWriteResult) + } + + override def deleteAdditionalFields(request: DeleteAdditionalFieldsRequest): Future[Unit] = { + observeDeleteFieldsRequest(request) + super + .deleteAdditionalFields(request) + .respond(observeWriteResult) + } + + override def updatePossiblySensitiveTweet( + request: UpdatePossiblySensitiveTweetRequest + ): Future[Unit] = + super + .updatePossiblySensitiveTweet(request) + .respond(observeWriteResult) + + override def deleteLocationData(request: DeleteLocationDataRequest): Future[Unit] = + super + .deleteLocationData(request) + .respond(observeWriteResult) + + override def scrubGeo(geoScrub: GeoScrub): Future[Unit] = { + observeScrubGeoRequest(geoScrub) + super + .scrubGeo(geoScrub) + .respond(observeWriteResult) + } + + override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] = + super.scrubGeoUpdateUserTimestamp(request).respond(observeWriteResult) + + override def takedown(request: TakedownRequest): Future[Unit] = + super + .takedown(request) + .respond(observeWriteResult) + + override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] = + super + .setTweetUserTakedown(request) + .respond(observeWriteResult) + + override def incrTweetFavCount(request: IncrTweetFavCountRequest): Future[Unit] = + super + .incrTweetFavCount(request) + .respond(observeWriteResult) + + override def incrTweetBookmarkCount(request: IncrTweetBookmarkCountRequest): Future[Unit] = + super + .incrTweetBookmarkCount(request) + .respond(observeWriteResult) + + override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = { + observeDeleteTweetsRequest(request) + super + .deleteTweets(request) + .respond(observeWriteResult) + } + + override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = + super + .cascadedDeleteTweet(request) + 
.respond(observeWriteResult) + + override def asyncInsert(request: AsyncInsertRequest): Future[Unit] = { + observeAsyncInsertRequest(request) + super + .asyncInsert(request) + .respond(observeWriteResult) + } + + override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] = { + observeAsyncSetAdditionalFieldsRequest(request) + super + .asyncSetAdditionalFields(request) + .respond(observeWriteResult) + } + + override def asyncSetRetweetVisibility( + request: AsyncSetRetweetVisibilityRequest + ): Future[Unit] = { + observeAsyncSetRetweetVisibilityRequest(request) + super + .asyncSetRetweetVisibility(request) + .respond(observeWriteResult) + } + + override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] = { + observeAsyncUndeleteTweetRequest(request) + super + .asyncUndeleteTweet(request) + .respond(observeWriteResult) + } + + override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] = { + observeAsyncDeleteTweetRequest(request) + super + .asyncDelete(request) + .respond(observeWriteResult) + } + + override def asyncDeleteAdditionalFields( + request: AsyncDeleteAdditionalFieldsRequest + ): Future[Unit] = { + observeAsyncDeleteAdditionalFieldsRequest(request) + super + .asyncDeleteAdditionalFields(request) + .respond(observeWriteResult) + } + + override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] = { + observeAsyncTakedownRequest(request) + super + .asyncTakedown(request) + .respond(observeWriteResult) + } + + override def asyncUpdatePossiblySensitiveTweet( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): Future[Unit] = { + observeAsyncUpdatePossiblySensitiveTweetRequest(request) + super + .asyncUpdatePossiblySensitiveTweet(request) + .respond(observeWriteResult) + } + + override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] = { + observedReplicatedInsertTweet2Request(request.cachedTweet.tweet) + super.replicatedInsertTweet2(request) + } + 
+ override def getStoredTweets( + request: GetStoredTweetsRequest + ): Future[Seq[GetStoredTweetsResult]] = { + observeGetStoredTweetsRequest(request) + super + .getStoredTweets(request) + .onSuccess(observeGetStoredTweetsResult) + .respond(response => observeGetStoredTweetsResultState((request, response))) + } + + override def getStoredTweetsByUser( + request: GetStoredTweetsByUserRequest + ): Future[GetStoredTweetsByUserResult] = { + observeGetStoredTweetsByUserRequest(request) + super + .getStoredTweetsByUser(request) + .onSuccess(observeGetStoredTweetsByUserResult) + .respond(response => observeGetStoredTweetsByUserResultState((request, response))) + } + + private def withAndWithoutClientId[A]( + stats: StatsReceiver + )( + f: (StatsReceiver, Boolean) => Effect[A] + ) = + f(stats, false).also(withClientId(stats)(f)) + + private def withClientId[A](stats: StatsReceiver)(f: (StatsReceiver, Boolean) => Effect[A]) = { + val map = new SynchronizedHashMap[String, Effect[A]] + + Effect[A] { value => + clientIdHelper.effectiveClientIdRoot.foreach { clientId => + val clientObserver = map.getOrElseUpdate(clientId, f(stats.scope(clientId), true)) + clientObserver(value) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/QuillTweetService.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/QuillTweetService.scala new file mode 100644 index 000000000..69b9481be --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/QuillTweetService.scala @@ -0,0 +1,75 @@ +package com.twitter.tweetypie +package service + +import com.twitter.quill.capture.QuillCapture +import com.twitter.tweetypie.thriftscala._ +import org.apache.thrift.transport.TMemoryBuffer +import com.twitter.finagle.thrift.Protocols +import com.twitter.quill.capture.Payloads +import com.twitter.tweetypie.service.QuillTweetService.createThriftBinaryRequest +import org.apache.thrift.protocol.TMessage +import 
object QuillTweetService {

  /**
   * Serializes a thrift CALL message with the binary protocol and returns its bytes.
   *
   * @param method_name name written into the message header
   * @param write_args  callback that writes the argument struct onto the protocol
   * @return a copy of exactly the bytes written (the first `length` bytes of the buffer's array)
   */
  def createThriftBinaryRequest(method_name: String, write_args: TProtocol => Unit): Array[Byte] = {
    val buffer = new TMemoryBuffer(512)
    val protocol = Protocols.binaryFactory().getProtocol(buffer)
    val header = new TMessage(method_name, TMessageType.CALL, 0)

    protocol.writeMessageBegin(header)
    write_args(protocol)
    protocol.writeMessageEnd()

    // Trim to what was actually written.
    java.util.Arrays.copyOfRange(buffer.getArray, 0, buffer.length)
  }
}

/**
 * Wraps an underlying TweetService, logging some requests.
 *
 * For the overridden endpoints the request is re-serialized as a binary thrift CALL message
 * and handed to `quillCapture` before the call is delegated to `underlying`.
 */
class QuillTweetService(quillCapture: QuillCapture, protected val underlying: ThriftTweetService)
    extends TweetServiceProxy {

  /** Serializes one call and stores it with the capture as a server-received payload. */
  private[this] def capture(methodName: String, writeArgs: TProtocol => Unit): Unit = {
    val requestBytes = createThriftBinaryRequest(methodName, writeArgs)
    quillCapture.storeServerRecv(Payloads.fromThriftMessageBytes(requestBytes))
    ()
  }

  override def postTweet(request: PostTweetRequest): Future[PostTweetResult] = {
    capture(TweetService.PostTweet.name, TweetService.PostTweet.Args(request).write)
    underlying.postTweet(request)
  }

  override def deleteTweets(request: DeleteTweetsRequest): Future[Seq[DeleteTweetResult]] = {
    capture(TweetService.DeleteTweets.name, TweetService.DeleteTweets.Args(request).write)
    underlying.deleteTweets(request)
  }

  override def postRetweet(request: RetweetRequest): Future[PostTweetResult] = {
    capture(TweetService.PostRetweet.name, TweetService.PostRetweet.Args(request).write)
    underlying.postRetweet(request)
  }

  override def unretweet(request: UnretweetRequest): Future[UnretweetResult] = {
    capture(TweetService.Unretweet.name, TweetService.Unretweet.Args(request).write)
    underlying.unretweet(request)
  }

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] = {
    capture(
      TweetServiceInternal.CascadedDeleteTweet.name,
      TweetServiceInternal.CascadedDeleteTweet.Args(request).write)
    underlying.cascadedDeleteTweet(request)
  }

}

object ReplicatingTweetService {

  /**
   * Pairs a replication client with the gate that decides, per request, whether the
   * replication call should be made at all.
   */
  case class GatedReplicationClient(client: ThriftTweetService, gate: Gate[Unit]) {

    /** Runs `action` against `client` on `executor`, but only when the gate is open. */
    def execute(executor: Forked.Executor, action: ThriftTweetService => Unit): Unit = {
      val enabled = gate()
      if (enabled) executor(() => action(client))
    }
  }
}

/**
 * Wraps an underlying ThriftTweetService, transforming external requests to replicated requests.
 *
 * Each overridden read first fans the matching `replicated*` call out to every gated
 * replication target and then serves the request from `underlying`.
 */
class ReplicatingTweetService(
  protected val underlying: ThriftTweetService,
  replicationTargets: Seq[GatedReplicationClient],
  executor: Forked.Executor,
) extends TweetServiceProxy {

  /** Offers `action` to every replication target; each target applies its own gate. */
  private[this] def fanOut(action: ThriftTweetService => Unit): Unit =
    for (target <- replicationTargets) target.execute(executor, action)

  override def getTweetCounts(request: GetTweetCountsRequest): Future[Seq[GetTweetCountsResult]] = {
    fanOut(_.replicatedGetTweetCounts(request))
    underlying.getTweetCounts(request)
  }

  override def getTweetFields(request: GetTweetFieldsRequest): Future[Seq[GetTweetFieldsResult]] = {
    // Requests flagged doNotCache are not replicated.
    val skipReplication = request.options.doNotCache
    if (!skipReplication) {
      fanOut(_.replicatedGetTweetFields(request))
    }
    underlying.getTweetFields(request)
  }

  override def getTweets(request: GetTweetsRequest): Future[Seq[GetTweetResult]] = {
    // Options are optional here; absent options mean "replicate".
    val skipReplication = request.options.exists(_.doNotCache)
    if (!skipReplication) {
      fanOut(_.replicatedGetTweets(request))
    }
    underlying.getTweets(request)
  }
}
object RescueExceptions {
  private val log = Logger("com.twitter.tweetypie.service.TweetService")

  /**
   * Builds a handler that turns any failure into a failed Future carrying the matching
   * servo exception, with the message prefixed by the client id and method name.
   *
   * @param name     method name included in the message prefix
   * @param clientId client id included in the message prefix
   */
  def rescueToServoFailure(
    name: String,
    clientId: String
  ): PartialFunction[Throwable, Future[Nothing]] = {
    val withPrefix: String => String = formatError(name, clientId, _)
    translateToServoFailure(withPrefix).andThen(Future.exception)
  }

  /**
   * Maps a throwable onto a servo `ClientError`, `ServerError` or `AccessDenied`.
   * Case order matters: the suspended/deactivated `AccessDenied` check must precede the
   * general `AccessDenied` case, and the catch-all must stay last.
   *
   * @param render applied to every outgoing message
   */
  private def translateToServoFailure(
    render: String => String
  ): PartialFunction[Throwable, Throwable] = {
    // Suspended/deactivated access denials keep their type; only the message is rewritten.
    case denied: AccessDenied if suspendedOrDeactivated(denied) =>
      denied.copy(message = render(denied.message))

    // Client-side failures.
    case err: ClientError =>
      err.copy(message = render(err.message))
    case err: UnauthorizedException =>
      ClientError(ClientErrorCause.Unauthorized, render(err.msg))
    case denied: AccessDenied =>
      ClientError(ClientErrorCause.Unauthorized, render(denied.message))
    case err: RateLimited =>
      ClientError(ClientErrorCause.RateLimited, render(err.message))

    // Server-side failures.
    case err: ServerError =>
      err.copy(message = render(err.message))
    case err @ (_: TimeoutException | _: IndividualRequestTimeoutException) =>
      ServerError(ServerErrorCause.RequestTimeout, render(err.toString))
    case err: UpstreamFailure =>
      ServerError(ServerErrorCause.DependencyError, render(err.toString))
    case err: OverCapacity =>
      ServerError(ServerErrorCause.ServiceUnavailable, render(err.message))
    case err: TweetHydrationError =>
      ServerError(ServerErrorCause.DependencyError, render(err.toString))

    // Anything else is logged and reported as an internal server error.
    case other =>
      log.warn("caught unexpected exception", other)
      ServerError(ServerErrorCause.InternalServerError, render(other.toString))
  }

  /** True when the denial's cause is present and is UserDeactivated or UserSuspended. */
  private def suspendedOrDeactivated(e: AccessDenied): Boolean =
    e.errorCause.exists {
      case AccessDeniedCause.UserDeactivated | AccessDeniedCause.UserSuspended => true
      case _ => false
    }

  /** Prefixes `msg` with "(clientId, name) ". */
  private def formatError(name: String, clientId: String, msg: String): String =
    s"($clientId, $name) $msg"
}
/**
 * Base trait for TweetService decorators that wrap an `underlying` service and override only
 * a handful of methods.
 *
 * It behaves like [[com.twitter.tweetypie.thriftscala.TweetServiceProxy]] but additionally
 * mixes in ThriftTweetService, so the internal endpoints (async*, replicated*, ...) are
 * forwarded too. Every forwarder below goes through `wrap`, which subclasses may override to
 * run code around each call.
 */
trait TweetServiceProxy extends BaseTweetServiceProxy with ThriftTweetService {
  protected override def underlying: ThriftTweetService

  // ---------------------------------------------------------------------------
  // replicated* endpoints
  // ---------------------------------------------------------------------------

  override def replicatedGetTweetCounts(request: GetTweetCountsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweetCounts(request))

  override def replicatedGetTweetFields(request: GetTweetFieldsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweetFields(request))

  override def replicatedGetTweets(request: GetTweetsRequest): Future[Unit] =
    wrap(underlying.replicatedGetTweets(request))

  override def replicatedInsertTweet2(request: ReplicatedInsertTweet2Request): Future[Unit] =
    wrap(underlying.replicatedInsertTweet2(request))

  override def replicatedDeleteTweet2(request: ReplicatedDeleteTweet2Request): Future[Unit] =
    wrap(underlying.replicatedDeleteTweet2(request))

  override def replicatedUndeleteTweet2(request: ReplicatedUndeleteTweet2Request): Future[Unit] =
    wrap(underlying.replicatedUndeleteTweet2(request))

  override def replicatedIncrFavCount(tweetId: TweetId, delta: Int): Future[Unit] =
    wrap(underlying.replicatedIncrFavCount(tweetId, delta))

  override def replicatedIncrBookmarkCount(tweetId: TweetId, delta: Int): Future[Unit] =
    wrap(underlying.replicatedIncrBookmarkCount(tweetId, delta))

  override def replicatedSetRetweetVisibility(
    request: ReplicatedSetRetweetVisibilityRequest
  ): Future[Unit] =
    wrap(underlying.replicatedSetRetweetVisibility(request))

  override def replicatedScrubGeo(tweetIds: Seq[TweetId]): Future[Unit] =
    wrap(underlying.replicatedScrubGeo(tweetIds))

  override def replicatedSetAdditionalFields(request: SetAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.replicatedSetAdditionalFields(request))

  override def replicatedDeleteAdditionalFields(
    request: ReplicatedDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    wrap(underlying.replicatedDeleteAdditionalFields(request))

  override def replicatedTakedown(tweet: Tweet): Future[Unit] =
    wrap(underlying.replicatedTakedown(tweet))

  override def replicatedUpdatePossiblySensitiveTweet(tweet: Tweet): Future[Unit] =
    wrap(underlying.replicatedUpdatePossiblySensitiveTweet(tweet))

  // ---------------------------------------------------------------------------
  // async* endpoints
  // ---------------------------------------------------------------------------

  override def asyncInsert(request: AsyncInsertRequest): Future[Unit] =
    wrap(underlying.asyncInsert(request))

  override def asyncDelete(request: AsyncDeleteRequest): Future[Unit] =
    wrap(underlying.asyncDelete(request))

  override def asyncUndeleteTweet(request: AsyncUndeleteTweetRequest): Future[Unit] =
    wrap(underlying.asyncUndeleteTweet(request))

  override def asyncSetAdditionalFields(request: AsyncSetAdditionalFieldsRequest): Future[Unit] =
    wrap(underlying.asyncSetAdditionalFields(request))

  override def asyncDeleteAdditionalFields(
    request: AsyncDeleteAdditionalFieldsRequest
  ): Future[Unit] =
    wrap(underlying.asyncDeleteAdditionalFields(request))

  override def asyncUpdatePossiblySensitiveTweet(
    request: AsyncUpdatePossiblySensitiveTweetRequest
  ): Future[Unit] =
    wrap(underlying.asyncUpdatePossiblySensitiveTweet(request))

  override def asyncEraseUserTweets(request: AsyncEraseUserTweetsRequest): Future[Unit] =
    wrap(underlying.asyncEraseUserTweets(request))

  override def asyncIncrFavCount(request: AsyncIncrFavCountRequest): Future[Unit] =
    wrap(underlying.asyncIncrFavCount(request))

  override def asyncIncrBookmarkCount(request: AsyncIncrBookmarkCountRequest): Future[Unit] =
    wrap(underlying.asyncIncrBookmarkCount(request))

  override def asyncSetRetweetVisibility(request: AsyncSetRetweetVisibilityRequest): Future[Unit] =
    wrap(underlying.asyncSetRetweetVisibility(request))

  override def asyncTakedown(request: AsyncTakedownRequest): Future[Unit] =
    wrap(underlying.asyncTakedown(request))

  // ---------------------------------------------------------------------------
  // synchronous write endpoints
  // ---------------------------------------------------------------------------

  override def cascadedDeleteTweet(request: CascadedDeleteTweetRequest): Future[Unit] =
    wrap(underlying.cascadedDeleteTweet(request))

  override def eraseUserTweets(request: EraseUserTweetsRequest): Future[Unit] =
    wrap(underlying.eraseUserTweets(request))

  override def scrubGeoUpdateUserTimestamp(request: DeleteLocationData): Future[Unit] =
    wrap(underlying.scrubGeoUpdateUserTimestamp(request))

  override def setRetweetVisibility(request: SetRetweetVisibilityRequest): Future[Unit] =
    wrap(underlying.setRetweetVisibility(request))

  override def setTweetUserTakedown(request: SetTweetUserTakedownRequest): Future[Unit] =
    wrap(underlying.setTweetUserTakedown(request))

  override def quotedTweetDelete(request: QuotedTweetDeleteRequest): Future[Unit] =
    wrap(underlying.quotedTweetDelete(request))

  override def quotedTweetTakedown(request: QuotedTweetTakedownRequest): Future[Unit] =
    wrap(underlying.quotedTweetTakedown(request))

  // ---------------------------------------------------------------------------
  // stored-tweet reads
  // ---------------------------------------------------------------------------

  override def getStoredTweets(
    request: GetStoredTweetsRequest
  ): Future[Seq[GetStoredTweetsResult]] =
    wrap(underlying.getStoredTweets(request))

  override def getStoredTweetsByUser(
    request: GetStoredTweetsByUserRequest
  ): Future[GetStoredTweetsByUserResult] =
    wrap(underlying.getStoredTweetsByUser(request))
}

/**
 * A TweetServiceProxy whose `underlying` service can be reassigned after construction.
 */
class MutableTweetServiceProxy(var underlying: ThriftTweetService) extends TweetServiceProxy

/**
 * A TweetServiceProxy that makes `clientId` the current ClientId for the duration of every
 * forwarded call.
 */
class ClientIdSettingTweetServiceProxy(clientId: ClientId, val underlying: ThriftTweetService)
    extends TweetServiceProxy {
  override def wrap[A](f: => Future[A]): Future[A] =
    clientId.asCurrent(f)
}

/**
 * Tuning knobs for the synthetic getTweets traffic sent to a server before it takes real
 * requests.
 *
 * @param realTweetRequestCycles how many identical warmup requests are issued in total
 * @param requestTimeout         how long to wait for each batch of requests
 * @param clientId               client id the warmup requests are issued under
 * @param requestTimeRange       not read by the warmer visible in this file
 * @param maxConcurrency         number of requests issued per batch
 */
case class WarmupQueriesSettings(
  realTweetRequestCycles: Int = 100,
  requestTimeout: Duration = 3.seconds,
  clientId: ClientId = ClientId("tweetypie.warmup"),
  requestTimeRange: Duration = 10.minutes,
  maxConcurrency: Int = 20)

object TweetServiceWarmer {

  /**
   * User id the warmup requests are made on behalf of: a TLS test account with a short
   * favorites timeline.
   */
  val ForUserId = 3511687034L // @mikestltestact1
}
/**
 * Generates requests to getTweets for the purpose of warming up the code paths used
 * in fetching tweets.
 *
 * @param warmupSettings number of requests, batch size, per-batch timeout and client id
 * @param requestOptions options attached to every warmup request; the defaults switch on the
 *                       optional hydrations named in the expression below
 */
class TweetServiceWarmer(
  warmupSettings: WarmupQueriesSettings,
  requestOptions: GetTweetOptions = GetTweetOptions(includePlaces = true,
    includeRetweetCount = true, includeReplyCount = true, includeFavoriteCount = true,
    includeCards = true, cardsPlatformKey = Some("iPhone-13"), includePerspectivals = true,
    includeQuotedTweet = true, forUserId = Some(TweetServiceWarmer.ForUserId)))
    extends (ThriftTweetService => Unit) {
  import warmupSettings._

  // Fixed set of tweet ids requested by every warmup call; the trailing comments describe
  // why each one was picked.
  private val realTweetIds =
    Seq(
      20L, // just setting up my twttr
      456190426412617728L, // protected user tweet
      455477977715707904L, // suspended user tweet
      440322224407314432L, // ellen oscar selfie
      372173241290612736L, // gaga mentions 1d
      456965485179838464L, // media tagged tweet
      525421442918121473L, // tweet with card
      527214829807759360L, // tweet with annotation
      472788687571677184L // tweet with quote tweet
    )

  private val log = Logger(getClass)

  /**
   * Executes the warmup queries and logs how long they took. Each batch of requests is
   * waited on for at most `requestTimeout`, so the call blocks for roughly
   * (number of batches * requestTimeout) in the worst case.
   */
  def apply(service: ThriftTweetService): Unit = {
    val warmupStart = Time.now
    log.info("warming up...")
    warmup(service)
    val warmupDuration = Time.now.since(warmupStart)
    log.info("warmup took " + warmupDuration)
  }

  /**
   * Issues `realTweetRequestCycles` identical getTweets requests in batches of
   * `maxConcurrency`, moving on to the next batch once every response of the current one has
   * completed (successfully or not) or `requestTimeout` has elapsed.
   */
  private[this] def warmup(service: ThriftTweetService): Unit =
    clientId.asCurrent {
      val request = GetTweetsRequest(realTweetIds, options = Some(requestOptions))
      val requests = Seq.fill(realTweetRequestCycles)(request)
      val requestGroups = requests.grouped(maxConcurrency)

      for (requests <- requestGroups) {
        val responses = requests.map(service.getTweets(_))
        try {
          // Future.join fails as soon as any joined future fails, which would end the wait
          // while the rest of the batch is still in flight and let the next batch push the
          // number of outstanding requests past maxConcurrency. Lifting each response to a
          // Try makes the join wait for all of them regardless of outcome.
          Await.ready(Future.join(responses.map(_.liftToTry)), requestTimeout)
        } catch {
          // Await.ready throws on timeout. Non-fatal exceptions are swallowed so that a slow
          // or failing warmup never prevents startup; fatal ones (NonFatal does not match
          // InterruptedException, for example) still propagate.
          case NonFatal(_) =>
        }
      }
    }
}
object GetDeletedTweetsObserver { + type Type = ObserveExchange[GetDeletedTweetsRequest, Seq[GetDeletedTweetResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) | Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala new file mode 100644 index 000000000..5c16c68b2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsByUserObserver.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsByUserResult + +private[service] object GetStoredTweetsByUserObserver extends StoredTweetsObserver { + + type Type = ObserveExchange[GetStoredTweetsByUserRequest, GetStoredTweetsByUserResult] + val firstTweetTimestamp: Long = 1142974200L + + def observeRequest(stats: StatsReceiver): Effect[GetStoredTweetsByUserRequest] = { + val optionsScope = stats.scope("options") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val forUserIdCounter = optionsScope.counter("set_for_user_id") + val timeRangeStat = optionsScope.stat("time_range_seconds") + val cursorCounter = optionsScope.counter("cursor") + val startFromOldestCounter = optionsScope.counter("start_from_oldest") + val additionalFieldsScope = optionsScope.scope("additional_fields") + + Effect { request => + if (request.options.isDefined) { + val options = request.options.get + + if 
(options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.setForUserId) forUserIdCounter.incr() + if (options.cursor.isDefined) { + cursorCounter.incr() + } else { + // We only add a time range stat once, when there's no cursor in the request (i.e. this + // isn't a repeat request for a subsequent batch of results) + val startTimeSeconds: Long = + options.startTimeMsec.map(_ / 1000).getOrElse(firstTweetTimestamp) + val endTimeSeconds: Long = options.endTimeMsec.map(_ / 1000).getOrElse(Time.now.inSeconds) + timeRangeStat.add(endTimeSeconds - startTimeSeconds) + + // We use the startFromOldest parameter when the cursor isn't defined + if (options.startFromOldest) startFromOldestCounter.incr() + } + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + } + } + } + + def observeResult(stats: StatsReceiver): Effect[GetStoredTweetsByUserResult] = { + val resultScope = stats.scope("result") + + Effect { result => + observeStoredTweets(result.storedTweets, resultScope) + } + } + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) => resultStateStats.success() + case Throw(_) => resultStateStats.failed() + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala new file mode 100644 index 000000000..f6021d06c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetStoredTweetsObserver.scala @@ -0,0 +1,52 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.tweetypie.thriftscala.GetStoredTweetsRequest +import com.twitter.tweetypie.thriftscala.GetStoredTweetsResult + +private[service] object GetStoredTweetsObserver 
extends StoredTweetsObserver { + type Type = ObserveExchange[GetStoredTweetsRequest, Seq[GetStoredTweetsResult]] + + def observeRequest(stats: StatsReceiver): Effect[GetStoredTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + + val optionsScope = stats.scope("options") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val forUserIdCounter = optionsScope.counter("for_user_id") + val additionalFieldsScope = optionsScope.scope("additional_fields") + + Effect { request => + requestSizeStat.add(request.tweetIds.size) + + if (request.options.isDefined) { + val options = request.options.get + if (options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.forUserId.isDefined) forUserIdCounter.incr() + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + } + } + } + + def observeResult(stats: StatsReceiver): Effect[Seq[GetStoredTweetsResult]] = { + val resultScope = stats.scope("result") + + Effect { result => + observeStoredTweets(result.map(_.storedTweet), resultScope) + } + } + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) => resultStateStats.success(request.tweetIds.size) + case Throw(_) => resultStateStats.failed(request.tweetIds.size) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala new file mode 100644 index 000000000..c97fdc2e7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetCountsObserver.scala @@ -0,0 +1,67 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import 
com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.thriftscala.GetTweetCountsRequest +import com.twitter.tweetypie.thriftscala.GetTweetCountsResult + +private[service] object GetTweetCountsObserver { + type Type = ObserveExchange[GetTweetCountsRequest, Seq[GetTweetCountsResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(_) | Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeResults(stats: StatsReceiver): Effect[Seq[GetTweetCountsResult]] = { + val retweetCounter = stats.counter("retweets") + val replyCounter = stats.counter("replies") + val favoriteCounter = stats.counter("favorites") + + Effect { counts => + counts.foreach { c => + if (c.retweetCount.isDefined) retweetCounter.incr() + if (c.replyCount.isDefined) replyCounter.incr() + if (c.favoriteCount.isDefined) favoriteCounter.incr() + } + } + } + + def observeRequest(stats: StatsReceiver): Effect[GetTweetCountsRequest] = { + val requestSizesStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val includeRetweetCounter = optionsScope.counter("retweet_counts") + val includeReplyCounter = optionsScope.counter("reply_counts") + val includeFavoriteCounter = optionsScope.counter("favorite_counts") + val tweetAgeStat = stats.stat("tweet_age_seconds") + + Effect { request => + val size = request.tweetIds.size + requestSizesStat.add(size) + + // Measure the age of the Tweets requested via Tweet.get_tweet_counts. + // Tweet counts are stored in cache, falling back to TFlock on cache misses. + // Track the age of requested TweetIds to understand how it affects client response latencies.
+ for { + id <- request.tweetIds + timestamp <- SnowflakeId.timeFromIdOpt(id) + age = Time.now.since(timestamp) + } tweetAgeStat.add(age.inSeconds) + + if (request.includeRetweetCount) includeRetweetCounter.incr(size) + if (request.includeReplyCount) includeReplyCounter.incr(size) + if (request.includeFavoriteCount) includeFavoriteCounter.incr(size) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala new file mode 100644 index 000000000..af6666b03 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetFieldsObserver.scala @@ -0,0 +1,160 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.tweetypie.thriftscala._ + +private[service] object GetTweetFieldsObserver { + type Type = ObserveExchange[GetTweetFieldsRequest, Seq[GetTweetFieldsResult]] + + def observeExchange(statsReceiver: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(statsReceiver) + + val stats = statsReceiver.scope("results") + val tweetResultFailed = stats.counter("tweet_result_failed") + val quoteResultFailed = stats.counter("quote_result_failed") + val overCapacity = stats.counter("over_capacity") + + def observeFailedResult(r: GetTweetFieldsResult): Unit = { + r.tweetResult match { + case TweetFieldsResultState.Failed(failed) => + tweetResultFailed.incr() + + if (failed.overCapacity) overCapacity.incr() + case _ => + } + + if (r.quotedTweetResult.exists(_.isInstanceOf[TweetFieldsResultState.Failed])) + quoteResultFailed.incr() + } + + Effect { + case (request, response) => + response match { + case Return(xs) => + xs foreach { + case x if isFailedResult(x) => + observeFailedResult(x) + resultStateStats.failed() + case _ => + resultStateStats.success() + } + case 
Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeRequest(stats: StatsReceiver, byClient: Boolean): Effect[GetTweetFieldsRequest] = { + val requestSizeStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val tweetFieldsScope = optionsScope.scope("tweet_field") + val countsFieldsScope = optionsScope.scope("counts_field") + val mediaFieldsScope = optionsScope.scope("media_field") + val includeRetweetedTweetCounter = optionsScope.counter("include_retweeted_tweet") + val includeQuotedTweetCounter = optionsScope.counter("include_quoted_tweet") + val forUserIdCounter = optionsScope.counter("for_user_id") + val cardsPlatformKeyCounter = optionsScope.counter("cards_platform_key") + val cardsPlatformKeyScope = optionsScope.scope("cards_platform_key") + val extensionsArgsCounter = optionsScope.counter("extensions_args") + val doNotCacheCounter = optionsScope.counter("do_not_cache") + val simpleQuotedTweetCounter = optionsScope.counter("simple_quoted_tweet") + val visibilityPolicyScope = optionsScope.scope("visibility_policy") + val userVisibleCounter = visibilityPolicyScope.counter("user_visible") + val noFilteringCounter = visibilityPolicyScope.counter("no_filtering") + val noSafetyLevelCounter = optionsScope.counter("no_safety_level") + val safetyLevelCounter = optionsScope.counter("safety_level") + val safetyLevelScope = optionsScope.scope("safety_level") + + Effect { + case GetTweetFieldsRequest(tweetIds, options) => + requestSizeStat.add(tweetIds.size) + options.tweetIncludes.foreach { + case TweetInclude.TweetFieldId(id) => tweetFieldsScope.counter(id.toString).incr() + case TweetInclude.CountsFieldId(id) => countsFieldsScope.counter(id.toString).incr() + case TweetInclude.MediaEntityFieldId(id) => mediaFieldsScope.counter(id.toString).incr() + case _ => + } + if (options.includeRetweetedTweet) 
includeRetweetedTweetCounter.incr() + if (options.includeQuotedTweet) includeQuotedTweetCounter.incr() + if (options.forUserId.nonEmpty) forUserIdCounter.incr() + if (options.cardsPlatformKey.nonEmpty) cardsPlatformKeyCounter.incr() + if (!byClient) { + options.cardsPlatformKey.foreach { cardsPlatformKey => + cardsPlatformKeyScope.counter(cardsPlatformKey).incr() + } + } + if (options.extensionsArgs.nonEmpty) extensionsArgsCounter.incr() + if (options.safetyLevel.nonEmpty) { + safetyLevelCounter.incr() + } else { + noSafetyLevelCounter.incr() + } + options.visibilityPolicy match { + case TweetVisibilityPolicy.UserVisible => userVisibleCounter.incr() + case TweetVisibilityPolicy.NoFiltering => noFilteringCounter.incr() + case _ => + } + options.safetyLevel.foreach { level => safetyLevelScope.counter(level.toString).incr() } + if (options.doNotCache) doNotCacheCounter.incr() + if (options.simpleQuotedTweet) simpleQuotedTweetCounter.incr() + } + } + + def observeResults(stats: StatsReceiver): Effect[Seq[GetTweetFieldsResult]] = { + val resultsCounter = stats.counter("results") + val resultsScope = stats.scope("results") + val observeState = GetTweetFieldsObserver.observeResultState(resultsScope) + + Effect { results => + resultsCounter.incr(results.size) + results.foreach { r => + observeState(r.tweetResult) + r.quotedTweetResult.foreach { qtResult => + resultsCounter.incr() + observeState(qtResult) + } + } + } + } + + /** + * Whether a GetTweetFieldsResult is observed as a failure: true when its tweet result or its quoted-tweet result is Failed.
+ */ + private def isFailedResult(result: GetTweetFieldsResult): Boolean = { + result.tweetResult.isInstanceOf[TweetFieldsResultState.Failed] || + result.quotedTweetResult.exists(_.isInstanceOf[TweetFieldsResultState.Failed]) + } + + private def observeResultState(stats: StatsReceiver): Effect[TweetFieldsResultState] = { + val foundCounter = stats.counter("found") + val notFoundCounter = stats.counter("not_found") + val failedCounter = stats.counter("failed") + val filteredCounter = stats.counter("filtered") + val filteredReasonScope = stats.scope("filtered_reason") + val otherCounter = stats.counter("other") + val observeTweet = Observer + .countTweetAttributes(stats.scope("found"), byClient = false) + + Effect { + case TweetFieldsResultState.Found(found) => + foundCounter.incr() + observeTweet(found.tweet) + found.retweetedTweet.foreach(observeTweet) + + case TweetFieldsResultState.NotFound(_) => notFoundCounter.incr() + case TweetFieldsResultState.Failed(_) => failedCounter.incr() + case TweetFieldsResultState.Filtered(f) => + filteredCounter.incr() + // Since reasons have parameters, e.g.
AuthorBlockViewer(true) and we don't + // need the "(true)" part, we do .getClass.getSimpleName to get rid of that + filteredReasonScope.counter(f.reason.getClass.getSimpleName).incr() + + case _ => otherCounter.incr() + } + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala new file mode 100644 index 000000000..77f1829a5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/GetTweetsObserver.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.servo.exception.thriftscala.ClientError +import com.twitter.tweetypie.thriftscala.GetTweetOptions +import com.twitter.tweetypie.thriftscala.GetTweetResult +import com.twitter.tweetypie.thriftscala.GetTweetsRequest + +private[service] object GetTweetsObserver { + type Type = ObserveExchange[GetTweetsRequest, Seq[GetTweetResult]] + + def observeExchange(stats: StatsReceiver): Effect[Type] = { + val resultStateStats = ResultStateStats(stats) + + Effect { + case (request, response) => + response match { + case Return(xs) => + xs.foreach { + case result if Observer.successStatusStates(result.tweetState) => + resultStateStats.success() + case _ => + resultStateStats.failed() + } + case Throw(ClientError(_)) => + resultStateStats.success(request.tweetIds.size) + case Throw(_) => + resultStateStats.failed(request.tweetIds.size) + } + } + } + + def observeResults(stats: StatsReceiver, byClient: Boolean): Effect[Seq[GetTweetResult]] = + countStates(stats).also(countTweetReadAttributes(stats, byClient)) + + def observeRequest(stats: StatsReceiver, byClient: Boolean): Effect[GetTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + val optionsScope = stats.scope("options") + val languageScope = optionsScope.scope("language") + val includeSourceTweetCounter = 
optionsScope.counter("source_tweet") + val includeQuotedTweetCounter = optionsScope.counter("quoted_tweet") + val includePerspectiveCounter = optionsScope.counter("perspective") + val includeConversationMutedCounter = optionsScope.counter("conversation_muted") + val includePlacesCounter = optionsScope.counter("places") + val includeCardsCounter = optionsScope.counter("cards") + val includeRetweetCountsCounter = optionsScope.counter("retweet_counts") + val includeReplyCountsCounter = optionsScope.counter("reply_counts") + val includeFavoriteCountsCounter = optionsScope.counter("favorite_counts") + val includeQuoteCountsCounter = optionsScope.counter("quote_counts") + val bypassVisibilityFilteringCounter = optionsScope.counter("bypass_visibility_filtering") + val excludeReportedCounter = optionsScope.counter("exclude_reported") + val cardsPlatformKeyScope = optionsScope.scope("cards_platform_key") + val extensionsArgsCounter = optionsScope.counter("extensions_args") + val doNotCacheCounter = optionsScope.counter("do_not_cache") + val additionalFieldsScope = optionsScope.scope("additional_fields") + val safetyLevelScope = optionsScope.scope("safety_level") + val includeProfileGeoEnrichment = optionsScope.counter("profile_geo_enrichment") + val includeMediaAdditionalMetadata = optionsScope.counter("media_additional_metadata") + val simpleQuotedTweet = optionsScope.counter("simple_quoted_tweet") + val forUserIdCounter = optionsScope.counter("for_user_id") + + def includesPerspectivals(options: GetTweetOptions) = + options.includePerspectivals && options.forUserId.nonEmpty + + Effect { + case GetTweetsRequest(tweetIds, _, Some(options), _) => + requestSizeStat.add(tweetIds.size) + if (!byClient) languageScope.counter(options.languageTag).incr() + if (options.includeSourceTweet) includeSourceTweetCounter.incr() + if (options.includeQuotedTweet) includeQuotedTweetCounter.incr() + if (includesPerspectivals(options)) includePerspectiveCounter.incr() + if 
(options.includeConversationMuted) includeConversationMutedCounter.incr() + if (options.includePlaces) includePlacesCounter.incr() + if (options.includeCards) includeCardsCounter.incr() + if (options.includeRetweetCount) includeRetweetCountsCounter.incr() + if (options.includeReplyCount) includeReplyCountsCounter.incr() + if (options.includeFavoriteCount) includeFavoriteCountsCounter.incr() + if (options.includeQuoteCount) includeQuoteCountsCounter.incr() + if (options.bypassVisibilityFiltering) bypassVisibilityFilteringCounter.incr() + if (options.excludeReported) excludeReportedCounter.incr() + if (options.extensionsArgs.nonEmpty) extensionsArgsCounter.incr() + if (options.doNotCache) doNotCacheCounter.incr() + if (options.includeProfileGeoEnrichment) includeProfileGeoEnrichment.incr() + if (options.includeMediaAdditionalMetadata) includeMediaAdditionalMetadata.incr() + if (options.simpleQuotedTweet) simpleQuotedTweet.incr() + if (options.forUserId.nonEmpty) forUserIdCounter.incr() + if (!byClient) { + options.cardsPlatformKey.foreach { cardsPlatformKey => + cardsPlatformKeyScope.counter(cardsPlatformKey).incr() + } + } + options.additionalFieldIds.foreach { id => + additionalFieldsScope.counter(id.toString).incr() + } + options.safetyLevel.foreach { level => safetyLevelScope.counter(level.toString).incr() } + } + } + + /** + * We count the number of times each tweet state is returned as a + * general measure of the health of TweetyPie. partial and not_found + * tweet states should be close to zero. 
+ */ + private def countStates(stats: StatsReceiver): Effect[Seq[GetTweetResult]] = { + val state = Observer.observeStatusStates(stats) + Effect { results => results.foreach { tweetResult => state(tweetResult.tweetState) } } + } + + private def countTweetReadAttributes( + stats: StatsReceiver, + byClient: Boolean + ): Effect[Seq[GetTweetResult]] = { + val tweetObserver = Observer.countTweetAttributes(stats, byClient) + Effect { results => + results.foreach { tweetResult => tweetResult.tweet.foreach(tweetObserver) } + } + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala new file mode 100644 index 000000000..c5a9782cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/Observer.scala @@ -0,0 +1,365 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.media.MediaKeyClassifier +import com.twitter.tweetypie.thriftscala._ +import com.twitter.tweetypie.tweettext.TweetText.codePointLength +import com.twitter.conversions.DurationOps._ + +/** + * Observer can be used for storing + * - one-off handler specific metrics with minor logic + * - reusable Tweetypie service metrics for multiple handlers + */ +private[service] object Observer { + + val successStatusStates: Set[StatusState] = Set( + StatusState.Found, + StatusState.NotFound, + StatusState.DeactivatedUser, + StatusState.SuspendedUser, + StatusState.ProtectedUser, + StatusState.ReportedTweet, + StatusState.UnsupportedClient, + StatusState.Drop, + StatusState.Suppress, + StatusState.Deleted, + StatusState.BounceDeleted + ) + + def observeStatusStates(statsReceiver: StatsReceiver): Effect[StatusState] = { + val stats = statsReceiver.scope("status_state") + val total = 
statsReceiver.counter("status_results") + + val foundCounter = stats.counter("found") + val notFoundCounter = stats.counter("not_found") + val partialCounter = stats.counter("partial") + val timedOutCounter = stats.counter("timed_out") + val failedCounter = stats.counter("failed") + val deactivatedCounter = stats.counter("deactivated") + val suspendedCounter = stats.counter("suspended") + val protectedCounter = stats.counter("protected") + val reportedCounter = stats.counter("reported") + val overCapacityCounter = stats.counter("over_capacity") + val unsupportedClientCounter = stats.counter("unsupported_client") + val dropCounter = stats.counter("drop") + val suppressCounter = stats.counter("suppress") + val deletedCounter = stats.counter("deleted") + val bounceDeletedCounter = stats.counter("bounce_deleted") + + Effect { st => + total.incr() + st match { + case StatusState.Found => foundCounter.incr() + case StatusState.NotFound => notFoundCounter.incr() + case StatusState.Partial => partialCounter.incr() + case StatusState.TimedOut => timedOutCounter.incr() + case StatusState.Failed => failedCounter.incr() + case StatusState.DeactivatedUser => deactivatedCounter.incr() + case StatusState.SuspendedUser => suspendedCounter.incr() + case StatusState.ProtectedUser => protectedCounter.incr() + case StatusState.ReportedTweet => reportedCounter.incr() + case StatusState.OverCapacity => overCapacityCounter.incr() + case StatusState.UnsupportedClient => unsupportedClientCounter.incr() + case StatusState.Drop => dropCounter.incr() + case StatusState.Suppress => suppressCounter.incr() + case StatusState.Deleted => deletedCounter.incr() + case StatusState.BounceDeleted => bounceDeletedCounter.incr() + case _ => + } + } + } + + def observeSetFieldsRequest(stats: StatsReceiver): Effect[SetAdditionalFieldsRequest] = + Effect { request => + val tweet = request.additionalFields + AdditionalFields.nonEmptyAdditionalFieldIds(tweet).foreach { id => + val fieldScope = 
"field_%d".format(id) + val fieldCounter = stats.counter(fieldScope) + val sizeStats = stats.stat(fieldScope) + + tweet.getFieldBlob(id).foreach { blob => + fieldCounter.incr() + sizeStats.add(blob.content.length) + } + } + } + + def observeSetRetweetVisibilityRequest( + stats: StatsReceiver + ): Effect[SetRetweetVisibilityRequest] = { + val setInvisibleCounter = stats.counter("set_invisible") + val setVisibleCounter = stats.counter("set_visible") + + Effect { request => + if (request.visible) setVisibleCounter.incr() else setInvisibleCounter.incr() + } + } + + def observeDeleteFieldsRequest(stats: StatsReceiver): Effect[DeleteAdditionalFieldsRequest] = { + val requestSizeStat = stats.stat("request_size") + + Effect { request => + requestSizeStat.add(request.tweetIds.size) + + request.fieldIds.foreach { id => + val fieldScope = "field_%d".format(id) + val fieldCounter = stats.counter(fieldScope) + fieldCounter.incr() + } + } + } + + def observeDeleteTweetsRequest(stats: StatsReceiver): Effect[DeleteTweetsRequest] = { + val requestSizeStat = stats.stat("request_size") + val userErasureTweetsStat = stats.counter("user_erasure_tweets") + val isBounceDeleteStat = stats.counter("is_bounce_delete_tweets") + + Effect { + case DeleteTweetsRequest(tweetIds, _, _, _, isUserErasure, _, isBounceDelete, _, _) => + requestSizeStat.add(tweetIds.size) + if (isUserErasure) { + userErasureTweetsStat.incr(tweetIds.size) + } + if (isBounceDelete) { + isBounceDeleteStat.incr(tweetIds.size) + } + } + } + + def observeRetweetRequest(stats: StatsReceiver): Effect[RetweetRequest] = { + val optionsScope = stats.scope("options") + val narrowcastCounter = optionsScope.counter("narrowcast") + val nullcastCounter = optionsScope.counter("nullcast") + val darkCounter = optionsScope.counter("dark") + val successOnDupCounter = optionsScope.counter("success_on_dup") + + Effect { request => + if (request.narrowcast.nonEmpty) narrowcastCounter.incr() + if (request.nullcast) nullcastCounter.incr() + if 
(request.dark) darkCounter.incr() + if (request.returnSuccessOnDuplicate) successOnDupCounter.incr() + } + } + + def observeScrubGeo(stats: StatsReceiver): Effect[GeoScrub] = { + val optionsScope = stats.scope("options") + val hosebirdEnqueueCounter = optionsScope.counter("hosebird_enqueue") + val requestSizeStat = stats.stat("request_size") + + Effect { request => + requestSizeStat.add(request.statusIds.size) + if (request.hosebirdEnqueue) hosebirdEnqueueCounter.incr() + } + } + + def observeEventOrRetry(stats: StatsReceiver, isRetry: Boolean): Unit = { + val statName = if (isRetry) "retry" else "event" + stats.counter(statName).incr() + } + + def observeAsyncInsertRequest(stats: StatsReceiver): Effect[AsyncInsertRequest] = { + val insertScope = stats.scope("insert") + val ageStat = insertScope.stat("age") + Effect { request => + observeEventOrRetry(insertScope, request.retryAction.isDefined) + ageStat.add(SnowflakeId.timeFromId(request.tweet.id).untilNow.inMillis) + } + } + + def observeAsyncSetAdditionalFieldsRequest( + stats: StatsReceiver + ): Effect[AsyncSetAdditionalFieldsRequest] = { + val setAdditionalFieldsScope = stats.scope("set_additional_fields") + Effect { request => + observeEventOrRetry(setAdditionalFieldsScope, request.retryAction.isDefined) + } + } + + def observeAsyncSetRetweetVisibilityRequest( + stats: StatsReceiver + ): Effect[AsyncSetRetweetVisibilityRequest] = { + val setRetweetVisibilityScope = stats.scope("set_retweet_visibility") + + Effect { request => + observeEventOrRetry(setRetweetVisibilityScope, request.retryAction.isDefined) + } + } + + def observeAsyncUndeleteTweetRequest(stats: StatsReceiver): Effect[AsyncUndeleteTweetRequest] = { + val undeleteTweetScope = stats.scope("undelete_tweet") + Effect { request => observeEventOrRetry(undeleteTweetScope, request.retryAction.isDefined) } + } + + def observeAsyncDeleteTweetRequest(stats: StatsReceiver): Effect[AsyncDeleteRequest] = { + val deleteTweetScope = stats.scope("delete_tweet") + 
Effect { request => observeEventOrRetry(deleteTweetScope, request.retryAction.isDefined) } + } + + def observeAsyncDeleteAdditionalFieldsRequest( + stats: StatsReceiver + ): Effect[AsyncDeleteAdditionalFieldsRequest] = { + val deleteAdditionalFieldsScope = stats.scope("delete_additional_fields") + Effect { request => + observeEventOrRetry( + deleteAdditionalFieldsScope, + request.retryAction.isDefined + ) + } + } + + def observeAsyncTakedownRequest(stats: StatsReceiver): Effect[AsyncTakedownRequest] = { + val takedownScope = stats.scope("takedown") + Effect { request => observeEventOrRetry(takedownScope, request.retryAction.isDefined) } + } + + def observeAsyncUpdatePossiblySensitiveTweetRequest( + stats: StatsReceiver + ): Effect[AsyncUpdatePossiblySensitiveTweetRequest] = { + val updatePossiblySensitiveTweetScope = stats.scope("update_possibly_sensitive_tweet") + Effect { request => + observeEventOrRetry(updatePossiblySensitiveTweetScope, request.action.isDefined) + } + } + + def observeReplicatedInsertTweetRequest(stats: StatsReceiver): Effect[Tweet] = { + val ageStat = stats.stat("age") // in milliseconds + Effect { request => ageStat.add(SnowflakeId.timeFromId(request.id).untilNow.inMillis) } + } + + def camelToUnderscore(str: String): String = { + val bldr = new StringBuilder + str.foldLeft(false) { (prevWasLowercase, c) => + if (prevWasLowercase && c.isUpper) { + bldr += '_' + } + bldr += c.toLower + c.isLower + } + bldr.result + } + + def observeAdditionalFields(stats: StatsReceiver): Effect[Tweet] = { + val additionalScope = stats.scope("additional_fields") + + Effect { tweet => + for (fieldId <- AdditionalFields.nonEmptyAdditionalFieldIds(tweet)) + additionalScope.counter(fieldId.toString).incr() + } + } + + /** + * We count how many tweets have each of these attributes so that we + * can observe general trends, as well as for tracking down the + * cause of behavior changes, like increased calls to certain + * services. 
+ */ + def countTweetAttributes(stats: StatsReceiver, byClient: Boolean): Effect[Tweet] = { + val ageStat = stats.stat("age") + val tweetCounter = stats.counter("tweets") + val retweetCounter = stats.counter("retweets") + val repliesCounter = stats.counter("replies") + val inReplyToTweetCounter = stats.counter("in_reply_to_tweet") + val selfRepliesCounter = stats.counter("self_replies") + val directedAtCounter = stats.counter("directed_at") + val mentionsCounter = stats.counter("mentions") + val mentionsStat = stats.stat("mentions") + val urlsCounter = stats.counter("urls") + val urlsStat = stats.stat("urls") + val hashtagsCounter = stats.counter("hashtags") + val hashtagsStat = stats.stat("hashtags") + val mediaCounter = stats.counter("media") + val mediaStat = stats.stat("media") + val photosCounter = stats.counter("media", "photos") + val gifsCounter = stats.counter("media", "animated_gifs") + val videosCounter = stats.counter("media", "videos") + val cardsCounter = stats.counter("cards") + val card2Counter = stats.counter("card2") + val geoCoordsCounter = stats.counter("geo_coordinates") + val placeCounter = stats.counter("place") + val quotedTweetCounter = stats.counter("quoted_tweet") + val selfRetweetCounter = stats.counter("self_retweet") + val languageScope = stats.scope("language") + val textLengthStat = stats.stat("text_length") + val selfThreadCounter = stats.counter("self_thread") + val communitiesTweetCounter = stats.counter("communities") + + observeAdditionalFields(stats).also { + Effect[Tweet] { tweet => + def coreDataField[T](f: TweetCoreData => T): Option[T] = + tweet.coreData.map(f) + + def coreDataOptionField[T](f: TweetCoreData => Option[T]) = + coreDataField(f).flatten + + (SnowflakeId.isSnowflakeId(tweet.id) match { + case true => Some(SnowflakeId.timeFromId(tweet.id)) + case false => coreDataField(_.createdAtSecs.seconds.afterEpoch) + }).foreach { createdAt => ageStat.add(createdAt.untilNow.inSeconds) } + + if (!byClient) { + val mentions = 
getMentions(tweet) + val urls = getUrls(tweet) + val hashtags = getHashtags(tweet) + val media = getMedia(tweet) + val mediaKeys = media.flatMap(_.mediaKey) + val share = coreDataOptionField(_.share) + val selfThreadMetadata = getSelfThreadMetadata(tweet) + val communities = getCommunities(tweet) + + tweetCounter.incr() + if (share.isDefined) retweetCounter.incr() + if (coreDataOptionField(_.directedAtUser).isDefined) directedAtCounter.incr() + + coreDataOptionField(_.reply).foreach { reply => + repliesCounter.incr() + if (reply.inReplyToStatusId.nonEmpty) { + // repliesCounter counts all Tweets with a Reply struct, + // but that includes both directed-at Tweets and + // conversational replies. Only conversational replies + // have inReplyToStatusId present, so this counter lets + // us split apart those two cases. + inReplyToTweetCounter.incr() + } + + // Not all Tweet objects have CoreData yet isSelfReply() requires it. Thus, this + // invocation is guarded by the `coreDataOptionField(_.reply)` above. 
+ if (isSelfReply(tweet)) selfRepliesCounter.incr() + } + + if (mentions.nonEmpty) mentionsCounter.incr() + if (urls.nonEmpty) urlsCounter.incr() + if (hashtags.nonEmpty) hashtagsCounter.incr() + if (media.nonEmpty) mediaCounter.incr() + if (selfThreadMetadata.nonEmpty) selfThreadCounter.incr() + if (communities.nonEmpty) communitiesTweetCounter.incr() + + mentionsStat.add(mentions.size) + urlsStat.add(urls.size) + hashtagsStat.add(hashtags.size) + mediaStat.add(media.size) + + if (mediaKeys.exists(MediaKeyClassifier.isImage(_))) photosCounter.incr() + if (mediaKeys.exists(MediaKeyClassifier.isGif(_))) gifsCounter.incr() + if (mediaKeys.exists(MediaKeyClassifier.isVideo(_))) videosCounter.incr() + + if (tweet.cards.exists(_.nonEmpty)) cardsCounter.incr() + if (tweet.card2.nonEmpty) card2Counter.incr() + if (coreDataOptionField(_.coordinates).nonEmpty) geoCoordsCounter.incr() + if (TweetLenses.place.get(tweet).nonEmpty) placeCounter.incr() + if (TweetLenses.quotedTweet.get(tweet).nonEmpty) quotedTweetCounter.incr() + if (share.exists(_.sourceUserId == getUserId(tweet))) selfRetweetCounter.incr() + + tweet.language + .map(_.language) + .foreach(lang => languageScope.counter(lang).incr()) + coreDataField(_.text).foreach(text => textLengthStat.add(codePointLength(text))) + } + } + } + } + +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala new file mode 100644 index 000000000..6d20169d0 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/service/observer/PostTweetObserver.scala @@ -0,0 +1,82 @@ +package com.twitter.tweetypie +package service +package observer + +import com.twitter.escherbird.thriftscala.TweetEntityAnnotation +import com.twitter.tweetypie.thriftscala.BatchComposeMode +import com.twitter.tweetypie.thriftscala.PostTweetRequest +import 
/**
 * Stats observers for the post-tweet endpoint: one for results, one for requests.
 */
private[service] object PostTweetObserver {

  /**
   * Counts every result by its `TweetCreateState` under the "state" scope and,
   * for `Ok` results that carry a tweet, also runs `Observer.countTweetAttributes`
   * on that tweet.
   */
  def observeResults(stats: StatsReceiver, byClient: Boolean): Effect[PostTweetResult] = {
    val stateScope = stats.scope("state")
    val tweetObserver = Observer.countTweetAttributes(stats, byClient)

    // Memoized so the counter for a given state is looked up only once.
    val stateCounters =
      Memoize { st: TweetCreateState => stateScope.counter(Observer.camelToUnderscore(st.name)) }

    Effect { result =>
      stateCounters(result.state).incr()
      if (result.state == TweetCreateState.Ok) result.tweet.foreach(tweetObserver)
    }
  }

  /**
   * True when the request's additional fields carry an escherbird entity
   * annotation whose first two components are (8, 31).
   */
  private def isCommunity(req: PostTweetRequest): Boolean = {
    // Upper-case names so the pattern below matches against these values
    // instead of binding fresh variables.
    val CommunityGroupId = 8L
    val CommunityDomainId = 31L
    req.additionalFields
      .flatMap(_.escherbirdEntityAnnotations).exists { annotations =>
        // `exists` stops at the first match; the previous collect(...).nonEmpty
        // built an intermediate collection just to test for emptiness.
        annotations.entityAnnotations.exists {
          case TweetEntityAnnotation(CommunityGroupId, CommunityDomainId, _) => true
          case _ => false
        }
      }
  }

  /**
   * Counts which optional features each `PostTweetRequest` uses, under the
   * "options" scope.
   */
  def observerRequest(stats: StatsReceiver): Effect[PostTweetRequest] = {
    val optionsScope = stats.scope("options")
    val narrowcastCounter = optionsScope.counter("narrowcast")
    val nullcastCounter = optionsScope.counter("nullcast")
    val inReplyToStatusIdCounter = optionsScope.counter("in_reply_to_status_id")
    val placeIdCounter = optionsScope.counter("place_id")
    val geoCoordinatesCounter = optionsScope.counter("geo_coordinates")
    val placeMetadataCounter = optionsScope.counter("place_metadata")
    val mediaUploadIdCounter = optionsScope.counter("media_upload_id")
    val darkCounter = optionsScope.counter("dark")
    val tweetToNarrowcastingCounter = optionsScope.counter("tweet_to_narrowcasting")
    val autoPopulateReplyMetadataCounter = optionsScope.counter("auto_populate_reply_metadata")
    val attachmentUrlCounter = optionsScope.counter("attachment_url")
    val excludeReplyUserIdsCounter = optionsScope.counter("exclude_reply_user_ids")
    val excludeReplyUserIdsStat = optionsScope.stat("exclude_reply_user_ids")
    val uniquenessIdCounter = optionsScope.counter("uniqueness_id")
    val batchModeScope = optionsScope.scope("batch_mode")
    val batchModeFirstCounter = batchModeScope.counter("first")
    val batchModeSubsequentCounter = batchModeScope.counter("subsequent")
    val communitiesCounter = optionsScope.counter("communities")

    Effect { request =>
      if (request.narrowcast.nonEmpty) narrowcastCounter.incr()
      if (request.nullcast) nullcastCounter.incr()
      if (request.inReplyToTweetId.nonEmpty) inReplyToStatusIdCounter.incr()
      if (request.geo.flatMap(_.placeId).nonEmpty) placeIdCounter.incr()
      if (request.geo.flatMap(_.coordinates).nonEmpty) geoCoordinatesCounter.incr()
      if (request.geo.flatMap(_.placeMetadata).nonEmpty) placeMetadataCounter.incr()
      if (request.mediaUploadIds.nonEmpty) mediaUploadIdCounter.incr()
      if (request.dark) darkCounter.incr()
      if (request.enableTweetToNarrowcasting) tweetToNarrowcastingCounter.incr()
      if (request.autoPopulateReplyMetadata) autoPopulateReplyMetadataCounter.incr()
      if (request.attachmentUrl.nonEmpty) attachmentUrlCounter.incr()
      if (request.excludeReplyUserIds.exists(_.nonEmpty)) excludeReplyUserIdsCounter.incr()
      if (isCommunity(request)) communitiesCounter.incr()
      if (request.uniquenessId.nonEmpty) uniquenessIdCounter.incr()
      request.transientContext.flatMap(_.batchCompose).foreach {
        case BatchComposeMode.BatchFirst => batchModeFirstCounter.incr()
        case BatchComposeMode.BatchSubsequent => batchModeSubsequentCounter.incr()
        case _ =>
      }

      // excludeReplyUserIds is an optional collection (see the exists(_.nonEmpty)
      // above). Calling `.size` directly on the Option only ever yields 0 or 1,
      // so record the size of the wrapped collection and 0 when it is absent.
      excludeReplyUserIdsStat.add(request.excludeReplyUserIds.map(_.size).getOrElse(0))
    }
  }
}
/**
 * Mix-in that records stats about a batch of `StoredTweetInfo` results.
 */
private[service] trait StoredTweetsObserver {

  /**
   * Records, on `stats`:
   *  - "count": incremented by the number of stored tweets in the batch,
   *  - "state/<name>": incremented by the number of tweets in each stored state,
   *  - "error/<name>": incremented by the number of occurrences of each error.
   *
   * @param storedTweets the batch to observe; an empty batch only adds 0 to "count"
   * @param stats        receiver under which the counters are created
   */
  protected def observeStoredTweets(
    storedTweets: Seq[StoredTweetInfo],
    stats: StatsReceiver
  ): Unit = {
    val stateScope = stats.scope("state")
    val errorScope = stats.scope("error")

    stats.counter("count").incr(storedTweets.size)

    // Counter name for a tweet's stored state; a missing state means "found".
    def stateName(info: StoredTweetInfo): String =
      info.storedTweetState match {
        case None => "found"
        case Some(_: HardDeleted) => "hard_deleted"
        case Some(_: SoftDeleted) => "soft_deleted"
        case Some(_: BounceDeleted) => "bounce_deleted"
        case Some(_: Undeleted) => "undeleted"
        case Some(_: ForceAdded) => "force_added"
        case Some(_: NotFound) => "not_found"
        case Some(_: UnknownUnionField) => "unknown"
      }

    // Group sizes are read directly rather than through `mapValues`, which
    // returns a lazy view and is deprecated as of Scala 2.13.
    storedTweets.groupBy(stateName).foreach {
      case (state, tweets) => stateScope.counter(state).incr(tweets.size)
    }

    // flatMap gathers all errors in one pass; the previous foldLeft with `++`
    // re-copied the accumulated sequence for every tweet.
    val allErrors: Seq[StoredTweetError] = storedTweets.flatMap(_.errors)

    allErrors.groupBy(_.name).foreach {
      case (error, occurrences) => errorScope.counter(error).incr(occurrences.size)
    }
  }

}
/**
 * Package-level aliases that re-export servo's request-authorization names
 * under `com.twitter.tweetypie.service`.
 */
package object service {
  // Alias for servo's ClientRequestAuthorizer type.
  type ClientRequestAuthorizer = request.ClientRequestAuthorizer

  // Type alias and matching value alias for the nested UnauthorizedException,
  // so the name is usable both as a type and as a term.
  type UnauthorizedException = request.ClientRequestAuthorizer.UnauthorizedException
  val UnauthorizedException: ClientRequestAuthorizer.UnauthorizedException.type =
    request.ClientRequestAuthorizer.UnauthorizedException
}
/**
 * AsyncEnqueueStore converts certain TweetStoreEvent types into their async-counterpart
 * events, and enqueues those to a deferredrpc-backed ThriftTweetService instance.
 */
trait AsyncEnqueueStore
    extends TweetStoreBase[AsyncEnqueueStore]
    with InsertTweet.Store
    with DeleteTweet.Store
    with UndeleteTweet.Store
    with IncrFavCount.Store
    with IncrBookmarkCount.Store
    with SetAdditionalFields.Store
    with SetRetweetVisibility.Store
    with Takedown.Store
    with DeleteAdditionalFields.Store
    with UpdatePossiblySensitiveTweet.Store {

  /** Returns a store in which every effect of this store is passed through `w`. */
  def wrap(w: TweetStore.Wrap): AsyncEnqueueStore =
    new TweetStoreWrapper[AsyncEnqueueStore](w, this)
      with AsyncEnqueueStore
      with InsertTweet.StoreWrapper
      with DeleteTweet.StoreWrapper
      with UndeleteTweet.StoreWrapper
      with IncrFavCount.StoreWrapper
      with IncrBookmarkCount.StoreWrapper
      with SetAdditionalFields.StoreWrapper
      with SetRetweetVisibility.StoreWrapper
      with Takedown.StoreWrapper
      with DeleteAdditionalFields.StoreWrapper
      with UpdatePossiblySensitiveTweet.StoreWrapper
}

object AsyncEnqueueStore {

  /**
   * Builds a store whose effects each turn the incoming event into its async
   * request and hand it to the matching `async*` method of `tweetService`.
   *
   * The three scrub functions are forwarded to `InsertTweet.Event.toAsyncRequest`;
   * no other effect uses them.
   */
  def apply(
    tweetService: ThriftTweetService,
    scrubUserInAsyncInserts: User => User,
    scrubSourceTweetInAsyncInserts: Tweet => Tweet,
    scrubSourceUserInAsyncInserts: User => User
  ): AsyncEnqueueStore =
    new AsyncEnqueueStore {
      override val insertTweet: FutureEffect[InsertTweet.Event] =
        FutureEffect[InsertTweet.Event] { event =>
          val asyncRequest =
            event.toAsyncRequest(
              scrubUserInAsyncInserts,
              scrubSourceTweetInAsyncInserts,
              scrubSourceUserInAsyncInserts
            )
          tweetService.asyncInsert(asyncRequest)
        }

      override val deleteTweet: FutureEffect[DeleteTweet.Event] =
        FutureEffect[DeleteTweet.Event] { event =>
          tweetService.asyncDelete(event.toAsyncRequest)
        }

      override val undeleteTweet: FutureEffect[UndeleteTweet.Event] =
        FutureEffect[UndeleteTweet.Event] { event =>
          tweetService.asyncUndeleteTweet(event.toAsyncUndeleteTweetRequest)
        }

      override val incrFavCount: FutureEffect[IncrFavCount.Event] =
        FutureEffect[IncrFavCount.Event] { event =>
          tweetService.asyncIncrFavCount(event.toAsyncRequest)
        }

      override val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] =
        FutureEffect[IncrBookmarkCount.Event] { event =>
          tweetService.asyncIncrBookmarkCount(event.toAsyncRequest)
        }

      override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] =
        FutureEffect[SetAdditionalFields.Event] { event =>
          tweetService.asyncSetAdditionalFields(event.toAsyncRequest)
        }

      override val setRetweetVisibility: FutureEffect[SetRetweetVisibility.Event] =
        FutureEffect[SetRetweetVisibility.Event] { event =>
          tweetService.asyncSetRetweetVisibility(event.toAsyncRequest)
        }

      override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] =
        FutureEffect[DeleteAdditionalFields.Event] { event =>
          tweetService.asyncDeleteAdditionalFields(event.toAsyncRequest)
        }

      override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] =
        FutureEffect[UpdatePossiblySensitiveTweet.Event] { event =>
          tweetService.asyncUpdatePossiblySensitiveTweet(event.toAsyncRequest)
        }

      override val takedown: FutureEffect[Takedown.Event] =
        FutureEffect[Takedown.Event] { event =>
          tweetService.asyncTakedown(event.toAsyncRequest)
        }
    }
}
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle/finagle-core/src/main", + "flock-client/src/main/scala", + "mediaservices/commons/src/main/thrift:thrift-scala", + "scrooge/scrooge-core", + "tweetypie/servo/repo", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "snowflake/src/main/scala/com/twitter/snowflake/id", + "src/scala/com/twitter/takedown/util", + "src/thrift/com/twitter/context:feature-context-scala", + "src/thrift/com/twitter/context:twitter-context-scala", + "src/thrift/com/twitter/escherbird:media-annotation-structs-scala", + "src/thrift/com/twitter/expandodo:cards-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/guano:guano-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:audit-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:events-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service-scala", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet-scala", + "stitch/stitch-core", + "stitch/stitch-core/src/main/scala/com/twitter/stitch", + "tweetypie/server/src/main/scala/com/twitter/tweetypie", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/backends", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/core", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/media", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/repository", + "tweetypie/server/src/main/scala/com/twitter/tweetypie/serverutil", + "tweetypie/server/src/main/thrift:compiled-scala", + "tweetypie/common/src/scala/com/twitter/tweetypie/additionalfields", + "tweetypie/common/src/scala/com/twitter/tweetypie/client_id", + 
"tweetypie/common/src/scala/com/twitter/tweetypie/media", + "tweetypie/common/src/scala/com/twitter/tweetypie/storage", + "tweetypie/common/src/scala/com/twitter/tweetypie/tflock", + "tweetypie/common/src/scala/com/twitter/tweetypie/util", + "twitter-context", + "util/util-slf4j-api/src/main/scala/com/twitter/util/logging", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala new file mode 100644 index 000000000..2f4dd6387 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/CachingTweetStore.scala @@ -0,0 +1,420 @@ +package com.twitter.tweetypie +package store + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.scrooge.TFieldBlob +import com.twitter.servo.cache.LockingCache._ +import com.twitter.servo.cache._ +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.repository.CachedBounceDeleted.isBounceDeleted +import com.twitter.tweetypie.repository.CachedBounceDeleted.toBounceDeletedCachedTweet +import com.twitter.tweetypie.repository._ +import com.twitter.tweetypie.store.TweetUpdate._ +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Time +import diffshow.DiffShow + +trait CachingTweetStore + extends TweetStoreBase[CachingTweetStore] + with InsertTweet.Store + with ReplicatedInsertTweet.Store + with DeleteTweet.Store + with AsyncDeleteTweet.Store + with ReplicatedDeleteTweet.Store + with UndeleteTweet.Store + with AsyncUndeleteTweet.Store + with ReplicatedUndeleteTweet.Store + with SetAdditionalFields.Store + with ReplicatedSetAdditionalFields.Store + with DeleteAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with ReplicatedDeleteAdditionalFields.Store + with ScrubGeo.Store + with ReplicatedScrubGeo.Store + 
with Takedown.Store + with ReplicatedTakedown.Store + with Flush.Store + with UpdatePossiblySensitiveTweet.Store + with AsyncUpdatePossiblySensitiveTweet.Store + with ReplicatedUpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): CachingTweetStore = + new TweetStoreWrapper(w, this) + with CachingTweetStore + with InsertTweet.StoreWrapper + with ReplicatedInsertTweet.StoreWrapper + with DeleteTweet.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with ReplicatedDeleteTweet.StoreWrapper + with UndeleteTweet.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper + with ReplicatedUndeleteTweet.StoreWrapper + with SetAdditionalFields.StoreWrapper + with ReplicatedSetAdditionalFields.StoreWrapper + with DeleteAdditionalFields.StoreWrapper + with AsyncDeleteAdditionalFields.StoreWrapper + with ReplicatedDeleteAdditionalFields.StoreWrapper + with ScrubGeo.StoreWrapper + with ReplicatedScrubGeo.StoreWrapper + with Takedown.StoreWrapper + with ReplicatedTakedown.StoreWrapper + with Flush.StoreWrapper + with UpdatePossiblySensitiveTweet.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper + with ReplicatedUpdatePossiblySensitiveTweet.StoreWrapper +} + +object CachingTweetStore { + val Action: AsyncWriteAction.CacheUpdate.type = AsyncWriteAction.CacheUpdate + + def apply( + tweetCache: LockingCache[TweetKey, Cached[CachedTweet]], + tweetKeyFactory: TweetKeyFactory, + stats: StatsReceiver + ): CachingTweetStore = { + val ops = + new CachingTweetStoreOps( + tweetCache, + tweetKeyFactory, + stats + ) + + new CachingTweetStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = { + FutureEffect[InsertTweet.Event](e => + ops.insertTweet(e.internalTweet, e.initialTweetUpdateRequest)) + } + + override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] = + FutureEffect[ReplicatedInsertTweet.Event](e => + ops.insertTweet(e.cachedTweet, e.initialTweetUpdateRequest)) + + override val deleteTweet: FutureEffect[DeleteTweet.Event] 
= + FutureEffect[DeleteTweet.Event](e => + ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete)) + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event](e => + ops.deleteTweet(e.tweet.id, updateOnly = true, isBounceDelete = e.isBounceDelete)) + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] = + FutureEffect[ReplicatedDeleteTweet.Event](e => + ops.deleteTweet( + tweetId = e.tweet.id, + updateOnly = e.isErasure, + isBounceDelete = e.isBounceDelete + )) + + override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = + FutureEffect[UndeleteTweet.Event](e => ops.undeleteTweet(e.internalTweet)) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet)) + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + + override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] = + FutureEffect[ReplicatedUndeleteTweet.Event](e => ops.undeleteTweet(e.cachedTweet)) + + override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + FutureEffect[SetAdditionalFields.Event](e => ops.setAdditionalFields(e.additionalFields)) + + override val replicatedSetAdditionalFields: FutureEffect[ + ReplicatedSetAdditionalFields.Event + ] = + FutureEffect[ReplicatedSetAdditionalFields.Event](e => + ops.setAdditionalFields(e.additionalFields)) + + override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + FutureEffect[DeleteAdditionalFields.Event](e => + ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) + + override val asyncDeleteAdditionalFields: 
FutureEffect[AsyncDeleteAdditionalFields.Event] = + FutureEffect[AsyncDeleteAdditionalFields.Event](e => + ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) + + override val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncDeleteAdditionalFields) + + override val replicatedDeleteAdditionalFields: FutureEffect[ + ReplicatedDeleteAdditionalFields.Event + ] = + FutureEffect[ReplicatedDeleteAdditionalFields.Event](e => + ops.deleteAdditionalFields(e.tweetId, e.fieldIds)) + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event](e => ops.scrubGeo(e.tweetIds)) + + override val replicatedScrubGeo: FutureEffect[ReplicatedScrubGeo.Event] = + FutureEffect[ReplicatedScrubGeo.Event](e => ops.scrubGeo(e.tweetIds)) + + override val takedown: FutureEffect[Takedown.Event] = + FutureEffect[Takedown.Event](e => ops.takedown(e.tweet)) + + override val replicatedTakedown: FutureEffect[ReplicatedTakedown.Event] = + FutureEffect[ReplicatedTakedown.Event](e => ops.takedown(e.tweet)) + + override val flush: FutureEffect[Flush.Event] = + FutureEffect[Flush.Event](e => ops.flushTweets(e.tweetIds, logExisting = e.logExisting)) + .onlyIf(_.flushTweets) + + override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + FutureEffect[UpdatePossiblySensitiveTweet.Event](e => ops.updatePossiblySensitive(e.tweet)) + + override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[ + ReplicatedUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[ReplicatedUpdatePossiblySensitiveTweet.Event](e => + ops.updatePossiblySensitive(e.tweet)) + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e => + ops.updatePossiblySensitive(e.tweet)) + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + 
/**
 * Cache operations behind CachingTweetStore. Every write goes through
 * `tweetCache.lockAndSet` with a handler that decides, from the value currently
 * in cache, what (if anything) to store.
 *
 * @param tweetCache      cache of `Cached[CachedTweet]` keyed by `TweetKey`
 * @param tweetKeyFactory converts tweet ids to cache keys
 * @param stats           receiver for the "eviction_failures" counter
 * @param evictionRetries maximum number of delete attempts per key when evicting
 */
private class CachingTweetStoreOps(
  tweetCache: LockingCache[TweetKey, Cached[CachedTweet]],
  tweetKeyFactory: TweetKeyFactory,
  stats: StatsReceiver,
  evictionRetries: Int = 3) {
  type CachedTweetHandler = Handler[Cached[CachedTweet]]

  private val preferNewestPicker = new PreferNewestCached[CachedTweet]

  private val evictionFailedCounter = stats.counter("eviction_failures")

  private val cacheFlushesLog = Logger("com.twitter.tweetypie.store.CacheFlushesLog")

  private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule)

  /**
   * Inserts a tweet into cache, recording all compiled additional fields and all
   * included passthrough fields. Additionally, if the insertion event contains
   * an `InitialTweetUpdateRequest`, we will update the cache entry for this tweet's
   * initialTweet.
   */
  def insertTweet(
    ct: CachedTweet,
    initialTweetUpdateRequest: Option[InitialTweetUpdateRequest]
  ): Future[Unit] =
    lockAndSet(
      ct.tweet.id,
      insertTweetHandler(ct)
    ).flatMap { _ =>
      initialTweetUpdateRequest match {
        case Some(request) =>
          lockAndSet(
            request.initialTweetId,
            updateTweetHandler(tweet => InitialTweetUpdate.updateTweet(tweet, request))
          )
        case None =>
          Future.Unit
      }
    }

  /**
   * Writes a `deleted` tombstone to cache. If `updateOnly` is true, then we only
   * write the tombstone if the tweet is already in cache. If `isBounceDelete` we
   * write a special bounce-deleted CachedTweet record to cache.
   */
  def deleteTweet(tweetId: TweetId, updateOnly: Boolean, isBounceDelete: Boolean): Future[Unit] = {
    // We only need to store a CachedTweet value when the tweet is bounce-deleted, to support
    // rendering timeline tombstones for tweets that violated the Twitter Rules.
    // see go/bounced-tweet
    val cachedValue = if (isBounceDelete) {
      found(toBounceDeletedCachedTweet(tweetId))
    } else {
      writeThroughCached[CachedTweet](None, CachedValueStatus.Deleted)
    }

    val pickerHandler =
      if (updateOnly) {
        deleteTweetUpdateOnlyHandler(cachedValue)
      } else {
        deleteTweetHandler(cachedValue)
      }

    lockAndSet(tweetId, pickerHandler)
  }

  /** Re-inserts the tweet into cache, using the same handler as an insert. */
  def undeleteTweet(ct: CachedTweet): Future[Unit] =
    lockAndSet(
      ct.tweet.id,
      insertTweetHandler(ct)
    )

  /** Applies the additional fields carried by `tweet` to the cached copy, if one is cached. */
  def setAdditionalFields(tweet: Tweet): Future[Unit] =
    lockAndSet(tweet.id, setFieldsHandler(AdditionalFields.additionalFields(tweet)))

  /** Unsets the given fields on the cached copy of the tweet, if one is cached. */
  def deleteAdditionalFields(tweetId: TweetId, fieldIds: Seq[FieldId]): Future[Unit] =
    lockAndSet(tweetId, deleteFieldsHandler(fieldIds))

  /**
   * Removes geo information from the cached copies of the given tweets,
   * evicting an entry when the in-place update fails.
   */
  def scrubGeo(tweetIds: Seq[TweetId]): Future[Unit] =
    Future.join {
      tweetIds.map { id =>
        // First, attempt to modify any tweets that are in cache to
        // avoid having to reload the cached tweet from storage.
        lockAndSet(id, scrubGeoHandler).unit.rescue {
          case _: OptimisticLockingCache.LockAndSetFailure =>
            // If the modification fails, then remove whatever is in
            // cache. This is much more likely to succeed because it
            // does not require multiple successful requests to cache.
            // This will force the tweet to be loaded from storage the
            // next time it is requested, and the stored tweet will have
            // the geo information removed.
            //
            // This eviction path was added due to frequent failures of
            // the in-place modification code path, causing geoscrub
            // daemon tasks to fail.
            evictOne(tweetKeyFactory.fromId(id), evictionRetries)
        }
      }
    }

  /** Copies the takedown fields of `tweet` onto the cached copy, if one is cached. */
  def takedown(tweet: Tweet): Future[Unit] =
    lockAndSet(tweet.id, updateCachedTweetHandler(copyTakedownFieldsForUpdate(tweet)))

  /** Copies the nsfw fields of `tweet` onto the cached copy, if one is cached. */
  def updatePossiblySensitive(tweet: Tweet): Future[Unit] =
    lockAndSet(tweet.id, updateTweetHandler(copyNsfwFieldsForUpdate(tweet)))

  /**
   * Evicts the given tweets from cache, optionally logging the cached values
   * first. Eviction runs whether or not the logging step succeeded.
   *
   * The returned Future completes only after eviction has finished, and then
   * carries the outcome of the logging step. Previously the eviction was passed
   * to `ensure`, which takes a by-name Unit: the eviction Future was discarded,
   * so callers could observe completion while deletes were still in flight.
   */
  def flushTweets(tweetIds: Seq[TweetId], logExisting: Boolean = false): Future[Unit] = {
    val tweetKeys = tweetIds.map(tweetKeyFactory.fromId)

    Future.when(logExisting) { logExistingValues(tweetKeys) }.transform { logResult =>
      evictAll(tweetKeys).flatMap(_ => Future.const(logResult))
    }
  }

  /**
   * A LockingCache.Handler that inserts a tweet into cache.
   */
  private def insertTweetHandler(newValue: CachedTweet): Handler[Cached[CachedTweet]] =
    AlwaysSetHandler(Some(writeThroughCached(Some(newValue), CachedValueStatus.Found)))

  /** True for entries with status Found that are not bounce-deleted records. */
  private def foundAndNotBounced(c: Cached[CachedTweet]) =
    c.status == CachedValueStatus.Found && !isBounceDeleted(c)

  /**
   * A LockingCache.Handler that updates the Tweet inside an existing CachedTweet
   * in cache. Yields None (nothing to write) unless a found, non-bounced value
   * is present.
   */
  private def updateTweetHandler(update: Tweet => Tweet): CachedTweetHandler =
    inCache =>
      for {
        cached <- inCache.filter(foundAndNotBounced)
        cachedTweet <- cached.value
        updatedTweet = update(cachedTweet.tweet)
      } yield found(cachedTweet.copy(tweet = updatedTweet))

  /**
   * A LockingCache.Handler that updates an existing CachedTweet in cache as a
   * whole (not just its Tweet).
   */
  private def updateCachedTweetHandler(update: CachedTweet => CachedTweet): CachedTweetHandler =
    inCache =>
      for {
        cached <- inCache.filter(foundAndNotBounced)
        cachedTweet <- cached.value
        updatedCachedTweet = update(cachedTweet)
      } yield found(updatedCachedTweet)

  private def deleteTweetHandler(value: Cached[CachedTweet]): CachedTweetHandler =
    PickingHandler(value, preferNewestPicker)

  private def deleteTweetUpdateOnlyHandler(value: Cached[CachedTweet]): CachedTweetHandler =
    UpdateOnlyPickingHandler(value, preferNewestPicker)

  /**
   * Handler that sets the given additional fields on the cached tweet.
   *
   * NOTE(review): this wraps the updated tweet in a fresh `CachedTweet(...)`,
   * whereas deleteFieldsHandler uses `cachedTweet.copy(tweet = ...)`. Any other
   * CachedTweet fields are therefore reset here - confirm this is intended.
   */
  private def setFieldsHandler(additional: Seq[TFieldBlob]): CachedTweetHandler =
    inCache =>
      for {
        cached <- inCache.filter(foundAndNotBounced)
        cachedTweet <- cached.value
        updatedTweet = AdditionalFields.setAdditionalFields(cachedTweet.tweet, additional)
        updatedCachedTweet = CachedTweet(updatedTweet)
      } yield found(updatedCachedTweet)

  /** Handler that unsets the given fields on the cached tweet. */
  private def deleteFieldsHandler(fieldIds: Seq[FieldId]): CachedTweetHandler =
    inCache =>
      for {
        cached <- inCache.filter(foundAndNotBounced)
        cachedTweet <- cached.value
        updatedTweet = AdditionalFields.unsetFields(cachedTweet.tweet, fieldIds)
        scrubbedCachedTweet = cachedTweet.copy(tweet = updatedTweet)
      } yield found(scrubbedCachedTweet)

  /**
   * Handler that clears coordinates, placeId and place on the cached tweet.
   * Yields None (nothing to write) when the tweet has no core data or no geo.
   */
  private val scrubGeoHandler: CachedTweetHandler =
    inCache =>
      for {
        cached <- inCache.filter(foundAndNotBounced)
        cachedTweet <- cached.value
        tweet = cachedTweet.tweet
        coreData <- tweet.coreData if hasGeo(tweet)
        scrubbedCoreData = coreData.copy(coordinates = None, placeId = None)
        scrubbedTweet = tweet.copy(coreData = Some(scrubbedCoreData), place = None)
        scrubbedCachedTweet = cachedTweet.copy(tweet = scrubbedTweet)
      } yield found(scrubbedCachedTweet)

  /**
   * Deletes `key` from cache, trying at most `tries` times. The result never
   * fails: it is 0 when a delete succeeded and 1 when every attempt threw.
   */
  private def evictOne(key: TweetKey, tries: Int): Future[Int] =
    tweetCache.delete(key).transform {
      case Throw(_) if tries > 1 => evictOne(key, tries - 1)
      case Throw(_) => Future.value(1)
      case Return(_) => Future.value(0)
    }

  /** Evicts every key and adds the number of failed evictions to "eviction_failures". */
  private def evictAll(keys: Seq[TweetKey]): Future[Unit] =
    Future
      .collect {
        keys.map(evictOne(_, evictionRetries))
      }
      .onSuccess { (failures: Seq[Int]) => evictionFailedCounter.incr(failures.sum) }
      .unit

  /** Logs, as one JSON line per entry, each of the given keys that currently holds a value. */
  private def logExistingValues(keys: Seq[TweetKey]): Future[Unit] =
    tweetCache
      .get(keys)
      .map { existing =>
        for {
          (key, cached) <- existing.found
          cachedTweet <- cached.value
          tweet = cachedTweet.tweet
        } yield {
          cacheFlushesLog.info(
            mapper.writeValueAsString(
              Map(
                "key" -> key,
                "tweet_id" -> tweet.id,
                "tweet" -> DiffShow.show(tweet)
              )
            )
          )
        }
      }
      .unit

  /** Wraps `value` as a Found cache entry stamped with the current time. */
  private def found(value: CachedTweet): Cached[CachedTweet] =
    writeThroughCached(Some(value), CachedValueStatus.Found)

  /** Builds a cache entry with the given status, using a single `Time.now` for both timestamps. */
  private def writeThroughCached[V](value: Option[V], status: CachedValueStatus): Cached[V] = {
    val now = Time.now
    Cached(value, status, now, None, Some(now))
  }

  private def lockAndSet(tweetId: TweetId, handler: LockingCache.Handler[Cached[CachedTweet]]) =
    tweetCache.lockAndSet(tweetKeyFactory.fromId(tweetId), handler).unit
}
FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val deleteAdditionalFields: FutureEffect[Event] = wrap( + underlying.deleteAdditionalFields) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + asyncEnqueueStore: AsyncEnqueueStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val deleteAdditionalFields: FutureEffect[Event] = + FutureEffect.inParallel( + // ignore failures deleting from cache, will be retried in async-path + cachingTweetStore.ignoreFailures.deleteAdditionalFields, + asyncEnqueueStore.deleteAdditionalFields, + logLensStore.deleteAdditionalFields + ) + } + } +} + +object AsyncDeleteAdditionalFields extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncDeleteAdditionalFieldsRequest, + user: User + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + tweetId = request.tweetId, + fieldIds = request.fieldIds, + userId = request.userId, + optUser = Some(user), + timestamp = Time.fromMilliseconds(request.timestamp) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweetId: TweetId, + fieldIds: Seq[FieldId], + userId: UserId, + optUser: Option[User], + timestamp: Time) + extends AsyncTweetStoreEvent("async_delete_additional_fields") + with TweetStoreTweetEvent { + + def toAsyncRequest( + action: Option[AsyncWriteAction] = None + ): AsyncDeleteAdditionalFieldsRequest = + AsyncDeleteAdditionalFieldsRequest( + tweetId = tweetId, + fieldIds = fieldIds, + userId = userId, + timestamp = timestamp.inMillis, + retryAction = action + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.AdditionalFieldDeleteEvent( + AdditionalFieldDeleteEvent( + deletedFields = Map(tweetId -> fieldIds), + userId = optUser.map(_.id) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + 
service.asyncDeleteAdditionalFields(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.DeleteAdditionalFields.type = + AsyncWriteEventType.DeleteAdditionalFields + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncDeleteAdditionalFields: FutureEffect[Event] + val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncDeleteAdditionalFields: FutureEffect[Event] = wrap( + underlying.asyncDeleteAdditionalFields) + override val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncDeleteAdditionalFields + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + eventBusEnqueueStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + eventBusEnqueueStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncDeleteAdditionalFields: FutureEffect[Event] = build( + _.asyncDeleteAdditionalFields) + override val retryAsyncDeleteAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncDeleteAdditionalFields) + } + } + } +} + +object ReplicatedDeleteAdditionalFields extends TweetStore.ReplicatedModule { + + case class Event(tweetId: TweetId, fieldIds: Seq[FieldId]) + extends ReplicatedTweetStoreEvent("replicated_delete_additional_fields") + + trait Store { + val replicatedDeleteAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val 
replicatedDeleteAdditionalFields: FutureEffect[Event] = + wrap(underlying.replicatedDeleteAdditionalFields) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedDeleteAdditionalFields: FutureEffect[Event] = + cachingTweetStore.replicatedDeleteAdditionalFields + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala new file mode 100644 index 000000000..c2b315d27 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/DeleteTweet.scala @@ -0,0 +1,221 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import com.twitter.tweetypie.thriftscala._ + +object DeleteTweet extends TweetStore.SyncModule { + case class Event( + tweet: Tweet, + timestamp: Time, + user: Option[User] = None, + byUserId: Option[UserId] = None, + auditPassthrough: Option[AuditDeleteTweet] = None, + cascadedFromTweetId: Option[TweetId] = None, + isUserErasure: Boolean = false, + isBounceDelete: Boolean = false, + isLastQuoteOfQuoter: Boolean = false, + isAdminDelete: Boolean) + extends SyncTweetStoreEvent("delete_tweet") { + + def toAsyncRequest: AsyncDeleteRequest = + AsyncDeleteRequest( + tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + auditPassthrough = auditPassthrough, + cascadedFromTweetId = cascadedFromTweetId, + isUserErasure = isUserErasure, + isBounceDelete = isBounceDelete, + isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), + isAdminDelete = Some(isAdminDelete) + ) + } + + trait Store { + val deleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val deleteTweet: FutureEffect[Event] = wrap(underlying.deleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + 
asyncEnqueueStore: AsyncEnqueueStore, + userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val deleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.ignoreFailures.deleteTweet, + asyncEnqueueStore.deleteTweet, + userCountsUpdatingStore.deleteTweet, + tweetCountsUpdatingStore.deleteTweet, + logLensStore.deleteTweet + ) + } + } +} + +object AsyncDeleteTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest(request: AsyncDeleteRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncDeleteTweet.Event( + tweet = request.tweet, + timestamp = Time.fromMilliseconds(request.timestamp), + optUser = request.user, + byUserId = request.byUserId, + auditPassthrough = request.auditPassthrough, + cascadedFromTweetId = request.cascadedFromTweetId, + isUserErasure = request.isUserErasure, + isBounceDelete = request.isBounceDelete, + isLastQuoteOfQuoter = request.isLastQuoteOfQuoter.getOrElse(false), + isAdminDelete = request.isAdminDelete.getOrElse(false) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + timestamp: Time, + optUser: Option[User] = None, + byUserId: Option[UserId] = None, + auditPassthrough: Option[AuditDeleteTweet] = None, + cascadedFromTweetId: Option[TweetId] = None, + isUserErasure: Boolean = false, + isBounceDelete: Boolean, + isLastQuoteOfQuoter: Boolean = false, + isAdminDelete: Boolean) + extends AsyncTweetStoreEvent("async_delete_tweet") + with TweetStoreTweetEvent { + val tweetEventTweetId: TweetId = tweet.id + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncDeleteRequest = + AsyncDeleteRequest( + tweet = tweet, + user = optUser, + byUserId = byUserId, + timestamp = timestamp.inMillis, + auditPassthrough = auditPassthrough, + cascadedFromTweetId = cascadedFromTweetId, + retryAction = action, + 
isUserErasure = isUserErasure, + isBounceDelete = isBounceDelete, + isLastQuoteOfQuoter = Some(isLastQuoteOfQuoter), + isAdminDelete = Some(isAdminDelete) + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetDeleteEvent( + TweetDeleteEvent( + tweet = scrub(tweet), + user = optUser, + isUserErasure = Some(isUserErasure), + audit = auditPassthrough, + byUserId = byUserId, + isAdminDelete = Some(isAdminDelete) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncDelete(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Delete.type = AsyncWriteEventType.Delete + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncDeleteTweet: FutureEffect[Event] + val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncDeleteTweet: FutureEffect[Event] = wrap(underlying.asyncDeleteTweet) + override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncDeleteTweet) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + indexingStore: TweetIndexingStore, + eventBusEnqueueStore: TweetEventBusStore, + timelineUpdatingStore: TlsTimelineUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + guanoServiceStore: GuanoServiceStore, + mediaServiceStore: MediaServiceStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + indexingStore, + eventBusEnqueueStore, + timelineUpdatingStore, + tweetCountsUpdatingStore, + guanoServiceStore, + mediaServiceStore + ) + + def 
build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncDeleteTweet: FutureEffect[Event] = build(_.asyncDeleteTweet) + override val retryAsyncDeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + _.retryAsyncDeleteTweet) + } + } + } +} + +object ReplicatedDeleteTweet extends TweetStore.ReplicatedModule { + + case class Event( + tweet: Tweet, + isErasure: Boolean, + isBounceDelete: Boolean, + isLastQuoteOfQuoter: Boolean = false) + extends ReplicatedTweetStoreEvent("replicated_delete_tweet") + + trait Store { + val replicatedDeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedDeleteTweet: FutureEffect[Event] = wrap(underlying.replicatedDeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = { + new Store { + override val replicatedDeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedDeleteTweet, + tweetCountsUpdatingStore.replicatedDeleteTweet.ignoreFailures + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala new file mode 100644 index 000000000..ad0104acd --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/FanoutServiceStore.scala @@ -0,0 +1,38 @@ +package com.twitter.tweetypie +package store + +import com.twitter.timelineservice.fanout.thriftscala.FanoutService +import com.twitter.tweetypie.thriftscala._ + +trait FanoutServiceStore extends TweetStoreBase[FanoutServiceStore] with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): FanoutServiceStore = + new TweetStoreWrapper(w, this) with FanoutServiceStore with 
AsyncInsertTweet.StoreWrapper +} + +object FanoutServiceStore { + val Action: AsyncWriteAction.FanoutDelivery.type = AsyncWriteAction.FanoutDelivery + + def apply( + fanoutClient: FanoutService.MethodPerEndpoint, + stats: StatsReceiver + ): FanoutServiceStore = + new FanoutServiceStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event] { event => + fanoutClient.tweetCreateEvent2( + TweetCreateEvent( + tweet = event.tweet, + user = event.user, + sourceTweet = event.sourceTweet, + sourceUser = event.sourceUser, + additionalContext = event.additionalContext, + transientContext = event.transientContext + ) + ) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = TweetStore.retry(Action, asyncInsertTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala new file mode 100644 index 000000000..83fbc12af --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Flush.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie +package store + +object Flush extends TweetStore.SyncModule { + + case class Event( + tweetIds: Seq[TweetId], + flushTweets: Boolean = true, + flushCounts: Boolean = true, + logExisting: Boolean = true) + extends SyncTweetStoreEvent("flush") + + trait Store { + val flush: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val flush: FutureEffect[Event] = wrap(underlying.flush) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val flush: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.flush, + tweetCountsUpdatingStore.flush + ) + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala new file mode 100644 index 000000000..be29aba1e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GeoSearchRequestIDStore.scala @@ -0,0 +1,72 @@ +package com.twitter.tweetypie +package store + +import com.twitter.geoduck.backend.relevance.thriftscala.ReportFailure +import com.twitter.geoduck.backend.relevance.thriftscala.ReportResult +import com.twitter.geoduck.backend.relevance.thriftscala.ConversionReport +import com.twitter.geoduck.backend.searchrequestid.thriftscala.SearchRequestID +import com.twitter.geoduck.backend.tweetid.thriftscala.TweetID +import com.twitter.geoduck.common.thriftscala.GeoduckException +import com.twitter.geoduck.service.identifier.thriftscala.PlaceIdentifier +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.thriftscala._ + +trait GeoSearchRequestIDStore + extends TweetStoreBase[GeoSearchRequestIDStore] + with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): GeoSearchRequestIDStore = + new TweetStoreWrapper[GeoSearchRequestIDStore](w, this) + with GeoSearchRequestIDStore + with AsyncInsertTweet.StoreWrapper +} + +object GeoSearchRequestIDStore { + type ConversionReporter = FutureArrow[ConversionReport, ReportResult] + + val Action: AsyncWriteAction.GeoSearchRequestId.type = AsyncWriteAction.GeoSearchRequestId + private val log = Logger(getClass) + + object FailureHandler { + def translateException(failure: ReportResult.Failure): GeoduckException = { + failure.failure match { + case ReportFailure.Failure(exception) => exception + case _ => GeoduckException("Unknown failure: " + failure.toString) + } + } + } + + def apply(conversionReporter: ConversionReporter): GeoSearchRequestIDStore = + new GeoSearchRequestIDStore { + + val conversionEffect: FutureEffect[ConversionReport] = + FutureEffect + 
.fromPartial[ReportResult] { + case unionFailure: ReportResult.Failure => + Future.exception(FailureHandler.translateException(unionFailure)) + } + .contramapFuture(conversionReporter) + + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + conversionEffect.contramapOption[AsyncInsertTweet.Event] { event => + for { + isUserProtected <- event.user.safety.map(_.isProtected) + geoSearchRequestID <- event.geoSearchRequestId + placeType <- event.tweet.place.map(_.`type`) + placeId <- event.tweet.coreData.flatMap(_.placeId) + placeIdLong <- Try(java.lang.Long.parseUnsignedLong(placeId, 16)).toOption + if placeType == PlaceType.Poi && isUserProtected == false + } yield { + ConversionReport( + requestID = SearchRequestID(requestID = geoSearchRequestID), + tweetID = TweetID(event.tweet.id), + placeID = PlaceIdentifier(placeIdLong) + ) + } + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala new file mode 100644 index 000000000..4ddc40dc2 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserCountsUpdatingStore.scala @@ -0,0 +1,48 @@ +package com.twitter.tweetypie +package store + +import com.twitter.gizmoduck.thriftscala.{CountsUpdateField => Field} +import com.twitter.tweetypie.backends.Gizmoduck + +trait GizmoduckUserCountsUpdatingStore + extends TweetStoreBase[GizmoduckUserCountsUpdatingStore] + with InsertTweet.Store + with DeleteTweet.Store { + def wrap(w: TweetStore.Wrap): GizmoduckUserCountsUpdatingStore = + new TweetStoreWrapper(w, this) + with GizmoduckUserCountsUpdatingStore + with InsertTweet.StoreWrapper + with DeleteTweet.StoreWrapper +} + +/** + * A TweetStore implementation that sends 
user-specific count updates to Gizmoduck. + */ +object GizmoduckUserCountsUpdatingStore { + def isUserTweet(tweet: Tweet): Boolean = + !TweetLenses.nullcast.get(tweet) && TweetLenses.narrowcast.get(tweet).isEmpty + + def apply( + incr: Gizmoduck.IncrCount, + hasMedia: Tweet => Boolean + ): GizmoduckUserCountsUpdatingStore = { + def incrField(field: Field, amt: Int): FutureEffect[Tweet] = + FutureEffect[Tweet](tweet => incr((getUserId(tweet), field, amt))) + + def incrAll(amt: Int): FutureEffect[Tweet] = + FutureEffect.inParallel( + incrField(Field.Tweets, amt).onlyIf(isUserTweet), + incrField(Field.MediaTweets, amt).onlyIf(t => isUserTweet(t) && hasMedia(t)) + ) + + new GizmoduckUserCountsUpdatingStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + incrAll(1).contramap[InsertTweet.Event](_.tweet) + + override val deleteTweet: FutureEffect[DeleteTweet.Event] = + incrAll(-1) + .contramap[DeleteTweet.Event](_.tweet) + .onlyIf(!_.isUserErasure) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala new file mode 100644 index 000000000..fb6c50c4c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GizmoduckUserGeotagUpdateStore.scala @@ -0,0 +1,68 @@ +package com.twitter.tweetypie +package store + +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.ModifiedAccount +import com.twitter.gizmoduck.thriftscala.ModifiedUser +import com.twitter.tweetypie.backends.Gizmoduck +import com.twitter.tweetypie.thriftscala._ + +trait GizmoduckUserGeotagUpdateStore + extends TweetStoreBase[GizmoduckUserGeotagUpdateStore] + with AsyncInsertTweet.Store + with ScrubGeoUpdateUserTimestamp.Store { + def wrap(w: TweetStore.Wrap): GizmoduckUserGeotagUpdateStore = + new TweetStoreWrapper(w, this) + with GizmoduckUserGeotagUpdateStore + with 
AsyncInsertTweet.StoreWrapper + with ScrubGeoUpdateUserTimestamp.StoreWrapper +} + +/** + * A TweetStore implementation that updates a Gizmoduck user's user_has_geotagged_status flag. + * If a tweet is geotagged and the user's flag is not set, call out to Gizmoduck to update it. + */ +object GizmoduckUserGeotagUpdateStore { + val Action: AsyncWriteAction.UserGeotagUpdate.type = AsyncWriteAction.UserGeotagUpdate + + def apply( + modifyAndGet: Gizmoduck.ModifyAndGet, + stats: StatsReceiver + ): GizmoduckUserGeotagUpdateStore = { + // Counts the number of times that the scrubGeo actually cleared the + // hasGeotaggedStatuses bit for a user. + val clearedCounter = stats.counter("has_geotag_cleared") + + // Counts the number of times that asyncInsertTweet actually set the + // hasGeotaggedStatuses bit for a user. + val setCounter = stats.counter("has_geotag_set") + + def setHasGeotaggedStatuses(value: Boolean): FutureEffect[UserId] = { + val modifiedAccount = ModifiedAccount(hasGeotaggedStatuses = Some(value)) + val modifiedUser = ModifiedUser(account = Some(modifiedAccount)) + FutureEffect(userId => modifyAndGet((LookupContext(), userId, modifiedUser)).unit) + } + + new GizmoduckUserGeotagUpdateStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + setHasGeotaggedStatuses(true) + .contramap[AsyncInsertTweet.Event](_.user.id) + .onSuccess(_ => setCounter.incr()) + .onlyIf { e => + // only with geo info and an account that doesn't yet have geotagged statuses flag set + hasGeo(e.tweet) && (e.user.account.exists(!_.hasGeotaggedStatuses)) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] = + setHasGeotaggedStatuses(false) + .contramap[ScrubGeoUpdateUserTimestamp.Event](_.userId) + .onlyIf(_.mightHaveGeotaggedStatuses) + .onSuccess(_ => 
clearedCounter.incr()) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala new file mode 100644 index 000000000..d40e6f657 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Guano.scala @@ -0,0 +1,144 @@ +package com.twitter.tweetypie +package store + +import com.twitter.guano.{thriftscala => guano} +import com.twitter.servo.util.Scribe +import com.twitter.takedown.util.TakedownReasons +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala.AuditDeleteTweet + +object Guano { + case class MalwareAttempt( + url: String, + userId: UserId, + clientAppId: Option[Long], + remoteHost: Option[String]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.MalwareAttempt, + malwareAttempt = Some( + guano.MalwareAttempt( + timestamp = Time.now.inSeconds, + host = remoteHost, + userId = userId, + url = url, + `type` = guano.MalwareAttemptType.Status, + clientAppId = clientAppId.map(_.toInt) // yikes! 
+ ) + ) + ) + } + + case class DestroyTweet( + tweet: Tweet, + userId: UserId, + byUserId: UserId, + passthrough: Option[AuditDeleteTweet]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.DestroyStatus, + destroyStatus = Some( + guano.DestroyStatus( + `type` = Some(guano.DestroyStatusType.Status), + timestamp = Time.now.inSeconds, + userId = userId, + byUserId = byUserId, + statusId = tweet.id, + text = "", + reason = passthrough + .flatMap(_.reason) + .flatMap { r => guano.UserActionReason.valueOf(r.name) } + .orElse(Some(guano.UserActionReason.Other)), + done = passthrough.flatMap(_.done).orElse(Some(true)), + host = passthrough.flatMap(_.host), + bulkId = passthrough.flatMap(_.bulkId), + note = passthrough.flatMap(_.note), + runId = passthrough.flatMap(_.runId), + clientApplicationId = passthrough.flatMap(_.clientApplicationId), + userAgent = passthrough.flatMap(_.userAgent) + ) + ) + ) + } + + case class Takedown( + tweetId: TweetId, + userId: UserId, + reason: TakedownReason, + takendown: Boolean, + note: Option[String], + host: Option[String], + byUserId: Option[UserId]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.PctdAction, + pctdAction = Some( + guano.PctdAction( + `type` = guano.PctdActionType.Status, + timestamp = Time.now.inSeconds, + tweetId = Some(tweetId), + userId = userId, + countryCode = + TakedownReasons.reasonToCountryCode.applyOrElse(reason, (_: TakedownReason) => ""), + takendown = takendown, + note = note, + host = host, + byUserId = byUserId.getOrElse(-1L), + reason = Some(reason) + ) + ) + ) + } + + case class UpdatePossiblySensitiveTweet( + tweetId: TweetId, + userId: UserId, + byUserId: UserId, + action: guano.NsfwTweetActionAction, + enabled: Boolean, + host: Option[String], + note: Option[String]) { + def toScribeMessage: guano.ScribeMessage = + guano.ScribeMessage( + `type` = guano.ScribeType.NsfwTweetAction, + nsfwTweetAction = 
Some( + guano.NsfwTweetAction( + timestamp = Time.now.inSeconds, + host = host, + userId = userId, + byUserId = byUserId, + action = action, + enabled = enabled, + note = note, + tweetId = tweetId + ) + ) + ) + } + + def apply( + scribe: FutureEffect[guano.ScribeMessage] = Scribe(guano.ScribeMessage, + Scribe("trust_eng_audit")) + ): Guano = { + new Guano { + override val scribeMalwareAttempt: FutureEffect[MalwareAttempt] = + scribe.contramap[MalwareAttempt](_.toScribeMessage) + + override val scribeDestroyTweet: FutureEffect[DestroyTweet] = + scribe.contramap[DestroyTweet](_.toScribeMessage) + + override val scribeTakedown: FutureEffect[Takedown] = + scribe.contramap[Takedown](_.toScribeMessage) + + override val scribeUpdatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet] = + scribe.contramap[UpdatePossiblySensitiveTweet](_.toScribeMessage) + } + } +} + +trait Guano { + val scribeMalwareAttempt: FutureEffect[Guano.MalwareAttempt] + val scribeDestroyTweet: FutureEffect[Guano.DestroyTweet] + val scribeTakedown: FutureEffect[Guano.Takedown] + val scribeUpdatePossiblySensitiveTweet: FutureEffect[Guano.UpdatePossiblySensitiveTweet] +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala new file mode 100644 index 000000000..a2a284b8f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/GuanoServiceStore.scala @@ -0,0 +1,120 @@ +package com.twitter.tweetypie +package store + +import com.twitter.guano.thriftscala.NsfwTweetActionAction +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ + +trait GuanoServiceStore + extends TweetStoreBase[GuanoServiceStore] + with AsyncDeleteTweet.Store + with AsyncTakedown.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): GuanoServiceStore = + new TweetStoreWrapper(w, this) 
+ with GuanoServiceStore + with AsyncDeleteTweet.StoreWrapper + with AsyncTakedown.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +object GuanoServiceStore { + val Action: AsyncWriteAction.GuanoScribe.type = AsyncWriteAction.GuanoScribe + + val toGuanoTakedown: (AsyncTakedown.Event, TakedownReason, Boolean) => Guano.Takedown = + (event: AsyncTakedown.Event, reason: TakedownReason, takendown: Boolean) => + Guano.Takedown( + tweetId = event.tweet.id, + userId = getUserId(event.tweet), + reason = reason, + takendown = takendown, + note = event.auditNote, + host = event.host, + byUserId = event.byUserId + ) + + val toGuanoUpdatePossiblySensitiveTweet: ( + AsyncUpdatePossiblySensitiveTweet.Event, + Boolean, + NsfwTweetActionAction + ) => Guano.UpdatePossiblySensitiveTweet = + ( + event: AsyncUpdatePossiblySensitiveTweet.Event, + updatedValue: Boolean, + action: NsfwTweetActionAction + ) => + Guano.UpdatePossiblySensitiveTweet( + tweetId = event.tweet.id, + host = event.host.orElse(Some("unknown")), + userId = event.user.id, + byUserId = event.byUserId, + action = action, + enabled = updatedValue, + note = event.note + ) + + def apply(guano: Guano, stats: StatsReceiver): GuanoServiceStore = { + val deleteByUserIdCounter = stats.counter("deletes_with_by_user_id") + val deleteScribeCounter = stats.counter("deletes_resulting_in_scribe") + + new GuanoServiceStore { + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { event => + val tweet = event.tweet + + event.byUserId.foreach(_ => deleteByUserIdCounter.incr()) + + // Guano the tweet deletion action not initiated from the RetweetsDeletionStore + event.byUserId match { + case Some(byUserId) => + deleteScribeCounter.incr() + guano.scribeDestroyTweet( + Guano.DestroyTweet( + tweet = tweet, + userId = getUserId(tweet), + byUserId = byUserId, + passthrough = event.auditPassthrough + ) + ) + case _ => + Future.Unit + } + 
}.onlyIf(_.cascadedFromTweetId.isEmpty) + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = + FutureEffect[AsyncTakedown.Event] { event => + val messages = + event.reasonsToAdd.map(toGuanoTakedown(event, _, true)) ++ + event.reasonsToRemove.map(toGuanoTakedown(event, _, false)) + Future.join(messages.map(guano.scribeTakedown)) + }.onlyIf(_.scribeForAudit) + + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + TweetStore.retry(Action, asyncTakedown) + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => + val messages = + event.nsfwAdminChange.map( + toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwAdmin) + ) ++ + event.nsfwUserChange.map( + toGuanoUpdatePossiblySensitiveTweet(event, _, NsfwTweetActionAction.NsfwUser) + ) + Future.join(messages.toSeq.map(guano.scribeUpdatePossiblySensitiveTweet)) + } + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala new file mode 100644 index 000000000..5f1f2920a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrBookmarkCount.scala @@ -0,0 +1,92 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry +import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy +import com.twitter.tweetypie.thriftscala.AsyncIncrBookmarkCountRequest +import 
com.twitter.tweetypie.thriftscala.AsyncWriteAction + +object IncrBookmarkCount extends TweetStore.SyncModule { + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends SyncTweetStoreEvent("incr_bookmark_count") { + val toAsyncRequest: AsyncIncrBookmarkCountRequest = + AsyncIncrBookmarkCountRequest(tweetId = tweetId, delta = delta) + } + + trait Store { + val incrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val incrBookmarkCount: FutureEffect[Event] = wrap(underlying.incrBookmarkCount) + } + + object Store { + def apply( + asyncEnqueueStore: AsyncEnqueueStore, + replicatingStore: ReplicatingTweetStore + ): Store = { + new Store { + override val incrBookmarkCount: FutureEffect[Event] = + FutureEffect.inParallel( + asyncEnqueueStore.incrBookmarkCount, + replicatingStore.incrBookmarkCount + ) + } + } + } +} + +object AsyncIncrBookmarkCount extends TweetStore.AsyncModule { + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends AsyncTweetStoreEvent("async_incr_bookmark_event") { + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + Future.Unit + + override def retryStrategy: RetryStrategy = NoRetry + } + + trait Store { + def asyncIncrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncIncrBookmarkCount: FutureEffect[Event] = wrap( + underlying.asyncIncrBookmarkCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override def asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] = + tweetCountsUpdatingStore.asyncIncrBookmarkCount + } + } + } +} + +object ReplicatedIncrBookmarkCount extends TweetStore.ReplicatedModule { + case class Event(tweetId: TweetId, delta: Int) + extends ReplicatedTweetStoreEvent("replicated_incr_bookmark_count") { + 
override def retryStrategy: RetryStrategy = NoRetry + } + + trait Store { + val replicatedIncrBookmarkCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedIncrBookmarkCount: FutureEffect[Event] = wrap( + underlying.replicatedIncrBookmarkCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val replicatedIncrBookmarkCount: FutureEffect[Event] = { + tweetCountsUpdatingStore.replicatedIncrBookmarkCount + } + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala new file mode 100644 index 000000000..b6e1aabcb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/IncrFavCount.scala @@ -0,0 +1,90 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetStoreEvent.NoRetry +import com.twitter.tweetypie.thriftscala._ + +object IncrFavCount extends TweetStore.SyncModule { + + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends SyncTweetStoreEvent("incr_fav_count") { + val toAsyncRequest: AsyncIncrFavCountRequest = AsyncIncrFavCountRequest(tweetId, delta) + } + + trait Store { + val incrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val incrFavCount: FutureEffect[Event] = wrap(underlying.incrFavCount) + } + + object Store { + def apply( + asyncEnqueueStore: AsyncEnqueueStore, + replicatingStore: ReplicatingTweetStore + ): Store = + new Store { + override val incrFavCount: FutureEffect[Event] = + FutureEffect.inParallel( + asyncEnqueueStore.incrFavCount, + replicatingStore.incrFavCount + ) + } + } +} + +object AsyncIncrFavCount extends TweetStore.AsyncModule { + + case class Event(tweetId: TweetId, delta: Int, timestamp: Time) + extends 
AsyncTweetStoreEvent("async_incr_fav_count") { + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + Future.Unit // We need to define this method for TweetStoreEvent.Async but we don't use it + + override def retryStrategy: TweetStoreEvent.RetryStrategy = NoRetry + } + + trait Store { + val asyncIncrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncIncrFavCount: FutureEffect[Event] = wrap(underlying.asyncIncrFavCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val asyncIncrFavCount: FutureEffect[Event] = + tweetCountsUpdatingStore.asyncIncrFavCount + } + } + } +} + +object ReplicatedIncrFavCount extends TweetStore.ReplicatedModule { + + case class Event(tweetId: TweetId, delta: Int) + extends ReplicatedTweetStoreEvent("replicated_incr_fav_count") { + override def retryStrategy: TweetStoreEvent.NoRetry.type = NoRetry + } + + trait Store { + val replicatedIncrFavCount: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedIncrFavCount: FutureEffect[Event] = wrap( + underlying.replicatedIncrFavCount) + } + + object Store { + def apply(tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore): Store = { + new Store { + override val replicatedIncrFavCount: FutureEffect[Event] = + tweetCountsUpdatingStore.replicatedIncrFavCount.ignoreFailures + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala new file mode 100644 index 000000000..3e796d3d8 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InitialTweetUpdate.scala @@ -0,0 +1,31 @@ +package com.twitter.tweetypie.store + +import com.twitter.tweetypie.Tweet +import 
com.twitter.tweetypie.serverutil.ExtendedTweetMetadataBuilder +import com.twitter.tweetypie.thriftscala.EditControl +import com.twitter.tweetypie.thriftscala.InitialTweetUpdateRequest +import com.twitter.tweetypie.util.EditControlUtil + +/* Logic to update the initial tweet with new information when that tweet is edited */ +object InitialTweetUpdate { + + /* Given the initial tweet and update request, copy updated edit + * related fields onto it. + */ + def updateTweet(initialTweet: Tweet, request: InitialTweetUpdateRequest): Tweet = { + + // compute a new edit control initial with updated list of edit tweet ids + val editControl: EditControl.Initial = + EditControlUtil.editControlForInitialTweet(initialTweet, request.editTweetId).get() + + // compute the correct extended metadata for a permalink + val extendedTweetMetadata = + request.selfPermalink.map(link => ExtendedTweetMetadataBuilder(initialTweet, link)) + + initialTweet.copy( + selfPermalink = initialTweet.selfPermalink.orElse(request.selfPermalink), + editControl = Some(editControl), + extendedTweetMetadata = initialTweet.extendedTweetMetadata.orElse(extendedTweetMetadata) + ) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala new file mode 100644 index 000000000..969cc2b5a --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/InsertTweet.scala @@ -0,0 +1,284 @@ +package com.twitter.tweetypie +package store + +import com.twitter.context.thriftscala.FeatureContext +import com.twitter.tweetypie.core.GeoSearchRequestId +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import com.twitter.tweetypie.thriftscala._ + +object InsertTweet extends TweetStore.SyncModule { + + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + _internalTweet: Option[CachedTweet] = None, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = 
None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + parentUserId: Option[UserId] = None, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, + dark: Boolean = false, + hydrateOptions: WritePathHydrationOptions = WritePathHydrationOptions(), + featureContext: Option[FeatureContext] = None, + geoSearchRequestId: Option[GeoSearchRequestId] = None, + additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, + transientContext: Option[TransientCreateContext] = None, + quoterHasAlreadyQuotedTweet: Boolean = false, + noteTweetMentionedUserIds: Option[Seq[Long]] = None) + extends SyncTweetStoreEvent("insert_tweet") + with QuotedTweetOps { + def internalTweet: CachedTweet = + _internalTweet.getOrElse( + throw new IllegalStateException( + s"internalTweet should have been set in WritePathHydration, ${this}" + ) + ) + + def toAsyncRequest( + scrubUser: User => User, + scrubSourceTweet: Tweet => Tweet, + scrubSourceUser: User => User + ): AsyncInsertRequest = + AsyncInsertRequest( + tweet = tweet, + cachedTweet = internalTweet, + user = scrubUser(user), + sourceTweet = sourceTweet.map(scrubSourceTweet), + sourceUser = sourceUser.map(scrubSourceUser), + quotedTweet = quotedTweet.map(scrubSourceTweet), + quotedUser = quotedUser.map(scrubSourceUser), + parentUserId = parentUserId, + featureContext = featureContext, + timestamp = timestamp.inMillis, + geoSearchRequestId = geoSearchRequestId.map(_.requestID), + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), + initialTweetUpdateRequest = initialTweetUpdateRequest, + noteTweetMentionedUserIds = noteTweetMentionedUserIds + ) + } + + trait Store { + val insertTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val insertTweet: FutureEffect[Event] = wrap(underlying.insertTweet) + } + + object Store { + def 
apply( + logLensStore: LogLensStore, + manhattanStore: ManhattanTweetStore, + tweetStatsStore: TweetStatsStore, + cachingTweetStore: CachingTweetStore, + limiterStore: LimiterStore, + asyncEnqueueStore: AsyncEnqueueStore, + userCountsUpdatingStore: GizmoduckUserCountsUpdatingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val insertTweet: FutureEffect[Event] = + FutureEffect.sequentially( + logLensStore.insertTweet, + manhattanStore.insertTweet, + tweetStatsStore.insertTweet, + FutureEffect.inParallel( + // allow write-through caching to fail without failing entire insert + cachingTweetStore.ignoreFailures.insertTweet, + limiterStore.ignoreFailures.insertTweet, + asyncEnqueueStore.insertTweet, + userCountsUpdatingStore.insertTweet, + tweetCountsUpdatingStore.insertTweet + ) + ) + } + } +} + +object AsyncInsertTweet extends TweetStore.AsyncModule { + + private val log = Logger(getClass) + + object Event { + def fromAsyncRequest(request: AsyncInsertRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + tweet = request.tweet, + cachedTweet = request.cachedTweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + sourceTweet = request.sourceTweet, + sourceUser = request.sourceUser, + parentUserId = request.parentUserId, + featureContext = request.featureContext, + quotedTweet = request.quotedTweet, + quotedUser = request.quotedUser, + geoSearchRequestId = request.geoSearchRequestId, + additionalContext = request.additionalContext, + transientContext = request.transientContext, + quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false), + initialTweetUpdateRequest = request.initialTweetUpdateRequest, + noteTweetMentionedUserIds = request.noteTweetMentionedUserIds + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + user: User, + optUser: 
Option[User], + timestamp: Time, + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + parentUserId: Option[UserId] = None, + featureContext: Option[FeatureContext] = None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + geoSearchRequestId: Option[String] = None, + additionalContext: Option[collection.Map[TweetCreateContextKey, String]] = None, + transientContext: Option[TransientCreateContext] = None, + quoterHasAlreadyQuotedTweet: Boolean = false, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None, + noteTweetMentionedUserIds: Option[Seq[Long]] = None) + extends AsyncTweetStoreEvent("async_insert_tweet") + with QuotedTweetOps + with TweetStoreTweetEvent { + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncInsertRequest = + AsyncInsertRequest( + tweet = tweet, + cachedTweet = cachedTweet, + user = user, + sourceTweet = sourceTweet, + sourceUser = sourceUser, + parentUserId = parentUserId, + retryAction = action, + featureContext = featureContext, + timestamp = timestamp.inMillis, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + geoSearchRequestId = geoSearchRequestId, + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet), + initialTweetUpdateRequest = initialTweetUpdateRequest, + noteTweetMentionedUserIds = noteTweetMentionedUserIds + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetCreateEvent( + TweetCreateEvent( + tweet = scrub(tweet), + user = user, + sourceUser = sourceUser, + sourceTweet = sourceTweet.map(scrub), + retweetParentUserId = parentUserId, + quotedTweet = publicQuotedTweet.map(scrub), + quotedUser = publicQuotedUser, + additionalContext = additionalContext, + transientContext = transientContext, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + ) + ) + + override def enqueueRetry(service: 
ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncInsert(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Insert.type = AsyncWriteEventType.Insert + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncInsertTweet: FutureEffect[Event] + val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncInsertTweet: FutureEffect[Event] = wrap(underlying.asyncInsertTweet) + override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncInsertTweet) + } + + object Store { + def apply( + replicatingStore: ReplicatingTweetStore, + indexingStore: TweetIndexingStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + timelineUpdatingStore: TlsTimelineUpdatingStore, + eventBusEnqueueStore: TweetEventBusStore, + fanoutServiceStore: FanoutServiceStore, + scribeMediaTagStore: ScribeMediaTagStore, + userGeotagUpdateStore: GizmoduckUserGeotagUpdateStore, + geoSearchRequestIDStore: GeoSearchRequestIDStore + ): Store = { + val stores: Seq[Store] = + Seq( + replicatingStore, + indexingStore, + timelineUpdatingStore, + eventBusEnqueueStore, + fanoutServiceStore, + userGeotagUpdateStore, + tweetCountsUpdatingStore, + scribeMediaTagStore, + geoSearchRequestIDStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncInsertTweet: FutureEffect[Event] = build(_.asyncInsertTweet) + override val retryAsyncInsertTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + _.retryAsyncInsertTweet) + } + } + } +} + +object ReplicatedInsertTweet extends TweetStore.ReplicatedModule { + + case class 
Event( + tweet: Tweet, + cachedTweet: CachedTweet, + quoterHasAlreadyQuotedTweet: Boolean = false, + initialTweetUpdateRequest: Option[InitialTweetUpdateRequest] = None) + extends ReplicatedTweetStoreEvent("replicated_insert_tweet") + + trait Store { + val replicatedInsertTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedInsertTweet: FutureEffect[Event] = wrap(underlying.replicatedInsertTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = { + new Store { + override val replicatedInsertTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedInsertTweet, + tweetCountsUpdatingStore.replicatedInsertTweet.ignoreFailures + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala new file mode 100644 index 000000000..fa71a7967 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LimiterStore.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.backends.LimiterService +import com.twitter.tweetypie.thriftscala._ + +trait LimiterStore extends TweetStoreBase[LimiterStore] with InsertTweet.Store { + def wrap(w: TweetStore.Wrap): LimiterStore = + new TweetStoreWrapper(w, this) with LimiterStore with InsertTweet.StoreWrapper +} + +object LimiterStore { + def apply( + incrementCreateSuccess: LimiterService.IncrementByOne, + incrementMediaTags: LimiterService.Increment + ): LimiterStore = + new LimiterStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + Future.when(!event.dark) { + val userId = event.user.id + val contributorUserId: Option[UserId] = event.tweet.contributor.map(_.userId) + + val mediaTags = 
getMediaTagMap(event.tweet) + val mediaTagCount = countDistinctUserMediaTags(mediaTags) + Future + .join( + incrementCreateSuccess(userId, contributorUserId), + incrementMediaTags(userId, contributorUserId, mediaTagCount) + ) + .unit + } + } + } + + def countDistinctUserMediaTags(mediaTags: Map[MediaId, Seq[MediaTag]]): Int = + mediaTags.values.flatten.toSeq + .collect { case MediaTag(MediaTagType.User, Some(userId), _, _) => userId } + .distinct + .size +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala new file mode 100644 index 000000000..67b69691e --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/LogLensStore.scala @@ -0,0 +1,169 @@ +package com.twitter.tweetypie +package store + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.twitter.finagle.tracing.Trace +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.client_id.ClientIdHelper +import com.twitter.tweetypie.media.Media.ownMedia + +trait LogLensStore + extends TweetStoreBase[LogLensStore] + with InsertTweet.Store + with DeleteTweet.Store + with UndeleteTweet.Store + with SetAdditionalFields.Store + with DeleteAdditionalFields.Store + with ScrubGeo.Store + with Takedown.Store + with UpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): LogLensStore = + new TweetStoreWrapper(w, this) + with LogLensStore + with InsertTweet.StoreWrapper + with DeleteTweet.StoreWrapper + with UndeleteTweet.StoreWrapper + with SetAdditionalFields.StoreWrapper + with DeleteAdditionalFields.StoreWrapper + with ScrubGeo.StoreWrapper + with Takedown.StoreWrapper + with UpdatePossiblySensitiveTweet.StoreWrapper +} + +object LogLensStore { + def apply( + tweetCreationsLogger: Logger, + tweetDeletionsLogger: Logger, + tweetUndeletionsLogger: Logger, + 
tweetUpdatesLogger: Logger, + clientIdHelper: ClientIdHelper, + ): LogLensStore = + new LogLensStore { + private[this] val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + + private def logMessage(logger: Logger, data: (String, Any)*): Future[Unit] = + Future { + val allData = data ++ defaultData + val msg = mapper.writeValueAsString(Map(allData: _*)) + logger.info(msg) + } + + // Note: Longs are logged as strings to avoid JSON 53-bit numeric truncation + private def defaultData: Seq[(String, Any)] = { + val viewer = TwitterContext() + Seq( + "client_id" -> getOpt(clientIdHelper.effectiveClientId), + "service_id" -> getOpt(clientIdHelper.effectiveServiceIdentifier), + "trace_id" -> Trace.id.traceId.toString, + "audit_ip" -> getOpt(viewer.flatMap(_.auditIp)), + "application_id" -> getOpt(viewer.flatMap(_.clientApplicationId).map(_.toString)), + "user_agent" -> getOpt(viewer.flatMap(_.userAgent)), + "authenticated_user_id" -> getOpt(viewer.flatMap(_.authenticatedUserId).map(_.toString)) + ) + } + + private def getOpt[A](opt: Option[A]): Any = + opt.getOrElse(null) + + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + logMessage( + tweetCreationsLogger, + "type" -> "create_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> event.user.id.toString, + "source_tweet_id" -> getOpt(event.sourceTweet.map(_.id.toString)), + "source_user_id" -> getOpt(event.sourceUser.map(_.id.toString)), + "directed_at_user_id" -> getOpt(getDirectedAtUser(event.tweet).map(_.userId.toString)), + "reply_to_tweet_id" -> getOpt( + getReply(event.tweet).flatMap(_.inReplyToStatusId).map(_.toString)), + "reply_to_user_id" -> getOpt(getReply(event.tweet).map(_.inReplyToUserId.toString)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) + ) + } + + override val deleteTweet: FutureEffect[DeleteTweet.Event] = + FutureEffect[DeleteTweet.Event] { event => + logMessage( + tweetDeletionsLogger, + "type" -> 
"delete_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> getOpt(event.user.map(_.id.toString)), + "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), + "by_user_id" -> getOpt(event.byUserId.map(_.toString)), + "passthrough_audit_ip" -> getOpt(event.auditPassthrough.flatMap(_.host)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString), + "cascaded_from_tweet_id" -> getOpt(event.cascadedFromTweetId.map(_.toString)) + ) + } + + override val undeleteTweet: FutureEffect[UndeleteTweet.Event] = + FutureEffect[UndeleteTweet.Event] { event => + logMessage( + tweetUndeletionsLogger, + "type" -> "undelete_tweet", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> event.user.id.toString, + "source_tweet_id" -> getOpt(getShare(event.tweet).map(_.sourceStatusId.toString)), + "media_ids" -> ownMedia(event.tweet).map(_.mediaId.toString) + ) + } + + override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + FutureEffect[SetAdditionalFields.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "set_additional_fields", + "tweet_id" -> event.additionalFields.id.toString, + "field_ids" -> AdditionalFields.nonEmptyAdditionalFieldIds(event.additionalFields) + ) + } + + override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + FutureEffect[DeleteAdditionalFields.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "delete_additional_fields", + "tweet_id" -> event.tweetId.toString, + "field_ids" -> event.fieldIds + ) + } + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event] { event => + Future.join( + event.tweetIds.map { tweetId => + logMessage( + tweetUpdatesLogger, + "type" -> "scrub_geo", + "tweet_id" -> tweetId.toString, + "user_id" -> event.userId.toString + ) + } + ) + } + + override val takedown: FutureEffect[Takedown.Event] = + FutureEffect[Takedown.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> 
"takedown", + "tweet_id" -> event.tweet.id.toString, + "user_id" -> getUserId(event.tweet).toString, + "reasons" -> event.takedownReasons + ) + } + + override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => + logMessage( + tweetUpdatesLogger, + "type" -> "update_possibly_sensitive_tweet", + "tweet_id" -> event.tweet.id.toString, + "nsfw_admin" -> TweetLenses.nsfwAdmin(event.tweet), + "nsfw_user" -> TweetLenses.nsfwUser(event.tweet) + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala new file mode 100644 index 000000000..6eaa65eee --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ManhattanTweetStore.scala @@ -0,0 +1,231 @@ +/** Copyright 2010 Twitter, Inc. */ +package com.twitter.tweetypie +package store + +import com.twitter.stitch.Stitch +import com.twitter.tweetypie.additionalfields.AdditionalFields +import com.twitter.tweetypie.storage.Field +import com.twitter.tweetypie.storage.Response.TweetResponse +import com.twitter.tweetypie.storage.Response.TweetResponseCode +import com.twitter.tweetypie.storage.TweetStorageClient +import com.twitter.tweetypie.storage.TweetStorageClient.GetTweet +import com.twitter.tweetypie.storage.TweetStorageException +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Future + +case class UpdateTweetNotFoundException(tweetId: TweetId) extends Exception + +trait ManhattanTweetStore + extends TweetStoreBase[ManhattanTweetStore] + with InsertTweet.Store + with AsyncDeleteTweet.Store + with ScrubGeo.Store + with SetAdditionalFields.Store + with DeleteAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with Takedown.Store + with UpdatePossiblySensitiveTweet.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: 
TweetStore.Wrap): ManhattanTweetStore = + new TweetStoreWrapper(w, this) + with ManhattanTweetStore + with InsertTweet.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with ScrubGeo.StoreWrapper + with SetAdditionalFields.StoreWrapper + with DeleteAdditionalFields.StoreWrapper + with AsyncDeleteAdditionalFields.StoreWrapper + with Takedown.StoreWrapper + with UpdatePossiblySensitiveTweet.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +/** + * A TweetStore implementation that writes to Manhattan. + */ +object ManhattanTweetStore { + val Action: AsyncWriteAction.TbirdUpdate.type = AsyncWriteAction.TbirdUpdate + + private val log = Logger(getClass) + private val successResponses = Set(TweetResponseCode.Success, TweetResponseCode.Deleted) + + case class AnnotationFailure(message: String) extends Exception(message) + + def apply(tweetStorageClient: TweetStorageClient): ManhattanTweetStore = { + + def handleStorageResponses( + responsesStitch: Stitch[Seq[TweetResponse]], + action: String + ): Future[Unit] = + Stitch + .run(responsesStitch) + .onFailure { + case ex: TweetStorageException => log.warn("failed on: " + action, ex) + case _ => + } + .flatMap { responses => + Future.when(responses.exists(resp => !successResponses(resp.overallResponse))) { + Future.exception(AnnotationFailure(s"$action gets failure response $responses")) + } + } + + def updateTweetMediaIds(mutation: Mutation[MediaEntity]): Tweet => Tweet = + tweet => tweet.copy(media = tweet.media.map(entities => entities.map(mutation.endo))) + + /** + * Does a get and set, and only sets fields that are allowed to be + * changed. This also prevents incoming tweets containing incomplete + * fields from being saved to Manhattan. 
+ */ + def updateOneTweetByIdAction(tweetId: TweetId, copyFields: Tweet => Tweet): Future[Unit] = { + Stitch.run { + tweetStorageClient.getTweet(tweetId).flatMap { + case GetTweet.Response.Found(tweet) => + val updatedTweet = copyFields(tweet) + + if (updatedTweet != tweet) { + tweetStorageClient.addTweet(updatedTweet) + } else { + Stitch.Unit + } + case _ => Stitch.exception(UpdateTweetNotFoundException(tweetId)) + } + } + } + + // This should NOT be used in parallel with other write operations. + // A race condition can occur after changes to the storage library to + // return all additional fields. The resulting behavior can cause + // fields that were modified by other writes to revert to their old value. + def updateOneTweetAction(update: Tweet, copyFields: Tweet => Tweet => Tweet): Future[Unit] = + updateOneTweetByIdAction(update.id, copyFields(update)) + + def tweetStoreUpdateTweet(tweet: Tweet): Future[Unit] = { + val setFields = AdditionalFields.nonEmptyAdditionalFieldIds(tweet).map(Field.additionalField) + handleStorageResponses( + tweetStorageClient.updateTweet(tweet, setFields).map(Seq(_)), + s"updateTweet($tweet, $setFields)" + ) + } + + // This is an edit so update the initial Tweet's control + def updateInitialTweet(event: InsertTweet.Event): Future[Unit] = { + event.initialTweetUpdateRequest match { + case Some(request) => + updateOneTweetByIdAction( + request.initialTweetId, + tweet => InitialTweetUpdate.updateTweet(tweet, request) + ) + case None => Future.Unit + } + } + + new ManhattanTweetStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + Stitch + .run( + tweetStorageClient.addTweet(event.internalTweet.tweet) + ).flatMap(_ => updateInitialTweet(event)) + } + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { event => + if (event.isBounceDelete) { + Stitch.run(tweetStorageClient.bounceDelete(event.tweet.id)) + } else { + 
Stitch.run(tweetStorageClient.softDelete(event.tweet.id)) + } + } + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event] { event => + Stitch.run(tweetStorageClient.scrub(event.tweetIds, Seq(Field.Geo))) + } + + override val setAdditionalFields: FutureEffect[SetAdditionalFields.Event] = + FutureEffect[SetAdditionalFields.Event] { event => + tweetStoreUpdateTweet(event.additionalFields) + } + + override val deleteAdditionalFields: FutureEffect[DeleteAdditionalFields.Event] = + FutureEffect[DeleteAdditionalFields.Event] { event => + handleStorageResponses( + tweetStorageClient.deleteAdditionalFields( + Seq(event.tweetId), + event.fieldIds.map(Field.additionalField) + ), + s"deleteAdditionalFields(${event.tweetId}, ${event.fieldIds}})" + ) + } + + override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + FutureEffect[AsyncDeleteAdditionalFields.Event] { event => + handleStorageResponses( + tweetStorageClient.deleteAdditionalFields( + Seq(event.tweetId), + event.fieldIds.map(Field.additionalField) + ), + s"deleteAdditionalFields(Seq(${event.tweetId}), ${event.fieldIds}})" + ) + } + + override val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncDeleteAdditionalFields) + + override val takedown: FutureEffect[Takedown.Event] = + FutureEffect[Takedown.Event] { event => + val (fieldsToUpdate, fieldsToDelete) = + Seq( + Field.TweetypieOnlyTakedownCountryCodes, + Field.TweetypieOnlyTakedownReasons + ).filter(_ => event.updateCodesAndReasons) + .partition(f => event.tweet.getFieldBlob(f.id).isDefined) + + val allFieldsToUpdate = Seq(Field.HasTakedown) ++ fieldsToUpdate + + Future + .join( + handleStorageResponses( + tweetStorageClient + .updateTweet(event.tweet, 
allFieldsToUpdate) + .map(Seq(_)), + s"updateTweet(${event.tweet}, $allFieldsToUpdate)" + ), + Future.when(fieldsToDelete.nonEmpty) { + handleStorageResponses( + tweetStorageClient + .deleteAdditionalFields(Seq(event.tweet.id), fieldsToDelete), + s"deleteAdditionalFields(Seq(${event.tweet.id}), $fieldsToDelete)" + ) + } + ).unit + } + + override val updatePossiblySensitiveTweet: FutureEffect[UpdatePossiblySensitiveTweet.Event] = + FutureEffect[UpdatePossiblySensitiveTweet.Event] { event => + updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) + } + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event] { event => + updateOneTweetAction(event.tweet, TweetUpdate.copyNsfwFieldsForUpdate) + } + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala new file mode 100644 index 000000000..4efe22706 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaIndexHelper.scala @@ -0,0 +1,34 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ +import scala.util.matching.Regex + +object MediaIndexHelper { + + /** + * Which tweets should we treat as "media" tweets? + * + * Any tweet that is not a retweet and any of: + * - Is explicitly marked as a media tweet. + * - Has a media entity. + * - Includes a partner media URL. 
+ */ + def apply(partnerMediaRegexes: Seq[Regex]): Tweet => Boolean = { + val isPartnerUrl = partnerUrlMatcher(partnerMediaRegexes) + + tweet => + getShare(tweet).isEmpty && + (hasMediaFlagSet(tweet) || + getMedia(tweet).nonEmpty || + getUrls(tweet).exists(isPartnerUrl)) + } + + def partnerUrlMatcher(partnerMediaRegexes: Seq[Regex]): UrlEntity => Boolean = + _.expanded.exists { expandedUrl => + partnerMediaRegexes.exists(_.findFirstIn(expandedUrl).isDefined) + } + + def hasMediaFlagSet(tweet: Tweet): Boolean = + tweet.coreData.flatMap(_.hasMedia).getOrElse(false) +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala new file mode 100644 index 000000000..f2f427c3c --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/MediaServiceStore.scala @@ -0,0 +1,62 @@ +package com.twitter.tweetypie +package store + +import com.twitter.mediaservices.commons.thriftscala.MediaKey +import com.twitter.servo.util.FutureArrow +import com.twitter.tweetypie.media._ +import com.twitter.tweetypie.thriftscala._ + +trait MediaServiceStore + extends TweetStoreBase[MediaServiceStore] + with AsyncDeleteTweet.Store + with AsyncUndeleteTweet.Store { + def wrap(w: TweetStore.Wrap): MediaServiceStore = + new TweetStoreWrapper(w, this) + with MediaServiceStore + with AsyncDeleteTweet.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper +} + +object MediaServiceStore { + val Action: AsyncWriteAction.MediaDeletion.type = AsyncWriteAction.MediaDeletion + + private def ownMedia(t: Tweet): Seq[(MediaKey, TweetId)] = + getMedia(t) + .collect { + case m if Media.isOwnMedia(t.id, m) => (MediaKeyUtil.get(m), t.id) + } + + def apply( + deleteMedia: FutureArrow[DeleteMediaRequest, Unit], + undeleteMedia: FutureArrow[UndeleteMediaRequest, Unit] + ): MediaServiceStore = + new MediaServiceStore { + override val asyncDeleteTweet: 
FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { e => + Future.when(!isRetweet(e.tweet)) { + val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) + val deleteMediaRequests = ownMediaKeys.map(DeleteMediaRequest.tupled) + Future.collect(deleteMediaRequests.map(deleteMedia)) + } + } + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event] { e => + Future.when(!isRetweet(e.tweet)) { + val ownMediaKeys: Seq[(MediaKey, TweetId)] = ownMedia(e.tweet) + val unDeleteMediaRequests = ownMediaKeys.map(UndeleteMediaRequest.tupled) + Future.collect(unDeleteMediaRequests.map(undeleteMedia)) + } + } + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala new file mode 100644 index 000000000..68a6283d7 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetDelete.scala @@ -0,0 +1,45 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object QuotedTweetDelete extends TweetStore.SyncModule { + + case class Event( + quotingTweetId: TweetId, + quotingUserId: UserId, + quotedTweetId: TweetId, + quotedUserId: UserId, + timestamp: Time, + optUser: Option[User] = None) + extends SyncTweetStoreEvent("quoted_tweet_delete") + with TweetStoreTweetEvent { + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.QuotedTweetDeleteEvent( + QuotedTweetDeleteEvent( + quotingTweetId = quotingTweetId, + quotingUserId = quotingUserId, + quotedTweetId = 
quotedTweetId, + quotedUserId = quotedUserId + ) + ) + ) + } + + trait Store { + val quotedTweetDelete: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val quotedTweetDelete: FutureEffect[Event] = wrap(underlying.quotedTweetDelete) + } + + object Store { + def apply(eventBusEnqueueStore: TweetEventBusStore): Store = + new Store { + override val quotedTweetDelete: FutureEffect[Event] = eventBusEnqueueStore.quotedTweetDelete + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala new file mode 100644 index 000000000..34fa71aa6 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetOps.scala @@ -0,0 +1,33 @@ +package com.twitter.tweetypie +package store + +/** + * Mixin that implements public quoted tweet and public quoted user + * filtering for tweet events that have quoted tweets and users. + */ +trait QuotedTweetOps { + def quotedTweet: Option[Tweet] + def quotedUser: Option[User] + + /** + * Do we have evidence that the quoted user is unprotected? + */ + def quotedUserIsPublic: Boolean = + // The quoted user should include the `safety` struct, but if it + // doesn't for any reason then the quoted tweet and quoted user + // should not be included in the events. This is a safety measure to + // avoid leaking private information. + quotedUser.exists(_.safety.exists(!_.isProtected)) + + /** + * The quoted tweet, filtered as it should appear through public APIs. + */ + def publicQuotedTweet: Option[Tweet] = + if (quotedUserIsPublic) quotedTweet else None + + /** + * The quoted user, filtered as it should appear through public APIs. 
+ */ + def publicQuotedUser: Option[User] = + if (quotedUserIsPublic) quotedUser else None +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala new file mode 100644 index 000000000..4b73437cb --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/QuotedTweetTakedown.scala @@ -0,0 +1,51 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ + +object QuotedTweetTakedown extends TweetStore.SyncModule { + + case class Event( + quotingTweetId: TweetId, + quotingUserId: UserId, + quotedTweetId: TweetId, + quotedUserId: UserId, + takedownCountryCodes: Seq[String], + takedownReasons: Seq[TakedownReason], + timestamp: Time, + optUser: Option[User] = None) + extends SyncTweetStoreEvent("quoted_tweet_takedown") + with TweetStoreTweetEvent { + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.QuotedTweetTakedownEvent( + QuotedTweetTakedownEvent( + quotingTweetId = quotingTweetId, + quotingUserId = quotingUserId, + quotedTweetId = quotedTweetId, + quotedUserId = quotedUserId, + takedownCountryCodes = takedownCountryCodes, + takedownReasons = takedownReasons + ) + ) + ) + } + + trait Store { + val quotedTweetTakedown: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val quotedTweetTakedown: FutureEffect[Event] = wrap(underlying.quotedTweetTakedown) + } + + object Store { + def apply(eventBusEnqueueStore: TweetEventBusStore): Store = + new Store { + override val quotedTweetTakedown: FutureEffect[Event] = + eventBusEnqueueStore.quotedTweetTakedown + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala 
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala new file mode 100644 index 000000000..333103447 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ReplicatingTweetStore.scala @@ -0,0 +1,180 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +/** + * A TweetStore that sends write events to the replication endpoints + * of a ThriftTweetService. + * + * The events that are sent are sufficient to keep the other + * instance's caches up to date. The calls contain sufficient data so + * that the remote caches can be updated without requiring the remote + * Tweetypie to access any other services. + * + * The replication serves two purposes: + * + * 1. Maintain consistency between caches in different data centers. + * + * 2. Keep the caches in all data centers warm, protecting backend + * services. + * + * Correctness bugs are worse than bugs that make data less available. + * All of these events affect data consistency. + * + * IncrFavCount.Event and InsertEvents are the least important + * from a data consistency standpoint, because the only data + * consistency issues are counts, which are cached for a shorter time, + * and are not as noticeable to end users if they fail to occur. + * (Failure to apply them is both less severe and self-correcting.) + * + * Delete and GeoScrub events are critical, because the cached data + * has a long expiration and failure to apply them can result in + * violations of user privacy. + * + * Update events are also important from a legal perspective, since + * the update may be updating the per-country take-down status. + * + * @param svc: The ThriftTweetService implementation that will receive the + * replication events. In practice, this will usually be a + * deferredrpc service. 
+ */ +trait ReplicatingTweetStore + extends TweetStoreBase[ReplicatingTweetStore] + with AsyncInsertTweet.Store + with AsyncDeleteTweet.Store + with AsyncUndeleteTweet.Store + with AsyncSetRetweetVisibility.Store + with AsyncSetAdditionalFields.Store + with AsyncDeleteAdditionalFields.Store + with ScrubGeo.Store + with IncrFavCount.Store + with IncrBookmarkCount.Store + with AsyncTakedown.Store + with AsyncUpdatePossiblySensitiveTweet.Store { + def wrap(w: TweetStore.Wrap): ReplicatingTweetStore = + new TweetStoreWrapper(w, this) + with ReplicatingTweetStore + with AsyncInsertTweet.StoreWrapper + with AsyncDeleteTweet.StoreWrapper + with AsyncUndeleteTweet.StoreWrapper + with AsyncSetRetweetVisibility.StoreWrapper + with AsyncSetAdditionalFields.StoreWrapper + with AsyncDeleteAdditionalFields.StoreWrapper + with ScrubGeo.StoreWrapper + with IncrFavCount.StoreWrapper + with IncrBookmarkCount.StoreWrapper + with AsyncTakedown.StoreWrapper + with AsyncUpdatePossiblySensitiveTweet.StoreWrapper +} + +object ReplicatingTweetStore { + + val Action: AsyncWriteAction.Replication.type = AsyncWriteAction.Replication + + def apply( + svc: ThriftTweetService + ): ReplicatingTweetStore = + new ReplicatingTweetStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event] { e => + svc.replicatedInsertTweet2( + ReplicatedInsertTweet2Request( + e.cachedTweet, + initialTweetUpdateRequest = e.initialTweetUpdateRequest + )) + } + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event] { e => + svc.replicatedDeleteTweet2( + ReplicatedDeleteTweet2Request( + tweet = e.tweet, + isErasure = e.isUserErasure, + isBounceDelete = e.isBounceDelete + ) + ) + } + + override val retryAsyncDeleteTweet: FutureEffect[ + 
TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event] { e => + svc.replicatedUndeleteTweet2(ReplicatedUndeleteTweet2Request(e.cachedTweet)) + } + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + + override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] = + FutureEffect[AsyncSetAdditionalFields.Event] { e => + svc.replicatedSetAdditionalFields(SetAdditionalFieldsRequest(e.additionalFields)) + } + + override val retryAsyncSetAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncSetAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncSetAdditionalFields) + + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { e => + svc.replicatedSetRetweetVisibility( + ReplicatedSetRetweetVisibilityRequest(e.srcId, e.visible) + ) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(Action, asyncSetRetweetVisibility) + + override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] = + FutureEffect[AsyncDeleteAdditionalFields.Event] { e => + svc.replicatedDeleteAdditionalFields( + ReplicatedDeleteAdditionalFieldsRequest(Map(e.tweetId -> e.fieldIds)) + ) + } + + override val retryAsyncDeleteAdditionalFields: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event] + ] = + TweetStore.retry(Action, asyncDeleteAdditionalFields) + + override val scrubGeo: FutureEffect[ScrubGeo.Event] = + FutureEffect[ScrubGeo.Event](e => svc.replicatedScrubGeo(e.tweetIds)) + + override val incrFavCount: FutureEffect[IncrFavCount.Event] = + FutureEffect[IncrFavCount.Event](e => 
svc.replicatedIncrFavCount(e.tweetId, e.delta)) + + override val incrBookmarkCount: FutureEffect[IncrBookmarkCount.Event] = + FutureEffect[IncrBookmarkCount.Event](e => + svc.replicatedIncrBookmarkCount(e.tweetId, e.delta)) + + override val asyncTakedown: FutureEffect[AsyncTakedown.Event] = + FutureEffect[AsyncTakedown.Event](e => svc.replicatedTakedown(e.tweet)) + + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] = + TweetStore.retry(Action, asyncTakedown) + + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[ + AsyncUpdatePossiblySensitiveTweet.Event + ] = + FutureEffect[AsyncUpdatePossiblySensitiveTweet.Event](e => + svc.replicatedUpdatePossiblySensitiveTweet(e.tweet)) + + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event] + ] = + TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala new file mode 100644 index 000000000..4720e0317 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/RetweetArchivalEnqueueStore.scala @@ -0,0 +1,38 @@ +package com.twitter.tweetypie.store +import com.twitter.tweetypie.FutureEffect +import com.twitter.tweetypie.thriftscala.AsyncWriteAction +import com.twitter.tweetypie.thriftscala.RetweetArchivalEvent + +trait RetweetArchivalEnqueueStore + extends TweetStoreBase[RetweetArchivalEnqueueStore] + with AsyncSetRetweetVisibility.Store { + def wrap(w: TweetStore.Wrap): RetweetArchivalEnqueueStore = + new TweetStoreWrapper(w, this) + with RetweetArchivalEnqueueStore + with AsyncSetRetweetVisibility.StoreWrapper +} + +object RetweetArchivalEnqueueStore { + + def apply(enqueue: FutureEffect[RetweetArchivalEvent]): RetweetArchivalEnqueueStore = + new 
RetweetArchivalEnqueueStore { + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { e => + enqueue( + RetweetArchivalEvent( + retweetId = e.retweetId, + srcTweetId = e.srcId, + retweetUserId = e.retweetUserId, + srcTweetUserId = e.srcTweetUserId, + timestampMs = e.timestamp.inMillis, + isArchivingAction = Some(!e.visible) + ) + ) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(AsyncWriteAction.RetweetArchivalEnqueue, asyncSetRetweetVisibility) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala new file mode 100644 index 000000000..f610fb5ce --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScribeMediaTagStore.scala @@ -0,0 +1,42 @@ +package com.twitter.tweetypie +package store + +import com.twitter.servo.util.Scribe +import com.twitter.tweetypie.thriftscala.TweetMediaTagEvent + +/** + * Scribes thrift-encoded TweetMediaTagEvents (from tweet_events.thrift). 
+ */ +trait ScribeMediaTagStore extends TweetStoreBase[ScribeMediaTagStore] with AsyncInsertTweet.Store { + def wrap(w: TweetStore.Wrap): ScribeMediaTagStore = + new TweetStoreWrapper(w, this) with ScribeMediaTagStore with AsyncInsertTweet.StoreWrapper +} + +object ScribeMediaTagStore { + + private def toMediaTagEvent(event: AsyncInsertTweet.Event): Option[TweetMediaTagEvent] = { + val tweet = event.tweet + val taggedUserIds = getMediaTagMap(tweet).values.flatten.flatMap(_.userId).toSet + val timestamp = Time.now.inMilliseconds + if (taggedUserIds.nonEmpty) { + Some(TweetMediaTagEvent(tweet.id, getUserId(tweet), taggedUserIds, Some(timestamp))) + } else { + None + } + } + + def apply( + scribe: FutureEffect[String] = Scribe("tweetypie_media_tag_events") + ): ScribeMediaTagStore = + new ScribeMediaTagStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + Scribe(TweetMediaTagEvent, scribe) + .contramapOption[AsyncInsertTweet.Event](toMediaTagEvent) + + // we don't retry this action + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]] + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala new file mode 100644 index 000000000..262def919 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/ScrubGeo.scala @@ -0,0 +1,164 @@ +package com.twitter.tweetypie +package store + +import com.twitter.conversions.DurationOps._ +import com.twitter.servo.cache.Cached +import com.twitter.servo.cache.CachedValueStatus +import com.twitter.servo.cache.LockingCache +import com.twitter.snowflake.id.SnowflakeId +import com.twitter.tweetypie.backends.GeoScrubEventStore +import com.twitter.tweetypie.thriftscala._ + +/** + * Scrub geo information from Tweets. 
+ */ +object ScrubGeo extends TweetStore.SyncModule { + + case class Event( + tweetIdSet: Set[TweetId], + userId: UserId, + optUser: Option[User], + timestamp: Time, + enqueueMax: Boolean) + extends SyncTweetStoreEvent("scrub_geo") + with TweetStoreTweetEvent { + + val tweetIds: Seq[TweetId] = tweetIdSet.toSeq + + override def toTweetEventData: Seq[TweetEventData] = + tweetIds.map { tweetId => + TweetEventData.TweetScrubGeoEvent( + TweetScrubGeoEvent( + tweetId = tweetId, + userId = userId + ) + ) + } + } + + trait Store { + val scrubGeo: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val scrubGeo: FutureEffect[Event] = wrap(underlying.scrubGeo) + } + + object Store { + def apply( + logLensStore: LogLensStore, + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + eventBusEnqueueStore: TweetEventBusStore, + replicatingStore: ReplicatingTweetStore + ): Store = + new Store { + override val scrubGeo: FutureEffect[Event] = + FutureEffect.inParallel( + logLensStore.scrubGeo, + manhattanStore.scrubGeo, + cachingTweetStore.scrubGeo, + eventBusEnqueueStore.scrubGeo, + replicatingStore.scrubGeo + ) + } + } +} + +object ReplicatedScrubGeo extends TweetStore.ReplicatedModule { + + case class Event(tweetIds: Seq[TweetId]) extends ReplicatedTweetStoreEvent("replicated_scrub_geo") + + trait Store { + val replicatedScrubGeo: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedScrubGeo: FutureEffect[Event] = wrap(underlying.replicatedScrubGeo) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedScrubGeo: FutureEffect[Event] = + cachingTweetStore.replicatedScrubGeo + } + } + } +} + +/** + * Update the timestamp of the user's most recent request to delete all + * location data attached to her tweets. 
We use the timestamp to ensure + * that even if we fail to scrub a particular tweet in storage, we will + * not return geo information with that tweet. + * + * See http://go/geoscrub for more details. + */ +object ScrubGeoUpdateUserTimestamp extends TweetStore.SyncModule { + + case class Event(userId: UserId, timestamp: Time, optUser: Option[User]) + extends SyncTweetStoreEvent("scrub_geo_update_user_timestamp") + with TweetStoreTweetEvent { + + def mightHaveGeotaggedStatuses: Boolean = + optUser.forall(_.account.forall(_.hasGeotaggedStatuses == true)) + + def maxTweetId: TweetId = SnowflakeId.firstIdFor(timestamp + 1.millisecond) - 1 + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.UserScrubGeoEvent( + UserScrubGeoEvent( + userId = userId, + maxTweetId = maxTweetId + ) + ) + ) + + /** + * How to update a geo scrub timestamp cache entry. Always prefers + * the highest timestamp value that is available, regardless of when + * it was added to cache. + */ + def cacheHandler: LockingCache.Handler[Cached[Time]] = { + case Some(c) if c.value.exists(_ >= timestamp) => None + case _ => Some(Cached(Some(timestamp), CachedValueStatus.Found, Time.now)) + } + } + + trait Store { + val scrubGeoUpdateUserTimestamp: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = wrap( + underlying.scrubGeoUpdateUserTimestamp) + } + + object Store { + def apply( + geotagUpdateStore: GizmoduckUserGeotagUpdateStore, + tweetEventBusStore: TweetEventBusStore, + setInManhattan: GeoScrubEventStore.SetGeoScrubTimestamp, + cache: LockingCache[UserId, Cached[Time]] + ): Store = { + val manhattanEffect = + setInManhattan.asFutureEffect + .contramap[Event](e => (e.userId, e.timestamp)) + + val cacheEffect = + FutureEffect[Event](e => cache.lockAndSet(e.userId, e.cacheHandler).unit) + + new Store { + override val scrubGeoUpdateUserTimestamp: FutureEffect[Event] = 
+ FutureEffect.inParallel( + manhattanEffect, + cacheEffect, + geotagUpdateStore.scrubGeoUpdateUserTimestamp, + tweetEventBusStore.scrubGeoUpdateUserTimestamp + ) + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala new file mode 100644 index 000000000..a1dfef0df --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetAdditionalFields.scala @@ -0,0 +1,155 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object SetAdditionalFields extends TweetStore.SyncModule { + + case class Event(additionalFields: Tweet, userId: UserId, timestamp: Time) + extends SyncTweetStoreEvent("set_additional_fields") { + + def toAsyncRequest: AsyncSetAdditionalFieldsRequest = + AsyncSetAdditionalFieldsRequest( + additionalFields = additionalFields, + userId = userId, + timestamp = timestamp.inMillis + ) + } + + trait Store { + val setAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val setAdditionalFields: FutureEffect[Event] = wrap(underlying.setAdditionalFields) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + asyncEnqueueStore: AsyncEnqueueStore, + logLensStore: LogLensStore + ): Store = + new Store { + override val setAdditionalFields: FutureEffect[Event] = + FutureEffect.sequentially( + logLensStore.setAdditionalFields, + manhattanStore.setAdditionalFields, + // Ignore failures but wait for completion to ensure we attempted to update cache before + // running async tasks, in particular publishing an event to EventBus. 
+ cachingTweetStore.ignoreFailuresUponCompletion.setAdditionalFields, + asyncEnqueueStore.setAdditionalFields + ) + } + } +} + +object AsyncSetAdditionalFields extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncSetAdditionalFieldsRequest, + user: User + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + additionalFields = request.additionalFields, + userId = request.userId, + optUser = Some(user), + timestamp = Time.fromMilliseconds(request.timestamp) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event(additionalFields: Tweet, userId: UserId, optUser: Option[User], timestamp: Time) + extends AsyncTweetStoreEvent("async_set_additional_fields") + with TweetStoreTweetEvent { + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetAdditionalFieldsRequest = + AsyncSetAdditionalFieldsRequest( + additionalFields = additionalFields, + retryAction = action, + userId = userId, + timestamp = timestamp.inMillis + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.AdditionalFieldUpdateEvent( + AdditionalFieldUpdateEvent( + updatedFields = additionalFields, + userId = optUser.map(_.id) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncSetAdditionalFields(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.SetAdditionalFields.type = + AsyncWriteEventType.SetAdditionalFields + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncSetAdditionalFields: FutureEffect[Event] + val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncSetAdditionalFields: FutureEffect[Event] = wrap( + 
underlying.asyncSetAdditionalFields) + override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncSetAdditionalFields) + } + + object Store { + def apply( + replicatingStore: ReplicatingTweetStore, + eventBusEnqueueStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = Seq(replicatingStore, eventBusEnqueueStore) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncSetAdditionalFields: FutureEffect[Event] = build( + _.asyncSetAdditionalFields) + override val retryAsyncSetAdditionalFields: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncSetAdditionalFields) + } + } + } +} + +object ReplicatedSetAdditionalFields extends TweetStore.ReplicatedModule { + + case class Event(additionalFields: Tweet) + extends ReplicatedTweetStoreEvent("replicated_set_additional_fields") + + trait Store { + val replicatedSetAdditionalFields: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedSetAdditionalFields: FutureEffect[Event] = wrap( + underlying.replicatedSetAdditionalFields) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedSetAdditionalFields: FutureEffect[Event] = + cachingTweetStore.replicatedSetAdditionalFields + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala new file mode 100644 index 000000000..7f4736f15 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/SetRetweetVisibility.scala @@ -0,0 +1,172 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object SetRetweetVisibility extends 
TweetStore.SyncModule { + + case class Event( + retweetId: TweetId, + visible: Boolean, + srcId: TweetId, + retweetUserId: UserId, + srcTweetUserId: UserId, + timestamp: Time) + extends SyncTweetStoreEvent("set_retweet_visibility") { + def toAsyncRequest: AsyncSetRetweetVisibilityRequest = + AsyncSetRetweetVisibilityRequest( + retweetId = retweetId, + visible = visible, + srcId = srcId, + retweetUserId = retweetUserId, + sourceTweetUserId = srcTweetUserId, + timestamp = timestamp.inMillis + ) + } + + trait Store { + val setRetweetVisibility: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + val setRetweetVisibility: FutureEffect[Event] = wrap(underlying.setRetweetVisibility) + } + + object Store { + + /** + * [[AsyncEnqueueStore]] - use this store to call the asyncSetRetweetVisibility endpoint. + * + * @see [[AsyncSetRetweetVisibility.Store.apply]] + */ + def apply(asyncEnqueueStore: AsyncEnqueueStore): Store = + new Store { + override val setRetweetVisibility: FutureEffect[Event] = + asyncEnqueueStore.setRetweetVisibility + } + } +} + +object AsyncSetRetweetVisibility extends TweetStore.AsyncModule { + + case class Event( + retweetId: TweetId, + visible: Boolean, + srcId: TweetId, + retweetUserId: UserId, + srcTweetUserId: UserId, + timestamp: Time) + extends AsyncTweetStoreEvent("async_set_retweet_visibility") { + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncSetRetweetVisibilityRequest = + AsyncSetRetweetVisibilityRequest( + retweetId = retweetId, + visible = visible, + srcId = srcId, + retweetUserId = retweetUserId, + sourceTweetUserId = srcTweetUserId, + retryAction = action, + timestamp = timestamp.inMillis + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncSetRetweetVisibility(toAsyncRequest(Some(action))) + } + + object Event { + def fromAsyncRequest(req: AsyncSetRetweetVisibilityRequest): TweetStoreEventOrRetry[Event] = 
+ TweetStoreEventOrRetry( + AsyncSetRetweetVisibility.Event( + retweetId = req.retweetId, + visible = req.visible, + srcId = req.srcId, + retweetUserId = req.retweetUserId, + srcTweetUserId = req.sourceTweetUserId, + timestamp = Time.fromMilliseconds(req.timestamp) + ), + req.retryAction, + RetryEvent + ) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.SetRetweetVisibility.type = + AsyncWriteEventType.SetRetweetVisibility + override val scribedTweetOnFailure: None.type = None + } + + trait Store { + val asyncSetRetweetVisibility: FutureEffect[Event] + val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + val asyncSetRetweetVisibility: FutureEffect[Event] = wrap(underlying.asyncSetRetweetVisibility) + val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncSetRetweetVisibility) + } + + object Store { + + /** + * [[TweetIndexingStore]] - archive or unarchive a retweet edge in TFlock RetweetGraph + * [[TweetCountsCacheUpdatingStore]] - modify the retweet count directly in cache. + * [[ReplicatingTweetStore]] - replicate this [[Event]] in the other DC. + * [[RetweetArchivalEnqueueStore]] - publish RetweetArchivalEvent to "retweet_archival_events" event stream. 
+ * + * @see [[ReplicatedSetRetweetVisibility.Store.apply]] + */ + def apply( + tweetIndexingStore: TweetIndexingStore, + tweetCountsCacheUpdatingStore: TweetCountsCacheUpdatingStore, + replicatingTweetStore: ReplicatingTweetStore, + retweetArchivalEnqueueStore: RetweetArchivalEnqueueStore + ): Store = { + val stores: Seq[Store] = + Seq( + tweetIndexingStore, + tweetCountsCacheUpdatingStore, + replicatingTweetStore, + retweetArchivalEnqueueStore + ) + + def build[E <: TweetStoreEvent, S](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncSetRetweetVisibility: FutureEffect[Event] = build( + _.asyncSetRetweetVisibility) + override val retryAsyncSetRetweetVisibility: FutureEffect[TweetStoreRetryEvent[Event]] = + build(_.retryAsyncSetRetweetVisibility) + } + } + } +} + +object ReplicatedSetRetweetVisibility extends TweetStore.ReplicatedModule { + + case class Event(srcId: TweetId, visible: Boolean) + extends ReplicatedTweetStoreEvent("replicated_set_retweet_visibility") + + trait Store { + val replicatedSetRetweetVisibility: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedSetRetweetVisibility: FutureEffect[Event] = + wrap(underlying.replicatedSetRetweetVisibility) + } + + object Store { + + /** + * [[TweetCountsCacheUpdatingStore]] - replicate modifying the retweet count directly in cache. 
+ */ + def apply(tweetCountsCacheUpdatingStore: TweetCountsCacheUpdatingStore): Store = + new Store { + override val replicatedSetRetweetVisibility: FutureEffect[Event] = + tweetCountsCacheUpdatingStore.replicatedSetRetweetVisibility + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala new file mode 100644 index 000000000..cfe3262b5 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/Takedown.scala @@ -0,0 +1,205 @@ +package com.twitter.tweetypie +package store + +import com.twitter.takedown.util.TakedownReasons +import com.twitter.tseng.withholding.thriftscala.TakedownReason +import com.twitter.tweetypie.thriftscala._ + +object Takedown extends TweetStore.SyncModule { + + case class Event( + tweet: Tweet, // for CachingTweetStore / ManhattanTweetStore / ReplicatedTakedown + timestamp: Time, + user: Option[User] = None, + takedownReasons: Seq[TakedownReason] = Seq(), // for EventBus + reasonsToAdd: Seq[TakedownReason] = Seq(), // for Guano + reasonsToRemove: Seq[TakedownReason] = Seq(), // for Guano + auditNote: Option[String] = None, + host: Option[String] = None, + byUserId: Option[UserId] = None, + eventbusEnqueue: Boolean = true, + scribeForAudit: Boolean = true, + // If ManhattanTweetStore should update countryCodes and reasons + updateCodesAndReasons: Boolean = false) + extends SyncTweetStoreEvent("takedown") { + def toAsyncRequest(): AsyncTakedownRequest = + AsyncTakedownRequest( + tweet = tweet, + user = user, + takedownReasons = takedownReasons, + reasonsToAdd = reasonsToAdd, + reasonsToRemove = reasonsToRemove, + scribeForAudit = scribeForAudit, + eventbusEnqueue = eventbusEnqueue, + auditNote = auditNote, + byUserId = byUserId, + host = host, + timestamp = timestamp.inMillis + ) + } + + trait Store { + val takedown: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: 
TweetStoreWrapper[Store] => + override val takedown: FutureEffect[Event] = wrap(underlying.takedown) + } + + object Store { + def apply( + logLensStore: LogLensStore, + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + asyncEnqueueStore: AsyncEnqueueStore + ): Store = + new Store { + override val takedown: FutureEffect[Event] = + FutureEffect.inParallel( + logLensStore.takedown, + FutureEffect.sequentially( + manhattanStore.takedown, + FutureEffect.inParallel( + cachingTweetStore.takedown, + asyncEnqueueStore.takedown + ) + ) + ) + } + } +} + +object AsyncTakedown extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest(request: AsyncTakedownRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + Event( + tweet = request.tweet, + optUser = request.user, + takedownReasons = request.takedownReasons, + reasonsToAdd = request.reasonsToAdd, + reasonsToRemove = request.reasonsToRemove, + auditNote = request.auditNote, + host = request.host, + byUserId = request.byUserId, + eventbusEnqueue = request.eventbusEnqueue, + scribeForAudit = request.scribeForAudit, + timestamp = Time.fromMilliseconds(request.timestamp) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + timestamp: Time, + optUser: Option[User], + takedownReasons: Seq[TakedownReason], // for EventBus + reasonsToAdd: Seq[TakedownReason], // for Guano + reasonsToRemove: Seq[TakedownReason], // for Guano + auditNote: Option[String], // for Guano + host: Option[String], // for Guano + byUserId: Option[UserId], // for Guano + eventbusEnqueue: Boolean, + scribeForAudit: Boolean) + extends AsyncTweetStoreEvent("async_takedown") + with TweetStoreTweetEvent { + + def toAsyncRequest(action: Option[AsyncWriteAction] = None): AsyncTakedownRequest = + AsyncTakedownRequest( + tweet = tweet, + user = optUser, + takedownReasons = takedownReasons, + reasonsToAdd = reasonsToAdd, + reasonsToRemove = reasonsToRemove, + scribeForAudit = 
scribeForAudit, + eventbusEnqueue = eventbusEnqueue, + auditNote = auditNote, + byUserId = byUserId, + host = host, + timestamp = timestamp.inMillis, + retryAction = action + ) + + override def toTweetEventData: Seq[TweetEventData] = + optUser.map { user => + TweetEventData.TweetTakedownEvent( + TweetTakedownEvent( + tweetId = tweet.id, + userId = user.id, + takedownCountryCodes = + takedownReasons.collect(TakedownReasons.reasonToCountryCode).sorted, + takedownReasons = takedownReasons + ) + ) + }.toSeq + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncTakedown(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Takedown.type = AsyncWriteEventType.Takedown + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncTakedown: FutureEffect[Event] + val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncTakedown: FutureEffect[Event] = wrap(underlying.asyncTakedown) + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncTakedown) + } + + object Store { + def apply( + replicatingStore: ReplicatingTweetStore, + guanoStore: GuanoServiceStore, + eventBusEnqueueStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = + Seq( + replicatingStore, + guanoStore, + eventBusEnqueueStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncTakedown: FutureEffect[Event] = build(_.asyncTakedown) + override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[Event]] = build( + _.retryAsyncTakedown) + } + } + } +} + +object 
ReplicatedTakedown extends TweetStore.ReplicatedModule {
+
+  /** Replicated form of a takedown; it carries only the affected tweet. */
+  case class Event(tweet: Tweet) extends ReplicatedTweetStoreEvent("takedown")
+
+  /** A store that exposes an effect for replicated takedown events. */
+  trait Store {
+    val replicatedTakedown: FutureEffect[Event]
+  }
+
+  /** Passes the underlying store's effect through the wrapper's `wrap`. */
+  trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] =>
+    override val replicatedTakedown: FutureEffect[Event] =
+      wrap(underlying.replicatedTakedown)
+  }
+
+  object Store {
+
+    /** Builds a [[Store]] that hands replicated takedowns to the caching tweet store. */
+    def apply(cachingTweetStore: CachingTweetStore): Store =
+      new Store {
+        override val replicatedTakedown: FutureEffect[Event] =
+          cachingTweetStore.replicatedTakedown
+      }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala
new file mode 100644
index 000000000..14b83d878
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TlsTimelineUpdatingStore.scala
@@ -0,0 +1,150 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.timelineservice.{thriftscala => tls}
+import com.twitter.tweetypie.backends.TimelineService
+import com.twitter.tweetypie.thriftscala._
+
+// Store interface covering the async insert, delete and undelete tweet events.
+trait TlsTimelineUpdatingStore
+    extends TweetStoreBase[TlsTimelineUpdatingStore]
+    with AsyncInsertTweet.Store
+    with AsyncDeleteTweet.Store
+    with AsyncUndeleteTweet.Store {
+
+  // Returns a copy of this store in which every effect is passed through `w`.
+  def wrap(w: TweetStore.Wrap): TlsTimelineUpdatingStore =
+    new TweetStoreWrapper(w, this)
+      with TlsTimelineUpdatingStore
+      with AsyncInsertTweet.StoreWrapper
+      with AsyncDeleteTweet.StoreWrapper
+      with AsyncUndeleteTweet.StoreWrapper
+}
+
+/**
+ * An implementation of TweetStore that sends update events to
+ * the Timeline Service.
+ */
+object TlsTimelineUpdatingStore {
+  val Action: AsyncWriteAction.TimelineUpdate.type = AsyncWriteAction.TimelineUpdate
+
+  /**
+   * Builds the [[tls.FullTweet]] representation of a Tweetypie tweet.
+   *
+   * @param hasMedia predicate used to fill the `hasMedia` field
+   * @param tweet the tweet to convert
+   * @param explicitCreatedAt when Some, used for `createdAtMs` instead of `getTimestamp(tweet)`
+   * @param noteTweetMentionedUserIds when Some, used for `mentionedUserIds` instead of the
+   *   user ids taken from the tweet's mentions
+   */
+  def tweetToTLSFullTweet(
+    hasMedia: Tweet => Boolean
+  )(
+    tweet: Tweet,
+    explicitCreatedAt: Option[Time],
+    noteTweetMentionedUserIds: Option[Seq[Long]]
+  ): tls.FullTweet = {
+    val createdAt = explicitCreatedAt.getOrElse(getTimestamp(tweet))
+
+    val retweetInfo = getShare(tweet).map { s =>
+      tls.Retweet(
+        sourceUserId = s.sourceUserId,
+        sourceTweetId = s.sourceStatusId,
+        parentTweetId = Some(s.parentStatusId)
+      )
+    }
+
+    val replyInfo = getReply(tweet).map { r =>
+      tls.Reply(
+        inReplyToUserId = r.inReplyToUserId,
+        inReplyToTweetId = r.inReplyToStatusId
+      )
+    }
+
+    val quoteInfo = tweet.quotedTweet.map { q =>
+      tls.Quote(
+        quotedUserId = q.userId,
+        quotedTweetId = q.tweetId
+      )
+    }
+
+    tls.FullTweet(
+      userId = getUserId(tweet),
+      tweetId = tweet.id,
+      mentionedUserIds =
+        noteTweetMentionedUserIds.getOrElse(getMentions(tweet).flatMap(_.userId)).toSet,
+      isNullcasted = TweetLenses.nullcast.get(tweet),
+      // a tweet without a conversation id is treated as the root of its own conversation
+      conversationId = TweetLenses.conversationId.get(tweet).getOrElse(tweet.id),
+      narrowcastGeos = Set.empty,
+      createdAtMs = createdAt.inMillis,
+      hasMedia = hasMedia(tweet),
+      directedAtUserId = TweetLenses.directedAtUser.get(tweet).map(_.userId),
+      retweet = retweetInfo,
+      reply = replyInfo,
+      quote = quoteInfo,
+      mediaTags = tweet.mediaTags,
+      text = Some(getText(tweet))
+    )
+  }
+
+  val logger: Logger = Logger(getClass)
+
+  /**
+   * Returns a callback that logs an error and bumps the
+   * `processEvent2_validation_failed` counter when a result has type ValidationFailed;
+   * every other result is ignored.
+   */
+  def logValidationFailed(stats: StatsReceiver): tls.ProcessEventResult => Unit = { result =>
+    result match {
+      case tls.ProcessEventResult(tls.ProcessEventResultType.ValidationFailed, errors) =>
+        logger.error(s"Validation Failed in processEvent2: $errors")
+        stats.counter("processEvent2_validation_failed").incr()
+      case _ =>
+        ()
+    }
+  }
+
+  /**
+   * Builds a store whose insert, undelete and delete effects each convert the event into
+   * the matching `tls.Event` and pass it to `processEvent2`. The retry effects are built
+   * with `TweetStore.retry` from [[Action]] and the corresponding effect.
+   */
+  def apply(
+    processEvent2: TimelineService.ProcessEvent2,
+    hasMedia: Tweet => Boolean,
+    stats: StatsReceiver
+  ): TlsTimelineUpdatingStore = {
+    val asTlsTweet = tweetToTLSFullTweet(hasMedia) _
+
+    // send the event, then inspect the result for validation failures
+    val submit =
+      processEvent2.andThen(FutureArrow.fromFunction(logValidationFailed(stats)))
+
+    new TlsTimelineUpdatingStore {
+      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
+        submit
+          .contramap[AsyncInsertTweet.Event] { e =>
+            val fullTweet = asTlsTweet(e.tweet, Some(e.timestamp), e.noteTweetMentionedUserIds)
+            tls.Event.FullTweetCreate(
+              tls.FullTweetCreateEvent(
+                fullTweet,
+                e.timestamp.inMillis,
+                featureContext = e.featureContext
+              )
+            )
+          }
+          .asFutureEffect[AsyncInsertTweet.Event]
+
+      override val retryAsyncInsertTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncInsertTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncInsertTweet)
+
+      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
+        submit
+          .contramap[AsyncUndeleteTweet.Event] { e =>
+            val fullTweet = asTlsTweet(e.tweet, None, None)
+            tls.Event.FullTweetRestore(
+              tls.FullTweetRestoreEvent(fullTweet, e.deletedAt.map(_.inMillis))
+            )
+          }
+          .asFutureEffect[AsyncUndeleteTweet.Event]
+
+      override val retryAsyncUndeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncUndeleteTweet)
+
+      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
+        submit
+          .contramap[AsyncDeleteTweet.Event] { e =>
+            val fullTweet = asTlsTweet(e.tweet, None, None)
+            tls.Event.FullTweetDelete(
+              tls.FullTweetDeleteEvent(
+                fullTweet,
+                e.timestamp.inMillis,
+                isUserErasure = Some(e.isUserErasure),
+                isBounceDelete = Some(e.isBounceDelete)
+              )
+            )
+          }
+          .asFutureEffect[AsyncDeleteTweet.Event]
+
+      override val retryAsyncDeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncDeleteTweet)
+    }
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala
new file mode 100644
index 000000000..3f1d3e288
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetCountsCacheUpdatingStore.scala
@@ -0,0 +1,358 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.concurrent.Serialized
+import com.twitter.servo.cache.LockingCache.Handler
+import com.twitter.servo.cache._
+import com.twitter.tweetypie.repository.BookmarksKey
+import com.twitter.tweetypie.repository.FavsKey
+import com.twitter.tweetypie.repository.QuotesKey
+import com.twitter.tweetypie.repository.RepliesKey
+import com.twitter.tweetypie.repository.RetweetsKey
+import com.twitter.tweetypie.repository.TweetCountKey
+import com.twitter.util.Duration
+import com.twitter.util.Timer
+import scala.collection.mutable
+
+// Store interface for every event type that touches cached tweet counts. `wrap` must mix
+// in one StoreWrapper per Store listed in the extends clause.
+trait TweetCountsCacheUpdatingStore
+    extends TweetStoreBase[TweetCountsCacheUpdatingStore]
+    with InsertTweet.Store
+    with AsyncInsertTweet.Store
+    with ReplicatedInsertTweet.Store
+    with DeleteTweet.Store
+    with AsyncDeleteTweet.Store
+    with ReplicatedDeleteTweet.Store
+    with UndeleteTweet.Store
+    with ReplicatedUndeleteTweet.Store
+    with AsyncIncrFavCount.Store
+    with ReplicatedIncrFavCount.Store
+    with AsyncIncrBookmarkCount.Store
+    with ReplicatedIncrBookmarkCount.Store
+    with AsyncSetRetweetVisibility.Store
+    with ReplicatedSetRetweetVisibility.Store
+    with Flush.Store {
+  def wrap(w: TweetStore.Wrap): TweetCountsCacheUpdatingStore = {
+    new TweetStoreWrapper(w, this)
+      with TweetCountsCacheUpdatingStore
+      with InsertTweet.StoreWrapper
+      with AsyncInsertTweet.StoreWrapper
+      with ReplicatedInsertTweet.StoreWrapper
+      with DeleteTweet.StoreWrapper
+      with AsyncDeleteTweet.StoreWrapper
+      with ReplicatedDeleteTweet.StoreWrapper
+      with UndeleteTweet.StoreWrapper
+      with ReplicatedUndeleteTweet.StoreWrapper
+      with AsyncIncrFavCount.StoreWrapper
+      with ReplicatedIncrFavCount.StoreWrapper
+      with AsyncIncrBookmarkCount.StoreWrapper
+      with ReplicatedIncrBookmarkCount.StoreWrapper
+      with AsyncSetRetweetVisibility.StoreWrapper
+      with ReplicatedSetRetweetVisibility.StoreWrapper
+      with Flush.StoreWrapper
+  }
+}
+
+/**
+ * An implementation of TweetStore that updates tweet-specific counts in
+ * the CountsCache.
+ */
+object TweetCountsCacheUpdatingStore {
+  // A single cache operation applied to one count key.
+  private type Action = TweetCountKey => Future[Unit]
+
+  // The five count keys owned by the tweet itself: retweets, replies, favs, quotes, bookmarks.
+  def keys(tweetId: TweetId): Seq[TweetCountKey] =
+    Seq(
+      RetweetsKey(tweetId),
+      RepliesKey(tweetId),
+      FavsKey(tweetId),
+      QuotesKey(tweetId),
+      BookmarksKey(tweetId))
+
+  // Count keys of OTHER tweets affected by this tweet: the replied-to tweet's replies key,
+  // the quoted tweet's quotes key and the retweeted source tweet's retweets key. Absent
+  // relationships contribute nothing.
+  def relatedKeys(tweet: Tweet): Seq[TweetCountKey] =
+    Seq(
+      getReply(tweet).flatMap(_.inReplyToStatusId).map(RepliesKey(_)),
+      getQuotedTweet(tweet).map(quotedTweet => QuotesKey(quotedTweet.tweetId)),
+      getShare(tweet).map(share => RetweetsKey(share.sourceStatusId))
+    ).flatten
+
+  // pick all keys except quotes key
+  def relatedKeysWithoutQuotesKey(tweet: Tweet): Seq[TweetCountKey] =
+    relatedKeys(tweet).filterNot(_.isInstanceOf[QuotesKey])
+
+  // Builds the store on top of `countsStore`. The retry effects defined below are all
+  // FutureEffect.unit.
+  def apply(countsStore: CachedCountsStore): TweetCountsCacheUpdatingStore = {
+    val incr: Action = key => countsStore.incr(key, 1)
+    val decr: Action = key => countsStore.incr(key, -1)
+    val init: Action = key => countsStore.add(key, 0)
+    val delete: Action = key => countsStore.delete(key)
+
+    // Adds a zero count for each of the tweet's own keys.
+    def initCounts(tweetId: TweetId) = Future.join(keys(tweetId).map(init))
+
+    // Increments the related keys; the quotes key is left out when excludeQuotesKey is true.
+    def incrRelatedCounts(tweet: Tweet, excludeQuotesKey: Boolean = false) = {
+      Future.join {
+        if (excludeQuotesKey) {
+          relatedKeysWithoutQuotesKey(tweet).map(incr)
+        } else {
+          relatedKeys(tweet).map(incr)
+        }
+      }
+    }
+
+    // Deletes each of the tweet's own keys.
+    def deleteCounts(tweetId: TweetId) = Future.join(keys(tweetId).map(delete))
+
+    // Decrement all the counters if is the last quote, otherwise avoid decrementing quote counters
+    def decrRelatedCounts(tweet: Tweet, isLastQuoteOfQuoter: Boolean = false) = {
+      Future.join {
+        if (isLastQuoteOfQuoter) {
+          relatedKeys(tweet).map(decr)
+        } else {
+          relatedKeysWithoutQuotesKey(tweet).map(decr)
+        }
+      }
+    }
+
+    def updateFavCount(tweetId: TweetId, delta: Int) =
+      countsStore.incr(FavsKey(tweetId), delta).unit
+
+    def updateBookmarkCount(tweetId: TweetId, delta: Int) =
+      countsStore.incr(BookmarksKey(tweetId), delta).unit
+
+    // these are used specifically for setRetweetVisibility
+    def incrRetweetCount(tweetId: TweetId) = incr(RetweetsKey(tweetId))
+    def decrRetweetCount(tweetId: TweetId) = decr(RetweetsKey(tweetId))
+
+    new TweetCountsCacheUpdatingStore {
+      override val insertTweet: FutureEffect[InsertTweet.Event] =
+        FutureEffect[InsertTweet.Event](e => initCounts(e.tweet.id))
+
+      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
+        FutureEffect[AsyncInsertTweet.Event] { e =>
+          incrRelatedCounts(e.cachedTweet.tweet, e.quoterHasAlreadyQuotedTweet)
+        }
+
+      override val retryAsyncInsertTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncInsertTweet.Event]
+      ] =
+        FutureEffect.unit[TweetStoreRetryEvent[AsyncInsertTweet.Event]]
+
+      // A replicated insert does both the init and the related-count increments.
+      override val replicatedInsertTweet: FutureEffect[ReplicatedInsertTweet.Event] =
+        FutureEffect[ReplicatedInsertTweet.Event] { e =>
+          Future
+            .join(
+              initCounts(e.tweet.id),
+              incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet)).unit
+        }
+
+      override val deleteTweet: FutureEffect[DeleteTweet.Event] =
+        FutureEffect[DeleteTweet.Event](e => deleteCounts(e.tweet.id))
+
+      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
+        FutureEffect[AsyncDeleteTweet.Event](e => decrRelatedCounts(e.tweet, e.isLastQuoteOfQuoter))
+
+      override val retryAsyncDeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
+      ] =
+        FutureEffect.unit[TweetStoreRetryEvent[AsyncDeleteTweet.Event]]
+
+      // A replicated delete does both the key deletion and the related-count decrements.
+      override val replicatedDeleteTweet: FutureEffect[ReplicatedDeleteTweet.Event] =
+        FutureEffect[ReplicatedDeleteTweet.Event] { e =>
+          Future
+            .join(deleteCounts(e.tweet.id), decrRelatedCounts(e.tweet, e.isLastQuoteOfQuoter)).unit
+        }
+
+      override val undeleteTweet: FutureEffect[UndeleteTweet.Event] =
+        FutureEffect[UndeleteTweet.Event] { e =>
+          incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet)
+        }
+
+      override val replicatedUndeleteTweet: FutureEffect[ReplicatedUndeleteTweet.Event] =
+        FutureEffect[ReplicatedUndeleteTweet.Event] { e =>
+          incrRelatedCounts(e.tweet, e.quoterHasAlreadyQuotedTweet)
+        }
+
+      override val asyncIncrFavCount: FutureEffect[AsyncIncrFavCount.Event] =
+        FutureEffect[AsyncIncrFavCount.Event](e => updateFavCount(e.tweetId, e.delta))
+
+      override val replicatedIncrFavCount: FutureEffect[ReplicatedIncrFavCount.Event] =
+        FutureEffect[ReplicatedIncrFavCount.Event](e => updateFavCount(e.tweetId, e.delta))
+
+      override val asyncIncrBookmarkCount: FutureEffect[AsyncIncrBookmarkCount.Event] =
+        FutureEffect[AsyncIncrBookmarkCount.Event](e => updateBookmarkCount(e.tweetId, e.delta))
+
+      override val replicatedIncrBookmarkCount: FutureEffect[ReplicatedIncrBookmarkCount.Event] =
+        FutureEffect[ReplicatedIncrBookmarkCount.Event] { e =>
+          updateBookmarkCount(e.tweetId, e.delta)
+        }
+
+      // Visible => the source tweet gains a retweet; not visible => it loses one.
+      override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] =
+        FutureEffect[AsyncSetRetweetVisibility.Event] { e =>
+          if (e.visible) incrRetweetCount(e.srcId) else decrRetweetCount(e.srcId)
+        }
+
+      override val retryAsyncSetRetweetVisibility: FutureEffect[
+        TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event]
+      ] =
+        FutureEffect.unit[TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event]]
+
+      override val replicatedSetRetweetVisibility: FutureEffect[
+        ReplicatedSetRetweetVisibility.Event
+      ] =
+        FutureEffect[ReplicatedSetRetweetVisibility.Event] { e =>
+          if (e.visible) incrRetweetCount(e.srcId) else decrRetweetCount(e.srcId)
+        }
+
+      // Deletes the count keys of every flushed tweet, but only when the event asks for it.
+      override val flush: FutureEffect[Flush.Event] =
+        FutureEffect[Flush.Event] { e => Future.collect(e.tweetIds.map(deleteCounts)).unit }
+          .onlyIf(_.flushCounts)
+    }
+  }
+}
+
+/**
+ * A simple trait around the cache operations needed by TweetCountsCacheUpdatingStore.
+ */
+trait CachedCountsStore {
+
+  /** Stores `count` under `key`. */
+  def add(key: TweetCountKey, count: Count): Future[Unit]
+
+  /** Removes `key`. */
+  def delete(key: TweetCountKey): Future[Unit]
+
+  /** Adjusts the count stored under `key` by `delta`, which may be negative. */
+  def incr(key: TweetCountKey, delta: Count): Future[Unit]
+}
+
+object CachedCountsStore {
+
+  /**
+   * Adapts a LockingCache of cached counts to the [[CachedCountsStore]] interface.
+   * `incr` goes through `lockAndSet` with an [[IncrDecrHandler]].
+   */
+  def fromLockingCache(cache: LockingCache[TweetCountKey, Cached[Count]]): CachedCountsStore =
+    new CachedCountsStore {
+      def add(key: TweetCountKey, count: Count): Future[Unit] =
+        cache.add(key, toCached(count)).unit
+
+      def delete(key: TweetCountKey): Future[Unit] =
+        cache.delete(key).unit
+
+      def incr(key: TweetCountKey, delta: Count): Future[Unit] =
+        cache.lockAndSet(key, IncrDecrHandler(delta)).unit
+    }
+
+  /** Wraps a count as a Found cache entry whose timestamps are both set to the current time. */
+  def toCached(count: Count): Cached[Count] = {
+    val now = Time.now
+    Cached(Some(count), CachedValueStatus.Found, now, Some(now))
+  }
+
+  /**
+   * Cache handler that adds `delta` to an existing count. It returns None when there is
+   * no cache entry or the entry holds no value, so an increment never creates a count.
+   */
+  case class IncrDecrHandler(delta: Long) extends Handler[Cached[Count]] {
+    override def apply(inCache: Option[Cached[Count]]): Option[Cached[Count]] =
+      inCache.flatMap(incrCount)
+
+    private[this] def incrCount(oldCached: Cached[Count]): Option[Cached[Count]] =
+      oldCached.value.map(oldCount => oldCached.copy(value = Some(saferIncr(oldCount))))
+
+    // counts never go below zero, even if more decrements than increments arrive
+    private[this] def saferIncr(value: Long): Long = math.max(0L, value + delta)
+
+    override lazy val toString: String = s"IncrDecrHandler($delta)"
+  }
+
+  /**
+   * Thrown when an update cannot be queued because the queue is at capacity.
+   *
+   * This is a singleton, so a captured stack trace would be the one from the first time
+   * the object was initialised and would mislead on every later throw. NoStackTrace
+   * suppresses it.
+   */
+  object QueueIsFullException extends Exception with scala.util.control.NoStackTrace
+}
+
+/**
+ * An implementation of CachedCountsStore that can queue and aggregate multiple incr
+ * updates to the same key together. Currently, updates for a key only start to aggregate
+ * after there is a failure to incr on the underlying store, which often indicates contention
+ * due to a high level of updates. After a failure, a key is promoted into a "tracked" state,
+ * and subsequent updates are aggregated together. Periodically, the aggregated updates will
+ * be flushed. If the flush for a key succeeds and no more updates have come in during the flush,
+ * then the key is demoted out of the tracked state.
Otherwise, updates continue to aggregate
+ * until the next flush attempt.
+ *
+ * All access to `pendingUpdates` goes through `serialized`.
+ *
+ * @param underlying store that receives direct and flushed increments
+ * @param timer schedules the periodic flush
+ * @param flushInterval period between flush attempts
+ * @param maxSize maximum number of keys that may be tracked at once; promoting a new key
+ *   beyond this fails with `CachedCountsStore.QueueIsFullException`
+ * @param stats receives the promotion/demotion/overflow/flush-failure counters and the
+ *   `tracking` gauge
+ */
+class AggregatingCachedCountsStore(
+  underlying: CachedCountsStore,
+  timer: Timer,
+  flushInterval: Duration,
+  maxSize: Int,
+  stats: StatsReceiver)
+    extends CachedCountsStore
+    with Serialized {
+  private[this] val pendingUpdates: mutable.Map[TweetCountKey, Count] =
+    new mutable.HashMap[TweetCountKey, Count]
+
+  // Written inside `serialized` and read by the gauge below, outside of it; @volatile
+  // makes the latest value visible to the reader.
+  @volatile private[this] var trackingCount: Int = 0
+
+  private[this] val promotionCounter = stats.counter("promotions")
+  private[this] val demotionCounter = stats.counter("demotions")
+  private[this] val updateCounter = stats.counter("aggregated_updates")
+  private[this] val overflowCounter = stats.counter("overflows")
+  private[this] val flushFailureCounter = stats.counter("flush_failures")
+  private[this] val trackingCountGauge = stats.addGauge("tracking")(trackingCount.toFloat)
+
+  // NOTE(review): the schedule does not wait for a previous flush to finish, so two
+  // flushes could presumably snapshot the same pending delta -- confirm whether that matters.
+  timer.schedule(flushInterval) { flush() }
+
+  def add(key: TweetCountKey, count: Count): Future[Unit] =
+    underlying.add(key, count)
+
+  def delete(key: TweetCountKey): Future[Unit] =
+    underlying.delete(key)
+
+  /**
+   * Aggregates the delta if the key is already tracked; otherwise tries the underlying
+   * store directly and, if that fails for any reason, starts aggregating for the key.
+   */
+  def incr(key: TweetCountKey, delta: Count): Future[Unit] =
+    aggregateIfTracked(key, delta).flatMap {
+      case true => Future.Unit
+      case false =>
+        underlying
+          .incr(key, delta)
+          .rescue { case _ => aggregate(key, delta) }
+    }
+
+  /**
+   * Queues an update to be aggregated and applied to a key at a later time, but only if we are
+   * already aggregating updates for the key.
+   *
+   * @return true the delta was aggregated, false if the key is not being tracked
+   *         and the incr should be attempted directly.
+   */
+  private[this] def aggregateIfTracked(key: TweetCountKey, delta: Count): Future[Boolean] =
+    serialized {
+      pendingUpdates.get(key) match {
+        case None => false
+        case Some(current) =>
+          updateCounter.incr()
+          pendingUpdates(key) = current + delta
+          true
+      }
+    }
+
+  /**
+   * Queues an update to be aggregated and applied to a key at a later time.
+   *
+   * A key that is not yet tracked is promoted, unless `maxSize` keys are already tracked,
+   * in which case the returned Future fails with `CachedCountsStore.QueueIsFullException`.
+   * A key whose aggregated delta reaches zero is removed (demoted).
+   */
+  private[this] def aggregate(key: TweetCountKey, delta: Count): Future[Unit] =
+    serialized {
+      val alreadyTracked = pendingUpdates.contains(key)
+
+      if (!alreadyTracked) {
+        if (pendingUpdates.size < maxSize)
+          promotionCounter.incr()
+        else {
+          overflowCounter.incr()
+          throw CachedCountsStore.QueueIsFullException
+        }
+      }
+
+      pendingUpdates.getOrElse(key, 0L) + delta match {
+        case 0L =>
+          pendingUpdates.remove(key)
+          demotionCounter.incr()
+
+        case aggregatedDelta =>
+          pendingUpdates(key) = aggregatedDelta
+      }
+
+      trackingCount = pendingUpdates.size
+    }
+
+  private[this] def flush(): Future[Unit] =
+    // make a copy of the updates to flush, so that updates can continue to be queued
+    // while the flush is in progress. if an individual flush succeeds, then we
+    // go back and update pendingUpdates.
+    serialized { pendingUpdates.toList }.flatMap { updates =>
+      Future.join(updates.map { case (key, delta) => flush(key, delta) })
+    }
+
+  // Applies one snapshotted delta to the underlying store and, on success, subtracts it
+  // from the pending total. Any failure is counted and swallowed; the delta stays
+  // pending for the next flush.
+  private[this] def flush(key: TweetCountKey, delta: Count): Future[Unit] =
+    underlying
+      .incr(key, delta)
+      .flatMap(_ => aggregate(key, -delta))
+      .handle { case _ => flushFailureCounter.incr() }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala
new file mode 100644
index 000000000..e846c01ea
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetEventBusStore.scala
@@ -0,0 +1,209 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.tweetypie.thriftscala._
+
+// Store interface for every event type that is published as a TweetEvent.
+trait TweetEventBusStore
+    extends TweetStoreBase[TweetEventBusStore]
+    with AsyncDeleteAdditionalFields.Store
+    with AsyncDeleteTweet.Store
+    with AsyncInsertTweet.Store
+    with AsyncSetAdditionalFields.Store
+    with AsyncTakedown.Store
+    with AsyncUndeleteTweet.Store
+    with AsyncUpdatePossiblySensitiveTweet.Store
+    with QuotedTweetDelete.Store
+    with QuotedTweetTakedown.Store
+    with ScrubGeoUpdateUserTimestamp.Store
+    with ScrubGeo.Store { self =>
+  def wrap(w: TweetStore.Wrap): TweetEventBusStore =
+    new TweetStoreWrapper(w, this)
+      with TweetEventBusStore
+      with AsyncDeleteAdditionalFields.StoreWrapper
+      with AsyncDeleteTweet.StoreWrapper
+      with AsyncInsertTweet.StoreWrapper
+      with AsyncSetAdditionalFields.StoreWrapper
+      with AsyncTakedown.StoreWrapper
+      with AsyncUndeleteTweet.StoreWrapper
+      with AsyncUpdatePossiblySensitiveTweet.StoreWrapper
+      with QuotedTweetDelete.StoreWrapper
+      with QuotedTweetTakedown.StoreWrapper
+      with ScrubGeo.StoreWrapper
+      with ScrubGeoUpdateUserTimestamp.StoreWrapper
+
+  // Combines this store with `that`: each effect of the result is this store's effect
+  // composed with the same-named effect of `that` via FutureEffect.inParallel.
+  def inParallel(that: TweetEventBusStore): TweetEventBusStore =
+    new TweetEventBusStore {
+      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
+        self.asyncInsertTweet.inParallel(that.asyncInsertTweet)
+      override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] =
+        self.asyncDeleteAdditionalFields.inParallel(that.asyncDeleteAdditionalFields)
+      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
+        self.asyncDeleteTweet.inParallel(that.asyncDeleteTweet)
+      override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] =
+        self.asyncSetAdditionalFields.inParallel(that.asyncSetAdditionalFields)
+      override val asyncTakedown: FutureEffect[AsyncTakedown.Event] =
+        self.asyncTakedown.inParallel(that.asyncTakedown)
+      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
+        self.asyncUndeleteTweet.inParallel(that.asyncUndeleteTweet)
+      override val asyncUpdatePossiblySensitiveTweet: FutureEffect[
+        AsyncUpdatePossiblySensitiveTweet.Event
+      ] =
+        self.asyncUpdatePossiblySensitiveTweet.inParallel(that.asyncUpdatePossiblySensitiveTweet)
+      override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] =
+        self.quotedTweetDelete.inParallel(that.quotedTweetDelete)
+      override val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] =
+        self.quotedTweetTakedown.inParallel(that.quotedTweetTakedown)
+      override val retryAsyncInsertTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncInsertTweet.Event]
+      ] =
+        self.retryAsyncInsertTweet.inParallel(that.retryAsyncInsertTweet)
+      override val retryAsyncDeleteAdditionalFields: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event]
+      ] =
+        self.retryAsyncDeleteAdditionalFields.inParallel(that.retryAsyncDeleteAdditionalFields)
+      override val retryAsyncDeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
+      ] =
+        self.retryAsyncDeleteTweet.inParallel(that.retryAsyncDeleteTweet)
+      override val retryAsyncUndeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
+      ] =
+        self.retryAsyncUndeleteTweet.inParallel(that.retryAsyncUndeleteTweet)
+      override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event]
+      ] =
+        self.retryAsyncUpdatePossiblySensitiveTweet.inParallel(
+          that.retryAsyncUpdatePossiblySensitiveTweet
+        )
+      override val retryAsyncSetAdditionalFields: FutureEffect[
+        TweetStoreRetryEvent[AsyncSetAdditionalFields.Event]
+      ] =
+        self.retryAsyncSetAdditionalFields.inParallel(that.retryAsyncSetAdditionalFields)
+      override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] =
+        self.retryAsyncTakedown.inParallel(that.retryAsyncTakedown)
+      override val scrubGeo: FutureEffect[ScrubGeo.Event] =
+        self.scrubGeo.inParallel(that.scrubGeo)
+      override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] =
+        self.scrubGeoUpdateUserTimestamp.inParallel(that.scrubGeoUpdateUserTimestamp)
+    }
+}
+
+object TweetEventBusStore {
+  val Action: AsyncWriteAction = AsyncWriteAction.EventBusEnqueue
+
+  // None when the user carries no safety information.
+  def safetyTypeForUser(user: User): Option[SafetyType] =
+    user.safety.map(userSafetyToSafetyType)
+
+  // isProtected is checked first, so a user that is both protected and suspended maps
+  // to Private.
+  def userSafetyToSafetyType(safety: Safety): SafetyType =
+    if (safety.isProtected) {
+      SafetyType.Private
+    } else if (safety.suspended) {
+      SafetyType.Restricted
+    } else {
+      SafetyType.Public
+    }
+
+  // Builds a store in which every event is converted to zero or more TweetEvents
+  // (one per element of `toTweetEventData`), each passed to `eventStore`.
+  def apply(
+    eventStore: FutureEffect[TweetEvent]
+  ): TweetEventBusStore = {
+
+    def toTweetEvents(event: TweetStoreTweetEvent): Seq[TweetEvent] =
+      event.toTweetEventData.map { data =>
+        TweetEvent(
+          data,
+          TweetEventFlags(
+            timestampMs = event.timestamp.inMillis,
+            safetyType = event.optUser.flatMap(safetyTypeForUser)
+          )
+        )
+      }
+
+    def enqueueEvents[E <: TweetStoreTweetEvent]: FutureEffect[E] =
+      eventStore.liftSeq.contramap[E](toTweetEvents)
+
+    new TweetEventBusStore {
+      override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] =
+        enqueueEvents[AsyncInsertTweet.Event]
+
+      override val asyncDeleteAdditionalFields: FutureEffect[AsyncDeleteAdditionalFields.Event] =
+        enqueueEvents[AsyncDeleteAdditionalFields.Event]
+
+      override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] =
+        enqueueEvents[AsyncDeleteTweet.Event]
+
+      override val asyncSetAdditionalFields: FutureEffect[AsyncSetAdditionalFields.Event] =
+        enqueueEvents[AsyncSetAdditionalFields.Event]
+
+      // takedowns are only published when the event asks for it
+      override val asyncTakedown: FutureEffect[AsyncTakedown.Event] =
+        enqueueEvents[AsyncTakedown.Event]
+          .onlyIf(_.eventbusEnqueue)
+
+      override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] =
+        enqueueEvents[AsyncUndeleteTweet.Event]
+
+      override val asyncUpdatePossiblySensitiveTweet: FutureEffect[
+        AsyncUpdatePossiblySensitiveTweet.Event
+      ] =
+        enqueueEvents[AsyncUpdatePossiblySensitiveTweet.Event]
+
+      override val quotedTweetDelete: FutureEffect[QuotedTweetDelete.Event] =
+        enqueueEvents[QuotedTweetDelete.Event]
+
+      override val quotedTweetTakedown: FutureEffect[QuotedTweetTakedown.Event] =
+        enqueueEvents[QuotedTweetTakedown.Event]
+
+      // The retry vals below refer to the effect vals above, so they must stay after them.
+      override val retryAsyncInsertTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncInsertTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncInsertTweet)
+
+      override val retryAsyncDeleteAdditionalFields: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteAdditionalFields.Event]
+      ] =
+        TweetStore.retry(Action, asyncDeleteAdditionalFields)
+
+      override val retryAsyncDeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncDeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncDeleteTweet)
+
+      override val retryAsyncUndeleteTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUndeleteTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncUndeleteTweet)
+
+      override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[
+        TweetStoreRetryEvent[AsyncUpdatePossiblySensitiveTweet.Event]
+      ] =
+        TweetStore.retry(Action, asyncUpdatePossiblySensitiveTweet)
+
+      override val retryAsyncSetAdditionalFields: FutureEffect[
+        TweetStoreRetryEvent[AsyncSetAdditionalFields.Event]
+      ] =
+        TweetStore.retry(Action, asyncSetAdditionalFields)
+
+      override val retryAsyncTakedown: FutureEffect[TweetStoreRetryEvent[AsyncTakedown.Event]] =
+        TweetStore.retry(Action, asyncTakedown)
+
+      override val scrubGeo: FutureEffect[ScrubGeo.Event] =
+        enqueueEvents[ScrubGeo.Event]
+
+      override val scrubGeoUpdateUserTimestamp: FutureEffect[ScrubGeoUpdateUserTimestamp.Event] =
+        enqueueEvents[ScrubGeoUpdateUserTimestamp.Event]
+    }
+  }
+}
+
+/**
+ * Scrubs inappropriate fields from tweet events before publishing.
+ */
+object TweetEventDataScrubber {
+
+  /**
+   * Returns a copy of `tweet` with `cards`, `card2`, `previousCounts` and `editPerspective`
+   * cleared, and with `extensionsReply` cleared on every media entity.
+   */
+  def scrub(tweet: Tweet): Tweet = {
+    val scrubbedMedia = tweet.media.map(_.map(_.copy(extensionsReply = None)))
+
+    tweet.copy(
+      cards = None,
+      card2 = None,
+      media = scrubbedMedia,
+      previousCounts = None,
+      editPerspective = None
+    )
+  }
+}
diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala
new file mode 100644
index 000000000..648e9a17c
--- /dev/null
+++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetIndexingStore.scala
@@ -0,0 +1,65 @@
+package com.twitter.tweetypie
+package store
+
+import com.twitter.tweetypie.tflock.TweetIndexer
+import com.twitter.tweetypie.thriftscala._
+
+// Store interface covering the async insert, delete, undelete and
+// set-retweet-visibility events.
+trait TweetIndexingStore
+    extends TweetStoreBase[TweetIndexingStore]
+    with AsyncInsertTweet.Store
+    with AsyncDeleteTweet.Store
+    with AsyncUndeleteTweet.Store
+    with AsyncSetRetweetVisibility.Store {
+
+  // Returns a copy of this store in which every effect is passed through `w`.
+  def wrap(w: TweetStore.Wrap): TweetIndexingStore =
+    new TweetStoreWrapper(w, this)
+      with TweetIndexingStore
+      with AsyncInsertTweet.StoreWrapper
+      with AsyncDeleteTweet.StoreWrapper
+      with AsyncUndeleteTweet.StoreWrapper
+      with AsyncSetRetweetVisibility.StoreWrapper
+}
+
+/**
+ * A TweetStore that sends indexing updates to a TweetIndexer.
+ */ +object TweetIndexingStore { + val Action: AsyncWriteAction.TweetIndex.type = AsyncWriteAction.TweetIndex + + def apply(indexer: TweetIndexer): TweetIndexingStore = + new TweetIndexingStore { + override val asyncInsertTweet: FutureEffect[AsyncInsertTweet.Event] = + FutureEffect[AsyncInsertTweet.Event](event => indexer.createIndex(event.tweet)) + + override val retryAsyncInsertTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncInsertTweet.Event] + ] = + TweetStore.retry(Action, asyncInsertTweet) + + override val asyncDeleteTweet: FutureEffect[AsyncDeleteTweet.Event] = + FutureEffect[AsyncDeleteTweet.Event](event => + indexer.deleteIndex(event.tweet, event.isBounceDelete)) + + override val retryAsyncDeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncDeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncDeleteTweet) + + override val asyncUndeleteTweet: FutureEffect[AsyncUndeleteTweet.Event] = + FutureEffect[AsyncUndeleteTweet.Event](event => indexer.undeleteIndex(event.tweet)) + + override val retryAsyncUndeleteTweet: FutureEffect[ + TweetStoreRetryEvent[AsyncUndeleteTweet.Event] + ] = + TweetStore.retry(Action, asyncUndeleteTweet) + + override val asyncSetRetweetVisibility: FutureEffect[AsyncSetRetweetVisibility.Event] = + FutureEffect[AsyncSetRetweetVisibility.Event] { event => + indexer.setRetweetVisibility(event.retweetId, event.visible) + } + + override val retryAsyncSetRetweetVisibility: FutureEffect[ + TweetStoreRetryEvent[AsyncSetRetweetVisibility.Event] + ] = + TweetStore.retry(Action, asyncSetRetweetVisibility) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala new file mode 100644 index 000000000..23f6f5124 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStatsStore.scala @@ -0,0 +1,64 @@ +package com.twitter.tweetypie +package store + +import 
com.twitter.finagle.stats.RollupStatsReceiver +import com.twitter.servo.util.MemoizingStatsReceiver + +/** + * Records some stats about inserted tweets. Tweets are currently classified by three criteria: + * + * - tweet type: "tweet" or "retweet" + * - user type: "stresstest", "protected", "restricted", or "public" + * - fanout type: "nullcast", "narrowcast", or "usertimeline" + * + * A counter is incremented for a tweet using those three criteria in order. Counters are + * created with a RollupStatsReceiver, so counts are aggregated at each level. Some + * example counters are: + * + * ./insert + * ./insert/tweet + * ./insert/tweet/public + * ./insert/tweet/protected/usertimeline + * ./insert/retweet/stresstest + * ./insert/retweet/public/nullcast + */ +trait TweetStatsStore extends TweetStoreBase[TweetStatsStore] with InsertTweet.Store { + def wrap(w: TweetStore.Wrap): TweetStatsStore = + new TweetStoreWrapper(w, this) with TweetStatsStore with InsertTweet.StoreWrapper +} + +object TweetStatsStore { + def apply(stats: StatsReceiver): TweetStatsStore = { + val rollup = new MemoizingStatsReceiver(new RollupStatsReceiver(stats)) + val inserts = rollup.scope("insert") + + def tweetType(tweet: Tweet) = + if (getShare(tweet).isDefined) "retweet" else "tweet" + + def userType(user: User) = + if (user.roles.exists(_.roles.contains("stresstest"))) "stresstest" + else if (user.safety.exists(_.isProtected)) "protected" + else if (user.safety.exists(_.suspended)) "restricted" + else "public" + + def fanoutType(tweet: Tweet) = + if (TweetLenses.nullcast(tweet)) "nullcast" + else if (TweetLenses.narrowcast(tweet).isDefined) "narrowcast" + else "usertimeline" + + new TweetStatsStore { + override val insertTweet: FutureEffect[InsertTweet.Event] = + FutureEffect[InsertTweet.Event] { event => + inserts + .counter( + tweetType(event.tweet), + userType(event.user), + fanoutType(event.tweet) + ) + .incr() + + Future.Unit + } + } + } +} diff --git 
a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala new file mode 100644 index 000000000..62a668681 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStore.scala @@ -0,0 +1,292 @@ +package com.twitter.tweetypie +package store + +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.stats.Stat +import com.twitter.servo.util.RetryHandler +import com.twitter.tweetypie.thriftscala._ +import com.twitter.util.Timer + +object TweetStore { + // Using the old-school c.t.logging.Logger here as this log is only used by + // servo.FutureEffect's trackOutcome method, which needs that kind of logger. + val log: com.twitter.logging.Logger = com.twitter.logging.Logger(getClass) + + /** + * Adapts a tweet store on a specific TweetStoreEvent type to one that handles + * TweetStoreRetryEvents of that type that match the given AsyncWriteAction. + */ + def retry[T <: AsyncTweetStoreEvent]( + action: AsyncWriteAction, + store: FutureEffect[T] + ): FutureEffect[TweetStoreRetryEvent[T]] = + store.contramap[TweetStoreRetryEvent[T]](_.event).onlyIf(_.action == action) + + /** + * Defines an abstract polymorphic operation to be applied to FutureEffects over any + * TweetStoreEvent type. The Wrap operation is defined over all possible + * FutureEffect[E <: TweetStoreEvent] types. + */ + trait Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] + } + + /** + * A Wrap operation that applies standardized metrics collection to the FutureEffect. 
+ */ + case class Tracked(stats: StatsReceiver) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + Stat.timeFuture(stats.scope(event.name).stat("latency_ms")) { + handler(event) + } + }.trackOutcome(stats, _.name, log) + } + + /** + * A Wrap operation that makes the FutureEffect enabled according to the given gate. + */ + case class Gated(gate: Gate[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.enabledBy(gate) + } + + /** + * A Wrap operation that updates the FutureEffect to ignore failures. + */ + object IgnoreFailures extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.ignoreFailures + } + + /** + * A Wrap operation that updates the FutureEffect to ignore failures upon completion. + */ + object IgnoreFailuresUponCompletion extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.ignoreFailuresUponCompletion + } + + /** + * A Wrap operation that applies a RetryHandler to FutureEffects. + */ + case class Retry(retryHandler: RetryHandler[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + handler.retry(retryHandler) + } + + /** + * A Wrap operation that applies a RetryHandler to FutureEffects. + */ + case class ReplicatedEventRetry(retryHandler: RetryHandler[Unit]) extends Wrap { + def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + event.retryStrategy match { + case TweetStoreEvent.ReplicatedEventLocalRetry => handler.retry(retryHandler)(event) + case _ => handler(event) + } + } + } + + /** + * A Wrap operation that configures async-retry behavior to async-write events. 
+ */ + class AsyncRetry( + localRetryPolicy: RetryPolicy[Try[Nothing]], + enqueueRetryPolicy: RetryPolicy[Try[Nothing]], + timer: Timer, + tweetService: ThriftTweetService, + scribe: FutureEffect[FailedAsyncWrite] + )( + stats: StatsReceiver, + action: AsyncWriteAction) + extends Wrap { + + override def apply[E <: TweetStoreEvent](handler: FutureEffect[E]): FutureEffect[E] = + FutureEffect[E] { event => + event.retryStrategy match { + case TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) => + enqueueAsyncRetry(handler, enqueueRetry)(event) + + case TweetStoreEvent.LocalRetryThenScribeFailure(toFailedAsyncWrite) => + localRetryThenScribeFailure(handler, toFailedAsyncWrite)(event) + + case _ => + handler(event) + } + } + + private def enqueueAsyncRetry[E <: TweetStoreEvent]( + handler: FutureEffect[E], + enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit] + ): FutureEffect[E] = { + val retryInitCounter = stats.counter("retries_initiated") + + // enqueues failed TweetStoreEvents to the deferredrpc-backed tweetService + // to be retried. this store uses the enqueueRetryPolicy to retry the enqueue + // attempts in the case of deferredrpc application failures. + val enqueueRetryHandler = + FutureEffect[E](_ => enqueueRetry(tweetService, action)) + .retry(RetryHandler.failuresOnly(enqueueRetryPolicy, timer, stats.scope("enqueue_retry"))) + + handler.rescue { + case ex => + TweetStore.log.warning(ex, s"will retry $action") + retryInitCounter.incr() + enqueueRetryHandler + } + } + + private def localRetryThenScribeFailure[E <: TweetStoreEvent]( + handler: FutureEffect[E], + toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite + ): FutureEffect[E] = { + val exhaustedCounter = stats.counter("retries_exhausted") + + // scribe events that failed after exhausting all retries + val scribeEventHandler = + FutureEffect[E](_ => scribe(toFailedAsyncWrite(action))) + + // wraps `handle` with a retry policy to retry failures with a backoff. 
if we exhaust + // all retries, then we pass the event to `scribeEventHandler` to scribe the failure. + handler + .retry(RetryHandler.failuresOnly(localRetryPolicy, timer, stats)) + .rescue { + case ex => + TweetStore.log.warning(ex, s"exhausted retries on $action") + exhaustedCounter.incr() + scribeEventHandler + } + } + } + + /** + * Parent trait for defining a "module" that defines a TweetStoreEvent type and corresponding + * TweetStore and TweetStoreWrapper types. + */ + sealed trait Module { + type Store + type StoreWrapper <: Store + } + + /** + * Parent trait for defining a "module" that defines a sync TweetStoreEvent. + */ + trait SyncModule extends Module { + type Event <: SyncTweetStoreEvent + } + + /** + * Parent trait for defining a "module" that defines an async TweetStoreEvent and a + * TweetStoreRetryEvent. + */ + trait AsyncModule extends Module { + type Event <: AsyncTweetStoreEvent + type RetryEvent <: TweetStoreRetryEvent[Event] + } + + /** + * Parent trait for defining a "module" that defines a replicated TweetStoreEvent. + */ + trait ReplicatedModule extends Module { + type Event <: ReplicatedTweetStoreEvent + } +} + +/** + * Trait for TweetStore implementations that support handler wrapping. + */ +trait TweetStoreBase[Self] { + import TweetStore._ + + /** + * Returns a new store of type Self with Wrap applied to each event handler in this instance. + */ + def wrap(w: Wrap): Self + + /** + * Applies the Tracked Wrap operation to the store. + */ + def tracked(stats: StatsReceiver): Self = wrap(Tracked(stats)) + + /** + * Applies the Gated Wrap operation to the store. + */ + def enabledBy(gate: Gate[Unit]): Self = wrap(Gated(gate)) + + /** + * Applies the IgnoreFailures Wrap operation to the store. + */ + def ignoreFailures: Self = wrap(IgnoreFailures) + + /** + * Applies the IgnoreFailuresUponCompletion Wrap operation to the store.
+ */ + def ignoreFailuresUponCompletion: Self = wrap(IgnoreFailuresUponCompletion) + + /** + * Applies a RetryHandler to each event handler. + */ + def retry(retryHandler: RetryHandler[Unit]): Self = wrap(Retry(retryHandler)) + + /** + * Applies a RetryHandler to replicated event handlers. + */ + def replicatedRetry(retryHandler: RetryHandler[Unit]): Self = + wrap(ReplicatedEventRetry(retryHandler)) + + /** + * Applies the AsyncRetry Wrap operation to the store. + */ + def asyncRetry(cfg: AsyncRetry): Self = wrap(cfg) +} + +/** + * An abstract base class for tweet store instances that wrap another tweet store instance. + * You can mix event-specific store wrapper traits into this class to automatically + * have the event-specific handlers wrapped. + */ +abstract class TweetStoreWrapper[+T]( + protected val wrap: TweetStore.Wrap, + protected val underlying: T) + +/** + * A TweetStore that has a handler for all possible TweetStoreEvents. + */ +trait TotalTweetStore + extends AsyncDeleteAdditionalFields.Store + with AsyncDeleteTweet.Store + with AsyncIncrBookmarkCount.Store + with AsyncIncrFavCount.Store + with AsyncInsertTweet.Store + with AsyncSetAdditionalFields.Store + with AsyncSetRetweetVisibility.Store + with AsyncTakedown.Store + with AsyncUndeleteTweet.Store + with AsyncUpdatePossiblySensitiveTweet.Store + with DeleteAdditionalFields.Store + with DeleteTweet.Store + with Flush.Store + with IncrBookmarkCount.Store + with IncrFavCount.Store + with InsertTweet.Store + with QuotedTweetDelete.Store + with QuotedTweetTakedown.Store + with ReplicatedDeleteAdditionalFields.Store + with ReplicatedDeleteTweet.Store + with ReplicatedIncrBookmarkCount.Store + with ReplicatedIncrFavCount.Store + with ReplicatedInsertTweet.Store + with ReplicatedScrubGeo.Store + with ReplicatedSetAdditionalFields.Store + with ReplicatedSetRetweetVisibility.Store + with ReplicatedTakedown.Store + with ReplicatedUndeleteTweet.Store + with ReplicatedUpdatePossiblySensitiveTweet.Store +
with ScrubGeo.Store + with ScrubGeoUpdateUserTimestamp.Store + with SetAdditionalFields.Store + with SetRetweetVisibility.Store + with Takedown.Store + with UndeleteTweet.Store + with UpdatePossiblySensitiveTweet.Store diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala new file mode 100644 index 000000000..987668d6f --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetStoreEvent.scala @@ -0,0 +1,144 @@ +package com.twitter.tweetypie +package store + +import com.twitter.finagle.tracing.Trace +import com.twitter.tweetypie.store.TweetStoreEvent.RetryStrategy +import com.twitter.tweetypie.thriftscala._ + +object TweetStoreEvent { + + /** + * Parent trait for indicating what type of retry strategy to apply to event handlers + * for the corresponding event type. Different classes of events use different strategies. + */ + sealed trait RetryStrategy + + /** + * Indicates that the event type doesn't support retries. + */ + case object NoRetry extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should enqueue a + * retry to be performed asynchronously. + */ + case class EnqueueAsyncRetry(enqueueRetry: (ThriftTweetService, AsyncWriteAction) => Future[Unit]) + extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should retry + * the event locally some number of times, before eventually giving up and scribing + * the failure. + */ + case class LocalRetryThenScribeFailure(toFailedAsyncWrite: AsyncWriteAction => FailedAsyncWrite) + extends RetryStrategy + + /** + * Indicates that if an event handler encounters a failure, it should retry + * the event locally some number of times. + */ + case object ReplicatedEventLocalRetry extends RetryStrategy +} + +/** + * The abstract parent class for all TweetStoreEvent types.
+ */ +sealed trait TweetStoreEvent { + val name: String + + val traceId: Long = Trace.id.traceId.toLong + + /** + * Indicates a particular retry behavior that should be applied to event handlers for + * the corresponding event type. The specifics of the strategy might depend upon the + * specific TweetStore implementation. + */ + def retryStrategy: RetryStrategy +} + +abstract class SyncTweetStoreEvent(val name: String) extends TweetStoreEvent { + override def retryStrategy: RetryStrategy = TweetStoreEvent.NoRetry +} + +abstract class AsyncTweetStoreEvent(val name: String) extends TweetStoreEvent { + def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] + + override def retryStrategy: RetryStrategy = TweetStoreEvent.EnqueueAsyncRetry(enqueueRetry) +} + +abstract class ReplicatedTweetStoreEvent(val name: String) extends TweetStoreEvent { + override def retryStrategy: RetryStrategy = TweetStoreEvent.ReplicatedEventLocalRetry +} + +/** + * A trait for all TweetStoreEvents that become TweetEvents. + */ +trait TweetStoreTweetEvent { + val timestamp: Time + + val optUser: Option[User] + + /** + * Most TweetStoreTweetEvents map to a single TweetEvent, but some + * optionally map to an event and others map to multiple events, so + * this method needs to return a Seq of TweetEventData. + */ + def toTweetEventData: Seq[TweetEventData] +} + +/** + * The abstract parent class for an event that indicates a particular action + * for a particular event that needs to be retried via the async-write-retrying mechanism. 
+ */ +abstract class TweetStoreRetryEvent[E <: AsyncTweetStoreEvent] extends TweetStoreEvent { + override val name = "async_write_retry" + + def action: AsyncWriteAction + def event: E + + def eventType: AsyncWriteEventType + + def scribedTweetOnFailure: Option[Tweet] + + override def retryStrategy: RetryStrategy = + TweetStoreEvent.LocalRetryThenScribeFailure(action => + FailedAsyncWrite(eventType, action, scribedTweetOnFailure)) +} + +/** + * Functions as a disjunction between an event type E and its corresponding + * retry event type TweetStoreRetryEvent[E] + */ +case class TweetStoreEventOrRetry[E <: AsyncTweetStoreEvent]( + event: E, + toRetry: Option[TweetStoreRetryEvent[E]]) { + def toInitial: Option[E] = if (retryAction.isDefined) None else Some(event) + def retryAction: Option[RetryStrategy] = toRetry.map(_.retryStrategy) + def hydrate(f: E => Future[E]): Future[TweetStoreEventOrRetry[E]] = + f(event).map(e => copy(event = e)) +} + +object TweetStoreEventOrRetry { + def apply[E <: AsyncTweetStoreEvent, R <: TweetStoreRetryEvent[E]]( + event: E, + retryAction: Option[AsyncWriteAction], + toRetryEvent: (AsyncWriteAction, E) => R + ): TweetStoreEventOrRetry[E] = + TweetStoreEventOrRetry(event, retryAction.map(action => toRetryEvent(action, event))) + + object First { + + /** matches against TweetStoreEventOrRetry instances for an initial event */ + def unapply[E <: AsyncTweetStoreEvent](it: TweetStoreEventOrRetry[E]): Option[E] = + it.toInitial + } + + object Retry { + + /** matches against TweetStoreEventOrRetry instances for a retry event */ + def unapply[E <: AsyncTweetStoreEvent]( + it: TweetStoreEventOrRetry[E] + ): Option[TweetStoreRetryEvent[E]] = + it.toRetry + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala new file mode 100644 index 000000000..8e031fc46 --- /dev/null +++
b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/TweetUpdate.scala @@ -0,0 +1,41 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object TweetUpdate { + + /** + * Copies takedown information from the source [[Tweet]] into [[CachedTweet]]. + * + * Note that this method requires the source [[Tweet]] to have been loaded with the following + * additional fields (which happens for all paths that create [[ReplicatedTakedown.Event]], in + * both [[TakedownHandler]] and [[UserTakedownHandler]]): + * - TweetypieOnlyTakedownReasonsField + * - TweetypieOnlyTakedownCountryCodesField + * This is done to ensure the remote datacenter of a takedown does not incorrectly try to load + * from MH as the data is already cached. + */ + def copyTakedownFieldsForUpdate(source: Tweet): CachedTweet => CachedTweet = + ct => { + val newCoreData = source.coreData.get + val updatedCoreData = ct.tweet.coreData.map(_.copy(hasTakedown = newCoreData.hasTakedown)) + ct.copy( + tweet = ct.tweet.copy( + coreData = updatedCoreData, + tweetypieOnlyTakedownCountryCodes = source.tweetypieOnlyTakedownCountryCodes, + tweetypieOnlyTakedownReasons = source.tweetypieOnlyTakedownReasons + ) + ) + } + + def copyNsfwFieldsForUpdate(source: Tweet): Tweet => Tweet = + tweet => { + val newCoreData = source.coreData.get + val updatedCoreData = + tweet.coreData.map { core => + core.copy(nsfwUser = newCoreData.nsfwUser, nsfwAdmin = newCoreData.nsfwAdmin) + } + tweet.copy(coreData = updatedCoreData) + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala new file mode 100644 index 000000000..72edb8cc1 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UndeleteTweet.scala @@ -0,0 +1,237 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.store.TweetEventDataScrubber.scrub +import
com.twitter.tweetypie.thriftscala._ + +object UndeleteTweet extends TweetStore.SyncModule { + + /** + * A TweetStoreEvent for Undeletion. + */ + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + hydrateOptions: WritePathHydrationOptions, + _internalTweet: Option[CachedTweet] = None, + deletedAt: Option[Time], + sourceTweet: Option[Tweet] = None, + sourceUser: Option[User] = None, + quotedTweet: Option[Tweet] = None, + quotedUser: Option[User] = None, + parentUserId: Option[UserId] = None, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends SyncTweetStoreEvent("undelete_tweet") + with QuotedTweetOps { + def internalTweet: CachedTweet = + _internalTweet.getOrElse( + throw new IllegalStateException( + s"internalTweet should have been set in WritePathHydration, ${this}" + ) + ) + + def toAsyncUndeleteTweetRequest: AsyncUndeleteTweetRequest = + AsyncUndeleteTweetRequest( + tweet = tweet, + cachedTweet = internalTweet, + user = user, + timestamp = timestamp.inMillis, + deletedAt = deletedAt.map(_.inMillis), + sourceTweet = sourceTweet, + sourceUser = sourceUser, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + parentUserId = parentUserId, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + } + + trait Store { + val undeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val undeleteTweet: FutureEffect[Event] = wrap(underlying.undeleteTweet) + } + + object Store { + def apply( + logLensStore: LogLensStore, + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore, + asyncEnqueueStore: AsyncEnqueueStore + ): Store = + new Store { + override val undeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + logLensStore.undeleteTweet, + // ignore failures writing to cache, will be retried in async-path + cachingTweetStore.ignoreFailures.undeleteTweet, + tweetCountsUpdatingStore.undeleteTweet, + 
asyncEnqueueStore.undeleteTweet + ) + } + } +} + +object AsyncUndeleteTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest(request: AsyncUndeleteTweetRequest): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncUndeleteTweet.Event( + tweet = request.tweet, + cachedTweet = request.cachedTweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + deletedAt = request.deletedAt.map(Time.fromMilliseconds), + sourceTweet = request.sourceTweet, + sourceUser = request.sourceUser, + quotedTweet = request.quotedTweet, + quotedUser = request.quotedUser, + parentUserId = request.parentUserId, + quoterHasAlreadyQuotedTweet = request.quoterHasAlreadyQuotedTweet.getOrElse(false) + ), + request.retryAction, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + user: User, + optUser: Option[User], + timestamp: Time, + deletedAt: Option[Time], + sourceTweet: Option[Tweet], + sourceUser: Option[User], + quotedTweet: Option[Tweet], + quotedUser: Option[User], + parentUserId: Option[UserId] = None, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends AsyncTweetStoreEvent("async_undelete_tweet") + with QuotedTweetOps + with TweetStoreTweetEvent { + + /** + * Convert this event into an AsyncUndeleteTweetRequest thrift request object + */ + def toAsyncRequest(retryAction: Option[AsyncWriteAction] = None): AsyncUndeleteTweetRequest = + AsyncUndeleteTweetRequest( + tweet = tweet, + cachedTweet = cachedTweet, + user = user, + timestamp = timestamp.inMillis, + retryAction = retryAction, + deletedAt = deletedAt.map(_.inMillis), + sourceTweet = sourceTweet, + sourceUser = sourceUser, + quotedTweet = quotedTweet, + quotedUser = quotedUser, + parentUserId = parentUserId, + quoterHasAlreadyQuotedTweet = Some(quoterHasAlreadyQuotedTweet) + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetUndeleteEvent( + 
TweetUndeleteEvent( + tweet = scrub(tweet), + user = Some(user), + sourceTweet = sourceTweet.map(scrub), + sourceUser = sourceUser, + retweetParentUserId = parentUserId, + quotedTweet = publicQuotedTweet.map(scrub), + quotedUser = publicQuotedUser, + deletedAtMsec = deletedAt.map(_.inMilliseconds) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncUndeleteTweet(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.Undelete.type = AsyncWriteEventType.Undelete + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncUndeleteTweet: FutureEffect[Event] + val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncUndeleteTweet: FutureEffect[Event] = wrap(underlying.asyncUndeleteTweet) + override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = wrap( + underlying.retryAsyncUndeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + eventBusEnqueueStore: TweetEventBusStore, + indexingStore: TweetIndexingStore, + replicatingStore: ReplicatingTweetStore, + mediaServiceStore: MediaServiceStore, + timelineUpdatingStore: TlsTimelineUpdatingStore + ): Store = { + val stores: Seq[Store] = + Seq( + cachingTweetStore, + eventBusEnqueueStore, + indexingStore, + replicatingStore, + mediaServiceStore, + timelineUpdatingStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] = + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncUndeleteTweet: FutureEffect[Event] = build(_.asyncUndeleteTweet) + override val retryAsyncUndeleteTweet: FutureEffect[TweetStoreRetryEvent[Event]] = build( + 
_.retryAsyncUndeleteTweet) + } + } + } +} + +object ReplicatedUndeleteTweet extends TweetStore.ReplicatedModule { + + case class Event( + tweet: Tweet, + cachedTweet: CachedTweet, + quoterHasAlreadyQuotedTweet: Boolean = false) + extends ReplicatedTweetStoreEvent("replicated_undelete_tweet") + + trait Store { + val replicatedUndeleteTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedUndeleteTweet: FutureEffect[Event] = wrap( + underlying.replicatedUndeleteTweet) + } + + object Store { + def apply( + cachingTweetStore: CachingTweetStore, + tweetCountsUpdatingStore: TweetCountsCacheUpdatingStore + ): Store = + new Store { + override val replicatedUndeleteTweet: FutureEffect[Event] = + FutureEffect.inParallel( + cachingTweetStore.replicatedUndeleteTweet.ignoreFailures, + tweetCountsUpdatingStore.replicatedUndeleteTweet.ignoreFailures + ) + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala new file mode 100644 index 000000000..c8d1d0b30 --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/UpdatePossiblySensitiveTweet.scala @@ -0,0 +1,206 @@ +package com.twitter.tweetypie +package store + +import com.twitter.tweetypie.thriftscala._ + +object UpdatePossiblySensitiveTweet extends TweetStore.SyncModule { + + case class Event( + tweet: Tweet, + user: User, + timestamp: Time, + byUserId: UserId, + nsfwAdminChange: Option[Boolean], + nsfwUserChange: Option[Boolean], + note: Option[String], + host: Option[String]) + extends SyncTweetStoreEvent("update_possibly_sensitive_tweet") { + def toAsyncRequest: AsyncUpdatePossiblySensitiveTweetRequest = + AsyncUpdatePossiblySensitiveTweetRequest( + tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + nsfwAdminChange = nsfwAdminChange, + 
nsfwUserChange = nsfwUserChange, + note = note, + host = host + ) + } + + trait Store { + val updatePossiblySensitiveTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val updatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.updatePossiblySensitiveTweet + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + logLensStore: LogLensStore, + asyncEnqueueStore: AsyncEnqueueStore + ): Store = + new Store { + override val updatePossiblySensitiveTweet: FutureEffect[Event] = + FutureEffect.inParallel( + manhattanStore.ignoreFailures.updatePossiblySensitiveTweet, + cachingTweetStore.ignoreFailures.updatePossiblySensitiveTweet, + logLensStore.updatePossiblySensitiveTweet, + asyncEnqueueStore.updatePossiblySensitiveTweet + ) + } + } +} + +object AsyncUpdatePossiblySensitiveTweet extends TweetStore.AsyncModule { + + object Event { + def fromAsyncRequest( + request: AsyncUpdatePossiblySensitiveTweetRequest + ): TweetStoreEventOrRetry[Event] = + TweetStoreEventOrRetry( + AsyncUpdatePossiblySensitiveTweet.Event( + tweet = request.tweet, + user = request.user, + optUser = Some(request.user), + timestamp = Time.fromMilliseconds(request.timestamp), + byUserId = request.byUserId, + nsfwAdminChange = request.nsfwAdminChange, + nsfwUserChange = request.nsfwUserChange, + note = request.note, + host = request.host + ), + request.action, + RetryEvent + ) + } + + case class Event( + tweet: Tweet, + user: User, + optUser: Option[User], + timestamp: Time, + byUserId: UserId, + nsfwAdminChange: Option[Boolean], + nsfwUserChange: Option[Boolean], + note: Option[String], + host: Option[String]) + extends AsyncTweetStoreEvent("async_update_possibly_sensitive_tweet") + with TweetStoreTweetEvent { + + def toAsyncRequest( + action: Option[AsyncWriteAction] = None + ): AsyncUpdatePossiblySensitiveTweetRequest = + AsyncUpdatePossiblySensitiveTweetRequest( + 
tweet = tweet, + user = user, + byUserId = byUserId, + timestamp = timestamp.inMillis, + nsfwAdminChange = nsfwAdminChange, + nsfwUserChange = nsfwUserChange, + note = note, + host = host, + action = action + ) + + override def toTweetEventData: Seq[TweetEventData] = + Seq( + TweetEventData.TweetPossiblySensitiveUpdateEvent( + TweetPossiblySensitiveUpdateEvent( + tweetId = tweet.id, + userId = user.id, + nsfwAdmin = TweetLenses.nsfwAdmin.get(tweet), + nsfwUser = TweetLenses.nsfwUser.get(tweet) + ) + ) + ) + + override def enqueueRetry(service: ThriftTweetService, action: AsyncWriteAction): Future[Unit] = + service.asyncUpdatePossiblySensitiveTweet(toAsyncRequest(Some(action))) + } + + case class RetryEvent(action: AsyncWriteAction, event: Event) + extends TweetStoreRetryEvent[Event] { + + override val eventType: AsyncWriteEventType.UpdatePossiblySensitiveTweet.type = + AsyncWriteEventType.UpdatePossiblySensitiveTweet + override val scribedTweetOnFailure: Option[Tweet] = Some(event.tweet) + } + + trait Store { + val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] + val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.asyncUpdatePossiblySensitiveTweet + ) + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[TweetStoreRetryEvent[Event]] = + wrap( + underlying.retryAsyncUpdatePossiblySensitiveTweet + ) + } + + object Store { + def apply( + manhattanStore: ManhattanTweetStore, + cachingTweetStore: CachingTweetStore, + replicatingStore: ReplicatingTweetStore, + guanoStore: GuanoServiceStore, + eventBusStore: TweetEventBusStore + ): Store = { + val stores: Seq[Store] = + Seq( + manhattanStore, + cachingTweetStore, + replicatingStore, + guanoStore, + eventBusStore + ) + + def build[E <: TweetStoreEvent](extract: Store => FutureEffect[E]): FutureEffect[E] 
= + FutureEffect.inParallel[E](stores.map(extract): _*) + + new Store { + override val asyncUpdatePossiblySensitiveTweet: FutureEffect[Event] = build( + _.asyncUpdatePossiblySensitiveTweet) + override val retryAsyncUpdatePossiblySensitiveTweet: FutureEffect[ + TweetStoreRetryEvent[Event] + ] = build( + _.retryAsyncUpdatePossiblySensitiveTweet + ) + } + } + } +} + +object ReplicatedUpdatePossiblySensitiveTweet extends TweetStore.ReplicatedModule { + + case class Event(tweet: Tweet) + extends ReplicatedTweetStoreEvent("replicated_update_possibly_sensitive_tweet") + + trait Store { + val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] + } + + trait StoreWrapper extends Store { self: TweetStoreWrapper[Store] => + override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = wrap( + underlying.replicatedUpdatePossiblySensitiveTweet + ) + } + + object Store { + def apply(cachingTweetStore: CachingTweetStore): Store = { + new Store { + override val replicatedUpdatePossiblySensitiveTweet: FutureEffect[Event] = + cachingTweetStore.replicatedUpdatePossiblySensitiveTweet + } + } + } +} diff --git a/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala new file mode 100644 index 000000000..aa399d9bf --- /dev/null +++ b/tweetypie/server/src/main/scala/com/twitter/tweetypie/store/package.scala @@ -0,0 +1,16 @@ +package com.twitter.tweetypie + +import com.fasterxml.jackson.core.JsonGenerator +import com.twitter.tweetypie.thriftscala.CachedTweet +import com.twitter.context.TwitterContext + +package object store { + type JsonGen = JsonGenerator => Unit + + // Bring Tweetypie permitted TwitterContext into scope + val TwitterContext: TwitterContext = + com.twitter.context.TwitterContext(com.twitter.tweetypie.TwitterContextPermit) + + def cachedTweetFromUnhydratedTweet(tweet: Tweet): CachedTweet = + CachedTweet(tweet = tweet) +} diff --git 
a/tweetypie/server/src/main/thrift/BUILD b/tweetypie/server/src/main/thrift/BUILD new file mode 100644 index 000000000..f90f1b823 --- /dev/null +++ b/tweetypie/server/src/main/thrift/BUILD @@ -0,0 +1,29 @@ +create_thrift_libraries( + base_name = "compiled", + sources = ["**/*.thrift"], + platform = "java8", + strict_deps = True, + tags = ["bazel-compatible"], + dependency_roots = [ + "mediaservices/commons/src/main/thrift", + "tweetypie/servo/repo/src/main/thrift", + "src/thrift/com/twitter/context:feature-context", + "src/thrift/com/twitter/escherbird:media-annotation-structs", + "src/thrift/com/twitter/expandodo:capi", + "src/thrift/com/twitter/expandodo:only", + "src/thrift/com/twitter/geoduck", + "src/thrift/com/twitter/gizmoduck:thrift", + "src/thrift/com/twitter/gizmoduck:user-thrift", + "src/thrift/com/twitter/servo:servo-exception", + "tweetypie/common/src/thrift/com/twitter/tweetypie:audit", + "tweetypie/common/src/thrift/com/twitter/tweetypie:delete_location_data", + "tweetypie/common/src/thrift/com/twitter/tweetypie:media-entity", + "tweetypie/common/src/thrift/com/twitter/tweetypie:service", + "tweetypie/common/src/thrift/com/twitter/tweetypie:stored-tweet-info", + "tweetypie/common/src/thrift/com/twitter/tweetypie:tweet", + ], + generate_languages = [ + "java", + "scala", + ], +) diff --git a/tweetypie/server/src/main/thrift/tweetypie_internal.thrift b/tweetypie/server/src/main/thrift/tweetypie_internal.thrift new file mode 100644 index 000000000..3cc16381e --- /dev/null +++ b/tweetypie/server/src/main/thrift/tweetypie_internal.thrift @@ -0,0 +1,705 @@ +namespace java com.twitter.tweetypie.thriftjava +#@namespace scala com.twitter.tweetypie.thriftscala + +include "com/twitter/context/feature_context.thrift" +include "com/twitter/expandodo/cards.thrift" +include "com/twitter/gizmoduck/user.thrift" +include "com/twitter/mediaservices/commons/MediaCommon.thrift" +include "com/twitter/mediaservices/commons/MediaInformation.thrift" +include 
"com/twitter/mediaservices/commons/TweetMedia.thrift" +include "com/twitter/servo/exceptions.thrift" +include "com/twitter/servo/cache/servo_repo.thrift" +include "com/twitter/tseng/withholding/withholding.thrift" +include "com/twitter/tweetypie/delete_location_data.thrift" +include "com/twitter/tweetypie/transient_context.thrift" +include "com/twitter/tweetypie/media_entity.thrift" +include "com/twitter/tweetypie/tweet.thrift" +include "com/twitter/tweetypie/tweet_audit.thrift" +include "com/twitter/tweetypie/stored_tweet_info.thrift" +include "com/twitter/tweetypie/tweet_service.thrift" + +typedef i16 FieldId + +struct UserIdentity { + 1: required i64 id + 2: required string screen_name + 3: required string real_name +# obsolete 4: bool deactivated = 0 +# obsolete 5: bool suspended = 0 +} + +enum HydrationType { + MENTIONS = 1, + URLS = 2, + CACHEABLE_MEDIA = 3, + QUOTED_TWEET_REF = 4, + REPLY_SCREEN_NAME = 5, + DIRECTED_AT = 6, + CONTRIBUTOR = 7, + SELF_THREAD_INFO = 8 +} + +struct CachedTweet { + 1: required tweet.Tweet tweet + // @obsolete 2: optional set included_additional_fields + 3: set completed_hydrations = [] + + // Indicates that a tweet was deleted after being bounced for violating + // the Twitter Rules. + // When set to true, all other fields in CachedTweet are ignored. + 4: optional bool is_bounce_deleted + + // Indicates whether this tweet has safety labels stored in Strato. + // See com.twitter.tweetypie.core.TweetData.hasSafetyLabels for more details. 
+ // @obsolete 5: optional bool has_safety_labels +} (persisted='true', hasPersonalData='true') + +struct MediaFaces { + 1: required map> faces +} + +enum AsyncWriteEventType { + INSERT = 1, + DELETE = 2, + UNDELETE = 3, + SET_ADDITIONAL_FIELDS = 4, + DELETE_ADDITIONAL_FIELDS = 5, + UPDATE_POSSIBLY_SENSITIVE_TWEET = 6, + UPDATE_TWEET_MEDIA = 7, + TAKEDOWN = 8, + SET_RETWEET_VISIBILITY = 9 +} + +// an enum of actions that could happen in an async-write (insert or delete) +enum AsyncWriteAction { + HOSEBIRD_ENQUEUE = 1 + SEARCH_ENQUEUE = 2 + // obsolete MAIL_ENQUEUE = 3 + FANOUT_DELIVERY = 4 + // obsolete FACEBOOK_ENQUEUE = 5 + TWEET_INDEX = 6 + TIMELINE_UPDATE = 7 + CACHE_UPDATE = 8 + REPLICATION = 9 + // obsolete MONORAIL_EXPIRY_ENQUEUE = 10 + USER_GEOTAG_UPDATE = 11 + // obsolete IBIS_ENQUEUE = 12 + EVENT_BUS_ENQUEUE = 13 + // obsolete HOSEBIRD_BINARY_ENQUEUE = 14 + TBIRD_UPDATE = 15 + RETWEETS_DELETION = 16 + GUANO_SCRIBE = 17 + MEDIA_DELETION = 18 + GEO_SEARCH_REQUEST_ID = 19 + SEARCH_THRIFT_ENQUEUE = 20 + RETWEET_ARCHIVAL_ENQUEUE = 21 +} + +# This struct is scribed to test_tweetypie_failed_async_write after +# an async-write action has failed multiple retries +struct FailedAsyncWrite { + 1: required AsyncWriteEventType event_type + 2: required AsyncWriteAction action + 3: optional tweet.Tweet tweet +} (persisted='true', hasPersonalData='true') + +# This struct is scribed to test_tweetypie_detached_retweets after +# attempting to read a retweet for which the source tweet has been deleted. +struct DetachedRetweet { + 1: required i64 tweet_id (personalDataType='TweetId') + 2: required i64 user_id (personalDataType='UserId') + 3: required i64 source_tweet_id (personalDataType='TweetId') +} (persisted='true', hasPersonalData='true') + +struct TweetCacheWrite { + 1: required i64 tweet_id (personalDataType = 'TweetId') + // If the tweet id is a snowflake id, this is an offset since tweet creation. 
+ // If it is not a snowflake id, then this is a Unix epoch time in + // milliseconds. (The idea is that for most tweets, this encoding will make + // it easier to see the interval between events and whether it occurred soon + // after tweet creation.) + 2: required i64 timestamp (personalDataType = 'TransactionTimestamp') + 3: required string action // One of "set", "add", "replace", "cas", "delete" + 4: required servo_repo.CachedValue cached_value // Contains metadata about the cached value + 5: optional CachedTweet cached_tweet +} (persisted='true', hasPersonalData='true') + +struct AsyncInsertRequest { + 12: required tweet.Tweet tweet + 18: required user.User user + 21: required i64 timestamp + // the cacheable version of tweet from field 12 + 29: required CachedTweet cached_tweet + # 13: obsolete tweet.Tweet internal_tweet + 19: optional tweet.Tweet source_tweet + 20: optional user.User source_user + // Used for quote tweet feature + 22: optional tweet.Tweet quoted_tweet + 23: optional user.User quoted_user + 28: optional i64 parent_user_id + // Used for delivering the requestId of a geotagged tweet + 24: optional string geo_search_request_id + # 7: obsolete + # if not specified, all async insert actions are performed. if specified, only + # the specified action is performed; this is used for retrying specific actions + # that failed on a previous attempt. + 10: optional AsyncWriteAction retry_action + # 11: obsolete: bool from_monorail = 0 + # 14: obsolete + 15: optional feature_context.FeatureContext feature_context + # 16: obsolete + # 17: obsolete + # 26: obsolete: optional tweet.Tweet debug_tweet_copy + 27: optional map additional_context + 30: optional transient_context.TransientCreateContext transient_context + // Used to check whether the same tweet has been quoted multiple + // times by a given user.
+ 31: optional bool quoter_has_already_quoted_tweet + 32: optional InitialTweetUpdateRequest initialTweetUpdateRequest + // User ids of users mentioned in note tweet. Used for tls events + 33: optional list note_tweet_mentioned_user_ids +} + +struct AsyncUpdatePossiblySensitiveTweetRequest { + 1: required tweet.Tweet tweet + 2: required user.User user + 3: required i64 by_user_id + 4: required i64 timestamp + 5: optional bool nsfw_admin_change + 6: optional bool nsfw_user_change + 7: optional string note + 8: optional string host + 9: optional AsyncWriteAction action +} + +struct AsyncUpdateTweetMediaRequest { + 1: required i64 tweet_id + 2: required list orphaned_media + 3: optional AsyncWriteAction retry_action + 4: optional list media_keys +} + +struct AsyncSetAdditionalFieldsRequest { + 1: required tweet.Tweet additional_fields + 3: required i64 timestamp + 4: required i64 user_id + 2: optional AsyncWriteAction retry_action +} + +struct AsyncSetRetweetVisibilityRequest { + 1: required i64 retweet_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. + 2: required bool visible + 3: required i64 src_id + 5: required i64 retweet_user_id + 6: required i64 source_tweet_user_id + 7: required i64 timestamp + 4: optional AsyncWriteAction retry_action +} + +struct SetRetweetVisibilityRequest { + 1: required i64 retweet_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. + 2: required bool visible +} + +struct AsyncEraseUserTweetsRequest { + 1: required i64 user_id + 3: required i64 flock_cursor + 4: required i64 start_timestamp + 5: required i64 tweet_count +} + +struct AsyncDeleteRequest { + 4: required tweet.Tweet tweet + 11: required i64 timestamp + 2: optional user.User user + 9: optional i64 by_user_id + 12: optional tweet_audit.AuditDeleteTweet audit_passthrough + 13: optional i64 cascaded_from_tweet_id + # if not specified, all async-delete actions are performed. 
if specified, only + # the specified action is performed; this is used for retrying specific actions + # that failed on a previous attempt. + 3: optional AsyncWriteAction retry_action + 5: bool delete_media = 1 + 6: bool delete_retweets = 1 + 8: bool scribe_for_audit = 1 + 15: bool is_user_erasure = 0 + 17: bool is_bounce_delete = 0 + 18: optional bool is_last_quote_of_quoter + 19: optional bool is_admin_delete +} + +struct AsyncUndeleteTweetRequest { + 1: required tweet.Tweet tweet + 3: required user.User user + 4: required i64 timestamp + // the cacheable version of tweet from field 1 + 12: required CachedTweet cached_tweet + # 2: obsolete tweet.Tweet internal_tweet + 5: optional AsyncWriteAction retry_action + 6: optional i64 deleted_at + 7: optional tweet.Tweet source_tweet + 8: optional user.User source_user + 9: optional tweet.Tweet quoted_tweet + 10: optional user.User quoted_user + 11: optional i64 parent_user_id + 13: optional bool quoter_has_already_quoted_tweet +} + +struct AsyncIncrFavCountRequest { + 1: required i64 tweet_id + 2: required i32 delta +} + +struct AsyncIncrBookmarkCountRequest { + 1: required i64 tweet_id + 2: required i32 delta +} + +struct AsyncDeleteAdditionalFieldsRequest { + 6: required i64 tweet_id + 7: required list field_ids + 4: required i64 timestamp + 5: required i64 user_id + 3: optional AsyncWriteAction retry_action +} + +// Used for both tweet and user takedowns. +// user will be None for user takedowns because user is only used when scribe_for_audit or +// eventbus_enqueue are true, which is never the case for user takedown. +struct AsyncTakedownRequest { + 1: required tweet.Tweet tweet + + // Author of the tweet. Used when scribe_for_audit or eventbus_enqueue are true which is the case + // for tweet takedown but not user takedown. + 2: optional user.User user + + // This field is the resulting list of takedown country codes on the tweet after the + // countries_to_add and countries_to_remove changes have been applied. 
+ 13: list takedown_reasons = [] + + // This field is the list of takedown reasons to add to the tweet. + 14: list reasons_to_add = [] + + // This field is the list of takedown reasons to remove from the tweet. + 15: list reasons_to_remove = [] + + // This field determines whether or not Tweetypie should write takedown audits + // for this request to Guano. + 6: required bool scribe_for_audit + + // This field determines whether or not Tweetypie should enqueue a + // TweetTakedownEvent to EventBus and Hosebird for this request. + 7: required bool eventbus_enqueue + + // This field is sent as part of the takedown audit that's written to Guano, + // and is not persisted with the takedown itself. + 8: optional string audit_note + + // This field is the ID of the user who initiated the takedown. It is used + // when auditing the takedown in Guano. If unset, it will be logged as -1. + 9: optional i64 by_user_id + + // This field is the host where the request originated or the remote IP that + // is associated with the request. It is used when auditing the takedown in + // Guano. If unset, it will be logged as "". + 10: optional string host + + 11: optional AsyncWriteAction retry_action + 12: required i64 timestamp +} + +struct SetTweetUserTakedownRequest { + 1: required i64 tweet_id + 2: required bool has_takedown + 3: optional i64 user_id +} + +enum DataErrorCause { + UNKNOWN = 0 + // Returned on set_tweet_user_takedown when + // the SetTweetUserTakedownRequest.user_id does not match the author + // of the tweet identified by SetTweetUserTakedownRequest.tweet_id. + USER_TWEET_RELATIONSHIP = 1 +} + +/** + * DataError is returned for operations that perform data changes, + * but encountered an inconsistency, and the operation cannot + * be meaningfully performed.
+ */ +exception DataError { + 1: required string message + 2: optional DataErrorCause errorCause +} + +struct ReplicatedDeleteAdditionalFieldsRequest { + /** is a map for backwards compatibility, but will only contain a single tweet id */ + 1: required map> fields_map +} + +struct CascadedDeleteTweetRequest { + 1: required i64 tweet_id + 2: required i64 cascaded_from_tweet_id + 3: optional tweet_audit.AuditDeleteTweet audit_passthrough +} + +struct QuotedTweetDeleteRequest { + 1: i64 quoting_tweet_id + 2: i64 quoted_tweet_id + 3: i64 quoted_user_id +} + +struct QuotedTweetTakedownRequest { + 1: i64 quoting_tweet_id + 2: i64 quoted_tweet_id + 3: i64 quoted_user_id + 4: list takedown_country_codes = [] + 5: list takedown_reasons = [] +} + +struct ReplicatedInsertTweet2Request { + 1: required CachedTweet cached_tweet + // Used to check whether the same tweet has been quoted by a user. + 2: optional bool quoter_has_already_quoted_tweet + 3: optional InitialTweetUpdateRequest initialTweetUpdateRequest +} + +struct ReplicatedDeleteTweet2Request { + 1: required tweet.Tweet tweet + 2: required bool is_erasure + 3: required bool is_bounce_delete + 4: optional bool is_last_quote_of_quoter +} + +struct ReplicatedSetRetweetVisibilityRequest { + 1: required i64 src_id + // Whether to archive or unarchive(visible=true) the retweet_id edge in the RetweetsGraph. 
+ 2: required bool visible +} + +struct ReplicatedUndeleteTweet2Request { + 1: required CachedTweet cached_tweet + 2: optional bool quoter_has_already_quoted_tweet +} + +struct GetStoredTweetsOptions { + 1: bool bypass_visibility_filtering = 0 + 2: optional i64 for_user_id + 3: list additional_field_ids = [] +} + +struct GetStoredTweetsRequest { + 1: required list tweet_ids + 2: optional GetStoredTweetsOptions options +} + +struct GetStoredTweetsResult { + 1: required stored_tweet_info.StoredTweetInfo stored_tweet +} + +struct GetStoredTweetsByUserOptions { + 1: bool bypass_visibility_filtering = 0 + 2: bool set_for_user_id = 0 + 3: optional i64 start_time_msec + 4: optional i64 end_time_msec + 5: optional i64 cursor + 6: bool start_from_oldest = 0 + 7: list additional_field_ids = [] +} + +struct GetStoredTweetsByUserRequest { + 1: required i64 user_id + 2: optional GetStoredTweetsByUserOptions options +} + +struct GetStoredTweetsByUserResult { + 1: required list stored_tweets + 2: optional i64 cursor +} + +/* This is a request to update an initial tweet based on the creation of an edit tweet + * initialTweetId: The tweet to be updated + * editTweetId: The tweet being created, which is an edit of initialTweetId + * selfPermalink: A self permalink for initialTweetId + */ +struct InitialTweetUpdateRequest { + 1: required i64 initialTweetId + 2: required i64 editTweetId + 3: optional tweet.ShortenedUrl selfPermalink +} + +service TweetServiceInternal extends tweet_service.TweetService { + + /** + * Performs the async portion of TweetService.erase_user_tweets. + * Only tweetypie itself can call this. + */ + void async_erase_user_tweets(1: AsyncEraseUserTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.post_tweet. + * Only tweetypie itself can call this.
+ */ + void async_insert(1: AsyncInsertRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.delete_tweets. + * Only tweetypie itself can call this. + */ + void async_delete(1: AsyncDeleteRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.undelete_tweet. + * Only tweetypie itself can call this. + */ + void async_undelete_tweet(1: AsyncUndeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.update_possibly_sensitive_tweet. + * Only tweetypie itself can call this. + */ + void async_update_possibly_sensitive_tweet(1: AsyncUpdatePossiblySensitiveTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.incr_tweet_fav_count. + * Only tweetypie itself can call this. + */ + void async_incr_fav_count(1: AsyncIncrFavCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.incr_tweet_bookmark_count. + * Only tweetypie itself can call this. + */ + void async_incr_bookmark_count(1: AsyncIncrBookmarkCountRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.set_additional_fields. + * Only tweetypie itself can call this. + */ + void async_set_additional_fields(1: AsyncSetAdditionalFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetServiceInternal.set_retweet_visibility. 
+ * Only tweetypie itself can call this. + */ + void async_set_retweet_visibility(1: AsyncSetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Set whether the specified retweet ID should be included in its source tweet's retweet count. + * This endpoint is invoked from a tweetypie-daemon to adjust retweet counts for all tweets a + * suspended or fraudulent (e.g. ROPO-'d) user has retweeted to disincentivize their false engagement. + */ + void set_retweet_visibility(1: SetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.delete_additional_fields. + * Only tweetypie itself can call this. + */ + void async_delete_additional_fields(1: AsyncDeleteAdditionalFieldsRequest field_delete) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Performs the async portion of TweetService.takedown. + * Only tweetypie itself can call this. + */ + void async_takedown(1: AsyncTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Update the tweet's takedown fields when a user is taken down. + * Only tweetypie's UserTakedownChange daemon can call this. + */ + void set_tweet_user_takedown(1: SetTweetUserTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error, + 3: DataError data_error) + + /** + * Cascade delete tweet is the logic for removing tweets that are detached + * from their dependency which has been deleted. They are already filtered + * out from serving, so this operation reconciles storage with the view + * presented by Tweetypie. + * This RPC call is delegated from daemons or batch jobs. 
Currently there + * are two use-cases when this call is issued: + * * Deleting detached retweets after the source tweet was deleted. + * This is done through RetweetsDeletion daemon and the + * CleanupDetachedRetweets job. + * * Deleting edits of an initial tweet that has been deleted. + * This is done by CascadedEditedTweetDelete daemon. + * Note that, when serving the original delete request for an edit, + * the initial tweet is only deleted, which makes all edits hidden. + */ + void cascaded_delete_tweet(1: CascadedDeleteTweetRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Update the timestamp of the user's most recent request to delete + * location data on their tweets. This does not actually remove the + * geo information from the user's tweets, but it will prevent the geo + * information for this user's tweets from being returned by + * Tweetypie. + */ + void scrub_geo_update_user_timestamp(1: delete_location_data.DeleteLocationData request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Look up tweets quoting a tweet that has been deleted and enqueue a compliance event. + * Only tweetypie's QuotedTweetDelete daemon can call this. + **/ + void quoted_tweet_delete(1: QuotedTweetDeleteRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Look up tweets quoting a tweet that has been taken down and enqueue a compliance event. + * Only tweetypie's QuotedTweetTakedown daemon can call this. + **/ + void quoted_tweet_takedown(1: QuotedTweetTakedownRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweet_counts from another cluster. 
+ */ + void replicated_get_tweet_counts(1: tweet_service.GetTweetCountsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweet_fields from another cluster. + */ + void replicated_get_tweet_fields(1: tweet_service.GetTweetFieldsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates TweetService.get_tweets from another cluster. + */ + void replicated_get_tweets(1: tweet_service.GetTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.post_tweet InsertTweet event from another cluster. + * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced + */ + void replicated_insert_tweet2(1: ReplicatedInsertTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.delete_tweets DeleteTweet event from another cluster. + */ + void replicated_delete_tweet2(1: ReplicatedDeleteTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.incr_tweet_fav_count event from another cluster. + */ + void replicated_incr_fav_count(1: i64 tweet_id, 2: i32 delta) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.incr_tweet_bookmark_count event from another cluster. + */ + void replicated_incr_bookmark_count(1: i64 tweet_id, 2: i32 delta) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetServiceInternal.set_retweet_visibility event from another cluster. 
+ */ + void replicated_set_retweet_visibility(1: ReplicatedSetRetweetVisibilityRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.scrub_geo from another cluster. + */ + void replicated_scrub_geo(1: list tweet_ids) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.set_additional_fields event from another cluster. + */ + void replicated_set_additional_fields( + 1: tweet_service.SetAdditionalFieldsRequest request + ) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.delete_additional_fields event from another cluster. + */ + void replicated_delete_additional_fields( + 1: ReplicatedDeleteAdditionalFieldsRequest request + ) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.undelete_tweet event from another cluster. + * Note: v1 version of this endpoint previously just took a Tweet which is why it was replaced + */ + void replicated_undelete_tweet2(1: ReplicatedUndeleteTweet2Request request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.takedown event from another cluster. + */ + void replicated_takedown(1: tweet.Tweet tweet) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Replicates a TweetService.update_possibly_sensitive_tweet event from another cluster. + */ + void replicated_update_possibly_sensitive_tweet(1: tweet.Tweet tweet) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Fetches hydrated Tweets and some metadata irrespective of the Tweets' state. 
+ */ + list get_stored_tweets(1: GetStoredTweetsRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) + + /** + * Fetches hydrated Tweets and some metadata for a particular user, irrespective of the Tweets' + * state. + */ + GetStoredTweetsByUserResult get_stored_tweets_by_user(1: GetStoredTweetsByUserRequest request) throws ( + 1: exceptions.ClientError client_error, + 2: exceptions.ServerError server_error) +} diff --git a/tweetypie/servo/README.md b/tweetypie/servo/README.md new file mode 100644 index 000000000..ff9d1e89d --- /dev/null +++ b/tweetypie/servo/README.md @@ -0,0 +1,3 @@ +# Servo + +Servo is a collection of classes and patterns for building services in Scala. It's a grab-bag of code that was deemed useful for service development. diff --git a/tweetypie/servo/decider/BUILD b/tweetypie/servo/decider/BUILD new file mode 100644 index 000000000..2da29494b --- /dev/null +++ b/tweetypie/servo/decider/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/decider/src/main/scala", + ], +) diff --git a/tweetypie/servo/decider/src/main/scala/BUILD b/tweetypie/servo/decider/src/main/scala/BUILD new file mode 100644 index 000000000..846ac3eb2 --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/BUILD @@ -0,0 +1,18 @@ +scala_library( + sources = ["**/*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-decider", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "decider", + "finagle/finagle-core/src/main", + "tweetypie/servo/util", + "twitter-server-internal", + "twitter-server/server/src/main/scala", + ], +) diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala new file mode 100644 index 000000000..e147ad2fe --- /dev/null +++ 
b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderGateBuilder.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.decider + +import com.twitter.decider.{Decider, Feature} +import com.twitter.servo.util.Gate +import com.twitter.servo.gate.DeciderGate + +/** + * Convenience syntax for creating decider gates + */ +class DeciderGateBuilder(decider: Decider) { + + /** + * idGate should be used when the result of the gate needs to be consistent between repeated + * invocations, with the condition that consistency is dependent upon passing identical + * parameters between the invocations. + */ + def idGate(key: DeciderKeyName): Gate[Long] = + DeciderGate.byId(keyToFeature(key)) + + /** + * linearGate should be used when the probability of the gate returning true needs to + * increase linearly with the availability of feature. + */ + def linearGate(key: DeciderKeyName): Gate[Unit] = + DeciderGate.linear(keyToFeature(key)) + + /** + * typedLinearGate is a linearGate that conforms to the gate of the specified type. + */ + def typedLinearGate[T](key: DeciderKeyName): Gate[T] = + linearGate(key).contramap[T] { _ => () } + + /** + * expGate should be used when the probability of the gate returning true needs to + * increase exponentially with the availability of feature.
+ */ + def expGate(key: DeciderKeyName, exponent: Int): Gate[Unit] = + DeciderGate.exp(keyToFeature(key), exponent) + + def keyToFeature(key: DeciderKeyName): Feature = decider.feature(key.toString) +} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala new file mode 100644 index 000000000..8f9e17dce --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/DeciderKeyEnum.scala @@ -0,0 +1,3 @@ +package com.twitter.servo.decider + +trait DeciderKeyEnum extends Enumeration diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala new file mode 100644 index 000000000..86aa734cb --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/decider/package.scala @@ -0,0 +1,5 @@ +package com.twitter.servo + +package object decider { + type DeciderKeyName = DeciderKeyEnum#Value +} diff --git a/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala new file mode 100644 index 000000000..42874e20d --- /dev/null +++ b/tweetypie/servo/decider/src/main/scala/com/twitter/servo/gate/DeciderGate.scala @@ -0,0 +1,34 @@ +package com.twitter.servo.gate + +import com.twitter.decider +import com.twitter.servo.util.Gate +import scala.annotation.tailrec + +object DeciderGate { + + /** + * Create a Gate[Unit] with a probability of returning true + * that increases linearly with the availability of feature. + */ + def linear(feature: decider.Feature): Gate[Unit] = + Gate(_ => feature.isAvailable, "DeciderGate.linear(%s)".format(feature)) + + /** + * Create a Gate[Unit] with a probability of returning true + * that increases exponentially with the availability of feature. 
+ */ + def exp(feature: decider.Feature, exponent: Int): Gate[Unit] = { + val gate = if (exponent >= 0) linear(feature) else !linear(feature) + + @tailrec + def go(exp: Int): Boolean = if (exp == 0) true else (gate() && go(exp - 1)) + + Gate(_ => go(math.abs(exponent)), "DeciderGate.exp(%s, %s)".format(feature, exponent)) + } + + /** + * Create a Gate[Long] that returns true if the given feature is available for an id. + */ + def byId(feature: decider.Feature): Gate[Long] = + Gate(id => feature.isAvailable(id), "DeciderGate.byId(%s)".format(feature)) +} diff --git a/tweetypie/servo/json/BUILD b/tweetypie/servo/json/BUILD new file mode 100644 index 000000000..9f49967ba --- /dev/null +++ b/tweetypie/servo/json/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/json/src/main/scala/com/twitter/servo/json", + ], +) diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD new file mode 100644 index 000000000..c641f0626 --- /dev/null +++ b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/BUILD @@ -0,0 +1,21 @@ +scala_library( + sources = ["*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-json", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "//:scala-reflect", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-core", + "3rdparty/jvm/com/fasterxml/jackson/core:jackson-databind", + "3rdparty/jvm/com/googlecode/java-diff-utils:diffutils", + "3rdparty/jvm/org/apache/thrift:libthrift", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer", + "util/util-codec/src/main/scala", + ], +) diff --git a/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala new file mode 100644 index 000000000..cb9e65ee8 --- /dev/null +++ 
b/tweetypie/servo/json/src/main/scala/com/twitter/servo/json/ThriftJsonInspector.scala @@ -0,0 +1,142 @@ +package com.twitter.servo.json + +import com.fasterxml.jackson.core.JsonParser +import com.fasterxml.jackson.databind.JsonNode +import com.fasterxml.jackson.databind.ObjectMapper +import com.twitter.scrooge.ThriftStruct +import com.twitter.scrooge.ThriftStructCodec +import com.twitter.scrooge.ThriftStructSerializer +import difflib.DiffUtils +import java.io.StringWriter +import org.apache.thrift.protocol.TField +import org.apache.thrift.protocol.TProtocol +import org.apache.thrift.protocol.TProtocolFactory +import org.apache.thrift.protocol.TSimpleJSONProtocol +import org.apache.thrift.transport.TTransport +import scala.collection.JavaConverters._ +import scala.language.experimental.macros +import scala.reflect.macros.blackbox.Context + +object ThriftJsonInspector { + private val mapper = new ObjectMapper() + mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) + private val factory = mapper.getFactory() + + private def mkSerializer[T <: ThriftStruct](_codec: ThriftStructCodec[T]) = + new ThriftStructSerializer[T] { + def codec = _codec + + def protocolFactory = + // Identical to TSimpleJSONProtocol.Factory except the TProtocol + // returned serializes Thrift pass-through fields with the name + // "(TField.id)" instead of empty string. + new TProtocolFactory { + def getProtocol(trans: TTransport): TProtocol = + new TSimpleJSONProtocol(trans) { + override def writeFieldBegin(field: TField): Unit = + writeString(if (field.name.isEmpty) s"(${field.id})" else field.name) + } + } + } + + def apply[T <: ThriftStruct](codec: ThriftStructCodec[T]) = new ThriftJsonInspector(codec) +} + +/** + * Helper for human inspection of Thrift objects. 
+ */ +class ThriftJsonInspector[T <: ThriftStruct](codec: ThriftStructCodec[T]) { + import ThriftJsonInspector._ + + private[this] val serializer = mkSerializer(codec) + + /** + * Convert the Thrift object to a JSON representation based on this + * object's codec, in the manner of TSimpleJSONProtocol. The resulting + * JSON will have human-readable field names that match the field + * names that were used in the Thrift definition that the codec was + * created from, but the conversion is lossy, and the JSON + * representation cannot be converted back. + */ + def toSimpleJson(t: T): JsonNode = + mapper.readTree(factory.createParser(serializer.toBytes(t))) + + /** + * Selects requested fields (matching against the JSON fields) from a + * Thrift-generated class. + * + * Paths are specified as slash-separated strings (e.g., + * "key1/key2/key3"). If the path specifies an array or object, it is + * included in the output in JSON format, otherwise the simple value is + * converted to a string. + */ + def select(item: T, paths: Seq[String]): Seq[String] = { + val jsonNode = toSimpleJson(item) + paths.map { + _.split("/").foldLeft(jsonNode)(_.findPath(_)) match { + case node if node.isMissingNode => "[invalid-path]" + case node if node.isContainerNode => node.toString + case node => node.asText + } + } + } + + /** + * Convert the given Thrift struct to a human-readable pretty-printed + * JSON representation. This JSON cannot be converted back into a + * struct. This output is intended for debug logging or interactive + * inspection of Thrift objects. + */ + def prettyPrint(t: T): String = print(t, true) + + def print(t: T, pretty: Boolean = false): String = { + val writer = new StringWriter() + val generator = factory.createGenerator(writer) + if (pretty) + generator.useDefaultPrettyPrinter() + generator.writeTree(toSimpleJson(t)) + writer.toString + } + + /** + * Produce a human-readable unified diff of the json pretty-printed + * representations of `a` and `b`. 
If the inputs have the same JSON + * representation, the result will be the empty string. + */ + def diff(a: T, b: T, contextLines: Int = 1): String = { + val linesA = prettyPrint(a).linesIterator.toList.asJava + val linesB = prettyPrint(b).linesIterator.toList.asJava + val patch = DiffUtils.diff(linesA, linesB) + DiffUtils.generateUnifiedDiff("a", "b", linesA, patch, contextLines).asScala.mkString("\n") + } +} + +object syntax { + private[this] object CompanionObjectLoader { + def load[T](c: Context)(implicit t: c.universe.WeakTypeTag[T]) = { + val tSym = t.tpe.typeSymbol + val companion = tSym.asClass.companion + if (companion == c.universe.NoSymbol) { + c.abort(c.enclosingPosition, s"${tSym} has no companion object") + } else { + c.universe.Ident(companion) + } + } + } + + /** + * Load the companion object of the named type parameter and require + * it to be a ThriftStructCodec. Compilation will fail if the + * companion object is not a ThriftStructCodec. + */ + implicit def thriftStructCodec[T <: ThriftStruct]: ThriftStructCodec[T] = + macro CompanionObjectLoader.load[T] + + implicit class ThriftJsonSyntax[T <: ThriftStruct](t: T)(implicit codec: ThriftStructCodec[T]) { + private[this] def inspector = ThriftJsonInspector(codec) + def toSimpleJson: JsonNode = inspector.toSimpleJson(t) + def prettyPrint: String = inspector.prettyPrint(t) + def diff(other: T, contextLines: Int = 1): String = + inspector.diff(t, other, contextLines) + } +} diff --git a/tweetypie/servo/repo/BUILD b/tweetypie/servo/repo/BUILD new file mode 100644 index 000000000..66618d7e0 --- /dev/null +++ b/tweetypie/servo/repo/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/repo/src/main/scala", + ], +) diff --git a/tweetypie/servo/repo/src/main/scala/BUILD b/tweetypie/servo/repo/src/main/scala/BUILD new file mode 100644 index 000000000..c50c57807 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/BUILD @@ -0,0 +1,29 @@ +scala_library( + sources = ["**/*.scala"], + 
platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-repo", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/google/inject:guice", + "3rdparty/jvm/org/apache/thrift:libthrift", + "finagle/finagle-core/src/main", + "finagle/finagle-memcached/src/main/scala", + "finagle/finagle-mux/src/main/scala", + "finagle/finagle-thrift", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer/src/main/scala", + "tweetypie/servo/repo/src/main/thrift:thrift-scala", + "tweetypie/servo/util", + "util/util-codec/src/main/scala", + "util/util-hashing/src/main/scala", + "util/util-logging", + "util/util-security/src/main/scala/com/twitter/util/security", + "util/util-stats/src/main/scala", + ], +) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala new file mode 100644 index 000000000..6a00220ef --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ByteCountingMemcache.scala @@ -0,0 +1,183 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Future} + +case class ByteCountingMemcacheFactory( + memcacheFactory: MemcacheFactory, + statsReceiver: StatsReceiver, + delimiter: String = constants.Colon, + checksumSize: Int = 8) // memcached checksums are u64s + extends MemcacheFactory { + + def apply() = + new ByteCountingMemcache(memcacheFactory(), statsReceiver, delimiter, checksumSize) +} + +/** + * A decorator around a Memcache that counts the rough number + * of bytes transferred, bucketed & rolled up by in/out, method name, + * and key prefix + */ +class ByteCountingMemcache( + underlying: Memcache, + statsReceiver: StatsReceiver, + delimiter: String, + checksumSize: Int) + extends Memcache { + val scopedReceiver = 
statsReceiver.scope("memcache").scope("bytes") + + val outStat = scopedReceiver.stat("out") + val outReceiver = scopedReceiver.scope("out") + + val inStat = scopedReceiver.stat("in") + val inReceiver = scopedReceiver.scope("in") + + val getOutStat = outReceiver.stat("get") + val getOutReceiver = outReceiver.scope("get") + + val getInStat = inReceiver.stat("get") + val getInReceiver = inReceiver.scope("get") + val getInHitsStat = getInReceiver.stat("hits") + val getInHitsReceiver = getInReceiver.scope("hits") + val getInMissesStat = getInReceiver.stat("misses") + val getInMissesReceiver = getInReceiver.scope("misses") + + val gwcOutStat = outReceiver.stat("get_with_checksum") + val gwcOutReceiver = outReceiver.scope("get_with_checksum") + + val gwcInStat = inReceiver.stat("get_with_checksum") + val gwcInReceiver = inReceiver.scope("get_with_checksum") + val gwcInHitsStat = gwcInReceiver.stat("hits") // inbound bytes belong under the "in" scope, mirroring getInHitsStat + val gwcInHitsReceiver = gwcInReceiver.scope("hits") + val gwcInMissesStat = gwcInReceiver.stat("misses") + val gwcInMissesReceiver = gwcInReceiver.scope("misses") + + val addStat = outReceiver.stat("add") + val addReceiver = outReceiver.scope("add") + + val setStat = outReceiver.stat("set") + val setReceiver = outReceiver.scope("set") + + val replaceStat = outReceiver.stat("replace") + val replaceReceiver = outReceiver.scope("replace") + + val casStat = outReceiver.stat("check_and_set") + val casReceiver = outReceiver.scope("check_and_set") + + def release() = underlying.release() + + // get namespace from key: the prefix before the last delimiter, with remaining delimiters replaced literally (not as a regex) by "_"; an empty key yields "" + protected[this] def ns(key: String) = { + val idx = math.max(0, math.min(key.size - 1, key.lastIndexOf(delimiter))) + key.substring(0, idx).replace(delimiter, "_") + } + + override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = { + keys foreach { key => + val size = key.size + outStat.add(size) + getOutStat.add(size) + getOutReceiver.stat(ns(key)).add(size) + } + underlying.get(keys) onSuccess { lr => + lr.found foreach { +
case (key, bytes) => + val size = key.size + bytes.length + inStat.add(size) + getInStat.add(size) + getInHitsStat.add(size) + getInHitsReceiver.stat(ns(key)).add(size) + } + lr.notFound foreach { key => + val size = key.size + inStat.add(size) + getInStat.add(size) + getInMissesStat.add(size) + getInMissesReceiver.stat(ns(key)).add(size) + } + } + } + + override def getWithChecksum( + keys: Seq[String] + ): Future[CsKeyValueResult[String, Array[Byte]]] = { + keys foreach { key => + val size = key.size + outStat.add(size) + gwcOutStat.add(size) + gwcOutReceiver.stat(ns(key)).add(size) + } + underlying.getWithChecksum(keys) onSuccess { lr => + lr.found foreach { + case (key, (bytes, _)) => + val size = key.size + (bytes map { _.length } getOrElse (0)) + checksumSize + inStat.add(size) + gwcInStat.add(size) + gwcInHitsStat.add(size) + gwcInHitsReceiver.stat(ns(key)).add(size) + } + lr.notFound foreach { key => + val size = key.size + inStat.add(size) + gwcInStat.add(size) + gwcInMissesStat.add(size) + gwcInMissesReceiver.stat(ns(key)).add(size) + } + } + } + + override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { + val size = key.size + value.size + outStat.add(size) + addStat.add(size) + addReceiver.stat(ns(key)).add(size) + underlying.add(key, value, ttl) + } + + override def checkAndSet( + key: String, + value: Array[Byte], + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = { + val size = key.size + value.size + checksumSize + outStat.add(size) + casStat.add(size) + casReceiver.stat(ns(key)).add(size) + underlying.checkAndSet(key, value, checksum, ttl) + } + + override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = { + val size = key.size + value.size + outStat.add(size) + setStat.add(size) + setReceiver.stat(ns(key)).add(size) + underlying.set(key, value, ttl) + } + + override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = { + val size = key.size + value.size + 
outStat.add(size) + replaceStat.add(size) + replaceReceiver.stat(ns(key)).add(size) + underlying.replace(key, value, ttl) + } + + override def delete(key: String): Future[Boolean] = { + outStat.add(key.size) + underlying.delete(key) + } + + override def incr(key: String, delta: Long = 1): Future[Option[Long]] = { + val size = key.size + 8 + outStat.add(size) + underlying.incr(key, delta) + } + + override def decr(key: String, delta: Long = 1): Future[Option[Long]] = { + val size = key.size + 8 + outStat.add(size) + underlying.decr(key, delta) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala new file mode 100644 index 000000000..c23e6e462 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cache.scala @@ -0,0 +1,275 @@ +package com.twitter.servo.cache + +import com.google.common.cache.CacheBuilder +import com.twitter.finagle.memcached.util.NotFound +import com.twitter.servo.util.ThreadLocalStringBuilder +import com.twitter.util.{Duration, Future, Return} +import java.util.concurrent.TimeUnit +import scala.collection.mutable +import scala.collection.JavaConverters._ + +/** + * opaque trait used for getWithChecksum calls. 
+ * the implementation should be private to the cache, + * to inhibit peeking + */ +trait Checksum extends Any + +object ScopedCacheKey { + private[ScopedCacheKey] val builder = new ThreadLocalStringBuilder(64) +} + +/** + * base class for cache keys needing scoping + * + * @param globalNamespace + * the project-level namespace + * @param cacheNamespace + * the cache-level namespace + * @param version + * the version of serialization for values + * @param scopes + * additional key scopes + */ +abstract class ScopedCacheKey( + globalNamespace: String, + cacheNamespace: String, + version: Int, + scopes: String*) { + import constants._ + + override lazy val toString = { + val builder = ScopedCacheKey + .builder() + .append(globalNamespace) + .append(Colon) + .append(cacheNamespace) + .append(Colon) + .append(version) + + scopes foreach { + builder.append(Colon).append(_) + } + + builder.toString + } +} + +/** + * Shared trait for reading from a cache + */ +trait ReadCache[K, V] { + def get(keys: Seq[K]): Future[KeyValueResult[K, V]] + + /** + * get the value with an opaque checksum that can be passed in + * a checkAndSet operation. 
If there is a deserialization error, + * the checksum is still returned + */ + def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] + + /** + * release any underlying resources + */ + def release(): Unit +} + +/** + * allows one ReadCache to wrap another + */ +trait ReadCacheWrapper[K, V, This <: ReadCache[K, V]] extends ReadCache[K, V] { + def underlyingCache: This + + override def get(keys: Seq[K]) = underlyingCache.get(keys) + + override def getWithChecksum(keys: Seq[K]) = underlyingCache.getWithChecksum(keys) + + override def release() = underlyingCache.release() +} + +/** + * Simple trait for a cache supporting multi-get and single set + */ +trait Cache[K, V] extends ReadCache[K, V] { + def add(key: K, value: V): Future[Boolean] + + def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] + + def set(key: K, value: V): Future[Unit] + + def set(pairs: Seq[(K, V)]): Future[Unit] = { + Future.join { + pairs map { + case (key, value) => set(key, value) + } + } + } + + /** + * Replaces the value for an existing key. If the key doesn't exist, this has no effect. + * @return true if replaced, false if not found + */ + def replace(key: K, value: V): Future[Boolean] + + /** + * Deletes a value from cache. 
+ * @return true if deleted, false if not found + */ + def delete(key: K): Future[Boolean] +} + +/** + * allows one cache to wrap another + */ +trait CacheWrapper[K, V] extends Cache[K, V] with ReadCacheWrapper[K, V, Cache[K, V]] { + override def add(key: K, value: V) = underlyingCache.add(key, value) + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + underlyingCache.checkAndSet(key, value, checksum) + + override def set(key: K, value: V) = underlyingCache.set(key, value) + + override def replace(key: K, value: V) = underlyingCache.replace(key, value) + + override def delete(key: K) = underlyingCache.delete(key) +} + +/** + * Switch between two caches with a decider value + */ +class DeciderableCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], isAvailable: => Boolean) + extends CacheWrapper[K, V] { + override def underlyingCache = if (isAvailable) primary else secondary +} + +private object MutableMapCache { + case class IntChecksum(i: Int) extends AnyVal with Checksum +} + +/** + * implementation of a Cache with a mutable.Map + */ +class MutableMapCache[K, V](underlying: mutable.Map[K, V]) extends Cache[K, V] { + import MutableMapCache.IntChecksum + + protected[this] def checksum(value: V): Checksum = IntChecksum(value.hashCode) + + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = Future { + val founds = Map.newBuilder[K, V] + val iter = keys.iterator + while (iter.hasNext) { + val key = iter.next() + synchronized { + underlying.get(key) + } match { + case Some(v) => founds += key -> v + case None => + } + } + val found = founds.result() + val notFound = NotFound(keys, found.keySet) + KeyValueResult(found, notFound) + } + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = Future { + val founds = Map.newBuilder[K, (Return[V], Checksum)] + val iter = keys.iterator + while (iter.hasNext) { + val key = iter.next() + synchronized { + underlying.get(key) + } match { + case Some(value) => founds += key 
-> (Return(value), checksum(value)) + case None => + } + } + val found = founds.result() + val notFound = NotFound(keys, found.keySet) + KeyValueResult(found, notFound) + } + + override def add(key: K, value: V): Future[Boolean] = + synchronized { + underlying.get(key) match { + case Some(_) => + Future.False + case None => + underlying += key -> value + Future.True + } + } + + override def checkAndSet(key: K, value: V, cs: Checksum): Future[Boolean] = + synchronized { + underlying.get(key) match { + case Some(current) => + if (checksum(current) == cs) { + // checksums match, set value + underlying += key -> value + Future.True + } else { + // checksums didn't match, so no set + Future.False + } + case None => + // if nothing there, the checksums can't be compared + Future.False + } + } + + override def set(key: K, value: V): Future[Unit] = { + synchronized { + underlying += key -> value + } + Future.Done + } + + override def replace(key: K, value: V): Future[Boolean] = synchronized { + if (underlying.contains(key)) { + underlying(key) = value + Future.True + } else { + Future.False + } + } + + override def delete(key: K): Future[Boolean] = synchronized { + if (underlying.remove(key).nonEmpty) Future.True else Future.False + } + + override def release(): Unit = synchronized { + underlying.clear() + } +} + +/** + * In-memory implementation of a cache with LRU semantics and a TTL. 
+ */ +class ExpiringLruCache[K, V](ttl: Duration, maximumSize: Int) + extends MutableMapCache[K, V]( + // TODO: consider wiring the Cache interface directly to the + // Guava Cache, instead of introducing two layers of indirection + CacheBuilder.newBuilder + .asInstanceOf[CacheBuilder[K, V]] + .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) + .initialCapacity(maximumSize) + .maximumSize(maximumSize) + .build[K, V]() + .asMap + .asScala + ) + +/** + * An empty cache that stays empty + */ +class NullCache[K, V] extends Cache[K, V] { + lazy val futureTrue = Future.value(true) + override def get(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) + override def getWithChecksum(keys: Seq[K]) = Future.value(KeyValueResult(notFound = keys.toSet)) + override def add(key: K, value: V) = futureTrue + override def checkAndSet(key: K, value: V, checksum: Checksum) = Future.value(true) + override def set(key: K, value: V) = Future.Done + override def replace(key: K, value: V) = futureTrue + override def delete(key: K) = futureTrue + override def release() = () +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala new file mode 100644 index 000000000..85359db1a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CacheFactory.scala @@ -0,0 +1,153 @@ +package com.twitter.servo.cache + +import com.twitter.util.Duration +import scala.collection.mutable + +/** + * Used to produce differently-typed caches with the same configuration + * and potentially with shared observation. + */ +trait CacheFactory { + def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] +} + +/** + * Builds an instance of NullCache. 
+ */ +object NullCacheFactory extends CacheFactory { + val cache = new NullCache[Nothing, Nothing] + + override def apply[K, V](serializer: Serializer[V], scopes: String*): Cache[K, V] = + cache.asInstanceOf[NullCache[K, V]] +} + +/** + * Builds DeciderableCaches, which proxy to one of two caches built from the + * argument CacheFactories depending on a decider value. + */ +case class DeciderableCacheFactory( + primaryCacheFactory: CacheFactory, + secondaryCacheFactory: CacheFactory, + isAvailable: () => Boolean) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new DeciderableCache( + primaryCacheFactory(serializer, scopes: _*), + secondaryCacheFactory(serializer, scopes: _*), + isAvailable() + ) +} + +/** + * Builds MigratingCaches, which support gradual migrations from one cache + * to another. See MigratingCache.scala for details. + */ +case class MigratingCacheFactory(cacheFactory: CacheFactory, darkCacheFactory: CacheFactory) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new MigratingCache( + cacheFactory(serializer, scopes: _*), + darkCacheFactory(serializer, scopes: _*) + ) +} + +case class ObservableCacheFactory(cacheFactory: CacheFactory, cacheObserver: CacheObserver) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new ObservableCache(cacheFactory(serializer), cacheObserver.scope(scopes: _*)) +} + +/** + * Builds in-memory caches with elements that never expire. 
+ */ +case class MutableMapCacheFactory( + serialize: Boolean = false, + useSharedCache: Boolean = false, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + lazy val sharedCache = mkCache + + def mkCache = { + new MutableMapCache[Object, Object](new mutable.HashMap) + } + + override def apply[K, V](serializer: Serializer[V], scopes: String*) = { + val cache = if (useSharedCache) sharedCache else mkCache + if (serialize) { + new KeyValueTransformingCache( + cache.asInstanceOf[Cache[String, Array[Byte]]], + serializer, + keyTransformerFactory() + ) + } else { + cache.asInstanceOf[Cache[K, V]] + } + } +} + +/** + * Builds in-memory caches with TTL'd entries and LRU eviction policies. + */ +case class InProcessLruCacheFactory( + ttl: Duration, + lruSize: Int, + serialize: Boolean = false, + useSharedCache: Boolean = false, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + def mkCache = new ExpiringLruCache[Object, Object](ttl, lruSize) + lazy val sharedCache = mkCache + + override def apply[K, V](serializer: Serializer[V], scopes: String*) = { + val cache = if (useSharedCache) sharedCache else mkCache + if (serialize) { + new KeyValueTransformingCache( + cache.asInstanceOf[Cache[String, Array[Byte]]], + serializer, + keyTransformerFactory() + ) + } else { + cache.asInstanceOf[Cache[K, V]] + } + } +} + +/** + * Builds MemcacheCaches, which applies serialization, key-transformation, + * and TTL mechanics to an underlying Memcache. + */ +case class MemcacheCacheFactory( + memcache: Memcache, + ttl: Duration, + keyTransformerFactory: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends CacheFactory { + override def apply[K, V](serializer: Serializer[V], scopes: String*) = + new MemcacheCache(memcache, ttl, serializer, keyTransformerFactory[K]()) +} + +/** + * Builds KeyTransformers, which are required for constructing + * KeyValueTransformingCaches. 
+ */ +trait KeyTransformerFactory { + def apply[K](): KeyTransformer[K] +} + +/** + * Builds KeyTransformers by simply calling the keys' toString methods. + */ +object ToStringKeyTransformerFactory extends KeyTransformerFactory { + def apply[K]() = new ToStringKeyTransformer[K]() +} + +/** + * Builds KeyTransformers that prefix all keys generated by an underlying + * transformer with a string. + */ +case class PrefixKeyTransformerFactory( + prefix: String, + delimiter: String = constants.Colon, + underlying: KeyTransformerFactory = ToStringKeyTransformerFactory) + extends KeyTransformerFactory { + def apply[K]() = new PrefixKeyTransformer[K](prefix, delimiter, underlying[K]()) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala new file mode 100644 index 000000000..9956cb515 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Cached.scala @@ -0,0 +1,261 @@ +package com.twitter.servo.cache + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.thriftscala.CachedValueStatus.DoNotCache +import com.twitter.servo.util.{Gate, Transformer} +import com.twitter.util.{Duration, Return, Throw, Time} +import java.nio.ByteBuffer + +object Cached { + + private[this] val millisToTime: Long => Time = + ms => Time.fromMilliseconds(ms) + + private val timeToMills: Time => Long = + time => time.inMilliseconds + + /** + * Deserialize a CachedValue to a Cached[V] + * + * If the ByteBuffer contained in the `cachedValue` spans its whole backing `Array[Byte]` (array offset 0, + * position 0, remaining == array length), the serializer is applied directly to that array; otherwise the remaining bytes are copied first. + * + * As such, the `Serializer[V]` the caller provides MUST NOT mutate the buffer it is given. + * This exhortation is also given in com.twitter.servo.util.Transformer, but repeated here.
+ */ + def apply[V](cachedValue: CachedValue, serializer: Serializer[V]): Cached[V] = { + val value: Option[V] = cachedValue.value match { + case Some(buf) if buf.hasArray && buf.arrayOffset() == 0 && buf.position() == 0 && buf.remaining == buf.array.length => // fast path only when the buffer covers the entire array + serializer.from(buf.array).toOption + case Some(buf) => + val array = new Array[Byte](buf.remaining) + buf.duplicate.get(array) + serializer.from(array).toOption + case None => None + } + val status = + if (cachedValue.value.nonEmpty && value.isEmpty) + CachedValueStatus.DeserializationFailed + else + cachedValue.status + + Cached( + value, + status, + Time.fromMilliseconds(cachedValue.cachedAtMsec), + cachedValue.readThroughAtMsec.map(millisToTime), + cachedValue.writtenThroughAtMsec.map(millisToTime), + cachedValue.doNotCacheUntilMsec.map(millisToTime), + cachedValue.softTtlStep + ) + } +} + +/** + * A simple metadata wrapper for cached values. This is stored in the cache + * using the [[com.twitter.servo.cache.thriftscala.CachedValue]] struct, which is similar, but + * untyped. + */ +case class Cached[V]( + value: Option[V], + status: CachedValueStatus, + cachedAt: Time, + readThroughAt: Option[Time] = None, + writtenThroughAt: Option[Time] = None, + doNotCacheUntil: Option[Time] = None, + softTtlStep: Option[Short] = None) { + + /** + * produce a new cached value with the same metadata + */ + def map[W](f: V => W): Cached[W] = copy(value = value.map(f)) + + /** + * serialize to a CachedValue + */ + def toCachedValue(serializer: Serializer[V]): CachedValue = { + var serializedValue: Option[ByteBuffer] = None + val cachedValueStatus = value match { + case Some(v) => + serializer.to(v) match { + case Return(sv) => + serializedValue = Some(ByteBuffer.wrap(sv)) + status + case Throw(_) => CachedValueStatus.SerializationFailed + } + case None => status + } + + CachedValue( + serializedValue, + cachedValueStatus, + cachedAt.inMilliseconds, + readThroughAt.map(Cached.timeToMills), + writtenThroughAt.map(Cached.timeToMills), + doNotCacheUntil.map(Cached.timeToMills), +
softTtlStep + ) + } + + /** + * Resolves conflicts between a value being inserted into cache and a value already in cache by + * using the time a cached value was last updated. + * If the cached value has a writtenThroughAt, returns it plus writtenThroughBuffer. Otherwise + * returns readThroughAt, but if that doesn't exist, returns cachedAt. + * This makes it favor writes to reads in the event of a race condition. + */ + def effectiveUpdateTime[V](writtenThroughBuffer: Duration = 0.second): Time = { + this.writtenThroughAt match { + case Some(wta) => wta + writtenThroughBuffer + case None => + this.readThroughAt match { + case Some(rta) => rta + case None => this.cachedAt + } + } + } +} + +/** + * Switches between two cache pickers based on a deciderable gate + */ +class DeciderablePicker[V]( + primaryPicker: LockingCache.Picker[Cached[V]], + secondaryPicker: LockingCache.Picker[Cached[V]], + usePrimary: Gate[Unit], + statsReceiver: StatsReceiver) + extends LockingCache.Picker[Cached[V]] { + private[this] val stats = statsReceiver.scope("deciderable_picker") + private[this] val pickerScope = stats.scope("picker") + private[this] val primaryPickerCount = pickerScope.counter("primary") + private[this] val secondaryPickerCount = pickerScope.counter("secondary") + + private[this] val pickedScope = stats.scope("picked_values") + private[this] val pickedValuesMatched = pickedScope.counter("matched") + private[this] val pickedValuesMismatched = pickedScope.counter("mismatched") + + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + val secondaryPickerValue = secondaryPicker(newValue, oldValue) + + if (usePrimary()) { + val primaryPickerValue = primaryPicker(newValue, oldValue) + + primaryPickerCount.incr() + if (primaryPickerValue == secondaryPickerValue) pickedValuesMatched.incr() + else pickedValuesMismatched.incr() + + primaryPickerValue + } else { + secondaryPickerCount.incr() + secondaryPickerValue + } + } + + override def toString(): String =
"DeciderablePicker" + +} + +/** + * Similar to the PreferNewestCached picker, but it prefers a written-through value + * over a read-through one as long as the written-through time plus writtenThroughBuffer + * is newer than the read-through time. As in PreferNewestCached, when both values + * have the same effective update time the picker picks the new value. + * + * It is intended to solve the race condition where read and write requests arrive at the + * same time, the write request's value is cached first and is then overridden with + * a stale value from the read request. + * + * With a writtenThroughBuffer of zero the comparison is the same as PreferNewestCached's. + * + */ +class PreferWrittenThroughCached[V]( + writtenThroughBuffer: Duration = 1.second) + extends PreferNewestCached[V] { + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + // the tie goes to newValue + if (oldValue.effectiveUpdateTime(writtenThroughBuffer) > newValue.effectiveUpdateTime( + writtenThroughBuffer)) + None + else + Some(newValue) + } + override def toString(): String = "PreferWrittenThroughCached" +} + +/** + * Prefers whichever value has the later effectiveUpdateTime; a tie goes to the new value + */ +class PreferNewestCached[V] extends LockingCache.Picker[Cached[V]] { + + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + if (oldValue.effectiveUpdateTime() > newValue.effectiveUpdateTime()) + None + else + Some(newValue) + } + + override def toString(): String = "PreferNewestCached" +} + +/** + * Prefer non-empty values. If a non-empty value is in cache, and the + * value to store is empty, return the non-empty value with a fresh cachedAt + * instead.
+ */ +class PreferNewestNonEmptyCached[V] extends PreferNewestCached[V] { + override def apply(newValue: Cached[V], oldValue: Cached[V]) = { + (newValue.value, oldValue.value) match { + // Some/Some and None/None cases are handled by the super class + case (Some(_), Some(_)) => super.apply(newValue, oldValue) + case (None, None) => super.apply(newValue, oldValue) + case (Some(_), None) => Some(newValue) + case (None, Some(_)) => Some(oldValue.copy(cachedAt = Time.now)) + } + } +} + +/** + * Prefers an existing DoNotCache entry while it has not expired, i.e. its doNotCacheUntil is unset or + * later than the new value's effectiveUpdateTime. Otherwise defers to fallBackPicker. + * @param fallBackPicker the picker to use otherwise; defaults to PreferNewestCached. + */ +class PreferDoNotCache[V]( + fallBackPicker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V], + statsReceiver: StatsReceiver) + extends LockingCache.Picker[Cached[V]] { + private[this] val pickDoNotCacheEntryCounter = statsReceiver.counter("pick_do_not_cache_entry") + private[this] val useFallbackCounter = statsReceiver.counter("use_fallback") + override def apply(newValue: Cached[V], oldValue: Cached[V]): Option[Cached[V]] = { + if (oldValue.status == DoNotCache && oldValue.doNotCacheUntil.forall( + _ > newValue.effectiveUpdateTime())) { // evaluates to true if dnc until is None + pickDoNotCacheEntryCounter.incr() + None + } else { + useFallbackCounter.incr() + fallBackPicker.apply(newValue, oldValue) + } + } +} + +/** + * A Transformer of Cached values composed of a Transformer of the underlying values.
+ */ +class CachedTransformer[A, B](underlying: Transformer[A, B]) + extends Transformer[Cached[A], Cached[B]] { + def to(cachedA: Cached[A]) = cachedA.value match { + case None => Return(cachedA.copy(value = None)) + case Some(a) => + underlying.to(a) map { b => + cachedA.copy(value = Some(b)) + } + } + + def from(cachedB: Cached[B]) = cachedB.value match { + case None => Return(cachedB.copy(value = None)) + case Some(b) => + underlying.from(b) map { a => + cachedB.copy(value = Some(a)) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala new file mode 100644 index 000000000..5fa06185a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterCache.scala @@ -0,0 +1,20 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +trait CounterCache[K] extends Cache[K, Long] { + def incr(key: K, delta: Int = 1): Future[Option[Long]] + def decr(key: K, delta: Int = 1): Future[Option[Long]] +} + +class MemcacheCounterCache[K]( + memcache: Memcache, + ttl: Duration, + transformKey: KeyTransformer[K] = ((k: K) => k.toString): (K => java.lang.String)) + extends MemcacheCache[K, Long](memcache, ttl, CounterSerializer, transformKey) + with CounterCache[K] + +class NullCounterCache[K] extends NullCache[K, Long] with CounterCache[K] { + override def incr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) + override def decr(key: K, delta: Int = 1): Future[Option[Long]] = Future.value(Some(0L)) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala new file mode 100644 index 000000000..4711cc2ef --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/CounterSerializer.scala @@ -0,0 +1,114 @@ +package com.twitter.servo.cache 
+ +import com.google.common.base.Charsets +import com.twitter.util.Try + +/** + * Fast implementation of dealing with memcached counters. + * + * Memcache is funkytown for incr and decr. Basically, you store a number, + * as a STRING, and then incr and decr that. This abstracts over that detail. + * + * This implementation was quite a bit faster than the simple implementation + * of `new String(bytes, Charsets.US_ASCII).toLong()` + * and `Long.toString(value).getBytes()` + * + * Thread-safe. + */ +object CounterSerializer extends Serializer[Long] { + private[this] val Minus = '-'.toByte + // The lower bound + private[this] val Zero = '0'.toByte + // The upper bound + private[this] val Nine = '9'.toByte + + // Max length for our byte arrays that'll fit all positive longs + private[this] val MaxByteArrayLength = 19 + + override def to(long: Long): Try[Array[Byte]] = Try { + // NOTE: code based on Long.toString(value), but it avoids creating the + // intermediate String object and the charset encoding in String.getBytes + // This was about 12% faster than calling Long.toString(long).getBytes + if (long == Long.MinValue) { + "-9223372036854775808".getBytes(Charsets.US_ASCII) + } else { + val size = if (long < 0) stringSize(-long) + 1 else stringSize(long) + val bytes = new Array[Byte](size) + + var isNegative = false + var endAt = 0 + var currentLong = if (long < 0) { + isNegative = true + endAt = 1 + -long + } else { + long + } + + // Note: look at the implementation in Long.getChars(long, int, char[]) + // They can do 2 digits at a time for this, so we could speed this up + // See: Division by Invariant Integers using Multiplication + // http://gmplib.org/~tege/divcnst-pldi94.pdf + + // starting at the least significant digit and working our way up... 
+ var pos = size - 1 + do { + val byte = currentLong % 10 + bytes(pos) = (Zero + byte).toByte + currentLong /= 10 + pos -= 1 + } while (currentLong != 0) + + if (isNegative) { + assert(pos == 0, "For value " + long + ", pos " + pos) + bytes(0) = Minus + } + + bytes + } + } + + override def from(bytes: Array[Byte]): Try[Long] = Try { + // This implementation was about 4x faster than the simple: + // new String(bytes, Charsets.US_ASCII).toLong + + if (bytes.length < 1) + throw new NumberFormatException("Empty byte arrays are unsupported") + + val isNegative = bytes(0) == Minus + if (isNegative && bytes.length == 1) + throw new NumberFormatException(bytes.mkString(",")) + + // we count in negative numbers so we don't have problems at Long.MaxValue + var total = 0L + val endAt = bytes.length + var i = if (isNegative) 1 else 0 + while (i < endAt) { + val b = bytes(i) + if (b < Zero || b > Nine) + throw new NumberFormatException(bytes.mkString(",")) + + val int = b - Zero + total = (total * 10L) - int + + i += 1 + } + + if (isNegative) total else -total + } + + /** + * @param long must be non-negative. The threshold p is a Long: as an Int it wraps after 10^9 and over-sizes large values + */ + private[this] def stringSize(long: Long): Int = { + var p = 10L + var i = 1 + while (i < MaxByteArrayLength) { + if (long < p) return i + p *= 10 + i += 1 + } + MaxByteArrayLength + } + +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala new file mode 100644 index 000000000..0cd3153a7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/FinagleMemcache.scala @@ -0,0 +1,149 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.{CasResult, Client} +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.{Backoff, Memcached, TimeoutException, WriteException} +import com.twitter.hashing.KeyHasher +import com.twitter.io.Buf +import com.twitter.logging.Logger +import
com.twitter.util._ + +case class MemcacheRetryPolicy( + writeExceptionBackoffs: Backoff, + timeoutBackoffs: Backoff) + extends RetryPolicy[Try[Nothing]] { + override def apply(r: Try[Nothing]) = r match { + case Throw(_: WriteException) => onWriteException + case Throw(_: TimeoutException) => onTimeoutException + case _ => None + } + + private[this] def onTimeoutException = consume(timeoutBackoffs.toStream) { tail => + copy(timeoutBackoffs = Backoff.fromStream(tail)) + } + + private[this] def onWriteException = consume(writeExceptionBackoffs.toStream) { tail => + copy(writeExceptionBackoffs = Backoff.fromStream(tail)) + } + + private[this] def consume(s: Stream[Duration])(f: Stream[Duration] => MemcacheRetryPolicy) = { + s.headOption map { duration => + (duration, f(s.tail)) + } + } +} + +object FinagleMemcacheFactory { + val DefaultHashName = "fnv1-32" + + def apply(client: Memcached.Client, dest: String, hashName: String = DefaultHashName) = + new FinagleMemcacheFactory(client, dest, hashName) +} + +class FinagleMemcacheFactory private[cache] ( + client: Memcached.Client, + dest: String, + hashName: String) + extends MemcacheFactory { + + def apply(): Memcache = { + val keyHasher = KeyHasher.byName(hashName) + new FinagleMemcache(client.withKeyHasher(keyHasher).newTwemcacheClient(dest), hashName) + } +} + +object FinagleMemcache { + val NoFlags = 0 + val logger = Logger(getClass) +} + +/** + * Adapter for a [[Memcache]] (type alias for [[TtlCache]]) from a Finagle Memcached + * [[Client]]. 
+ */ +class FinagleMemcache(client: Client, hashName: String = FinagleMemcacheFactory.DefaultHashName) + extends Memcache { + + import FinagleMemcache.NoFlags + + private[this] case class BufferChecksum(buffer: Buf) extends Checksum + + def release(): Unit = { + client.close() + } + + override def get(keys: Seq[String]): Future[KeyValueResult[String, Array[Byte]]] = + client.getResult(keys).transform { + case Return(gr) => + val found = gr.hits.map { + case (key, v) => + val bytes = Buf.ByteArray.Owned.extract(v.value) + key -> bytes + } + Future.value(KeyValueResult(found, gr.misses, gr.failures)) + + case Throw(t) => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + + override def getWithChecksum(keys: Seq[String]): Future[CsKeyValueResult[String, Array[Byte]]] = + client.getsResult(keys).transform { + case Return(gr) => + try { + val hits = gr.hits map { + case (key, v) => + val bytes = Buf.ByteArray.Owned.extract(v.value) + key -> (Return(bytes), BufferChecksum( + v.casUnique.get + )) // TODO. what to do if missing? 
+ } + Future.value(KeyValueResult(hits, gr.misses, gr.failures)) + } catch { + case t: Throwable => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + case Throw(t) => + Future.value(KeyValueResult(failed = keys.map(_ -> t).toMap)) + } + + private val jb2sb: java.lang.Boolean => Boolean = _.booleanValue + private val jl2sl: java.lang.Long => Long = _.longValue + + override def add(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = + client.add(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb + + override def checkAndSet( + key: String, + value: Array[Byte], + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = { + checksum match { + case BufferChecksum(cs) => + client.checkAndSet(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value), cs) map { + res: CasResult => + res.replaced + } + case _ => + Future.exception(new IllegalArgumentException("unrecognized checksum: " + checksum)) + } + } + + override def set(key: String, value: Array[Byte], ttl: Duration): Future[Unit] = + client.set(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) + + override def replace(key: String, value: Array[Byte], ttl: Duration): Future[Boolean] = + client.replace(key, NoFlags, ttl.fromNow, Buf.ByteArray.Owned(value)) map jb2sb + + override def delete(key: String): Future[Boolean] = + client.delete(key) map jb2sb + + def incr(key: String, delta: Long = 1): Future[Option[Long]] = + client.incr(key, delta) map { _ map jl2sl } + + def decr(key: String, delta: Long = 1): Future[Option[Long]] = + client.decr(key, delta) map { _ map jl2sl } + + // NOTE: This is the only reason that hashName is passed as a param to FinagleMemcache. 
+ override lazy val toString = "FinagleMemcache(%s)".format(hashName) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala new file mode 100644 index 000000000..86c7f495a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ForwardingCache.scala @@ -0,0 +1,186 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Future, Return} +import scala.collection.mutable + +/** + * uses a forwarding cache to lookup a value by a secondary index. + * filters out values for which the requested secondary index does not + * match the actual secondary index (these are treated as a miss) + */ +class ForwardingCache[K, F, V]( + forwardingCache: Cache[K, Cached[F]], + underlyingCache: SecondaryIndexingCache[F, _, V], + primaryKey: V => F, + secondaryKey: SecondaryIndexingCache.IndexMapping[K, V], + lockingCacheFactory: LockingCacheFactory) + extends LockingCache[K, Cached[V]] { + protected[this] case class ForwardingChecksum( + forwardingChecksum: Checksum, + underlyingChecksum: Option[Checksum]) + extends Checksum + + protected[this] val lockingUnderlying = lockingCacheFactory(underlyingCache) + protected[this] val lockingForwarding = lockingCacheFactory(forwardingCache) + + override def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { + forwardingCache.get(keys) flatMap { flr => + val (tombstones, notTombstones) = { + val tombstones = mutable.Map.empty[K, Cached[F]] + val notTombstones = mutable.Map.empty[F, K] + // split results into tombstoned keys and non-tombstoned key/pKeys + // while we're at it, produce a reverse-keymap of non-tombstones + flr.found foreach { + case (key, cachedPKey) => + cachedPKey.value match { + case Some(pKey) => notTombstones += pKey -> key + case None => tombstones += key -> cachedPKey + } + } + (tombstones.toMap, notTombstones.toMap) + } + + // only make call to underlyingCache 
if there are keys to lookup + val fromUnderlying = if (notTombstones.isEmpty) { + KeyValueResult.emptyFuture + } else { + // get non-tombstoned values from underlying cache + underlyingCache.get(notTombstones.keys.toSeq) map { lr => + val (goodValues, badValues) = lr.found partition { + case (pKey, cachedValue) => + // filter out values that somehow don't match the primary key and secondary key + cachedValue.value match { + case Some(value) => + secondaryKey(value) match { + case Return(Some(sKey)) => + pKey == primaryKey(value) && sKey == notTombstones(pKey) + case _ => false + } + case None => true + } + } + val found = goodValues map { case (k, v) => notTombstones(k) -> v } + val notFound = (lr.notFound ++ badValues.keySet) map { notTombstones(_) } + val failed = lr.failed map { case (k, t) => notTombstones(k) -> t } + KeyValueResult(found, notFound, failed) + } handle { + case t => + KeyValueResult(failed = notTombstones.values map { _ -> t } toMap) + } + } + + fromUnderlying map { lr => + // fill in tombstone values, copying the metadata from the Cached[F] + val withTombstones = tombstones map { + case (key, cachedPKey) => + key -> cachedPKey.copy[V](value = None) + } + val found = lr.found ++ withTombstones + val notFound = flr.notFound ++ lr.notFound + val failed = flr.failed ++ lr.failed + KeyValueResult(found, notFound, failed) + } + } + } + + // since we implement lockAndSet directly, we don't support getWithChecksum and checkAndSet. + // we should consider changing the class hierarchy of Cache/LockingCache so that this can + // be checked at compile time. 
+ + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = + Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) + + override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] = + Future.exception(new UnsupportedOperationException("Use lockAndSet directly")) + + protected[this] def maybeAddForwardingIndex( + key: K, + cachedPrimaryKey: Cached[F], + wasAdded: Boolean + ): Future[Boolean] = { + if (wasAdded) + forwardingCache.set(key, cachedPrimaryKey) map { _ => + true + } + else + Future.value(false) + } + + override def add(key: K, cachedValue: Cached[V]): Future[Boolean] = { + // copy the cache metadata to the primaryKey + val cachedPrimaryKey = cachedValue map { primaryKey(_) } + cachedPrimaryKey.value match { + case Some(pKey) => + // if a value can be derived from the key, use the underlying cache to add it + // the underlying cache will create the secondary index as a side-effect + underlyingCache.add(pKey, cachedValue) + case None => + // otherwise, we're just writing a tombstone, so we need to check if it exists + forwardingCache.add(key, cachedPrimaryKey) + } + } + + override def lockAndSet( + key: K, + handler: LockingCache.Handler[Cached[V]] + ): Future[Option[Cached[V]]] = { + handler(None) match { + case Some(cachedValue) => + cachedValue.value match { + case Some(value) => + // set on the underlying cache, and let it take care of adding + // the secondary index + val pKey = primaryKey(value) + lockingUnderlying.lockAndSet(pKey, handler) + case None => + // no underlying value to set, so just write the forwarding entry. + // secondaryIndexingCache doesn't lock for this set, so there's + // no point in our doing it. There's a slight risk of writing an + // errant tombstone in a race, but the only way to get around this + // would be to lock around *all* primary and secondary indexes, + // which could produce deadlocks, which is probably worse. 
+ val cachedEmptyPKey = cachedValue.copy[F](value = None) + forwardingCache.set(key, cachedEmptyPKey) map { _ => + Some(cachedValue) + } + } + case None => + // nothing to do here + Future.value(None) + } + } + + override def set(key: K, cachedValue: Cached[V]): Future[Unit] = { + cachedValue.value match { + case Some(value) => + // set on the underlying cache, and let it take care of adding + // the secondary index + val pKey = primaryKey(value) + underlyingCache.set(pKey, cachedValue) + case None => + // no underlying value to set, so just write the forwarding entry + forwardingCache.set(key, cachedValue.copy[F](value = None)) + } + } + + override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] = { + cachedValue.value match { + case Some(value) => + // replace in the underlying cache, and let it take care of adding the secondary index + val pKey = primaryKey(value) + underlyingCache.replace(pKey, cachedValue) + case None => + // no underlying value to set, so just write the forwarding entry + forwardingCache.replace(key, cachedValue.copy[F](value = None)) + } + } + + override def delete(key: K): Future[Boolean] = { + forwardingCache.delete(key) + } + + override def release(): Unit = { + forwardingCache.release() + underlyingCache.release() + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala new file mode 100644 index 000000000..af29080e4 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/HotKeyMemcacheClient.scala @@ -0,0 +1,109 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.Client +import com.twitter.finagle.memcached.protocol.Value +import com.twitter.finagle.memcached.GetResult +import com.twitter.finagle.memcached.ProxyClient +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tracing.Trace +import com.twitter.io.Buf +import 
com.twitter.logging.Logger +import com.twitter.util.Future +import scala.collection.breakOut + +object HotKeyCachingCache { + private[cache] val logger = Logger.get(getClass) +} + +/** + * Wrapper for a [[com.twitter.finagle.memcached.Client]] that handles in-process caching for + * values flagged for promotion ("hot keys") by a twemcache backend. + * + * This is similar conceptually to + * [[com.twitter.servo.repository.HotKeyCachingKeyValueRepository]] but differs because + * HotKeyCachingKeyValueRepository detects hot keys in the client, which requires tuning and + * becomes less effective as the number of instances in the cluster grows. [[HotKeyMemcacheClient]] + * uses detection in the memcache server, which is centralized and has a better view of frequently + * accessed keys. This is a custom feature in twemcache, Twitter's memcache fork, that is not + * enabled by default. Consult with the cache team if you want to use it. + * + * Usage: + * {{{ + * new HotKeyMemcacheClient( + * proxyClient = Memcached.client. ...
.newRichClient(destination), + * inProcessCache = ExpiringLruInProcessCache(ttl = 10.seconds, maximumSize = 100), + * statsReceiver = statsReceiver.scope("inprocess") + * ) + * }}} + */ +class HotKeyMemcacheClient( + override val proxyClient: Client, + inProcessCache: InProcessCache[String, Value], + statsReceiver: StatsReceiver, + label: Option[String] = None) + extends ProxyClient { + import HotKeyCachingCache._ + + private val promotions = statsReceiver.counter("promotions") + private val hits = statsReceiver.counter("hits") + private val misses = statsReceiver.counter("misses") + + private def cacheIfPromoted(key: String, value: Value): Unit = { + if (value.flags.exists(MemcacheFlags.shouldPromote)) { + logger.debug(s"Promoting hot-key $key flagged by memcached backend to in-process cache.") + Trace.recordBinary("hot_key_cache.hot_key_promoted", s"${label.getOrElse("")},$key") + promotions.incr() + inProcessCache.set(key, value) + } + } + + override def getResult(keys: Iterable[String]): Future[GetResult] = { + val resultsFromInProcessCache: Map[String, Value] = + keys.flatMap(k => inProcessCache.get(k).map(v => (k, v)))(breakOut) + val foundInProcess = resultsFromInProcessCache.keySet + val newKeys = keys.filterNot(foundInProcess.contains) + + hits.incr(foundInProcess.size) + misses.incr(newKeys.size) + + if (foundInProcess.nonEmpty) { + // If there are hot keys found in the cache, record a trace annotation with the format: + // hot key cache client label;the number of hits;number of misses;and the set of hot keys found in the cache. + Trace.recordBinary( + "hot_key_cache", + s"${label.getOrElse("")};${foundInProcess.size};${newKeys.size};${foundInProcess.mkString(",")}" + ) + } + + proxyClient.getResult(newKeys).map { result => + result.hits.foreach { case (k, v) => cacheIfPromoted(k, v) } + result.copy(hits = result.hits ++ resultsFromInProcessCache) + } + } + + /** + * Exposes whether or not a key was promoted to the in-process hot key cache. 
In most cases, users + * of [[HotKeyMemcacheClient]] should not need to know this. However, they may if hot key caching + * conflicts with other layers of caching they are using. + */ + def isHotKey(key: String): Boolean = inProcessCache.get(key).isDefined +} + +// TODO: May want to turn flags into a value class in com.twitter.finagle.memcached +// with methods for these operations +object MemcacheFlags { + val FrequencyBasedPromotion: Int = 1 + val BandwidthBasedPromotion: Int = 1 << 1 + val Promotable: Int = FrequencyBasedPromotion | BandwidthBasedPromotion + + /** + * Memcache flags are returned as an unsigned integer, represented as a decimal string. + * They are parsed as a Long because an unsigned 32-bit value can exceed Int.MaxValue. + * Check whether the bit in position 0 ([[FrequencyBasedPromotion]]) or the bit in position 1 + * ([[BandwidthBasedPromotion]]) is set to 1 (zero-index from least-significant bit). + */ + def shouldPromote(flagsBuf: Buf): Boolean = { + val flags = flagsBuf match { case Buf.Utf8(s) => s.toLong } + (flags & Promotable) != 0 + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala new file mode 100644 index 000000000..a47e0f7a1 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/InProcessCache.scala @@ -0,0 +1,63 @@ +package com.twitter.servo.cache + +import com.google.common.cache.{CacheBuilder, RemovalListener} +import com.twitter.util.Duration +import java.util.concurrent.TimeUnit + +object InProcessCache { + + /** + * Apply a read filter to exclude items in an InProcessCache + */ + def withFilter[K, V]( + underlying: InProcessCache[K, V] + )( + shouldFilter: (K, V) => Boolean + ): InProcessCache[K, V] = + new InProcessCache[K, V] { + def get(key: K): Option[V] = underlying.get(key) filterNot { shouldFilter(key, _) } + def set(key: K, value: V) = underlying.set(key, value) + } +} + +/** + * An in-process cache interface.
It is distinct from a map in that: + * 1) All methods must be threadsafe + * 2) A value set in cache is not guaranteed to remain in the cache. + */ +trait InProcessCache[K, V] { + def get(key: K): Option[V] + def set(key: K, value: V): Unit +} + +/** + * In-process implementation of a cache with LRU semantics and a TTL. + */ +class ExpiringLruInProcessCache[K, V]( + ttl: Duration, + maximumSize: Int, + removalListener: Option[RemovalListener[K, V]] = None: None.type) + extends InProcessCache[K, V] { + + private[this] val cacheBuilder = + CacheBuilder.newBuilder + .asInstanceOf[CacheBuilder[K, V]] + .expireAfterWrite(ttl.inMilliseconds, TimeUnit.MILLISECONDS) + .initialCapacity(maximumSize) + .maximumSize(maximumSize) + + private[this] val cache = + removalListener match { + case Some(listener) => + cacheBuilder + .removalListener(listener) + .build[K, V]() + case None => + cacheBuilder + .build[K, V]() + } + + def get(key: K): Option[V] = Option(cache.getIfPresent(key)) + + def set(key: K, value: V): Unit = cache.put(key, value) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala new file mode 100644 index 000000000..0228b4a0f --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/IterableSerializer.scala @@ -0,0 +1,84 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Throw, Return, Try} +import java.io.{DataOutputStream, ByteArrayOutputStream} +import java.nio.ByteBuffer +import scala.collection.mutable +import scala.util.control.NonFatal + +object IterableSerializer { + // Serialized format for version 0: + // Header: + // 1 byte - Version + // 4 byte - number of items + // Data, 1 per item: + // 4 bytes - item length in bytes (n) + // n bytes - item data + val FormatVersion = 0 +} + +/** + * A `Serializer` for `Iterable[T]`s. 
+ * + * @param itemSerializer a Serializer for the individual elements. + * @param itemSizeEstimate estimated size in bytes of individual elements + */ +class IterableSerializer[T, C <: Iterable[T]]( + newBuilder: () => mutable.Builder[T, C], + itemSerializer: Serializer[T], + itemSizeEstimate: Int = 8) + extends Serializer[C] { + import IterableSerializer.FormatVersion + + if (itemSizeEstimate <= 0) { + throw new IllegalArgumentException( + "Item size estimate must be positive. Invalid estimate provided: " + itemSizeEstimate + ) + } + + override def to(iterable: C): Try[Array[Byte]] = Try { + assert(iterable.hasDefiniteSize, "Must have a definite size: %s".format(iterable)) + + val numItems = iterable.size + val baos = new ByteArrayOutputStream(1 + 4 + (numItems * (4 + itemSizeEstimate))) + val output = new DataOutputStream(baos) + + // Write serialization version format and set length. + output.writeByte(FormatVersion) + output.writeInt(numItems) + + iterable.foreach { item => + val itemBytes = itemSerializer.to(item).get() + output.writeInt(itemBytes.length) + output.write(itemBytes) + } + output.flush() + baos.toByteArray() + } + + override def from(bytes: Array[Byte]): Try[C] = { + try { + val buf = ByteBuffer.wrap(bytes) + val formatVersion = buf.get() + if (formatVersion < 0 || formatVersion > FormatVersion) { + Throw(new IllegalArgumentException("Invalid serialization format: " + formatVersion)) + } else { + val numItems = buf.getInt() + val builder = newBuilder() + builder.sizeHint(numItems) + + var i = 0 + while (i < numItems) { + val itemBytes = new Array[Byte](buf.getInt()) + buf.get(itemBytes) + val item = itemSerializer.from(itemBytes).get() + builder += item + i += 1 + } + Return(builder.result()) + } + } catch { + case NonFatal(e) => Throw(e) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala new file mode 
100644 index 000000000..8caea385a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyFilteringCache.scala @@ -0,0 +1,51 @@ +package com.twitter.servo.cache + +import com.twitter.util.Future + +/** + * A cache wrapper that makes the underlying cache transparent to + * certain keys. + */ +class KeyFilteringCache[K, V](val underlyingCache: Cache[K, V], keyPredicate: K => Boolean) + extends CacheWrapper[K, V] { + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = + underlyingCache.get(keys filter keyPredicate) + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = + underlyingCache.getWithChecksum(keys filter keyPredicate) + + override def add(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.add(key, value) + } else { + Future.True + } + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + if (keyPredicate(key)) { + underlyingCache.checkAndSet(key, value, checksum) + } else { + Future.True + } + + override def set(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.set(key, value) + } else { + Future.Done + } + + override def replace(key: K, value: V) = + if (keyPredicate(key)) { + underlyingCache.replace(key, value) + } else { + Future.True + } + + override def delete(key: K) = + if (keyPredicate(key)) { + underlyingCache.delete(key) + } else { + Future.True + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala new file mode 100644 index 000000000..fb7641b9e --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/KeyTransformer.scala @@ -0,0 +1,21 @@ +package com.twitter.servo.cache + +/** + * Converts all keys to a string via .toString + */ +class ToStringKeyTransformer[K] extends KeyTransformer[K] { + override def apply(key: K) = key.toString +} + +/** + * Prefixes all keys with a string + */ +class 
PrefixKeyTransformer[K]( + prefix: String, + delimiter: String = constants.Colon, + underlying: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) + extends KeyTransformer[K] { + private[this] val fullPrefix = prefix + delimiter + + override def apply(key: K) = fullPrefix + underlying(key) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala new file mode 100644 index 000000000..caf990303 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/LockingCache.scala @@ -0,0 +1,486 @@ +package com.twitter.servo.cache + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.partitioning.FailureAccrualException +import com.twitter.finagle.Backoff +import com.twitter.finagle.stats.{NullStatsReceiver, Stat, StatsReceiver} +import com.twitter.logging.{Level, Logger} +import com.twitter.servo.util.{ExceptionCounter, RateLimitingLogger} +import com.twitter.util._ +import scala.util.control.NoStackTrace + +object LockingCache { + + /** + * first argument is value to store, second argument is value in cache, + * returns an Option of the value to be stored. None should be interpreted + * as "don't store anything" + */ + type Picker[V] = (V, V) => Option[V] + + /** + * argument is value, if any, in cache. + * return type is value, if any, to be stored in cache. + * returning None means nothing will be done. + */ + type Handler[V] = Option[V] => Option[V] + + case class AlwaysSetHandler[V](value: Option[V]) extends Handler[V] { + override def apply(ignored: Option[V]) = value + } + + case class PickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { + override def apply(inCache: Option[V]): Option[V] = + inCache match { + case None => + // if nothing in cache, go ahead and store! 
+ Some(newValue) + case Some(oldValue) => + // if something in cache, store a picked value based on + // what's in cache and what's being stored + pick(newValue, oldValue) + } + + // apparently case classes that extend functions don't get pretty toString methods + override lazy val toString = "PickingHandler(%s, %s)".format(newValue, pick) + } + + case class UpdateOnlyPickingHandler[V](newValue: V, pick: Picker[V]) extends Handler[V] { + override def apply(inCache: Option[V]): Option[V] = + inCache match { + case None => + // if nothing in cache, do not update + None + case Some(oldValue) => + // if something in cache, store a picked value based on + // what's in cache and what's being stored + pick(newValue, oldValue) + } + + // apparently case classes that extend functions don't get pretty toString methods + override lazy val toString = "UpdateOnlyPickingHandler(%s, %s)".format(newValue, pick) + } +} + +trait LockingCacheFactory { + def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] + def scope(scopes: String*): LockingCacheFactory +} + +/** + * A cache that enforces a consistent view of values between the time when a set + * is initiated and when the value is actually updated in cache. + */ +trait LockingCache[K, V] extends Cache[K, V] { + + /** + * Look up a value and dispatch based on the result. The particular locking + * approach is defined by the implementing class. May call handler multiple + * times as part of more elaborate locking and retry looping. + * + * Overview of semantics: + * `handler(None)` is called if no value is present in cache. + * `handler(Some(value))` is called if a value is present. + * `handler(x)` should return None if nothing should be done and `Some(value)` + * if a value should be set. 
+ * + * @return the value that was actually set + */ + def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] +} + +class OptimisticLockingCacheObserver(statsReceiver: StatsReceiver) { + import OptimisticLockingCache._ + + private[this] val scopedReceiver = statsReceiver.scope("locking_cache") + + private[this] val successCounter = scopedReceiver.counter("success") + private[this] val failureCounter = scopedReceiver.counter("failure") + private[this] val exceptionCounter = new ExceptionCounter(scopedReceiver) + private[this] val lockAndSetStat = scopedReceiver.stat("lockAndSet") + + def time[V](f: => Future[Option[V]]): Future[Option[V]] = { + Stat.timeFuture(lockAndSetStat) { + f + } + } + + def success(attempts: Seq[FailedAttempt]): Unit = { + successCounter.incr() + countAttempts(attempts) + } + + def failure(attempts: Seq[FailedAttempt]): Unit = { + failureCounter.incr() + countAttempts(attempts) + } + + def scope(s: String*): OptimisticLockingCacheObserver = + s.toList match { + case Nil => this + case head :: tail => + new OptimisticLockingCacheObserver(statsReceiver.scope(head)).scope(tail: _*) + } + + private[this] def countAttempts(attempts: Seq[FailedAttempt]): Unit = { + attempts foreach { attempt => + val name = attempt.getClass.getSimpleName + scopedReceiver.counter(name).incr() + attempt.maybeThrowable foreach { t => + exceptionCounter(t) + scopedReceiver.scope(name).counter(t.getClass.getName).incr() + } + } + } +} + +case class OptimisticLockingCacheFactory( + backoffs: Backoff, + observer: OptimisticLockingCacheObserver = new OptimisticLockingCacheObserver(NullStatsReceiver), + timer: Timer = new NullTimer, + // Enabling key logging may unintentionally cause inclusion of sensitive data + // in service logs and any accompanying log sinks such as Splunk. By default, this is disabled, + // however may be optionally enabled for the purpose of debugging. Caution is warranted. 
+ enableKeyLogging: Boolean = false) + extends LockingCacheFactory { + def this( + backoffs: Backoff, + statsReceiver: StatsReceiver, + timer: Timer, + enableKeyLogging: Boolean + ) = this(backoffs, new OptimisticLockingCacheObserver(statsReceiver), timer, enableKeyLogging) + + override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = { + new OptimisticLockingCache(cache, backoffs, observer, timer, enableKeyLogging) + } + + override def scope(scopes: String*): LockingCacheFactory = { + new OptimisticLockingCacheFactory(backoffs, observer.scope(scopes: _*), timer, enableKeyLogging) // keep the key-logging choice; omitting it reset the scoped factory to the default (false) + } +} + +object OptimisticLockingCache { + private[this] val FutureNone = Future.value(None) + + def emptyFutureNone[V] = FutureNone.asInstanceOf[Future[Option[V]]] + + sealed abstract class FailedAttempt(val maybeThrowable: Option[Throwable]) + extends Exception + with NoStackTrace + case class GetWithChecksumException(t: Throwable) extends FailedAttempt(Some(t)) + case object GetWithChecksumEmpty extends FailedAttempt(None) + case object CheckAndSetFailed extends FailedAttempt(None) + case class CheckAndSetException(t: Throwable) extends FailedAttempt(Some(t)) + case class AddException(t: Throwable) extends FailedAttempt(Some(t)) + + case class LockAndSetFailure(str: String, attempts: Seq[FailedAttempt]) + extends Exception( + str, + // if the last exception was an RPC exception, try to recover the stack trace + attempts.lastOption.flatMap(_.maybeThrowable).orNull + ) + + private def retryPolicy(backoffs: Backoff): RetryPolicy[Try[Nothing]] = + RetryPolicy.backoff(backoffs) { + case Throw(_: FailureAccrualException) => false + case _ => true + } +} + +/** + * Implementation of a LockingCache using add/getWithChecksum/checkAndSet.
+ */ +class OptimisticLockingCache[K, V]( + override val underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + enableKeyLogging: Boolean) + extends LockingCache[K, V] + with CacheWrapper[K, V] { + import LockingCache._ + import OptimisticLockingCache._ + + def this( + underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + ) = + this( + underlyingCache: Cache[K, V], + retryPolicy: RetryPolicy[Try[Nothing]], + observer: OptimisticLockingCacheObserver, + timer: Timer, + false + ) + + def this( + underlyingCache: Cache[K, V], + backoffs: Backoff, + observer: OptimisticLockingCacheObserver, + timer: Timer + ) = + this( + underlyingCache, + OptimisticLockingCache.retryPolicy(backoffs), + observer, + timer, + false + ) + + def this( + underlyingCache: Cache[K, V], + backoffs: Backoff, + observer: OptimisticLockingCacheObserver, + timer: Timer, + enableKeyLogging: Boolean + ) = + this( + underlyingCache, + OptimisticLockingCache.retryPolicy(backoffs), + observer, + timer, + enableKeyLogging + ) + + private[this] val log = Logger.get("OptimisticLockingCache") + private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log) + + @deprecated("use RetryPolicy-based constructor", "0.1.2") + def this(underlyingCache: Cache[K, V], maxTries: Int = 10, enableKeyLogging: Boolean) = { + this( + underlyingCache, + Backoff.const(0.milliseconds).take(maxTries), + new OptimisticLockingCacheObserver(NullStatsReceiver), + new NullTimer, + enableKeyLogging + ) + } + + override def lockAndSet(key: K, handler: Handler[V]): Future[Option[V]] = { + observer.time { + dispatch(key, handler, retryPolicy, Nil) + } + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. 
+ * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def retry( + key: K, + failure: Try[Nothing], + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = + retryPolicy(failure) match { + case None => + observer.failure(attempts) + if (enableKeyLogging) { + rateLimitedLogger.log( + s"failed attempts for ${key}:\n ${attempts.mkString("\n ")}", + level = Level.INFO) + Future.exception(LockAndSetFailure("lockAndSet failed for " + key, attempts)) + } else { + Future.exception(LockAndSetFailure("lockAndSet failed", attempts)) + } + + case Some((backoff, tailPolicy)) => + timer + .doLater(backoff) { + dispatch(key, handler, tailPolicy, attempts) + } + .flatten + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def dispatch( + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + // get the value if nothing's there + handler(None) match { + case None => + // if nothing should be done when missing, go straight to getAndConditionallySet, + // since there's nothing to attempt an add with + getAndConditionallySet(key, handler, retryPolicy, attempts) + + case some @ Some(value) => + // otherwise, try to do an atomic add, which will return false if something's there + underlyingCache.add(key, value) transform { + case Return(added) => + if (added) { + // if added, return the value + observer.success(attempts) + Future.value(some) + } else { + // otherwise, do a checkAndSet based on the current value + getAndConditionallySet(key, handler, retryPolicy, attempts) + } + + case Throw(t) => + // count exception 
against retries + if (enableKeyLogging) + rateLimitedLogger.logThrowable(t, s"add($key) returned exception. will retry") + retry(key, Throw(t), handler, retryPolicy, attempts :+ AddException(t)) + } + } + } + + /** + * @param key + * The key to look up in cache + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def getAndConditionallySet( + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + // look in the cache to see what's there + underlyingCache.getWithChecksum(Seq(key)) handle { + case t => + // treat global failure as key-based failure + KeyValueResult(failed = Map(key -> t)) + } flatMap { lr => + lr(key) match { + case Return.None => + handler(None) match { + case Some(_) => + // if there's nothing in the cache now, but handler(None) return Some, + // that means something has changed since we attempted the add, so try again + val failure = GetWithChecksumEmpty + retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) + + case None => + // if there's nothing in the cache now, but handler(None) returns None, + // that means we don't want to store anything when there's nothing already + // in cache, so return None + observer.success(attempts) + emptyFutureNone + } + + case Return(Some((Return(current), checksum))) => + // the cache entry is present + dispatchCheckAndSet(Some(current), checksum, key, handler, retryPolicy, attempts) + + case Return(Some((Throw(t), checksum))) => + // the cache entry failed to deserialize; treat it as a None and overwrite. + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"getWithChecksum(${key}) returned a bad value. 
overwriting.") + dispatchCheckAndSet(None, checksum, key, handler, retryPolicy, attempts) + + case Throw(t) => + // lookup failure counts against numTries + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"getWithChecksum(${key}) returned exception. will retry.") + retry(key, Throw(t), handler, retryPolicy, attempts :+ GetWithChecksumException(t)) + } + } + } + + /** + * @param current + * The value currently cached under key `key`, if any + * @param checksum + * The checksum of the currently-cached value + * @param key + * The key mapping to `current` + * @param handler + * The handler that is applied to values from cache + * @param retryPolicy + * Used to determine if more attempts should be made. + * @param attempts + * Contains representations of the causes of previous dispatch failures + */ + protected[this] def dispatchCheckAndSet( + current: Option[V], + checksum: Checksum, + key: K, + handler: Handler[V], + retryPolicy: RetryPolicy[Try[Nothing]], + attempts: Seq[FailedAttempt] + ): Future[Option[V]] = { + handler(current) match { + case None => + // if nothing should be done based on the current value, don't do anything + observer.success(attempts) + emptyFutureNone + + case some @ Some(value) => + // otherwise, try a check and set with the checksum + underlyingCache.checkAndSet(key, value, checksum) transform { + case Return(added) => + if (added) { + // if added, return the value + observer.success(attempts) + Future.value(some) + } else { + // otherwise, something has changed, try again + val failure = CheckAndSetFailed + retry(key, Throw(failure), handler, retryPolicy, attempts :+ failure) + } + + case Throw(t) => + // count exception against retries + if (enableKeyLogging) + rateLimitedLogger.logThrowable( + t, + s"checkAndSet(${key}) returned exception. 
will retry.") + retry(key, Throw(t), handler, retryPolicy, attempts :+ CheckAndSetException(t)) + } + } + } +} + +object NonLockingCacheFactory extends LockingCacheFactory { + override def apply[K, V](cache: Cache[K, V]): LockingCache[K, V] = new NonLockingCache(cache) + override def scope(scopes: String*) = this +} + +class NonLockingCache[K, V](override val underlyingCache: Cache[K, V]) + extends LockingCache[K, V] + with CacheWrapper[K, V] { + override def lockAndSet(key: K, handler: LockingCache.Handler[V]): Future[Option[V]] = { + handler(None) match { + case None => + // if nothing should be done when nothing's there, don't do anything + Future.value(None) + + case some @ Some(value) => + set(key, value) map { _ => + some + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala new file mode 100644 index 000000000..8b0be8dcc --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Memcache.scala @@ -0,0 +1,59 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +/** + * [[Memcache]] is a Cache with types that reflect the memcached protocol. Keys are strings and + * values are byte arrays. 
+ */ +trait Memcache extends TtlCache[String, Array[Byte]] { + def incr(key: String, delta: Long = 1): Future[Option[Long]] + def decr(key: String, delta: Long = 1): Future[Option[Long]] +} + +/** + * allows one Memcache to wrap another + */ +trait MemcacheWrapper extends TtlCacheWrapper[String, Array[Byte]] with Memcache { + override def underlyingCache: Memcache + + override def incr(key: String, delta: Long = 1) = underlyingCache.incr(key, delta) + override def decr(key: String, delta: Long = 1) = underlyingCache.decr(key, delta) +} + +/** + * Switch between two caches with a decider value + */ +class DeciderableMemcache(primary: Memcache, secondary: Memcache, isAvailable: => Boolean) + extends MemcacheWrapper { + override def underlyingCache = if (isAvailable) primary else secondary +} + +/** + * [[MemcacheCache]] converts a [[Memcache]] to a [[Cache[K, V]]] using a [[Serializer]] for values + * and a [[KeyTransformer]] for keys. + * + * The value serializer is bidirectional. Keys are serialized using a one-way transformation + * method, which defaults to _.toString. 
+ */ +class MemcacheCache[K, V]( + memcache: Memcache, + ttl: Duration, + serializer: Serializer[V], + transformKey: KeyTransformer[K] = new ToStringKeyTransformer[K]: ToStringKeyTransformer[K]) + extends CacheWrapper[K, V] { + override val underlyingCache = new KeyValueTransformingCache( + new SimpleTtlCacheToCache(memcache, ttl), + serializer, + transformKey + ) + + def incr(key: K, delta: Int = 1): Future[Option[Long]] = { + if (delta >= 0) + memcache.incr(transformKey(key), delta) + else + memcache.decr(transformKey(key), -delta) + } + + def decr(key: K, delta: Int = 1): Future[Option[Long]] = incr(key, -delta) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala new file mode 100644 index 000000000..750dc913c --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MigratingCache.scala @@ -0,0 +1,245 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Return +import com.twitter.util.Throw + +/** + * MigratingReadCache supports a gradual migration from one cache to another. Reads from the + * cache are compared to reads from the darkCache and new values are written to the darkCache + * if necessary. 
+ */ +class MigratingReadCache[K, V]( + cache: ReadCache[K, V], + darkCache: Cache[K, V], + statsReceiver: StatsReceiver = NullStatsReceiver) + extends ReadCache[K, V] { + + private[this] val scopedStatsReceiver = statsReceiver.scope("migrating_read_cache") + private[this] val getScope = scopedStatsReceiver.scope("get") + private[this] val getMismatchedResultsCounter = getScope.counter("mismatched_results") + private[this] val getMissingResultsCounter = getScope.counter("missing_results") + private[this] val getUnexpectedResultsCounter = getScope.counter("unexpected_results") + private[this] val getMatchingResultsCounter = getScope.counter("matching_results") + + private[this] val getWithChecksumScope = scopedStatsReceiver.scope("get_with_cheksum") + private[this] val getWithChecksumMismatchedResultsCounter = + getWithChecksumScope.counter("mismatched_results") + private[this] val getWithChecksumMissingResultsCounter = + getWithChecksumScope.counter("missing_results") + private[this] val getWithChecksumUnexpectedResultsCounter = + getWithChecksumScope.counter("unexpected_results") + private[this] val getWithChecksumMatchingResultsCounter = + getWithChecksumScope.counter("matching_results") + + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = { + cache.get(keys) onSuccess { result => + darkCache.get(keys) onSuccess { darkResult => + keys foreach { k => + (result(k), darkResult(k)) match { + // compare values, set if they differ + case (Return(Some(v)), Return(Some(dv))) if (v != dv) => + getMismatchedResultsCounter.incr() + darkCache.set(k, v) + // set a value if missing + case (Return(Some(v)), Return.None | Throw(_)) => + getMissingResultsCounter.incr() + darkCache.set(k, v) + // remove if necessary + case (Return.None, Return(Some(_)) | Throw(_)) => + getUnexpectedResultsCounter.incr() + darkCache.delete(k) + // do nothing otherwise + case _ => + getMatchingResultsCounter.incr() + () + } + } + } + } + } + + override def getWithChecksum(keys: 
Seq[K]): Future[CsKeyValueResult[K, V]] = { + cache.getWithChecksum(keys) onSuccess { result => + // a plain get is enough for the darkCache: only values are compared below, never checksums + darkCache.get(keys) onSuccess { darkResult => + keys foreach { k => + (result(k), darkResult(k)) match { + // compare values, set if they differ + case (Return(Some((Return(v), _))), Return(Some(dv))) if (v != dv) => + getWithChecksumMismatchedResultsCounter.incr() + darkCache.set(k, v) + // set a value if missing + case (Return(Some((Return(v), _))), Return.None | Throw(_)) => + getWithChecksumMissingResultsCounter.incr() + darkCache.set(k, v) + // remove if necessary + case (Return.None, Return(Some(_)) | Throw(_)) => + getWithChecksumUnexpectedResultsCounter.incr() + darkCache.delete(k) + // everything else is counted as matching and the darkCache is left untouched + case _ => + getWithChecksumMatchingResultsCounter.incr() + () + } + } + } + } + } + + override def release(): Unit = { + cache.release() + darkCache.release() + } +} + +/** + * MigratingCache supports a gradual migration from one cache to another. Writes to the cache + * are propagated to the darkCache. Reads from the cache are compared to reads from the darkCache + * and new values are written to the darkCache if necessary. + * + * Writes to the darkCache are not locking writes, so there is some risk of inconsistencies from + * race conditions. However, writes to the darkCache only occur if they succeed in the cache, so + * if a checkAndSet fails, for example, no write is issued to the darkCache.
+ */ +class MigratingCache[K, V]( + cache: Cache[K, V], + darkCache: Cache[K, V], + statsReceiver: StatsReceiver = NullStatsReceiver) + extends MigratingReadCache(cache, darkCache, statsReceiver) + with Cache[K, V] { + override def add(key: K, value: V): Future[Boolean] = { + cache.add(key, value) onSuccess { wasAdded => + if (wasAdded) { + darkCache.set(key, value) + } + } + } + + override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] = { + cache.checkAndSet(key, value, checksum) onSuccess { wasSet => + if (wasSet) { + darkCache.set(key, value) + } + } + } + + override def set(key: K, value: V): Future[Unit] = { + cache.set(key, value) onSuccess { _ => + darkCache.set(key, value) + } + } + + override def replace(key: K, value: V): Future[Boolean] = { + cache.replace(key, value) onSuccess { wasReplaced => + if (wasReplaced) { + darkCache.set(key, value) + } + } + } + + override def delete(key: K): Future[Boolean] = { + cache.delete(key) onSuccess { wasDeleted => + if (wasDeleted) { + darkCache.delete(key) + } + } + } +} + +/** + * Like MigratingCache but for TtlCaches + */ +class MigratingTtlCache[K, V]( + cache: TtlCache[K, V], + darkCache: TtlCache[K, V], + ttl: (K, V) => Duration) + extends MigratingReadCache(cache, new TtlCacheToCache(darkCache, ttl)) + with TtlCache[K, V] { + override def add(key: K, value: V, ttl: Duration): Future[Boolean] = { + cache.add(key, value, ttl) onSuccess { wasAdded => + if (wasAdded) { + darkCache.set(key, value, ttl) + } + } + } + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] = { + cache.checkAndSet(key, value, checksum, ttl) onSuccess { wasSet => + if (wasSet) { + darkCache.set(key, value, ttl) + } + } + } + + override def set(key: K, value: V, ttl: Duration): Future[Unit] = { + cache.set(key, value, ttl) onSuccess { _ => + darkCache.set(key, value, ttl) + } + } + + override def replace(key: K, value: V, ttl: Duration): Future[Boolean] = { + 
cache.replace(key, value, ttl) onSuccess { wasReplaced => + if (wasReplaced) { + darkCache.set(key, value, ttl) + } + } + } + + override def delete(key: K): Future[Boolean] = { + cache.delete(key) onSuccess { wasDeleted => + if (wasDeleted) { + darkCache.delete(key) + } + } + } + + override def release(): Unit = { + cache.release() + darkCache.release() + } +} + +/** + * A MigratingTtlCache for Memcaches, implementing a migrating incr and decr. Race conditions + * are possible and may prevent the counts from being perfectly synchronized. + */ +class MigratingMemcache( + cache: Memcache, + darkCache: Memcache, + ttl: (String, Array[Byte]) => Duration) + extends MigratingTtlCache[String, Array[Byte]](cache, darkCache, ttl) + with Memcache { + def incr(key: String, delta: Long = 1): Future[Option[Long]] = { + cache.incr(key, delta) onSuccess { + case None => + darkCache.delete(key) + + case Some(value) => + darkCache.incr(key, delta) onSuccess { + case Some(`value`) => // same value! + case _ => + val b = value.toString.getBytes + darkCache.set(key, b, ttl(key, b)) + } + } + } + + def decr(key: String, delta: Long = 1): Future[Option[Long]] = { + cache.decr(key, delta) onSuccess { + case None => + darkCache.delete(key) + + case Some(value) => + darkCache.decr(key, delta) onSuccess { + case Some(`value`) => // same value! + case _ => + val b = value.toString.getBytes + darkCache.set(key, b, ttl(key, b)) + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala new file mode 100644 index 000000000..59acd28d0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/MissingCache.scala @@ -0,0 +1,46 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.memcached.util.NotFound +import scala.util.Random + +/** + * wrap a ReadCache, forcing a miss rate. 
useful for playing back + * the same logs over and over, but simulating expected cache misses + */ +class MissingReadCache[K, V]( + underlyingCache: ReadCache[K, V], + hitRate: Float, + rand: Random = new Random) + extends ReadCache[K, V] { + assert(hitRate >= 0 && hitRate <= 1, "hitRate must be <= 1 and >= 0") // fail fast on a hit rate outside [0, 1] (NaN is rejected too) + + protected def filterResult[W](lr: KeyValueResult[K, W]) = { + val found = lr.found.filter { _ => + rand.nextFloat <= hitRate + } + val notFound = lr.notFound ++ NotFound(lr.found.keySet, found.keySet) + KeyValueResult(found, notFound, lr.failed) + } + + override def get(keys: Seq[K]) = + underlyingCache.get(keys) map { filterResult(_) } + + override def getWithChecksum(keys: Seq[K]) = + underlyingCache.getWithChecksum(keys) map { filterResult(_) } + + override def release() = underlyingCache.release() +} + +class MissingCache[K, V]( + override val underlyingCache: Cache[K, V], + hitRate: Float, + rand: Random = new Random) + extends MissingReadCache[K, V](underlyingCache, hitRate, rand) + with CacheWrapper[K, V] + +class MissingTtlCache[K, V]( + override val underlyingCache: TtlCache[K, V], + hitRate: Float, + rand: Random = new Random) + extends MissingReadCache[K, V](underlyingCache, hitRate, rand) + with TtlCacheWrapper[K, V] diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala new file mode 100644 index 000000000..a3bed9624 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/ObservableCache.scala @@ -0,0 +1,419 @@ +package com.twitter.servo.cache + +import com.twitter.finagle.stats.{Stat, StatsReceiver} +import com.twitter.logging.{Level, Logger} +import com.twitter.servo.util.{ExceptionCounter, WindowedAverage} +import com.twitter.util._ + +/** + * track hits and misses in caches, time reads and writes + */ +trait CacheObserver { + + /** + * register a hit + */ + def hit(key: String): Unit + + /** +
* register a miss + */ + def miss(key: String): Unit + + /** + * time the read, and automatically handle hits and misses from the KeyValueResult + */ + def read[K, T]( + name: String, + keys: Seq[K] + )( + f: => Future[KeyValueResult[K, T]] + ): Future[KeyValueResult[K, T]] + + /** + * time the write + */ + def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] + + /** + * time the incr, and record the success/failure + */ + def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]): Future[Option[Long]] + + /** + * produce a new CacheObserver with a nested scope + */ + def scope(s: String*): CacheObserver + + /** + * increment a counter tracking the number of expirations. + */ + def expired(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of failures. + */ + def failure(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of tombstones. + */ + def tombstone(delta: Int = 1): Unit + + /** + * Increment a counter tracking the number of not cached. 
+ */ + def noCache(delta: Int = 1): Unit +} + +object NullCacheObserver extends CacheObserver { + override def hit(key: String) = () + override def miss(key: String) = () + override def read[K, T](name: String, keys: Seq[K])(f: => Future[KeyValueResult[K, T]]) = f + override def write[K, T](name: String, key: K)(f: => Future[T]) = f + override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = f + override def scope(s: String*) = this + override def expired(delta: Int = 1) = () + override def failure(delta: Int = 1): Unit = {} + override def tombstone(delta: Int = 1): Unit = {} + override def noCache(delta: Int = 1): Unit = {} +} + +/** + * A CacheObserver that writes to a StatsReceiver + */ +class StatsReceiverCacheObserver( + stats: StatsReceiver, + windowSize: Long, + log: Logger, + disableLogging: Boolean = false) + extends CacheObserver { + + def this( + statsReceiver: StatsReceiver, + windowSize: Long, + scope: String + ) = + this( + statsReceiver.scope(scope), + windowSize, + Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase) + ) + + def this( + statsReceiver: StatsReceiver, + windowSize: Long, + scope: String, + disableLogging: Boolean + ) = + this( + statsReceiver.scope(scope), + windowSize, + Logger.get(scope.replaceAll("([a-z]+)([A-Z])", "$1_$2").toLowerCase), + disableLogging + ) + + protected[this] val expirationCounter = stats.counter("expirations") + + // needed to make sure we hand out the same observer for each scope, + // so that the hit rates are properly calculated + protected[this] val children = Memoize { + new StatsReceiverCacheObserver(stats, windowSize, _: String, disableLogging) + } + + protected[this] val exceptionCounter = new ExceptionCounter(stats) + private[this] val hitCounter = stats.counter("hits") + private[this] val missCounter = stats.counter("misses") + private[this] val failuresCounter = stats.counter("failures") + private[this] val tombstonesCounter = stats.counter("tombstones") + 
private[this] val noCacheCounter = stats.counter("noCache") + + private[this] val windowedHitRate = new WindowedAverage(windowSize) + private[this] val windowedIncrHitRate = new WindowedAverage(windowSize) + + private[this] val hitRateGauge = stats.addGauge("hit_rate") { + windowedHitRate.value.getOrElse(1.0).toFloat + } + + private[this] val incrHitRateGauge = stats.addGauge("incr_hit_rate") { + windowedIncrHitRate.value.getOrElse(1.0).toFloat + } + + protected[this] def handleThrowable[K](name: String, t: Throwable, key: Option[K]): Unit = { + stats.counter(name + "_failures").incr() + exceptionCounter(t) + if (!disableLogging) { + lazy val suffix = key + .map { k => + "(" + k.toString + ")" + } + .getOrElse("") + log.warning("%s%s caught: %s", name, suffix, t.getClass.getName) + log.trace(t, "stack trace was: ") + } + } + + override def hit(key: String): Unit = { + hits(1) + if (!disableLogging) + log.trace("cache hit: %s", key) + } + + private[this] def hits(n: Int): Unit = { + windowedHitRate.record(n.toDouble, n.toDouble) + hitCounter.incr(n) + } + + override def miss(key: String): Unit = { + misses(1) + if (!disableLogging) + log.trace("cache miss: %s", key) + } + + private[this] def misses(n: Int): Unit = { + windowedHitRate.record(0.0F, n.toDouble) + missCounter.incr(n) + } + + override def read[K, T]( + name: String, + keys: Seq[K] + )( + f: => Future[KeyValueResult[K, T]] + ): Future[KeyValueResult[K, T]] = + Stat + .timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } + .respond { + case Return(lr) => + if (log.isLoggable(Level.TRACE)) { + lr.found.keys.foreach { k => + hit(k.toString) + } + lr.notFound.foreach { k => + miss(k.toString) + } + } else { + hits(lr.found.keys.size) + misses(lr.notFound.size) + } + lr.failed foreach { + case (k, t) => + handleThrowable(name, t, Some(k)) + // count failures as misses + miss(k.toString) + failuresCounter.incr() + } + case Throw(t) => + handleThrowable(name, t, None) + // count failures as misses 
+ keys.foreach { k => + miss(k.toString) + } + failuresCounter.incr() + } + + override def write[K, T](name: String, key: K)(f: => Future[T]): Future[T] = + Stat.timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } onFailure { + handleThrowable(name, _, Some(key)) + } + + override def incr[K](name: String, key: Seq[K])(f: => Future[Option[Long]]) = + Stat.timeFuture(stats.stat(name)) { + stats.counter(name).incr() + f + } onSuccess { optVal => + val hit = optVal.isDefined + windowedIncrHitRate.record(if (hit) 1F else 0F) + stats.counter(name + (if (hit) "_hits" else "_misses")).incr() + } + + override def scope(s: String*) = + s.toList match { + case Nil => this + case head :: tail => children(head).scope(tail: _*) + } + + override def expired(delta: Int = 1): Unit = { expirationCounter.incr(delta) } + override def failure(delta: Int = 1): Unit = { failuresCounter.incr(delta) } + override def tombstone(delta: Int = 1): Unit = { tombstonesCounter.incr(delta) } + override def noCache(delta: Int = 1): Unit = { noCacheCounter.incr(delta) } + +} + +/** + * Wraps an underlying cache with calls to a CacheObserver + */ +class ObservableReadCache[K, V](underlyingCache: ReadCache[K, V], observer: CacheObserver) + extends ReadCache[K, V] { + override def get(keys: Seq[K]): Future[KeyValueResult[K, V]] = { + observer.read("get", keys) { + underlyingCache.get(keys) + } + } + + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V]] = { + observer.read[K, (Try[V], Checksum)]("get_with_checksum", keys) { + underlyingCache.getWithChecksum(keys) + } + } + + override def release() = underlyingCache.release() +} + +object ObservableCache { + def apply[K, V]( + underlyingCache: Cache[K, V], + statsReceiver: StatsReceiver, + windowSize: Long, + name: String + ): Cache[K, V] = + new ObservableCache( + underlyingCache, + new StatsReceiverCacheObserver(statsReceiver, windowSize, name) + ) + + def apply[K, V]( + underlyingCache: Cache[K, V], + 
statsReceiver: StatsReceiver, + windowSize: Long, + name: String, + disableLogging: Boolean + ): Cache[K, V] = + new ObservableCache( + underlyingCache, + new StatsReceiverCacheObserver( + statsReceiver = statsReceiver, + windowSize = windowSize, + scope = name, + disableLogging = disableLogging) + ) + + def apply[K, V]( + underlyingCache: Cache[K, V], + statsReceiver: StatsReceiver, + windowSize: Long, + log: Logger + ): Cache[K, V] = + new ObservableCache( + underlyingCache, + new StatsReceiverCacheObserver(statsReceiver, windowSize, log) + ) +} + +/** + * Wraps an underlying Cache with calls to a CacheObserver + */ +class ObservableCache[K, V](underlyingCache: Cache[K, V], observer: CacheObserver) + extends ObservableReadCache(underlyingCache, observer) + with Cache[K, V] { + override def add(key: K, value: V): Future[Boolean] = + observer.write("add", key) { + underlyingCache.add(key, value) + } + + override def checkAndSet(key: K, value: V, checksum: Checksum): Future[Boolean] = + observer.write("check_and_set", key) { + underlyingCache.checkAndSet(key, value, checksum) + } + + override def set(key: K, value: V): Future[Unit] = + observer.write("set", key) { + underlyingCache.set(key, value) + } + + override def replace(key: K, value: V): Future[Boolean] = + observer.write("replace", key) { + underlyingCache.replace(key, value) + } + + override def delete(key: K): Future[Boolean] = + observer.write("delete", key) { + underlyingCache.delete(key) + } +} + +object ObservableTtlCache { + def apply[K, V]( + underlyingCache: TtlCache[K, V], + statsReceiver: StatsReceiver, + windowSize: Long, + name: String + ): TtlCache[K, V] = + new ObservableTtlCache( + underlyingCache, + new StatsReceiverCacheObserver(statsReceiver, windowSize, name) + ) +} + +/** + * Wraps an underlying TtlCache with calls to a CacheObserver + */ +class ObservableTtlCache[K, V](underlyingCache: TtlCache[K, V], observer: CacheObserver) + extends ObservableReadCache(underlyingCache, observer) + 
with TtlCache[K, V] { + override def add(key: K, value: V, ttl: Duration): Future[Boolean] = + observer.write("add", key) { + underlyingCache.add(key, value, ttl) + } + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] = + observer.write("check_and_set", key) { + underlyingCache.checkAndSet(key, value, checksum, ttl) + } + + override def set(key: K, value: V, ttl: Duration): Future[Unit] = + observer.write("set", key) { + underlyingCache.set(key, value, ttl) + } + + override def replace(key: K, value: V, ttl: Duration): Future[Boolean] = + observer.write("replace", key) { + underlyingCache.replace(key, value, ttl) + } + + override def delete(key: K): Future[Boolean] = + observer.write("delete", key) { + underlyingCache.delete(key) + } +} + +case class ObservableMemcacheFactory(memcacheFactory: MemcacheFactory, cacheObserver: CacheObserver) + extends MemcacheFactory { + + override def apply() = + new ObservableMemcache(memcacheFactory(), cacheObserver) +} + +@deprecated("use ObservableMemcacheFactory or ObservableMemcache directly", "0.1.2") +object ObservableMemcache { + def apply( + underlyingCache: Memcache, + statsReceiver: StatsReceiver, + windowSize: Long, + name: String + ): Memcache = + new ObservableMemcache( + underlyingCache, + new StatsReceiverCacheObserver(statsReceiver, windowSize, name) + ) +} + +class ObservableMemcache(underlyingCache: Memcache, observer: CacheObserver) + extends ObservableTtlCache[String, Array[Byte]](underlyingCache, observer) + with Memcache { + def incr(key: String, delta: Long = 1): Future[Option[Long]] = + observer.incr("incr", key) { + underlyingCache.incr(key, delta) + } + + def decr(key: String, delta: Long = 1): Future[Option[Long]] = + observer.incr("decr", key) { + underlyingCache.decr(key, delta) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala 
b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala new file mode 100644 index 000000000..801d21ea6 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SecondaryIndexingCache.scala @@ -0,0 +1,85 @@ +package com.twitter.servo.cache + +import com.twitter.logging.Logger +import com.twitter.util.{Future, Return, Throw, Try} + +object SecondaryIndexingCache { + type IndexMapping[S, V] = V => Try[Option[S]] +} + +/** + * Stores a secondary index whenever set is called, + * using a mapping from value to secondary index + */ +class SecondaryIndexingCache[K, S, V]( + override val underlyingCache: Cache[K, Cached[V]], + secondaryIndexCache: Cache[S, Cached[K]], + secondaryIndex: SecondaryIndexingCache.IndexMapping[S, V]) + extends CacheWrapper[K, Cached[V]] { + protected[this] val log = Logger.get(getClass.getSimpleName) + + protected[this] def setSecondaryIndex(key: K, cachedValue: Cached[V]): Future[Unit] = + cachedValue.value match { + case Some(value) => + secondaryIndex(value) match { + case Return(Some(index)) => + val cachedKey = cachedValue.copy(value = Some(key)) + secondaryIndexCache.set(index, cachedKey) + case Return.None => + Future.Done + case Throw(t) => + log.error(t, "failed to determine secondary index for: %s", cachedValue) + Future.Done + } + // if we're storing a tombstone, no secondary index can be made + case None => Future.Done + } + + override def set(key: K, cachedValue: Cached[V]): Future[Unit] = + super.set(key, cachedValue) flatMap { _ => + setSecondaryIndex(key, cachedValue) + } + + override def checkAndSet(key: K, cachedValue: Cached[V], checksum: Checksum): Future[Boolean] = + super.checkAndSet(key, cachedValue, checksum) flatMap { wasStored => + if (wasStored) + // do a straight set of the secondary index, but only if the CAS succeeded + setSecondaryIndex(key, cachedValue) map { _ => + true + } + else + Future.value(false) + } + + override def add(key: K, cachedValue: 
Cached[V]): Future[Boolean] = + super.add(key, cachedValue) flatMap { wasAdded => + if (wasAdded) + // do a straight set of the secondary index, but only if the add succeeded + setSecondaryIndex(key, cachedValue) map { _ => + true + } + else + Future.value(false) + } + + override def replace(key: K, cachedValue: Cached[V]): Future[Boolean] = + super.replace(key, cachedValue) flatMap { wasReplaced => + if (wasReplaced) + setSecondaryIndex(key, cachedValue) map { _ => + true + } + else + Future.value(false) + } + + override def release(): Unit = { + underlyingCache.release() + secondaryIndexCache.release() + } + + def withSecondaryIndex[T]( + secondaryIndexingCache: Cache[T, Cached[K]], + secondaryIndex: SecondaryIndexingCache.IndexMapping[T, V] + ): SecondaryIndexingCache[K, T, V] = + new SecondaryIndexingCache[K, T, V](this, secondaryIndexingCache, secondaryIndex) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala new file mode 100644 index 000000000..3e46211e4 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SelectedCache.scala @@ -0,0 +1,97 @@ +package com.twitter.servo.cache + +import com.twitter.util.Future + +/** + * Represents multiple underlying ReadCaches selected by key at invocation time. + */ +trait SelectedReadCacheWrapper[K, V, This <: ReadCache[K, V]] extends ReadCache[K, V] { + + /** Retrieves the underlying cache for the given key. */ + def underlyingCache(key: K): This + + /** Retrieves tuples of the underlying caches and the keys they apply to. */ + def underlyingCacheForKeys(keys: Seq[K]): Seq[(This, Seq[K])] + + /** Retrieves all underlying caches. 
+ */ + def underlyingCaches: Seq[This] + + private[this] def collectUnderlying[V2]( + keys: Seq[K] + )( + f: (This, Seq[K]) => Future[KeyValueResult[K, V2]] + ): Future[KeyValueResult[K, V2]] = { + Future.collect( + underlyingCacheForKeys(keys) collect { + case (cacheForKey, keys) if !keys.isEmpty => + f(cacheForKey, keys) + } + ) map { + KeyValueResult.sum(_) + } + } + + override def get(keys: Seq[K]) = collectUnderlying(keys) { _.get(_) } + override def getWithChecksum(keys: Seq[K]) = collectUnderlying(keys) { _.getWithChecksum(_) } + + override def release(): Unit = { + underlyingCaches foreach { _.release() } + } +} + +/** + * Represents multiple underlying Caches selected by key at invocation time. + */ +trait SelectedCacheWrapper[K, V] + extends Cache[K, V] + with SelectedReadCacheWrapper[K, V, Cache[K, V]] { + override def add(key: K, value: V) = underlyingCache(key).add(key, value) + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + underlyingCache(key).checkAndSet(key, value, checksum) + + override def set(key: K, value: V) = underlyingCache(key).set(key, value) + + override def replace(key: K, value: V) = underlyingCache(key).replace(key, value) + + override def delete(key: K) = underlyingCache(key).delete(key) +} + +/** + * SelectedCache implements SelectedCacheWrapper to choose between two underlying + * caches based on a function. 
+ */ +class SelectedCache[K, V](primary: Cache[K, V], secondary: Cache[K, V], usePrimary: K => Boolean) + extends SelectedCacheWrapper[K, V] { + override def underlyingCache(key: K) = if (usePrimary(key)) primary else secondary + + override def underlyingCacheForKeys(keys: Seq[K]) = { + keys partition (usePrimary) match { + case (primaryKeys, secondaryKeys) => Seq((primary, primaryKeys), (secondary, secondaryKeys)) + } + } + + override def underlyingCaches = Seq(primary, secondary) +} + +/** + * Factory for SelectedCache instances that use a simple function to migrate + * users from a secondary cache (function returns false) to a primary cache + * (function returns true). Serves a purpose similar to CacheFactory, but + * cannot extend it due to type constraints. + * + * The function is expected to produce stable results by key over time to + * prevent accessing stale cache entries due to keys flapping between the + * two caches. + */ +class SelectedCacheFactory[K]( + primaryFactory: CacheFactory, + secondaryFactory: CacheFactory, + usePrimary: K => Boolean) { + def apply[V](serializer: Serializer[V], scopes: String*): Cache[K, V] = + new SelectedCache( + primaryFactory[K, V](serializer, scopes: _*), + secondaryFactory[K, V](serializer, scopes: _*), + usePrimary + ) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala new file mode 100644 index 000000000..7477aa9c6 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SeqSerializer.scala @@ -0,0 +1,10 @@ +package com.twitter.servo.cache + +/** + * A Serializer of `Seq[T]`s. + * + * @param itemSerializer a Serializer for the individual elements. 
+ * @param itemSizeEstimate estimated size in bytes of individual elements + */ +class SeqSerializer[T](itemSerializer: Serializer[T], itemSizeEstimate: Int = 8) + extends IterableSerializer[T, Seq[T]](() => Seq.newBuilder[T], itemSerializer, itemSizeEstimate) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala new file mode 100644 index 000000000..abe4e420c --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/Serializer.scala @@ -0,0 +1,184 @@ +package com.twitter.servo.cache + +import com.google.common.primitives.{Ints, Longs} +import com.twitter.finagle.thrift.Protocols +import com.twitter.io.Buf +import com.twitter.scrooge.{ThriftStruct, ThriftStructCodec, ThriftStructSerializer} +import com.twitter.servo.util.Transformer +import com.twitter.util.{Time => UtilTime, Try} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import java.nio.ByteBuffer +import org.apache.thrift.TBase +import org.apache.thrift.protocol.{TCompactProtocol, TProtocolFactory} +import org.apache.thrift.transport.TIOStreamTransport + +object Serializers { self => + val CompactProtocolFactory = new TCompactProtocol.Factory + val EmptyByteArray = Array.empty[Byte] + + val Unit = Transformer[Unit, Array[Byte]](_ => EmptyByteArray, _ => ()) + + object Long { + val Simple = Transformer[Long, Array[Byte]](Longs.toByteArray, Longs.fromByteArray) + } + + object CachedLong { + val Compact: Serializer[Cached[Long]] = + new CachedSerializer(self.Long.Simple, CompactProtocolFactory) + } + + object SeqLong { + val Simple: Serializer[Seq[Long]] = new SeqSerializer(self.Long.Simple, 8) + } + + object CachedSeqLong { + val Compact: Serializer[Cached[Seq[Long]]] = + new CachedSerializer(self.SeqLong.Simple, CompactProtocolFactory) + } + + object Int { + val Simple = Transformer[Int, Array[Byte]](Ints.toByteArray, Ints.fromByteArray) + } + + object 
CachedInt { + val Compact: Serializer[Cached[Int]] = + new CachedSerializer(self.Int.Simple, CompactProtocolFactory) + } + + object SeqInt { + val Simple: Serializer[Seq[Int]] = new SeqSerializer(self.Int.Simple, 4) + } + + object CachedSeqInt { + val Compact: Serializer[Cached[Seq[Int]]] = + new CachedSerializer(self.SeqInt.Simple, CompactProtocolFactory) + } + + object String { + val Utf8: Serializer[String] = Transformer.Utf8ToBytes + } + + object CachedString { + val Compact: Serializer[Cached[String]] = + new CachedSerializer(self.String.Utf8, CompactProtocolFactory) + } + + object SeqString { + val Utf8: Serializer[Seq[String]] = new SeqSerializer(self.String.Utf8) + } + + object CachedSeqString { + val Compact: Serializer[Cached[Seq[String]]] = + new CachedSerializer(self.SeqString.Utf8, CompactProtocolFactory) + } + + /** + * We take care not to alter the buffer so that this conversion can + * safely be used multiple times with the same buffer, and that + * other threads cannot view other states of the buffer. + */ + private[this] def byteBufferToArray(b: ByteBuffer): Array[Byte] = { + val a = new Array[Byte](b.remaining) + b.duplicate.get(a) + a + } + + /** + * Convert between a ByteBuffer and an Array of bytes. The + * conversion to Array[Byte] makes a copy of the data, while the + * reverse conversion just wraps the array. + */ + val ArrayByteBuffer: Transformer[Array[Byte], ByteBuffer] = + Transformer(ByteBuffer.wrap(_: Array[Byte]), byteBufferToArray) + + val ArrayByteBuf: Transformer[Array[Byte], Buf] = + Transformer(Buf.ByteArray.Shared.apply, Buf.ByteArray.Shared.extract) + + /** + * Isomorphism between Time and Long. The Long represents the number + * of nanoseconds since the epoch. + */ + val TimeNanos: Transformer[UtilTime, Long] = + Transformer.pure[UtilTime, Long](_.inNanoseconds, UtilTime.fromNanoseconds) + + /** + * Transformer from Time to Array[Byte] always succeeds. 
The inverse + * transform throws BufferUnderflowException if the buffer is less + * than eight bytes in length. If it is greater than eight bytes, + * the later bytes are discarded. + */ + // This is lazy because if it is not, it may be initialized before + // Long.Simple. In that case, Long.Simple will be null at + // initialization time, and will be captured here. Unfortunately, + // this is dependent on the order of class initialization, which may + // vary between runs of a program. + lazy val Time: Serializer[UtilTime] = TimeNanos andThen Long.Simple +} + +/** + * A Serializer for Thrift structs generated by Scrooge. + * + * @param codec used to encode and decode structs for a given protocol + * @param protocolFactory defines the serialization protocol to be used + */ +class ThriftSerializer[T <: ThriftStruct]( + val codec: ThriftStructCodec[T], + val protocolFactory: TProtocolFactory) + extends Serializer[T] + with ThriftStructSerializer[T] { + override def to(obj: T): Try[Array[Byte]] = Try(toBytes(obj)) + override def from(bytes: Array[Byte]): Try[T] = Try(fromBytes(bytes)) +} + +/** + * A Serializer for Thrift structs generated by the Apache code generator. + * + * @param tFactory a factory for Thrift-defined objects of type T. Objects + * yielded by the factory are read into and returned during + * deserialization. 
+ * + * @param protocolFactory defines the serialization protocol to be used + */ +class TBaseSerializer[T <: TBase[_, _]](tFactory: () => T, protocolFactory: TProtocolFactory) + extends Serializer[T] { + override def to(obj: T): Try[Array[Byte]] = Try { + val baos = new ByteArrayOutputStream + obj.write(protocolFactory.getProtocol(new TIOStreamTransport(baos))) + baos.toByteArray + } + + override def from(bytes: Array[Byte]): Try[T] = Try { + val obj = tFactory() + val stream = new ByteArrayInputStream(bytes) + obj.read(protocolFactory.getProtocol(new TIOStreamTransport(stream))) + obj + } +} + +object CachedSerializer { + def binary[T](valueSerializer: Serializer[T]): CachedSerializer[T] = + new CachedSerializer(valueSerializer, Protocols.binaryFactory()) + + def compact[T](valueSerializer: Serializer[T]): CachedSerializer[T] = + new CachedSerializer(valueSerializer, new TCompactProtocol.Factory) +} + +/** + * A Serializer of Cached object. + * + * @param valueSerializer an underlying serializer of the values to be cached. 
+ * @param protocolFactory defines the serialization protocol to be used + */ +class CachedSerializer[T](valueSerializer: Serializer[T], protocolFactory: TProtocolFactory) + extends Serializer[Cached[T]] { + private[this] val underlying = new ThriftSerializer(CachedValue, protocolFactory) + + override def to(cached: Cached[T]): Try[Array[Byte]] = + underlying.to(cached.toCachedValue(valueSerializer)) + + private[this] val asCached: CachedValue => Cached[T] = + t => Cached(t, valueSerializer) + + override def from(bytes: Array[Byte]): Try[Cached[T]] = + underlying.from(bytes).map(asCached) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala new file mode 100644 index 000000000..9bc9a4c91 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SetSerializer.scala @@ -0,0 +1,10 @@ +package com.twitter.servo.cache + +/** + * A Serializer of `Set[T]`s. + * + * @param itemSerializer a Serializer for the individual elements. 
+ * @param itemSizeEstimate estimated size in bytes of individual elements + */ +class SetSerializer[T](itemSerializer: Serializer[T], itemSizeEstimate: Int = 8) + extends IterableSerializer[T, Set[T]](() => Set.newBuilder[T], itemSerializer, itemSizeEstimate) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala new file mode 100644 index 000000000..595f0698a --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/SimpleReplicatingCache.scala @@ -0,0 +1,231 @@ +package com.twitter.servo.cache + +import com.twitter.servo.keyvalue._ +import com.twitter.servo.util.{OptionOrdering, TryOrdering} +import com.twitter.util.{Future, Return, Throw, Time, Try} + +object SimpleReplicatingCache { + + /** + * Builds a SimpleReplicatingCache that writes a value multiple times to the same underlying + * cache but under different keys. If the underlying cache is backed by enough shards, there + * is a good chance that the different keys will end up on different shards, giving you similar + * behavior to having multiple distinct caches. + */ + def apply[K, K2, V]( + underlying: LockingCache[K2, Cached[V]], + keyReplicator: (K, Int) => K2, + replicas: Int = 2 + ) = new SimpleReplicatingCache( + (0 until replicas).toSeq map { replica => + new KeyTransformingLockingCache( + underlying, + (key: K) => keyReplicator(key, replica) + ) + } + ) +} + +/** + * A very simple replicating cache implementation. It writes the same key/value pair to + * multiple underlying caches. On read, each underlying cache is queried with the key; if the + * results are not all the same for a given key, then the most recent value is chosen and + * replicated to all caches. + * + * Some cache operations are not currently supported, because their semantics are a little fuzzy + * in the replication case. Specifically: add and checkAndSet. 
+ */ +class SimpleReplicatingCache[K, V](underlyingCaches: Seq[LockingCache[K, Cached[V]]]) + extends LockingCache[K, Cached[V]] { + private type CsValue = (Try[Cached[V]], Checksum) + + private val cachedOrdering = new Ordering[Cached[V]] { + // sort by ascending timestamp + def compare(a: Cached[V], b: Cached[V]) = a.cachedAt.compare(b.cachedAt) + } + + private val csValueOrdering = new Ordering[CsValue] { + // order by Try[V], ignore checksum + val subordering = TryOrdering(cachedOrdering) + def compare(a: CsValue, b: CsValue) = subordering.compare(a._1, b._1) + } + + private val tryOptionCsValueOrdering = TryOrdering(OptionOrdering(csValueOrdering)) + private val tryOptionCachedOrdering = TryOrdering(OptionOrdering(cachedOrdering)) + + /** + * release any underlying resources + */ + def release(): Unit = { + underlyingCaches foreach { _.release() } + } + + /** + * Fetches from all underlying caches in parallel, and if results differ, will choose a + * winner and push updated results back to the stale caches. + */ + def get(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = { + getWithChecksum(keys) map { csKvRes => + val resBldr = new KeyValueResultBuilder[K, Cached[V]] + + csKvRes.found foreach { + case (k, (Return(v), _)) => resBldr.addFound(k, v) + case (k, (Throw(t), _)) => resBldr.addFailed(k, t) + } + + resBldr.addNotFound(csKvRes.notFound) + resBldr.addFailed(csKvRes.failed) + resBldr.result() + } + } + + /** + * Fetches from all underlying caches in parallel, and if results differ, will choose a + * winner and push updated results back to the stale caches. 
+ */ + def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, Cached[V]]] = { + Future.collect { + underlyingCaches map { underlying => + underlying.getWithChecksum(keys) + } + } map { underlyingResults => + val resBldr = new KeyValueResultBuilder[K, CsValue] + + for (key <- keys) { + val keyResults = underlyingResults map { _(key) } + resBldr(key) = getAndReplicate(key, keyResults) map { + // treat evictions as misses + case Some((Return(c), _)) if c.status == CachedValueStatus.Evicted => None + case v => v + } + } + + resBldr.result() + } + } + + /** + * Looks at all the returned values for a given set of replication keys, returning the most recent + * cached value if available, or indicate a miss if applicable, or return a failure if all + * keys failed. If a cached value is returned, and some keys don't have that cached value, + * the cached value will be replicated to those keys, possibly overwriting stale data. + */ + private def getAndReplicate( + key: K, + keyResults: Seq[Try[Option[CsValue]]] + ): Try[Option[CsValue]] = { + val max = keyResults.max(tryOptionCsValueOrdering) + + max match { + // if one of the replication keys returned a cached value, then make sure all replication + // keys contain that cached value. 
+ case Return(Some((Return(cached), cs))) => + for ((underlying, keyResult) <- underlyingCaches zip keyResults) { + if (keyResult != max) { + replicate(key, cached, keyResult, underlying) + } + } + case _ => + } + + max + } + + private def replicate( + key: K, + cached: Cached[V], + current: Try[Option[CsValue]], + underlying: LockingCache[K, Cached[V]] + ): Future[Unit] = { + current match { + case Throw(_) => + // if we failed to read a particular value, we don't want to write to that key + // because that key could potentially have the real newest value + Future.Unit + case Return(None) => + // add rather than set, and fail if another value is written first + underlying.add(key, cached).unit + case Return(Some((_, cs))) => + underlying.checkAndSet(key, cached, cs).unit + } + } + + /** + * Currently not supported. Use set or lockAndSet. + */ + def add(key: K, value: Cached[V]): Future[Boolean] = { + Future.exception(new UnsupportedOperationException("use set or lockAndSet")) + } + + /** + * Currently not supported. + */ + def checkAndSet(key: K, value: Cached[V], checksum: Checksum): Future[Boolean] = { + Future.exception(new UnsupportedOperationException("use set or lockAndSet")) + } + + /** + * Calls set on all underlying caches. If at least one set succeeds, Future.Unit is + * returned. If all fail, a Future.exception will be returned. + */ + def set(key: K, value: Cached[V]): Future[Unit] = { + liftAndCollect { + underlyingCaches map { _.set(key, value) } + } flatMap { seqTryUnits => + // return Future.Unit if any underlying call succeeded, otherwise return + // the first failure. + if (seqTryUnits exists { _.isReturn }) + Future.Unit + else + Future.const(seqTryUnits.head) + } + } + + /** + * Calls lockAndSet on the underlying cache for all replication keys. If at least one + * underlying call succeeds, a successful result will be returned. 
+ */ + def lockAndSet(key: K, handler: LockingCache.Handler[Cached[V]]): Future[Option[Cached[V]]] = { + liftAndCollect { + underlyingCaches map { _.lockAndSet(key, handler) } + } flatMap { seqTryOptionCached => + Future.const(seqTryOptionCached.max(tryOptionCachedOrdering)) + } + } + + /** + * Returns Future(true) if any of the underlying caches return Future(true); otherwise, + * returns Future(false) if any of the underlying caches return Future(false); otherwise, + * returns the first failure. + */ + def replace(key: K, value: Cached[V]): Future[Boolean] = { + liftAndCollect { + underlyingCaches map { _.replace(key, value) } + } flatMap { seqTryBools => + if (seqTryBools.contains(Return.True)) + Future.value(true) + else if (seqTryBools.contains(Return.False)) + Future.value(false) + else + Future.const(seqTryBools.head) + } + } + + /** + * Performing an actual deletion on the underlying caches is not a good idea in the face + * of potential failure, because failing to remove all values would allow a cached value to + * be resurrected. Instead, delete actually does a replace on the underlying caches with a + * CachedValueStatus of Evicted, which will be treated as a miss on read. + */ + def delete(key: K): Future[Boolean] = { + replace(key, Cached(None, CachedValueStatus.Evicted, Time.now)) + } + + /** + * Converts a Seq[Future[A]] into a Future[Seq[Try[A]]], isolating failures into Trys, instead + * of allowing the entire Future to fail. 
+ */ + private def liftAndCollect[A](seq: Seq[Future[A]]): Future[Seq[Try[A]]] = { + Future.collect { seq map { _ transform { Future(_) } } } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala new file mode 100644 index 000000000..14e64d133 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TransformingCache.scala @@ -0,0 +1,324 @@ +package com.twitter.servo.cache + +import com.twitter.servo.util.Transformer +import com.twitter.util.{Duration, Future, Return, Throw} +import scala.collection.mutable.ArrayBuffer +import scala.collection.{breakOut, mutable} + +/** + * Adaptor from a ReadCache[K, V1] to an underlying ReadCache[K, V2] + * + * a Transformer is used to map between value types + */ +class ValueTransformingReadCache[K, V1, V2]( + underlyingCache: ReadCache[K, V2], + transformer: Transformer[V1, V2]) + extends ReadCache[K, V1] { + // overridden to avoid mapping the unneeded keyMap + override def get(keys: Seq[K]): Future[KeyValueResult[K, V1]] = { + underlyingCache.get(keys) map { lr => + // fold lr.found into found/deserialization failures + val found = mutable.Map.empty[K, V1] + val failed = mutable.Map.empty[K, Throwable] + + lr.found foreach { + case (key, value) => + transformer.from(value) match { + case Return(v) => found += key -> v + case Throw(t) => failed += key -> t + } + } + + lr.copy(found = found.toMap, failed = lr.failed ++ failed.toMap) + } handle { + case t => + KeyValueResult(failed = keys.map(_ -> t).toMap) + } + } + + // overridden to avoid mapping the unneeded keyMap + override def getWithChecksum(keys: Seq[K]): Future[CsKeyValueResult[K, V1]] = { + underlyingCache.getWithChecksum(keys) map { clr => + clr.copy(found = clr.found map { + case (key, (value, checksum)) => + key -> (value flatMap { transformer.from(_) }, checksum) + }) + } handle { + case t => + 
+ KeyValueResult(failed = keys.map(_ -> t).toMap) + } + } + + override def release() = underlyingCache.release() +} + +/** + * Adaptor from a ReadCache[K1, V1] to an underlying ReadCache[K2, V2] + * + * a Transformer is used to map between value types, and a + * one-way mapping is used for keys, making it possible to + * store data in the underlying cache using keys that can't + * easily be reverse-mapped. + */ +class KeyValueTransformingReadCache[K1, K2, V1, V2]( + underlyingCache: ReadCache[K2, V2], + transformer: Transformer[V1, V2], + underlyingKey: K1 => K2) + extends ReadCache[K1, V1] { + + // make keymapping for key recovery later + private[this] def mappedKeys( + keys: Seq[K1] + ): (IndexedSeq[K2], Map[K2, K1]) = { + val k2s = new ArrayBuffer[K2](keys.size) + val k2k1s: Map[K2, K1] = + keys.map { key => + val k2 = underlyingKey(key) + k2s += k2 + k2 -> key + }(breakOut) + (k2s, k2k1s) + } + + override def get(keys: Seq[K1]): Future[KeyValueResult[K1, V1]] = { + val (k2s, kMap) = mappedKeys(keys) + + underlyingCache + .get(k2s) + .map { lr => + // fold lr.found into found/deserialization failures + val found = Map.newBuilder[K1, V1] + val failed = Map.newBuilder[K1, Throwable] + + lr.found.foreach { + case (key, value) => + transformer.from(value) match { + case Return(v) => found += kMap(key) -> v + case Throw(t) => failed += kMap(key) -> t + } + } + + lr.failed.foreach { + case (k, t) => + failed += kMap(k) -> t + } + + KeyValueResult( + found.result(), + lr.notFound.map { kMap(_) }, + failed.result() + ) + } + .handle { + case t => + KeyValueResult(failed = keys.map(_ -> t).toMap) + } + } + + override def getWithChecksum(keys: Seq[K1]): Future[CsKeyValueResult[K1, V1]] = { + val (k2s, kMap) = mappedKeys(keys) + + underlyingCache + .getWithChecksum(k2s) + .map { clr => + KeyValueResult( + clr.found.map { + case (key, (value, checksum)) => + kMap(key) -> (value.flatMap(transformer.from), checksum) + }, + clr.notFound map { kMap(_) }, + clr.failed map { + case 
(key, t) => + kMap(key) -> t + } + ) + } + .handle { + case t => + KeyValueResult(failed = keys.map(_ -> t).toMap) + } + } + + override def release(): Unit = underlyingCache.release() +} + +class KeyTransformingCache[K1, K2, V](underlyingCache: Cache[K2, V], underlyingKey: K1 => K2) + extends KeyValueTransformingCache[K1, K2, V, V]( + underlyingCache, + Transformer.identity, + underlyingKey + ) + +/** + * Adaptor from a Cache[K, V1] to an underlying Cache[K, V2] + * + * a Transformer is used to map between value types + */ +class ValueTransformingCache[K, V1, V2]( + underlyingCache: Cache[K, V2], + transformer: Transformer[V1, V2]) + extends ValueTransformingReadCache[K, V1, V2](underlyingCache, transformer) + with Cache[K, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.add(key, _) } + + override def checkAndSet(key: K, value: V1, checksum: Checksum): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(key, _, checksum) } + + override def set(key: K, value: V1): Future[Unit] = + to(value) flatMap { underlyingCache.set(key, _) } + + override def replace(key: K, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.replace(key, _) } + + override def delete(key: K): Future[Boolean] = + underlyingCache.delete(key) +} + +/** + * Adaptor from a Cache[K1, V1] to an underlying Cache[K2, V2] + * + * a Transformer is used to map between value types, and a + * one-way mapping is used for keys, making it possible to + * store data in the underlying cache using keys that can't + * easily be reverse-mapped. 
+ */ +class KeyValueTransformingCache[K1, K2, V1, V2]( + underlyingCache: Cache[K2, V2], + transformer: Transformer[V1, V2], + underlyingKey: K1 => K2) + extends KeyValueTransformingReadCache[K1, K2, V1, V2]( + underlyingCache, + transformer, + underlyingKey + ) + with Cache[K1, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K1, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.add(underlyingKey(key), _) } + + override def checkAndSet(key: K1, value: V1, checksum: Checksum): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum) } + + override def set(key: K1, value: V1): Future[Unit] = + to(value) flatMap { underlyingCache.set(underlyingKey(key), _) } + + override def replace(key: K1, value: V1): Future[Boolean] = + to(value) flatMap { underlyingCache.replace(underlyingKey(key), _) } + + override def delete(key: K1): Future[Boolean] = + underlyingCache.delete(underlyingKey(key)) +} + +/** + * Adaptor from a TtlCache[K, V1] to an underlying TtlCache[K, V2] + * + * a Transformer is used to map between value types + */ +class ValueTransformingTtlCache[K, V1, V2]( + underlyingCache: TtlCache[K, V2], + transformer: Transformer[V1, V2]) + extends ValueTransformingReadCache[K, V1, V2](underlyingCache, transformer) + with TtlCache[K, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.add(key, _, ttl) } + + override def checkAndSet( + key: K, + value: V1, + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(key, _, checksum, ttl) } + + override def set(key: K, value: V1, ttl: Duration): Future[Unit] = + to(value) flatMap { underlyingCache.set(key, _, ttl) } + + override def replace(key: K, value: V1, ttl: Duration): Future[Boolean] = + to(value) 
flatMap { underlyingCache.replace(key, _, ttl) } + + override def delete(key: K): Future[Boolean] = + underlyingCache.delete(key) +} + +/** + * Adaptor from a TtlCache[K1, V1] to an underlying TtlCache[K2, V2] + * + * a Transformer is used to map between value types, and a + * one-way mapping is used for keys, making it possible to + * store data in the underlying cache using keys that can't + * easily be reverse-mapped. + */ +class KeyValueTransformingTtlCache[K1, K2, V1, V2]( + underlyingCache: TtlCache[K2, V2], + transformer: Transformer[V1, V2], + underlyingKey: K1 => K2) + extends KeyValueTransformingReadCache[K1, K2, V1, V2]( + underlyingCache, + transformer, + underlyingKey + ) + with TtlCache[K1, V1] { + private[this] def to(v1: V1): Future[V2] = Future.const(transformer.to(v1)) + + override def add(key: K1, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.add(underlyingKey(key), _, ttl) } + + override def checkAndSet( + key: K1, + value: V1, + checksum: Checksum, + ttl: Duration + ): Future[Boolean] = + to(value) flatMap { underlyingCache.checkAndSet(underlyingKey(key), _, checksum, ttl) } + + override def set(key: K1, value: V1, ttl: Duration): Future[Unit] = + to(value) flatMap { underlyingCache.set(underlyingKey(key), _, ttl) } + + override def replace(key: K1, value: V1, ttl: Duration): Future[Boolean] = + to(value) flatMap { underlyingCache.replace(underlyingKey(key), _, ttl) } + + override def delete(key: K1): Future[Boolean] = + underlyingCache.delete(underlyingKey(key)) +} + +class KeyTransformingTtlCache[K1, K2, V](underlyingCache: TtlCache[K2, V], underlyingKey: K1 => K2) + extends KeyValueTransformingTtlCache[K1, K2, V, V]( + underlyingCache, + Transformer.identity, + underlyingKey + ) + +class KeyTransformingLockingCache[K1, K2, V]( + underlyingCache: LockingCache[K2, V], + underlyingKey: K1 => K2) + extends KeyValueTransformingCache[K1, K2, V, V]( + underlyingCache, + Transformer.identity, + underlyingKey + ) 
+ with LockingCache[K1, V] { + import LockingCache._ + + override def lockAndSet(key: K1, handler: Handler[V]): Future[Option[V]] = + underlyingCache.lockAndSet(underlyingKey(key), handler) +} + +class KeyTransformingCounterCache[K1, K2]( + underlyingCache: CounterCache[K2], + underlyingKey: K1 => K2) + extends KeyTransformingCache[K1, K2, Long](underlyingCache, underlyingKey) + with CounterCache[K1] { + override def incr(key: K1, delta: Int = 1): Future[Option[Long]] = { + underlyingCache.incr(underlyingKey(key), delta) + } + + override def decr(key: K1, delta: Int = 1): Future[Option[Long]] = { + underlyingCache.decr(underlyingKey(key), delta) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala new file mode 100644 index 000000000..d42766951 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/TtlCache.scala @@ -0,0 +1,95 @@ +package com.twitter.servo.cache + +import com.twitter.util.{Duration, Future} + +/** + * a Cache that takes a TTL per set + */ +trait TtlCache[K, V] extends ReadCache[K, V] { + def add(key: K, value: V, ttl: Duration): Future[Boolean] + + def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration): Future[Boolean] + + def set(key: K, value: V, ttl: Duration): Future[Unit] + + /** + * Replaces the value for an existing key. If the key doesn't exist, this has no effect. + * @return true if replaced, false if not found + */ + def replace(key: K, value: V, ttl: Duration): Future[Boolean] + + /** + * Deletes a value from cache. 
+ * @return true if deleted, false if not found + */ + def delete(key: K): Future[Boolean] +} + +/** + * allows one TtlCache to wrap another + */ +trait TtlCacheWrapper[K, V] extends TtlCache[K, V] with ReadCacheWrapper[K, V, TtlCache[K, V]] { + override def add(key: K, value: V, ttl: Duration) = underlyingCache.add(key, value, ttl) + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = + underlyingCache.checkAndSet(key, value, checksum, ttl) + + override def set(key: K, value: V, ttl: Duration) = underlyingCache.set(key, value, ttl) + + override def replace(key: K, value: V, ttl: Duration) = underlyingCache.replace(key, value, ttl) + + override def delete(key: K) = underlyingCache.delete(key) +} + +class PerturbedTtlCache[K, V]( + override val underlyingCache: TtlCache[K, V], + perturbTtl: Duration => Duration) + extends TtlCacheWrapper[K, V] { + override def add(key: K, value: V, ttl: Duration) = + underlyingCache.add(key, value, perturbTtl(ttl)) + + override def checkAndSet(key: K, value: V, checksum: Checksum, ttl: Duration) = + underlyingCache.checkAndSet(key, value, checksum, perturbTtl(ttl)) + + override def set(key: K, value: V, ttl: Duration) = + underlyingCache.set(key, value, perturbTtl(ttl)) + + override def replace(key: K, value: V, ttl: Duration) = + underlyingCache.replace(key, value, perturbTtl(ttl)) +} + +/** + * an adaptor to wrap a Cache[K, V] interface around a TtlCache[K, V] + */ +class TtlCacheToCache[K, V](override val underlyingCache: TtlCache[K, V], ttl: (K, V) => Duration) + extends Cache[K, V] + with ReadCacheWrapper[K, V, TtlCache[K, V]] { + override def add(key: K, value: V) = underlyingCache.add(key, value, ttl(key, value)) + + override def checkAndSet(key: K, value: V, checksum: Checksum) = + underlyingCache.checkAndSet(key, value, checksum, ttl(key, value)) + + override def set(key: K, value: V) = underlyingCache.set(key, value, ttl(key, value)) + + override def replace(key: K, value: V) = 
underlyingCache.replace(key, value, ttl(key, value)) + + override def delete(key: K) = underlyingCache.delete(key) +} + +/** + * use a single TTL for all objects + */ +class SimpleTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl) + +/** + * use a value-based TTL function + */ +class ValueBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: V => Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(v)) + +/** + * use a key-based TTL function + */ +class KeyBasedTtlCacheToCache[K, V](underlyingTtlCache: TtlCache[K, V], ttl: K => Duration) + extends TtlCacheToCache[K, V](underlyingTtlCache, (k: K, v: V) => ttl(k)) diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala new file mode 100644 index 000000000..f2e74624d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/cache/package.scala @@ -0,0 +1,36 @@ +package com.twitter.servo + +import com.twitter.finagle.partitioning.PartitionNode +import com.twitter.servo.util.Transformer +import com.twitter.util.Try + +package object cache { + type CachedValue = thriftscala.CachedValue + val CachedValue = thriftscala.CachedValue + type CachedValueStatus = thriftscala.CachedValueStatus + val CachedValueStatus = thriftscala.CachedValueStatus + + type KeyTransformer[K] = K => String + type CsKeyValueResult[K, V] = KeyValueResult[K, (Try[V], Checksum)] + + type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] + val KeyValueResult = keyvalue.KeyValueResult + + @deprecated("Use com.twitter.finagle.partitioning.PartitionNode instead", "1/7/2013") + type WeightedHost = PartitionNode + + type Serializer[T] = Transformer[T, Array[Byte]] + + /** + * Like a companion object, but for a type alias! 
+ */ + val Serializer = Serializers + + type MemcacheFactory = (() => Memcache) +} + +package cache { + package object constants { + val Colon = ":" + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala new file mode 100644 index 000000000..647e9b3f0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Accessors.scala @@ -0,0 +1,151 @@ +package com.twitter.servo.database + +import com.twitter.util.Time +import java.sql.{ResultSet, Timestamp} + +/** + * A base trait for transforming JDBC ResultSets. + * Designed to be used with the Accessors trait. + */ +trait ImplicitBuilder[T] extends Accessors { + def apply(implicit row: ResultSet): T +} + +object Accessors { + + /** + * helper to make it compile time error when trying to call getOption on types not supported + * instead of a runtime exception + */ + object SafeManifest { + implicit val booleanSafeManifest = new SafeManifest(implicitly[Manifest[Boolean]]) + implicit val doubleSafeManifest = new SafeManifest(implicitly[Manifest[Double]]) + implicit val intSafeManifest = new SafeManifest[Int](implicitly[Manifest[Int]]) + implicit val longSafeManifest = new SafeManifest[Long](implicitly[Manifest[Long]]) + implicit val stringSafeManifest = new SafeManifest[String](implicitly[Manifest[String]]) + implicit val timestampSafeManifest = + new SafeManifest[Timestamp](implicitly[Manifest[Timestamp]]) + } + + @deprecated("safe manifests no longer supported, use type-specific accessors instead", "1.1.1") + case class SafeManifest[T](mf: Manifest[T]) +} + +/** + * mixin to get ResultSet accessors for standard types + */ +trait Accessors { + import Accessors._ + + /** + * @return None when the column is null for the current row of the result set passed in + * Some[T] otherwise + * @throws UnsupportedOperationException if the return type expected is not supported, currently + 
* only Boolean, Double, Int, Long, String and Timestamp are supported + */ + @deprecated("use type-specific accessors instead", "1.1.1") + def getOption[T](column: String)(implicit row: ResultSet, sf: SafeManifest[T]): Option[T] = { + val res = { + if (classOf[Boolean] == sf.mf.erasure) { + row.getBoolean(column) + } else if (classOf[Double] == sf.mf.erasure) { + row.getDouble(column) + } else if (classOf[Int] == sf.mf.erasure) { + row.getInt(column) + } else if (classOf[Long] == sf.mf.erasure) { + row.getLong(column) + } else if (classOf[String] == sf.mf.erasure) { + row.getString(column) + } else if (classOf[Timestamp] == sf.mf.erasure) { + row.getTimestamp(column) + } else { + throw new UnsupportedOperationException("type not supported: " + sf.mf.erasure) + } + } + if (row.wasNull()) { + None + } else { + Some(res.asInstanceOf[T]) + } + } + + /** + * @param get the method to apply to the ResultSet + * @param row the implicit ResultSet on which to apply get + * @return None when the column is null for the current row of the result set passed in + * Some[T] otherwise + */ + def getOption[T](get: ResultSet => T)(implicit row: ResultSet): Option[T] = { + val result = get(row) + if (row.wasNull()) { + None + } else { + Some(result) + } + } + + def booleanOption(column: String)(implicit row: ResultSet): Option[Boolean] = + getOption((_: ResultSet).getBoolean(column)) + + def boolean(column: String, default: Boolean = false)(implicit row: ResultSet): Boolean = + booleanOption(column).getOrElse(default) + + def doubleOption(column: String)(implicit row: ResultSet): Option[Double] = + getOption((_: ResultSet).getDouble(column)) + + def double(column: String, default: Double = 0.0)(implicit row: ResultSet): Double = + doubleOption(column).getOrElse(default) + + def intOption(column: String)(implicit row: ResultSet): Option[Int] = + getOption((_: ResultSet).getInt(column)) + + def int(column: String, default: Int = 0)(implicit row: ResultSet): Int =
intOption(column).getOrElse(default) + + def longOption(column: String)(implicit row: ResultSet): Option[Long] = + getOption((_: ResultSet).getLong(column)) + + def long(column: String, default: Long = 0)(implicit row: ResultSet): Long = + longOption(column).getOrElse(default) + + def stringOption(column: String)(implicit row: ResultSet): Option[String] = + getOption((_: ResultSet).getString(column)) + + def string(column: String, default: String = "")(implicit row: ResultSet): String = + stringOption(column).getOrElse(default) + + def timestampOption(column: String)(implicit row: ResultSet): Option[Timestamp] = + getOption((_: ResultSet).getTimestamp(column)) + + def timestamp( + column: String, + default: Timestamp = new Timestamp(0) + )( + implicit row: ResultSet + ): Timestamp = + timestampOption(column).getOrElse(default) + + def datetimeOption(column: String)(implicit row: ResultSet): Option[Long] = + timestampOption(column) map { _.getTime } + + def datetime(column: String, default: Long = 0L)(implicit row: ResultSet): Long = + datetimeOption(column).getOrElse(default) + + def timeOption(column: String)(implicit row: ResultSet): Option[Time] = + datetimeOption(column) map { Time.fromMilliseconds(_) } + + def time(column: String, default: Time = Time.epoch)(implicit row: ResultSet): Time = + timeOption(column).getOrElse(default) + + def bytesOption(column: String)(implicit row: ResultSet): Option[Array[Byte]] = + getOption((_: ResultSet).getBytes(column)) + + def bytes( + column: String, + default: Array[Byte] = Array.empty[Byte] + )( + implicit row: ResultSet + ): Array[Byte] = + bytesOption(column).getOrElse(default) + +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala new file mode 100644 index 000000000..fafd0fb72 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Bitfield.scala @@ -0,0 +1,56 @@ 
+package com.twitter.servo.database + +object Bitfield { + def multiValue(bits: Boolean*): Int = { + bits.foldLeft(0) { (accum, bit) => + (accum << 1) | (if (bit) 1 else 0) + } + } + + def multiValueLong(bits: Boolean*): Long = { + bits.foldLeft(0L) { (accum, bit) => + (accum << 1) | (if (bit) 1L else 0L) + } + } +} + +/** + * A mixin for unpacking bitfields. + */ +trait Bitfield { + val bitfield: Int + + /** + * Tests that a given position is set to 1. + */ + def isSet(position: Int): Boolean = { + (bitfield & (1 << position)) != 0 + } + + /** + * takes a sequence of booleans, from most to least significant + * and converts them to an integer. + * + * example: multiValue(true, false, true) yields 0b101 = 5 + */ + def multiValue(bits: Boolean*): Int = Bitfield.multiValue(bits: _*) +} + +trait LongBitfield { + val bitfield: Long + + /** + * Tests that a given position is set to 1. + */ + def isSet(position: Int): Boolean = { + (bitfield & (1L << position)) != 0 + } + + /** + * takes a sequence of booleans, from most to least significant + * and converts them to a long. 
+ * + * example: multiValue(true, false, true) yields 0b101 = 5L + */ + def multiValue(bits: Boolean*): Long = Bitfield.multiValueLong(bits: _*) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala new file mode 100644 index 000000000..b4eef7418 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Credentials.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.database + +import com.twitter.util.security +import java.io.File + +sealed trait Credentials { + def username: String + def password: String +} + +case class InlineCredentials(username: String, password: String) extends Credentials + +case class FileCredentials( + path: String, + usernameField: String = "db_username", + passwordField: String = "db_password") + extends Credentials { + lazy val (username, password) = { + val credentials = security.Credentials(new File(path)) + (credentials(usernameField), credentials(passwordField)) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala new file mode 100644 index 000000000..3d9845c31 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/Database.scala @@ -0,0 +1,201 @@ +package com.twitter.servo.database + +import com.twitter.servo.repository._ +import com.twitter.util.Future +import scala.collection.mutable.{HashMap, HashSet, ListBuffer} +import scala.collection.generic.Growable + +object Database { + + /** + * Construct a KeyValueRepository wrapping access to a database. + * + * Data retrieved as a row from the query is passed to a Builder producing a + * (Key, Row) tuple. 
Once all rows have been processed this way it is passed as a + * sequence to a post-query function that can perform actions (aggregation usually) + * and produce a final sequence of (Key, Value). + * + * @tparam Q + * how we'll be querying this repository + * + * @tparam K + * the key used for looking data up + * + * @tparam R + * each entry from the database will be represented as an instance of R + * + * @tparam V + * the repository will return a V produced by processing one or more Rs + * + * @param database + * A database used to back the KeyValueRepository being built. + * + * @param dbQuery + * A database query for fetching records to be parsed into objects of type + * Row. The query string can contain instances of the character '?' as + * placeholders for parameters passed into the `Database.select` calls. + * + * @param builder + * A Builder that builds (K, Row) pairs from ResultSets from the database + * + * @param postProcess + * A function which can manipulate the Seq[(K, Row)] that is returned from the + * database. Useful for aggregating multi-mapped K, V pairs where V holds a + * container with multiple values for the same key in the database. This function + * should not manipulate the list of keys; doing so will result in Return.None + * elements in the ensuing KeyValueResult. + * + * AggregateByKey has a basic implementation that groups R objects by a + * specified identifier and may be useful as a common impl. + * + * @param selectParams + * A function that is applied to the distinct keys in a repository query. + * The result is passed to `Database.select` to be used for filling in + * bind variables in dbQuery. By default, the repository query is passed + * directly to the select. The use cases for this function are situations + * where the SELECT statement takes multiple parameters. + * + * Example: + * // A repository that takes Seq[Long]s of userids and returns + * // Item objects of a parameterized item type.
+ * Database.keyValueRepository[Seq[Long], Long, Item, Item]( + * database, + * "SELECT * FROM items WHERE user_id IN (?) AND item_type = ?;", + * ItemBuilder, + * selectParams = Seq(_: Seq[Long], itemType) + * ) + */ + def keyValueRepository[Q <: Seq[K], K, R, V]( + database: Database, + dbQuery: String, + builder: Builder[(K, R)], + postProcess: Seq[(K, R)] => Seq[(K, V)] = + (identity[Seq[(K, V)]] _): (Seq[(K, V)] => Seq[(K, V)]), + selectParams: Seq[K] => Seq[Any] = (Seq(_: Seq[K])): (Seq[K] => collection.Seq[Seq[K]]) + ): KeyValueRepository[Q, K, V] = + query => { + if (query.isEmpty) { + KeyValueResult.emptyFuture + } else { + val uniqueKeys = query.distinct + KeyValueResult.fromPairs(uniqueKeys) { + database.select(dbQuery, builder, selectParams(uniqueKeys): _*) map postProcess + } + } + } +} + +/** + * A thin trait for async interaction with a database. + */ +trait Database { + def select[A](query: String, builder: Builder[A], params: Any*): Future[Seq[A]] + def selectOne[A](query: String, builder: Builder[A], params: Any*): Future[Option[A]] + def execute(query: String, params: Any*): Future[Int] + def insert(query: String, params: Any*): Future[Long] + def release(): Unit +} + +object NullDatabase extends Database { + override def select[Unit](query: String, builder: Builder[Unit], params: Any*) = + Future.value(Seq.empty[Unit]) + + override def selectOne[Unit](query: String, builder: Builder[Unit], params: Any*) = + Future.value(None) + + override def release() = () + + override def execute(query: String, params: Any*) = + Future.value(0) + + override def insert(query: String, params: Any*) = + Future.value(0) +} + +object AggregateByKey { + def apply[K, R, A]( + extractKey: R => K, + reduce: Seq[R] => A, + pruneDuplicates: Boolean = false + ) = new AggregateByKey(extractKey, reduce, pruneDuplicates) + + /** + * In the event that the item type (V) does not carry an aggregation key then we can have + * the Builder return a tuple with some id attached. 
If that is done then each Row from the + * builder will look something like (SomeGroupId, SomeRowObject). Because we tend to minimize + * data duplication this seems to be a pretty common pattern and can be seen in + * SavedSearchesRepository, FacebookConnectionsRepository, and UserToRoleRepository. + * + * @tparam K + * The type for the key + * @tparam V + * The type of a single element of the list + * @tparam A + * The object we'll aggregate list items into + * @param reduce + * A function that combines a seq of V into A + * @param pruneDuplicates + * If set this ensures that, at most, one instance of any given V will be passed into reduce. + */ + def withKeyValuePairs[K, V, A]( + reduce: Seq[V] => A, + pruneDuplicates: Boolean + ): AggregateByKey[K, (K, V), A] = + new AggregateByKey( + { case (k, _) => k }, + values => reduce(values map { case (_, v) => v }), + pruneDuplicates + ) +} + +/** + * Basic aggregator that extracts keys from a Row, groups into a Seq by those keys, and + * performs some reduction step to mash those into an aggregated object. Order is not + * necessarily kept between the retrieving rows from the database and passing them into + * reduce. + * + * @tparam K + * the type used by the item on which we aggregate rows + * + * @tparam R + * object that a single row of the query will be represented as + * + * @tparam A + * what we collect groups of R into + * + * @param extractKey + * function to extract a key from a row object + * + * @param reduce + * function that can take a sequence of rows and combine them into an aggregate + * + * @param pruneDuplicates + * if set this will ensure that at most one copy of each R will be passed into reduce (as + * determined by R's equal method) but will pass the input through a set which will + * likely lose ordering. 
+ */ +class AggregateByKey[K, R, A]( + extractKey: R => K, + reduce: Seq[R] => A, + pruneDuplicates: Boolean = false) + extends (Seq[R] => Seq[(K, A)]) { + override def apply(input: Seq[R]): Seq[(K, A)] = { + val collectionMap = new HashMap[K, Growable[R] with Iterable[R]] + + def emptyCollection: Growable[R] with Iterable[R] = + if (pruneDuplicates) { + new HashSet[R] + } else { + new ListBuffer[R] + } + + input foreach { element => + (collectionMap.getOrElseUpdate(extractKey(element), emptyCollection)) += element + } + + collectionMap map { + case (key, items) => + key -> reduce(items toSeq) + } toSeq + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala new file mode 100644 index 000000000..6a1f41437 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/database/package.scala @@ -0,0 +1,19 @@ +package com.twitter.servo + +import com.twitter.util.Future +import java.sql.ResultSet + +package object database { + type DatabaseFactory = (() => Database) + + /** + * A function type for translating ResultSets into objects of the result type A. + */ + type Builder[A] = ResultSet => A + + /** + * A function type for asynchronously translating ResultSets into objects + * of the result type A. 
+ */ + type FutureBuilder[A] = Builder[Future[A]] +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala new file mode 100644 index 000000000..67feab329 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/hydrator/KeyValueHydrator.scala @@ -0,0 +1,155 @@ +package com.twitter.servo.hydrator + +import com.twitter.servo.data.Mutation +import com.twitter.servo.util.{Effect, Gate} +import com.twitter.servo.repository._ +import com.twitter.util.{Future, Return, Try} + +object KeyValueHydrator { + // KeyValueHydrator extends this function type + type FunctionType[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Mutation[V]] + type Filter[Q, K, V] = (Q, Future[KeyValueResult[K, V]]) => Future[Boolean] + + private[this] val _unit = fromMutation[Any, Any, Any](Mutation.unit[Any]) + + /** + * A no-op hydrator. Forms a monoid with `also`. + */ + def unit[Q, K, V]: KeyValueHydrator[Q, K, V] = + _unit.asInstanceOf[KeyValueHydrator[Q, K, V]] + + /** + * Packages a function as a KeyValueHydrator + */ + def apply[Q, K, V](f: FunctionType[Q, K, V]): KeyValueHydrator[Q, K, V] = + new KeyValueHydrator[Q, K, V] { + override def apply(query: Q, futureResults: Future[KeyValueResult[K, V]]) = + f(query, futureResults) + } + + /** + * Creates a new KeyValueHydrator out of several underlying KVHydrators. The + * apply method is called on each KeyValueHydrator with the same + * futureResults, allowing each to kick-off some asynchronous work + * to produce a future Hydrated[Mutation]. When all the future + * Hydrated[Mutation]s are available, the results are folded, + * left-to-right, over the mutations, to build up the final + * results. 
+ */ + def inParallel[Q, K, V](hydrators: KeyValueHydrator[Q, K, V]*): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + val futureMutations = hydrators map { t => + t(query, futureResults) + } + Future.collect(futureMutations) map Mutation.all + } + + def const[Q, K, V](futureMutation: Future[Mutation[V]]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (_, _) => + futureMutation + } + + def fromMutation[Q, K, V](mutation: Mutation[V]): KeyValueHydrator[Q, K, V] = + const[Q, K, V](Future.value(mutation)) +} + +/** + * A KeyValueHydrator builds a Mutation to be applied to the values in a KeyValueResult, but does + * not itself apply the Mutation. This allows several KeyValueHydrators to be composed together to + * begin their work in parallel to build the Mutations, which can then be combined and applied + * to the results later (see asRepositoryFilter). + * + * Forms a monoid with KeyValueHydrator.unit as unit and `also` as the combining function. + */ +trait KeyValueHydrator[Q, K, V] extends KeyValueHydrator.FunctionType[Q, K, V] { + protected[this] val unitMutation = Mutation.unit[V] + protected[this] val futureUnitMutation = Future.value(unitMutation) + + /** + * Combines two KeyValueHydrators. Forms a monoid with KeyValueHydrator.unit + */ + def also(next: KeyValueHydrator[Q, K, V]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator.inParallel(this, next) + + /** + * Turns a single KeyValueHydrator into a RepositoryFilter by applying the Mutation to + * found values in the KeyValueResult. If the mutation throws an exception, it will + * be caught and the resulting key/value pair moved to the failed map of the resulting + * KeyValueResult.
+ */ + lazy val asRepositoryFilter: RepositoryFilter[Q, KeyValueResult[K, V], KeyValueResult[K, V]] = + (query, futureResults) => { + this(query, futureResults) flatMap { mutation => + val update = mutation.endo + futureResults map { results => + results.mapValues { + case Return(Some(value)) => Try(Some(update(value))) + case x => x + } + } + } + } + + /** + * Apply this hydrator to the result of a repository. + */ + def hydratedBy_:(repo: KeyValueRepository[Q, K, V]): KeyValueRepository[Q, K, V] = + Repository.composed(repo, asRepositoryFilter) + + /** + * Return a new hydrator that applies the same mutation as this + * hydrator, but can be enabled/disabled or dark enabled/disabled via Gates. The light + * gate takes precedence over the dark gate. This allows you to go from 0%->100% dark, + * and then from 0%->100% light without affecting backend traffic. + */ + @deprecated("Use enabledBy(() => Boolean, () => Boolean)", "2.5.1") + def enabledBy(light: Gate[Unit], dark: Gate[Unit] = Gate.False): KeyValueHydrator[Q, K, V] = + enabledBy( + { () => + light() + }, + { () => + dark() + }) + + /** + * Return a new hydrator that applies the same mutation as this + * hydrator, but can be enabled/disabled or dark enable/disabled via nullary boolean functions. + * The light function takes precedence over the dark function. + * This allows you to go from 0%->100% dark, and then from 0%->100% light + * without affecting backend traffic. 
+ */ + def enabledBy(light: () => Boolean, dark: () => Boolean): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + val isLight = light() + val isDark = !isLight && dark() + if (!isLight && !isDark) { + futureUnitMutation + } else { + this(query, futureResults) map { + case mutation if isLight => mutation + case mutation if isDark => mutation.dark + } + } + } + + /** + * Build a new hydrator that will return the same result as the current hydrator, + * but will additionally perform the supplied effect on the result of hydration. + */ + def withEffect(effect: Effect[Option[V]]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (query, futureResults) => + this(query, futureResults) map { _ withEffect effect } + } + + /** + * Builds a new hydrator that only attempt to hydrate if the + * supplied filter returns true. + */ + def filter(predicate: KeyValueHydrator.Filter[Q, K, V]): KeyValueHydrator[Q, K, V] = + KeyValueHydrator[Q, K, V] { (q, r) => + predicate(q, r) flatMap { t => + if (t) this(q, r) else futureUnitMutation + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala new file mode 100644 index 000000000..bcf49efb8 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/keyvalue/KeyValueResult.scala @@ -0,0 +1,473 @@ +package com.twitter.servo.keyvalue + +import com.twitter.finagle.memcached.util.NotFound +import com.twitter.util.{Future, Return, Throw, Try} +import scala.collection.immutable + +object KeyValueResult { + private[this] val Empty = KeyValueResult() + private[this] val EmptyFuture = Future.value(Empty) + + def empty[K, V]: KeyValueResult[K, V] = + Empty.asInstanceOf[KeyValueResult[K, V]] + + def emptyFuture[K, V]: Future[KeyValueResult[K, V]] = + EmptyFuture.asInstanceOf[Future[KeyValueResult[K, V]]] + + /** + * Builds a KeyValueResult using pairs of 
keys to Try[Option[V]]. These values are split
+   * out to build the separate found/notFound/failed collections.
+   */
+  def build[K, V](data: (K, Try[Option[V]])*): KeyValueResult[K, V] =
+    data
+      .foldLeft(new KeyValueResultBuilder[K, V]) {
+        case (builder, (key, outcome)) => builder.update(key, outcome)
+      }
+      .result()
+
+  /**
+   * Builds a future KeyValueResult from a future collection of (key, value) pairs, all of
+   * which are treated as found results. The pairs do not have to line up with `keys`; any
+   * key without a pair is reported as notFound.
+   */
+  def fromPairs[K, V](
+    keys: Iterable[K] = Nil: immutable.Nil.type
+  )(
+    futurePairs: Future[TraversableOnce[(K, V)]]
+  ): Future[KeyValueResult[K, V]] =
+    fromMap(keys)(futurePairs.map(pairs => pairs.toMap))
+
+  /**
+   * Builds a future KeyValueResult from a future map of found results. Keys that are missing
+   * from the map are reported as notFound; if the future fails, every key is reported as
+   * failed with that exception.
+   */
+  def fromMap[K, V](
+    keys: Iterable[K] = Nil: immutable.Nil.type
+  )(
+    futureMap: Future[Map[K, V]]
+  ): Future[KeyValueResult[K, V]] =
+    futureMap
+      .map { hits =>
+        KeyValueResult[K, V](found = hits, notFound = NotFound(keys.toSet, hits.keySet))
+      }
+      .handle {
+        case failure =>
+          KeyValueResult[K, V](failed = keys.map(key => key -> failure).toMap)
+      }
+
+  /**
+   * Builds a future KeyValueResult using a future sequence of optional results. That
+   * sequence must match up pair-wise with the given sequence of keys. A value of Some[V] is
+   * counted as a found result, a value of None is counted as a notFound result.
+   */
+  def fromSeqOption[K, V](
+    keys: Iterable[K]
+  )(
+    futureSeq: Future[Seq[Option[V]]]
+  ): Future[KeyValueResult[K, V]] =
+    futureSeq
+      .map { results =>
+        val builder = new KeyValueResultBuilder[K, V]
+        keys.zip(results).foreach {
+          case (key, maybeValue) =>
+            maybeValue match {
+              case Some(value) => builder.addFound(key, value)
+              case None => builder.addNotFound(key)
+            }
+        }
+        builder.result()
+      }
+      .handle {
+        case failure =>
+          KeyValueResult[K, V](failed = keys.map(key => key -> failure).toMap)
+      }
+
+  /**
+   * Builds a future KeyValueResult from a future sequence of Try results that lines up
+   * pair-wise with `keys`. Return[V] counts as found and Throw counts as failed. If the
+   * future itself fails, every key is reported as failed.
+   */
+  def fromSeqTry[K, V](
+    keys: Iterable[K]
+  )(
+    futureSeq: Future[Seq[Try[V]]]
+  ): Future[KeyValueResult[K, V]] =
+    futureSeq
+      .map { results =>
+        val builder = new KeyValueResultBuilder[K, V]
+        keys.zip(results).foreach {
+          case (key, outcome) =>
+            outcome match {
+              case Return(value) => builder.addFound(key, value)
+              case Throw(failure) => builder.addFailed(key, failure)
+            }
+        }
+        builder.result()
+      }
+      .handle {
+        case failure =>
+          KeyValueResult[K, V](failed = keys.map(key => key -> failure).toMap)
+      }
+
+  /**
+   * Builds a future KeyValueResult from a sequence of future options that lines up pair-wise
+   * with `keys`. Some[V] counts as found and None counts as notFound; each future's outcome
+   * is captured as a Try before the futures are collected.
+   */
+  def fromSeqFuture[K, V](
+    keys: Iterable[K]
+  )(
+    futureSeq: Seq[Future[Option[V]]]
+  ): Future[KeyValueResult[K, V]] = {
+    val settled = futureSeq.map(future => future.transform(outcome => Future(outcome)))
+    fromSeqTryOptions(keys)(Future.collect(settled))
+  }
+
+  /**
+   * Builds a future KeyValueResult using a future sequence of Try[Option[V]]. That sequence must
+   * match up pair-wise with the given sequence of keys.
A value of Return[Some[V]] is
+   * counted as a found result, a value of Return[None] is counted as a notFound result, and a value
+   * of Throw[V] is counted as a failed result.
+   */
+  def fromSeqTryOptions[K, V](
+    keys: Iterable[K]
+  )(
+    futureSeq: Future[Seq[Try[Option[V]]]]
+  ): Future[KeyValueResult[K, V]] =
+    futureSeq
+      .map { results =>
+        val builder = new KeyValueResultBuilder[K, V]
+        keys.zip(results).foreach {
+          case (key, outcome) =>
+            outcome match {
+              case Throw(failure) => builder.addFailed(key, failure)
+              case Return(None) => builder.addNotFound(key)
+              case Return(Some(value)) => builder.addFound(key, value)
+            }
+        }
+        builder.result()
+      }
+      .handle {
+        case failure =>
+          KeyValueResult[K, V](failed = keys.map(key => key -> failure).toMap)
+      }
+
+  /**
+   * Builds a future KeyValueResult from a future map whose values are Try[Option[V]].
+   * Return[Some[V]] counts as found, Return[None] as notFound and Throw as failed.
+   *
+   * Keys absent from the map are reported as notFound, map entries whose key is not in
+   * `keys` are ignored, and a failed future reports every key as failed.
+   */
+  def fromMapTryOptions[K, V](
+    keys: Iterable[K]
+  )(
+    futureMapTryOptions: Future[Map[K, Try[Option[V]]]]
+  ): Future[KeyValueResult[K, V]] =
+    futureMapTryOptions
+      .map { lookup =>
+        val builder = new KeyValueResultBuilder[K, V]
+        keys.foreach { key =>
+          lookup.get(key) match {
+            case Some(Throw(failure)) => builder.addFailed(key, failure)
+            case Some(Return(Some(value))) => builder.addFound(key, value)
+            case Some(Return(None)) | None => builder.addNotFound(key)
+          }
+        }
+        builder.result()
+      }
+      .handle {
+        case failure =>
+          KeyValueResult[K, V](failed = keys.map(key => key -> failure).toMap)
+      }
+
+  /**
+   * Reduces several KeyValueResults down to just 1, by combining as if by ++, but
+   * more efficiently with fewer intermediate results.
+   */
+  def sum[K, V](results: Iterable[KeyValueResult[K, V]]): KeyValueResult[K, V] = {
+    val bldr = new KeyValueResultBuilder[K, V]
+
+    // Pour every bucket of every result into one builder; a key may land in several buckets.
+    results foreach { result =>
+      bldr.addFound(result.found)
+      bldr.addNotFound(result.notFound)
+      bldr.addFailed(result.failed)
+    }
+
+    val res = bldr.result()
+
+    if (res.notFound.isEmpty && res.failed.isEmpty) {
+      // Only found entries: no key can be in two buckets, so nothing to reconcile.
+      res
+    } else {
+      // Reconcile overlaps: found keys are removed from notFound, then found and notFound
+      // keys are removed from failed (the same precedence as ++).
+      val foundKeySet = res.found.keySet
+      val notFound = NotFound(res.notFound, foundKeySet)
+      val failed = NotFound(NotFound(res.failed, foundKeySet), res.notFound)
+      KeyValueResult(res.found, notFound, failed)
+    }
+  }
+}
+
+/**
+ * The outcome of a multi-key lookup, split into three buckets: keys that were found (with
+ * their values), keys that were not found, and keys whose lookup failed (with the exception).
+ * Iterating yields each key paired with the equivalent Try[Option[V]].
+ */
+case class KeyValueResult[K, +V](
+  found: Map[K, V] = Map.empty[K, V]: immutable.Map[K, V],
+  notFound: Set[K] = Set.empty[K]: immutable.Set[K],
+  failed: Map[K, Throwable] = Map.empty[K, Throwable]: immutable.Map[K, Throwable])
+    extends Iterable[(K, Try[Option[V]])] {
+
+  /**
+   * A cheaper implementation of isEmpty than the default which relies
+   * on building an iterator.
+   */
+  override def isEmpty = found.isEmpty && notFound.isEmpty && failed.isEmpty
+
+  /**
+   * map over the keyspace to produce a new KeyValueResult
+   */
+  def mapKeys[K2](f: K => K2): KeyValueResult[K2, V] =
+    copy(
+      found = found.map { case (k, v) => f(k) -> v },
+      notFound = notFound.map(f),
+      failed = failed.map { case (k, t) => f(k) -> t }
+    )
+
+  /**
+   * Maps over found values to produce a new KeyValueResult. If the given function throws an
+   * exception for a particular value, that value will be moved to the `failed` bucket with
+   * the thrown exception.
+   */
+  def mapFound[V2](f: V => V2): KeyValueResult[K, V2] = {
+    val out = new KeyValueResultBuilder[K, V2]()
+
+    // Try(...) captures an exception thrown by f so the key lands in the failed bucket.
+    for ((key, value) <- found) out.update(key, Try(Some(f(value))))
+
+    out.addNotFound(notFound)
+    out.addFailed(failed)
+    out.result()
+  }
+
+  /**
+   * Applies f to every entry (found, notFound and failed alike, each expressed as a
+   * Try[Option[V]]) and re-buckets the entries according to what f returns.
+   */
+  def mapValues[V2](f: Try[Option[V]] => Try[Option[V2]]): KeyValueResult[K, V2] = {
+    val out = new KeyValueResultBuilder[K, V2]()
+
+    def put(key: K, current: Try[Option[V]]): Unit = out.update(key, f(current))
+
+    for ((key, value) <- found) put(key, Return(Some(value)))
+    for (key <- notFound) put(key, Return.None)
+    for ((key, failure) <- failed) put(key, Throw(failure))
+
+    out.result()
+  }
+
+  /**
+   * Applies f to each found value and re-buckets that key according to the returned
+   * Try[Option[V2]]; existing notFound and failed keys are carried over unchanged.
+   */
+  def mapFoundValues[V2](f: V => Try[Option[V2]]): KeyValueResult[K, V2] = {
+    val out = new KeyValueResultBuilder[K, V2]()
+
+    for ((key, value) <- found) out.update(key, f(value))
+
+    out.addNotFound(notFound)
+    out.addFailed(failed)
+    out.result()
+  }
+
+  /**
+   * map over the pairs of results, creating a new KeyValueResult based on the returned
+   * tuples from the provided function.
+   */
+  def mapPairs[K2, V2](f: (K, Try[Option[V]]) => (K2, Try[Option[V2]])): KeyValueResult[K2, V2] = {
+    val out = new KeyValueResultBuilder[K2, V2]
+
+    def put(key: K, current: Try[Option[V]]): Unit = {
+      val (newKey, newValue) = f(key, current)
+      out.update(newKey, newValue)
+    }
+
+    for ((key, value) <- found) put(key, Return(Some(value)))
+    for (key <- notFound) put(key, Return.None)
+    for ((key, failure) <- failed) put(key, Throw(failure))
+
+    out.result()
+  }
+
+  /**
+   * Keeps only the entries (found, notFound or failed, each expressed as a
+   * (key, Try[Option[V]]) pair) for which the predicate holds.
+   */
+  override def filter(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] = {
+    val out = new KeyValueResultBuilder[K, V]
+
+    def keepIfMatching(key: K, current: Try[Option[V]]): Unit =
+      if (p((key, current))) out.update(key, current)
+
+    for ((key, value) <- found) keepIfMatching(key, Return(Some(value)))
+    for (key <- notFound) keepIfMatching(key, Return.None)
+    for ((key, failure) <- failed) keepIfMatching(key, Throw(failure))
+
+    out.result()
+  }
+
+  /**
+   * The complement of filter: keeps only the entries for which the predicate does not hold.
+   */
+  override def filterNot(p: ((K, Try[Option[V]])) => Boolean): KeyValueResult[K, V] =
+    filter(entry => !p(entry))
+
+  /**
+   * Iterates over found entries first, then notFound keys, then failed entries, each
+   * expressed in the combined Try[Option[V]] form.
+   */
+  def iterator: Iterator[(K, Try[Option[V]])] = {
+    val hits: Iterator[(K, Try[Option[V]])] =
+      found.iterator.map { case (k, v) => k -> Return(Some(v)) }
+    val misses: Iterator[(K, Try[Option[V]])] =
+      notFound.iterator.map(k => k -> Return.None)
+    val errors: Iterator[(K, Try[Option[V]])] =
+      failed.iterator.map { case (k, t) => k -> Throw(t) }
+
+    hits ++ misses ++ errors
+  }
+
+  /**
+   * Returns a copy in which every failed key is reported as notFound instead. The
+   * exceptions themselves are discarded.
+   */
+  def convertFailedToNotFound =
+    copy(
+      failed = Map.empty[K, Throwable],
+      notFound = notFound ++ failed.keySet
+    )
+
+  /**
+   * Returns a copy in which all not-found entries are converted to failures.
+   */
+  def convertNotFoundToFailed(f: K => Throwable) =
+    copy(
+      notFound = Set.empty[K],
+      failed = failed ++ (notFound map { k =>
+        k -> f(k)
+      })
+    )
+
+  /**
+   * Returns a copy in which failures are repaired with the supplied handler.
+   *
+   * For each failed key whose exception the handler is defined for, the key is moved to
+   * found (handler returned Some) or notFound (handler returned None); failures the handler
+   * does not cover stay failed. Returns `this` unchanged when there are no failures.
+   */
+  def repairFailed[V2 >: V](handler: PartialFunction[Throwable, Option[V2]]) =
+    if (failed.isEmpty) {
+      this
+    } else {
+      val builder = new KeyValueResultBuilder[K, V2]
+      builder.addFound(found)
+      builder.addNotFound(notFound)
+      // foreach, not map: only the side effect on the builder is wanted, so there is no
+      // reason to build and discard a collection of results.
+      failed foreach { case (k, t) => builder.update(k, Throw(t) handle handler) }
+      builder.result()
+    }
+
+  /**
+   * Combines two KeyValueResults. Conflicting founds/notFounds are resolved
+   * as founds, and conflicting (found|notFound)/failures are resolved as (found|notFound).
+   */
+  def ++[K2 >: K, V2 >: V](that: KeyValueResult[K2, V2]): KeyValueResult[K2, V2] = {
+    if (this.isEmpty) that
+    else if (that.isEmpty) this.asInstanceOf[KeyValueResult[K2, V2]]
+    else {
+      val found = this.found ++ that.found
+      // Drop found keys from notFound, then drop found and notFound keys from failed.
+      val notFound = NotFound(this.notFound ++ that.notFound, found.keySet)
+      val failed = NotFound(NotFound(this.failed ++ that.failed, found.keySet), notFound)
+      KeyValueResult(found, notFound, failed)
+    }
+  }
+
+  /**
+   * Looks up a result for a key. A found value takes precedence over a failure; a key that
+   * is in neither bucket (whether it is in notFound or entirely unknown) yields Return.None.
+   */
+  def apply(key: K): Try[Option[V]] = {
+    found.get(key) match {
+      case some @ Some(_) => Return(some)
+      case None =>
+        failed.get(key) match {
+          case Some(t) => Throw(t)
+          case None => Return.None
+        }
+    }
+  }
+
+  /**
+   * Looks up a result for a key, returning a provided default if the key is not
+   * found or failed.
+   */
+  def getOrElse[V2 >: V](key: K, default: => V2): V2 =
+    found.getOrElse(key, default)
+
+  /**
+   * If any keys fail, will return the first failure.
Otherwise,
+   * will convert founds/notFounds to a Seq[Option[V]], ordered by
+   * the keys provided
+   */
+  def toFutureSeqOfOptions(keys: Seq[K]): Future[Seq[Option[V]]] =
+    if (failed.isEmpty) Future.value(keys.map(found.get))
+    else Future.exception(failed.values.head)
+
+  // Overridden because the toString inherited from Iterable is hard to read.
+  override def toString(): String =
+    s"KeyValueResult(found = $found, notFound = $notFound, failed = $failed)"
+}
+
+/**
+ * Mutable accumulator for the three buckets of a KeyValueResult. Every add/update method
+ * returns the builder itself so calls can be chained or used in a fold.
+ */
+class KeyValueResultBuilder[K, V] {
+  private[this] val found = Map.newBuilder[K, V]
+  private[this] val notFound = Set.newBuilder[K]
+  private[this] val failed = Map.newBuilder[K, Throwable]
+
+  def addFound(k: K, v: V) = {
+    found += (k -> v)
+    this
+  }
+
+  def addNotFound(k: K) = {
+    notFound += k
+    this
+  }
+
+  def addFailed(k: K, t: Throwable) = {
+    failed += (k -> t)
+    this
+  }
+
+  def addFound(kvs: Iterable[(K, V)]) = {
+    found ++= kvs
+    this
+  }
+
+  def addNotFound(ks: Iterable[K]) = {
+    notFound ++= ks
+    this
+  }
+
+  def addFailed(kts: Iterable[(K, Throwable)]) = {
+    failed ++= kts
+    this
+  }
+
+  /** Files the key under found, notFound or failed according to the shape of tryV. */
+  def update(k: K, tryV: Try[Option[V]]) =
+    tryV match {
+      case Return(Some(v)) => addFound(k, v)
+      case Return(None) => addNotFound(k)
+      case Throw(t) => addFailed(k, t)
+    }
+
+  def result() = KeyValueResult(found.result(), notFound.result(), failed.result())
+}
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala
new file mode 100644
index 000000000..40f69b81a
--- /dev/null
+++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingCounterKeyValueRepository.scala
@@ -0,0 +1,44 @@
+package com.twitter.servo.repository
+
+import com.twitter.servo.cache._
+import com.twitter.util.Future
+
+/**
+ * A CounterKeyValueRepository that consults a CounterCache first and only asks the
+ * underlying repository for keys the cache missed or failed on. Values read from the
+ * underlying repository are added to the cache.
+ */
+class CachingCounterKeyValueRepository[K](
+  underlying: CounterKeyValueRepository[K],
+  cache: CounterCache[K],
+  observer: CacheObserver = NullCacheObserver)
+    extends CounterKeyValueRepository[K] {
+
+  def apply(keys: Seq[K]): Future[KeyValueResult[K, Long]] =
+    cache.get(keys.distinct).flatMap { fromCache =>
+      recordResults(fromCache)
+
+      // Cache failures are handled like misses: both are fetched from the underlying repo.
+      val toFetch = fromCache.notFound ++ fromCache.failed.keySet
+      readThrough(toFetch.toSeq).map { fromUnderlying =>
+        KeyValueResult(fromCache.found) ++ fromUnderlying
+      }
+    }
+
+  /** Fetches the keys from the underlying repository and adds every found value to the cache. */
+  private def readThrough(keys: Seq[K]): Future[KeyValueResult[K, Long]] =
+    if (keys.nonEmpty) {
+      underlying(keys).onSuccess { fetched =>
+        fetched.found.foreach { case (k, v) => cache.add(k, v) }
+      }
+    } else {
+      KeyValueResult.emptyFuture
+    }
+
+  /** Reports per-key hits and misses, plus the number of failed keys, to the observer. */
+  private def recordResults(cachedResults: KeyValueResult[K, Long]): Unit = {
+    for (key <- cachedResults.found.keys) observer.hit(key.toString)
+    for (key <- cachedResults.notFound) observer.miss(key.toString)
+    observer.failure(cachedResults.failed.size)
+  }
+}
diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala
new file mode 100644
index 000000000..fe6e257d2
--- /dev/null
+++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/CachingKeyValueRepository.scala
@@ -0,0 +1,736 @@
+package com.twitter.servo.repository
+
+import com.twitter.logging.{Level, Logger}
+import com.twitter.servo.cache._
+import com.twitter.servo.util.{Effect, Gate, RateLimitingLogger}
+import com.twitter.util._
+import scala.collection.mutable
+import scala.util.Random
+
+/**
+ * A set of classes that indicate how to handle cached results.
+ */
+sealed abstract class CachedResultAction[+V]
+
+object CachedResultAction {
+
+  /** Indicates a key should be fetched from the underlying repo */
+  case object HandleAsMiss extends CachedResultAction[Nothing]
+
+  /** Indicates a key should be returned as not-found, and not fetched from the underlying repo */
+  case object HandleAsNotFound extends CachedResultAction[Nothing]
+
+  /** Indicates the value should be returned as found */
+  case class HandleAsFound[V](value: V) extends CachedResultAction[V]
+
+  /** Indicates the value should not be cached */
+  case object HandleAsDoNotCache extends CachedResultAction[Nothing]
+
+  /** Indicates that the given action should be applied, and the given function applied to the resulting value */
+  case class TransformSubAction[V](action: CachedResultAction[V], f: V => V)
+      extends CachedResultAction[V]
+
+  /** Indicates the key should be returned as a failure */
+  case class HandleAsFailed(t: Throwable) extends CachedResultAction[Nothing]
+
+  /** Indicates that the value should be refetched asynchronously, but in the meantime be
+   * treated as the given action. */
+  case class SoftExpiration[V](action: CachedResultAction[V]) extends CachedResultAction[V]
+}
+
+/**
+ * A set of classes representing the various states for a cached result.
+ */
+sealed abstract class CachedResult[+K, +V] {
+  def key: K
+}
+
+object CachedResult {
+  import CachedResultAction._
+
+  /** Indicates the key was not in cache */
+  case class NotFound[K](key: K) extends CachedResult[K, Nothing]
+
+  /** Indicates there was an error fetching the key */
+  case class Failed[K](key: K, t: Throwable) extends CachedResult[K, Nothing]
+
+  /** Indicates the cached value could not be deserialized */
+  case class DeserializationFailed[K](key: K) extends CachedResult[K, Nothing]
+
+  /** Indicates the cached value could not be serialized */
+  case class SerializationFailed[K](key: K) extends CachedResult[K, Nothing]
+
+  /** Indicates that a NotFound tombstone was found in cache */
+  case class CachedNotFound[K](
+    key: K,
+    cachedAt: Time,
+    softTtlStep: Option[Short] = None)
+      extends CachedResult[K, Nothing]
+
+  /** Indicates that a Deleted tombstone was found in cache */
+  case class CachedDeleted[K](
+    key: K,
+    cachedAt: Time,
+    softTtlStep: Option[Short] = None)
+      extends CachedResult[K, Nothing]
+
+  /** Indicates that a value was found in cache */
+  case class CachedFound[K, V](
+    key: K,
+    value: V,
+    cachedAt: Time,
+    softTtlStep: Option[Short] = None)
+      extends CachedResult[K, V]
+
+  /** Indicates that the value should not be cached until the given time (if one is set) */
+  case class DoNotCache[K](key: K, until: Option[Time]) extends CachedResult[K, Nothing]
+
+  /** Decides, for every cached result, which action to take. */
+  type Handler[K, V] = CachedResult[K, V] => CachedResultAction[V]
+
+  /** Like Handler, but may decline (None) so that another handler can decide. */
+  type PartialHandler[K, V] = CachedResult[K, V] => Option[CachedResultAction[V]]
+
+  /** Produces the Handler to use for a given query. */
+  type HandlerFactory[Q, K, V] = Q => Handler[K, V]
+
+  /**
+   * companion object for Handler type
+   */
+  object Handler {
+
+    /**
+     * terminate a PartialHandler to produce a new Handler: results the partial handler
+     * declines are passed to the fallback handler.
+     */
+    def apply[K, V](
+      partial: PartialHandler[K, V],
+      handler: Handler[K, V] = defaultHandler[K, V]
+    ): Handler[K, V] = { cachedResult =>
+      partial(cachedResult) match {
+        case Some(s) => s
+        case None => handler(cachedResult)
+      }
+    }
+  }
+
+  /**
+   * companion object
for PartialHandler type
+   */
+  object PartialHandler {
+
+    /**
+     * Sugar to produce a PartialHandler from a PartialFunction. Successive calls to
+     * isDefinedAt MUST return the same result. Otherwise, take the syntax hit and wire
+     * up your own PartialHandler.
+     */
+    def apply[K, V](
+      partial: PartialFunction[CachedResult[K, V], CachedResultAction[V]]
+    ): PartialHandler[K, V] = partial.lift
+
+    /**
+     * chain one PartialHandler after another to produce a new PartialHandler; thatHandler
+     * is only consulted for results thisHandler declines.
+     */
+    def orElse[K, V](
+      thisHandler: PartialHandler[K, V],
+      thatHandler: PartialHandler[K, V]
+    ): PartialHandler[K, V] = { cachedResult =>
+      thisHandler(cachedResult) match {
+        case some @ Some(_) => some
+        case None => thatHandler(cachedResult)
+      }
+    }
+  }
+
+  /**
+   * companion object for HandlerFactory type
+   */
+  object HandlerFactory {
+    /** Builds a factory that returns the same handler for every query. */
+    def apply[Q, K, V](handler: Handler[K, V]): HandlerFactory[Q, K, V] = _ => handler
+  }
+
+  /** A factory that always returns defaultHandler. */
+  def defaultHandlerFactory[Q, K, V]: HandlerFactory[Q, K, V] =
+    HandlerFactory[Q, K, V](defaultHandler)
+
+  /**
+   * This is the default Handler. Failures are treated as misses.
+   */
+  def defaultHandler[K, V]: Handler[K, V] = {
+    case NotFound(_) | Failed(_, _) => HandleAsMiss
+    case DeserializationFailed(_) | SerializationFailed(_) => HandleAsMiss
+    case CachedNotFound(_, _, _) | CachedDeleted(_, _, _) => HandleAsNotFound
+    case CachedFound(_, value, _, _) => HandleAsFound(value)
+    // A do-not-cache marker whose deadline has passed is handled as an ordinary miss.
+    case DoNotCache(_, Some(time)) if Time.now > time => HandleAsMiss
+    case DoNotCache(_, _) => HandleAsDoNotCache
+  }
+
+  /**
+   * A PartialHandler that bubbles memcache failures up instead of converting
+   * those failures to misses.
+   */
+  def failuresAreFailures[K, V] = PartialHandler[K, V] {
+    case Failed(_, t) => HandleAsFailed(t)
+  }
+
+  /**
+   * A PartialHandler that doesn't attempt to write back to cache if the initial
+   * cache read failed, but still fetches from the underlying repo.
+   */
+  def failuresAreDoNotCache[K, V] = PartialHandler[K, V] {
+    case Failed(_, _) => HandleAsDoNotCache
+  }
+
+  /**
+   * A function that takes a cachedAt time and ttl, and returns an expiry time. This function
+   * _must_ be deterministic with respect to the arguments provided, otherwise, you might get a
+   * MatchError when using this with softTtlExpiration.
+   */
+  type Expiry = (Time, Duration) => Time
+
+  /**
+   * An Expiry function with an epsilon of zero.
+   */
+  val fixedExpiry: Expiry = (cachedAt: Time, ttl: Duration) => cachedAt + ttl
+
+  /**
+   * A repeatable "random" expiry function that perturbs the ttl with a random value
+   * no greater than +/-(ttl * maxFactor).
+   */
+  def randomExpiry(maxFactor: Float): Expiry = {
+    if (maxFactor == 0) {
+      fixedExpiry
+    } else { (cachedAt: Time, ttl: Duration) =>
+      {
+        // Seeding the generator with cachedAt makes the perturbation repeatable: the same
+        // cachedAt and ttl always produce the same expiry.
+        val factor = (2 * new Random(cachedAt.inMilliseconds).nextFloat - 1) * maxFactor
+        cachedAt + ttl + Duration.fromNanoseconds((factor * ttl.inNanoseconds).toLong)
+      }
+    }
+  }
+
+  /**
+   * soft-expires CachedFound and CachedNotFound based on a ttl.
+   *
+   * @param ttl
+   *  values older than this will be considered expired, but still
+   *  returned, and asynchronously refreshed in cache.
+   * @param expiry
+   *  (optional) function to compute the expiry time
+   */
+  def softTtlExpiration[K, V](
+    ttl: Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] =
+    softTtlExpiration(_ => ttl, expiry)
+
+  /**
+   * soft-expires CachedFound and CachedNotFound based on a ttl derived from the value
+   *
+   * @param ttl
+   *  values older than this will be considered expired, but still
+   *  returned, and asynchronously refreshed in cache.
+   * @param expiry
+   *  function to compute the expiry time (no default in this overload)
+   */
+  def softTtlExpiration[K, V](
+    ttl: Option[V] => Duration,
+    expiry: Expiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now =>
+      SoftExpiration(HandleAsFound(value))
+    case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+  }
+
+  /**
+   * soft-expires CachedFound, CachedNotFound and CachedDeleted based on a ttl derived from
+   * both the value and the softTtlStep
+   *
+   * @param ttl
+   *  values older than this will be considered expired, but still returned, and
+   *  asynchronously refreshed in cache. Receives None as the value for tombstones.
+   * @param expiry
+   *  (optional) function to compute the expiry time
+   */
+  def steppedSoftTtlExpiration[K, V](
+    ttl: (Option[V], Option[Short]) => Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedFound(_, value, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(Some(value), softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsFound(value))
+    case CachedNotFound(_, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+    case CachedDeleted(_, cachedAt, softTtlStep)
+        if expiry(cachedAt, ttl(None, softTtlStep)) < Time.now =>
+      SoftExpiration(HandleAsNotFound)
+  }
+
+  /**
+   * hard-expires CachedFound and CachedNotFound based on a ttl.
+   *
+   * @param ttl
+   *  values older than this will be considered a miss
+   * @param expiry
+   *  (optional) function to compute the expiry time
+   */
+  def hardTtlExpiration[K, V](
+    ttl: Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] =
+    hardTtlExpiration(_ => ttl, expiry)
+
+  /**
+   * hard-expires CachedFound and CachedNotFound based on a ttl derived from the value
+   *
+   * @param ttl
+   *  values older than this will be considered a miss
+   * @param expiry
+   *  function to compute the expiry time (no default in this overload)
+   */
+  def hardTtlExpiration[K, V](
+    ttl: Option[V] => Duration,
+    expiry: Expiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedFound(_, value, cachedAt, _) if expiry(cachedAt, ttl(Some(value))) < Time.now =>
+      HandleAsMiss
+    case CachedNotFound(_, cachedAt, _) if expiry(cachedAt, ttl(None)) < Time.now =>
+      HandleAsMiss
+  }
+
+  /**
+   * hard-expires a CachedNotFound tombstone based on a ttl. Unlike hardTtlExpiration, this
+   * handler decides every CachedNotFound: expired ones become misses, the rest not-found.
+   *
+   * @param ttl
+   *  values older than this will be considered expired
+   * @param expiry
+   *  (optional) function to compute the expiry time
+   */
+  def notFoundHardTtlExpiration[K, V](
+    ttl: Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedNotFound(_, cachedAt, _) =>
+      if (expiry(cachedAt, ttl) < Time.now)
+        HandleAsMiss
+      else
+        HandleAsNotFound
+  }
+
+  /**
+   * hard-expires a CachedDeleted tombstone based on a ttl. This handler decides every
+   * CachedDeleted: expired ones become misses, the rest not-found.
+   *
+   * @param ttl
+   *  values older than this will be considered expired
+   * @param expiry
+   *  (optional) function to compute the expiry time
+   */
+  def deletedHardTtlExpiration[K, V](
+    ttl: Duration,
+    expiry: Expiry = fixedExpiry
+  ): PartialHandler[K, V] = PartialHandler[K, V] {
+    case CachedDeleted(_, cachedAt, _) =>
+      if (expiry(cachedAt, ttl) < Time.now)
+        HandleAsMiss
+      else
+        HandleAsNotFound
+  }
+
+  /**
+   * read only from cache, never fall back to underlying KeyValueRepository
+   */
+  def cacheOnly[K, V]: Handler[K, V] = {
+    case CachedFound(_, value, _, _) => HandleAsFound(value)
+    case _ => HandleAsNotFound
+  }
+
+  /**
+   * use either primary or backup Handler, depending on usePrimary result
+   *
+   * @param primaryHandler
+   *   the handler to be used if usePrimary evaluates to true
+   * @param backupHandler
+   *   the handler to be used if usePrimary evaluates to false
+   * @param usePrimary
+   *   evaluates the query to determine which handler to use
+   */
+  def switchedHandlerFactory[Q, K, V](
+    primaryHandler: Handler[K, V],
+    backupHandler: Handler[K, V],
+    usePrimary: Q => Boolean
+  ): HandlerFactory[Q, K, V] = { query =>
+    if (usePrimary(query))
+      primaryHandler
+    else
+      backupHandler
+  }
+}
+
+object CacheResultObserver {
+  /**
+   * The three result sets a CachingKeyValueRepository hands to its cacheResultObserver:
+   * what the cache returned, what the read-through for cache misses returned, and what
+   * the read-through for soft-expired keys returned.
+   */
+  case class CachingRepositoryResult[K, V](
+    resultFromCache: KeyValueResult[K, Cached[V]],
+    resultFromCacheMissReadthrough: KeyValueResult[K, V],
+    resultFromSoftTtlReadthrough: KeyValueResult[K, V])
+  def unit[K, V] = Effect.unit[CachingRepositoryResult[K, V]]
+}
+
+object CachingKeyValueRepository {
+  type CacheResultObserver[K, V] = Effect[CacheResultObserver.CachingRepositoryResult[K, V]]
+}
+
+/**
+ * Reads keyed values from a LockingCache, and reads through to an underlying
+ * KeyValueRepository for misses. supports a "soft ttl", beyond which values
+ * will be read through out-of-band to the originating request
+ *
+ * @param underlying
+ *   the underlying KeyValueRepository
+ * @param cache
+ *   the locking cache to read from
+ * @param newQuery
+ *   a function for converting a subset of the keys of the original query into a new
+ *   query.  this is used to construct the query passed to the underlying repository
+ *   to fetch the cache misses.
+ * @param handlerFactory
+ *   A factory to produce functions that specify policies about how to handle results
+ *   from cache. (i.e.
to handle failures as misses vs failures, etc)
+ * @param picker
+ *   used to choose between the value in cache and the value read from the DB when
+ *   storing values in the cache
+ * @param observer
+ *   a CacheObserver for collecting cache statistics
+ * @param writeSoftTtlStep
+ *   Write the soft_ttl_step value to indicate number of consistent reads from underlying store
+ * @param cacheResultObserver
+ *   An [[Effect]] of type [[CacheResultObserver.CachingRepositoryResult]] which is useful for examining
+ *   the results from the cache, underlying storage, and any later read-throughs. The effect is
+ *   executed asynchronously from the request path and has no bearing on the Future[KeyValueResult]
+ *   returned from this Repository.
+ */
+class CachingKeyValueRepository[Q <: Seq[K], K, V](
+  underlying: KeyValueRepository[Q, K, V],
+  val cache: LockingCache[K, Cached[V]],
+  newQuery: SubqueryBuilder[Q, K],
+  handlerFactory: CachedResult.HandlerFactory[Q, K, V] =
+    CachedResult.defaultHandlerFactory[Q, K, V],
+  picker: LockingCache.Picker[Cached[V]] = new PreferNewestCached[V]: PreferNewestCached[V],
+  observer: CacheObserver = NullCacheObserver,
+  writeSoftTtlStep: Gate[Unit] = Gate.False,
+  cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] =
+    CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]])
+    extends KeyValueRepository[Q, K, V] {
+  import CachedResult._
+  import CachedResultAction._
+
+  protected[this] val log = Logger.get(getClass.getSimpleName)
+  // Used for exceptions from cache reads and writes, which can occur at a high rate.
+  private[this] val rateLimitedLogger = new RateLimitingLogger(logger = log)
+
+  // Separate stats scope in which soft-expired keys are counted as misses rather than hits.
+  protected[this] val effectiveCacheStats = observer.scope("effective")
+
+  /**
+   * Calculates the softTtlStep based on result from cache and underlying store.
+   * The softTtlStep indicates how many times we have
+   * performed & recorded a consistent read-through.
+   * A value of None is equivalent to Some(0) - it indicates zero consistent read-throughs.
+   */
+  protected[this] def updateSoftTtlStep(
+    underlyingResult: Option[V],
+    cachedResult: Cached[V]
+  ): Option[Short] = {
+    if (writeSoftTtlStep() && underlyingResult == cachedResult.value) {
+      cachedResult.softTtlStep match {
+        case Some(step) if step < Short.MaxValue => Some((step + 1).toShort)
+        // Saturate at Short.MaxValue instead of overflowing.
+        case Some(step) if step == Short.MaxValue => cachedResult.softTtlStep
+        case _ => Some(1)
+      }
+    } else {
+      // Gate is off, or the cached value disagrees with the underlying store: reset.
+      None
+    }
+  }
+
+  /**
+   * The keys of a query bucketed by the action the handler chose for each cached result.
+   * A key can be in more than one bucket: a soft-expired key is listed in softExpirations
+   * and also in the bucket of its sub-action, and a transformed key is in transforms too.
+   */
+  protected case class ProcessedCacheResult(
+    hits: Map[K, V],
+    misses: Seq[K],
+    doNotCache: Set[K],
+    failures: Map[K, Throwable],
+    tombstones: Set[K],
+    softExpirations: Seq[K],
+    transforms: Map[K, (V => V)])
+
+  /**
+   * Looks the keys up in cache, reads through to the underlying repository for misses and
+   * do-not-cache keys, and merges both sources into one result. Soft-expired keys are
+   * additionally refreshed by a read-through that the returned Future does not wait for.
+   */
+  override def apply(keys: Q): Future[KeyValueResult[K, V]] = {
+    getFromCache(keys).flatMap { cacheResult =>
+      val ProcessedCacheResult(
+        hits,
+        misses,
+        doNotCache,
+        failures,
+        tombstones,
+        softExpirations,
+        transforms
+      ) =
+        process(keys, cacheResult)
+
+      if (log.isLoggable(Level.TRACE)) {
+        log.trace(
+          "CachingKVR.apply keys %d hit %d miss %d noCache %d failure %d " +
+            "tombstone %d softexp %d",
+          keys.size,
+          hits.size,
+          misses.size,
+          doNotCache.size,
+          failures.size,
+          tombstones.size,
+          softExpirations.size
+        )
+      }
+      recordCacheStats(
+        keys,
+        notFound = misses.toSet,
+        doNotCache = doNotCache,
+        expired = softExpirations.toSet,
+        numFailures = failures.size,
+        numTombstones = tombstones.size
+      )
+
+      // now read through all notFound
+      // (do-not-cache keys are fetched too, but left out of the write-back query)
+      val underlyingQuery = newQuery(misses ++ doNotCache, keys)
+      val writeToCacheQuery = if (doNotCache.nonEmpty) newQuery(misses, keys) else underlyingQuery
+      val futureFromUnderlying = readThrough(underlyingQuery, writeToCacheQuery)
+
+      // async read-through for the expired results, ignore results
+      val softExpirationQuery = newQuery(softExpirations, keys)
+      val futureFromSoftExpiry = readThrough(softExpirationQuery, softExpirationQuery, cacheResult)
+
+      // merge all results together
+      for {
+        fromUnderlying <- futureFromUnderlying
+        fromCache = KeyValueResult(hits, tombstones, failures)
+        fromUnderlyingTransformed = transformResults(fromUnderlying, transforms)
+      } yield {
+        // The observer only runs if the soft-expiry read-through succeeds; it is attached
+        // as a callback, so the result below does not wait for it.
+        futureFromSoftExpiry.onSuccess { readThroughResults =>
+          cacheResultObserver(
+            CacheResultObserver.CachingRepositoryResult(
+              cacheResult,
+              fromUnderlyingTransformed,
+              readThroughResults
+            )
+          )
+        }
+        KeyValueResult.sum(Seq(fromCache, fromUnderlyingTransformed))
+      }
+    }
+  }
+
+  /**
+   * Given results and a map of keys to transform functions, apply those transform functions
+   * to the found results.
+   */
+  protected[this] def transformResults(
+    results: KeyValueResult[K, V],
+    transforms: Map[K, (V => V)]
+  ): KeyValueResult[K, V] = {
+    if (transforms.isEmpty) {
+      results
+    } else {
+      results.copy(found = results.found.map {
+        case (key, value) =>
+          (key, transforms.get(key).map(_(value)).getOrElse(value))
+      })
+    }
+  }
+
+  /**
+   * Reads the distinct keys from cache. An exception from the cache call is logged and
+   * converted into a result in which every key failed with that exception.
+   */
+  protected[this] def getFromCache(keys: Seq[K]): Future[KeyValueResult[K, Cached[V]]] = {
+    val uniqueKeys = keys.distinct
+    cache.get(uniqueKeys) handle {
+      case t: Throwable =>
+        rateLimitedLogger.logThrowable(t, "exception caught in cache get")
+
+        // treat total cache failure as a fetch that returned all failures
+        KeyValueResult(failed = uniqueKeys.map { _ -> t }.toMap)
+    }
+  }
+
+  /**
+   * Buckets cache results according to the wishes of the CachedResultHandler
+   */
+  protected[this] def process(
+    keys: Q,
+    cacheResult: KeyValueResult[K, Cached[V]]
+  ): ProcessedCacheResult = {
+    val cachedResultHandler = handlerFactory(keys)
+
+    val hits = Map.newBuilder[K, V]
+    val misses = new mutable.ArrayBuffer[K]
+    val failures = Map.newBuilder[K, Throwable]
+    val tombstones = Set.newBuilder[K]
+    val softExpiredKeys = new mutable.ListBuffer[K]
+    val doNotCache = Set.newBuilder[K]
+    val transforms = Map.newBuilder[K, (V => V)]
+
+    for (key <- keys) {
+      // Translate the raw cache entry into the CachedResult state the handler understands.
+      val cachedResult = cacheResult(key) match {
+        case Throw(t) => Failed(key, t)
+        case Return(None) => NotFound(key)
+        case Return(Some(cached)) =>
+          cached.status match {
+            case CachedValueStatus.Found =>
+              cached.value match {
+                case None => NotFound(key)
+                case Some(value) =>
+                  CachedFound(
+                    key,
+                    value,
+                    cached.cachedAt,
+                    cached.softTtlStep
+                  )
+              }
+            // Tombstones carry the stored softTtlStep as well: writeToCache maintains the
+            // step for NotFound results and steppedSoftTtlExpiration matches on it, so
+            // dropping it here would make every tombstone look like step None.
+            case CachedValueStatus.NotFound =>
+              CachedNotFound(key, cached.cachedAt, cached.softTtlStep)
+            case CachedValueStatus.Deleted =>
+              CachedDeleted(key, cached.cachedAt, cached.softTtlStep)
+            case CachedValueStatus.SerializationFailed => SerializationFailed(key)
+            case CachedValueStatus.DeserializationFailed => DeserializationFailed(key)
+            case CachedValueStatus.Evicted => NotFound(key)
+            case CachedValueStatus.DoNotCache => DoNotCache(key, cached.doNotCacheUntil)
+          }
+      }
+
+      // Wrapper actions (TransformSubAction, SoftExpiration) record the key and then
+      // recurse into the action they wrap.
+      def processAction(action: CachedResultAction[V]): Unit = {
+        action match {
+          case HandleAsMiss => misses += key
+          case HandleAsFound(value) => hits += key -> value
+          case HandleAsNotFound => tombstones += key
+          case HandleAsDoNotCache => doNotCache += key
+          case HandleAsFailed(t) => failures += key -> t
+          case TransformSubAction(subAction, f) =>
+            transforms += key -> f
+            processAction(subAction)
+          case SoftExpiration(subAction) =>
+            softExpiredKeys += key
+            processAction(subAction)
+        }
+      }
+
+      processAction(cachedResultHandler(cachedResult))
+    }
+
+    ProcessedCacheResult(
+      hits.result(),
+      misses,
+      doNotCache.result(),
+      failures.result(),
+      tombstones.result(),
+      softExpiredKeys,
+      transforms.result()
+    )
+  }
+
+  /**
+   * Reports per-key hits and misses plus aggregate counts. `observer` counts a key as a
+   * miss only when it was handled as a miss; the "effective" scope also counts
+   * soft-expired keys as misses.
+   */
+  protected[this] def recordCacheStats(
+    keys: Seq[K],
+    notFound: Set[K],
+    doNotCache: Set[K],
+    expired: Set[K],
+    numFailures: Int,
+    numTombstones: Int
+  ): Unit = {
+    keys.foreach { key =>
+      val wasntFound = notFound.contains(key)
+      val keyString = key.toString
+      if (wasntFound || expired.contains(key))
+        effectiveCacheStats.miss(keyString)
+      else
+        effectiveCacheStats.hit(keyString)
+
+      if (wasntFound)
+        observer.miss(keyString)
+      else
+        observer.hit(keyString)
+    }
+    observer.expired(expired.size)
+    observer.failure(numFailures)
+    observer.tombstone(numTombstones)
+    observer.noCache(doNotCache.size)
+  }
+
+  /**
+   * read through to the underlying repository
+   *
+   *
@param cacheKeys + * the keys to read and cache + */ + def readThrough(cacheKeys: Q): Future[KeyValueResult[K, V]] = { + readThrough(cacheKeys, cacheKeys) + } + + /** + * read through to the underlying repository + * + * @param writeToCacheQuery + * the query to pass to the writeToCache method after getting a result back from the + * underlying repository. this query can be exactly the same as underlyingQuery if + * all readThrough keys should be cached, or it may contain a subset of the keys if + * some keys should not be written back to cache. + * @param cacheResult + * the current cache results for underlyingQuery. + */ + def readThrough( + underlyingQuery: Q, + writeToCacheQuery: Q, + cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult.empty + ): Future[KeyValueResult[K, V]] = { + if (underlyingQuery.isEmpty) { + KeyValueResult.emptyFuture + } else { + underlying(underlyingQuery).onSuccess { result => + if (writeToCacheQuery.nonEmpty) { + writeToCache(writeToCacheQuery, result, cacheResult) + } + } + } + } + + /** + * Writes the contents of the given KeyValueResult to cache. + */ + def writeToCache( + keys: Q, + underlyingResult: KeyValueResult[K, V], + cacheResult: KeyValueResult[K, Cached[V]] = KeyValueResult[K, Cached[V]]() + ): Unit = { + lazy val cachedEmpty = { + val now = Time.now + Cached[V](None, CachedValueStatus.NotFound, now, Some(now), softTtlStep = None) + } + + keys.foreach { key => + // only cache Returns from the underlying repo, skip Throws. 
+ // iff cached value matches value from underlying store + // (for both NotFound and Found results), increment softTtlStep + // otherwise, set softTtlStep to None + underlyingResult(key) match { + case Return(optUnderlyingVal) => + val softTtlStep = + cacheResult(key) match { + case Return(Some(cacheVal)) => updateSoftTtlStep(optUnderlyingVal, cacheVal) + case _ => None + } + + val status = + optUnderlyingVal match { + case Some(_) => CachedValueStatus.Found + case None => CachedValueStatus.NotFound + } + + val cached = + cachedEmpty.copy( + value = optUnderlyingVal, + status = status, + softTtlStep = softTtlStep + ) + + cache + .lockAndSet(key, LockingCache.PickingHandler(cached, picker)) + .onFailure { + case t: Throwable => + rateLimitedLogger.logThrowable(t, "exception caught in lockAndSet") + } + + case Throw(_) => None + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala new file mode 100644 index 000000000..1816596fc --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ChunkingStrategy.scala @@ -0,0 +1,50 @@ +package com.twitter.servo.repository + +object ChunkingStrategy { + + /** + * A chunking strategy for breaking a query into fixed size chunks, with the last + * chunk possibly being any size between 1 and chunkSize. + */ + def fixedSize[K](chunkSize: Int): Seq[K] => Seq[Seq[K]] = { + fixedSize(chunkSize, keysAsQuery[K]) + } + + /** + * A chunking strategy for breaking a query into fixed size chunks, with the last + * chunk possibly being any size between 1 and chunkSize. + */ + def fixedSize[Q <: Seq[K], K]( + chunkSize: Int, + newQuery: SubqueryBuilder[Q, K] + ): Q => Seq[Q] = { query => + query.distinct.grouped(chunkSize) map { newQuery(_, query) } toSeq + } + + /** + * A chunking strategy for breaking a query into roughly equal sized chunks no + * larger than maxSize. 
The last chunk may be slightly smaller due to rounding. + */ + def equalSize[K](maxSize: Int): Seq[K] => Seq[Seq[K]] = { + equalSize(maxSize, keysAsQuery[K]) + } + + /** + * A chunking strategy for breaking a query into roughly equal sized chunks no + * larger than maxSize. The last chunk may be slightly smaller due to rounding. + */ + def equalSize[Q <: Seq[K], K]( + maxSize: Int, + newQuery: SubqueryBuilder[Q, K] + ): Q => Seq[Q] = { query => + { + if (query.size <= maxSize) { + Seq(query) + } else { + val chunkCount = math.ceil(query.size / maxSize.toDouble) + val chunkSize = math.ceil(query.size / chunkCount).toInt + query.distinct.grouped(chunkSize) map { newQuery(_, query) } toSeq + } + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala new file mode 100644 index 000000000..f5c3f4c46 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/DarkmodingKeyValueRepositoryFactory.scala @@ -0,0 +1,161 @@ +package com.twitter.servo.repository + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.servo.cache.{CacheObserver, Cached, LockingCache} +import com.twitter.servo.repository +import com.twitter.servo.repository.CachedResult.{Handler, HandlerFactory} +import com.twitter.servo.util._ +import com.twitter.util._ + +import scala.util.control.NoStackTrace + +object DarkmodingKeyValueRepositoryFactory { + val DefaultEwmaHalfLife = 5.minutes + val DefaultRecentWindow = 10.seconds + val DefaultWindowSize = 5000 + val DefaultAvailabilityFromSuccessRate = + Availability.linearlyScaled(highWaterMark = 0.98, lowWaterMark = 0.75, minAvailability = 0.02) + + def DefaultEwmaTracker = new EwmaSuccessRateTracker(DefaultEwmaHalfLife) + def DefaultRecentWindowTracker = 
SuccessRateTracker.recentWindowed(DefaultRecentWindow) + def DefaultRollingWindowTracker = SuccessRateTracker.rollingWindow(DefaultWindowSize) + + /** + * Wraps an underlying repository, which can be manually or automatically darkmoded. + * + * Auto-darkmoding is based on success rate (SR) as reported by a [[SuccessRateTracker]]. + * + * @param readFromUnderlying Open: operate normally. Closed: read from backupRepo regardless of SR. + * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. + * @param stats Used to record success rate and availability; often should be scoped to this repo for stats naming + * @param underlyingRepo The underlying repo; read from when not darkmoded + * @param backupRepo The repo to read from when darkmoded; defaults to an always-failing repo. + * @param successRateTracker Strategy for reporting SR, usually over a moving window + * @param availabilityFromSuccessRate Function to calculate availability based on success rate + * @param shouldIgnore don't count certain exceptions as failures, e.g. 
cancellations + */ + def darkmoding[Q <: Seq[K], K, V]( + readFromUnderlying: Gate[Unit], + autoDarkmode: Gate[Unit], + stats: StatsReceiver, + underlyingRepo: KeyValueRepository[Q, K, V], + backupRepo: KeyValueRepository[Q, K, V] = + KeyValueRepository.alwaysFailing[Q, K, V](DarkmodedException), + successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, + availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation + ): KeyValueRepository[Q, K, V] = { + val (successRateTrackingRepoFactory, successRateGate) = + SuccessRateTrackingRepository.withGate[Q, K, V]( + stats, + availabilityFromSuccessRate, + successRateTracker.observed(stats), + shouldIgnore + ) + val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) + + Repository.selected( + q => gate(()), + successRateTrackingRepoFactory(underlyingRepo), + backupRepo + ) + } + + /** + * Produces a caching repository around an underlying repository, which + * can be manually or automatically darkmoded. + * + * @param underlyingRepo The underlying repo from which to read + * @param cache The typed locking cache to fall back to when darkmoded + * @param picker Used to break ties when a value being written is already present in cache + * @param readFromUnderlying Open: operate normally. Closed: read from cache regardless of SR. + * @param autoDarkmode Open: auto-darkmoding kicks in based on SR. Closed: auto-darkmoding will not kick in regardless of SR. 
+ * @param cacheObserver Observes interactions with the cache; often should be scoped to this repo for stats naming + * @param stats Used to record various stats; often should be scoped to this repo for stats naming + * @param handler a [[Handler]] to use when not darkmoded + * @param successRateTracker Strategy for reporting SR, usually over a moving window + * @param availabilityFromSuccessRate Function to calculate availability based on success rate + * @param shouldIgnore don't count certain exceptions as failures, e.g. cancellations + */ + def darkmodingCaching[K, V, CacheKey]( + underlyingRepo: KeyValueRepository[Seq[K], K, V], + cache: LockingCache[K, Cached[V]], + picker: LockingCache.Picker[Cached[V]], + readFromUnderlying: Gate[Unit], + autoDarkmode: Gate[Unit], + cacheObserver: CacheObserver, + stats: StatsReceiver, + handler: Handler[K, V], + successRateTracker: SuccessRateTracker = DefaultRecentWindowTracker, + availabilityFromSuccessRate: Double => Double = DefaultAvailabilityFromSuccessRate, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation, + writeSoftTtlStep: Gate[Unit] = Gate.False, + cacheResultObserver: CachingKeyValueRepository.CacheResultObserver[K, V] = + CacheResultObserver.unit[K, V]: Effect[CacheResultObserver.CachingRepositoryResult[K, V]] + ): CachingKeyValueRepository[Seq[K], K, V] = { + val (successRateTrackingRepoFactory, successRateGate) = + SuccessRateTrackingRepository.withGate[Seq[K], K, V]( + stats, + availabilityFromSuccessRate, + successRateTracker.observed(stats), + shouldIgnore + ) + val gate = mkGate(successRateGate, readFromUnderlying, autoDarkmode) + + new CachingKeyValueRepository[Seq[K], K, V]( + successRateTrackingRepoFactory(underlyingRepo), + cache, + repository.keysAsQuery, + mkHandlerFactory(handler, gate), + picker, + cacheObserver, + writeSoftTtlStep = writeSoftTtlStep, + cacheResultObserver = cacheResultObserver + ) + } + + /** + * Create a composite gate suitable for controlling 
darkmoding, usually via decider + * + * @param successRate gate that should close and open according to success rate (SR) changes + * @param readFromUnderlying if open: returned gate operates normally. if closed: returned gate will be closed regardless of SR + * @param autoDarkMode if open: close gate according to SR. if closed: gate ignores SR changes + * @return the composite gate `readFromUnderlying & (successRate | !autoDarkMode)` + */ + def mkGate( + successRate: Gate[Unit], + readFromUnderlying: Gate[Unit], + autoDarkMode: Gate[Unit] + ): Gate[Unit] = + readFromUnderlying & (successRate | !autoDarkMode) + + /** + * Construct a [[CachedResult.Handler]] with sane defaults for use with a caching darkmoded repository + * @param softTtl TTL for soft-expiration of values in the cache + * @param expiry Used to apply the softTTL (e.g. fixed vs randomly perturbed) + */ + def mkDefaultHandler[K, V]( + softTtl: Option[V] => Duration, + expiry: CachedResult.Expiry + ): Handler[K, V] = + CachedResult.Handler( + CachedResult.failuresAreDoNotCache, + CachedResult.Handler(CachedResult.softTtlExpiration(softTtl, expiry)) + ) + + private[repository] def mkHandlerFactory[CacheKey, V, K]( + handler: Handler[K, V], + successRateGate: Gate[Unit] + ): HandlerFactory[Seq[K], K, V] = + query => + if (successRateGate(())) handler + else CachedResult.cacheOnly +} + +/** + * This exception is returned from a repository when it is auto-darkmoded due to low backend + * success rate, or darkmoded manually via gate (usually a decider). 
+ */ +class DarkmodedException extends Exception with NoStackTrace +object DarkmodedException extends DarkmodedException diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala new file mode 100644 index 000000000..f8df436d0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/HotKeyCachingKeyValueRepository.scala @@ -0,0 +1,74 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.servo.cache.{InProcessCache, StatsReceiverCacheObserver} +import com.twitter.servo.util.FrequencyCounter +import com.twitter.util.Future + +/** + * A KeyValueRepository which uses a sliding window to track + * the frequency at which keys are requested and diverts requests + * for keys above the promotionThreshold through an in-memory request cache. + * + * @param underlyingRepo + * the underlying KeyValueRepository + * @param newQuery + * a function for converting a subset of the keys of the original query into a new query. 
+ * @param windowSize + * the number of previous requests to include in the window + * @param promotionThreshold + * the number of requests for the same key in the window required + * to divert the request through the request cache + * @param cacheFactory + * a function which constructs a future response cache of the given size + * @param statsReceiver + * records stats on the cache + * @param disableLogging + * disables logging in token cache for pdp purposes + */ +object HotKeyCachingKeyValueRepository { + def apply[Q <: Seq[K], K, V]( + underlyingRepo: KeyValueRepository[Q, K, V], + newQuery: SubqueryBuilder[Q, K], + windowSize: Int, + promotionThreshold: Int, + cacheFactory: Int => InProcessCache[K, Future[Option[V]]], + statsReceiver: StatsReceiver, + disableLogging: Boolean = false + ): KeyValueRepository[Q, K, V] = { + val log = Logger.get(getClass.getSimpleName) + + val promotionsCounter = statsReceiver.counter("promotions") + + val onPromotion = { (k: K) => + log.debug("key %s promoted to HotKeyCache", k.toString) + promotionsCounter.incr() + } + + val frequencyCounter = new FrequencyCounter[K](windowSize, promotionThreshold, onPromotion) + + // Maximum cache size occurs in the event that every key in the buffer occurs + // `promotionThreshold` times. We apply a failure-refreshing filter to avoid + // caching failed responses. 
+ val cache = + InProcessCache.withFilter( + cacheFactory(windowSize / promotionThreshold) + )( + ResponseCachingKeyValueRepository.refreshFailures + ) + + val observer = + new StatsReceiverCacheObserver(statsReceiver, windowSize, "request_cache", disableLogging) + + val cachingRepo = + new ResponseCachingKeyValueRepository[Q, K, V](underlyingRepo, cache, newQuery, observer) + + KeyValueRepository.selected( + frequencyCounter.incr, + cachingRepo, + underlyingRepo, + newQuery + ) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala new file mode 100644 index 000000000..f1711e99c --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ImmutableKeyValueRepository.scala @@ -0,0 +1,18 @@ +package com.twitter.servo.repository + +import com.twitter.util.{Future, Return, Throw, Try} + +class ImmutableKeyValueRepository[K, V](data: Map[K, Try[V]]) + extends KeyValueRepository[Seq[K], K, V] { + def apply(keys: Seq[K]) = Future { + val hits = keys flatMap { key => + data.get(key) map { key -> _ } + } toMap + + val found = hits collect { case (key, Return(value)) => key -> value } + val failed = hits collect { case (key, Throw(t)) => key -> t } + val notFound = keys.toSet -- found.keySet -- failed.keySet + + KeyValueResult(found, notFound, failed) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala new file mode 100644 index 000000000..82f6393f0 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/KeyValueRepository.scala @@ -0,0 +1,192 @@ +package com.twitter.servo.repository + +import com.twitter.util.{Future, Try} + +object KeyValueRepository { + + /** + * Builds a KeyValueRepository that returns 
KeyValueResults in which all keys failed with the + * provided Throwable. + */ + def alwaysFailing[Q <: Seq[K], K, V](failure: Throwable): KeyValueRepository[Q, K, V] = + (query: Q) => + Future.value( + KeyValueResult[K, V]( + failed = query map { _ -> failure } toMap + ) + ) + + /** + * Builds an immutable KeyValueRepository + */ + def apply[K, V](data: Map[K, Try[V]]): KeyValueRepository[Seq[K], K, V] = + new ImmutableKeyValueRepository(data) + + /** + * Sets up a mapReduce type operation on a KeyValueRepository where the query mapping function + * breaks the query up into smaller chunks, and the reducing function is just KeyValueResult.sum. + */ + def chunked[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + chunker: Q => Seq[Q] + ): KeyValueRepository[Q, K, V] = + Repository.mapReduced(repo, chunker, KeyValueResult.sum[K, V]) + + /** + * Wraps a KeyValueRepository with stats recording functionality. + */ + def observed[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + observer: RepositoryObserver, + querySize: Q => Int + ): KeyValueRepository[Q, K, V] = + query => { + observer.time(querySize(query)) { + repo(query).respond(observer.observeKeyValueResult) + } + } + + /** + * Creates a new KeyValueRepository that dispatches to onTrueRepo if the key + * predicate returns true, dispatches to onFalseRepo otherwise. + */ + def selected[Q <: Seq[K], K, V]( + select: K => Boolean, + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V], + queryBuilder: SubqueryBuilder[Q, K] + ): KeyValueRepository[Q, K, V] = selectedByQuery( + predicateFactory = _ => select, + onTrueRepo = onTrueRepo, + onFalseRepo = onFalseRepo, + queryBuilder = queryBuilder + ) + + /** + * Creates a new KeyValueRepository that uses predicateFactory to create a key predicate, then + * dispatches to onTrueRepo if the key predicate returns true, dispatches to onFalseRepo + * otherwise. 
+ */ + def selectedByQuery[Q <: Seq[K], K, V]( + predicateFactory: Q => (K => Boolean), + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V], + queryBuilder: SubqueryBuilder[Q, K] + ): KeyValueRepository[Q, K, V] = { + val queryIsEmpty = (q: Q) => q.isEmpty + val r1 = shortCircuitEmpty(queryIsEmpty)(onTrueRepo) + val r2 = shortCircuitEmpty(queryIsEmpty)(onFalseRepo) + + (query: Q) => { + val predicate = predicateFactory(query) + val (q1, q2) = query.partition(predicate) + val futureRst1 = r1(queryBuilder(q1, query)) + val futureRst2 = r2(queryBuilder(q2, query)) + for { + r1 <- futureRst1 + r2 <- futureRst2 + } yield r1 ++ r2 + } + } + + /** + * Creates a new KeyValueRepository that dispatches to onTrueRepo if the query + * predicate returns true, dispatches to onFalseRepo otherwise. + */ + def choose[Q, K, V]( + predicate: Q => Boolean, + onTrueRepo: KeyValueRepository[Q, K, V], + onFalseRepo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = { (query: Q) => + { + if (predicate(query)) { + onTrueRepo(query) + } else { + onFalseRepo(query) + } + } + } + + /** + * Short-circuit a KeyValueRepository to return an empty + * KeyValueResult when the query is empty rather than calling the + * backend. It is up to the caller to define empty. + * + * The implementation of repo and isEmpty should satisfy: + * + * forAll { (q: Q) => !isEmpty(q) || (repo(q).get == KeyValueResult.empty[K, V]) } + */ + def shortCircuitEmpty[Q, K, V]( + isEmpty: Q => Boolean + )( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = { q => + if (isEmpty(q)) KeyValueResult.emptyFuture[K, V] else repo(q) + } + + /** + * Short-circuit a KeyValueRepository to return an empty + * KeyValueResult for any empty Traversable query rather than + * calling the backend. 
+ * + * The implementation of repo should satisfy: + * + * forAll { (q: Q) => !q.isEmpty || (repo(q).get == KeyValueResult.empty[K, V]) } + */ + def shortCircuitEmpty[Q <: Traversable[_], K, V]( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = shortCircuitEmpty[Q, K, V]((_: Q).isEmpty)(repo) + + /** + * Turns a bulking KeyValueRepository into a non-bulking Repository. The query to the + * KeyValueRepository must be nothing more than a Seq[K]. + */ + def singular[K, V](repo: KeyValueRepository[Seq[K], K, V]): Repository[K, Option[V]] = + singular(repo, (key: K) => Seq(key)) + + /** + * Turns a bulking KeyValueRepository into a non-bulking Repository. + */ + def singular[Q, K, V]( + repo: KeyValueRepository[Q, K, V], + queryBuilder: K => Q + ): Repository[K, Option[V]] = + key => { + repo(queryBuilder(key)) flatMap { results => + Future.const(results(key)) + } + } + + /** + * Converts a KeyValueRepository with value type V to one with value type + * V2 using a function that maps found values. + */ + def mapFound[Q, K, V, V2]( + repo: KeyValueRepository[Q, K, V], + f: V => V2 + ): KeyValueRepository[Q, K, V2] = + repo andThen { _ map { _ mapFound f } } + + /** + * Converts a KeyValueRepository with value type V to one with value type + * V2 using a function that maps over results. 
+ */ + def mapValues[Q, K, V, V2]( + repo: KeyValueRepository[Q, K, V], + f: Try[Option[V]] => Try[Option[V2]] + ): KeyValueRepository[Q, K, V2] = + repo andThen { _ map { _ mapValues f } } + + /** + * Turns a KeyValueRepository which may throw an exception to another + * KeyValueRepository which always returns Future.value(KeyValueResult) + * even when there is an exception + */ + def scatterExceptions[Q <: Traversable[K], K, V]( + repo: KeyValueRepository[Q, K, V] + ): KeyValueRepository[Q, K, V] = + q => + repo(q) handle { + case t => KeyValueResult[K, V](failed = q map { _ -> t } toMap) + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala new file mode 100644 index 000000000..4f0fc1f42 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ObservableKeyValueRepository.scala @@ -0,0 +1,89 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} +import com.twitter.util.{Future, Return, Throw, Try} + +class RepositoryObserver( + statsReceiver: StatsReceiver, + bucketBySize: Boolean, + exceptionCounter: ExceptionCounter) { + protected[this] lazy val timer = new LogarithmicallyBucketedTimer(statsReceiver) + protected[this] val sizeStat = statsReceiver.stat("size") + protected[this] val foundStat = statsReceiver.counter("found") + protected[this] val notFoundStat = statsReceiver.counter("not_found") + protected[this] val total = statsReceiver.counter("total") + private[this] val timeStat = statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName) + + def this(statsReceiver: StatsReceiver, bucketBySize: Boolean = true) = + this(statsReceiver, bucketBySize, new ExceptionCounter(statsReceiver)) + + def time[T](size: Int = 1)(f: => Future[T]) = { + 
sizeStat.add(size) + if (bucketBySize) + timer(size)(f) + else + Stat.timeFuture(timeStat)(f) + } + + private[this] def total(size: Int = 1): Unit = total.incr(size) + + def found(size: Int = 1): Unit = { + foundStat.incr(size) + total(size) + } + + def notFound(size: Int = 1): Unit = { + notFoundStat.incr(size) + total(size) + } + + def exception(ts: Throwable*): Unit = { + exceptionCounter(ts) + total(ts.size) + } + + def exceptions(ts: Seq[Throwable]): Unit = { + exception(ts: _*) + } + + def observeTry[V](tryObj: Try[V]): Unit = { + tryObj.respond { + case Return(_) => found() + case Throw(t) => exception(t) + } + } + + def observeOption[V](optionTry: Try[Option[V]]): Unit = { + optionTry.respond { + case Return(Some(_)) => found() + case Return(None) => notFound() + case Throw(t) => exception(t) + } + } + + def observeKeyValueResult[K, V](resultTry: Try[KeyValueResult[K, V]]): Unit = { + resultTry.respond { + case Return(result) => + found(result.found.size) + notFound(result.notFound.size) + exceptions(result.failed.values.toSeq) + case Throw(t) => + exception(t) + } + } + + /** + * observeSeq observes the result of a fetch against a key-value repository + * when the returned value is a Seq of type V. When the fetch is completed, + * observes whether or not the returned Seq is empty, contains some number of + * items, or has failed in some way. 
+ */ + def observeSeq[V](seqTry: Try[Seq[V]]): Unit = { + seqTry.respond { + case Return(seq) if seq.isEmpty => notFound() + case Return(seq) => found(seq.length) + case Throw(t) => exception(t) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala new file mode 100644 index 000000000..5a62fe175 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/Repository.scala @@ -0,0 +1,133 @@ +package com.twitter.servo.repository + +import com.twitter.servo.util.RetryHandler +import com.twitter.util.{Duration, Future, Timer} + +object Repository { + + /** + * Composes a RepositoryFilter onto a Repository, producing a new Repository. + */ + def composed[Q, R1, R2]( + repo: Repository[Q, R1], + filter: RepositoryFilter[Q, R1, R2] + ): Repository[Q, R2] = + q => filter(q, repo(q)) + + /** + * Chains 2 or more RepositoryFilters together into a single RepositoryFilter. + */ + def chained[Q, R1, R2, R3]( + f1: RepositoryFilter[Q, R1, R2], + f2: RepositoryFilter[Q, R2, R3], + fs: RepositoryFilter[Q, R3, R3]* + ): RepositoryFilter[Q, R1, R3] = { + val first: RepositoryFilter[Q, R1, R3] = (q, r) => f2(q, f1(q, r)) + fs.toList match { + case Nil => first + case head :: tail => chained(first, head, tail: _*) + } + } + + /** + * Wraps a Repository with a function that transforms queries on the way in, and + * results on the way out. + */ + def transformed[Q, Q2, R, R2]( + repo: Repository[Q, R], + qmapper: Q2 => Q = (identity[Q] _): (Q => Q), + rmapper: R => R2 = (identity[R] _): (R => R) + ): Repository[Q2, R2] = + qmapper andThen repo andThen { _ map rmapper } + + /** + * Wraps a Repository with another Repository that explodes the query into multiple + * queries, executes those queries in parallel, then combines (reduces) results. 
+ */ + def mapReduced[Q, Q2, R, R2]( + repo: Repository[Q, R], + mapper: Q2 => Seq[Q], + reducer: Seq[R] => R2 + ): Repository[Q2, R2] = + mapReducedWithQuery(repo, mapper, (rs: Seq[(Q, R)]) => reducer(rs map { case (_, r) => r })) + + /** + * An extension of mapReduced that passes query and result to the reducer. + */ + def mapReducedWithQuery[Q, Q2, R, R2]( + repo: Repository[Q, R], + mapper: Q2 => Seq[Q], + reducer: Seq[(Q, R)] => R2 + ): Repository[Q2, R2] = { + val queryRepo: Q => Future[(Q, R)] = q => repo(q) map { (q, _) } + q2 => Future.collect(mapper(q2) map queryRepo) map reducer + } + + /** + * Creates a new Repository that dispatches to onTrueRepo if the given query predicate returns true, + * and dispatches to onFalseRepo otherwise. + */ + def selected[Q, R]( + select: Q => Boolean, + onTrueRepo: Repository[Q, R], + onFalseRepo: Repository[Q, R] + ): Repository[Q, R] = + dispatched(select andThen { + case true => onTrueRepo + case false => onFalseRepo + }) + + /** + * Creates a new Repository that uses a function that selects an underlying repository + * based upon the query. + */ + def dispatched[Q, R](f: Q => Repository[Q, R]): Repository[Q, R] = + q => f(q)(q) + + /** + * Wraps a Repository with the given RetryHandler, which may automatically retry + * failed requests. + */ + def retrying[Q, R](handler: RetryHandler[R], repo: Repository[Q, R]): Repository[Q, R] = + handler.wrap(repo) + + /** + * Produces a new Repository where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is not interrupted. + */ + def withTimeout[Q, R]( + timer: Timer, + timeout: Duration, + repo: Repository[Q, R] + ): Repository[Q, R] = + repo andThen { _.within(timer, timeout) } + + /** + * Produces a new Repository where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. 
+ * + * ''Note'': On timeout, the underlying future is not interrupted. + */ + def withTimeout[Q, R]( + timer: Timer, + timeout: Duration, + exc: => Throwable, + repo: Repository[Q, R] + ): Repository[Q, R] = + repo andThen { _.within(timer, timeout, exc) } + + /** + * Wraps a Repository with stats recording functionality. + */ + def observed[Q, R]( + repo: Repository[Q, R], + observer: RepositoryObserver + ): Repository[Q, R] = + query => { + observer.time() { + repo(query).respond(observer.observeTry) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala new file mode 100644 index 000000000..efbd6f5a7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/ResponseCachingKeyValueRepository.scala @@ -0,0 +1,103 @@ +package com.twitter.servo.repository + +import com.twitter.servo.cache._ +import com.twitter.util._ + +object ResponseCachingKeyValueRepository { + + /** + * A cache filter that excludes cached future responses that are already fulfilled. + * Using this policy ensures that this repository will only ever have one outstanding request for the same item. + */ + def refreshSatisfied[K, V]: (K, Future[Option[V]]) => Boolean = + (_, v) => v.isDefined + + /** + * A cache filter that excludes cached future responses that are failures + */ + def refreshFailures[K, V]: (K, Future[Option[V]]) => Boolean = + (_, v) => + v.poll match { + case Some(t) => t.isThrow + case None => false + } +} + +/** + * A repository that caches (in-process) Future responses from an underlying KeyValueRepository. + * Each time a request for a key is made, the repository first checks + * if any Future responses for that key are already cached. + * If so, the Future response from cache is returned. 
+ * If not, a new Promise is placed into the cache, + * the underlying repository is queried to fulfill the Promise, + * and the new Promise is returned to the caller. + * @param underlying + * the underlying KeyValueRepository + * @param cache + * an in-process cache of (future) responses + * @param newQuery + * a function which constructs a new query from a query and a set of keys + * @param observer + * a CacheObserver which records the hits/misses on the request cache + */ +class ResponseCachingKeyValueRepository[Q <: Seq[K], K, V]( + underlying: KeyValueRepository[Q, K, V], + cache: InProcessCache[K, Future[Option[V]]], + newQuery: SubqueryBuilder[Q, K], + observer: CacheObserver = NullCacheObserver) + extends KeyValueRepository[Q, K, V] { + private[this] def load(query: Q, promises: Seq[(K, Promise[Option[V]])]): Unit = { + if (promises.nonEmpty) { + underlying(newQuery(promises map { case (k, _) => k }, query)) respond { + case Throw(t) => promises foreach { case (_, p) => p.updateIfEmpty(Throw(t)) } + case Return(kvr) => promises foreach { case (k, p) => p.updateIfEmpty(kvr(k)) } + } + } + } + + sealed trait RefreshResult[K, V] { + def toInterruptible: Future[Option[V]] + } + + private case class CachedResult[K, V](result: Future[Option[V]]) extends RefreshResult[K, V] { + def toInterruptible = result.interruptible + } + + private case class LoadResult[K, V](keyToLoad: K, result: Promise[Option[V]]) + extends RefreshResult[K, V] { + def toInterruptible = result.interruptible + } + + private[this] def refresh(key: K): RefreshResult[K, V] = + synchronized { + cache.get(key) match { + case Some(updated) => + observer.hit(key.toString) + CachedResult(updated) + case None => + observer.miss(key.toString) + val promise = new Promise[Option[V]] + cache.set(key, promise) + LoadResult(key, promise) + } + } + + def apply(query: Q): Future[KeyValueResult[K, V]] = + KeyValueResult.fromSeqFuture(query) { + val result: Seq[RefreshResult[K, V]] = + query map { key => + 
cache.get(key) match { + case Some(value) => + observer.hit(key.toString) + CachedResult[K, V](value) + case None => + refresh(key) + } + } + + val toLoad = result collect { case LoadResult(k, p) => k -> p } + load(query, toLoad) + + result map { _.toInterruptible } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala new file mode 100644 index 000000000..9f2e315c7 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/RichQuery.scala @@ -0,0 +1,34 @@ +package com.twitter.servo.repository + +import scala.collection.SeqProxy + +/** + * RichQuery is a mixin trait for KeyValueRepository query objects that are more complex + * than Seq[K]. It extends SeqProxy to satisfy servo's requirements but provides Product-based + * implementations of equals and toString. (The query object is expected to be a case class + * and therefore implement Product.) + */ +trait RichQuery[K] extends SeqProxy[K] with Product { + // Compare to other RichQuery instances via Product; otherwise allow any sequence to + // match our proxied Seq (thereby matching the semantics of a case class that simply + // extends SeqProxy). 
+ override def equals(any: Any) = { + any match { + case null => false + + case other: RichQuery[_] => + ( + this.productArity == other.productArity && + this.productIterator.zip(other.productIterator).foldLeft(true) { + case (ok, (e1, e2)) => + ok && e1 == e2 + } + ) + + case other => other.equals(this) + } + } + + // Produce reasonable string for testing + override def toString = "%s(%s)".format(this.productPrefix, this.productIterator.mkString(",")) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala new file mode 100644 index 000000000..d4d9aed9d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/SuccessRateTrackingRepository.scala @@ -0,0 +1,81 @@ +package com.twitter.servo.repository + +import com.twitter.finagle.mux.ClientDiscardedRequestException +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.{CancelledConnectionException, CancelledRequestException} +import com.twitter.servo.util.{Gate, SuccessRateTracker} +import com.twitter.util.Throwables.RootCause +import java.util.concurrent.CancellationException + +object SuccessRateTrackingRepository { + + /** + * (successes, failures) + */ + type SuccessRateObserver = (Int, Int) => Unit + + /** + * Identifies [[Throwable]]s that should not be counted as failures. + * + * This is a total function instead of a partial function so it can reliably recurse on itself + * to find a root cause. + */ + def isCancellation(t: Throwable): Boolean = + t match { + // We don't consider CancelledRequestExceptions or CancelledConnectionExceptions to be + // failures in order not to tarnish our success rate on upstream request cancellations. 
+ case _: CancelledRequestException => true + case _: CancelledConnectionException => true + // non-finagle backends can throw CancellationExceptions when their futures are cancelled. + case _: CancellationException => true + // Mux servers can return ClientDiscardedRequestException. + case _: ClientDiscardedRequestException => true + // Most of these exceptions can be wrapped in com.twitter.finagle.Failure + case RootCause(t) => isCancellation(t) + case _ => false + } + + /** + * Return a Success Rate (SR) tracking repository along with the gate controlling it. + * + * @param stats Provides availability gauge + * @param availabilityFromSuccessRate function to calculate availability given SR + * @param tracker strategy for tracking (usually recent) SR + * @param shouldIgnore don't count certain exceptions as failures, e.g. cancellations + * @return tuple of (SR tracking repo, gate closing if SR drops too far) + */ + def withGate[Q <: Seq[K], K, V]( + stats: StatsReceiver, + availabilityFromSuccessRate: Double => Double, + tracker: SuccessRateTracker, + shouldIgnore: Throwable => Boolean = isCancellation + ): (KeyValueRepository[Q, K, V] => KeyValueRepository[Q, K, V], Gate[Unit]) = { + val successRateGate = tracker.observedAvailabilityGate(availabilityFromSuccessRate, stats) + + (new SuccessRateTrackingRepository[Q, K, V](_, tracker.record, shouldIgnore), successRateGate) + } +} + +/** + * A KeyValueRepository that provides feedback on query success rate to + * a SuccessRateObserver. Both found and not found are considered successful + * responses, while failures are not. Cancellations are ignored by default. 
+ */ +class SuccessRateTrackingRepository[Q <: Seq[K], K, V]( + underlying: KeyValueRepository[Q, K, V], + observer: SuccessRateTrackingRepository.SuccessRateObserver, + shouldIgnore: Throwable => Boolean = SuccessRateTrackingRepository.isCancellation) + extends KeyValueRepository[Q, K, V] { + def apply(query: Q) = + underlying(query) onSuccess { kvr => + val nonIgnoredFailures = kvr.failed.values.foldLeft(0) { + case (count, t) if shouldIgnore(t) => count + case (count, _) => count + 1 + } + observer(kvr.found.size + kvr.notFound.size, nonIgnoredFailures) + } onFailure { t => + if (!shouldIgnore(t)) { + observer(0, query.size) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala new file mode 100644 index 000000000..4c4fe7e4d --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/repository/package.scala @@ -0,0 +1,50 @@ +package com.twitter.servo + +import com.twitter.util.Future + +package object repository { + + /** + * Base repository type. Maps a Query to a future Result + */ + type Repository[-Q, +R] = Q => Future[R] + + /** + * RepositoryFilters can be chained onto Repositories to asynchronously apply transformations to + * Repository results. + */ + type RepositoryFilter[-Q, -R, +S] = (Q, Future[R]) => Future[S] + + type KeyValueResult[K, V] = keyvalue.KeyValueResult[K, V] + val KeyValueResult = keyvalue.KeyValueResult + + /** + * A KeyValueRepository is a type of repository that handles bulk gets of data. The query + * defines the values to fetch, and is usually made up of a Seq[K], possibly with other + * contextual information needed to perform the query. The result is a KeyValueResult, + * which contains a break-out of found, notFound, and failed key lookups. The set of + * keys may or may not be computable locally from the query.
This top-level type does not + * require that the keys are computable from the query, but certain instances, such as + * CachingKeyValueRepository, do require key-computability. + */ + type KeyValueRepository[Q, K, V] = Repository[Q, KeyValueResult[K, V]] + + type CounterKeyValueRepository[K] = KeyValueRepository[Seq[K], K, Long] + + /** + * For KeyValueRepository scenarios where the query is a sequence of keys, a SubqueryBuilder + * defines how to convert a sub-set of the keys from the query into a query. + */ + type SubqueryBuilder[Q <: Seq[K], K] = (Seq[K], Q) => Q + + /** + * A SubqueryBuilder where the query type is nothing more than a sequence of keys. + */ + @deprecated("use keysAsQuery", "1.1.0") + def KeysAsQuery[K]: SubqueryBuilder[Seq[K], K] = keysAsQuery[K] + + /** + * A SubqueryBuilder where the query type is nothing more than a sequence of keys. + */ + def keysAsQuery[K]: SubqueryBuilder[Seq[K], K] = (keys, parentQuery) => keys +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala new file mode 100644 index 000000000..a6dd69e26 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/CachingStore.scala @@ -0,0 +1,112 @@ +package com.twitter.servo.store + +import com.twitter.servo.cache.{Cached, CachedValueStatus, LockingCache} +import com.twitter.logging.Logger +import com.twitter.util.{Future, Time} + +/** + * Wraps a cache around an underlying store. + * + * CachingStore is a specialization of TransformingCachingStore where the store and cache are + * assumed to have the same key and value types. See TransformingCachingStore for a discussion + * of the arguments to CachingStore. 
+ */ +class CachingStore[K, V]( + cache: LockingCache[K, Cached[V]], + underlying: Store[K, V], + valuePicker: LockingCache.Picker[Cached[V]], + key: V => K) + extends TransformingCachingStore[K, V, K, V]( + cache, + underlying, + valuePicker, + key, + identity, + identity + ) + +/** + * Wraps a cache of differing key/value types around an underlying store. + * + * Updates are applied first (unmodified) to the underlying store and then + * the cache is updated after running the key/value through a one-way function + * to derive the key/value as expected by the cache. + * + * @param cache + * the wrapping cache + * + * @param underlying + * the underlying store + * + * @param valuePicker + * chooses between existing and new value + * + * @param key + * computes a key from the value being stored + * + * @param cacheKey + * transforms the store's key type to the cache's key type + * + * @param cacheValue + * transforms the store's value type to the cache's value type + */ +class TransformingCachingStore[K, V, CacheK, CacheV]( + cache: LockingCache[CacheK, Cached[CacheV]], + underlying: Store[K, V], + valuePicker: LockingCache.Picker[Cached[CacheV]], + key: V => K, + cacheKey: K => CacheK, + cacheValue: V => CacheV) + extends Store[K, V] { + protected[this] val log = Logger.get(getClass.getSimpleName) + + override def create(value: V): Future[V] = { + chainCacheOp[V]( + underlying.create(value), + result => cache(key(result), Some(result), CachedValueStatus.Found, "new") + ) + } + + override def update(value: V): Future[Unit] = { + chainCacheOp[Unit]( + underlying.update(value), + _ => cache(key(value), Some(value), CachedValueStatus.Found, "updated") + ) + } + + override def destroy(key: K): Future[Unit] = { + chainCacheOp[Unit]( + underlying.destroy(key), + _ => cache(key, None, CachedValueStatus.Deleted, "deleted") + ) + } + + /** + * Subclasses may override this to alter the relationship between the result + * of the underlying Store operation and the result of the 
Cache operation. + * By default, the cache operation occurs asynchronously and only upon success + * of the store operation. Cache operation failures are logged but otherwise + * ignored. + */ + protected[this] def chainCacheOp[Result]( + storeOp: Future[Result], + cacheOp: Result => Future[Unit] + ): Future[Result] = { + storeOp onSuccess { cacheOp(_) } + } + + protected[this] def cache( + key: K, + value: Option[V], + status: CachedValueStatus, + desc: String + ): Future[Unit] = { + val now = Time.now + val cached = Cached(value map { cacheValue(_) }, status, now, None, Some(now)) + val handler = LockingCache.PickingHandler(cached, valuePicker) + cache.lockAndSet(cacheKey(key), handler).unit onFailure { + case t => + log.error(t, "exception caught while caching %s value", desc) + } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala new file mode 100644 index 000000000..96866e854 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/KeyValueStore.scala @@ -0,0 +1,13 @@ +package com.twitter.servo.store + +import com.twitter.util.Future + +trait KeyValueStore[C, K, V, R] { + def put(ctx: C, key: K, value: Option[V]): Future[R] = multiPut(ctx, Seq((key -> value))) + def multiPut(ctx: C, kvs: Seq[(K, Option[V])]): Future[R] +} + +trait SimpleKeyValueStore[K, V] extends KeyValueStore[Unit, K, V, Unit] { + def put(key: K, value: Option[V]): Future[Unit] = multiPut((), Seq(key -> value)) + def multiPut(kvs: Seq[(K, Option[V])]): Future[Unit] = multiPut((), kvs) +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala new file mode 100644 index 000000000..ae582c307 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/ObservableStore.scala @@ -0,0 +1,32 @@ +package 
com.twitter.servo.store + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.servo.util.{ExceptionCounter, LogarithmicallyBucketedTimer} +import com.twitter.util.Future + +class StoreObserver(statsReceiver: StatsReceiver) { + protected[this] val exceptionCounter = new ExceptionCounter(statsReceiver) + + def time[T](f: => Future[T]) = { + Stat.timeFuture(statsReceiver.stat(LogarithmicallyBucketedTimer.LatencyStatName))(f) + } + + def exception(ts: Throwable*): Unit = exceptionCounter(ts) +} + +class ObservableStore[K, V](underlying: Store[K, V], statsReceiver: StatsReceiver) + extends Store[K, V] { + protected[this] val observer = new StoreObserver(statsReceiver) + + override def create(value: V) = observer.time { + underlying.create(value) onFailure { observer.exception(_) } + } + + override def update(value: V) = observer.time { + underlying.update(value) onFailure { observer.exception(_) } + } + + override def destroy(key: K) = observer.time { + underlying.destroy(key) onFailure { observer.exception(_) } + } +} diff --git a/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala new file mode 100644 index 000000000..a86283b82 --- /dev/null +++ b/tweetypie/servo/repo/src/main/scala/com/twitter/servo/store/Store.scala @@ -0,0 +1,93 @@ +package com.twitter.servo.store + +import com.twitter.servo.util.Gate +import com.twitter.util.Future + +/** + * models a write-store of key/values + */ +trait Store[K, V] { + def create(value: V): Future[V] + def update(value: V): Future[Unit] + def destroy(key: K): Future[Unit] +} + +object Store { + + /** + * Filter store operations based on either the key or the value. 
If the gate passes then forward + * the operation to the underlying store, if not then forward the operation to a null store + * (effectively a no-op) + */ + def filtered[K, V](store: Store[K, V], filterKey: Gate[K], filterValue: Gate[V]) = + new GatedStore(store, new NullStore[K, V], filterKey, filterValue) + + /** + * A store type that selects between one of two underlying stores based on the key/value of the + * operation. If the key/value gate passes, forward the operation to the primary store, otherwise + * forward the operation to the secondary store. + */ + def gated[K, V]( + primary: Store[K, V], + secondary: Store[K, V], + usePrimaryKey: Gate[K], + usePrimaryValue: Gate[V] + ) = new GatedStore(primary, secondary, usePrimaryKey, usePrimaryValue) + + /** + * A store type that selects between one of two underlying stores based on a predicative value, + * which may change dynamically at runtime. + */ + def deciderable[K, V]( + primary: Store[K, V], + backup: Store[K, V], + primaryIsAvailable: => Boolean + ) = new DeciderableStore(primary, backup, primaryIsAvailable) +} + +trait StoreWrapper[K, V] extends Store[K, V] { + def underlyingStore: Store[K, V] + + override def create(value: V) = underlyingStore.create(value) + override def update(value: V) = underlyingStore.update(value) + override def destroy(key: K) = underlyingStore.destroy(key) +} + +class NullStore[K, V] extends Store[K, V] { + override def create(value: V) = Future.value(value) + override def update(value: V) = Future.Done + override def destroy(key: K) = Future.Done +} + +/** + * A Store type that selects between one of two underlying stores based + * on the key/value, which may change dynamically at runtime. 
+ */ +private[servo] class GatedStore[K, V]( + primary: Store[K, V], + secondary: Store[K, V], + usePrimaryKey: Gate[K], + usePrimaryValue: Gate[V]) + extends Store[K, V] { + private[this] def pick[T](item: T, gate: Gate[T]) = if (gate(item)) primary else secondary + + override def create(value: V) = pick(value, usePrimaryValue).create(value) + override def update(value: V) = pick(value, usePrimaryValue).update(value) + override def destroy(key: K) = pick(key, usePrimaryKey).destroy(key) +} + +/** + * A Store type that selects between one of two underlying stores based + * on a predicative value, which may change dynamically at runtime. + */ +class DeciderableStore[K, V]( + primary: Store[K, V], + backup: Store[K, V], + primaryIsAvailable: => Boolean) + extends Store[K, V] { + private[this] def pick = if (primaryIsAvailable) primary else backup + + override def create(value: V) = pick.create(value) + override def update(value: V) = pick.update(value) + override def destroy(key: K) = pick.destroy(key) +} diff --git a/tweetypie/servo/repo/src/main/thrift/BUILD b/tweetypie/servo/repo/src/main/thrift/BUILD new file mode 100644 index 000000000..6ad3c0873 --- /dev/null +++ b/tweetypie/servo/repo/src/main/thrift/BUILD @@ -0,0 +1,13 @@ +create_thrift_libraries( + base_name = "thrift", + sources = ["**/*.thrift"], + platform = "java8", + tags = ["bazel-compatible"], + generate_languages = [ + "java", + "scala", + "strato", + ], + provides_java_name = "servo-repo-thrift-java", + provides_scala_name = "servo-repo-thrift-scala", +) diff --git a/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift new file mode 100644 index 000000000..51b7373f3 --- /dev/null +++ b/tweetypie/servo/repo/src/main/thrift/com/twitter/servo/cache/servo_repo.thrift @@ -0,0 +1,39 @@ +#@namespace scala com.twitter.servo.cache.thriftscala +#@ namespace strato com.twitter.servo.cache +// the java 
namespace is unused, but appeases the thrift Linter gods +namespace java com.twitter.servo.cache.thriftjava + +enum CachedValueStatus { + FOUND = 0, + NOT_FOUND = 1, + DELETED = 2, + SERIALIZATION_FAILED = 3 + DESERIALIZATION_FAILED = 4, + EVICTED = 5, + DO_NOT_CACHE = 6 +} + +/** + * Caching metadata for a binary cache value + */ +struct CachedValue { + 1: optional binary value + // can be used to distinguish between deletion tombstones and not-found tombstones + 2: CachedValueStatus status + // when was the cache value written + 3: i64 cached_at_msec + // set if the cache was read through + 4: optional i64 read_through_at_msec + // set if the cache was written through + 5: optional i64 written_through_at_msec + // This optional field is only read when the CachedValueStatus is DO_NOT_CACHE. + // When CachedValueStatus is DO_NOT_CACHE and this field is not set, the key + // will not be cached without a time limit. If the client wants to cache + // immediately, they would not set DO_NOT_CACHE. + 6: optional i64 do_not_cache_until_msec + // Indicates how many times we've successfully checked + // the cached value against the backing store. Should be initially set to 0. + // The client may choose to increase the soft TTL duration based on this value.
+ // See http://go/gd-dynamic-cache-ttls and http://go/strato-progressive-ttls for some use cases + 7: optional i16 soft_ttl_step +} (persisted='true') diff --git a/tweetypie/servo/request/BUILD b/tweetypie/servo/request/BUILD new file mode 100644 index 000000000..434ab68f4 --- /dev/null +++ b/tweetypie/servo/request/BUILD @@ -0,0 +1,5 @@ +target( + dependencies = [ + "tweetypie/servo/request/src/main/scala", + ], +) diff --git a/tweetypie/servo/request/src/main/scala/BUILD b/tweetypie/servo/request/src/main/scala/BUILD new file mode 100644 index 000000000..2d50540e5 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/BUILD @@ -0,0 +1,20 @@ +scala_library( + sources = ["**/*.scala"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-request", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "finagle/finagle-core/src/main", + "tweetypie/servo/util", + "twitter-config/yaml", + "util/util-stats/src/main/scala", + ], + exports = [ + "tweetypie/servo/util", + ], +) diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala new file mode 100644 index 000000000..1547adbbd --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestAuthorizer.scala @@ -0,0 +1,172 @@ +package com.twitter.servo.request + +import com.twitter.servo.gate.RateLimitingGate +import com.twitter.servo.util.Gate +import com.twitter.util.Future + +/** + * Authorizes requests by method-name and client.
+ */ +trait ClientRequestAuthorizer extends ((String, Option[String]) => Future[Unit]) { self => + + /** + * @param methodName the name of the Service method being called + * @param clientIdStrOpt an Option of the string value of the originating + * request's ClientId + */ + def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] + + /** + * Compose this authorizer with another so that one is applied after the other. + * + * The resultant authorizer requires both underlying authorizers to succeed in + * order to authorize a request. + */ + def andThen(other: ClientRequestAuthorizer) = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + self.apply(methodName, clientIdStrOpt) flatMap { _ => + other(methodName, clientIdStrOpt) + } + } + } +} + +object ClientRequestAuthorizer { + case class UnauthorizedException(msg: String) extends Exception(msg) + + protected[this] val noClientIdException = + Future.exception(new UnauthorizedException("No ClientId specified")) + protected[this] val unauthorizedException = + new UnauthorizedException("Your ClientId is not authorized.") + protected[this] val overRateLimitException = + new UnauthorizedException("Your ClientId is over the allowed rate limit.") + + /** + * Increment stats counters for this request. + * + * Note that ClientRequestAuthorizer.observed doesn't compose in the same fashion + * as other authorizers via `andThen`. In order to observe authorization results, + * pass in an underlying authorizer as an argument to observed. 
+ */ + def observed( + underlyingAuthorizer: ClientRequestAuthorizer, + observer: ClientRequestObserver + ) = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + val clientIdStr = clientIdStrOpt.getOrElse("no_client_id") + + observer(methodName, clientIdStrOpt map { Seq(_) }) + + underlyingAuthorizer(methodName, clientIdStrOpt) onFailure { _ => + observer.unauthorized(methodName, clientIdStr) + } onSuccess { _ => + observer.authorized(methodName, clientIdStr) + } + } + } + + def observed(observer: ClientRequestObserver): ClientRequestAuthorizer = + observed(ClientRequestAuthorizer.permissive, observer) + + /** + * Lets all requests through. + */ + def permissive = new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]) = Future.Done + } + + /** + * A Generic Authorizer that allows you to pass in your own authorizer function (filter). + * The filter should take in methodName and clientId and return a Boolean decision. + * + * Note: Requires requests to have ClientIds. + * @param exception return this exception if the request does not pass the filter + */ + def filtered( + filter: (String, String) => Boolean, + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = + new ClientRequestAuthorizer { + val futureException = Future.exception(exception) + + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = { + clientIdStrOpt match { + case Some(clientIdStr) => + if (filter(methodName, clientIdStr)) + Future.Done + else + futureException + case None => + noClientIdException + } + } + } + + /** + * Authorizes client requests based on an allowlist of ClientId strings. + */ + def allowlisted(allowlist: Set[String]): ClientRequestAuthorizer = + filtered { (_, clientIdStr) => + allowlist.contains(clientIdStr) + } + + /** + * Authorizes requests if and only if they have an associated ClientId.
+ */ + def withClientId: ClientRequestAuthorizer = filtered { (_, _) => + true + } + + /** + * Consult a (presumably) Decider-backed predicate to authorize requests by ClientId. + * @param exception return this exception if the request does not pass the filter + */ + def deciderable( + isAvailable: String => Boolean, + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = + filtered( + { (_, clientIdStr) => + isAvailable(clientIdStr) + }, + exception + ) + + /** + * Simple rate limiter for unknown client ids. Useful for letting new clients + * send some traffic without the risk of being overrun by requests. + * + * @param limitPerSecond Number of calls per second we can tolerate + */ + def rateLimited(limitPerSecond: Double): ClientRequestAuthorizer = { + gated(RateLimitingGate.uniform(limitPerSecond), overRateLimitException) + } + + /** + * Simple Gate based authorizer, will authorize according to the result of the gate regardless + * of the client/method name + */ + def gated( + gate: Gate[Unit], + exception: Exception = unauthorizedException + ): ClientRequestAuthorizer = { + deciderable(_ => gate(), exception) + } + + /** + * @return A ClientRequestAuthorizer that switches between two provided + * ClientRequestAuthorizers depending on a decider. 
+ */ + def select( + decider: Gate[Unit], + ifTrue: ClientRequestAuthorizer, + ifFalse: ClientRequestAuthorizer + ): ClientRequestAuthorizer = + new ClientRequestAuthorizer { + override def apply(methodName: String, clientIdStrOpt: Option[String]): Future[Unit] = + decider.pick( + ifTrue(methodName, clientIdStrOpt), + ifFalse(methodName, clientIdStrOpt) + ) + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala new file mode 100644 index 000000000..e7de2ab04 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/ClientRequestObserver.scala @@ -0,0 +1,58 @@ +package com.twitter.servo.request + +import com.twitter.finagle.stats.NullStatsReceiver +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.Future + +object ClientRequestObserver { + private[request] val noClientIdKey = "no_client_id" +} + +/** + * Provides per-request stats based on Finagle ClientId. + * + * @param statsReceiver the StatsReceiver used for counting + * @param observeAuthorizationAttempts: if true (the default), observe all attempts. If false, + * only failures (unauthorized attempts) are observed. 
+ */ +class ClientRequestObserver( + statsReceiver: StatsReceiver, + observeAuthorizationAttempts: Boolean = true) + extends ((String, Option[Seq[String]]) => Future[Unit]) { + import ClientRequestObserver.noClientIdKey + + protected[this] val scopedReceiver = statsReceiver.scope("client_request") + protected[this] val unauthorizedReceiver = scopedReceiver.scope("unauthorized") + protected[this] val unauthorizedCounter = scopedReceiver.counter("unauthorized") + + /** + * @param methodName the name of the Service method being called + * @param clientIdScopesOpt optional sequence of scope strings representing the + * originating request's ClientId + */ + override def apply(methodName: String, clientIdScopesOpt: Option[Seq[String]]): Future[Unit] = { + if (observeAuthorizationAttempts) { + scopedReceiver.counter(methodName).incr() + clientIdScopesOpt match { + case Some(clientIdScopes) => + scopedReceiver.scope(methodName).counter(clientIdScopes: _*).incr() + + case None => + scopedReceiver.scope(methodName).counter(noClientIdKey).incr() + } + } + Future.Done + } + + /** + * Increments a counter for unauthorized requests. + */ + def unauthorized(methodName: String, clientIdStr: String): Unit = { + unauthorizedCounter.incr() + unauthorizedReceiver.scope(methodName).counter(clientIdStr).incr() + } + + def authorized(methodName: String, clientIdStr: String): Unit = {} +} + +object NullClientRequestObserver extends ClientRequestObserver(NullStatsReceiver) diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala new file mode 100644 index 000000000..5ccc171ed --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/PermissionModule.scala @@ -0,0 +1,233 @@ +package com.twitter.servo.request + +import com.twitter.config.yaml.YamlMap +import com.twitter.util.Try + +/** + * Module for defining a set of permissions. 
This is similar to + * Enumeration in the scala standard library. + * + * To use, instantiate a subclass: + * + * {{{ + * object MyPermissions extends PermissionModule { + * val Eat = create("eat") + * val Drink = create("drink") + * } + * }}} + * + * Permissions only support one kind of authorization, which is that + * you can check whether a holder of permissions has all of the + * permissions in a particular set. + * + * {{{ + * val snack = MyPermissions.Eat + * val dinner = MyPermissions.Eat union MyPermissions.Drink + * val canEat = MyPermissions.Eat + * dinner satisfiedBy canEat // false + * snack satisfiedBy canEat // true + * }}} + * + * Each instance will have its own distinct permission type, so it is + * not possible to confuse the permissions defined in different + * modules. + * + * {{{ + * scala> object P1 extends PermissionModule { val Read = create("read") } + * scala> object P2 extends PermissionModule { val Read = create("read") } + * scala> P1.Read satisfiedBy P2.Read + * error: type mismatch; + * found : P2.Permissions + * required: P1.Permissions + * P1.Read satisfiedBy P2.Read + * }}} + * + * Once an instance has been created, it will not be possible to + * create new permissions. The intention is that all permissions will + * be created at object initialization time. + * + * Each instance also supplies functionality for accessing permissions + * by name, including parsing client permission maps from YAML. + */ +trait PermissionModule { + // This var is used during object initialization to collect all of + // the permissions that are created in the subclass. The lazy + // initializer for `All` will set this to null as a side-effect, so + // that further permission creations are not allowed. + @volatile private[this] var allPerms: Set[String] = Set.empty + + /** + * Create a new Permission with the given name. Note that "*" is a + * reserved string for `All` permissions, thus it cannot be + * used as the name of an individual permission.
+ * + * This method must be called before `All` is accessed. + * The intention is that it should be called as part of + * object initialization. + * + * Note that some methods of PermissionModule access `All`, so it is + * best to create all of your permissions before doing anything + * else. + * + * @throws RuntimeException: If it is called after `All` has been + * initialized. + */ + protected def create(name: String) = { + synchronized { + if (allPerms == null) { + throw new RuntimeException("Permission creation after initialization") + } + + allPerms = allPerms union Set(name) + } + + new Permissions(Set(name)) + } + + /** + * Get a set of permissions with this single permission by name. It + * will return None if there is no permission by that name. + * + * No permissions may be defined after this method is called. + */ + def get(name: String): Option[Permissions] = All.get(name) + + /** + * Get the set of permissions that contains that single permission + * by name. + * + * @throws RuntimeException if there is no defined permission with + * this name. + * + * No permissions may be defined after this method is called. + */ + def apply(name: String): Permissions = + get(name) match { + case None => throw new RuntimeException("Unknown permission: " + name) + case Some(p) => p + } + + /** + * No permissions (required or held) + */ + val Empty: Permissions = new Permissions(Set.empty) + + /** + * All defined permissions. + * + * No permissions may be defined after this value is initialized. + */ + lazy val All: Permissions = { + val p = new Permissions(allPerms) + allPerms = null + p + } + + /** + * Load permissions from a YAML map. + * + * No permissions may be defined after this method is called. + * + * @return a map from client identifier to permission set. + * @throws RuntimeException when the permission from the Map is not defined. 
+ */ + def fromYaml(m: YamlMap): Try[Map[String, Permissions]] = + Try { + m.keys.map { k => + k -> fromSeq((m yamlList k).map { _.toString }) + }.toMap + } + + /** + * Load permissions from map. + * + * No permissions may be defined after this method is called. + * + * @param m a map from client identifier to a set of permission strings + * + * @return a map from client identifier to permission set. + * @throws RuntimeException when the permission from the Map is not defined. + */ + def fromMap(m: Map[String, Seq[String]]): Try[Map[String, Permissions]] = + Try { + m.map { case (k, v) => k -> fromSeq(v) } + } + + /** + * Load permissions from seq. + * + * No permissions may be defined after this method is called. + * + * @param permissionStrings a Seq of permission strings + * + * @return a permission set. + * @throws RuntimeException when the permission is not defined. + */ + def fromSeq(permissionStrings: Seq[String]): Permissions = + permissionStrings.foldLeft(Empty) { (p, v) => + v match { + case "all" if get("all").isEmpty => All + case other => p union apply(other) + } + } + + /** + * Authorizer based on a Permissions for RPC method names. + * @param requiredPermissions + * map of RPC method names to Permissions required for that RPC + * @param clientPermissions + * map of ClientId to Permissions a client has + */ + def permissionBasedAuthorizer( + requiredPermissions: Map[String, Permissions], + clientPermissions: Map[String, Permissions] + ): ClientRequestAuthorizer = + ClientRequestAuthorizer.filtered { (methodName, clientId) => + requiredPermissions.get(methodName) exists { + _ satisfiedBy clientPermissions.getOrElse(clientId, Empty) + } + } + + /** + * A set of permissions. This can represent either permissions that + * are required to perform an action, or permissions that are held + * by a client. + * + * This type cannot be instantiated directly. Use the methods of + * your subclass of PermissionModule to do so. 
+ */ + class Permissions private[PermissionModule] (private[PermissionModule] val permSet: Set[String]) { + + /** + * Does the supplied set of held permissions satisfy the + * requirements of this set of permissions? + * + * For example, if this set of permissions is Set("read"), and the + * other set of permissions is Set("read", "write"), then the + * other set of permissions satisfies this set. + */ + def satisfiedBy(other: Permissions): Boolean = permSet subsetOf other.permSet + + override def equals(other: Any): Boolean = + other match { + case p: Permissions => p.permSet == permSet + case _ => false + } + + override lazy val hashCode: Int = 5 + 37 * permSet.hashCode + + /** + * Get a single permission + */ + def get(permName: String): Option[Permissions] = + if (permSet contains permName) Some(new Permissions(Set(permName))) else None + + /** + * Create a new permission set that holds the permissions of this + * object as well as the permissions of the other object. + */ + def union(other: Permissions): Permissions = new Permissions(permSet union other.permSet) + + override def toString: String = "Permissions(%s)".format(permSet.mkString(", ")) + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala new file mode 100644 index 000000000..e80044c2d --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestFilter.scala @@ -0,0 +1,120 @@ +package com.twitter.servo.request + +import com.twitter.finagle.tracing.TraceId +import com.twitter.servo.util.{FunctionArrow, Effect, FutureArrow, FutureEffect, Observable} +import com.twitter.util.{Future, Try} + +/** + * Useful mixins for request types. + */ +trait HasTraceId { + + /** + * The Finagle TraceId of the request. + */ + def traceId: TraceId +} + +/** + * A collection of RequestFilter factory functions. 
+ * + * type RequestFilter[A] = FutureArrow[A, A] + */ +object RequestFilter { + + /** + * Produce a RequestFilter from a function `A => Future[A]`. + */ + def apply[A](f: A => Future[A]): RequestFilter[A] = FutureArrow(f) + + /** + * Produce a RequestFilter from a function `A => Try[A]`. + * + * The Try is evaluated within a Future. Thus, Throw results are translated + * to `Future.exception`s. + */ + def fromTry[A](f: A => Try[A]): RequestFilter[A] = FutureArrow.fromTry(f) + + /** + * A no-op RequestFilter; it simply returns the request. + * + * This forms a monoid with `append`. + */ + def identity[A]: RequestFilter[A] = FutureArrow.identity + + /** + * Appends two RequestFilters together. + * + * This forms a monoid with 'identity'. + */ + def append[A](a: RequestFilter[A], b: RequestFilter[A]): RequestFilter[A] = + FutureArrow.append(a, b) + + /** + * Compose an ordered series of RequestFilters into a single object. + */ + def all[A](filters: RequestFilter[A]*): RequestFilter[A] = + filters.foldLeft(identity[A])(append) + + /** + * Produce a RequestFilter that applies a side-effect, returning the argument + * request as-is. + */ + def effect[A](effect: Effect[A]): RequestFilter[A] = + FutureArrow.fromFunctionArrow(FunctionArrow.effect(effect)) + + /** + * Produce a RequestFilter that applies a side-effect, returning the argument + * request as-is. + */ + def effect[A](effect: FutureEffect[A]): RequestFilter[A] = FutureArrow.effect(effect) + + /** + * Returns a new request filter where all Futures returned from `a` have their + * `masked` method called + */ + def masked[A](a: RequestFilter[A]): RequestFilter[A] = a.masked + + /** + * Produces a RequestFilter that proxies to one of two others, depending on a + * predicate. + */ + def choose[A]( + predicate: A => Boolean, + ifTrue: RequestFilter[A], + ifFalse: RequestFilter[A] + ): RequestFilter[A] = + FutureArrow.choose(predicate, ifTrue, ifFalse) + + /** + * Guard the application of a filter on a predicate. 
The filter is applied + * if the predicate returns true, otherwise, the request is simply returned. + */ + def onlyIf[A](predicate: A => Boolean, f: RequestFilter[A]): RequestFilter[A] = + FutureArrow.onlyIf(predicate, f) + + /** + * Produces a RequestFilter that authorizes requests by applying an + * authorization function `A => Future[Unit]`. If the authorizer function + * results in a Future exception, requests are failed. Otherwise, they pass. + */ + def authorized[A <: Observable](authorizer: ClientRequestAuthorizer): RequestFilter[A] = + RequestFilter[A] { request => + authorizer(request.requestName, request.clientIdString) map { _ => + request + } + } + + /** + * Produces a RequestFilter that applies a ClientRequestObserver to requests. + * + * Used to increment counters and track stats for requests. + */ + def observed[A <: Observable](observer: ClientRequestObserver): RequestFilter[A] = + RequestFilter[A] { request => + val clientIdScopesOpt = request.clientIdString map { Seq(_) } + observer(request.requestName, clientIdScopesOpt) map { _ => + request + } + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala new file mode 100644 index 000000000..207999580 --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/RequestHandler.scala @@ -0,0 +1,24 @@ +package com.twitter.servo.request + +/** + * A collection of RequestHandler factory functions. + * + * type RequestHandler[-A, +B] = FutureArrow[A, B] + */ +object RequestHandler { + + /** + * Terminate a RequestFilter with a RequestHandler, producing a new handler. 
+ */ + def apply[A, B <: A, C]( + filter: RequestFilter[A], + handler: RequestHandler[B, C] + ): RequestHandler[B, C] = + new RequestHandler[B, C] { + override def apply(request: B) = { + filter(request: A) flatMap { filteredRequest => + handler(filteredRequest.asInstanceOf[B]) + } + } + } +} diff --git a/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala new file mode 100644 index 000000000..c02b4161c --- /dev/null +++ b/tweetypie/servo/request/src/main/scala/com/twitter/servo/request/package.scala @@ -0,0 +1,35 @@ +package com.twitter.servo + +import com.twitter.servo.util.FutureArrow + +package object request { + + /** + * RequestFilters provide a mechanism for composing a chain of actions + * (e.g. logging, authentication, replication, etc) to be performed per + * request. The intention is for a series of RequestFilters to be terminated in a + * RequestHandler, which returns an object of some response type. + * + * Upon completion of a filter's work, the convention is to either: + * + * a) Return a Future of a request object of type `A` to be passed to the next + * member of the filter/handler chain. + * b) Return a Future response outright in cases where request handling must + * be halted at the current filter (i.e. returning `Future.exception(...)`). + * + * @tparam A + * A type encapsulating all context and data required to satisfy a request. + */ + type RequestFilter[A] = FutureArrow[A, A] + + /** + * A handler of requests parameterized on the request and response types. + * + * @tparam A + * A type encapsulating all context and data required to satisfy a request. + * + * @tparam B + * A response type. 
+ */ + type RequestHandler[-A, +B] = FutureArrow[A, B] +} diff --git a/tweetypie/servo/util/BUILD b/tweetypie/servo/util/BUILD new file mode 100644 index 000000000..b27c20631 --- /dev/null +++ b/tweetypie/servo/util/BUILD @@ -0,0 +1,6 @@ +target( + tags = ["bazel-compatible"], + dependencies = [ + "tweetypie/servo/util/src/main/scala", + ], +) diff --git a/tweetypie/servo/util/src/main/scala/BUILD b/tweetypie/servo/util/src/main/scala/BUILD new file mode 100644 index 000000000..2a6d5f1c5 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/BUILD @@ -0,0 +1,53 @@ +EXCEPTION_SOURCES = [ + "com/twitter/servo/util/Effect.scala", + "com/twitter/servo/util/ExceptionCounter.scala", + "com/twitter/servo/util/Gate.scala", + "com/twitter/servo/util/ThrowableHelper.scala", + "com/twitter/servo/util/package.scala", +] + +scala_library( + sources = ["**/*.scala"] + exclude_globs(EXCEPTION_SOURCES), + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-util", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + ":exception", + "3rdparty/jvm/com/google/guava", + "3rdparty/jvm/com/google/inject:guice", + "finagle/finagle-core/src/main", + "finagle/finagle-mux/src/main/scala", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer", + "util-internal/scribe", + "util/util-logging/src/main/scala/com/twitter/logging", + "util/util-stats/src/main/scala", + ], + exports = [ + ":exception", + "util/util-logging/src/main/scala/com/twitter/logging", + ], +) + +scala_library( + name = "exception", + sources = EXCEPTION_SOURCES, + compiler_option_sets = ["fatal_warnings"], + platform = "java8", + provides = scala_artifact( + org = "com.twitter", + name = "servo-util-exception", + repo = artifactory, + ), + strict_deps = True, + tags = ["bazel-compatible"], + dependencies = [ + "util/util-core:util-core-util", + "util/util-stats/src/main/scala", + ], +) diff --git 
a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala new file mode 100644 index 000000000..9396c38f7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Lens.scala @@ -0,0 +1,147 @@ +package com.twitter.servo.data + +import scala.language.existentials + +object Lens { + private[this] val _identity = iso[Any, Any](x => x, x => x) + + /** + * The identity lens. + */ + def identity[A] = _identity.asInstanceOf[Lens[A, A]] + + /** + * Convenience method for creating lenses with slightly more + * efficient setters. + */ + def checkEq[A, B](get: A => B, set: (A, B) => A) = Lens[A, B](get, set).checkEq + + /** + * Create a lens from an isomorphism. + */ + def iso[A, B](to: A => B, from: B => A) = Lens[A, B](to, (_, x) => from(x)) + + /** + * Using multiple lenses, copy multiple fields from one object to another, returning + * the updated result. + */ + def copyAll[A](lenses: Lens[A, _]*)(src: A, dst: A): A = + lenses.foldLeft(dst) { (t, l) => + l.copy(src, t) + } + + /** + * setAll can be used to set multiple values using multiple lenses on the same input + * value in one call, which is more readable than nested calls. For example, say + * that we have lenses (lensX: Lens[A, X]), (lensY: Lens[A, Y]), and (lensZ: Lens[A, Z]), + * then instead of writing: + * + * lensX.set(lensY.set(lensZ.set(a, z), y), x) + * + * you can write: + * + * Lens.setAll(a, lensX -> x, lensY -> y, lensZ -> z) + */ + def setAll[A](a: A, lensAndValues: ((Lens[A, B], B) forSome { type B })*): A = + lensAndValues.foldLeft(a) { case (a, (l, b)) => l.set(a, b) } + + /** + * Combines two lenses into one that gets and sets a tuple of values. 
+ */ + def join[A, B, C](lensB: Lens[A, B], lensC: Lens[A, C]): Lens[A, (B, C)] = + Lens[A, (B, C)]( + a => (lensB.get(a), lensC.get(a)), + { case (a, (b, c)) => lensC.set(lensB.set(a, b), c) } + ) + + /** + * Combines three lenses into one that gets and sets a tuple of values. + */ + def join[A, B, C, D]( + lensB: Lens[A, B], + lensC: Lens[A, C], + lensD: Lens[A, D] + ): Lens[A, (B, C, D)] = + Lens[A, (B, C, D)]( + a => (lensB.get(a), lensC.get(a), lensD.get(a)), + { case (a, (b, c, d)) => lensD.set(lensC.set(lensB.set(a, b), c), d) } + ) +} + +/** + * A Lens is a first-class getter/setter. The value of lenses is that + * they can be composed with other operations. + * + * Note that it is up to you to ensure that the functions you pass to + * Lens obey the following laws for all inputs: + * + * a => set(a, get(a)) == a + * (a, b) => get(set(a, b)) == b + * (a, b, b1) => set(set(a, b), b1) == set(a, b1) + * + * The intuition for the name Lens[A, B] is that you are "viewing" A + * through a Lens that lets you see (and manipulate) a B. + * + * See e.g. + * http://stackoverflow.com/questions/5767129/lenses-fclabels-data-accessor-which-library-for-structure-access-and-mutatio#answer-5769285 + * for a more in-depth explanation of lenses. + */ +case class Lens[A, B](get: A => B, set: (A, B) => A) { + + /** + * Get the field. + */ + def apply(a: A) = get(a) + + /** + * Compose with another lens, such that the setter updates the + * outermost structure, and the getter gets the innermost structure. + */ + def andThen[C](next: Lens[B, C]) = + Lens(get andThen next.get, (a: A, c: C) => set(a, next.set(get(a), c))) + + /** + * An operator alias for `andThen`. + */ + def >>[C](next: Lens[B, C]) = andThen(next) + + /** + * Lift the function on the viewed value to a function on the outer + * value. + */ + def update(f: B => B): A => A = a => set(a, f(get(a))) + + /** + * Copies the field from one object to another. 
+ */ + def copy(src: A, dst: A): A = set(dst, get(src)) + + /** + * Lift a mutation of the viewed value to a transform of the + * container. (E.g. a Mutation[Seq[UrlEntity]] to a Mutation[Tweet]) + */ + def mutation(m: Mutation[B]) = + Mutation[A] { a => + m(get(a)) map { set(a, _) } + } + + /** + * Create a new lens whose setter makes sure that the update would + * change the value. + * + * This should not change the meaning of the lens, but can possibly + * make it more efficient by avoiding copies when performing no-op + * sets. + * + * This is only worthwhile when the getter and equality comparison + * are cheap compared to the setter. + */ + def checkEq = Lens[A, B](get, (a, b) => if (get(a) == b) a else set(a, b)) + + /** + * Combines this lens and the given lens into one that gets and sets a tuple + * of values. + */ + def join[C](right: Lens[A, C]): Lens[A, (B, C)] = + Lens.join(this, right) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala new file mode 100644 index 000000000..78e08df74 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/data/Mutation.scala @@ -0,0 +1,268 @@ +package com.twitter.servo.data + +import com.twitter.util.{Return, Throw, Try} +import com.twitter.finagle.stats.{Counter, StatsReceiver} +import com.twitter.servo.util.{Effect, Gate} + +object Mutation { + + /** + * A mutation that ignores its input and always returns the given + * value as new. Use checkEq if this value could be the same as the + * input. + */ + def const[T](x: T) = Mutation[T] { _ => + Some(x) + } + + private[this] val _unit = Mutation[Any] { _ => + None + } + + /** + * A "no-op" mutation that will never alter the value. + * + * For any Mutations A, (A also unit) == (unit also A) == A. + * + * Forms a monoid with also as the operation. 
+ */ + def unit[A]: Mutation[A] = _unit.asInstanceOf[Mutation[A]] + + /** + * Makes a Mutation out of a function. + */ + def apply[A](f: A => Option[A]): Mutation[A] = + new Mutation[A] { + override def apply(x: A) = f(x) + } + + /** + * Lift a function that returns the same type to a Mutation, using + * the type's notion of equality to detect when the mutation has + * not changed the value. + */ + def fromEndo[A](f: A => A): Mutation[A] = + Mutation[A] { x => + val y = f(x) + if (y == x) None else Some(y) + } + + /** + * Lift a partial function from A to A to a mutation. + */ + def fromPartial[A](f: PartialFunction[A, A]): Mutation[A] = Mutation[A](f.lift) + + /** + * Creates a new Mutation that applies all the given mutations in order. + */ + def all[A](mutations: Seq[Mutation[A]]): Mutation[A] = + mutations.foldLeft(unit[A])(_ also _) +} + +/** + * A Mutation encapsulates a computation that may optionally "mutate" a value, where + * "mutate" should be interpreted in the stateless/functional sense of making a copy with a + * change. If the value is unchanged, the mutation should return None. When mutations are + * composed with `also`, the final result will be None iff no mutation actually changed the + * value. + * + * Forms a monoid with Mutation.unit as unit and `also` as the + * combining operation. + * + * This abstraction is useful for composing changes to a value when + * some action (such as updating a cache) should be performed if the + * value has changed. + */ +trait Mutation[A] extends (A => Option[A]) { + + /** + * Convert this mutation to a function that always returns a + * result. If the mutation has no effect, it returns the original + * input. + * + * (convert to an endofunction on A) + */ + lazy val endo: A => A = + x => + apply(x) match { + case Some(v) => v + case None => x + } + + /** + * Apply this mutation, and then apply the next mutation to the + * result. 
If this mutation leaves the value unchanged, the next + * mutation is invoked with the original input. + */ + def also(g: Mutation[A]): Mutation[A] = + Mutation[A] { x => + apply(x) match { + case None => g(x) + case someY @ Some(y) => + g(y) match { + case some @ Some(_) => some + case None => someY + } + } + } + + /** + * Apply this mutation, but refuse to return an altered value. This + * yields all of the effects of this mutation without affecting the + * final result. + */ + def dark: Mutation[A] = Mutation[A] { x => + apply(x); None + } + + /** + * Convert a Mutation on A to a Mutation on B by way of a pair of functions for + * converting from B to A and back. + */ + def xmap[B](f: B => A, g: A => B): Mutation[B] = + Mutation[B](f andThen this andThen { _ map g }) + + /** + * Converts a Mutation on A to a Mutation on Try[A], where the Mutation is only applied + * to Return values and any exceptions thrown by the underlying function are caught and + * returned as Some(Throw(_)) + */ + def tryable: Mutation[Try[A]] = + Mutation[Try[A]] { + case Throw(x) => Some(Throw(x)) + case Return(x) => + Try(apply(x)) match { + case Throw(y) => Some(Throw(y)) + case Return(None) => None + case Return(Some(y)) => Some(Return(y)) + } + } + + /** + * Perform this mutation only if the provided predicate returns true + * for the input. + */ + def onlyIf(predicate: A => Boolean): Mutation[A] = + Mutation[A] { x => + if (predicate(x)) this(x) else None + } + + /** + * Performs this mutation only if the given gate returns true. + */ + def enabledBy(enabled: Gate[Unit]): Mutation[A] = + enabledBy(() => enabled()) + + /** + * Performs this mutation only if the given function returns true. + */ + def enabledBy(enabled: () => Boolean): Mutation[A] = + onlyIf { _ => + enabled() + } + + /** + * A new mutation that returns the same result as this mutation, + * and additionally calls the specified Effect. 
+ */ + def withEffect(effect: Effect[Option[A]]): Mutation[A] = + Mutation[A](this andThen effect.identity) + + /** + * Perform an equality check when a value is returned from the + * mutation. If the values are equal, then the mutation will yield + * None. + * + * This is useful for two reasons: + * + * 1. Any effects that are conditional upon mutation will not occur + * when the values are equal (e.g. updating a cache) + * + * 2. When using a Lens to lift a mutation to a mutation on a + * larger structure, checking equality on the smaller structure + * can prevent unnecessary copies of the larger structure. + */ + def checkEq = Mutation[A] { x => + this(x) match { + case someY @ Some(y) if y != x => someY + case _ => None + } + } + + /** + * Converts this mutation to a mutation of a different type, using a Lens to + * convert between types. + */ + def lensed[B](lens: Lens[B, A]): Mutation[B] = + Mutation[B](b => this(lens(b)).map(lens.set(b, _))) + + /** + * Convert this mutation to a mutation of a Seq of its type. It will + * yield None if no values are changed, or a Seq of both the changed + * and unchanged values if any value is mutated. + */ + def liftSeq = Mutation[Seq[A]] { xs => + var changed = false + val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } + val mutated = xs map (this withEffect detectChange).endo + if (changed) Some(mutated) else None + } + + /** + * Convert this mutation to a mutation of a Option of its type. It will yield + * None if the value is not changed, or a Some(Some(_)) if the value is mutated. + */ + def liftOption = Mutation[Option[A]] { + case None => None + case Some(x) => + this(x) match { + case None => None + case Some(y) => Some(Some(y)) + } + } + + /** + * Convert this mutation to a mutation of the values of a Map. It will + * yield None if no values are changed, or a Map with both the changed + * and unchanged values if any value is mutated. 
+ */ + def liftMapValues[K] = Mutation[Map[K, A]] { m => + var changed = false + val detectChange = Effect.fromPartial[Option[A]] { case Some(_) => changed = true } + val f = (this withEffect detectChange).endo + val mutated = m map { case (k, v) => (k, f(v)) } + if (changed) Some(mutated) else None + } + + /** + * Return a new mutation that returns the same result as this + * mutation, as well as incrementing the given counter when the + * value is mutated. + */ + def countMutations(c: Counter) = + this withEffect { Effect.fromPartial { case Some(_) => c.incr() } } + + /** + * Wrap a mutation in stats with the following counters: + * - no-op (returned value was the same as the input) + * - none (mutation returned none) + * - mutated (mutation modified the result) + */ + def withStats(stats: StatsReceiver): Mutation[A] = { + val none = stats.counter("none") + val noop = stats.counter("noop") + val mutated = stats.counter("mutated") + input: A => { + val result = apply(input) + result.fold(none.incr()) { output => + if (output == input) { + noop.incr() + } else { + mutated.incr() + } + } + result + } + } + +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala new file mode 100644 index 000000000..55d031784 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/Forked.scala @@ -0,0 +1,120 @@ +/** + * Provides the ability to partially tee traffic to a secondary + * service. + * + * This code was originally written to provide a way to provide + * production traffic to the TweetyPie staging cluster, selecting a + * consistent subset of tweet ids, to enable a production-like cache + * hit rate with a much smaller cache. + */ +package com.twitter.servo.forked + +import com.twitter.servo.data.Lens + +object Forked { + + /** + * A strategy for executing forked actions. 
+ */ + type Executor = (() => Unit) => Unit + + /** + * Directly execute the forked action. + */ + val inlineExecutor: Executor = f => f() + + /** + * Produce objects of type A to send to a secondary target. + * Returning None signifies that nothing should be forked. + */ + type Fork[A] = A => Option[A] + + /** + * Fork the input unchanged, only when it passes the specified + * predicate. + * + * For instance, if your service has a get() method + */ + def forkWhen[T](f: T => Boolean): Fork[T] = + a => if (f(a)) Some(a) else None + + /** + * Fork a subset of the elements of the Seq, based on the supplied + * predicate. If the resulting Seq is empty, the secondary action + * will not be executed. + */ + def forkSeq[T](f: T => Boolean): Fork[Seq[T]] = { xs => + val newXs = xs filter f + if (newXs.nonEmpty) Some(newXs) else None + } + + /** + * Apply forking through lens. + */ + def forkLens[A, B](lens: Lens[A, B], f: Fork[B]): Fork[A] = + a => f(lens(a)).map(lens.set(a, _)) + + /** + * A factory for building actions that will partially tee their input + * to a secondary target. The executor is parameterized to make the + * execution strategy independent from the forking logic. + */ + def toSecondary[S](secondary: S, executor: Executor): S => Forked[S] = + primary => + new Forked[S] { + + /** + * Tee a subset of requests defined by the forking function to the + * secondary service. + */ + def apply[Q, R](fork: Forked.Fork[Q], action: (S, Q) => R): Q => R = { req => + fork(req) foreach { req => + executor(() => action(secondary, req)) + } + action(primary, req) + } + } + + /** + * A forked action builder that bypasses the forking altogether and + * just calls the supplied action on a service. + * + * This is useful for configurations that will sometimes have fork + * targets defined and sometimes not. 
+ */ + def notForked[S]: S => Forked[S] = + service => + new Forked[S] { + def apply[Q, R](unusedFork: Forked.Fork[Q], action: (S, Q) => R): Q => R = + action(service, _) + } +} + +/** + * Factory for forking functions, primarily useful for sending a copy + * of a stream of requests to a secondary service. + */ +trait Forked[S] { + import Forked._ + + /** + * Fork an action that takes two parameters, forking only on the + * first parameter, passing the second unchanged. + */ + def first[Q1, Q2, R]( + fork: Fork[Q1], + action: S => (Q1, Q2) => R + ): (Q1, Q2) => R = { + val f = + apply[(Q1, Q2), R]( + fork = p => + fork(p._1) map { q1 => + (q1, p._2) + }, + action = (svc, p) => action(svc)(p._1, p._2) + ) + (q1, q2) => f((q1, q2)) + } + + def apply[Q, R](fork: Fork[Q], action: (S, Q) => R): Q => R +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala new file mode 100644 index 000000000..5b2949e45 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/forked/QueueExecutor.scala @@ -0,0 +1,82 @@ +package com.twitter.servo.forked + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.logging.Logger +import com.twitter.servo.util.ExceptionCounter +import com.twitter.util.{Duration, Time, Local, TimeoutException} +import java.util.concurrent.{LinkedBlockingQueue, TimeUnit, CountDownLatch} + +/** + * A forking action executor that executes the actions in a separate + * thread, using a bounded queue as the communication channel. If the + * queue is full (the secondary thread is slow to drain it), then the + * items will be dropped rather than enqueued. 
+ */ +class QueueExecutor(maxQueueSize: Int, stats: StatsReceiver) extends Forked.Executor { + private val forkExceptionsCounter = new ExceptionCounter(stats) + private val enqueuedCounter = stats.counter("forked_actions_enqueued") + private val droppedCounter = stats.counter("forked_actions_dropped") + private val log = Logger.get("Forked.QueueExecutor") + + @volatile private var isStopped = false + private val releaseCountDownLatch = new CountDownLatch(1) + private val queue = new LinkedBlockingQueue[() => Unit](maxQueueSize) + private val thread = new Thread { + override def run(): Unit = { + while (!isStopped) { + try { + queue.take()() + } catch { + // Ignore interrupts from other threads + case _: InterruptedException => + // TODO: handle fatal errors more seriously + case e: Throwable => + forkExceptionsCounter(e) + log.error(e, "Executing queued action") + } + } + releaseCountDownLatch.countDown() + } + } + + thread.setDaemon(true) + thread.start() + + /** + * Interrupts the thread and directs it to stop processing. This + * method will not return until the processing thread has finished + * or the timeout occurs. Ok to call multiple times. + */ + def release(timeout: Duration): Unit = { + if (!isStopped) { + isStopped = true + thread.interrupt() + releaseCountDownLatch.await(timeout.inMilliseconds, TimeUnit.MILLISECONDS) || { + throw new TimeoutException(timeout.toString) + } + } + } + + /** + * Blocks until all the items currently in the queue have been + * executed, or the timeout occurs. Mostly useful during testing. + */ + def waitForQueueToDrain(timeout: Duration): Unit = { + val latch = new CountDownLatch(1) + val start = Time.now + queue.offer(() => latch.countDown(), timeout.inMilliseconds, TimeUnit.MILLISECONDS) + val remaining = timeout - (Time.now - start) + latch.await(remaining.inMilliseconds, TimeUnit.MILLISECONDS) || { + throw new TimeoutException(remaining.toString) + } + } + + /** + * Queue the action for execution in this object's thread. 
+ */ + def apply(action: () => Unit) = + if (queue.offer(Local.closed(action))) + enqueuedCounter.incr() + else + droppedCounter.incr() +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala new file mode 100644 index 000000000..5cee23f22 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/gate/RateLimitingGate.scala @@ -0,0 +1,64 @@ +package com.twitter.servo.gate + +import com.google.common.annotations.VisibleForTesting +import com.google.common.util.concurrent.RateLimiter +import com.twitter.servo.util +import java.util.concurrent.TimeUnit + +/** + * A Rate Limiting Gate backed by com.google.common.util.concurrent.RateLimiter + * http://docs.guava-libraries.googlecode.com/git/javadoc/com/google/common/util/concurrent/RateLimiter.html + */ +object RateLimitingGate { + + /** + * Creates a Gate[Int] that returns true if acquiring number of permits + * from the ratelimiter succeeds. + */ + def weighted(permitsPerSecond: Double): util.Gate[Int] = { + val rateLimiter: RateLimiter = RateLimiter.create(permitsPerSecond) + util.Gate { rateLimiter.tryAcquire(_, 0, TimeUnit.SECONDS) } + } + + /** + * Creates a Gate[Unit] that returns true if acquiring a permit from the ratelimiter succeeds. + */ + def uniform(permitsPerSecond: Double): util.Gate[Unit] = { + weighted(permitsPerSecond) contramap { _ => + 1 + } + } + + /** + * Creates a Gate[Unit] with floating limit. Could be used with deciders. 
/**
 * Functions that turn an observed runtime signal (for example a success rate
 * compared against watermarks) into the availability a feature should run at.
 */
object Availability {

  /**
   * Builds a function from success rate to availability:
   *
   *  - at or above `highWaterMark` the result is 1.0;
   *  - at or below `lowWaterMark` the result is `minAvailability`;
   *  - in between, the result rises linearly from `minAvailability` to 1.0.
   *
   * The result never drops below `minAvailability`, so that some requests keep
   * flowing and the success rate can be seen recovering.
   *
   * @param highWaterMark   must satisfy `lowWaterMark <= highWaterMark <= 1`
   * @param lowWaterMark    must satisfy `minAvailability <= lowWaterMark <= 1`
   * @param minAvailability must be strictly between 0 and 1
   * @throws IllegalArgumentException (from `require`) when a bound is violated
   */
  def linearlyScaled(
    highWaterMark: Double,
    lowWaterMark: Double,
    minAvailability: Double
  ): Double => Double = {
    require(
      highWaterMark >= lowWaterMark && highWaterMark <= 1,
      s"highWaterMark ($highWaterMark) must be between lowWaterMark ($lowWaterMark) and 1, inclusive"
    )
    require(
      lowWaterMark >= minAvailability && lowWaterMark <= 1,
      s"lowWaterMark ($lowWaterMark) must be between minAvailability ($minAvailability) and 1, inclusive"
    )
    require(
      minAvailability > 0 && minAvailability < 1,
      s"minAvailability ($minAvailability) must be between 0 and 1, exclusive"
    )

    val watermarkSpan = highWaterMark - lowWaterMark
    val headroom = 1.0 - minAvailability

    (successRate: Double) =>
      if (successRate >= highWaterMark) {
        1.0
      } else if (successRate <= lowWaterMark) {
        minAvailability
      } else {
        // Only reachable when lowWaterMark < successRate < highWaterMark,
        // so watermarkSpan is non-zero here.
        val linearFraction = (successRate - lowWaterMark) / watermarkSpan
        minAvailability + headroom * linearFraction
      }
  }
}
/**
 * Running average built from two windows of data points: the one currently
 * filling and a snapshot of what came before. Once the current window has
 * accumulated `windowSize` counts, the blended value becomes the new
 * "previous" average and the current window starts over.
 *
 * @param windowSize   count at which the current window is rolled
 * @param initialValue optional seed for the previous-window average
 */
class WindowedAverage(val windowSize: Long, initialValue: Option[Double] = None) extends Average {
  private[this] val current = new ResettableAverage(None)
  private[this] var previous: Option[Double] = initialValue

  /**
   * With a previous average: a blend of previous and current, weighted by how
   * full the current window is. Without one: the current window's average,
   * which is None until something has been recorded.
   */
  def value: Option[Double] =
    synchronized {
      previous.fold(current.value) { prev =>
        // the current count can temporarily exceed windowSize, so cap the weight
        val currentWeight = math.min(current.count / windowSize, 1.0)
        Some((1.0 - currentWeight) * prev + currentWeight * current.value.getOrElse(0.0))
      }
    }

  /** Rolls the window first if it is already full, then records the data point. */
  def record(dataPoint: Double, count: Double = 1.0): Unit =
    synchronized {
      val windowFull = current.count >= windowSize
      if (windowFull) {
        previous = value
        current.reset()
      }
      current.record(dataPoint, count)
    }
}

/**
 * Average over roughly the last `windowDuration` of data points. During a
 * window the previous average is mixed with the current one; if nothing rolls
 * the window, the value drifts back towards `defaultAverage` over a second
 * window and then stays there.
 */
class RecentAverage(
  val windowDuration: Duration,
  val defaultAverage: Double,
  currentTime: Time = Time.now // passing in start time to simplify scalacheck tests
) extends Average {
  private[this] val fallback = Some(defaultAverage)
  private[this] val current = new ResettableAverage(Some(defaultAverage))
  private[this] var previous: Option[Double] = None
  private[this] var windowStart: Time = currentTime

  // Linear interpolation from v1 (fraction 0) to v2 (fraction 1); the fraction is clamped to [0, 1].
  private[this] def mix(fractOfV2: Double, v1: Double, v2: Double): Double = {
    val clamped = math.max(0.0, math.min(1.0, fractOfV2))
    (1.0 - clamped) * v1 + clamped * v2
  }

  // Elapsed time since windowStart, in units of windowDuration, floored at 0.
  private[this] def timeFract: Double =
    math.max(0.0, windowStart.untilNow.inNanoseconds.toDouble / windowDuration.inNanoseconds)

  def value: Some[Double] =
    synchronized {
      val elapsed = timeFract
      if (elapsed < 1.0) {
        // inside the window: move from the previous average to the current one
        Some(mix(elapsed, previous.getOrElse(defaultAverage), current.getValue))
      } else if (elapsed < 2.0) {
        // one window overdue: move from the current average back to the default
        Some(mix(elapsed - 1.0, current.getValue, defaultAverage))
      } else {
        fallback
      }
    }

  def getValue: Double = value.get

  def record(dataPoint: Double, count: Double = 1.0): Unit =
    synchronized {
      // if we're past windowDuration, roll average
      val now = Time.now
      val windowExpired = now - windowStart > windowDuration
      if (windowExpired) {
        previous = value
        windowStart = now
        current.reset()
      }
      current.record(dataPoint, count)
    }

  override def toString =
    s"RecentAverage(window=$windowDuration, default=$defaultAverage, " +
      s"prevValue=$previous, value=$value, timeFract=$timeFract)"
}

/**
 * Plain sum/count accumulator that can be reset. It does no locking of its
 * own; the two classes above only touch it inside their `synchronized` blocks.
 *
 * @param defaultAverage value reported while the count is zero; its static type
 *                       (`None.type` or `Some[Double]`) decides whether
 *                       `getValue` can be called.
 */
private class ResettableAverage[DoubleOpt <: Option[Double]](defaultAverage: DoubleOpt)
    extends Average {
  private[this] var samples: Double = 0
  private[this] var total: Double = 0

  def reset(): Unit = {
    samples = 0
    total = 0
  }

  // NOTE(review): dataPoint is added to the sum as-is (not multiplied by
  // count) — presumably callers pass a pre-summed value when count != 1; confirm.
  def record(dataPoint: Double, count: Double): Unit = {
    samples += count
    total += dataPoint
  }

  def value: Option[Double] =
    if (samples == 0) defaultAverage
    else Some(total / samples)

  def getValue(implicit ev: DoubleOpt <:< Some[Double]): Double =
    value.get

  def count: Double = samples
}
A BatchExecutor asks for a + * function that takes a Seq[In] and returns a Future[Seq[Out]]; in return it gives you + * an `In => Future[Out]` interface so that you can incrementally submit tasks to be + * performed when the criteria for batch flushing are met. + * + * Examples: + * val batcherFactory = BatchExecutorFactory.sized(10) + * def processBatch(reqs: Seq[Request]): Future[Seq[Response]] + * val batcher = batcherFactory(processBatch) + * + * val response: Future[Response] = batcher(new Request) + * + * The batcher will wait until 10 requests have been submitted, then delegate + * to the processBatch method to compute the responses. + * + * You can also construct a BatchExecutor that has a time-based threshold or both: + * val batcherFactory = BatchExecutorFactory.sizedAndTimed( + * sizeThreshold = 10, timeThreshold = 10.milliseconds, timer = new JavaTimer(true)) + * + * A batcher's size can be controlled at runtime through a bufSizeFraction function + * that should return a float between 0.0 and 1.0 that represents the fraction + * of the sizeThreshold that should be used for the next batch to be collected.
/**
 * Factory methods producing [[BatchExecutorFactory]] instances with a size
 * threshold, a time threshold, or both.
 */
@deprecated("Use `Future.batched`", "2.6.1")
object BatchExecutorFactory {

  /** Fraction used by the non-dynamic factories: always the full size threshold. */
  final val DefaultBufSizeFraction = 1.0f

  /** Factory whose executors use a size threshold of 1, i.e. flush on every submission. */
  lazy val instant = sized(1)

  /** Executors flush once `sizeThreshold` items are buffered; no timer is involved. */
  def sized(sizeThreshold: Int): BatchExecutorFactory = new BatchExecutorFactory {
    override def apply[In, Out](f: Seq[In] => Future[Seq[Out]]) = {
      new BatchExecutor(sizeThreshold, None, f, DefaultBufSizeFraction)
    }
  }

  /** Executors flush `timeThreshold` after the first buffered item; size is effectively unbounded. */
  def timed(timeThreshold: Duration, timer: Timer): BatchExecutorFactory =
    sizedAndTimed(Int.MaxValue, timeThreshold, timer)

  /** Executors flush on whichever threshold is reached first. */
  def sizedAndTimed(
    sizeThreshold: Int,
    timeThreshold: Duration,
    timer: Timer
  ): BatchExecutorFactory =
    dynamicSizedAndTimed(sizeThreshold, timeThreshold, timer, DefaultBufSizeFraction)

  /**
   * Like `sizedAndTimed`, but the effective size threshold for each new batch is
   * `bufSizeFraction * sizeThreshold`; `bufSizeFraction` is by-name and is
   * re-evaluated whenever a batch is flushed.
   */
  def dynamicSizedAndTimed(
    sizeThreshold: Int,
    timeThreshold: Duration,
    timer: Timer,
    bufSizeFraction: => Float
  ): BatchExecutorFactory = new BatchExecutorFactory {
    override def apply[In, Out](f: (Seq[In]) => Future[Seq[Out]]) = {
      new BatchExecutor(sizeThreshold, Some((timeThreshold, timer)), f, bufSizeFraction)
    }
  }
}

/**
 * Buffers submitted inputs and hands them to `f` in batches. Each call to
 * `apply` returns a Future that is completed with the output at the same
 * position in the batch result.
 *
 * @param maxSizeThreshold upper bound for the per-batch size threshold
 * @param timeThreshold    optional delay (and the timer to schedule it on) after
 *                         which a non-empty buffer is flushed regardless of size
 * @param f                the batch function
 * @param bufSizeFraction  by-name fraction of `maxSizeThreshold` to use for the
 *                         next batch; values outside [0, 1] are clamped with a warning
 */
@deprecated("Use `Future.batched`", "2.6.1")
class BatchExecutor[In, Out] private[util] (
  maxSizeThreshold: Int,
  timeThreshold: Option[(Duration, Timer)],
  f: Seq[In] => Future[Seq[Out]],
  bufSizeFraction: => Float) { batcher =>

  // A pending timer-driven flush. `cancelled` is checked under the batcher's
  // lock so a flush that lost the race against a size-triggered flush is a no-op.
  private[this] class ScheduledFlush(after: Duration, timer: Timer) {
    @volatile private[this] var cancelled = false
    private[this] val task = timer.schedule(after.fromNow) { flush() }

    def cancel(): Unit = {
      cancelled = true
      task.cancel()
    }

    private[this] def flush(): Unit = {
      val doAfter = batcher.synchronized {
        if (!cancelled) {
          flushBatch()
        } else { () =>
          ()
        }
      }

      // run the batch outside the lock
      doAfter()
    }
  }

  private[this] val log = Logger.get("BatchExecutor")

  // ArrayBuffer allocates its initial capacity eagerly, and
  // BatchExecutorFactory.timed passes Int.MaxValue as the size threshold, so
  // the pre-allocation is capped; the buffer still grows on demand.
  private[this] val initialBufCapacity = math.min(maxSizeThreshold, 1024)

  // operations on these are synchronized on `this`
  private[this] val buf = new ArrayBuffer[(In, Promise[Out])](initialBufCapacity)
  private[this] var scheduled: Option[ScheduledFlush] = None
  private[this] var currentBufThreshold = newBufThreshold

  private[this] def shouldSchedule = timeThreshold.isDefined && scheduled.isEmpty

  // Evaluates bufSizeFraction once and clamps it to [0, 1], warning when out of range.
  private[this] def currentBufFraction = {
    val fract = bufSizeFraction

    if (fract > 1.0f) {
      log.warning(
        "value returned for BatchExecutor.bufSizeFraction (%f) was > 1.0f, using 1.0",
        fract
      )
      1.0f
    } else if (fract < 0.0f) {
      log.warning(
        "value returned for BatchExecutor.bufSizeFraction (%f) was negative, using 0.0f",
        fract
      )
      0.0f
    } else {
      fract
    }
  }

  // Size threshold for the next batch, kept within [1, maxSizeThreshold].
  private[this] def newBufThreshold = {
    val size: Int = math.round(currentBufFraction * maxSizeThreshold)

    if (size < 1) {
      1
    } else if (size >= maxSizeThreshold) {
      maxSizeThreshold
    } else {
      size
    }
  }

  /** Submits one input; the returned Future completes when its batch has been processed. */
  def apply(t: In): Future[Out] = {
    enqueue(t)
  }

  private[this] def enqueue(t: In): Future[Out] = {
    val promise = new Promise[Out]
    val doAfter = synchronized {
      buf.append((t, promise))
      if (buf.size >= currentBufThreshold) {
        flushBatch()
      } else {
        scheduleFlushIfNecessary()
        () => ()
      }
    }

    // the batch function runs outside the lock
    doAfter()
    promise
  }

  private[this] def scheduleFlushIfNecessary(): Unit = {
    timeThreshold foreach {
      case (duration, timer) =>
        if (shouldSchedule) {
          scheduled = Some(new ScheduledFlush(duration, timer))
        }
    }
  }

  // Detaches the buffered batch and returns a thunk that executes it.
  private[this] def flushBatch(): () => Unit = {
    // this must be executed within a synchronize block
    val prevBatch = new ArrayBuffer[(In, Promise[Out])](buf.length)
    buf.copyToBuffer(prevBatch)
    buf.clear()

    scheduled foreach { _.cancel() }
    scheduled = None
    currentBufThreshold = newBufThreshold // set the next batch's size

    () =>
      try {
        executeBatch(prevBatch)
      } catch {
        case e: Throwable =>
          log.warning(e, "unhandled exception caught in BatchExecutor: %s", e.toString)
      }
  }

  private[this] def executeBatch(batch: Seq[(In, Promise[Out])]): Unit = {
    // Promises interrupted while buffered are failed here and left out of the batch.
    val uncancelled = batch filter {
      case (_, p) =>
        p.isInterrupted match {
          case Some(_) =>
            p.setException(new CancellationException)
            false
          case None => true
        }
    }

    val ins = uncancelled map { case (in, _) => in }
    // N.B. intentionally not linking cancellation of these promises to the execution of the batch
    // because it seems that in most cases you would be canceling mostly uncanceled work for an
    // outlier.
    val promises = uncancelled map { case (_, promise) => promise }

    // A synchronous throw from `f` must fail the waiting promises; previously it
    // was only logged by the caller and the promises were never completed.
    val result: Future[Seq[Out]] =
      try f(ins)
      catch {
        case scala.util.control.NonFatal(e) => Future.exception(e)
      }

    result respond {
      case Return(outs) =>
        (outs zip promises) foreach {
          case (out, p) =>
            p() = Return(out)
        }
        // `zip` stops at the shorter side: fail any promise that got no result
        // instead of leaving its caller waiting forever.
        if (outs.size < promises.size) {
          val missing = Throw(
            new IllegalStateException(
              "BatchExecutor: batch function returned %d results for %d inputs"
                .format(outs.size, promises.size)
            )
          )
          promises.drop(outs.size) foreach { _() = missing }
        }
      case Throw(e) =>
        val t = Throw(e)
        promises foreach { _() = t }
    }
  }
}
/**
 * Extractor that recognises cancellation failures, whether they appear at the
 * top level or (via `RootCause`) wrapped inside another throwable.
 *
 * Intended for use as a pattern: `case CancelledExceptionExtractor(cause) => ...`.
 * Case order matters: the first matching case wins.
 */
object CancelledExceptionExtractor {
  def unapply(e: Throwable): Option[Throwable] = {
    e match {
      // A FutureCancelledException is itself the cancellation; return it unchanged.
      case _: FutureCancelledException => Some(e)
      // Defer to the mux-specific and generic categorizers; each yields the
      // throwable it extracted.
      case MuxCancelledCategorizer(cause) => Some(cause)
      case CancelledCategorizer(cause) => Some(cause)
      // Otherwise re-apply this extractor to whatever RootCause extracts from `e`.
      // NOTE(review): presumably the innermost cause, per
      // com.twitter.util.Throwables.RootCause — confirm.
      case RootCause(CancelledExceptionExtractor(cause)) => Some(cause)
      case _ => None
    }
  }
}
object Effect {
  // Single shared no-op instance; `unit` hands it out under the requested type.
  private[this] val _unit = Effect[Any](_ => ())

  /**
   * The effect that does nothing. It is the identity element for `also`:
   * combining it with any effect E behaves like E.
   */
  def unit[A]: Effect[A] = _unit.asInstanceOf[Effect[A]]

  /**
   * Wraps a plain function as an Effect.
   */
  def apply[A](f: A => Unit): Effect[A] =
    new Effect[A] {
      override def apply(value: A): Unit = f(value)
    }

  /**
   * An effect that runs `f` for the values it is defined at and does nothing
   * for the rest.
   */
  def fromPartial[A](f: PartialFunction[A, Unit]): Effect[A] =
    Effect[A] { value =>
      if (f.isDefinedAt(value)) f(value) else ()
    }
}

/**
 * A side effect performed with a value. Effects compose with `also`, with
 * `Effect.unit` as the neutral element.
 */
trait Effect[A] extends (A => Unit) { self =>

  /**
   * Pass-through function: runs this effect on the value, then returns the
   * value unchanged.
   */
  lazy val identity: A => A = { value =>
    self(value)
    value
  }

  /**
   * An effect that performs this effect first and `next` afterwards, both on
   * the same value.
   */
  def also(next: Effect[A]): Effect[A] =
    Effect[A](value => next(identity(value)))

  /**
   * Adapts this effect to a different input type by first applying `extract`
   * (contravariant map).
   */
  def contramap[B](extract: B => A): Effect[B] =
    Effect[B](input => self(extract(input)))

  /**
   * Perform this effect only while the gate returns true.
   */
  @deprecated("Use enabledBy(() => Boolean)", "2.5.1")
  def enabledBy(enabled: Gate[Unit]): Effect[A] =
    enabledBy(() => enabled())

  /**
   * Perform this effect only while `enabled` returns true. The function is
   * consulted on every application.
   */
  def enabledBy(enabled: () => Boolean): Effect[A] =
    onlyIf(_ => enabled())

  /**
   * Perform this effect only for inputs that satisfy `predicate`.
   */
  def onlyIf(predicate: A => Boolean): Effect[A] =
    Effect[A] { value =>
      if (predicate(value)) self(value)
    }
}
/**
 * Maps a throwable to zero or more categories (each a `Seq[String]` path).
 * The primitive operation is `lift`, which turns a consumer of categories into
 * a consumer of throwables; `apply` is derived from it.
 */
trait ExceptionCategorizer {
  import ExceptionCategorizer._

  /** Turns an effect on categories into an effect on throwables. */
  def lift(effect: Effect[Category]): Effect[Throwable]

  /** The set of categories this categorizer emits for `t`. */
  def apply(t: Throwable): Set[Category] = {
    val collected = mutable.Set.empty[Category]
    val collector = lift(Effect[Category](category => collected += category))
    collector(t)
    collected.toSet
  }

  /**
   * A categorizer that prepends `scope` to every category produced by this
   * one. An empty scope returns this categorizer itself.
   */
  def scoped(scope: Seq[String]): ExceptionCategorizer =
    if (scope.nonEmpty) {
      val prefixed: Category => Category = Memoize(scope ++ _)
      fromLift(effect => lift(effect.contramap(prefixed)))
    } else {
      this
    }

  /**
   * A categorizer that reports the categories of this one followed by those of `that`.
   */
  def ++(that: ExceptionCategorizer): ExceptionCategorizer =
    fromLift { effect =>
      val mine = this.lift(effect)
      val theirs = that.lift(effect)
      mine.also(theirs)
    }

  /**
   * A categorizer that reports categories only for throwables satisfying `pred`.
   */
  def onlyIf(pred: Throwable => Boolean): ExceptionCategorizer =
    fromLift(effect => lift(effect).onlyIf(pred))
}

object ExceptionCategorizer {
  type Category = Seq[String]

  /** Categorizers that ignore the throwable and always report the given categories. */
  def const(categories: Set[Category]): ExceptionCategorizer = ExceptionCategorizer(_ => categories)
  def const(c: Category): ExceptionCategorizer = const(Set(c))
  def const(s: String): ExceptionCategorizer = const(Seq(s))

  /** Builds a categorizer from a function returning the full category set. */
  def apply(fn: Throwable => Set[Category]): ExceptionCategorizer =
    new ExceptionCategorizer {
      def lift(effect: Effect[Category]): Effect[Throwable] =
        Effect[Throwable](t => fn(t).foreach(effect))
      // answer directly from fn rather than collecting through lift
      override def apply(t: Throwable): Set[Category] = fn(t)
    }

  /** Builds a categorizer directly from its `lift` implementation. */
  def fromLift(fn: Effect[Category] => Effect[Throwable]): ExceptionCategorizer =
    new ExceptionCategorizer {
      def lift(effect: Effect[Category]): Effect[Throwable] = fn(effect)
    }

  /** A categorizer reporting exactly one category per throwable. */
  def singular(fn: Throwable => Category): ExceptionCategorizer =
    fromLift(effect => effect.contramap(fn))

  /** A categorizer reporting exactly one single-element category per throwable. */
  def simple(fn: Throwable => String): ExceptionCategorizer =
    singular(t => Seq(fn(t)))

  /**
   * Reports `name` itself plus the result of `sanitizeClassnameChain` scoped under `name`.
   */
  def default(
    name: Category = Seq("exceptions"),
    sanitizeClassnameChain: Throwable => Seq[String] = ThrowableHelper.sanitizeClassnameChain
  ): ExceptionCategorizer = {
    val total = ExceptionCategorizer.const(name)
    val perClass = ExceptionCategorizer.singular(sanitizeClassnameChain).scoped(name)
    total ++ perClass
  }
}
/**
 * Hands out [[ExceptionCounter]]s by scope path, going through `Memoize` so
 * that (presumably) each distinct path is built only once.
 *
 * @param stats
 *   the unscoped statsReceiver on which to hang the counters
 * @param categorizer
 *   decides which categories a throwable is counted under; it is scoped by the
 *   requested path for every counter created.
 */
class MemoizedExceptionCounterFactory(stats: StatsReceiver, categorizer: ExceptionCategorizer) {

  /**
   * Factory using the default categorizer.
   *
   * @param stats
   *   the unscoped statsReceiver on which to hang the counters
   */
  def this(stats: StatsReceiver) =
    this(stats, ExceptionCategorizer.default())

  /**
   * Factory using the default categorizer with `suffix` in place of its
   * default name ("exceptions"), mirroring ExceptionCounter's name parameter.
   *
   * @param stats
   *   the unscoped statsReceiver on which to hang the counters
   * @param suffix
   *   name/scope passed to `ExceptionCategorizer.default`
   */
  def this(stats: StatsReceiver, suffix: Seq[String]) =
    this(stats, ExceptionCategorizer.default(suffix))

  private[this] val counterFor =
    Memoize((scope: Seq[String]) => new ExceptionCounter(stats, categorizer.scoped(scope)))

  /** The exception counter for the given scope path. */
  def apply(path: String*): ExceptionCounter = counterFor(path)
}
/**
 * Tracks how often each key occurs among the last `size` keys passed to
 * `incr`, using a circular buffer plus a per-key count map.
 *
 * @param size          capacity of the circular buffer; must be positive
 * @param threshold     occurrence count of interest; must be greater than 1
 * @param trigger       invoked (outside the lock) when a key's count becomes
 *                      exactly `threshold`
 * @param statsReceiver receives the per-call count under
 *                      `frequencyCounter/keyCount`
 * @throws IllegalArgumentException if `threshold <= 1` or `size <= 0`
 */
class FrequencyCounter[Q](
  size: Int,
  threshold: Int,
  trigger: Q => Unit,
  statsReceiver: StatsReceiver = NullStatsReceiver) {
  require(threshold > 1) // in order to minimize work for the common case
  // A zero-sized buffer would fail on the first incr (index out of bounds and
  // modulo by zero), so reject it at construction time.
  require(size > 0, s"size ($size) must be positive")
  private[this] val buffer = new mutable.ArraySeq[Q](size)
  private[this] var index = 0
  private[this] val counts = mutable.Map[Q, Int]()

  private[this] val keyCountStat = statsReceiver.scope("frequencyCounter").stat("keyCount")

  /**
   * Adds a new key to the circular buffer and updates frequency counts.
   * Runs trigger if this key occurs exactly `threshold` times in the buffer.
   * Returns true if this key occurs at least `threshold` times in the buffer.
   */
  def incr(key: Q): Boolean = {
    // TODO(aa): maybe write lock-free version
    val count = synchronized {
      counts(key) = counts.getOrElse(key, 0) + 1

      // Slots that have never been written hold null; Option(...) skips them.
      // Otherwise the key about to be overwritten leaves the window.
      Option(buffer(index)) foreach { oldKey =>
        val countVal = counts(oldKey)
        if (countVal == 1) {
          counts -= oldKey
        } else {
          counts(oldKey) = countVal - 1
        }
      }

      buffer(index) = key
      index = (index + 1) % size
      counts(key)
    }
    keyCountStat.add(count)
    if (count == threshold) {
      trigger(key)
    }
    count >= threshold
  }

}
/**
 * A function wrapped as an arrow, so that it can be composed with other
 * FunctionArrows.
 *
 * Background on the Arrow abstraction:
 * http://en.wikipedia.org/wiki/Arrow_(computer_science)
 */
trait FunctionArrow[-A, +B] extends (A => B) { first =>

  /**
   * Sequential composition: the resulting arrow applies this arrow and feeds
   * its result to `next`.
   */
  def andThen[C](next: FunctionArrow[B, C]): FunctionArrow[A, C] =
    new FunctionArrow[A, C] {
      override def apply(input: A): C = {
        val intermediate = first(input)
        next.apply(intermediate)
      }
    }
}
+ */ + def fromFunction[A, B](f: A => B): FutureArrow[A, B] = fromFunctionArrow(FunctionArrow(f)) + + /** + * Produce a FutureArrow from a function `A => Try[B]`. + * + * The Try is evaluated within a Future. Thus, Throw results are translated + * to `Future.exception`s. + */ + def fromTry[A, B](f: A => Try[B]): FutureArrow[A, B] = + FutureArrow[A, B](a => Future.const(f(a))) + + /** + * A FutureArrow that simply returns a Future of its argument. + */ + def identity[A]: FutureArrow[A, A] = + FutureArrow[A, A](a => Future.value(a)) + + /** + * A FutureArrow with a constant result, regardless of input. + */ + def const[A, B](value: Future[B]): FutureArrow[A, B] = + FutureArrow[A, B](_ => value) + + /** + * Appends two FutureArrows together. + * + * This forms a category with 'identity'. + */ + def append[A, B, C](a: FutureArrow[A, B], b: FutureArrow[B, C]) = a.andThen(b) + + /** + * Produce a FutureArrow that applies an FutureEffect, returning the argument + * value as-is on success. If the effect returns an Future exception, then the + * result of the filter will also be that exception. + */ + def effect[A](effect: FutureEffect[A]): FutureArrow[A, A] = + apply(a => effect(a).map(_ => a)) + + /** + * Produces a FutureArrow that proxies to one of two others, depending on a + * predicate. + */ + def choose[A, B](predicate: A => Boolean, ifTrue: FutureArrow[A, B], ifFalse: FutureArrow[A, B]) = + FutureArrow[A, B](a => if (predicate(a)) ifTrue(a) else ifFalse(a)) + + /** + * Produces a FutureArrow whose application is guarded by a predicate. `f` is + * applied if the predicate returns true, otherwise the argument is simply + * returned. + */ + def onlyIf[A](predicate: A => Boolean, f: FutureArrow[A, A]) = + choose(predicate, f, identity[A]) + + /** + * Produces a FutureArrow that forwards to multiple FutureArrows and collects + * the results into a `Seq[B]`. Results are gathered via Future.collect, so + * failure semantics are inherited from that method. 
+ */ + def collect[A, B](arrows: Seq[FutureArrow[A, B]]): FutureArrow[A, Seq[B]] = + apply(a => Future.collect(arrows.map(arrow => arrow(a)))) + + private val RetryOnNonFailedFast: PartialFunction[Try[Any], Boolean] = { + case Throw(_: FailedFastException) => false + case Throw(_: Exception) => true + } +} + +/** + * A function encapsulating an asynchronous computation. + * + * Background on the Arrow abstraction: + * http://en.wikipedia.org/wiki/Arrow_(computer_science) + */ +trait FutureArrow[-A, +B] extends (A => Future[B]) { self => + + /** + * Composes two FutureArrows. Produces a new FutureArrow that performs both in + * series, depending on the success of the first. + */ + def andThen[C](next: FutureArrow[B, C]): FutureArrow[A, C] = + FutureArrow[A, C](a => self(a).flatMap(next.apply)) + + /** + * Combines this FutureArrow with another, producing one that translates a + * tuple of its constituents' arguments into a tuple of their results. + */ + def zipjoin[C, D](other: FutureArrow[C, D]): FutureArrow[(A, C), (B, D)] = + FutureArrow[(A, C), (B, D)] { + case (a, c) => self(a) join other(c) + } + + /** + * Converts a FutureArrow on a scalar input and output value into a FutureArrow on a + * Sequence of input values producing a pairwise sequence of output values. The elements + * of the input sequence are processed in parallel, so execution order is not guaranteed. + * Results are gathered via Future.collect, so failure semantics are inherited from that method. + */ + def liftSeq: FutureArrow[Seq[A], Seq[B]] = + FutureArrow[Seq[A], Seq[B]] { seqA => + Future.collect(seqA.map(this)) + } + + /** + * Converts this FutureArrow to a FutureEffect, where the result value is ignored. + */ + def asFutureEffect[A2 <: A]: FutureEffect[A2] = + FutureEffect(this.unit) + + /** + * Combines this FutureArrow with another, producing one that applies both + * in parallel, producing a tuple of their results. 
+ */ + def inParallel[A2 <: A, C](other: FutureArrow[A2, C]): FutureArrow[A2, (B, C)] = { + val paired = self.zipjoin(other) + FutureArrow[A2, (B, C)](a => paired((a, a))) + } + + /** + * Wrap a FutureArrow with an ExceptionCounter, thus providing + * observability into the arrow's success and failure. + */ + def countExceptions( + exceptionCounter: ExceptionCounter + ): FutureArrow[A, B] = + FutureArrow[A, B](request => exceptionCounter(self(request))) + + /** + * Returns a chained FutureArrow in which the given function will be called for any + * input that succeeds. + */ + def onSuccess[A2 <: A](f: (A2, B) => Unit): FutureArrow[A2, B] = + FutureArrow[A2, B](a => self(a).onSuccess(b => f(a, b))) + + /** + * Returns a chained FutureArrow in which the given function will be called for any + * input that fails. + */ + def onFailure[A2 <: A](f: (A2, Throwable) => Unit): FutureArrow[A2, B] = + FutureArrow[A2, B](a => self(a).onFailure(t => f(a, t))) + + /** + * Translate exception returned by a FutureArrow according to a + * PartialFunction. + */ + def translateExceptions( + translateException: PartialFunction[Throwable, Throwable] + ): FutureArrow[A, B] = + FutureArrow[A, B] { request => + self(request).rescue { + case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) + case t => Future.exception(t) + } + } + + /** + * Apply a FutureArrow, lifting any non-Future exceptions thrown into + * `Future.exception`s. + */ + def liftExceptions: FutureArrow[A, B] = + FutureArrow[A, B] { request => + // Flattening the Future[Future[Response]] is equivalent, but more concise + // than wrapping the arrow(request) call in a try/catch block that transforms + // the exception to a Future.exception, or at least was more concise before + // I added a four-line comment. + Future(self(request)).flatten + } + + /** + * Wrap a FutureArrow in exception-tracking and -translation. 
Given a + * filter and a handler, exceptional results will be observed and translated + * according to the function passed in this function's second argument list. + */ + def cleanly( + exceptionCounter: ExceptionCounter + )( + translateException: PartialFunction[Throwable, Throwable] = { case t => t } + ): FutureArrow[A, B] = { + liftExceptions + .translateExceptions(translateException) + .countExceptions(exceptionCounter) + } + + /** + * Produces a FutureArrow that tracks its own application latency. + */ + @deprecated("use trackLatency(StatsReceiver, (A2 => String)", "2.11.1") + def trackLatency[A2 <: A]( + extractName: (A2 => String), + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + trackLatency(statsReceiver, extractName) + + /** + * Produces a FutureArrow that tracks its own application latency. + */ + def trackLatency[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { request => + Stat.timeFuture(statsReceiver.stat(extractName(request), "latency_ms")) { + self(request) + } + } + + /** + * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of + * requests. + */ + @deprecated("use trackOutcome(StatsReceiver, (A2 => String)", "2.11.1") + def trackOutcome[A2 <: A]( + extractName: (A2 => String), + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + trackOutcome(statsReceiver, extractName) + + def trackOutcome[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + trackOutcome(statsReceiver, extractName, _ => None) + + /** + * Produces a FutureArrow that tracks the outcome (i.e. success vs failure) of + * requests. 
+ */ + def trackOutcome[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + exceptionCategorizer: Throwable => Option[String] + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { request => + val scope = statsReceiver.scope(extractName(request)) + + self(request).respond { r => + statsReceiver.counter("requests").incr() + scope.counter("requests").incr() + + r match { + case Return(_) => + statsReceiver.counter("success").incr() + scope.counter("success").incr() + + case Throw(t) => + val category = exceptionCategorizer(t).getOrElse("failures") + statsReceiver.counter(category).incr() + scope.counter(category).incr() + scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() + } + } + } + + /** + * Observe latency and success rate for any FutureArrow[A, B] where A is Observable + */ + def observed[A2 <: A with Observable]( + statsReceiver: StatsReceiver + ): FutureArrow[A2, B] = + observed(statsReceiver, exceptionCategorizer = _ => None) + + /** + * Observe latency and success rate for any FutureArrow[A, B] where A is Observable + */ + def observed[A2 <: A with Observable]( + statsReceiver: StatsReceiver, + exceptionCategorizer: Throwable => Option[String] + ): FutureArrow[A2, B] = + self.observed( + statsReceiver.scope("client_request"), + (a: A2) => a.requestName, + exceptionCategorizer + ) + + /** + * Observe latency and success rate for any FutureArrow + */ + def observed[A2 <: A]( + statsReceiver: StatsReceiver, + statsScope: A2 => String, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): FutureArrow[A2, B] = + self + .trackLatency(statsReceiver, statsScope) + .trackOutcome(statsReceiver, statsScope, exceptionCategorizer) + + /** + * Trace the future arrow using local spans as documented here: + * https://docbird.twitter.biz/finagle/Tracing.html + */ + def traced[A2 <: A]( + traceScope: A2 => String + ): FutureArrow[A2, B] = { + FutureArrow[A2, B] { a => + 
Trace.traceLocalFuture(traceScope(a))(self(a)) + } + } + + /** + * Produces a new FutureArrow where the given function is applied to the input, and the result + * passed to this FutureArrow. + */ + def contramap[C](f: C => A): FutureArrow[C, B] = + FutureArrow[C, B](f.andThen(self)) + + /** + * Produces a new FutureArrow where the given function is applied to the result of this + * FutureArrow. + */ + def map[C](f: B => C): FutureArrow[A, C] = + mapResult(_.map(f)) + + /** + * Produces a new FutureArrow where the given function is applied to the resulting Future of + * this FutureArrow. + */ + def mapResult[C](f: Future[B] => Future[C]): FutureArrow[A, C] = + FutureArrow[A, C](a => f(self(a))) + + /** + * Produces a new FutureArrow which translates exceptions into futures + */ + def rescue[B2 >: B]( + rescueException: PartialFunction[Throwable, Future[B2]] + ): FutureArrow[A, B2] = { + FutureArrow[A, B2] { a => + self(a).rescue(rescueException) + } + } + + /** + * Produces a new FutureArrow where the result value is ignored, and Unit is returned. + */ + def unit: FutureArrow[A, Unit] = + mapResult(_.unit) + + /** + * Returns a copy of this FutureArrow where the returned Future has its `.masked` + * method called. + */ + def masked: FutureArrow[A, B] = + mapResult(_.masked) + + /** + * Wraps this FutureArrow by passing the underlying operation to the given retry handler + * for possible retries. + */ + def retry(handler: RetryHandler[B]): FutureArrow[A, B] = + FutureArrow[A, B](a => handler(self(a))) + + def retry[A2 <: A]( + policy: RetryPolicy[Try[B]], + timer: Timer, + statsReceiver: StatsReceiver, + extractName: (A2 => String) + ): FutureArrow[A2, B] = + FutureArrow[A2, B] { a => + val scoped = statsReceiver.scope(extractName(a)) + RetryHandler(policy, timer, scoped)(self(a)) + } + + /** + * Produces a new FutureArrow where the returned Future[B] must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. 
+ * + * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: => Duration): FutureArrow[A, B] = + mapResult(_.within(timer, timeout)) + + /** + * Produces a new FutureArrow where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = + mapResult(_.within(timer, timeout, exc)) + + /** + * Produces a new FutureArrow where the returned Future[B] must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * The [[timeout]] is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is interrupted. + */ + def raiseWithin(timer: Timer, timeout: => Duration): FutureArrow[A, B] = + mapResult(_.raiseWithin(timeout)(timer)) + + /** + * Produces a new FutureArrow where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * [[timeout]] is passed by name to take advantage of deadlines passed in the request context. + * + * ''Note'': On timeout, the underlying future is interrupted. + */ + def raiseWithin(timer: Timer, timeout: => Duration, exc: => Throwable): FutureArrow[A, B] = + mapResult(_.raiseWithin(timer, timeout, exc)) + + /** + * Produces a finagle.Service instance that invokes this arrow. + */ + def asService: Service[A, B] = Service.mk(this) + + /** + * Produces a new FutureArrow with the given finagle.Filter applied to this instance. 
+ */ + def withFilter[A2, B2](filter: Filter[A2, B2, A, B]): FutureArrow[A2, B2] = + FutureArrow[A2, B2](filter.andThen(asService)) + + /** + * Produces a new FutureArrow with the given timeout which retries on Exceptions or timeouts and + * records stats about the logical request. This is only appropriate for idempotent operations. + */ + def observedWithTimeoutAndRetry[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + timer: Timer, + timeout: Duration, + numTries: Int, + shouldRetry: PartialFunction[Try[B], Boolean] = FutureArrow.RetryOnNonFailedFast + ): FutureArrow[A2, B] = { + val retryPolicy = RetryPolicy.tries(numTries, shouldRetry) + withTimeout(timer, timeout) + .retry(retryPolicy, timer, statsReceiver, extractName) + .trackLatency(statsReceiver, extractName) + .trackOutcome(statsReceiver, extractName) + } + + /** + * Produces a new FutureArrow with the given timeout and records stats about the logical request. + * This does not retry and is appropriate for non-idempotent operations. + */ + def observedWithTimeout[A2 <: A]( + statsReceiver: StatsReceiver, + extractName: (A2 => String), + timer: Timer, + timeout: Duration + ): FutureArrow[A2, B] = + withTimeout(timer, timeout) + .trackLatency(statsReceiver, extractName) + .trackOutcome(statsReceiver, extractName) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala new file mode 100644 index 000000000..aa20bcd9f --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/FutureEffect.scala @@ -0,0 +1,379 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.logging.{Logger, NullLogger} +import com.twitter.util._ + +object FutureEffect { + private[this] val _unit = FutureEffect[Any] { _ => + Future.Unit + } + + /** + * A FutureEffect that always succeeds. 
+ */ + def unit[T]: FutureEffect[T] = + _unit.asInstanceOf[FutureEffect[T]] + + /** + * A FutureEffect that always fails with the given exception. + */ + def fail[T](ex: Throwable): FutureEffect[T] = + FutureEffect[T] { _ => + Future.exception(ex) + } + + /** + * Lift a function returning a Future to a FutureEffect. + */ + def apply[T](f: T => Future[Unit]) = + new FutureEffect[T] { + override def apply(x: T) = f(x) + } + + /** + * Performs all of the effects in order. If any effect fails, the + * whole operation fails, and the subsequent effects are not + * attempted. + */ + def sequentially[T](effects: FutureEffect[T]*): FutureEffect[T] = + effects.foldLeft[FutureEffect[T]](unit[T])(_ andThen _) + + /** + * Perform all of the effects concurrently. If any effect fails, the + * whole operation fails, but any of the effects may or may not have + * taken place. + */ + def inParallel[T](effects: FutureEffect[T]*): FutureEffect[T] = + FutureEffect[T] { t => + Future.join(effects map { _(t) }) + } + + /** + * Lift a partial function to a FutureEffect. Inputs at which `f` is not + * defined are treated as a no-op and yield `Future.Unit`. + */ + def fromPartial[T](f: PartialFunction[T, Future[Unit]]) = + FutureEffect[T] { x => + if (f.isDefinedAt(x)) f(x) else Future.Unit + } + + /** + * Combines two FutureEffects into one that dispatches according to a gate. If the gate is + * true, use `a`, otherwise, use `b`. The gate is consulted on every application of the + * resulting effect. + */ + def selected[T](condition: Gate[Unit], a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = + selected(() => condition(), a, b) + + /** + * Combines two FutureEffects into one that dispatches according to a nullary boolean function. + * If the function returns true, use `a`, otherwise, use `b`. The function is called on every + * application of the resulting effect. + */ + def selected[T](f: () => Boolean, a: FutureEffect[T], b: FutureEffect[T]): FutureEffect[T] = + FutureEffect[T] { t => + if (f()) a(t) else b(t) + } +} + +/** + * A function whose only result is a future effect. This wrapper + * provides convenient combinators. + */ +trait FutureEffect[T] extends (T => Future[Unit]) { self => + + /** + * Simplified version of `apply` when type is `Unit`.
+ */ + def apply()(implicit ev: Unit <:< T): Future[Unit] = self(()) + + /** + * Combines two Future effects, performing this one first and + * performing the next one if this one succeeds. + */ + def andThen(next: FutureEffect[T]): FutureEffect[T] = + FutureEffect[T] { x => + self(x) flatMap { _ => + next(x) + } + } + + /** + * Wraps this FutureEffect with a failure handling function that will be chained to + * the Future returned by this FutureEffect. When this effect fails with an exception + * at which `handler` is defined, the FutureEffect returned by the handler is applied + * to the same input. + */ + def rescue( + handler: PartialFunction[Throwable, FutureEffect[T]] + ): FutureEffect[T] = + FutureEffect[T] { x => + self(x) rescue { + case t if handler.isDefinedAt(t) => + handler(t)(x) + } + } + + /** + * Combines two future effects, performing them both simultaneously. + * If either effect fails, the result will be failure, but the other + * effect will still have been invoked. + */ + def inParallel(other: FutureEffect[T]) = + FutureEffect[T] { x => + Future.join(Seq(self(x), other(x))) + } + + /** + * Perform this effect only if the provided gate returns true. + */ + def enabledBy(enabled: Gate[Unit]): FutureEffect[T] = + enabledBy(() => enabled()) + + /** + * Perform this effect only if the provided function returns true. The function + * is called on every application of the effect; the input is ignored. + */ + def enabledBy(enabled: () => Boolean): FutureEffect[T] = + onlyIf { _ => + enabled() + } + + /** + * Perform this effect only if the provided predicate returns true + * for the input. + */ + def onlyIf(predicate: T => Boolean) = + FutureEffect[T] { x => + if (predicate(x)) self(x) else Future.Unit + } + + /** + * Perform this effect with arg only if the condition is true; otherwise just return + * Future.Unit. `arg` is passed by name, so it is only evaluated when the condition is true. + */ + def when(condition: Boolean)(arg: => T): Future[Unit] = + if (condition) self(arg) else Future.Unit + + /** + * Adapt this effect to take a different input via the provided conversion. + * + * (Contravariant map) + */ + def contramap[U](g: U => T) = FutureEffect[U] { u => + self(g(u)) + } + + /** + * Adapt this effect to take a different input via the provided conversion.
+ * + * (Contravariant map) + */ + def contramapFuture[U](g: U => Future[T]) = FutureEffect[U] { u => + g(u) flatMap self + } + + /** + * Adapt this effect to take a different input via the provided conversion. + * If the output value of the given function is None, the effect is a no-op. + */ + def contramapOption[U](g: U => Option[T]) = + FutureEffect[U] { + g andThen { + case None => Future.Unit + case Some(t) => self(t) + } + } + + /** + * Adapt this effect to take a different input via the provided conversion. + * If the output value of the given function is future-None, the effect is a no-op. + * (Contravariant map) + */ + def contramapFutureOption[U](g: U => Future[Option[T]]) = + FutureEffect[U] { u => + g(u) flatMap { + case None => Future.Unit + case Some(x) => self(x) + } + } + + /** + * Adapt this effect to take a sequence of input values. + */ + def liftSeq: FutureEffect[Seq[T]] = + FutureEffect[Seq[T]] { seqT => + Future.join(seqT.map(self)) + } + + /** + * Allow the effect to fail, but immediately return success. The + * effect is not guaranteed to have finished when its future is + * available. + */ + def ignoreFailures: FutureEffect[T] = + FutureEffect[T] { x => + Try(self(x)); Future.Unit + } + + /** + * Allow the effect to fail but always return success. Unlike ignoreFailures, the + * effect is guaranteed to have finished when its future is available. + */ + def ignoreFailuresUponCompletion: FutureEffect[T] = + FutureEffect[T] { x => + Try(self(x)) match { + case Return(f) => f.handle { case _ => () } + case Throw(_) => Future.Unit + } + } + + /** + * Returns a chained FutureEffect in which the given function will be called for any + * input that succeeds. + */ + def onSuccess(f: T => Unit): FutureEffect[T] = + FutureEffect[T] { x => + self(x).onSuccess(_ => f(x)) + } + + /** + * Returns a chained FutureEffect in which the given function will be called for any + * input that fails. 
+ */ + def onFailure(f: (T, Throwable) => Unit): FutureEffect[T] = + FutureEffect[T] { x => + self(x).onFailure(t => f(x, t)) + } + + /** + * Translate exception returned by a FutureEffect according to a + * PartialFunction. + */ + def translateExceptions( + translateException: PartialFunction[Throwable, Throwable] + ): FutureEffect[T] = + FutureEffect[T] { request => + self(request) rescue { + case t if translateException.isDefinedAt(t) => Future.exception(translateException(t)) + case t => Future.exception(t) + } + } + + /** + * Wraps an effect with retry logic. Will retry against any failure. + */ + def retry(backoffs: Stream[Duration], timer: Timer, stats: StatsReceiver): FutureEffect[T] = + retry(RetryHandler.failuresOnly(backoffs, timer, stats)) + + /** + * Returns a new FutureEffect that executes the effect within the given RetryHandler, which + * may retry the operation on failures. + */ + def retry(handler: RetryHandler[Unit]): FutureEffect[T] = + FutureEffect[T](handler.wrap(self)) + + @deprecated("use trackOutcome", "2.11.1") + def countExceptions(stats: StatsReceiver, getScope: T => String) = { + val exceptionCounterFactory = new MemoizedExceptionCounterFactory(stats) + FutureEffect[T] { t => + exceptionCounterFactory(getScope(t)) { self(t) } + } + } + + /** + * Produces a FutureEffect that tracks the latency of the underlying operation. + */ + def trackLatency(stats: StatsReceiver, extractName: T => String): FutureEffect[T] = + FutureEffect[T] { t => + Stat.timeFuture(stats.stat(extractName(t), "latency_ms")) { self(t) } + } + + def trackOutcome( + stats: StatsReceiver, + extractName: T => String, + logger: Logger = NullLogger + ): FutureEffect[T] = trackOutcome(stats, extractName, logger, _ => None) + + /** + * Produces a FutureEffect that tracks the outcome (i.e. success vs failure) of + * requests, including counting exceptions by classname. 
+ */ + def trackOutcome( + stats: StatsReceiver, + extractName: T => String, + logger: Logger, + exceptionCategorizer: Throwable => Option[String] + ): FutureEffect[T] = + FutureEffect[T] { t => + val name = extractName(t) + val scope = stats.scope(name) + + self(t) respond { r => + scope.counter("requests").incr() + + r match { + case Return(_) => + scope.counter("success").incr() + + case Throw(t) => + val category = exceptionCategorizer(t).getOrElse("failures") + scope.counter(category).incr() + scope.scope(category).counter(ThrowableHelper.sanitizeClassnameChain(t): _*).incr() + logger.warning(t, s"failure in $name") + } + } + } + + /** + * Observe latency and success rate for any FutureEffect + * @param statsScope a function to produce a parent stats scope from the argument + * to the FutureEffect + * @param exceptionCategorizer a function to assign different Throwables with custom stats scopes. + */ + def observed( + statsReceiver: StatsReceiver, + statsScope: T => String, + logger: Logger = NullLogger, + exceptionCategorizer: Throwable => Option[String] = _ => None + ): FutureEffect[T] = + self + .trackLatency(statsReceiver, statsScope) + .trackOutcome(statsReceiver, statsScope, logger, exceptionCategorizer) + + /** + * Produces a new FutureEffect where the given function is applied to the result of this + * FutureEffect. + */ + def mapResult(f: Future[Unit] => Future[Unit]): FutureEffect[T] = + FutureEffect[T] { x => + f(self(x)) + } + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. 
+ */ + def withTimeout(timer: Timer, timeout: Duration): FutureEffect[T] = + mapResult(_.within(timer, timeout)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * ''Note'': On timeout, the underlying future is NOT interrupted. + */ + def withTimeout(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = + mapResult(_.within(timer, timeout, exc)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with a com.twitter.util.TimeoutException. + * + * ''Note'': On timeout, the underlying future is interrupted. + */ + def raiseWithin(timer: Timer, timeout: Duration): FutureEffect[T] = + mapResult(_.raiseWithin(timeout)(timer)) + + /** + * Produces a new FutureEffect where the returned Future must complete within the specified + * timeout, otherwise the Future fails with the specified Throwable. + * + * ''Note'': On timeout, the underlying future is interrupted. 
+ */ + def raiseWithin(timer: Timer, timeout: Duration, exc: => Throwable): FutureEffect[T] = + mapResult(_.raiseWithin(timer, timeout, exc)) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala new file mode 100644 index 000000000..7b1420bff --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Gate.scala @@ -0,0 +1,210 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Time} +import java.util.concurrent.ThreadLocalRandom +import scala.language.implicitConversions + +object Gate { + + /** + * Construct a new Gate from a boolean function and a string representation + */ + def apply[T](f: T => Boolean, repr: => String): Gate[T] = + new Gate[T] { + override def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean = f(asT(u)) + override def toString: String = repr + } + + /** + * Construct a new Gate from a boolean function + */ + def apply[T](f: T => Boolean): Gate[T] = Gate(f, "Gate(" + f + ")") + + /** + * Create a Gate[Any] with a probability of returning true + * that increases linearly with the availability, which should range from 0.0 to 1.0. + */ + def fromAvailability( + availability: => Double, + randomDouble: => Double = ThreadLocalRandom.current().nextDouble(), + repr: String = "Gate.fromAvailability" + ): Gate[Any] = + Gate(_ => randomDouble < math.max(math.min(availability, 1.0), 0.0), repr) + + /** + * Creates a Gate[Any] with a probability of returning true that + * increases linearly in time between startTime and (startTime + rampUpDuration). 
+ */ + def linearRampUp( + startTime: Time, + rampUpDuration: Duration, + randomDouble: => Double = ThreadLocalRandom.current().nextDouble() + ): Gate[Any] = { + val availability = availabilityFromLinearRampUp(startTime, rampUpDuration) + + fromAvailability( + availability(Time.now), + randomDouble, + repr = "Gate.rampUp(" + startTime + ", " + rampUpDuration + ")" + ) + } + + /** + * Generates an availability function that maps a point in time to an availability value + * in the range of 0.0 - 1.0. Availability is 1.0 if the given time is at or after + * (startTime + rampUpDuration), is 0.0 if it is at or before startTime, and is otherwise + * linearly interpolated between 0.0 and 1.0 as the time moves through the two endpoints. + * The end-of-ramp check is made first, so a zero rampUpDuration yields 1.0 at startTime. + */ + def availabilityFromLinearRampUp(startTime: Time, rampUpDuration: Duration): Time => Double = { + val endTime = startTime + rampUpDuration + val rampUpMillis = rampUpDuration.inMilliseconds.toDouble + now => { + if (now >= endTime) { + 1.0 + } else if (now <= startTime) { + 0.0 + } else { + (now - startTime).inMilliseconds.toDouble / rampUpMillis + } + } + } + + /** + * Returns a gate that increments true / false counters for each Gate invocation. Counter names + * can be overridden with trueName and falseName. + */ + def observed[T]( + gate: Gate[T], + stats: StatsReceiver, + trueName: String = "true", + falseName: String = "false" + ): Gate[T] = { + val trueCount = stats.counter(trueName) + val falseCount = stats.counter(falseName) + gate + .onTrue[T] { _ => + trueCount.incr() + } + .onFalse[T] { _ => + falseCount.incr() + } + } + + /** + * Construct a new Gate from a boolean value + */ + def const(v: Boolean): Gate[Any] = Gate(_ => v, v.toString) + + /** + * Constructs a new Gate that returns true if any of the gates in the input list return true. + * Always returns false when the input list is empty.
+ */ + def any[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.False)(_ | _) + + /** + * Constructs a new Gate that returns true iff all the gates in the input list return true. + * Always returns true when the input list is empty. + */ + def all[T](gates: Gate[T]*): Gate[T] = gates.foldLeft[Gate[T]](Gate.True)(_ & _) + + /** + * Gates that always return true/false + */ + val True: Gate[Any] = const(true) + val False: Gate[Any] = const(false) + + // Implicit conversions to downcast Gate to a plain function + implicit def gate2function1[T](g: Gate[T]): T => Boolean = g(_) + implicit def gate2function0(g: Gate[Unit]): () => Boolean = () => g(()) +} + +/** + * A function from T to Boolean, composable with boolean-like operators. + * Also supports building higher-order functions + * for dispatching based upon the value of this function over values of type T. + * Note: Gate does not inherit from T => Boolean in order to enforce correct type checking + * in the apply method of Gate[Unit]. (Scala is over eager to convert the return type of + * expression to Unit.) Instead, an implicit conversion allows Gate to be used in methods that + * require a function T => Boolean. + */ +trait Gate[-T] { + + /** + * A function from T => boolean with strict type bounds + */ + def apply[U](u: U)(implicit asT: <:<[U, T]): Boolean + + /** + * A nullary variant of apply that can be used when T is a Unit + */ + def apply()(implicit isUnit: <:<[Unit, T]): Boolean = apply(isUnit(())) + + /** + * Return a new Gate which applies the given function and then calls this Gate + */ + def contramap[U](f: U => T): Gate[U] = Gate(f andThen this, "%s.contramap(%s)".format(this, f)) + + /** + * Returns a new Gate of the requested type that ignores its input + */ + def on[U](implicit isUnit: <:<[Unit, T]): Gate[U] = contramap((_: U) => ()) + + /** + * Returns a new Gate which returns true when this Gate returns false + */ + def unary_! 
: Gate[T] = Gate(x => !this(x), "!%s".format(this)) + + /** + * Returns a new Gate which returns true when both this Gate and other Gate return true. + * Evaluation short-circuits: other is only consulted when this Gate returns true. + */ + def &[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) && other(x), "(%s & %s)".format(this, other)) + + /** + * Returns a new Gate which returns true when either this Gate or other Gate return true. + * Evaluation short-circuits: other is only consulted when this Gate returns false. + */ + def |[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) || other(x), "(%s | %s)".format(this, other)) + + /** + * Returns a new Gate which returns true when return values of this Gate and other Gate differ + */ + def ^[U <: T](other: Gate[U]): Gate[U] = + Gate(x => this(x) ^ other(x), "(%s ^ %s)".format(this, other)) + + /** + * Returns the first value when this Gate returns true, or the second value if it returns false. + * Both values are passed by name, so only the chosen one is evaluated. + */ + def pick[A](t: T, x: => A, y: => A): A = if (this(t)) x else y + + /** + * A variant of pick that doesn't require a gate argument when Unit is a subtype of T + * (for example, on a Gate[Unit]) + */ + def pick[A](x: => A, y: => A)(implicit isUnit: <:<[Unit, T]): A = pick(isUnit(()), x, y) + + /** + * Returns a 1-arg function that dynamically picks x or y based upon the function arg. + */ + def select[A](x: => A, y: => A): T => A = pick(_, x, y) + + /** + * Returns a version of this gate that runs the effect if the gate returns true. + */ + def onTrue[U <: T](f: U => Unit): Gate[U] = + Gate { (t: U) => + val v = this(t) + if (v) f(t) + v + } + + /** + * Returns a version of this gate that runs the effect if the gate returns false.
+ */ + def onFalse[U <: T](f: U => Unit): Gate[U] = + Gate { (t: U) => + val v = this(t) + if (!v) f(t) + v + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala new file mode 100644 index 000000000..262ea1bab --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/LogarithmicallyBucketedTimer.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{StatsReceiver, Stat} +import com.twitter.util.Future + +object LogarithmicallyBucketedTimer { + val LatencyStatName = "latency_ms" +} + +/** + * helper to bucket timings by quantity. it produces base10 and baseE log buckets. + */ +class LogarithmicallyBucketedTimer( + statsReceiver: StatsReceiver, + prefix: String = LogarithmicallyBucketedTimer.LatencyStatName) { + + protected[this] def base10Key(count: Int) = + prefix + "_log_10_" + math.floor(math.log10(count)).toInt + + protected[this] def baseEKey(count: Int) = + prefix + "_log_E_" + math.floor(math.log(count)).toInt + + /** + * takes the base10 and baseE logs of the count, adds timings to the + * appropriate buckets + */ + def apply[T](count: Int = 0)(f: => Future[T]) = { + Stat.timeFuture(statsReceiver.stat(prefix)) { + // only bucketize for positive, non-zero counts + if (count > 0) { + Stat.timeFuture(statsReceiver.stat(base10Key(count))) { + Stat.timeFuture(statsReceiver.stat(baseEKey(count))) { + f + } + } + } else { + f + } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala new file mode 100644 index 000000000..995d01906 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/MemoizingStatsReceiver.scala @@ -0,0 +1,46 @@ +package com.twitter.servo.util + +import 
com.twitter.finagle.stats._ + +/** + * Stores scoped StatsReceivers in a map to avoid unnecessary object creation. + */ +class MemoizingStatsReceiver(val self: StatsReceiver) + extends StatsReceiver + with DelegatingStatsReceiver + with Proxy { + def underlying: Seq[StatsReceiver] = Seq(self) + + val repr = self.repr + + private[this] lazy val scopeMemo = + Memoize[String, StatsReceiver] { name => + new MemoizingStatsReceiver(self.scope(name)) + } + + private[this] lazy val counterMemo = + Memoize[(Seq[String], Verbosity), Counter] { + case (names, verbosity) => + self.counter(verbosity, names: _*) + } + + private[this] lazy val statMemo = + Memoize[(Seq[String], Verbosity), Stat] { + case (names, verbosity) => + self.stat(verbosity, names: _*) + } + + def counter(metricBuilder: MetricBuilder): Counter = + counterMemo(metricBuilder.name -> metricBuilder.verbosity) + + def stat(metricBuilder: MetricBuilder): Stat = statMemo( + metricBuilder.name -> metricBuilder.verbosity) + + def addGauge(metricBuilder: MetricBuilder)(f: => Float): Gauge = { + // scalafix:off StoreGaugesAsMemberVariables + self.addGauge(metricBuilder)(f) + // scalafix:on StoreGaugesAsMemberVariables + } + + override def scope(name: String): StatsReceiver = scopeMemo(name) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala new file mode 100644 index 000000000..443911763 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Observable.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.util + +import com.twitter.finagle.thrift.ClientId + +/** + * A trait defining contextual information necessary to authorize + * and observe a request. + */ +trait Observable { + val requestName: String + val clientId: Option[ClientId] + + /** + * An Option[String] representation of the request-issuer's ClientId. 
+ */ + lazy val clientIdString: Option[String] = + // It's possible for `ClientId.name` to be `null`, so we wrap it in + // `Option()` to force such cases to be None. + clientId flatMap { cid => + Option(cid.name) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala new file mode 100644 index 000000000..11635316a --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/OptionOrdering.scala @@ -0,0 +1,22 @@ +package com.twitter.servo.util + +object OptionOrdering { + + /** + * Creates an Ordering of Option objects. Nones are ordered before Somes, and two Somes + * are ordered according to the given value ordering. + */ + def apply[A](valueOrdering: Ordering[A]) = new Ordering[Option[A]] { + // Nones before Somes, for two Somes, use valueOrdering + def compare(x: Option[A], y: Option[A]): Int = { + x match { + case None => if (y.nonEmpty) -1 else 0 + case Some(xValue) => + y match { + case None => 1 + case Some(yValue) => valueOrdering.compare(xValue, yValue) + } + } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala new file mode 100644 index 000000000..569538554 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RandomPerturber.scala @@ -0,0 +1,16 @@ +package com.twitter.servo.util + +import com.twitter.util.Duration +import scala.util.Random + +/** + * A class for generating bounded random fluctuations around a given Duration. 
+ */ +class RandomPerturber(percentage: Float, rnd: Random = new Random) extends (Duration => Duration) { + assert(percentage > 0 && percentage < 1, "percentage must be > 0 and < 1") + + override def apply(dur: Duration): Duration = { + val ns = dur.inNanoseconds + Duration.fromNanoseconds((ns + ((2 * rnd.nextFloat - 1) * percentage * ns)).toLong) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala new file mode 100644 index 000000000..749addcc7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RateLimitingLogger.scala @@ -0,0 +1,71 @@ +package com.twitter.servo.util + +import com.twitter.logging.{Level, Logger} +import com.twitter.util.{Duration, Time} +import com.twitter.conversions.DurationOps._ +import java.util.concurrent.atomic.AtomicLong + +object RateLimitingLogger { + private[util] val DefaultLoggerName = "servo" + private[util] val DefaultLogInterval = 500.milliseconds +} + +/** + * Class that makes it easier to rate-limit log messages, either by call site, or by + * logical grouping of messages. 
+ * @param interval the interval in which messages should be rate limited + * @param logger the logger to use + */ +class RateLimitingLogger( + interval: Duration = RateLimitingLogger.DefaultLogInterval, + logger: Logger = Logger(RateLimitingLogger.DefaultLoggerName)) { + private[this] val last: AtomicLong = new AtomicLong(0L) + private[this] val sinceLast: AtomicLong = new AtomicLong(0L) + + private[this] val intervalNanos = interval.inNanoseconds + private[this] val intervalMsString = interval.inMilliseconds.toString + + private[this] def limited(action: Long => Unit): Unit = { + val now = Time.now.inNanoseconds + val lastNanos = last.get() + if (now - lastNanos > intervalNanos) { + if (last.compareAndSet(lastNanos, now)) { + val currentSinceLast = sinceLast.getAndSet(0L) + action(currentSinceLast) + } + } else { + sinceLast.incrementAndGet() + } + } + + def log(msg: => String, level: Level = Level.ERROR): Unit = { + limited { currentSinceLast: Long => + logger( + level, + "%s (group is logged at most once every %s ms%s)".format( + msg, + intervalMsString, + if (currentSinceLast > 0) { + s", ${currentSinceLast} occurrences since last" + } else "" + ) + ) + } + } + + def logThrowable(t: Throwable, msg: => String, level: Level = Level.ERROR): Unit = { + limited { currentSinceLast: Long => + logger( + level, + t, + "%s (group is logged at most once every %s ms%s)".format( + msg, + intervalMsString, + if (currentSinceLast > 0) { + s", ${currentSinceLast} occurrences since last" + } else "" + ) + ) + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala new file mode 100644 index 000000000..164dc2561 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Retry.scala @@ -0,0 +1,100 @@ +package com.twitter.servo.util + +import com.twitter.finagle.{Backoff, Service, TimeoutException, WriteException} +import 
com.twitter.finagle.service.{RetryExceptionsFilter, RetryPolicy} +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.util.DefaultTimer +import com.twitter.util.{Duration, Future, Throw, Timer, Try} + +/** + * Allows an action to be retried according to a backoff strategy. + * This is an adaptation of the Finagle RetryExceptionsFilter, but with an + * arbitrary asynchronous computation. + */ +class Retry( + statsReceiver: StatsReceiver, + backoffs: Backoff, + private[this] val timer: Timer = DefaultTimer) { + + /** + * Retries `f` on the exceptions for which `shouldRetry` is defined and returns true. + */ + def apply[T]( + f: () => Future[T] + )( + shouldRetry: PartialFunction[Throwable, Boolean] + ): Future[T] = { + val policy = RetryPolicy.backoff[Try[Nothing]](backoffs) { + case Throw(t) if shouldRetry.isDefinedAt(t) => shouldRetry(t) + } + + val service = new Service[Unit, T] { + override def apply(u: Unit): Future[T] = f() + } + + val retrying = new RetryExceptionsFilter(policy, timer, statsReceiver) andThen service + + retrying() + } + + @deprecated("release() has no function and will be removed", "2.8.2") + def release(): Unit = {} +} + +/** + * Use this to configure separate backoffs for WriteExceptions, TimeoutExceptions, + * and service-specific exceptions. + */ +class ServiceRetryPolicy( + writeExceptionBackoffs: Backoff, + timeoutBackoffs: Backoff, + serviceBackoffs: Backoff, + shouldRetryService: PartialFunction[Throwable, Boolean]) + extends RetryPolicy[Try[Nothing]] { + override def apply(r: Try[Nothing]) = r match { + case Throw(t) if shouldRetryService.isDefinedAt(t) => + if (shouldRetryService(t)) + onServiceException + else + None + case Throw(_: WriteException) => onWriteException + case Throw(_: TimeoutException) => onTimeoutException + case _ => None + } + + def copy( + writeExceptionBackoffs: Backoff = writeExceptionBackoffs, + timeoutBackoffs: Backoff = timeoutBackoffs, + serviceBackoffs: Backoff = serviceBackoffs, + shouldRetryService: PartialFunction[Throwable, Boolean] =
shouldRetryService + ) = + new ServiceRetryPolicy( + writeExceptionBackoffs, + timeoutBackoffs, + serviceBackoffs, + shouldRetryService + ) + + private[this] def onWriteException = consume(writeExceptionBackoffs) { tail => + copy(writeExceptionBackoffs = tail) + } + + private[this] def onTimeoutException = consume(timeoutBackoffs) { tail => + copy(timeoutBackoffs = tail) + } + + private[this] def onServiceException = consume(serviceBackoffs) { tail => + copy(serviceBackoffs = tail) + } + + private[this] def consume(b: Backoff)(f: Backoff => ServiceRetryPolicy) = { + if (b.isExhausted) None + else Some((b.duration, f(b.next))) + } + + override val toString = "ServiceRetryPolicy(%s, %s, %s)".format( + writeExceptionBackoffs, + timeoutBackoffs, + serviceBackoffs + ) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala new file mode 100644 index 000000000..f1e02c641 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RetryHandler.scala @@ -0,0 +1,169 @@ +package com.twitter.servo.util + +import com.twitter.finagle.Backoff +import com.twitter.finagle.service.{RetryBudget, RetryPolicy} +import com.twitter.finagle.stats.{Counter, StatsReceiver} +import com.twitter.util._ +import java.util.concurrent.CancellationException +import scala.util.control.NonFatal + +/** + * A RetryHandler can wrap an arbitrary Future-producing operation with retry logic, where the + * operation may conditionally be retried multiple times. + */ +trait RetryHandler[-A] { + + /** + * Executes the given operation and performs any applicable retries. + */ + def apply[A2 <: A](f: => Future[A2]): Future[A2] + + /** + * Wraps an arbitrary function with this RetryHandler's retrying logic. 
+ */ + def wrap[A2 <: A, B](f: B => Future[A2]): B => Future[A2] = + b => this(f(b)) +} + +object RetryHandler { + + /** + * Builds a RetryHandler that retries according to the given RetryPolicy. Retries, if any, + * will be scheduled on the given Timer to be executed after the appropriate backoff, if any. + * Retries will be limited according to the given `RetryBudget`. + */ + def apply[A]( + policy: RetryPolicy[Try[A]], + timer: Timer, + statsReceiver: StatsReceiver, + budget: RetryBudget = RetryBudget() + ): RetryHandler[A] = { + val firstTryCounter = statsReceiver.counter("first_try") + val retriesCounter = statsReceiver.counter("retries") + val budgetExhausedCounter = statsReceiver.counter("budget_exhausted") + + new RetryHandler[A] { + def apply[A2 <: A](f: => Future[A2]): Future[A2] = { + firstTryCounter.incr() + budget.deposit() + retry[A2](policy, timer, retriesCounter, budgetExhausedCounter, budget)(f) + } + } + } + + /** + * Builds a RetryHandler that will only retry on failures that are handled by the given policy, + * and does not consider any successful future for retries. + */ + def failuresOnly[A]( + policy: RetryPolicy[Try[Nothing]], + timer: Timer, + statsReceiver: StatsReceiver, + budget: RetryBudget = RetryBudget() + ): RetryHandler[A] = + apply(failureOnlyRetryPolicy(policy), timer, statsReceiver, budget) + + /** + * Builds a RetryHandler that will retry any failure according to the given backoff schedule, + * until either the operation succeeds or all backoffs are exhausted. + */ + def failuresOnly[A]( + backoffs: Stream[Duration], + timer: Timer, + stats: StatsReceiver, + budget: RetryBudget + ): RetryHandler[A] = + failuresOnly( + RetryPolicy.backoff[Try[Nothing]](Backoff.fromStream(backoffs)) { case Throw(_) => true }, + timer, + stats, + budget + ) + + /** + * Builds a RetryHandler that will retry any failure according to the given backoff schedule, + * until either the operation succeeds or all backoffs are exhausted.
+ */ + def failuresOnly[A]( + backoffs: Stream[Duration], + timer: Timer, + stats: StatsReceiver + ): RetryHandler[A] = + failuresOnly(backoffs, timer, stats, RetryBudget()) + + /** + * Converts a RetryPolicy that only handles failures (Throw) to a RetryPolicy that also + * handles successes (Return), by flagging that successes need not be retried. + */ + def failureOnlyRetryPolicy[A](policy: RetryPolicy[Try[Nothing]]): RetryPolicy[Try[A]] = + RetryPolicy[Try[A]] { + case Return(_) => None + case Throw(ex) => + policy(Throw(ex)) map { + case (backoff, p2) => (backoff, failureOnlyRetryPolicy(p2)) + } + } + + private[this] def retry[A]( + policy: RetryPolicy[Try[A]], + timer: Timer, + retriesCounter: Counter, + budgetExhausedCounter: Counter, + budget: RetryBudget + )( + f: => Future[A] + ): Future[A] = { + forceFuture(f).transform { transformed => + policy(transformed) match { + case Some((backoff, nextPolicy)) => + if (budget.tryWithdraw()) { + retriesCounter.incr() + schedule(backoff, timer) { + retry(nextPolicy, timer, retriesCounter, budgetExhausedCounter, budget)(f) + } + } else { + budgetExhausedCounter.incr() + Future.const(transformed) + } + case None => + Future.const(transformed) + } + } + } + + // similar to finagle's RetryExceptionsFilter + private[this] def schedule[A](d: Duration, timer: Timer)(f: => Future[A]) = { + if (d.inNanoseconds > 0) { + val promise = new Promise[A] + val task = timer.schedule(Time.now + d) { + if (!promise.isDefined) { + try { + promise.become(f) + } catch { + case NonFatal(cause) => + // Ignore any exceptions thrown by Promise#become(). This usually means that the promise + // was already defined and cannot be transformed. 
+ } + } + } + promise.setInterruptHandler { + case cause => + task.cancel() + val cancellation = new CancellationException + cancellation.initCause(cause) + promise.updateIfEmpty(Throw(cancellation)) + } + promise + } else forceFuture(f) + } + + // (Future { f } flatten), but without the allocation + private[this] def forceFuture[A](f: => Future[A]) = { + try { + f + } catch { + case NonFatal(cause) => + Future.exception(cause) + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala new file mode 100644 index 000000000..36b790760 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/RpcRetry.scala @@ -0,0 +1,90 @@ +package com.twitter.servo.util + +import com.twitter.util.Future + +object RpcRetry { + + /** + * Provides a generic implementation of retry logic that retries only the subset + * of requests whose responses fail a given predicate, and returns the results + * in the original order after the retry. + * @param rpcs Methods that can transform a Seq[Request] to + * Future[Map[Request, Response]], they will be invoked in order + * while there are remaining rpcs to invoke AND some responses + * still return false to the predicate. + * @param isSuccess if true, keep the response, else retry. + * @tparam Req a request object + * @tparam Resp a response object + * @return an rpc function (Seq[Req] => Future[Map[Req, Resp]]) that performs + * the retries internally. + */ + def retryableRpc[Req, Resp]( + rpcs: Seq[Seq[Req] => Future[Map[Req, Resp]]], + isSuccess: Resp => Boolean + ): Seq[Req] => Future[Map[Req, Resp]] = { + requestRetryAndMerge[Req, Resp](_, isSuccess, rpcs.toStream) + } + + /** + * Provides a generic implementation of retry logic that retries only the subset + * of requests whose responses fail a given predicate, and returns the results + * in the original order after the retry.
+ * @param rpcs Methods that can transform a Seq[Request] to + * Future[Seq[Response]], they will be invoked in order + * while there are remaining rpcs to invoke AND some responses + * still return false to the predicate. + * Note that all Request objects must adhere to hashCode/equals standards + * @param isSuccess if true, keep the response, else retry. + * @tparam Req a request object. Must adhere to hashCode/equals standards + * @tparam Resp a response object + * @return an rpc function (Seq[Req] => Future[Seq[Resp]]) that performs + * the retries internally. + */ + def retryableRpcSeq[Req, Resp]( + rpcs: Seq[Seq[Req] => Future[Seq[Resp]]], + isSuccess: Resp => Boolean + ): Seq[Req] => Future[Seq[Resp]] = { + requestRetryAndMergeSeq[Req, Resp](_, isSuccess, rpcs) + } + + private[this] def requestRetryAndMergeSeq[Req, Resp]( + requests: Seq[Req], + isSuccess: Resp => Boolean, + rpcs: Seq[Seq[Req] => Future[Seq[Resp]]] + ): Future[Seq[Resp]] = { + requestRetryAndMerge(requests, isSuccess, (rpcs map { rpcToMapResponse(_) }).toStream) map { + responseMap => + requests map { responseMap(_) } + } + } + + private[this] def requestRetryAndMerge[Req, Resp]( + requests: Seq[Req], + isSuccess: Resp => Boolean, + rpcs: Stream[Seq[Req] => Future[Map[Req, Resp]]] + ): Future[Map[Req, Resp]] = { + if (rpcs.isEmpty) { + Future.exception(new IllegalArgumentException("rpcs is empty.")) + } else { + val rpc = rpcs.head + rpc(requests) flatMap { responses => + val (keep, recurse) = responses partition { + case (_, rep) => isSuccess(rep) + } + if (rpcs.tail.nonEmpty && recurse.nonEmpty) { + requestRetryAndMerge(recurse.keys.toSeq, isSuccess, rpcs.tail) map { keep ++ _ } + } else { + Future.value(responses) + } + } + } + } + + private[this] def rpcToMapResponse[Req, Resp]( + rpc: Seq[Req] => Future[Seq[Resp]] + ): Seq[Req] => Future[Map[Req, Resp]] = { (reqs: Seq[Req]) => + rpc(reqs) map { reps => + (reqs zip reps).toMap + } + } +} diff --git 
a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala new file mode 100644 index 000000000..1d20842df --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Scribe.scala @@ -0,0 +1,80 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.{NullStatsReceiver, StatsReceiver} +import com.twitter.logging._ +import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStruct, ThriftStructCodec} +import com.twitter.util.Future + +object Scribe { + + /** + * Returns a new FutureEffect for scribing text to the specified category. + */ + def apply( + category: String, + statsReceiver: StatsReceiver = NullStatsReceiver + ): FutureEffect[String] = + Scribe(loggingHandler(category = category, statsReceiver = statsReceiver)) + + /** + * Returns a new FutureEffect for scribing text to the specified logging handler. + */ + def apply(handler: Handler): FutureEffect[String] = + FutureEffect[String] { msg => + handler.publish(new LogRecord(handler.getLevel, msg)) + Future.Unit + } + + /** + * Returns a new FutureEffect for scribing thrift objects to the specified category. + * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + category: String + ): FutureEffect[T] = + Scribe(codec, Scribe(category = category)) + + /** + * Returns a new FutureEffect for scribing thrift objects to the specified category. + * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + category: String, + statsReceiver: StatsReceiver + ): FutureEffect[T] = + Scribe(codec, Scribe(category = category, statsReceiver = statsReceiver)) + + /** + * Returns a new FutureEffect for scribing thrift objects to the underlying scribe effect. 
+ * The thrift object will be serialized to binary then converted to Base64. + */ + def apply[T <: ThriftStruct]( + codec: ThriftStructCodec[T], + underlying: FutureEffect[String] + ): FutureEffect[T] = + underlying contramap serialize(codec) + + /** + * Builds a logging Handler that scribes log messages, wrapped with a QueueingHandler. + */ + def loggingHandler( + category: String, + formatter: Formatter = BareFormatter, + maxQueueSize: Int = 5000, + statsReceiver: StatsReceiver = NullStatsReceiver + ): Handler = + new QueueingHandler( + ScribeHandler(category = category, formatter = formatter, statsReceiver = statsReceiver)(), + maxQueueSize = maxQueueSize + ) + + /** + * Returns a function that serializes thrift structs to Base64. + */ + def serialize[T <: ThriftStruct](c: ThriftStructCodec[T]): T => String = { + val serializer = BinaryThriftStructSerializer(c) + t => serializer.toString(t) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala new file mode 100644 index 000000000..4e84fb801 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SuccessRateTracker.scala @@ -0,0 +1,179 @@ +package com.twitter.servo.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.util.{Duration, Local} + +/** + * A strategy for tracking success rate, usually over a window + */ +trait SuccessRateTracker { self => + def record(successes: Int, failures: Int): Unit + def successRate: Double + + /** + * A [[Gate]] whose availability is computed from the success rate (SR) reported by the tracker. 
+ * + * @param availabilityFromSuccessRate function to calculate availability of gate given SR + */ + def availabilityGate(availabilityFromSuccessRate: Double => Double): Gate[Unit] = + Gate.fromAvailability(availabilityFromSuccessRate(successRate)) + + /** + * A [[Gate]] whose availability is computed from the success rate reported by the tracker + * with stats attached. + */ + def observedAvailabilityGate( + availabilityFromSuccessRate: Double => Double, + stats: StatsReceiver + ): Gate[Unit] = + new Gate[Unit] { + val underlying = availabilityGate(availabilityFromSuccessRate) + val availabilityGauge = + stats.addGauge("availability") { availabilityFromSuccessRate(successRate).toFloat } + override def apply[U](u: U)(implicit asT: <:<[U, Unit]): Boolean = underlying.apply(u) + } + + /** + * Tracks number of successes and failures as counters, and success_rate as a gauge + */ + def observed(stats: StatsReceiver) = { + val successCounter = stats.counter("successes") + val failureCounter = stats.counter("failures") + new SuccessRateTracker { + private[this] val successRateGauge = stats.addGauge("success_rate")(successRate.toFloat) + override def record(successes: Int, failures: Int) = { + self.record(successes, failures) + successCounter.incr(successes) + failureCounter.incr(failures) + } + override def successRate = self.successRate + } + } +} + +object SuccessRateTracker { + + /** + * Track success rate (SR) using [[RecentAverage]] + * + * Defaults success rate to 100% which prevents early failures (or periods of 0 data points, + * e.g. tracking backend SR during failover) from producing dramatic drops in success rate. + * + * @param window Window size as duration + */ + def recentWindowed(window: Duration) = + new AverageSuccessRateTracker(new RecentAverage(window, defaultAverage = 1.0)) + + /** + * Track success rate using [[WindowedAverage]] + * + * Initializes the windowedAverage to one window's worth of successes. 
This prevents + * the problem where early failures produce dramatic drops in the success rate. + * + * @param windowSize Window size in number of data points + */ + def rollingWindow(windowSize: Int) = + new AverageSuccessRateTracker(new WindowedAverage(windowSize, initialValue = Some(1.0))) +} + +/** + * Tracks success rate using an [[Average]] + * + * @param average Strategy for recording an average, usually over a window + */ +class AverageSuccessRateTracker(average: Average) extends SuccessRateTracker { + def record(successes: Int, failures: Int): Unit = + average.record(successes, successes + failures) + + def successRate: Double = average.value.getOrElse(1) +} + +/** + * EwmaSuccessRateTracker computes a failure rate with exponential decay over a time bound. + * + * @param halfLife determines the rate of decay. Assuming a hypothetical service that is initially + * 100% successful and then instantly switches to 50% successful, it will take `halfLife` time + * for this tracker to report a success rate of ~75%. 
+ */ +class EwmaSuccessRateTracker(halfLife: Duration) extends SuccessRateTracker { + // math.exp(-x) = 0.50 when x == ln(2) + // math.exp(-x / Tau) == math.exp(-x / halfLife * ln(2)) therefore when x/halfLife == 1, the + // decay output is 0.5 + private[this] val Tau: Double = halfLife.inNanoseconds.toDouble / math.log(2.0) + + private[this] var stamp: Long = EwmaSuccessRateTracker.nanoTime() + private[this] var decayingFailureRate: Double = 0.0 + + def record(successes: Int, failures: Int): Unit = { + if (successes < 0 || failures < 0) return + + val total = successes + failures + if (total == 0) return + + val observation = (failures.toDouble / total) max 0.0 min 1.0 + + synchronized { + val time = EwmaSuccessRateTracker.nanoTime() + val delta = ((time - stamp) max 0L).toDouble + val weight = math.exp(-delta / Tau) + decayingFailureRate = (decayingFailureRate * weight) + (observation * (1.0 - weight)) + stamp = time + } + } + + /** + * The current success rate, computed as the complement of the failure rate (1 - failureRate). + */ + def successRate: Double = 1.0 - failureRate + + def failureRate = synchronized { decayingFailureRate } +} + +private[servo] trait NanoTimeControl { + def set(nanoTime: Long): Unit + def advance(delta: Long): Unit + def advance(delta: Duration): Unit = advance(delta.inNanoseconds) +} + +object EwmaSuccessRateTracker { + private[EwmaSuccessRateTracker] val localNanoTime = new Local[() => Long] + + private[EwmaSuccessRateTracker] def nanoTime(): Long = { + localNanoTime() match { + case None => System.nanoTime() + case Some(f) => f() + } + } + + /** + * Execute body with the time function replaced by `timeFunction`. + * WARNING: This is only meant for testing purposes.
+ */ + private[this] def withNanoTimeFunction[A]( + timeFunction: => Long + )( + body: NanoTimeControl => A + ): A = { + @volatile var tf = () => timeFunction + + localNanoTime.let(() => tf()) { + val timeControl = new NanoTimeControl { + def set(nanoTime: Long): Unit = { + tf = () => nanoTime + } + def advance(delta: Long): Unit = { + val newNanoTime = tf() + delta + tf = () => newNanoTime + } + } + + body(timeControl) + } + } + + private[this] def withNanoTimeAt[A](nanoTime: Long)(body: NanoTimeControl => A): A = + withNanoTimeFunction(nanoTime)(body) + + private[servo] def withCurrentNanoTimeFrozen[A](body: NanoTimeControl => A): A = + withNanoTimeAt(System.nanoTime())(body) +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala new file mode 100644 index 000000000..a57d30533 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/SynchronizedHashMap.scala @@ -0,0 +1,5 @@ +package com.twitter.servo.util + +import scala.collection.mutable + +class SynchronizedHashMap[K, V] extends mutable.HashMap[K, V] with mutable.SynchronizedMap[K, V] diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala new file mode 100644 index 000000000..3edd1cf31 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThreadLocalStringBuilder.scala @@ -0,0 +1,11 @@ +package com.twitter.servo.util + +class ThreadLocalStringBuilder(initialSize: Int) extends ThreadLocal[StringBuilder] { + override def initialValue = new StringBuilder(initialSize) + + def apply() = { + val buf = get + buf.setLength(0) + buf + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala 
b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala new file mode 100644 index 000000000..5feeaa7e7 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/ThrowableHelper.scala @@ -0,0 +1,41 @@ +package com.twitter.servo.util + +import com.twitter.util.Throwables + +/** + * An object with some helper methods for dealing with exceptions + * (currently just classname cleanup) + */ +object ThrowableHelper { + + /** + * Returns a sanitized sequence of classnames for the given Throwable + * including root causes. + */ + def sanitizeClassnameChain(t: Throwable): Seq[String] = + Throwables.mkString(t).map(classnameTransform(_)) + + /** + * Returns a sanitized classname for the given Throwable. + */ + def sanitizeClassname(t: Throwable): String = + classnameTransform(t.getClass.getName) + + /** + * A function that applies a bunch of cleanup transformations to exception classnames + * (currently two suffix strips, `$Immutable` then `$`; there will likely be more!). + */ + private val classnameTransform: String => String = + Memoize { stripSuffix("$Immutable").andThen(stripSuffix("$")) } + + /** + * Generates a function that strips off the specified suffix from strings, if found.
+ */ + private def stripSuffix(suffix: String): String => String = + s => { + if (s.endsWith(suffix)) + s.substring(0, s.length - suffix.length) + else + s + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala new file mode 100644 index 000000000..d5cb14479 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/Transformer.scala @@ -0,0 +1,227 @@ +package com.twitter.servo.util + +import com.google.common.base.Charsets +import com.google.common.primitives.{Ints, Longs} +import com.twitter.scrooge.{BinaryThriftStructSerializer, ThriftStructCodec, ThriftStruct} +import com.twitter.util.{Future, Return, Try, Throw} +import java.nio.{ByteBuffer, CharBuffer} +import java.nio.charset.{Charset, CharsetEncoder, CharsetDecoder} + +/** + * Transformer is a (possibly partial) bidirectional conversion + * between values of two types. It is particularly useful for + * serializing values for storage and reading them back out (see + * com.twitter.servo.cache.Serializer). + * + * In some implementations, the conversion may lose data (for example + * when used for storage in a cache). In general, any data that passes + * through a conversion should be preserved if the data is converted + * back. There is code to make it easy to check that your Transformer + * instance has this property in + * com.twitter.servo.util.TransformerLawSpec. + * + * Transformers should take care not to mutate their inputs when + * converting in either direction, in order to ensure that concurrent + * transformations of the same input yield the same result. + * + * Transformer forms a category with `andThen` and `identity`. 
+ */ +trait Transformer[A, B] { self => + def to(a: A): Try[B] + + def from(b: B): Try[A] + + @deprecated("Use Future.const(transformer.to(x))", "2.0.1") + def asyncTo(a: A): Future[B] = Future.const(to(a)) + + @deprecated("Use Future.const(transformer.from(x))", "2.0.1") + def asyncFrom(b: B): Future[A] = Future.const(from(b)) + + /** + * Compose this transformer with another. As long as both + * transformers follow the stated laws, the composed transformer + * will follow them. + */ + def andThen[C](t: Transformer[B, C]): Transformer[A, C] = + new Transformer[A, C] { + override def to(a: A) = self.to(a) andThen t.to + override def from(c: C) = t.from(c) andThen self.from + } + + /** + * Reverse the direction of this transformer. + * + * Law: t.flip.flip == t + */ + lazy val flip: Transformer[B, A] = + new Transformer[B, A] { + override lazy val flip = self + override def to(b: B) = self.from(b) + override def from(a: A) = self.to(a) + } +} + +object Transformer { + + /** + * Create a new Transformer from the supplied functions, catching + * exceptions and converting them to failures. + */ + def apply[A, B](tTo: A => B, tFrom: B => A): Transformer[A, B] = + new Transformer[A, B] { + override def to(a: A): Try[B] = Try { tTo(a) } + override def from(b: B): Try[A] = Try { tFrom(b) } + } + + def identity[A]: Transformer[A, A] = pure[A, A](a => a, a => a) + + /** + * Lift a pair of (total) conversion functions to a Transformer. The + * caller is responsible for ensuring that the resulting transformer + * follows the laws for Transformers. + */ + def pure[A, B](pureTo: A => B, pureFrom: B => A): Transformer[A, B] = + new Transformer[A, B] { + override def to(a: A): Try[B] = Return(pureTo(a)) + override def from(b: B): Try[A] = Return(pureFrom(b)) + } + + /** + * Lift a transformer to a transformer on optional values. + * + * None bypasses the underlying conversion (as it must, since there + * is no value to transform). 
+ */ + def optional[A, B](underlying: Transformer[A, B]): Transformer[Option[A], Option[B]] = + new Transformer[Option[A], Option[B]] { + override def to(optA: Option[A]) = optA match { + case None => Return.None + case Some(a) => underlying.to(a) map { Some(_) } + } + + override def from(optB: Option[B]) = optB match { + case None => Return.None + case Some(b) => underlying.from(b) map { Some(_) } + } + } + + ////////////////////////////////////////////////// + // Transformers for accessing/generating fields of a Map. + // + // These transformers are useful for serializing/deserializing to + // storage that stores Maps, for example Hamsa. + + /** + * Thrown by `requiredField` when the field is not present. + */ + case class MissingRequiredField[K](k: K) extends RuntimeException + + /** + * Get a value from the map, yielding MissingRequiredField when the + * value is not present in the map. + * + * The inverse transform yields a Map containing only the one value. + */ + def requiredField[K, V](k: K): Transformer[Map[K, V], V] = + new Transformer[Map[K, V], V] { + override def to(m: Map[K, V]) = + m get k match { + case Some(v) => Return(v) + case None => Throw(MissingRequiredField(k)) + } + + override def from(v: V) = Return(Map(k -> v)) + } + + /** + * Attempt to get a field from a Map, yielding None if the value is + * not present. + * + * The inverse transform will put the value in a Map if it is Some, + * and omit it if it is None. + */ + def optionalField[K, V](k: K): Transformer[Map[K, V], Option[V]] = + pure[Map[K, V], Option[V]](_.get(k), _.map { k -> _ }.toMap) + + /** + * Transforms an Option[T] to a T, using a default value for None. + * + * Note that the default value will be converted back to None by + * .from (.from will never return Some(default)). + */ + def default[T](value: T): Transformer[Option[T], T] = + pure[Option[T], T](_ getOrElse value, t => if (t == value) None else Some(t)) + + /** + * Transforms `Long`s to big-endian byte arrays. 
+ */ + lazy val LongToBigEndian: Transformer[Long, Array[Byte]] = + new Transformer[Long, Array[Byte]] { + def to(a: Long) = Try(Longs.toByteArray(a)) + def from(b: Array[Byte]) = Try(Longs.fromByteArray(b)) + } + + /** + * Transforms `Int`s to big-endian byte arrays. + */ + lazy val IntToBigEndian: Transformer[Int, Array[Byte]] = + new Transformer[Int, Array[Byte]] { + def to(a: Int) = Try(Ints.toByteArray(a)) + def from(b: Array[Byte]) = Try(Ints.fromByteArray(b)) + } + + /** + * Transforms UTF8-encoded strings to byte arrays. + */ + lazy val Utf8ToBytes: Transformer[String, Array[Byte]] = + stringToBytes(Charsets.UTF_8) + + /** + * Transforms strings, encoded in a given character set, to byte arrays. + */ + private[util] def stringToBytes(charset: Charset): Transformer[String, Array[Byte]] = + new Transformer[String, Array[Byte]] { + private[this] val charsetEncoder = new ThreadLocal[CharsetEncoder]() { + protected override def initialValue() = charset.newEncoder + } + + private[this] val charsetDecoder = new ThreadLocal[CharsetDecoder]() { + protected override def initialValue() = charset.newDecoder + } + + override def to(str: String): Try[Array[Byte]] = Try { + // We can't just use `String.getBytes("UTF-8")` here because it will + // silently replace unpaired UTF-16 surrogate characters, whereas + // CharsetEncoder throws an exception for them. + val bytes = charsetEncoder.get.encode(CharBuffer.wrap(str)) + bytes.array.slice(bytes.position, bytes.limit) + } + + override def from(bytes: Array[Byte]): Try[String] = Try { + charsetDecoder.get.decode(ByteBuffer.wrap(bytes)).toString + } + } + + /** + * Transforms a ThriftStruct to a byte-array using Thrift's TBinaryProtocol. 
+ */ + def thriftStructToBytes[T <: ThriftStruct](c: ThriftStructCodec[T]): Transformer[T, Array[Byte]] = + new Transformer[T, Array[Byte]] { + private[this] val ser = BinaryThriftStructSerializer(c) + def to(a: T) = Try(ser.toBytes(a)) + def from(b: Array[Byte]) = Try(ser.fromBytes(b)) + } +} + +/** + * Transforms an Option[T] to a T, using a default value for None. + */ +@deprecated("Use Transformer.default", "2.0.1") +class OptionToTypeTransformer[T](default: T) extends Transformer[Option[T], T] { + override def to(b: Option[T]): Try[T] = Return(b.getOrElse(default)) + + override def from(a: T): Try[Option[T]] = a match { + case `default` => Return.None + case _ => Return(Some(a)) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala new file mode 100644 index 000000000..d770be704 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/TryOrdering.scala @@ -0,0 +1,23 @@ +package com.twitter.servo.util + +import com.twitter.util.{Return, Throw, Try} + +object TryOrdering { + + /** + * Creates an Ordering of Try objects. Throws are ordered before Returns, and two Returns + * are ordered according to the given value ordering. 
+ */ + def apply[A](valueOrdering: Ordering[A]) = new Ordering[Try[A]] { + def compare(x: Try[A], y: Try[A]): Int = { + x match { + case Throw(_) => if (y.isReturn) -1 else 0 + case Return(xValue) => + y match { + case Throw(_) => 1 + case Return(yValue) => valueOrdering.compare(xValue, yValue) + } + } + } + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala new file mode 100644 index 000000000..e76020098 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/WaitForServerSets.scala @@ -0,0 +1,60 @@ +package com.twitter.servo.util + +import com.twitter.finagle.util.DefaultTimer +import com.twitter.finagle.{Addr, Name, Namer} +import com.twitter.logging.Logger +import com.twitter.util._ +import scala.collection.JavaConverters._ + +/** + * A simple utility class to wait for serverset names to be resolved at startup. + * + * See [[com.twitter.finagle.client.ClientRegistry.expAllRegisteredClientsResolved()]] for an + * alternative way to wait for ServerSet resolution. + */ +object WaitForServerSets { + val log = Logger.get("WaitForServerSets") + + /** + * Convenience wrapper for a single name in Java. Provides the default timer from Finagle. + */ + def ready(name: Name, timeout: Duration): Future[Unit] = + ready(Seq(name), timeout, DefaultTimer) + + /** + * Java Compatibility wrapper. Uses java.util.List instead of Seq. + */ + def ready(names: java.util.List[Name], timeout: Duration, timer: Timer): Future[Unit] = + ready(names.asScala, timeout, timer) + + /** + * Returns a Future that is satisfied when no more names resolve to Addr.Pending, + * or fails with a TimeoutException once the specified timeout expires. + * + * This ignores address resolution failures, so just because the Future is satisfied + * doesn't necessarily imply that all names are resolved to something useful. 
+ */ + def ready(names: Seq[Name], timeout: Duration, timer: Timer): Future[Unit] = { + val vars: Var[Seq[(Name, Addr)]] = Var.collect(names.map { + case n @ Name.Path(v) => Namer.resolve(v).map((n, _)) + case n @ Name.Bound(v) => v.map((n, _)) + }) + + val pendings = vars.changes.map { names => + names.filter { case (_, addr) => addr == Addr.Pending } + } + + pendings + .filter(_.isEmpty) + .toFuture() + .unit + .within( + timer, + timeout, + new TimeoutException( + "Failed to resolve: " + + vars.map(_.map { case (name, _) => name }).sample() + ) + ) + } +} diff --git a/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala new file mode 100644 index 000000000..e9afcacc1 --- /dev/null +++ b/tweetypie/servo/util/src/main/scala/com/twitter/servo/util/package.scala @@ -0,0 +1,6 @@ +package com.twitter.servo + +package object util { + /* aliases to preserve compatibility after classes moved to different package */ + val Memoize = com.twitter.util.Memoize +} From b389c3d30201f466cc51a4fa397cc5e81c24fe50 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 19 May 2023 09:53:56 -0700 Subject: [PATCH 10/11] Open-sourcing pushservice Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send. 
--- README.md | 10 + pushservice/BUILD.bazel | 48 + pushservice/readme.md | 45 + .../main/python/models/heavy_ranking/BUILD | 169 + .../python/models/heavy_ranking/README.md | 20 + .../python/models/heavy_ranking/__init__.py | 0 .../python/models/heavy_ranking/deep_norm.py | 136 + .../main/python/models/heavy_ranking/eval.py | 59 + .../python/models/heavy_ranking/features.py | 138 + .../main/python/models/heavy_ranking/graph.py | 129 + .../python/models/heavy_ranking/lib/BUILD | 42 + .../python/models/heavy_ranking/lib/layers.py | 128 + .../python/models/heavy_ranking/lib/model.py | 76 + .../python/models/heavy_ranking/lib/params.py | 49 + .../models/heavy_ranking/model_pools.py | 34 + .../python/models/heavy_ranking/params.py | 89 + .../python/models/heavy_ranking/run_args.py | 59 + .../update_warm_start_checkpoint.py | 146 + pushservice/src/main/python/models/libs/BUILD | 16 + .../src/main/python/models/libs/__init__.py | 0 .../models/libs/customized_full_sparse.py | 56 + .../python/models/libs/get_feat_config.py | 176 + .../main/python/models/libs/graph_utils.py | 42 + .../main/python/models/libs/group_metrics.py | 114 + .../main/python/models/libs/initializer.py | 118 + .../models/libs/light_ranking_metrics.py | 255 + .../python/models/libs/metric_fn_utils.py | 294 + .../src/main/python/models/libs/model_args.py | 231 + .../main/python/models/libs/model_utils.py | 339 ++ .../python/models/libs/warm_start_utils.py | 309 + .../main/python/models/light_ranking/BUILD | 69 + .../python/models/light_ranking/README.md | 14 + .../python/models/light_ranking/__init__.py | 0 .../python/models/light_ranking/deep_norm.py | 226 + .../python/models/light_ranking/eval_model.py | 89 + .../models/light_ranking/model_pools_mlp.py | 187 + .../twitter/frigate/pushservice/BUILD.bazel | 337 ++ .../PushMixerThriftServerWarmupHandler.scala | 93 + .../frigate/pushservice/PushServiceMain.scala | 193 + .../ContentRecommenderMixerAdaptor.scala | 323 ++ 
...EarlyBirdFirstDegreeCandidateAdaptor.scala | 293 + .../ExploreVideoTweetCandidateAdaptor.scala | 120 + .../adaptor/FRSTweetCandidateAdaptor.scala | 272 + .../adaptor/GenericCandidateAdaptor.scala | 107 + .../adaptor/HighQualityTweetsAdaptor.scala | 280 + .../ListsToRecommendCandidateAdaptor.scala | 152 + ...oggedOutPushCandidateSourceGenerator.scala | 54 + .../OnboardingPushCandidateAdaptor.scala | 101 + .../PushCandidateSourceGenerator.scala | 162 + .../TopTweetImpressionsCandidateAdaptor.scala | 326 ++ .../adaptor/TopTweetsByGeoAdaptor.scala | 413 ++ .../adaptor/TrendsCandidatesAdaptor.scala | 215 + .../adaptor/TripGeoCandidatesAdaptor.scala | 188 + .../frigate/pushservice/config/Config.scala | 461 ++ .../pushservice/config/DeployConfig.scala | 2150 +++++++ .../config/ExperimentsWithStats.scala | 16 + .../pushservice/config/ProdConfig.scala | 230 + .../pushservice/config/StagingConfig.scala | 193 + .../mlconfig/DeepbirdV2ModelConfig.scala | 23 + .../controller/PushServiceController.scala | 114 + ...DisplayLocationNotSupportedException.scala | 12 + .../InvalidSportDomainException.scala | 12 + .../TweetNTabRequestHydratorException.scala | 7 + .../exception/UnsupportedCrtException.scala | 11 + .../UttEntityNotFoundException.scala | 12 + .../pushservice/ml/HealthFeatureGetter.scala | 220 + .../ml/HydrationContextBuilder.scala | 179 + .../pushservice/ml/PushMLModelScorer.scala | 188 + .../pushservice/model/DiscoverTwitter.scala | 89 + .../model/F1FirstdegreeTweet.scala | 60 + .../ListRecommendationPushCandidate.scala | 72 + ...MagicFanoutCreatorEventPushCandidate.scala | 136 + .../model/MagicFanoutEventPushCandidate.scala | 303 + .../model/MagicFanoutHydratedCandidate.scala | 147 + .../model/MagicFanoutNewsEvent.scala | 99 + ...agicFanoutProductLaunchPushCandidate.scala | 95 + .../MagicFanoutSportsPushCandidate.scala | 119 + .../OutOfNetworkTweetPushCandidate.scala | 68 + .../frigate/pushservice/model/PushTypes.scala | 61 + .../model/ScheduledSpaceSpeaker.scala | 
85 + .../model/ScheduledSpaceSubscriber.scala | 86 + .../SubscribedSearchTweetPushCandidate.scala | 56 + .../TopTweetImpressionsPushCandidate.scala | 70 + .../model/TopicProofTweetPushCandidate.scala | 71 + .../model/TrendTweetPushCandidate.scala | 50 + .../model/TripTweetPushCandidate.scala | 60 + .../pushservice/model/TweetAction.scala | 26 + .../pushservice/model/TweetFavorite.scala | 53 + .../pushservice/model/TweetRetweet.scala | 51 + .../model/candidate/CopyInfo.scala | 33 + .../model/candidate/MLScores.scala | 307 + .../model/candidate/QualityScribing.scala | 104 + .../pushservice/model/candidate/Scriber.scala | 277 + .../ibis/CustomConfigurationMapForIbis.scala | 25 + .../DiscoverTwitterPushIbis2Hydrator.scala | 17 + .../F1FirstDegreeTweetIbis2Hydrator.scala | 24 + .../model/ibis/Ibis2Hydrator.scala | 127 + .../ibis/InlineActionIbis2Hydrator.scala | 12 + .../model/ibis/ListIbis2Hydrator.scala | 21 + ...MagicFanoutCreatorEventIbis2Hydrator.scala | 29 + .../MagicFanoutNewsEventIbis2Hydrator.scala | 103 + ...agicFanoutProductLaunchIbis2Hydrator.scala | 54 + .../MagicFanoutSportsEventIbis2Hydrator.scala | 89 + .../ibis/OutOfNetworkTweetIbis2Hydrator.scala | 90 + .../model/ibis/OverrideForIbis2Request.scala | 210 + .../model/ibis/PushOverrideInfo.scala | 246 + .../RankedSocialContextIbis2Hydrator.scala | 22 + .../ScheduledSpaceSpeakerIbis2Hydrator.scala | 34 + ...cheduledSpaceSubscriberIbis2Hydrator.scala | 29 + .../SubscribedSearchTweetIbis2Hydrator.scala | 33 + ...eetImpressionsCandidateIbis2Hydrator.scala | 21 + .../ibis/TopicProofTweetIbis2Hydrator.scala | 32 + .../model/ibis/TrendTweetIbis2Hydrator.scala | 16 + .../ibis/TweetCandidateIbis2Hydrator.scala | 166 + .../ibis/TweetFavoriteIbis2Hydrator.scala | 21 + .../ibis/TweetRetweetIbis2Hydrator.scala | 32 + .../model/ntab/CandidateNTabCopy.scala | 21 + .../DiscoverTwitterNtabRequestHydrator.scala | 58 + .../model/ntab/EventNTabRequestHydrator.scala | 21 + ...1FirstDegreeTweetNTabRequestHydrator.scala | 18 + 
.../ListCandidateNTabRequestHydrator.scala | 34 + ...anoutCreatorEventNtabRequestHydrator.scala | 110 + ...icFanoutNewsEventNTabRequestHydrator.scala | 16 + ...noutProductLaunchNtabRequestHydrator.scala | 97 + ...FanoutSportsEventNTabRequestHydrator.scala | 95 + .../pushservice/model/ntab/NTabRequest.scala | 10 + .../model/ntab/NTabRequestHydrator.scala | 64 + .../model/ntab/NTabSocialContext.scala | 46 + ...OutOfNetworkTweetNTabRequestHydrator.scala | 78 + .../ScheduledSpaceNTabRequestHydrator.scala | 106 + ...cribedSearchTweetNtabRequestHydrator.scala | 23 + ...pTweetImpressionsNTabRequestHydrator.scala | 37 + .../TopicProofTweetNtabRequestHydrator.scala | 60 + .../model/ntab/TrendTweetNtabHydrator.scala | 61 + .../TweetFavoriteNTabRequestHydrator.scala | 38 + .../model/ntab/TweetNTabRequestHydrator.scala | 55 + .../TweetRetweetNTabRequestHydrator.scala | 38 + .../module/DeployConfigModule.scala | 68 + .../pushservice/module/FilterModule.scala | 16 + .../pushservice/module/FlagModule.scala | 56 + ...LoggedOutPushTargetUserBuilderModule.scala | 27 + .../module/PushHandlerModule.scala | 78 + .../module/PushServiceDarkTrafficModule.scala | 33 + .../module/PushTargetUserBuilderModule.scala | 64 + .../module/ThriftWebFormsModule.scala | 9 + .../pushservice/params/DeciderKey.scala | 210 + .../pushservice/params/PushConstants.scala | 126 + .../pushservice/params/PushEnums.scala | 135 + .../params/PushFeatureSwitchParams.scala | 5043 +++++++++++++++++ .../params/PushFeatureSwitches.scala | 751 +++ .../params/PushMLModelParams.scala | 60 + .../pushservice/params/PushParams.scala | 534 ++ .../params/PushServiceTunableKeys.scala | 9 + .../pushservice/params/ShardParams.scala | 3 + ...ingEpsilonGreedyExplorationPredicate.scala | 58 + .../predicate/BqmlHealthModelPredicates.scala | 129 + .../BqmlQualityModelPredicates.scala | 141 + .../CaretFeedbackHistoryFilter.scala | 99 + .../predicate/CasLockPredicate.scala | 45 + .../predicate/CrtDeciderPredicate.scala | 25 + 
.../predicate/DiscoverTwitterPredicate.scala | 47 + .../predicate/FatiguePredicate.scala | 74 + .../predicate/HealthPredicates.scala | 740 +++ .../JointDauAndQualityModelPredicate.scala | 39 + .../predicate/ListPredicates.scala | 110 + .../LoggedOutPreRankingPredicates.scala | 37 + .../predicate/LoggedOutTargetPredicates.scala | 53 + .../MlModelsHoldbackExperimentPredicate.scala | 71 + .../predicate/OONSpreadControlPredicate.scala | 116 + ...NTweetNegativeFeedbackBasedPredicate.scala | 82 + ...OfNetworkCandidatesQualityPredicates.scala | 221 + .../predicate/PNegMultimodalPredicates.scala | 83 + .../PostRankingPredicateHelper.scala | 50 + .../predicate/PreRankingPredicates.scala | 158 + .../predicate/PredicatesForCandidate.scala | 874 +++ .../predicate/SGSPredicatesForCandidate.scala | 174 + .../predicate/ScarecrowPredicate.scala | 138 + .../predicate/SpacePredicate.scala | 153 + .../predicate/TargetEngagementPredicate.scala | 27 + ...TargetNtabCaretClickFatiguePredicate.scala | 91 + .../predicate/TargetPredicates.scala | 292 + .../TopTweetImpressionsPredicates.scala | 56 + .../TweetEngagementRatioPredicate.scala | 112 + .../predicate/TweetLanguagePredicate.scala | 109 + .../TweetWithheldContentPredicate.scala | 35 + .../event/EventPredicatesForCandidate.scala | 155 + .../MagicFanoutPredicatesForCandidate.scala | 525 ++ .../MagicFanoutPredicatesUtil.scala | 218 + .../magic_fanout/MagicFanoutSportsUtil.scala | 231 + ...rgetingPredicateWrappersForCandidate.scala | 133 + ...BasedNtabCaretClickFatiguePredicates.scala | 973 ++++ .../ContinuousFunction.scala | 148 + .../ntab_caret_fatigue/FeedbackModel.scala | 136 + ...MagicFanoutNtabCaretFatiguePredicate.scala | 28 + ...bCaretClickCandidateFatiguePredicate.scala | 87 + .../NtabCaretClickFatiguePredicate.scala | 47 + .../NtabCaretClickFatigueUtils.scala | 108 + .../RecTypeNtabCaretFatiguePredicate.scala | 87 + .../pushservice/predicate/package.scala | 44 + .../OpenOrNtabClickQualityPredicate.scala | 27 + 
.../QualityPredicateCommon.scala | 165 + .../QualityPredicateMap.scala | 21 + .../pushservice/rank/CRTBoostRanker.scala | 54 + .../pushservice/rank/CRTDownRanker.scala | 45 + .../pushservice/rank/LoggedOutRanker.scala | 45 + .../pushservice/rank/ModelBasedRanker.scala | 204 + .../pushservice/rank/PushserviceRanker.scala | 31 + .../pushservice/rank/RFPHLightRanker.scala | 139 + .../frigate/pushservice/rank/RFPHRanker.scala | 297 + .../rank/SubscriptionCreatorRanker.scala | 110 + .../LoggedOutRefreshForPushHandler.scala | 259 + .../PushCandidateHydrator.scala | 239 + .../refresh_handler/RFPHFeatureHydrator.scala | 69 + .../refresh_handler/RFPHPrerankFilter.scala | 104 + .../refresh_handler/RFPHRestrictStep.scala | 34 + .../refresh_handler/RFPHStatsRecorder.scala | 77 + .../RefreshForPushHandler.scala | 292 + .../RefreshForPushNotifier.scala | 128 + .../cross/BaseCopyFramework.scala | 79 + .../cross/CandidateCopyExpansion.scala | 56 + .../cross/CandidateCopyPair.scala | 11 + .../cross/CandidateToCopy.scala | 263 + .../refresh_handler/cross/CopyFilters.scala | 41 + .../cross/CopyPredicates.scala | 36 + .../scriber/MrRequestScribeHandler.scala | 388 ++ .../send_handler/SendHandler.scala | 250 + .../SendHandlerPushCandidateHydrator.scala | 184 + .../generator/CandidateGenerator.scala | 17 + ...FanoutCreatorEventCandidateGenerator.scala | 70 + ...gicFanoutNewsEventCandidateGenerator.scala | 57 + ...anoutProductLaunchCandidateGenerator.scala | 54 + ...cFanoutSportsEventCandidateGenerator.scala | 153 + .../generator/PushRequestToCandidate.scala | 49 + ...eduledSpaceSpeakerCandidateGenerator.scala | 55 + ...ledSpaceSubscriberCandidateGenerator.scala | 55 + .../pushservice/store/ContentMixerStore.scala | 17 + .../store/CopySelectionServiceStore.scala | 15 + .../pushservice/store/CrMixerTweetStore.scala | 58 + .../store/ExploreRankerStore.scala | 28 + .../store/FollowRecommendationsStore.scala | 46 + .../frigate/pushservice/store/IbisStore.scala | 190 + 
.../store/InterestDiscoveryStore.scala | 16 + .../store/LabeledPushRecsDecideredStore.scala | 156 + .../pushservice/store/LexServiceStore.scala | 26 + .../pushservice/store/NTabHistoryStore.scala | 45 + .../store/OCFPromptHistoryStore.scala | 73 + .../store/OnlineUserHistoryStore.scala | 81 + .../pushservice/store/OpenAppUserStore.scala | 13 + .../SocialGraphServiceProcessStore.scala | 21 + .../store/SoftUserFollowingStore.scala | 61 + .../store/TweetImpressionsStore.scala | 19 + .../store/TweetTranslationStore.scala | 211 + .../store/UttEntityHydrationStore.scala | 79 + .../pushservice/take/CandidateNotifier.scala | 160 + .../LoggedOutRefreshForPushNotifier.scala | 118 + .../pushservice/take/NotificationSender.scala | 95 + .../take/NotificationServiceSender.scala | 273 + .../take/SendHandlerNotifier.scala | 86 + .../CandidateValidator.scala | 83 + .../RFPHCandidateValidator.scala | 27 + .../SendHandlerPostCandidateValidator.scala | 26 + .../SendHandlerPreCandidateValidator.scala | 24 + .../channel_selection/ChannelCandidate.scala | 24 + .../channel_selection/ChannelSelector.scala | 15 + .../NtabOnlyChannelSelector.scala | 21 + .../take/history/EventBusWriter.scala | 37 + .../take/history/HistoryWriter.scala | 49 + .../take/predicates/BasicRFPHPredicates.scala | 7 + .../BasicSendHandlerPredicates.scala | 13 + .../predicates/BasicTweetPredicates.scala | 104 + .../BasicTweetPredicatesForRFPH.scala | 41 + .../OutOfNetworkTweetPredicates.scala | 16 + .../predicates/TakeCommonPredicates.scala | 36 + .../CandidatePredicatesMap.scala | 75 + .../SendHandlerCandidatePredicatesMap.scala | 78 + .../pushservice/take/sender/Ibis2Sender.scala | 185 + .../pushservice/take/sender/NtabSender.scala | 237 + .../pushservice/target/CustomFSFields.scala | 98 + .../LoggedOutPushTargetUserBuilder.scala | 182 + .../target/PushTargetUserBuilder.scala | 694 +++ .../target/RFPHTargetPredicateGenerator.scala | 37 + .../target/TargetAppPermissions.scala | 10 + 
.../target/TargetScoringDetails.scala | 121 + .../pushservice/util/AdaptorUtils.scala | 15 + .../pushservice/util/AdhocStatsUtil.scala | 104 + .../util/Candidate2FrigateNotification.scala | 119 + .../util/CandidateHydrationUtil.scala | 439 ++ .../pushservice/util/CandidateUtil.scala | 138 + .../frigate/pushservice/util/CopyUtil.scala | 448 ++ .../util/EmailLandingPageExperimentUtil.scala | 92 + .../pushservice/util/FunctionalUtil.scala | 12 + .../pushservice/util/IbisScribeTargets.scala | 55 + .../pushservice/util/InlineActionUtil.scala | 219 + .../util/MediaAnnotationsUtil.scala | 52 + .../util/MinDurationModifierCalculator.scala | 187 + .../pushservice/util/MrUserStateUtil.scala | 16 + .../util/NsfwPersonalizationUtil.scala | 126 + .../util/OverrideNotificationUtil.scala | 230 + .../pushservice/util/PushAdaptorUtil.scala | 151 + .../util/PushAppPermissionUtil.scala | 49 + .../pushservice/util/PushCapUtil.scala | 184 + .../pushservice/util/PushDeviceUtil.scala | 57 + .../pushservice/util/PushIbisUtil.scala | 36 + .../pushservice/util/PushToHomeUtil.scala | 24 + .../pushservice/util/RFPHTakeStepUtil.scala | 114 + .../pushservice/util/RelationshipUtil.scala | 66 + .../util/ResponseStatsTrackUtils.scala | 42 + .../util/SendHandlerPredicateUtil.scala | 129 + .../frigate/pushservice/util/TopicsUtil.scala | 340 ++ 309 files changed, 42796 insertions(+) create mode 100644 pushservice/BUILD.bazel create mode 100644 pushservice/readme.md create mode 100644 pushservice/src/main/python/models/heavy_ranking/BUILD create mode 100644 pushservice/src/main/python/models/heavy_ranking/README.md create mode 100644 pushservice/src/main/python/models/heavy_ranking/__init__.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/deep_norm.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/eval.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/features.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/graph.py 
create mode 100644 pushservice/src/main/python/models/heavy_ranking/lib/BUILD create mode 100644 pushservice/src/main/python/models/heavy_ranking/lib/layers.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/lib/model.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/lib/params.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/model_pools.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/params.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/run_args.py create mode 100644 pushservice/src/main/python/models/heavy_ranking/update_warm_start_checkpoint.py create mode 100644 pushservice/src/main/python/models/libs/BUILD create mode 100644 pushservice/src/main/python/models/libs/__init__.py create mode 100644 pushservice/src/main/python/models/libs/customized_full_sparse.py create mode 100644 pushservice/src/main/python/models/libs/get_feat_config.py create mode 100644 pushservice/src/main/python/models/libs/graph_utils.py create mode 100644 pushservice/src/main/python/models/libs/group_metrics.py create mode 100644 pushservice/src/main/python/models/libs/initializer.py create mode 100644 pushservice/src/main/python/models/libs/light_ranking_metrics.py create mode 100644 pushservice/src/main/python/models/libs/metric_fn_utils.py create mode 100644 pushservice/src/main/python/models/libs/model_args.py create mode 100644 pushservice/src/main/python/models/libs/model_utils.py create mode 100644 pushservice/src/main/python/models/libs/warm_start_utils.py create mode 100644 pushservice/src/main/python/models/light_ranking/BUILD create mode 100644 pushservice/src/main/python/models/light_ranking/README.md create mode 100644 pushservice/src/main/python/models/light_ranking/__init__.py create mode 100644 pushservice/src/main/python/models/light_ranking/deep_norm.py create mode 100644 pushservice/src/main/python/models/light_ranking/eval_model.py create mode 100644 
pushservice/src/main/python/models/light_ranking/model_pools_mlp.py create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/BUILD.bazel create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/PushMixerThriftServerWarmupHandler.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/PushServiceMain.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ContentRecommenderMixerAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/EarlyBirdFirstDegreeCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ExploreVideoTweetCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/FRSTweetCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/GenericCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/HighQualityTweetsAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ListsToRecommendCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/LoggedOutPushCandidateSourceGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/OnboardingPushCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/PushCandidateSourceGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetImpressionsCandidateAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetsByGeoAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TrendsCandidatesAdaptor.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TripGeoCandidatesAdaptor.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/Config.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/DeployConfig.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ExperimentsWithStats.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ProdConfig.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/StagingConfig.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/config/mlconfig/DeepbirdV2ModelConfig.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/controller/PushServiceController.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/DisplayLocationNotSupportedException.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/InvalidSportDomainException.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/TweetNTabRequestHydratorException.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UnsupportedCrtException.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UttEntityNotFoundException.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HealthFeatureGetter.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HydrationContextBuilder.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/PushMLModelScorer.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/DiscoverTwitter.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/model/F1FirstdegreeTweet.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ListRecommendationPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutCreatorEventPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutEventPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutHydratedCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutNewsEvent.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutProductLaunchPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutSportsPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/OutOfNetworkTweetPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/PushTypes.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSpeaker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSubscriber.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/SubscribedSearchTweetPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopTweetImpressionsPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopicProofTweetPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TrendTweetPushCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TripTweetPushCandidate.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetAction.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetFavorite.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetRetweet.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/CopyInfo.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/MLScores.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/QualityScribing.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/Scriber.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/CustomConfigurationMapForIbis.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/DiscoverTwitterPushIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/F1FirstDegreeTweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/Ibis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/InlineActionIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ListIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutCreatorEventIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutNewsEventIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutProductLaunchIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutSportsEventIbis2Hydrator.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OutOfNetworkTweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OverrideForIbis2Request.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/PushOverrideInfo.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/RankedSocialContextIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSpeakerIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSubscriberIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/SubscribedSearchTweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopTweetImpressionsCandidateIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopicProofTweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TrendTweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetCandidateIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetFavoriteIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetRetweetIbis2Hydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/CandidateNTabCopy.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/DiscoverTwitterNtabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/EventNTabRequestHydrator.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/F1FirstDegreeTweetNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ListCandidateNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutCreatorEventNtabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutNewsEventNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutProductLaunchNtabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutSportsEventNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequest.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabSocialContext.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/OutOfNetworkTweetNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ScheduledSpaceNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/SubscribedSearchTweetNtabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopTweetImpressionsNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopicProofTweetNtabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TrendTweetNtabHydrator.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetFavoriteNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetRetweetNTabRequestHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/DeployConfigModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FilterModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FlagModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/LoggedOutPushTargetUserBuilderModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushHandlerModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushServiceDarkTrafficModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushTargetUserBuilderModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/module/ThriftWebFormsModule.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/DeciderKey.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushConstants.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushEnums.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitchParams.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitches.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushMLModelParams.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushParams.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushServiceTunableKeys.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/params/ShardParams.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BigFilteringEpsilonGreedyExplorationPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlHealthModelPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlQualityModelPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CaretFeedbackHistoryFilter.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CasLockPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CrtDeciderPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/DiscoverTwitterPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/FatiguePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/HealthPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/JointDauAndQualityModelPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ListPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutPreRankingPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutTargetPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/MlModelsHoldbackExperimentPredicate.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONSpreadControlPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONTweetNegativeFeedbackBasedPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OutOfNetworkCandidatesQualityPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PNegMultimodalPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PostRankingPredicateHelper.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PreRankingPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PredicatesForCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SGSPredicatesForCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ScarecrowPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SpacePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetEngagementPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetNtabCaretClickFatiguePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TopTweetImpressionsPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetEngagementRatioPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetLanguagePredicate.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetWithheldContentPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/event/EventPredicatesForCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesForCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutSportsUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutTargetingPredicateWrappersForCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/CRTBasedNtabCaretClickFatiguePredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/ContinuousFunction.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/FeedbackModel.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/MagicFanoutNtabCaretFatiguePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickCandidateFatiguePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatiguePredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatigueUtils.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/RecTypeNtabCaretFatiguePredicate.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/package.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/OpenOrNtabClickQualityPredicate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateCommon.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateMap.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTBoostRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTDownRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/LoggedOutRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/ModelBasedRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/PushserviceRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHLightRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/SubscriptionCreatorRanker.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/LoggedOutRefreshForPushHandler.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/PushCandidateHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHFeatureHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHPrerankFilter.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHRestrictStep.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHStatsRecorder.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushHandler.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushNotifier.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/BaseCopyFramework.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyExpansion.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyPair.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateToCopy.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyFilters.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/scriber/MrRequestScribeHandler.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandler.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandlerPushCandidateHydrator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/CandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutCreatorEventCandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutNewsEventCandidateGenerator.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutProductLaunchCandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutSportsEventCandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/PushRequestToCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSpeakerCandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSubscriberCandidateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ContentMixerStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CopySelectionServiceStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CrMixerTweetStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ExploreRankerStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/FollowRecommendationsStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/IbisStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/InterestDiscoveryStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LabeledPushRecsDecideredStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LexServiceStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/NTabHistoryStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OCFPromptHistoryStore.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OnlineUserHistoryStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OpenAppUserStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SocialGraphServiceProcessStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SoftUserFollowingStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetImpressionsStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetTranslationStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/store/UttEntityHydrationStore.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/CandidateNotifier.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/LoggedOutRefreshForPushNotifier.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationSender.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationServiceSender.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/SendHandlerNotifier.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/CandidateValidator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/RFPHCandidateValidator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPostCandidateValidator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPreCandidateValidator.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelCandidate.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelSelector.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/NtabOnlyChannelSelector.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/EventBusWriter.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/HistoryWriter.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicRFPHPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicSendHandlerPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicatesForRFPH.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/OutOfNetworkTweetPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/TakeCommonPredicates.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/CandidatePredicatesMap.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/SendHandlerCandidatePredicatesMap.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/Ibis2Sender.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/NtabSender.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/target/CustomFSFields.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/target/LoggedOutPushTargetUserBuilder.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/target/PushTargetUserBuilder.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/target/RFPHTargetPredicateGenerator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetAppPermissions.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetScoringDetails.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdaptorUtils.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdhocStatsUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/Candidate2FrigateNotification.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateHydrationUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CopyUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/EmailLandingPageExperimentUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/FunctionalUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/IbisScribeTargets.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/InlineActionUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MediaAnnotationsUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MinDurationModifierCalculator.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MrUserStateUtil.scala create mode 100644 
pushservice/src/main/scala/com/twitter/frigate/pushservice/util/NsfwPersonalizationUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/OverrideNotificationUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAdaptorUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAppPermissionUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushCapUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushDeviceUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushIbisUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushToHomeUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RFPHTakeStepUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RelationshipUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/ResponseStatsTrackUtils.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/SendHandlerPredicateUtil.scala create mode 100644 pushservice/src/main/scala/com/twitter/frigate/pushservice/util/TopicsUtil.scala diff --git a/README.md b/README.md index ebb136186..5bff49018 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,16 @@ The core components of the For You Timeline included in this repository are list | | [visibility-filters](visibilitylib/README.md) | Responsible for filtering Twitter content to support legal compliance, improve product quality, increase user trust, protect revenue through the use of hard-filtering, visible product treatments, and coarse-grained downranking. 
| | [timelineranker](timelineranker/README.md) | Legacy service which provides relevance-scored tweets from the Earlybird Search Index and UTEG service. | +### Recommended Notifications + +The core components that power Recommended Notifications included in this repository are listed below: + +| Type | Component | Description | +|------------|------------|------------| +| Service | [pushservice](pushservice/README.md) | Main recommendation service at Twitter used to surface recommendations to our users via notifications. | +| Ranking | [pushservice-light-ranker](pushservice/src/main/python/models/light_ranking/README.md) | Light Ranker model used by pushservice to rank Tweets. Bridges candidate generation and heavy ranking by pre-selecting highly-relevant candidates from the initial huge candidate pool. | +| | [pushservice-heavy-ranker](pushservice/src/main/python/models/heavy_ranking/README.md) | Multi-task learning model to predict the probabilities that the target users will open and engage with the sent notifications. | + ## Build and test code We include Bazel BUILD files for most components, but not a top-level BUILD or WORKSPACE file. We plan to add a more complete build and test system in the future. 
diff --git a/pushservice/BUILD.bazel b/pushservice/BUILD.bazel new file mode 100644 index 000000000..12efdb2e6 --- /dev/null +++ b/pushservice/BUILD.bazel @@ -0,0 +1,48 @@ +alias( + name = "frigate-pushservice", + target = ":frigate-pushservice_lib", +) + +target( + name = "frigate-pushservice_lib", + dependencies = [ + "frigate/frigate-pushservice-opensource/src/main/scala/com/twitter/frigate/pushservice", + ], +) + +jvm_binary( + name = "bin", + basename = "frigate-pushservice", + main = "com.twitter.frigate.pushservice.PushServiceMain", + runtime_platform = "java11", + tags = ["bazel-compatible"], + dependencies = [ + "3rdparty/jvm/ch/qos/logback:logback-classic", + "finatra/inject/inject-logback/src/main/scala", + "frigate/frigate-pushservice-opensource/src/main/scala/com/twitter/frigate/pushservice", + "loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback", + "twitter-server/logback-classic/src/main/scala", + ], + excludes = [ + exclude("com.twitter.translations", "translations-twitter"), + exclude("org.apache.hadoop", "hadoop-aws"), + exclude("org.tensorflow"), + scala_exclude("com.twitter", "ckoia-scala"), + ], +) + +jvm_app( + name = "bundle", + basename = "frigate-pushservice-package-dist", + archive = "zip", + binary = ":bin", + tags = ["bazel-compatible"], +) + +python3_library( + name = "mr_model_constants", + sources = [ + "config/deepbird/constants.py", + ], + tags = ["bazel-compatible"], +) diff --git a/pushservice/readme.md b/pushservice/readme.md new file mode 100644 index 000000000..99c20fcba --- /dev/null +++ b/pushservice/readme.md @@ -0,0 +1,45 @@ +# Pushservice + +Pushservice is the main push recommendation service at Twitter used to generate recommendation-based notifications for users. It currently powers two functionalities: + +- RefreshForPushHandler: This handler determines whether to send a recommendation push to a user based on their ID. 
It generates the best push recommendation item and coordinates with downstream services to deliver it +- SendHandler: This handler determines and manages whether to send the push to users based on the given target user details and the provided push recommendation item + +## Overview + +### RefreshForPushHandler + +RefreshForPushHandler follows these steps: + +- Building Target and checking eligibility + - Builds a target user object based on the given user ID + - Performs target-level filtering to determine if the target is eligible for a recommendation push +- Fetch Candidates + - Retrieves a list of potential candidates for the push by querying various candidate sources using the target +- Candidate Hydration + - Hydrates the candidate details with batch calls to different downstream services. +- Pre-rank Filtering, also called Light Filtering + - Filters the hydrated candidates with lightweight RPC calls. +- Rank + - Performs feature hydration for candidates and the target user + - Performs light ranking on candidates + - Performs heavy ranking on candidates +- Take Step, also called Heavy Filtering + - Takes the top-ranked candidates one by one and applies heavy filtering until one candidate passes all filter steps +- Send + - Calls the appropriate downstream service to deliver the eligible candidate as a push and in-app notification to the target user + +### SendHandler + +SendHandler follows these steps: + +- Building Target + - Builds a target user object based on the given user ID +- Candidate Hydration + - Hydrates the candidate details with batch calls to different downstream services. 
+- Feature Hydration + - Performs feature hydration for candidates and the target user +- Take Step, also called Heavy Filtering + - Performs filtering and validation checks on the given candidate +- Send + - Calls the appropriate downstream service to deliver the given candidate as a push and/or in-app notification to the target user \ No newline at end of file diff --git a/pushservice/src/main/python/models/heavy_ranking/BUILD b/pushservice/src/main/python/models/heavy_ranking/BUILD new file mode 100644 index 000000000..2c25693a9 --- /dev/null +++ b/pushservice/src/main/python/models/heavy_ranking/BUILD @@ -0,0 +1,169 @@ +python37_binary( + name = "update_warm_start_checkpoint", + source = "update_warm_start_checkpoint.py", + tags = ["no-mypy"], + dependencies = [ + ":deep_norm_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:update_warm_start_checkpoint", + ], +) + +python3_library( + name = "params_lib", + sources = ["params.py"], + tags = ["no-mypy"], + dependencies = [ + "3rdparty/python/pydantic:default", + "src/python/twitter/deepbird/projects/magic_recs/v11/lib:params_lib", + ], +) + +python3_library( + name = "features_lib", + sources = ["features.py"], + tags = ["no-mypy"], + dependencies = [ + ":params_lib", + "src/python/twitter/deepbird/projects/magic_recs/libs", + "twml:twml-nodeps", + ], +) + +python3_library( + name = "model_pools_lib", + sources = ["model_pools.py"], + tags = ["no-mypy"], + dependencies = [ + ":features_lib", + ":params_lib", + "src/python/twitter/deepbird/projects/magic_recs/v11/lib:model_lib", + ], +) + +python3_library( + name = "graph_lib", + sources = ["graph.py"], + tags = ["no-mypy"], + dependencies = [ + ":params_lib", + "src/python/twitter/deepbird/projects/magic_recs/libs", + ], +) + +python3_library( + name = "run_args_lib", + sources = ["run_args.py"], + tags = ["no-mypy"], + dependencies = [ + ":features_lib", + ":params_lib", + "twml:twml-nodeps", + ], +) + 
+python3_library( + name = "deep_norm_lib", + sources = ["deep_norm.py"], + tags = ["no-mypy"], + dependencies = [ + ":features_lib", + ":graph_lib", + ":model_pools_lib", + ":params_lib", + ":run_args_lib", + "src/python/twitter/deepbird/projects/magic_recs/libs", + "src/python/twitter/deepbird/util/data", + "twml:twml-nodeps", + ], +) + +python3_library( + name = "eval_lib", + sources = ["eval.py"], + tags = ["no-mypy"], + dependencies = [ + ":features_lib", + ":graph_lib", + ":model_pools_lib", + ":params_lib", + ":run_args_lib", + "src/python/twitter/deepbird/projects/magic_recs/libs", + "twml:twml-nodeps", + ], +) + +python37_binary( + name = "deep_norm", + source = "deep_norm.py", + dependencies = [ + ":deep_norm_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:deep_norm", + "twml", + ], +) + +python37_binary( + name = "eval", + source = "eval.py", + dependencies = [ + ":eval_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:eval", + "twml", + ], +) + +python3_library( + name = "mlwf_libs", + tags = ["no-mypy"], + dependencies = [ + ":deep_norm_lib", + "twml", + ], +) + +python37_binary( + name = "train_model", + source = "deep_norm.py", + dependencies = [ + ":deep_norm_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:train_model", + ], +) + +python37_binary( + name = "train_model_local", + source = "deep_norm.py", + dependencies = [ + ":deep_norm_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:train_model_local", + "twml", + ], +) + +python37_binary( + name = "eval_model_local", + source = "eval.py", + dependencies = [ + ":eval_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:eval_model_local", + "twml", + ], +) + +python37_binary( + name = "eval_model", 
+ source = "eval.py", + dependencies = [ + ":eval_lib", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:eval_model", + ], +) + +python37_binary( + name = "mlwf_model", + source = "deep_norm.py", + dependencies = [ + ":mlwf_libs", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/heavy_ranking:mlwf_model", + ], +) diff --git a/pushservice/src/main/python/models/heavy_ranking/README.md b/pushservice/src/main/python/models/heavy_ranking/README.md new file mode 100644 index 000000000..75336a09c --- /dev/null +++ b/pushservice/src/main/python/models/heavy_ranking/README.md @@ -0,0 +1,20 @@ +# Notification Heavy Ranker Model + +## Model Context +The Twitter notifications recommendation system has four major components: 1) candidate generation, 2) light ranking, 3) heavy ranking, and 4) quality control. This notification heavy ranker model is the core ranking model for personalised notification recommendations. It is a multi-task learning model that predicts the probabilities that target users will open and engage with the notifications sent to them. 
+ + +## Directory Structure +- BUILD: this file defines python library dependencies +- deep_norm.py: this file sets up continuous training, model evaluation and model exporting for the notification heavy ranker model +- eval.py: the main python entry file to set up the overall model evaluation pipeline +- features.py: this file imports the feature list and provides support functions for feature engineering +- graph.py: this file defines how to build the tensorflow graph with specified model architecture, loss function and training configuration +- model_pools.py: this file defines the available model types for the heavy ranker +- params.py: this file defines hyper-parameters used in the notification heavy ranker +- run_args.py: this file defines command line parameters to run model training & evaluation +- update_warm_start_checkpoint.py: this file contains support for modifying checkpoints of a given saved heavy ranker model +- lib/BUILD: this file defines python library dependencies for tensorflow model architecture +- lib/layers.py: this file defines the different types of convolution layers to be used in the heavy ranker model +- lib/model.py: this file defines the module containing ClemNet, the heavy ranker model type +- lib/params.py: this file defines parameters used in the heavy ranker model diff --git a/pushservice/src/main/python/models/heavy_ranking/__init__.py b/pushservice/src/main/python/models/heavy_ranking/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pushservice/src/main/python/models/heavy_ranking/deep_norm.py b/pushservice/src/main/python/models/heavy_ranking/deep_norm.py new file mode 100644 index 000000000..7db281b4a --- /dev/null +++ b/pushservice/src/main/python/models/heavy_ranking/deep_norm.py @@ -0,0 +1,136 @@ +""" +Training job for the heavy ranker of the push notification service. 
def main() -> None:
  """
  Entry point of the heavy ranker training job.

  Steps, in order:
    1. Parse the command line and load the graph parameters.
    2. Persist the graph parameters to `<save_dir>/params.json`.
    3. Build the feature config from the feature list and the data spec.
    4. Optionally build a warm-start checkpoint from `--warm_start_base_dir`.
    5. Train (unless `--directly_export_best` is set), then export the model.
    6. Save the list of continuous and binary features next to the model.
  """
  args, _ = get_training_arg_parser().parse_known_args()
  logging.info(f"Parsed args: {args}")

  params = load_graph_params(args)
  logging.info(f"Loaded graph params: {params}")

  param_file = os.path.join(args.save_dir, "params.json")
  logging.info(f"Saving graph params to: {param_file}")
  with tf.io.gfile.GFile(param_file, mode="w") as file:
    # `params.json()` already returns a JSON *string*. Dumping that string directly would write a
    # quoted, escaped string literal instead of a JSON object, which `load_graph_params` could not
    # feed to `GraphParams.parse_obj`. Decode it first so the file holds a real JSON object.
    json.dump(json.loads(params.json()), file, ensure_ascii=False, indent=4)

  logging.info(f"Get Feature Config: {args.feature_list}")
  feature_list = read_config(args.feature_list).items()
  feature_config = get_feature_config(
    data_spec_path=args.data_spec,
    params=params,
    feature_list_provided=feature_list,
  )
  feature_list_path = args.feature_list

  warm_start_from = args.warm_start_from
  if args.warm_start_base_dir:
    logging.info(f"Get warm started model from: {args.warm_start_base_dir}.")

    continuous_binary_feat_list_save_path = os.path.join(
      args.warm_start_base_dir, "continuous_binary_feat_list.json"
    )
    warm_start_folder = os.path.join(args.warm_start_base_dir, "best_checkpoint")
    job_name = os.path.basename(args.save_dir)
    ws_output_ckpt_folder = os.path.join(args.warm_start_base_dir, f"warm_start_for_{job_name}")
    # Always start from an empty output folder so stale checkpoints are never picked up.
    if tf.io.gfile.exists(ws_output_ckpt_folder):
      tf.io.gfile.rmtree(ws_output_ckpt_folder)

    tf.io.gfile.mkdir(ws_output_ckpt_folder)

    warm_start_from = warm_start_checkpoint(
      warm_start_folder,
      continuous_binary_feat_list_save_path,
      feature_list_path,
      args.data_spec,
      ws_output_ckpt_folder,
    )
    logging.info(f"Created warm_start_from_ckpt {warm_start_from}.")

  logging.info("Build Trainer.")
  metric_fn = get_metric_fn("OONC_Engagement" if len(params.tasks) == 2 else "OONC", False)

  trainer = twml.trainers.DataRecordTrainer(
    name="magic_recs",
    params=args,
    # The lambda's `*args` intentionally shadows the outer `args`: it forwards whatever the
    # trainer passes to the graph-building callable.
    build_graph_fn=lambda *args: ALL_MODELS[params.model.name](params=params)(*args),
    save_dir=args.save_dir,
    run_config=None,
    feature_config=feature_config,
    metric_fn=flip_disliked_labels(metric_fn),
    warm_start_from=warm_start_from,
  )

  logging.info("Build train and eval input functions.")
  train_input_fn = trainer.get_train_input_fn(shuffle=True)
  eval_input_fn = trainer.get_eval_input_fn(repeat=False, shuffle=False)

  learn = trainer.learn
  if args.distributed or args.num_workers is not None:
    learn = trainer.train_and_evaluate

  if not args.directly_export_best:
    logging.info("Starting training")
    start = datetime.now()
    learn(
      early_stop_minimize=False,
      early_stop_metric="pr_auc_unweighted_OONC",
      early_stop_patience=args.early_stop_patience,
      early_stop_tolerance=args.early_stop_tolerance,
      eval_input_fn=eval_input_fn,
      train_input_fn=train_input_fn,
    )
    logging.info(f"Total training time: {datetime.now() - start}")
  else:
    logging.info("Directly exporting the model")

  if not args.export_dir:
    args.export_dir = os.path.join(args.save_dir, "exported_models")

  logging.info(f"Exporting the model to {args.export_dir}.")
  start = datetime.now()
  twml.contrib.export.export_fn.export_all_models(
    trainer=trainer,
    export_dir=args.export_dir,
    parse_fn=feature_config.get_parse_fn(),
    serving_input_receiver_fn=feature_config.get_serving_input_receiver_fn(),
    export_output_fn=twml.export_output_fns.batch_prediction_continuous_output_fn,
  )

  logging.info(f"Total model export time: {datetime.now() - start}")
  logging.info(f"The MLP directory is: {args.save_dir}")

  # The same file name is read from `--warm_start_base_dir` by the warm-start branch above.
  continuous_binary_feat_list_save_path = os.path.join(
    args.save_dir, "continuous_binary_feat_list.json"
  )
  logging.info(
    f"Saving the list of continuous and binary features to {continuous_binary_feat_list_save_path}."
  )
  continuous_binary_feat_list = get_feature_list_for_heavy_ranking(
    feature_list_path, args.data_spec
  )
  twml.util.write_file(
    continuous_binary_feat_list_save_path, continuous_binary_feat_list, encode="json"
  )
def get_feature_config(data_spec_path=None, feature_list_provided=(), params: GraphParams = None):
  """
  Builds the twml feature config used by the heavy ranker.

  Args:
    data_spec_path: path of the data spec handed to the `FeatureConfigBuilder`.
    feature_list_provided: iterable of `(feature, feature_type)` pairs. Features whose type is
      "S" go to the "sparse_continuous" tensor; all the others are offered to the continuous,
      binary and "sparse_not_continuous" extractions (each one filters by feature type).
    params: graph parameters. Required despite the `None` default, which is kept only for
      signature compatibility.

  Returns:
    The object produced by `FeatureConfigBuilder.build()`.

  Raises:
    ValueError: if `params` is None.
  """
  if params is None:
    raise ValueError("get_feature_config requires `params` (a GraphParams instance).")

  # Materialise once: the pairs are scanned twice below, which would silently yield an empty
  # second list if the caller passed a one-shot iterator.
  feature_list = list(feature_list_provided)
  a_string_feat_list = [feat for feat, feat_type in feature_list if feat_type != "S"]

  builder = twml.contrib.feature_config.FeatureConfigBuilder(
    data_spec_path=data_spec_path, debug=False
  )

  builder = builder.extract_feature_group(
    feature_regexes=a_string_feat_list,
    group_name="continuous_features",
    default_value=FEAT_CONFIG_DEFAULT_VAL,
    type_filter=["CONTINUOUS"],
  )

  builder = builder.extract_feature_group(
    feature_regexes=a_string_feat_list,
    group_name="binary_features",
    type_filter=["BINARY"],
  )

  if params.model.features.sparse_features:
    builder = builder.extract_features_as_hashed_sparse(
      feature_regexes=a_string_feat_list,
      hash_space_size_bits=params.model.features.sparse_features.bits,
      type_filter=["DISCRETE", "STRING", "SPARSE_BINARY"],
      output_tensor_name="sparse_not_continuous",
    )

    builder = builder.extract_features_as_hashed_sparse(
      feature_regexes=[feat for feat, feat_type in feature_list if feat_type == "S"],
      hash_space_size_bits=params.model.features.sparse_features.bits,
      type_filter=["SPARSE_CONTINUOUS"],
      output_tensor_name="sparse_continuous",
    )

  # "label.ntabDislike" is always appended after the per-task labels.
  builder = builder.add_labels([task.label for task in params.tasks] + ["label.ntabDislike"])

  if params.weight:
    builder = builder.define_weight(params.weight)

  return builder.build()
+ """ + + with tf.name_scope("sparse_features"): + with tf.name_scope("sparse_not_continuous"): + sparse_not_continuous = full_sparse( + inputs=features["sparse_not_continuous"], + output_size=params.embedding_size, + use_sparse_grads=training, + use_binary_values=False, + ) + + with tf.name_scope("sparse_continuous"): + shape_enforced_input = twml.util.limit_sparse_tensor_size( + sparse_tf=features["sparse_continuous"], input_size_bits=params.bits, mask_indices=False + ) + + normalized_continuous_sparse = sparse_max_norm( + inputs=shape_enforced_input, is_training=training + ) + + sparse_continuous = full_sparse( + inputs=normalized_continuous_sparse, + output_size=params.embedding_size, + use_sparse_grads=training, + use_binary_values=False, + ) + + output = tf.concat([sparse_not_continuous, sparse_continuous], axis=1) + + return output + + +def get_features(features: Dict[str, Tensor], training: bool, params: FeaturesParams) -> Tensor: + """ + Performs feature transformations on the dense and sparse features and combine the resulting + tensors into a single one. 
class Graph(ABC):
  """
  Base class turning a logits function into a full estimator graph.

  Subclasses only implement `get_logits`; this class derives the probabilities, the serving
  score, the loss and the training op from it, and `__call__` assembles them per estimator mode.
  """

  def __init__(self, params: GraphParams):
    self.params = params

  @abstractmethod
  def get_logits(self, features: Dict[str, tf.Tensor], training: bool) -> tf.Tensor:
    """
    Returns the logits for `features`.

    `__call__` invokes this with the keywords `features=` and `training=`, so implementations
    must accept exactly those parameter names.
    """
    pass

  def get_probabilities(self, logits: tf.Tensor) -> tf.Tensor:
    """Sigmoid of the logits followed by a cumulative product along axis 1."""
    return tf.math.cumprod(tf.nn.sigmoid(logits), axis=1, name="probabilities")

  def get_task_weights(self, labels: tf.Tensor) -> tf.Tensor:
    """
    Per-example, per-task loss weights.

    The first task always has weight 1; the second task is weighted by the first label column,
    so it only contributes where that label is non-zero. The result is truncated to the number
    of configured tasks.
    """
    oonc_label = tf.reshape(labels[:, 0], shape=(-1, 1))
    task_weights = tf.concat([tf.ones_like(oonc_label), oonc_label], axis=1)

    n_labels = len(self.params.tasks)
    task_weights = tf.reshape(task_weights[:, 0:n_labels], shape=(-1, n_labels))

    return task_weights

  def get_loss(self, labels: tf.Tensor, logits: tf.Tensor, **kwargs: Any) -> tf.Tensor:
    """
    Weighted sigmoid cross-entropy, summed over tasks and divided by the batch size.

    Only the first two label columns are used as targets; they are zeroed wherever
    `generate_disliked_mask` flags the example.
    """
    with tf.name_scope("weights"):
      # The mask must be computed from the full label tensor, before it is cut to two columns.
      disliked_mask = generate_disliked_mask(labels)

      labels = tf.reshape(labels[:, 0:2], shape=[-1, 2])

      labels = labels * tf.cast(tf.logical_not(disliked_mask), dtype=labels.dtype)

      with tf.name_scope("task_weight"):
        task_weights = self.get_task_weights(labels)

      with tf.name_scope("batch_size"):
        batch_size = tf.cast(tf.shape(labels)[0], dtype=tf.float32, name="batch_size")

      weights = task_weights / batch_size

    with tf.name_scope("loss"):
      loss = tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits) * weights,
      )

    return loss

  def get_score(self, probabilities: tf.Tensor) -> tf.Tensor:
    """Weighted average of the task probabilities, using normalised `score_weight`s."""
    with tf.name_scope("score_weight"):
      score_weights = tf.constant([task.score_weight for task in self.params.tasks])
      score_weights = score_weights / tf.reduce_sum(score_weights, axis=0)

    with tf.name_scope("score"):
      score = tf.reshape(tf.reduce_sum(probabilities * score_weights, axis=1), shape=[-1, 1])

    return score

  def get_train_op(self, loss: tf.Tensor, twml_params) -> Any:
    """Builds the gradient-descent training op, run after the collected UPDATE_OPS."""
    with tf.name_scope("optimizer"):
      learning_rate = twml_params.learning_rate
      optimizer = tf1.train.GradientDescentOptimizer(learning_rate=learning_rate)

    update_ops = set(tf1.get_collection(tf1.GraphKeys.UPDATE_OPS))
    with tf.control_dependencies(update_ops):
      train_op = twml.optimizers.optimize_loss(
        loss=loss,
        variables=tf1.trainable_variables(),
        global_step=tf1.train.get_global_step(),
        optimizer=optimizer,
        # The learning rate is already baked into `optimizer`.
        learning_rate=None,
      )

    return train_op

  def __call__(
    self,
    features: Dict[str, tf.Tensor],
    labels: tf.Tensor,
    mode: tf.estimator.ModeKeys,
    params: HParams,
    config=None,
  ) -> Dict[str, tf.Tensor]:
    """
    Builds the graph for the given estimator `mode`.

    Returns a dict with the keys "loss" and "train_op", plus "prediction" (the score) in
    PREDICT mode or "output" (the probabilities) in TRAIN/EVAL mode.

    Raises:
      ValueError: if `mode` is not PREDICT, TRAIN or EVAL.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN
    logits = self.get_logits(features=features, training=training)
    probabilities = self.get_probabilities(logits=logits)
    score = None
    loss = None
    train_op = None

    if mode == tf.estimator.ModeKeys.PREDICT:
      score = self.get_score(probabilities=probabilities)
      output = {"loss": loss, "train_op": train_op, "prediction": score}

    elif mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
      loss = self.get_loss(labels=labels, logits=logits)

      if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = self.get_train_op(loss=loss, twml_params=params)

      output = {"loss": loss, "train_op": train_op, "output": probabilities}

    else:
      raise ValueError(
        f"""
        Invalid mode. Possible values are: {tf.estimator.ModeKeys.PREDICT}, {tf.estimator.ModeKeys.TRAIN}, and {tf.estimator.ModeKeys.EVAL}
        . Passed: {mode}
      """
      )

    return output
class ChannelWiseDense(tf.keras.layers.Layer):
  """
  Dense layer is applied to each channel separately. This is more memory and computationally
  efficient than flattening the channels and performing single dense layers over it which is the
  default behavior in tf1.

  The layer expects a rank-3 input whose axis 1 is the dense input size and whose axis 2 is the
  channel axis (see `build`); every channel gets its own kernel and its own bias.
  """

  def __init__(
    self,
    output_size: int,
    use_bias: bool,
    # Was "uniform_glorot", which is not a Keras initializer identifier. "glorot_uniform" is
    # the name used by `KerasConv1D` in this module.
    kernel_initializer: str = "glorot_uniform",
    bias_initializer: str = "zeros",
    **kwargs: Any,
  ):
    super(ChannelWiseDense, self).__init__(**kwargs)
    self.output_size = output_size
    self.use_bias = use_bias
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer

  def build(self, input_shape: tf.TensorShape) -> None:
    """Creates one `(input_size, output_size)` kernel and one bias vector per channel."""
    assert (
      len(input_shape) == 3
    ), f"Tensor shape must be of length 3. Passed tensor of shape {input_shape}."

    input_size = input_shape[1]
    channels = input_shape[2]

    self.kernel = self.add_weight(
      name="kernel",
      shape=(channels, input_size, self.output_size),
      initializer=self.kernel_initializer,
      trainable=True,
    )

    # NOTE(review): the bias variable is always created and always added in `call`;
    # `use_bias=False` only freezes it at its initial value. With the default "zeros"
    # initializer that is equivalent to having no bias.
    self.bias = self.add_weight(
      name="bias",
      shape=(channels, self.output_size),
      initializer=self.bias_initializer,
      trainable=self.use_bias,
    )

  def call(self, inputs: tf.Tensor, **kwargs: Any) -> tf.Tensor:
    """Applies the per-channel dense transform; the output keeps the input's axis order."""
    x = inputs

    # Move channels to the front so `matmul` batches over them:
    # (d0, input_size, channels) -> (channels, d0, input_size).
    transposed_x = tf.transpose(x, perm=[2, 0, 1])
    # (channels, d0, output_size) -> (d0, channels, output_size), then add the per-channel bias.
    transposed_residual = (
      tf.transpose(tf.matmul(transposed_x, self.kernel), perm=[1, 0, 2]) + self.bias
    )
    # Back to the input layout: (d0, output_size, channels).
    output = tf.transpose(transposed_residual, perm=[0, 2, 1])

    return output
class ClemNet(tf.keras.layers.Layer):
  """
  A residual network stacking residual blocks composed of dense layers and convolutions.

  Rank-2 inputs are accepted as well as rank-3 ones: a trailing axis of size 1 is appended to
  them before the blocks are applied.
  """

  def __init__(self, params: ClemNetParams, **kwargs: Any):
    super(ClemNet, self).__init__(**kwargs)
    self.params = params

  def build(self, input_shape: tf.TensorShape) -> None:
    # The message used to claim that only rank 3 is valid although rank 2 is accepted too.
    assert len(input_shape) in (
      2,
      3,
    ), f"Tensor shape must be of length 2 or 3. Passed tensor of shape {input_shape}."

  def call(self, inputs: tf.Tensor, training: bool) -> tf.Tensor:
    """
    Runs the input through every configured block, flattens the result and, when a top layer
    is configured, projects it to `params.top.n_labels` logits.
    """
    if len(inputs.shape) < 3:
      inputs = tf.expand_dims(inputs, axis=-1)

    x = inputs
    for block_params in self.params.blocks:
      x = Block2(block_params)(inputs=x, training=training)

    x = tf.keras.layers.Flatten(name="flattened")(x)
    if self.params.top:
      x = tf.keras.layers.Dense(units=self.params.top.n_labels, name="logits")(x)

    return x
class MagicRecsClemNet(Graph):
  """Graph whose logits come from two independent ClemNet towers fed with the same features."""

  def get_logits(self, features: Dict[str, tf.Tensor], training: bool) -> tf.Tensor:
    """
    Builds one ClemNet per name scope (first "OONC_logits", then "EngagementGivenOONC_logits")
    and concatenates their outputs along axis 1, in that order.
    """
    model_params = self.params.model
    tower_scopes = ("OONC_logits", "EngagementGivenOONC_logits")

    with tf.name_scope("logits"):
      tower_input = get_features(
        features=features, training=training, params=model_params.features
      )

      tower_logits = []
      for scope_name in tower_scopes:
        with tf.name_scope(scope_name):
          tower = ClemNet(params=model_params.architecture)
          tower_logits.append(tower(inputs=tower_input, training=training))

      # NOTE(review): the original nesting of this return relative to the "logits" scope is not
      # recoverable from the collapsed source -- confirm against the real file.
      return tf.concat(tower_logits, axis=1)
def load_graph_params(args) -> GraphParams:
  """
  Returns the graph parameters for a run.

  Args:
    args: parsed command line; only `args.param_file` is read.

  Returns:
    `DEFAULT_GRAPH_PARAMS` when no parameter file is given, otherwise the `GraphParams` parsed
    from the JSON file at `args.param_file`.
  """
  params = DEFAULT_GRAPH_PARAMS
  if args.param_file:
    # Read-only access is enough; "r+" would additionally require write permission on the file.
    with tf.io.gfile.GFile(args.param_file, mode="r") as file:
      raw_params = json.load(file)

    # A file produced with `json.dump(model.json(), file)` holds a JSON *string* that itself
    # contains the JSON object; decode it once more so such files can be loaded as well.
    if isinstance(raw_params, str):
      raw_params = json.loads(raw_params)

    params = GraphParams.parse_obj(raw_params)

  return params
def get_eval_arg_parser():
  """
  Returns the argument parser of the evaluation job: every training argument plus
  `--eval_checkpoint`.
  """
  eval_parser = get_training_arg_parser()

  checkpoint_option = dict(
    default=None,
    type=str,
    help="Which checkpoint to use for evaluation",
  )
  eval_parser.add_argument("--eval_checkpoint", **checkpoint_option)

  return eval_parser
+""" + +import os + +from twitter.deepbird.projects.magic_recs.libs.get_feat_config import FEATURE_LIST_DEFAULT_PATH +from twitter.deepbird.projects.magic_recs.libs.warm_start_utils_v11 import ( + get_feature_list_for_heavy_ranking, + mkdirp, + rename_dir, + rmdir, + warm_start_checkpoint, +) +import twml +from twml.trainers import DataRecordTrainer + +import tensorflow.compat.v1 as tf +from tensorflow.compat.v1 import logging + + +def get_arg_parser(): + parser = DataRecordTrainer.add_parser_arguments() + parser.add_argument( + "--model_type", + default="deepnorm_gbdt_inputdrop2_rescale", + type=str, + help="specify the model type to use.", + ) + + parser.add_argument( + "--model_trainer_name", + default="None", + type=str, + help="deprecated, added here just for api compatibility.", + ) + + parser.add_argument( + "--warm_start_base_dir", + default="none", + type=str, + help="latest ckpt in this folder will be used.", + ) + + parser.add_argument( + "--output_checkpoint_dir", + default="none", + type=str, + help="Output folder for warm started ckpt. 
If none, it will move warm_start_base_dir to backup, and overwrite it", + ) + + parser.add_argument( + "--feature_list", + default="none", + type=str, + help="Which features to use for training", + ) + + parser.add_argument( + "--old_feature_list", + default="none", + type=str, + help="Which features to use for training", + ) + + return parser + + +def get_params(args=None): + parser = get_arg_parser() + if args is None: + return parser.parse_args() + else: + return parser.parse_args(args) + + +def _main(): + opt = get_params() + logging.info("parse is: ") + logging.info(opt) + + if opt.feature_list == "none": + feature_list_path = FEATURE_LIST_DEFAULT_PATH + else: + feature_list_path = opt.feature_list + + if opt.warm_start_base_dir != "none" and tf.io.gfile.exists(opt.warm_start_base_dir): + if opt.output_checkpoint_dir == "none" or opt.output_checkpoint_dir == opt.warm_start_base_dir: + _warm_start_base_dir = os.path.normpath(opt.warm_start_base_dir) + "_backup_warm_start" + _output_folder_dir = opt.warm_start_base_dir + + rename_dir(opt.warm_start_base_dir, _warm_start_base_dir) + tf.logging.info(f"moved {opt.warm_start_base_dir} to {_warm_start_base_dir}") + else: + _warm_start_base_dir = opt.warm_start_base_dir + _output_folder_dir = opt.output_checkpoint_dir + + continuous_binary_feat_list_save_path = os.path.join( + _warm_start_base_dir, "continuous_binary_feat_list.json" + ) + + if opt.old_feature_list != "none": + tf.logging.info("getting old continuous_binary_feat_list") + continuous_binary_feat_list = get_feature_list_for_heavy_ranking( + opt.old_feature_list, opt.data_spec + ) + rmdir(continuous_binary_feat_list_save_path) + twml.util.write_file( + continuous_binary_feat_list_save_path, continuous_binary_feat_list, encode="json" + ) + tf.logging.info(f"Finish writting files to {continuous_binary_feat_list_save_path}") + + warm_start_folder = os.path.join(_warm_start_base_dir, "best_checkpoint") + if not tf.io.gfile.exists(warm_start_folder): + 
warm_start_folder = _warm_start_base_dir + + rmdir(_output_folder_dir) + mkdirp(_output_folder_dir) + + new_ckpt = warm_start_checkpoint( + warm_start_folder, + continuous_binary_feat_list_save_path, + feature_list_path, + opt.data_spec, + _output_folder_dir, + opt.model_type, + ) + logging.info(f"Created new ckpt {new_ckpt} from {warm_start_folder}") + + tf.logging.info("getting new continuous_binary_feat_list") + new_continuous_binary_feat_list_save_path = os.path.join( + _output_folder_dir, "continuous_binary_feat_list.json" + ) + continuous_binary_feat_list = get_feature_list_for_heavy_ranking( + feature_list_path, opt.data_spec + ) + rmdir(new_continuous_binary_feat_list_save_path) + twml.util.write_file( + new_continuous_binary_feat_list_save_path, continuous_binary_feat_list, encode="json" + ) + tf.logging.info(f"Finish writting files to {new_continuous_binary_feat_list_save_path}") + + +if __name__ == "__main__": + _main() diff --git a/pushservice/src/main/python/models/libs/BUILD b/pushservice/src/main/python/models/libs/BUILD new file mode 100644 index 000000000..82a014ba5 --- /dev/null +++ b/pushservice/src/main/python/models/libs/BUILD @@ -0,0 +1,16 @@ +python3_library( + name = "libs", + sources = ["*.py"], + tags = [ + "bazel-compatible", + "no-mypy", + ], + dependencies = [ + "cortex/recsys/src/python/twitter/cortex/recsys/utils", + "magicpony/common/file_access/src/python/twitter/magicpony/common/file_access", + "src/python/twitter/cortex/ml/embeddings/deepbird", + "src/python/twitter/cortex/ml/embeddings/deepbird/grouped_metrics", + "src/python/twitter/deepbird/util/data", + "twml:twml-nodeps", + ], +) diff --git a/pushservice/src/main/python/models/libs/__init__.py b/pushservice/src/main/python/models/libs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pushservice/src/main/python/models/libs/customized_full_sparse.py b/pushservice/src/main/python/models/libs/customized_full_sparse.py new file mode 100644 index 
000000000..b41f7d694 --- /dev/null +++ b/pushservice/src/main/python/models/libs/customized_full_sparse.py @@ -0,0 +1,56 @@ +# pylint: disable=no-member, arguments-differ, attribute-defined-outside-init, unused-argument +""" +Implementing Full Sparse Layer, allow specify use_binary_value in call() to +overide default action. +""" + +from twml.layers import FullSparse as defaultFullSparse +from twml.layers.full_sparse import sparse_dense_matmul + +import tensorflow.compat.v1 as tf + + +class FullSparse(defaultFullSparse): + def call(self, inputs, use_binary_values=None, **kwargs): # pylint: disable=unused-argument + """The logic of the layer lives here. + + Arguments: + inputs: + A SparseTensor or a list of SparseTensors. + If `inputs` is a list, all tensors must have same `dense_shape`. + + Returns: + - If `inputs` is `SparseTensor`, then returns `bias + inputs * dense_b`. + - If `inputs` is a `list[SparseTensor`, then returns + `bias + add_n([sp_a * dense_b for sp_a in inputs])`. + """ + + if use_binary_values is not None: + default_use_binary_values = use_binary_values + else: + default_use_binary_values = self.use_binary_values + + if isinstance(default_use_binary_values, (list, tuple)): + raise ValueError( + "use_binary_values can not be %s when inputs is %s" + % (type(default_use_binary_values), type(inputs)) + ) + + outputs = sparse_dense_matmul( + inputs, + self.weight, + self.use_sparse_grads, + default_use_binary_values, + name="sparse_mm", + partition_axis=self.partition_axis, + num_partitions=self.num_partitions, + compress_ids=self._use_compression, + cast_indices_dtype=self._cast_indices_dtype, + ) + + if self.bias is not None: + outputs = tf.nn.bias_add(outputs, self.bias) + + if self.activation is not None: + return self.activation(outputs) # pylint: disable=not-callable + return outputs diff --git a/pushservice/src/main/python/models/libs/get_feat_config.py b/pushservice/src/main/python/models/libs/get_feat_config.py new file mode 100644 index 
000000000..4d8b3e93c --- /dev/null +++ b/pushservice/src/main/python/models/libs/get_feat_config.py @@ -0,0 +1,176 @@ +import os + +from twitter.deepbird.projects.magic_recs.libs.metric_fn_utils import USER_AGE_FEATURE_NAME +from twitter.deepbird.projects.magic_recs.libs.model_utils import read_config +from twml.contrib import feature_config as contrib_feature_config + + +# checkstyle: noqa + +FEAT_CONFIG_DEFAULT_VAL = -1.23456789 + +DEFAULT_INPUT_SIZE_BITS = 18 + +DEFAULT_FEATURE_LIST_PATH = "./feature_list_default.yaml" +FEATURE_LIST_DEFAULT_PATH = os.path.join( + os.path.dirname(os.path.realpath(__file__)), DEFAULT_FEATURE_LIST_PATH +) + +DEFAULT_FEATURE_LIST_LIGHT_RANKING_PATH = "./feature_list_light_ranking.yaml" +FEATURE_LIST_DEFAULT_LIGHT_RANKING_PATH = os.path.join( + os.path.dirname(os.path.realpath(__file__)), DEFAULT_FEATURE_LIST_LIGHT_RANKING_PATH +) + +FEATURE_LIST_DEFAULT = read_config(FEATURE_LIST_DEFAULT_PATH).items() +FEATURE_LIST_LIGHT_RANKING_DEFAULT = read_config(FEATURE_LIST_DEFAULT_LIGHT_RANKING_PATH).items() + + +LABELS = ["label"] +LABELS_MTL = {"OONC": ["label"], "OONC_Engagement": ["label", "label.engagement"]} +LABELS_LR = { + "Sent": ["label.sent"], + "HeavyRankPosition": ["meta.ranking.is_top3"], + "HeavyRankProbability": ["meta.ranking.weighted_oonc_model_score"], +} + + +def _get_new_feature_config_base( + data_spec_path, + labels, + add_sparse_continous=True, + add_gbdt=True, + add_user_id=False, + add_timestamp=False, + add_user_age=False, + feature_list_provided=[], + opt=None, + run_light_ranking_group_metrics_in_bq=False, +): + """ + Getter of the feature config based on specification. + + Args: + data_spec_path: A string indicating the path of the data_spec.json file, which could be + either a local path or a hdfs path. + labels: A list of strings indicating the name of the label in the data spec. + add_sparse_continous: A bool indicating if sparse_continuous feature needs to be included. 
+ add_gbdt: A bool indicating if gbdt feature needs to be included. + add_user_id: A bool indicating if user_id feature needs to be included. + add_timestamp: A bool indicating if timestamp feature needs to be included. This will be useful + for sequential models and meta learning models. + add_user_age: A bool indicating if the user age feature needs to be included. + feature_list_provided: A list of features thats need to be included. If not specified, will use + FEATURE_LIST_DEFAULT by default. + opt: A namespace of arguments indicating the hyparameters. + run_light_ranking_group_metrics_in_bq: A bool indicating if heavy ranker score info needs to be included to compute group metrics in BigQuery. + + Returns: + A twml feature config object. + """ + + input_size_bits = DEFAULT_INPUT_SIZE_BITS if opt is None else opt.input_size_bits + + feature_list = feature_list_provided if feature_list_provided != [] else FEATURE_LIST_DEFAULT + a_string_feat_list = [f[0] for f in feature_list if f[1] != "S"] + + builder = contrib_feature_config.FeatureConfigBuilder(data_spec_path=data_spec_path) + + builder = builder.extract_feature_group( + feature_regexes=a_string_feat_list, + group_name="continuous", + default_value=FEAT_CONFIG_DEFAULT_VAL, + type_filter=["CONTINUOUS"], + ) + + builder = builder.extract_features_as_hashed_sparse( + feature_regexes=a_string_feat_list, + output_tensor_name="sparse_no_continuous", + hash_space_size_bits=input_size_bits, + type_filter=["BINARY", "DISCRETE", "STRING", "SPARSE_BINARY"], + ) + + if add_gbdt: + builder = builder.extract_features_as_hashed_sparse( + feature_regexes=["ads\..*"], + output_tensor_name="gbdt_sparse", + hash_space_size_bits=input_size_bits, + ) + + if add_sparse_continous: + s_string_feat_list = [f[0] for f in feature_list if f[1] == "S"] + + builder = builder.extract_features_as_hashed_sparse( + feature_regexes=s_string_feat_list, + output_tensor_name="sparse_continuous", + hash_space_size_bits=input_size_bits, + 
type_filter=["SPARSE_CONTINUOUS"], + ) + + if add_user_id: + builder = builder.extract_feature("meta.user_id") + if add_timestamp: + builder = builder.extract_feature("meta.timestamp") + if add_user_age: + builder = builder.extract_feature(USER_AGE_FEATURE_NAME) + + if run_light_ranking_group_metrics_in_bq: + builder = builder.extract_feature("meta.trace_id") + builder = builder.extract_feature("meta.ranking.weighted_oonc_model_score") + + builder = builder.add_labels(labels).define_weight("meta.weight") + + return builder.build() + + +def get_feature_config_with_sparse_continuous( + data_spec_path, + feature_list_provided=[], + opt=None, + add_user_id=False, + add_timestamp=False, + add_user_age=False, +): + task_name = opt.task_name if getattr(opt, "task_name", None) is not None else "OONC" + if task_name not in LABELS_MTL: + raise ValueError("Invalid Task Name !") + + return _get_new_feature_config_base( + data_spec_path=data_spec_path, + labels=LABELS_MTL[task_name], + add_sparse_continous=True, + add_user_id=add_user_id, + add_timestamp=add_timestamp, + add_user_age=add_user_age, + feature_list_provided=feature_list_provided, + opt=opt, + ) + + +def get_feature_config_light_ranking( + data_spec_path, + feature_list_provided=[], + opt=None, + add_user_id=True, + add_timestamp=False, + add_user_age=False, + add_gbdt=False, + run_light_ranking_group_metrics_in_bq=False, +): + task_name = opt.task_name if getattr(opt, "task_name", None) is not None else "HeavyRankPosition" + if task_name not in LABELS_LR: + raise ValueError("Invalid Task Name !") + if not feature_list_provided: + feature_list_provided = FEATURE_LIST_LIGHT_RANKING_DEFAULT + + return _get_new_feature_config_base( + data_spec_path=data_spec_path, + labels=LABELS_LR[task_name], + add_sparse_continous=False, + add_gbdt=add_gbdt, + add_user_id=add_user_id, + add_timestamp=add_timestamp, + add_user_age=add_user_age, + feature_list_provided=feature_list_provided, + opt=opt, + 
run_light_ranking_group_metrics_in_bq=run_light_ranking_group_metrics_in_bq, + ) diff --git a/pushservice/src/main/python/models/libs/graph_utils.py b/pushservice/src/main/python/models/libs/graph_utils.py new file mode 100644 index 000000000..4a4626a59 --- /dev/null +++ b/pushservice/src/main/python/models/libs/graph_utils.py @@ -0,0 +1,42 @@ +""" +Utilties that aid in building the magic recs graph. +""" + +import re + +import tensorflow.compat.v1 as tf + + +def get_trainable_variables(all_trainable_variables, trainable_regexes): + """Returns a subset of trainable variables for training. + + Given a collection of trainable variables, this will return all those that match the given regexes. + Will also log those variables. + + Args: + all_trainable_variables (a collection of trainable tf.Variable): The variables to search through. + trainable_regexes (a collection of regexes): Variables that match any regex will be included. + + Returns a list of tf.Variable + """ + if trainable_regexes is None or len(trainable_regexes) == 0: + tf.logging.info("No trainable regexes found. Not using get_trainable_variables behavior.") + return None + + assert any( + tf.is_tensor(var) for var in all_trainable_variables + ), f"Non TF variable found: {all_trainable_variables}" + trainable_variables = list( + filter( + lambda var: any(re.match(regex, var.name, re.IGNORECASE) for regex in trainable_regexes), + all_trainable_variables, + ) + ) + tf.logging.info(f"Using filtered trainable variables: {trainable_variables}") + + assert ( + trainable_variables + ), "Did not find trainable variables after filtering after filtering from {} number of vars originaly. 
All vars: {} and train regexes: {}".format( + len(all_trainable_variables), all_trainable_variables, trainable_regexes + ) + return trainable_variables diff --git a/pushservice/src/main/python/models/libs/group_metrics.py b/pushservice/src/main/python/models/libs/group_metrics.py new file mode 100644 index 000000000..eeef3c501 --- /dev/null +++ b/pushservice/src/main/python/models/libs/group_metrics.py @@ -0,0 +1,114 @@ +import os +import time + +from twitter.cortex.ml.embeddings.deepbird.grouped_metrics.computation import ( + write_grouped_metrics_to_mldash, +) +from twitter.cortex.ml.embeddings.deepbird.grouped_metrics.configuration import ( + ClassificationGroupedMetricsConfiguration, + NDCGGroupedMetricsConfiguration, +) +import twml + +from .light_ranking_metrics import ( + CGRGroupedMetricsConfiguration, + ExpectedLossGroupedMetricsConfiguration, + RecallGroupedMetricsConfiguration, +) + +import numpy as np +import tensorflow.compat.v1 as tf +from tensorflow.compat.v1 import logging + + +# checkstyle: noqa + + +def run_group_metrics(trainer, data_dir, model_path, parse_fn, group_feature_name="meta.user_id"): + + start_time = time.time() + logging.info("Evaluating with group metrics.") + + metrics = write_grouped_metrics_to_mldash( + trainer=trainer, + data_dir=data_dir, + model_path=model_path, + group_fn=lambda datarecord: str( + datarecord.discreteFeatures[twml.feature_id(group_feature_name)[0]] + ), + parse_fn=parse_fn, + metric_configurations=[ + ClassificationGroupedMetricsConfiguration(), + NDCGGroupedMetricsConfiguration(k=[5, 10, 20]), + ], + total_records_to_read=1000000000, + shuffle=False, + mldash_metrics_name="grouped_metrics", + ) + + end_time = time.time() + logging.info(f"Evaluated Group Metics: {metrics}.") + logging.info(f"Group metrics evaluation time {end_time - start_time}.") + + +def run_group_metrics_light_ranking( + trainer, data_dir, model_path, parse_fn, group_feature_name="meta.trace_id" +): + + start_time = time.time() + 
logging.info("Evaluating with group metrics.") + + metrics = write_grouped_metrics_to_mldash( + trainer=trainer, + data_dir=data_dir, + model_path=model_path, + group_fn=lambda datarecord: str( + datarecord.discreteFeatures[twml.feature_id(group_feature_name)[0]] + ), + parse_fn=parse_fn, + metric_configurations=[ + CGRGroupedMetricsConfiguration(lightNs=[50, 100, 200], heavyKs=[1, 3, 10, 20, 50]), + RecallGroupedMetricsConfiguration(n=[50, 100, 200], k=[1, 3, 10, 20, 50]), + ExpectedLossGroupedMetricsConfiguration(lightNs=[50, 100, 200]), + ], + total_records_to_read=10000000, + num_batches_to_load=50, + batch_size=1024, + shuffle=False, + mldash_metrics_name="grouped_metrics_for_light_ranking", + ) + + end_time = time.time() + logging.info(f"Evaluated Group Metics for Light Ranking: {metrics}.") + logging.info(f"Group metrics evaluation time {end_time - start_time}.") + + +def run_group_metrics_light_ranking_in_bq(trainer, params, checkpoint_path): + logging.info("getting Test Predictions for Light Ranking Group Metrics in BigQuery !!!") + eval_input_fn = trainer.get_eval_input_fn(repeat=False, shuffle=False) + info_pool = [] + + for result in trainer.estimator.predict( + eval_input_fn, checkpoint_path=checkpoint_path, yield_single_examples=False + ): + traceID = result["trace_id"] + pred = result["prediction"] + label = result["target"] + info = np.concatenate([traceID, pred, label], axis=1) + info_pool.append(info) + + info_pool = np.concatenate(info_pool) + + locname = "/tmp/000/" + if not os.path.exists(locname): + os.makedirs(locname) + + locfile = locname + params.pred_file_name + columns = ["trace_id", "model_prediction", "meta__ranking__weighted_oonc_model_score"] + np.savetxt(locfile, info_pool, delimiter=",", header=",".join(columns)) + tf.io.gfile.copy(locfile, params.pred_file_path + params.pred_file_name, overwrite=True) + + if os.path.isfile(locfile): + os.remove(locfile) + + logging.info("Done Prediction for Light Ranking Group Metrics in 
BigQuery.") diff --git a/pushservice/src/main/python/models/libs/initializer.py b/pushservice/src/main/python/models/libs/initializer.py new file mode 100644 index 000000000..8bba00216 --- /dev/null +++ b/pushservice/src/main/python/models/libs/initializer.py @@ -0,0 +1,118 @@ +import numpy as np +from tensorflow.keras import backend as K + + +class VarianceScaling(object): + """Initializer capable of adapting its scale to the shape of weights. + With `distribution="normal"`, samples are drawn from a truncated normal + distribution centered on zero, with `stddev = sqrt(scale / n)` where n is: + - number of input units in the weight tensor, if mode = "fan_in" + - number of output units, if mode = "fan_out" + - average of the numbers of input and output units, if mode = "fan_avg" + With `distribution="uniform"`, + samples are drawn from a uniform distribution + within [-limit, limit], with `limit = sqrt(3 * scale / n)`. + # Arguments + scale: Scaling factor (positive float). + mode: One of "fan_in", "fan_out", "fan_avg". + distribution: Random distribution to use. One of "normal", "uniform". + seed: A Python integer. Used to seed the random generator. + # Raises + ValueError: In case of an invalid value for the "scale", mode" or + "distribution" arguments.""" + + def __init__( + self, + scale=1.0, + mode="fan_in", + distribution="normal", + seed=None, + fan_in=None, + fan_out=None, + ): + self.fan_in = fan_in + self.fan_out = fan_out + if scale <= 0.0: + raise ValueError("`scale` must be a positive float. 
Got:", scale) + mode = mode.lower() + if mode not in {"fan_in", "fan_out", "fan_avg"}: + raise ValueError( + "Invalid `mode` argument: " 'expected on of {"fan_in", "fan_out", "fan_avg"} ' "but got", + mode, + ) + distribution = distribution.lower() + if distribution not in {"normal", "uniform"}: + raise ValueError( + "Invalid `distribution` argument: " 'expected one of {"normal", "uniform"} ' "but got", + distribution, + ) + self.scale = scale + self.mode = mode + self.distribution = distribution + self.seed = seed + + def __call__(self, shape, dtype=None, partition_info=None): + fan_in = shape[-2] if self.fan_in is None else self.fan_in + fan_out = shape[-1] if self.fan_out is None else self.fan_out + + scale = self.scale + if self.mode == "fan_in": + scale /= max(1.0, fan_in) + elif self.mode == "fan_out": + scale /= max(1.0, fan_out) + else: + scale /= max(1.0, float(fan_in + fan_out) / 2) + if self.distribution == "normal": + stddev = np.sqrt(scale) / 0.87962566103423978 + return K.truncated_normal(shape, 0.0, stddev, dtype=dtype, seed=self.seed) + else: + limit = np.sqrt(3.0 * scale) + return K.random_uniform(shape, -limit, limit, dtype=dtype, seed=self.seed) + + def get_config(self): + return { + "scale": self.scale, + "mode": self.mode, + "distribution": self.distribution, + "seed": self.seed, + } + + +def customized_glorot_uniform(seed=None, fan_in=None, fan_out=None): + """Glorot uniform initializer, also called Xavier uniform initializer. + It draws samples from a uniform distribution within [-limit, limit] + where `limit` is `sqrt(6 / (fan_in + fan_out))` + where `fan_in` is the number of input units in the weight tensor + and `fan_out` is the number of output units in the weight tensor. + # Arguments + seed: A Python integer. Used to seed the random generator. 
+ # Returns + An initializer.""" + return VarianceScaling( + scale=1.0, + mode="fan_avg", + distribution="uniform", + seed=seed, + fan_in=fan_in, + fan_out=fan_out, + ) + + +def customized_glorot_norm(seed=None, fan_in=None, fan_out=None): + """Glorot norm initializer, also called Xavier uniform initializer. + It draws samples from a uniform distribution within [-limit, limit] + where `limit` is `sqrt(6 / (fan_in + fan_out))` + where `fan_in` is the number of input units in the weight tensor + and `fan_out` is the number of output units in the weight tensor. + # Arguments + seed: A Python integer. Used to seed the random generator. + # Returns + An initializer.""" + return VarianceScaling( + scale=1.0, + mode="fan_avg", + distribution="normal", + seed=seed, + fan_in=fan_in, + fan_out=fan_out, + ) diff --git a/pushservice/src/main/python/models/libs/light_ranking_metrics.py b/pushservice/src/main/python/models/libs/light_ranking_metrics.py new file mode 100644 index 000000000..b83fcf3ae --- /dev/null +++ b/pushservice/src/main/python/models/libs/light_ranking_metrics.py @@ -0,0 +1,255 @@ +from functools import partial + +from twitter.cortex.ml.embeddings.deepbird.grouped_metrics.configuration import ( + GroupedMetricsConfiguration, +) +from twitter.cortex.ml.embeddings.deepbird.grouped_metrics.helpers import ( + extract_prediction_from_prediction_record, +) + + +# checkstyle: noqa + + +def score_loss_at_n(labels, predictions, lightN): + """ + Compute the absolute ScoreLoss ranking metric + Args: + labels (list) : A list of label values (HeavyRanking Reference) + predictions (list): A list of prediction values (LightRanking Predictions) + lightN (int): size of the list at which of Initial candidates to compute ScoreLoss. 
(LightRanking) + """ + assert len(labels) == len(predictions) + + if lightN <= 0: + return None + + labels_with_predictions = zip(labels, predictions) + labels_with_sorted_predictions = sorted( + labels_with_predictions, key=lambda x: x[1], reverse=True + )[:lightN] + labels_top1_light = max([label for label, _ in labels_with_sorted_predictions]) + labels_top1_heavy = max(labels) + + return labels_top1_heavy - labels_top1_light + + +def cgr_at_nk(labels, predictions, lightN, heavyK): + """ + Compute Cumulative Gain Ratio (CGR) ranking metric + Args: + labels (list) : A list of label values (HeavyRanking Reference) + predictions (list): A list of prediction values (LightRanking Predictions) + lightN (int): size of the list at which of Initial candidates to compute CGR. (LightRanking) + heavyK (int): size of the list at which of Refined candidates to compute CGR. (HeavyRanking) + """ + assert len(labels) == len(predictions) + + if (not lightN) or (not heavyK): + out = None + elif lightN <= 0 or heavyK <= 0: + out = None + else: + + labels_with_predictions = zip(labels, predictions) + labels_with_sorted_predictions = sorted( + labels_with_predictions, key=lambda x: x[1], reverse=True + )[:lightN] + labels_topN_light = [label for label, _ in labels_with_sorted_predictions] + + if lightN <= heavyK: + cg_light = sum(labels_topN_light) + else: + labels_topK_heavy_from_light = sorted(labels_topN_light, reverse=True)[:heavyK] + cg_light = sum(labels_topK_heavy_from_light) + + ideal_ordering = sorted(labels, reverse=True) + cg_heavy = sum(ideal_ordering[: min(lightN, heavyK)]) + + out = 0.0 + if cg_heavy != 0: + out = max(cg_light / cg_heavy, 0) + + return out + + +def _get_weight(w, atK): + if not w: + return 1.0 + elif len(w) <= atK: + return 0.0 + else: + return w[atK] + + +def recall_at_nk(labels, predictions, n=None, k=None, w=None): + """ + Recall at N-K ranking metric + Args: + labels (list): A list of label values + predictions (list): A list of prediction values + n 
(int): size of the list at which of predictions to compute recall. (Light Ranking Predictions) + The default is None in which case the length of the provided predictions is used as L + k (int): size of the list at which of labels to compute recall. (Heavy Ranking Predictions) + The default is None in which case the length of the provided labels is used as L + w (list): weight vector sorted by labels + """ + assert len(labels) == len(predictions) + + if not any(labels): + out = None + else: + + safe_n = len(predictions) if not n else min(len(predictions), n) + safe_k = len(labels) if not k else min(len(labels), k) + + labels_with_predictions = zip(labels, predictions) + sorted_labels_with_predictions = sorted( + labels_with_predictions, key=lambda x: x[0], reverse=True + ) + + order_sorted_labels_predictions = zip(range(len(labels)), *zip(*sorted_labels_with_predictions)) + + order_with_predictions = [ + (order, pred) for order, label, pred in order_sorted_labels_predictions + ] + order_with_sorted_predictions = sorted(order_with_predictions, key=lambda x: x[1], reverse=True) + + pred_sorted_order_at_n = [order for order, _ in order_with_sorted_predictions][:safe_n] + + intersection_weight = [ + _get_weight(w, order) if order < safe_k else 0 for order in pred_sorted_order_at_n + ] + + intersection_score = sum(intersection_weight) + full_score = sum(w) if w else float(safe_k) + + out = 0.0 + if full_score != 0: + out = intersection_score / full_score + + return out + + +class ExpectedLossGroupedMetricsConfiguration(GroupedMetricsConfiguration): + """ + This is the Expected Loss Grouped metric computation configuration. + """ + + def __init__(self, lightNs=[]): + """ + Args: + lightNs (list): size of the list at which of Initial candidates to compute Expected Loss. 
(LightRanking) + """ + self.lightNs = lightNs + + @property + def name(self): + return "ExpectedLoss" + + @property + def metrics_dict(self): + metrics_to_compute = {} + for lightN in self.lightNs: + metric_name = "ExpectedLoss_atLight_" + str(lightN) + metrics_to_compute[metric_name] = partial(score_loss_at_n, lightN=lightN) + return metrics_to_compute + + def extract_label(self, prec, drec, drec_label): + return drec_label + + def extract_prediction(self, prec, drec, drec_label): + return extract_prediction_from_prediction_record(prec) + + +class CGRGroupedMetricsConfiguration(GroupedMetricsConfiguration): + """ + This is the Cumulative Gain Ratio (CGR) Grouped metric computation configuration. + CGR at the max length of each session is the default. + CGR at additional positions can be computed by specifying a list of 'n's and 'k's + """ + + def __init__(self, lightNs=[], heavyKs=[]): + """ + Args: + lightNs (list): size of the list at which of Initial candidates to compute CGR. (LightRanking) + heavyK (int): size of the list at which of Refined candidates to compute CGR. (HeavyRanking) + """ + self.lightNs = lightNs + self.heavyKs = heavyKs + + @property + def name(self): + return "cgr" + + @property + def metrics_dict(self): + metrics_to_compute = {} + for lightN in self.lightNs: + for heavyK in self.heavyKs: + metric_name = "cgr_atLight_" + str(lightN) + "_atHeavy_" + str(heavyK) + metrics_to_compute[metric_name] = partial(cgr_at_nk, lightN=lightN, heavyK=heavyK) + return metrics_to_compute + + def extract_label(self, prec, drec, drec_label): + return drec_label + + def extract_prediction(self, prec, drec, drec_label): + return extract_prediction_from_prediction_record(prec) + + +class RecallGroupedMetricsConfiguration(GroupedMetricsConfiguration): + """ + This is the Recall Grouped metric computation configuration. + Recall at the max length of each session is the default. 
+ Recall at additional positions can be computed by specifying a list of 'n's and 'k's + """ + + def __init__(self, n=[], k=[], w=[]): + """ + Args: + n (list): A list of ints. List of prediction rank thresholds (for light) + k (list): A list of ints. List of label rank thresholds (for heavy) + """ + self.predN = n + self.labelK = k + self.weight = w + + @property + def name(self): + return "group_recall" + + @property + def metrics_dict(self): + metrics_to_compute = {"group_recall_unweighted": recall_at_nk} + if not self.weight: + metrics_to_compute["group_recall_weighted"] = partial(recall_at_nk, w=self.weight) + + if self.predN and self.labelK: + for n in self.predN: + for k in self.labelK: + if n >= k: + metrics_to_compute[ + "group_recall_unweighted_at_L" + str(n) + "_at_H" + str(k) + ] = partial(recall_at_nk, n=n, k=k) + if self.weight: + metrics_to_compute[ + "group_recall_weighted_at_L" + str(n) + "_at_H" + str(k) + ] = partial(recall_at_nk, n=n, k=k, w=self.weight) + + if self.labelK and not self.predN: + for k in self.labelK: + metrics_to_compute["group_recall_unweighted_at_full_at_H" + str(k)] = partial( + recall_at_nk, k=k + ) + if self.weight: + metrics_to_compute["group_recall_weighted_at_full_at_H" + str(k)] = partial( + recall_at_nk, k=k, w=self.weight + ) + return metrics_to_compute + + def extract_label(self, prec, drec, drec_label): + return drec_label + + def extract_prediction(self, prec, drec, drec_label): + return extract_prediction_from_prediction_record(prec) diff --git a/pushservice/src/main/python/models/libs/metric_fn_utils.py b/pushservice/src/main/python/models/libs/metric_fn_utils.py new file mode 100644 index 000000000..fc26a1305 --- /dev/null +++ b/pushservice/src/main/python/models/libs/metric_fn_utils.py @@ -0,0 +1,294 @@ +""" +Utilties for constructing a metric_fn for magic recs. 
+""" + +from twml.contrib.metrics.metrics import ( + get_dual_binary_tasks_metric_fn, + get_numeric_metric_fn, + get_partial_multi_binary_class_metric_fn, + get_single_binary_task_metric_fn, +) + +from .model_utils import generate_disliked_mask + +import tensorflow.compat.v1 as tf + + +METRIC_BOOK = { + "OONC": ["OONC"], + "OONC_Engagement": ["OONC", "Engagement"], + "Sent": ["Sent"], + "HeavyRankPosition": ["HeavyRankPosition"], + "HeavyRankProbability": ["HeavyRankProbability"], +} + +USER_AGE_FEATURE_NAME = "accountAge" +NEW_USER_AGE_CUTOFF = 0 + + +def remove_padding_and_flatten(tensor, valid_batch_size): + """Remove the padding of the input padded tensor given the valid batch size tensor, + then flatten the output with respect to the first dimension. + Args: + tensor: A tensor of size [META_BATCH_SIZE, BATCH_SIZE, FEATURE_DIM]. + valid_batch_size: A tensor of size [META_BATCH_SIZE], with each element indicating + the effective batch size of the BATCH_SIZE dimension. + + Returns: + A tesnor of size [tf.reduce_sum(valid_batch_size), FEATURE_DIM]. + """ + unpadded_ragged_tensor = tf.RaggedTensor.from_tensor(tensor=tensor, lengths=valid_batch_size) + + return unpadded_ragged_tensor.flat_values + + +def safe_mask(values, mask): + """Mask values if possible. + + Boolean mask inputed values if and only if values is a tensor of the same dimension as mask (or can be broadcasted to that dimension). + + Args: + values (Any or Tensor): Input tensor to mask. Dim 0 should be size N. + mask (boolean tensor): A boolean tensor of size N. + + Returns Values or Values masked. 
+ """ + if values is None: + return values + if not tf.is_tensor(values): + return values + values_shape = values.get_shape() + if not values_shape or len(values_shape) == 0: + return values + if not mask.get_shape().is_compatible_with(values_shape[0]): + return values + return tf.boolean_mask(values, mask) + + +def add_new_user_metrics(metric_fn): + """Will stratify the metric_fn by adding new user metrics. + + Given an input metric_fn, double every metric: One will be the orignal and the other will only include those for new users. + + Args: + metric_fn (python function): Base twml metric_fn. + + Returns a metric_fn with new user metrics included. + """ + + def metric_fn_with_new_users(graph_output, labels, weights): + if USER_AGE_FEATURE_NAME not in graph_output: + raise ValueError( + "In order to get metrics stratified by user age, {name} feature should be added to model graph output. However, only the following output keys were found: {keys}.".format( + name=USER_AGE_FEATURE_NAME, keys=graph_output.keys() + ) + ) + + metric_ops = metric_fn(graph_output, labels, weights) + + is_new = tf.reshape( + tf.math.less_equal( + tf.cast(graph_output[USER_AGE_FEATURE_NAME], tf.int64), + tf.cast(NEW_USER_AGE_CUTOFF, tf.int64), + ), + [-1], + ) + + labels = safe_mask(labels, is_new) + weights = safe_mask(weights, is_new) + graph_output = {key: safe_mask(values, is_new) for key, values in graph_output.items()} + + new_user_metric_ops = metric_fn(graph_output, labels, weights) + new_user_metric_ops = {name + "_new_users": ops for name, ops in new_user_metric_ops.items()} + metric_ops.update(new_user_metric_ops) + return metric_ops + + return metric_fn_with_new_users + + +def get_meta_learn_single_binary_task_metric_fn( + metrics, classnames, top_k=(5, 5, 5), use_top_k=False +): + """Wrapper function to use the metric_fn with meta learning evaluation scheme. + + Args: + metrics: A list of string representing metric names. 
+ classnames: A list of string repsenting class names, In case of multiple binary class models, + the names for each class or label. + top_k: A tuple of int to specify top K metrics. + use_top_k: A boolean value indicating of top K of metrics is used. + + Returns: + A customized metric_fn function. + """ + + def get_eval_metric_ops(graph_output, labels, weights): + """The op func of the eval_metrics. Comparing with normal version, + the difference is we flatten the output, label, and weights. + + Args: + graph_output: A dict of tensors. + labels: A tensor of int32 be the value of either 0 or 1. + weights: A tensor of float32 to indicate the per record weight. + + Returns: + A dict of metric names and values. + """ + metric_op_weighted = get_partial_multi_binary_class_metric_fn( + metrics, predcols=0, classes=classnames + ) + classnames_unweighted = ["unweighted_" + classname for classname in classnames] + metric_op_unweighted = get_partial_multi_binary_class_metric_fn( + metrics, predcols=0, classes=classnames_unweighted + ) + + valid_batch_size = graph_output["valid_batch_size"] + graph_output["output"] = remove_padding_and_flatten(graph_output["output"], valid_batch_size) + labels = remove_padding_and_flatten(labels, valid_batch_size) + weights = remove_padding_and_flatten(weights, valid_batch_size) + + tf.ensure_shape(graph_output["output"], [None, 1]) + tf.ensure_shape(labels, [None, 1]) + tf.ensure_shape(weights, [None, 1]) + + metrics_weighted = metric_op_weighted(graph_output, labels, weights) + metrics_unweighted = metric_op_unweighted(graph_output, labels, None) + metrics_weighted.update(metrics_unweighted) + + if use_top_k: + metric_op_numeric = get_numeric_metric_fn(metrics=None, topK=top_k, predcol=0, labelcol=1) + metrics_numeric = metric_op_numeric(graph_output, labels, weights) + metrics_weighted.update(metrics_numeric) + return metrics_weighted + + return get_eval_metric_ops + + +def get_meta_learn_dual_binary_tasks_metric_fn( + metrics, classnames, 
top_k=(5, 5, 5), use_top_k=False +): + """Wrapper function to use the metric_fn with meta learning evaluation scheme. + + Args: + metrics: A list of string representing metric names. + classnames: A list of string repsenting class names, In case of multiple binary class models, + the names for each class or label. + top_k: A tuple of int to specify top K metrics. + use_top_k: A boolean value indicating of top K of metrics is used. + + Returns: + A customized metric_fn function. + """ + + def get_eval_metric_ops(graph_output, labels, weights): + """The op func of the eval_metrics. Comparing with normal version, + the difference is we flatten the output, label, and weights. + + Args: + graph_output: A dict of tensors. + labels: A tensor of int32 be the value of either 0 or 1. + weights: A tensor of float32 to indicate the per record weight. + + Returns: + A dict of metric names and values. + """ + metric_op_weighted = get_partial_multi_binary_class_metric_fn( + metrics, predcols=[0, 1], classes=classnames + ) + classnames_unweighted = ["unweighted_" + classname for classname in classnames] + metric_op_unweighted = get_partial_multi_binary_class_metric_fn( + metrics, predcols=[0, 1], classes=classnames_unweighted + ) + + valid_batch_size = graph_output["valid_batch_size"] + graph_output["output"] = remove_padding_and_flatten(graph_output["output"], valid_batch_size) + labels = remove_padding_and_flatten(labels, valid_batch_size) + weights = remove_padding_and_flatten(weights, valid_batch_size) + + tf.ensure_shape(graph_output["output"], [None, 2]) + tf.ensure_shape(labels, [None, 2]) + tf.ensure_shape(weights, [None, 1]) + + metrics_weighted = metric_op_weighted(graph_output, labels, weights) + metrics_unweighted = metric_op_unweighted(graph_output, labels, None) + metrics_weighted.update(metrics_unweighted) + + if use_top_k: + metric_op_numeric = get_numeric_metric_fn(metrics=None, topK=top_k, predcol=2, labelcol=2) + metrics_numeric = metric_op_numeric(graph_output, 
labels, weights) + metrics_weighted.update(metrics_numeric) + return metrics_weighted + + return get_eval_metric_ops + + +def get_metric_fn(task_name, use_stratify_metrics, use_meta_batch=False): + """Will retrieve the metric_fn for magic recs. + + Args: + task_name (string): Which task is being used for this model. + use_stratify_metrics (boolean): Should we add stratified metrics (new user metrics). + use_meta_batch (boolean): If the output/label/weights are passed in 3D shape instead of + 2D shape. + + Returns: + A metric_fn function to pass in twml Trainer. + """ + if task_name not in METRIC_BOOK: + raise ValueError( + "Task name of {task_name} not recognized. Unable to retrieve metrics.".format( + task_name=task_name + ) + ) + class_names = METRIC_BOOK[task_name] + if use_meta_batch: + get_n_binary_task_metric_fn = ( + get_meta_learn_single_binary_task_metric_fn + if len(class_names) == 1 + else get_meta_learn_dual_binary_tasks_metric_fn + ) + else: + get_n_binary_task_metric_fn = ( + get_single_binary_task_metric_fn if len(class_names) == 1 else get_dual_binary_tasks_metric_fn + ) + + metric_fn = get_n_binary_task_metric_fn(metrics=None, classnames=METRIC_BOOK[task_name]) + + if use_stratify_metrics: + metric_fn = add_new_user_metrics(metric_fn) + + return metric_fn + + +def flip_disliked_labels(metric_fn): + """This function returns an adapted metric_fn which flips the labels of the OONCed evaluation data to 0 if it is disliked. + Args: + metric_fn: A metric_fn function to pass in twml Trainer. + + Returns: + _adapted_metric_fn: A customized metric_fn function with disliked OONC labels flipped. + """ + + def _adapted_metric_fn(graph_output, labels, weights): + """A customized metric_fn function with disliked OONC labels flipped. + + Args: + graph_output: A dict of tensors. + labels: labels of training samples, which is a 2D tensor of shape batch_size x 3: [OONCs, engagements, dislikes] + weights: A tensor of float32 to indicate the per record weight. 
+ + Returns: + A dict of metric names and values. + """ + # We want to multiply the label of the observation by 0 only when it is disliked + disliked_mask = generate_disliked_mask(labels) + + # Extract OONC and engagement labels only. + labels = tf.reshape(labels[:, 0:2], shape=[-1, 2]) + + # Labels will be set to 0 if it is disliked. + adapted_labels = labels * tf.cast(tf.logical_not(disliked_mask), dtype=labels.dtype) + + return metric_fn(graph_output, adapted_labels, weights) + + return _adapted_metric_fn diff --git a/pushservice/src/main/python/models/libs/model_args.py b/pushservice/src/main/python/models/libs/model_args.py new file mode 100644 index 000000000..ae142d818 --- /dev/null +++ b/pushservice/src/main/python/models/libs/model_args.py @@ -0,0 +1,231 @@ +from twml.trainers import DataRecordTrainer + + +# checkstyle: noqa + + +def get_arg_parser(): + parser = DataRecordTrainer.add_parser_arguments() + + parser.add_argument( + "--input_size_bits", + type=int, + default=18, + help="number of bits allocated to the input size", + ) + parser.add_argument( + "--model_trainer_name", + default="magic_recs_mlp_calibration_MTL_OONC_Engagement", + type=str, + help="specify the model trainer name.", + ) + + parser.add_argument( + "--model_type", + default="deepnorm_gbdt_inputdrop2_rescale", + type=str, + help="specify the model type to use.", + ) + parser.add_argument( + "--feat_config_type", + default="get_feature_config_with_sparse_continuous", + type=str, + help="specify the feature configure function to use.", + ) + + parser.add_argument( + "--directly_export_best", + default=False, + action="store_true", + help="whether to directly_export best_checkpoint", + ) + + parser.add_argument( + "--warm_start_base_dir", + default="none", + type=str, + help="latest ckpt in this folder will be used to ", + ) + + parser.add_argument( + "--feature_list", + default="none", + type=str, + help="Which features to use for training", + ) + parser.add_argument( + 
"--warm_start_from", default=None, type=str, help="model dir to warm start from" + ) + + parser.add_argument( + "--momentum", default=0.99999, type=float, help="Momentum term for batch normalization" + ) + parser.add_argument( + "--dropout", + default=0.2, + type=float, + help="input_dropout_rate to rescale output by (1 - input_dropout_rate)", + ) + parser.add_argument( + "--out_layer_1_size", default=256, type=int, help="Size of MLP_branch layer 1" + ) + parser.add_argument( + "--out_layer_2_size", default=128, type=int, help="Size of MLP_branch layer 2" + ) + parser.add_argument("--out_layer_3_size", default=64, type=int, help="Size of MLP_branch layer 3") + parser.add_argument( + "--sparse_embedding_size", default=50, type=int, help="Dimensionality of sparse embedding layer" + ) + parser.add_argument( + "--dense_embedding_size", default=128, type=int, help="Dimensionality of dense embedding layer" + ) + + parser.add_argument( + "--use_uam_label", + default=False, + type=str, + help="Whether to use uam_label or not", + ) + + parser.add_argument( + "--task_name", + default="OONC_Engagement", + type=str, + help="specify the task name to use: OONC or OONC_Engagement.", + ) + parser.add_argument( + "--init_weight", + default=0.9, + type=float, + help="Initial OONC Task Weight MTL: OONC+Engagement.", + ) + parser.add_argument( + "--use_engagement_weight", + default=False, + action="store_true", + help="whether to use engagement weight for base model.", + ) + parser.add_argument( + "--mtl_num_extra_layers", + type=int, + default=1, + help="Number of Hidden Layers for each TaskBranch.", + ) + parser.add_argument( + "--mtl_neuron_scale", type=int, default=4, help="Scaling Factor of Neurons in MTL Extra Layers." 
+ ) + parser.add_argument( + "--use_oonc_score", + default=False, + action="store_true", + help="whether to use oonc score only or combined score.", + ) + parser.add_argument( + "--use_stratified_metrics", + default=False, + action="store_true", + help="Use stratified metrics: Break out new-user metrics.", + ) + parser.add_argument( + "--run_group_metrics", + default=False, + action="store_true", + help="Will run evaluation metrics grouped by user.", + ) + parser.add_argument( + "--use_full_scope", + default=False, + action="store_true", + help="Will add extra scope and naming to graph.", + ) + parser.add_argument( + "--trainable_regexes", + default=None, + nargs="*", + help="The union of variables specified by the list of regexes will be considered trainable.", + ) + parser.add_argument( + "--fine_tuning.ckpt_to_initialize_from", + dest="fine_tuning_ckpt_to_initialize_from", + type=str, + default=None, + help="Checkpoint path from which to warm start. Indicates the pre-trained model.", + ) + parser.add_argument( + "--fine_tuning.warm_start_scope_regex", + dest="fine_tuning_warm_start_scope_regex", + type=str, + default=None, + help="All variables matching this will be restored.", + ) + + return parser + + +def get_params(args=None): + parser = get_arg_parser() + if args is None: + return parser.parse_args() + else: + return parser.parse_args(args) + + +def get_arg_parser_light_ranking(): + parser = get_arg_parser() + + parser.add_argument( + "--use_record_weight", + default=False, + action="store_true", + help="whether to use record weight for base model.", + ) + parser.add_argument( + "--min_record_weight", default=0.0, type=float, help="Minimum record weight to use." + ) + parser.add_argument( + "--smooth_weight", default=0.0, type=float, help="Factor to smooth Rank Position Weight." + ) + + parser.add_argument( + "--num_mlp_layers", type=int, default=3, help="Number of Hidden Layers for MLP model." 
+ ) + parser.add_argument( + "--mlp_neuron_scale", type=int, default=4, help="Scaling Factor of Neurons in MLP Layers." + ) + parser.add_argument( + "--run_light_ranking_group_metrics", + default=False, + action="store_true", + help="Will run evaluation metrics grouped by user for Light Ranking.", + ) + parser.add_argument( + "--use_missing_sub_branch", + default=False, + action="store_true", + help="Whether to use missing value sub-branch for Light Ranking.", + ) + parser.add_argument( + "--use_gbdt_features", + default=False, + action="store_true", + help="Whether to use GBDT features for Light Ranking.", + ) + parser.add_argument( + "--run_light_ranking_group_metrics_in_bq", + default=False, + action="store_true", + help="Whether to get_predictions for Light Ranking to compute group metrics in BigQuery.", + ) + parser.add_argument( + "--pred_file_path", + default=None, + type=str, + help="path", + ) + parser.add_argument( + "--pred_file_name", + default=None, + type=str, + help="path", + ) + return parser diff --git a/pushservice/src/main/python/models/libs/model_utils.py b/pushservice/src/main/python/models/libs/model_utils.py new file mode 100644 index 000000000..1c5306911 --- /dev/null +++ b/pushservice/src/main/python/models/libs/model_utils.py @@ -0,0 +1,339 @@ +import sys + +import twml + +from .initializer import customized_glorot_uniform + +import tensorflow.compat.v1 as tf +import yaml + + +# checkstyle: noqa + + +def read_config(whitelist_yaml_file): + with tf.gfile.FastGFile(whitelist_yaml_file) as f: + try: + return yaml.safe_load(f) + except yaml.YAMLError as exc: + print(exc) + sys.exit(1) + + +def _sparse_feature_fixup(features, input_size_bits): + """Rebuild a sparse tensor feature so that its dense shape attribute is present. + + Arguments: + features (SparseTensor): Sparse feature tensor of shape ``(B, sparse_feature_dim)``. + input_size_bits (int): Number of columns in ``log2`` scale. Must be positive. 
+ + Returns: + SparseTensor: Rebuilt and non-faulty version of `features`.""" + sparse_feature_dim = tf.constant(2**input_size_bits, dtype=tf.int64) + sparse_shape = tf.stack([features.dense_shape[0], sparse_feature_dim]) + sparse_tf = tf.SparseTensor(features.indices, features.values, sparse_shape) + return sparse_tf + + +def self_atten_dense(input, out_dim, activation=None, use_bias=True, name=None): + def safe_concat(base, suffix): + """Concats variables name components if base is given.""" + if not base: + return base + return f"{base}:{suffix}" + + input_dim = input.shape.as_list()[1] + + sigmoid_out = twml.layers.FullDense( + input_dim, dtype=tf.float32, activation=tf.nn.sigmoid, name=safe_concat(name, "sigmoid_out") + )(input) + atten_input = sigmoid_out * input + mlp_out = twml.layers.FullDense( + out_dim, + dtype=tf.float32, + activation=activation, + use_bias=use_bias, + name=safe_concat(name, "mlp_out"), + )(atten_input) + return mlp_out + + +def get_dense_out(input, out_dim, activation, dense_type): + if dense_type == "full_dense": + out = twml.layers.FullDense(out_dim, dtype=tf.float32, activation=activation)(input) + elif dense_type == "self_atten_dense": + out = self_atten_dense(input, out_dim, activation=activation) + return out + + +def get_input_trans_func(bn_normalized_dense, is_training): + gw_normalized_dense = tf.expand_dims(bn_normalized_dense, -1) + group_num = bn_normalized_dense.shape.as_list()[1] + + gw_normalized_dense = GroupWiseTrans(group_num, 1, 8, name="groupwise_1", activation=tf.tanh)( + gw_normalized_dense + ) + gw_normalized_dense = GroupWiseTrans(group_num, 8, 4, name="groupwise_2", activation=tf.tanh)( + gw_normalized_dense + ) + gw_normalized_dense = GroupWiseTrans(group_num, 4, 1, name="groupwise_3", activation=tf.tanh)( + gw_normalized_dense + ) + + gw_normalized_dense = tf.squeeze(gw_normalized_dense, [-1]) + + bn_gw_normalized_dense = tf.layers.batch_normalization( + gw_normalized_dense, + training=is_training, + 
renorm_momentum=0.9999, + momentum=0.9999, + renorm=is_training, + trainable=True, + ) + + return bn_gw_normalized_dense + + +def tensor_dropout( + input_tensor, + rate, + is_training, + sparse_tensor=None, +): + """ + Implements dropout layer for both dense and sparse input_tensor + + Arguments: + input_tensor: + B x D dense tensor, or a sparse tensor + rate (float32): + dropout rate + is_training (bool): + training stage or not. + sparse_tensor (bool): + whether the input_tensor is sparse tensor or not. Default to be None, this value has to be passed explicitly. + rescale_sparse_dropout (bool): + Do we need to do rescaling or not. + Returns: + tensor dropped out""" + if sparse_tensor == True: + if is_training: + with tf.variable_scope("sparse_dropout"): + values = input_tensor.values + keep_mask = tf.keras.backend.random_binomial( + tf.shape(values), p=1 - rate, dtype=tf.float32, seed=None + ) + keep_mask.set_shape([None]) + keep_mask = tf.cast(keep_mask, tf.bool) + + keep_indices = tf.boolean_mask(input_tensor.indices, keep_mask, axis=0) + keep_values = tf.boolean_mask(values, keep_mask, axis=0) + + dropped_tensor = tf.SparseTensor(keep_indices, keep_values, input_tensor.dense_shape) + return dropped_tensor + else: + return input_tensor + elif sparse_tensor == False: + return tf.layers.dropout(input_tensor, rate=rate, training=is_training) + + +def adaptive_transformation(bn_normalized_dense, is_training, func_type="default"): + assert func_type in [ + "default", + "tiny", + ], f"fun_type can only be one of default and tiny, but get {func_type}" + + gw_normalized_dense = tf.expand_dims(bn_normalized_dense, -1) + group_num = bn_normalized_dense.shape.as_list()[1] + + if func_type == "default": + gw_normalized_dense = FastGroupWiseTrans( + group_num, 1, 8, name="groupwise_1", activation=tf.tanh, init_multiplier=8 + )(gw_normalized_dense) + + gw_normalized_dense = FastGroupWiseTrans( + group_num, 8, 4, name="groupwise_2", activation=tf.tanh, init_multiplier=8 + 
)(gw_normalized_dense) + + gw_normalized_dense = FastGroupWiseTrans( + group_num, 4, 1, name="groupwise_3", activation=tf.tanh, init_multiplier=8 + )(gw_normalized_dense) + elif func_type == "tiny": + gw_normalized_dense = FastGroupWiseTrans( + group_num, 1, 2, name="groupwise_1", activation=tf.tanh, init_multiplier=8 + )(gw_normalized_dense) + + gw_normalized_dense = FastGroupWiseTrans( + group_num, 2, 1, name="groupwise_2", activation=tf.tanh, init_multiplier=8 + )(gw_normalized_dense) + + gw_normalized_dense = FastGroupWiseTrans( + group_num, 1, 1, name="groupwise_3", activation=tf.tanh, init_multiplier=8 + )(gw_normalized_dense) + + gw_normalized_dense = tf.squeeze(gw_normalized_dense, [-1]) + bn_gw_normalized_dense = tf.layers.batch_normalization( + gw_normalized_dense, + training=is_training, + renorm_momentum=0.9999, + momentum=0.9999, + renorm=is_training, + trainable=True, + ) + + return bn_gw_normalized_dense + + +class FastGroupWiseTrans(object): + """ + used to apply group-wise fully connected layers to the input. 
+ it applies a tiny, unique MLP to each individual feature.""" + + def __init__(self, group_num, input_dim, out_dim, name, activation=None, init_multiplier=1): + self.group_num = group_num + self.input_dim = input_dim + self.out_dim = out_dim + self.activation = activation + self.init_multiplier = init_multiplier + + self.w = tf.get_variable( + name + "_group_weight", + [1, group_num, input_dim, out_dim], + initializer=customized_glorot_uniform( + fan_in=input_dim * init_multiplier, fan_out=out_dim * init_multiplier + ), + trainable=True, + ) + self.b = tf.get_variable( + name + "_group_bias", + [1, group_num, out_dim], + initializer=tf.constant_initializer(0.0), + trainable=True, + ) + + def __call__(self, input_tensor): + """ + input_tensor: batch_size x group_num x input_dim + output_tensor: batch_size x group_num x out_dim""" + input_tensor_expand = tf.expand_dims(input_tensor, axis=-1) + + output_tensor = tf.add( + tf.reduce_sum(tf.multiply(input_tensor_expand, self.w), axis=-2, keepdims=False), + self.b, + ) + + if self.activation is not None: + output_tensor = self.activation(output_tensor) + return output_tensor + + +class GroupWiseTrans(object): + """ + Used to apply group fully connected layers to the input. 
+ """ + + def __init__(self, group_num, input_dim, out_dim, name, activation=None): + self.group_num = group_num + self.input_dim = input_dim + self.out_dim = out_dim + self.activation = activation + + w_list, b_list = [], [] + for idx in range(out_dim): + this_w = tf.get_variable( + name + f"_group_weight_{idx}", + [1, group_num, input_dim], + initializer=tf.keras.initializers.glorot_uniform(), + trainable=True, + ) + this_b = tf.get_variable( + name + f"_group_bias_{idx}", + [1, group_num, 1], + initializer=tf.constant_initializer(0.0), + trainable=True, + ) + w_list.append(this_w) + b_list.append(this_b) + self.w_list = w_list + self.b_list = b_list + + def __call__(self, input_tensor): + """ + input_tensor: batch_size x group_num x input_dim + output_tensor: batch_size x group_num x out_dim + """ + out_tensor_list = [] + for idx in range(self.out_dim): + this_res = ( + tf.reduce_sum(input_tensor * self.w_list[idx], axis=-1, keepdims=True) + self.b_list[idx] + ) + out_tensor_list.append(this_res) + output_tensor = tf.concat(out_tensor_list, axis=-1) + + if self.activation is not None: + output_tensor = self.activation(output_tensor) + return output_tensor + + +def add_scalar_summary(var, name, name_scope="hist_dense_feature/"): + with tf.name_scope("summaries/"): + with tf.name_scope(name_scope): + tf.summary.scalar(name, var) + + +def add_histogram_summary(var, name, name_scope="hist_dense_feature/"): + with tf.name_scope("summaries/"): + with tf.name_scope(name_scope): + tf.summary.histogram(name, tf.reshape(var, [-1])) + + +def sparse_clip_by_value(sparse_tf, min_val, max_val): + new_vals = tf.clip_by_value(sparse_tf.values, min_val, max_val) + return tf.SparseTensor(sparse_tf.indices, new_vals, sparse_tf.dense_shape) + + +def check_numerics_with_msg(tensor, message="", sparse_tensor=False): + if sparse_tensor: + values = tf.debugging.check_numerics(tensor.values, message=message) + return tf.SparseTensor(tensor.indices, values, tensor.dense_shape) + else: + 
return tf.debugging.check_numerics(tensor, message=message) + + +def pad_empty_sparse_tensor(tensor): + dummy_tensor = tf.SparseTensor( + indices=[[0, 0]], + values=[0.00001], + dense_shape=tensor.dense_shape, + ) + result = tf.cond( + tf.equal(tf.size(tensor.values), 0), + lambda: dummy_tensor, + lambda: tensor, + ) + return result + + +def filter_nans_and_infs(tensor, sparse_tensor=False): + if sparse_tensor: + sparse_values = tensor.values + filtered_val = tf.where( + tf.logical_or(tf.is_nan(sparse_values), tf.is_inf(sparse_values)), + tf.zeros_like(sparse_values), + sparse_values, + ) + return tf.SparseTensor(tensor.indices, filtered_val, tensor.dense_shape) + else: + return tf.where( + tf.logical_or(tf.is_nan(tensor), tf.is_inf(tensor)), tf.zeros_like(tensor), tensor + ) + + +def generate_disliked_mask(labels): + """Generate a disliked mask where only samples with dislike labels are set to 1 otherwise set to 0. + Args: + labels: labels of training samples, which is a 2D tensor of shape batch_size x 3: [OONCs, engagements, dislikes] + Returns: + 1D tensor of shape batch_size x 1: [dislikes (booleans)] + """ + return tf.equal(tf.reshape(labels[:, 2], shape=[-1, 1]), 1) diff --git a/pushservice/src/main/python/models/libs/warm_start_utils.py b/pushservice/src/main/python/models/libs/warm_start_utils.py new file mode 100644 index 000000000..ca83df585 --- /dev/null +++ b/pushservice/src/main/python/models/libs/warm_start_utils.py @@ -0,0 +1,309 @@ +from collections import OrderedDict +import json +import os +from os.path import join + +from twitter.magicpony.common import file_access +import twml + +from .model_utils import read_config + +import numpy as np +from scipy import stats +import tensorflow.compat.v1 as tf + + +# checkstyle: noqa + + +def get_model_type_to_tensors_to_change_axis(): + model_type_to_tensors_to_change_axis = { + "magic_recs/model/batch_normalization/beta": ([0], "continuous"), + "magic_recs/model/batch_normalization/gamma": ([0], 
"continuous"), + "magic_recs/model/batch_normalization/moving_mean": ([0], "continuous"), + "magic_recs/model/batch_normalization/moving_stddev": ([0], "continuous"), + "magic_recs/model/batch_normalization/moving_variance": ([0], "continuous"), + "magic_recs/model/batch_normalization/renorm_mean": ([0], "continuous"), + "magic_recs/model/batch_normalization/renorm_stddev": ([0], "continuous"), + "magic_recs/model/logits/EngagementGivenOONC_logits/clem_net_1/block2_4/channel_wise_dense_4/kernel": ( + [1], + "all", + ), + "magic_recs/model/logits/OONC_logits/clem_net/block2/channel_wise_dense/kernel": ([1], "all"), + } + + return model_type_to_tensors_to_change_axis + + +def mkdirp(dirname): + if not tf.io.gfile.exists(dirname): + tf.io.gfile.makedirs(dirname) + + +def rename_dir(dirname, dst): + file_access.hdfs.mv(dirname, dst) + + +def rmdir(dirname): + if tf.io.gfile.exists(dirname): + if tf.io.gfile.isdir(dirname): + tf.io.gfile.rmtree(dirname) + else: + tf.io.gfile.remove(dirname) + + +def get_var_dict(checkpoint_path): + checkpoint = tf.train.get_checkpoint_state(checkpoint_path) + var_dict = OrderedDict() + with tf.Session() as sess: + all_var_list = tf.train.list_variables(checkpoint_path) + for var_name, _ in all_var_list: + # Load the variable + var = tf.train.load_variable(checkpoint_path, var_name) + var_dict[var_name] = var + return var_dict + + +def get_continunous_mapping_from_feat_list(old_feature_list, new_feature_list): + """ + get var_ind for old_feature and corresponding var_ind for new_feature + """ + new_var_ind, old_var_ind = [], [] + for this_new_id, this_new_name in enumerate(new_feature_list): + if this_new_name in old_feature_list: + this_old_id = old_feature_list.index(this_new_name) + new_var_ind.append(this_new_id) + old_var_ind.append(this_old_id) + return np.asarray(old_var_ind), np.asarray(new_var_ind) + + +def get_continuous_mapping_from_feat_dict(old_feature_dict, new_feature_dict): + """ + get var_ind for old_feature and 
corresponding var_ind for new_feature + """ + old_cont = old_feature_dict["continuous"] + old_bin = old_feature_dict["binary"] + + new_cont = new_feature_dict["continuous"] + new_bin = new_feature_dict["binary"] + + _dummy_sparse_feat = [f"sparse_feature_{_idx}" for _idx in range(100)] + + cont_old_var_ind, cont_new_var_ind = get_continunous_mapping_from_feat_list(old_cont, new_cont) + + all_old_var_ind, all_new_var_ind = get_continunous_mapping_from_feat_list( + old_cont + old_bin + _dummy_sparse_feat, new_cont + new_bin + _dummy_sparse_feat + ) + + _res = { + "continuous": (cont_old_var_ind, cont_new_var_ind), + "all": (all_old_var_ind, all_new_var_ind), + } + + return _res + + +def warm_start_from_var_dict( + old_ckpt_path, + var_ind_dict, + output_dir, + new_len_var, + var_to_change_dict_fn=get_model_type_to_tensors_to_change_axis, +): + """ + Parameters: + old_ckpt_path (str): path to the old checkpoint path + new_var_ind (array of int): index to overlapping features in new var between old and new feature list. + old_var_ind (array of int): index to overlapping features in old var between old and new feature list. + + output_dir (str): dir that used to write modified checkpoint + new_len_var ({str:int}): number of feature in the new feature list. 
+ var_to_change_dict_fn (dict): A function to get the dictionary of format {var_name: dim_to_change} + """ + old_var_dict = get_var_dict(old_ckpt_path) + + ckpt_file_name = os.path.basename(old_ckpt_path) + mkdirp(output_dir) + output_path = join(output_dir, ckpt_file_name) + + tensors_to_change = var_to_change_dict_fn() + tf.compat.v1.reset_default_graph() + + with tf.Session() as sess: + var_name_shape_list = tf.train.list_variables(old_ckpt_path) + count = 0 + + for var_name, var_shape in var_name_shape_list: + old_var = old_var_dict[var_name] + if var_name in tensors_to_change.keys(): + _info_tuple = tensors_to_change[var_name] + dims_to_remove_from, var_type = _info_tuple + + new_var_ind, old_var_ind = var_ind_dict[var_type] + + this_shape = list(old_var.shape) + for this_dim in dims_to_remove_from: + this_shape[this_dim] = new_len_var[var_type] + + stddev = np.std(old_var) + truncated_norm_generator = stats.truncnorm(-0.5, 0.5, loc=0, scale=stddev) + size = np.prod(this_shape) + new_var = truncated_norm_generator.rvs(size).reshape(this_shape) + new_var = new_var.astype(old_var.dtype) + + new_var = copy_feat_based_on_mapping( + new_var, old_var, dims_to_remove_from, new_var_ind, old_var_ind + ) + count = count + 1 + else: + new_var = old_var + var = tf.Variable(new_var, name=var_name) + assert count == len(tensors_to_change.keys()), "not all variables are exchanged.\n" + saver = tf.train.Saver() + sess.run(tf.global_variables_initializer()) + saver.save(sess, output_path) + return output_path + + +def copy_feat_based_on_mapping(new_array, old_array, dims_to_remove_from, new_var_ind, old_var_ind): + if dims_to_remove_from == [0, 1]: + for this_new_ind, this_old_ind in zip(new_var_ind, old_var_ind): + new_array[this_new_ind, new_var_ind] = old_array[this_old_ind, old_var_ind] + elif dims_to_remove_from == [0]: + new_array[new_var_ind] = old_array[old_var_ind] + elif dims_to_remove_from == [1]: + new_array[:, new_var_ind] = old_array[:, old_var_ind] + else: + 
raise RuntimeError(f"undefined dims_to_remove_from pattern: ({dims_to_remove_from})") + return new_array + + +def read_file(filename, decode=False): + """ + Reads contents from a file and optionally decodes it. + + Arguments: + filename: + path to file where the contents will be loaded from. + Accepts HDFS and local paths. + decode: + False or 'json'. When decode='json', contents is decoded + with json.loads. When False, contents is returned as is. + """ + graph = tf.Graph() + with graph.as_default(): + read = tf.read_file(filename) + + with tf.Session(graph=graph) as sess: + contents = sess.run(read) + if not isinstance(contents, str): + contents = contents.decode() + + if decode == "json": + contents = json.loads(contents) + + return contents + + +def read_feat_list_from_disk(file_path): + return read_file(file_path, decode="json") + + +def get_feature_list_for_light_ranking(feature_list_path, data_spec_path): + feature_list = read_config(feature_list_path).items() + string_feat_list = [f[0] for f in feature_list if f[1] != "S"] + + feature_config_builder = twml.contrib.feature_config.FeatureConfigBuilder( + data_spec_path=data_spec_path + ) + feature_config_builder = feature_config_builder.extract_feature_group( + feature_regexes=string_feat_list, + group_name="continuous", + default_value=-1, + type_filter=["CONTINUOUS"], + ) + feature_config = feature_config_builder.build() + feature_list = feature_config_builder._feature_group_extraction_configs[0].feature_map[ + "CONTINUOUS" + ] + return feature_list + + +def get_feature_list_for_heavy_ranking(feature_list_path, data_spec_path): + feature_list = read_config(feature_list_path).items() + string_feat_list = [f[0] for f in feature_list if f[1] != "S"] + + feature_config_builder = twml.contrib.feature_config.FeatureConfigBuilder( + data_spec_path=data_spec_path + ) + feature_config_builder = feature_config_builder.extract_feature_group( + feature_regexes=string_feat_list, + group_name="continuous", + 
def warm_start_checkpoint(
  old_best_ckpt_folder,
  old_feature_list_path,
  feature_allow_list_path,
  data_spec_path,
  output_ckpt_folder,
  *args,
):
  """
  Creates a new checkpoint warm started from the latest checkpoint of an old model.

  The latest checkpoint found in old_best_ckpt_folder is handed to
  warm_start_from_var_dict together with the mapping that
  get_continuous_mapping_from_feat_dict returns for the new and old feature lists.

  Arguments:
    old_best_ckpt_folder:
      path to the best_checkpoint_folder of the old model.
    old_feature_list_path:
      path to the json file holding the feature list used by the old model.
    feature_allow_list_path:
      yaml file that contains the feature allow list.
    data_spec_path:
      path to the data_spec file.
    output_ckpt_folder:
      folder that receives the modified ckpt.
    *args:
      accepted but not used.

  Returns:
    path to the modified ckpt.
  """
  latest_old_ckpt = tf.train.latest_checkpoint(old_best_ckpt_folder, latest_filename=None)

  new_feats = get_feature_list(feature_allow_list_path, data_spec_path)
  old_feats = read_feat_list_from_disk(old_feature_list_path)
  index_mapping = get_continuous_mapping_from_feat_dict(new_feats, old_feats)

  continuous_count = len(new_feats["continuous"])
  all_count = len(new_feats["continuous"] + new_feats["binary"]) + 100

  return warm_start_from_var_dict(
    latest_old_ckpt,
    index_mapping,
    output_dir=output_ckpt_folder,
    new_len_var={"continuous": continuous_count, "all": all_count},
  )
+ source = "deep_norm.py", + dependencies = [ + ":mlwf_libs", + "3rdparty/python/_closures/frigate/frigate-pushservice-opensource/src/main/python/models/light_ranking:mlwf_model", + ], +) + +python3_library( + name = "libs", + sources = ["**/*.py"], + tags = ["no-mypy"], + dependencies = [ + "src/python/twitter/deepbird/projects/magic_recs/libs", + "src/python/twitter/deepbird/util/data", + "twml:twml-nodeps", + ], +) + +python3_library( + name = "mlwf_libs", + sources = ["**/*.py"], + tags = ["no-mypy"], + dependencies = [ + "src/python/twitter/deepbird/projects/magic_recs/libs", + "twml", + ], +) diff --git a/pushservice/src/main/python/models/light_ranking/README.md b/pushservice/src/main/python/models/light_ranking/README.md new file mode 100644 index 000000000..9d7bd2682 --- /dev/null +++ b/pushservice/src/main/python/models/light_ranking/README.md @@ -0,0 +1,14 @@ +# Notification Light Ranker Model + +## Model Context +There are 4 major components of Twitter notifications recommendation system: 1) candidate generation 2) light ranking 3) heavy ranking & 4) quality control. This notification light ranker model bridges candidate generation and heavy ranking by pre-selecting highly-relevant candidates from the initial huge candidate pool. It’s a light-weight model to reduce system cost during heavy ranking without hurting user experience. + +## Directory Structure +- BUILD: this file defines python library dependencies +- model_pools_mlp.py: this file defines tensorflow model architecture for the notification light ranker model +- deep_norm.py: this file contains 1) how to build the tensorflow graph with specified model architecture, loss function and training configuration. 
def build_graph(
  features, label, mode, params, config=None, run_light_ranking_group_metrics_in_bq=False
):
  """
  Builds the light-ranking graph for one estimator mode.

  Arguments:
    features:
      mapping of input tensors. "weights" is read outside PREDICT mode;
      "meta.trace_id" and "meta.ranking.weighted_oonc_model_score" are read in
      PREDICT mode when run_light_ranking_group_metrics_in_bq is set.
    label:
      labels used for the sigmoid cross-entropy loss; also forwarded to the model function.
    mode:
      a tf.estimator.ModeKeys value.
    params:
      hyper-parameters. This function reads task_name, use_record_weight,
      smooth_weight, min_record_weight, learning_rate and trainable_regexes.
    config:
      not used by this function.
    run_light_ranking_group_metrics_in_bq:
      when True, PREDICT mode also emits "trace_id" and "target".

  Returns:
    dict with "loss", "train_op" and the model output, stored under "prediction"
    in PREDICT mode and under "output" otherwise.

  Raises:
    ValueError: if params.task_name is not in LABELS_LR.
  """
  is_training = mode == tf.estimator.ModeKeys.TRAIN
  this_model_func = light_ranking_mlp_ngbdt
  model_output = this_model_func(features, is_training, params, label)

  logits = model_output["output"]
  graph_output = {}
  # --------------------------------------------------------
  #            define graph output dict
  # --------------------------------------------------------
  if mode == tf.estimator.ModeKeys.PREDICT:
    # No loss in PREDICT mode; only the sigmoid score is produced.
    loss = None
    output_label = "prediction"
    if params.task_name in LABELS_LR:
      output = tf.nn.sigmoid(logits)
      output = tf.clip_by_value(output, 0, 1)

      if run_light_ranking_group_metrics_in_bq:
        # Pass these two input features through to the prediction output.
        graph_output["trace_id"] = features["meta.trace_id"]
        graph_output["target"] = features["meta.ranking.weighted_oonc_model_score"]

    else:
      raise ValueError("Invalid Task Name !")

  else:
    output_label = "output"
    weights = tf.cast(features["weights"], dtype=tf.float32, name="RecordWeights")

    if params.task_name in LABELS_LR:
      if params.use_record_weight:
        # Replace the raw weight w by 1 / (1 + w + smooth_weight), clipped to
        # [min_record_weight, 1.0].
        weights = tf.clip_by_value(
          1.0 / (1.0 + weights + params.smooth_weight), params.min_record_weight, 1.0
        )

        # Weighted cross-entropy, normalized by the sum of the weights.
        loss = tf.reduce_sum(
          tf.nn.sigmoid_cross_entropy_with_logits(labels=label, logits=logits) * weights
        ) / (tf.reduce_sum(weights))
      else:
        loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=label, logits=logits))
      output = tf.nn.sigmoid(logits)

    else:
      raise ValueError("Invalid Task Name !")

  train_op = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # --------------------------------------------------------
    #                get train_op
    # --------------------------------------------------------
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=params.learning_rate)
    update_ops = set(tf.get_collection(tf.GraphKeys.UPDATE_OPS))
    # Only the variables selected by params.trainable_regexes are optimized.
    variables = get_trainable_variables(
      all_trainable_variables=tf.trainable_variables(), trainable_regexes=params.trainable_regexes
    )
    # The train op is created under a control dependency on the UPDATE_OPS collection.
    with tf.control_dependencies(update_ops):
      train_op = twml.optimizers.optimize_loss(
        loss=loss,
        variables=variables,
        global_step=tf.train.get_global_step(),
        optimizer=optimizer,
        learning_rate=params.learning_rate,
        learning_rate_decay_fn=twml.learning_rate_decay.get_learning_rate_decay_fn(params),
      )

  graph_output[output_label] = output
  graph_output["loss"] = loss
  graph_output["train_op"] = train_op
  return graph_output
get_params(args=None): + parser = get_arg_parser_light_ranking() + if args is None: + return parser.parse_args() + else: + return parser.parse_args(args) + + +def _main(): + opt = get_params() + logging.info("parse is: ") + logging.info(opt) + + feature_list = read_config(opt.feature_list).items() + feature_config = get_feature_config_light_ranking( + data_spec_path=opt.data_spec, + feature_list_provided=feature_list, + opt=opt, + add_gbdt=opt.use_gbdt_features, + run_light_ranking_group_metrics_in_bq=opt.run_light_ranking_group_metrics_in_bq, + ) + feature_list_path = opt.feature_list + + # -------------------------------------------------------- + # Create Trainer + # -------------------------------------------------------- + trainer = DataRecordTrainer( + name=opt.model_trainer_name, + params=opt, + build_graph_fn=build_graph, + save_dir=opt.save_dir, + run_config=None, + feature_config=feature_config, + metric_fn=get_metric_fn(opt.task_name, use_stratify_metrics=False), + ) + if opt.directly_export_best: + logging.info("Directly exporting the model without training") + else: + # ---------------------------------------------------- + # Model Training & Evaluation + # ---------------------------------------------------- + eval_input_fn = trainer.get_eval_input_fn(repeat=False, shuffle=False) + train_input_fn = trainer.get_train_input_fn(shuffle=True) + + if opt.distributed or opt.num_workers is not None: + learn = trainer.train_and_evaluate + else: + learn = trainer.learn + logging.info("Training...") + start = datetime.now() + + early_stop_metric = "rce_unweighted_" + opt.task_name + learn( + early_stop_minimize=False, + early_stop_metric=early_stop_metric, + early_stop_patience=opt.early_stop_patience, + early_stop_tolerance=opt.early_stop_tolerance, + eval_input_fn=eval_input_fn, + train_input_fn=train_input_fn, + ) + + end = datetime.now() + logging.info("Training time: " + str(end - start)) + + logging.info("Exporting the models...") + + # 
-------------------------------------------------------- + # Do the model exporting + # -------------------------------------------------------- + start = datetime.now() + if not opt.export_dir: + opt.export_dir = os.path.join(opt.save_dir, "exported_models") + + raw_model_path = twml.contrib.export.export_fn.export_all_models( + trainer=trainer, + export_dir=opt.export_dir, + parse_fn=feature_config.get_parse_fn(), + serving_input_receiver_fn=feature_config.get_serving_input_receiver_fn(), + export_output_fn=twml.export_output_fns.batch_prediction_continuous_output_fn, + ) + export_model_dir = decode_str_or_unicode(raw_model_path) + + logging.info("Model export time: " + str(datetime.now() - start)) + logging.info("The saved model directory is: " + opt.save_dir) + + tf.logging.info("getting default continuous_feature_list") + continuous_feature_list = get_feature_list_for_light_ranking(feature_list_path, opt.data_spec) + continous_feature_list_save_path = os.path.join(opt.save_dir, "continuous_feature_list.json") + twml.util.write_file(continous_feature_list_save_path, continuous_feature_list, encode="json") + tf.logging.info(f"Finish writting files to {continous_feature_list_save_path}") + + if opt.run_light_ranking_group_metrics: + # -------------------------------------------- + # Run Light Ranking Group Metrics + # -------------------------------------------- + run_group_metrics_light_ranking( + trainer=trainer, + data_dir=os.path.join(opt.eval_data_dir, opt.eval_start_datetime), + model_path=export_model_dir, + parse_fn=feature_config.get_parse_fn(), + ) + + if opt.run_light_ranking_group_metrics_in_bq: + # ---------------------------------------------------------------------------------------- + # Get Light/Heavy Ranker Predictions for Light Ranking Group Metrics in BigQuery + # ---------------------------------------------------------------------------------------- + trainer_pred = DataRecordTrainer( + name=opt.model_trainer_name, + params=opt, + 
"""
Entry point that evaluates a light-ranking model.

Parses the light-ranking arguments plus three evaluation-only flags, builds a
DataRecordTrainer around build_graph and runs the evaluations selected by the flags.
"""
from datetime import datetime
from functools import partial
import os

from ..libs.get_feat_config import get_feature_config_light_ranking
from ..libs.group_metrics import (
  run_group_metrics_light_ranking,
  run_group_metrics_light_ranking_in_bq,
)
from ..libs.metric_fn_utils import get_metric_fn
from ..libs.model_args import get_arg_parser_light_ranking
from ..libs.model_utils import read_config
from .deep_norm import build_graph, DataRecordTrainer, logging


# checkstyle: noqa

if __name__ == "__main__":
  parser = get_arg_parser_light_ranking()
  parser.add_argument(
    "--eval_checkpoint",
    default=None,
    type=str,
    help="Which checkpoint to use for evaluation",
  )
  parser.add_argument(
    "--saved_model_path",
    default=None,
    type=str,
    help="Path to saved model for evaluation",
  )
  parser.add_argument(
    "--run_binary_metrics",
    default=False,
    action="store_true",
    help="Whether to compute the basic binary metrics for Light Ranking.",
  )

  opt = parser.parse_args()
  logging.info("parse is: ")
  logging.info(opt)

  feature_list = read_config(opt.feature_list).items()
  # deep_norm does not provide get_config_func, so importing it from there failed at
  # start-up. Build the feature config with the same function and keyword arguments
  # that deep_norm uses for training.
  feature_config = get_feature_config_light_ranking(
    data_spec_path=opt.data_spec,
    feature_list_provided=feature_list,
    opt=opt,
    add_gbdt=opt.use_gbdt_features,
    run_light_ranking_group_metrics_in_bq=opt.run_light_ranking_group_metrics_in_bq,
  )

  # -----------------------------------------------
  #        Create Trainer
  # -----------------------------------------------
  trainer = DataRecordTrainer(
    name=opt.model_trainer_name,
    params=opt,
    build_graph_fn=partial(build_graph, run_light_ranking_group_metrics_in_bq=True),
    save_dir=opt.save_dir,
    run_config=None,
    feature_config=feature_config,
    metric_fn=get_metric_fn(opt.task_name, use_stratify_metrics=False),
  )

  # -----------------------------------------------
  #         Model Evaluation
  # -----------------------------------------------
  logging.info("Evaluating...")
  start = datetime.now()

  if opt.run_binary_metrics:
    eval_input_fn = trainer.get_eval_input_fn(repeat=False, shuffle=False)
    # A negative eval_steps is passed on as None.
    eval_steps = None if (opt.eval_steps is not None and opt.eval_steps < 0) else opt.eval_steps
    trainer.estimator.evaluate(eval_input_fn, steps=eval_steps, checkpoint_path=opt.eval_checkpoint)

  if opt.run_light_ranking_group_metrics_in_bq:
    run_group_metrics_light_ranking_in_bq(
      trainer=trainer, params=opt, checkpoint_path=opt.eval_checkpoint
    )

  if opt.run_light_ranking_group_metrics:
    run_group_metrics_light_ranking(
      trainer=trainer,
      data_dir=os.path.join(opt.eval_data_dir, opt.eval_start_datetime),
      model_path=opt.saved_model_path,
      parse_fn=feature_config.get_parse_fn(),
    )

  end = datetime.now()
  logging.info("Evaluating time: " + str(end - start))
def deepnorm_light_ranking(
  features,
  is_training,
  params,
  label=None,
  decay=0.99999,
  dense_emb_size=128,
  base_activation=None,
  input_dropout_rate=None,
  input_dense_type="self_atten_dense",
  emb_dense_type="self_atten_dense",
  mlp_dense_type="self_atten_dense",
  use_gbdt=False,
):
  """
  Builds the light-ranking network and returns its logits.

  Arguments:
    features:
      mapping of input tensors. Reads "gbdt_sparse" when use_gbdt is True,
      otherwise "continuous" and "sparse_no_continuous".
    is_training:
      whether the graph is built for training.
    params:
      hyper-parameters. This function reads sparse_embedding_size, input_size_bits,
      use_missing_sub_branch, num_mlp_layers, mlp_neuron_scale and task_name.
    label:
      unused. Passing a value only triggers a DeprecationWarning.
    decay:
      decay of the input normalization layer; also used as momentum and
      renorm_momentum of the batch normalization.
    dense_emb_size:
      output size passed to get_dense_out for the dense-branch embeddings.
    base_activation:
      activation for the embedding and MLP layers. Defaults to a LeakyReLU layer.
    input_dropout_rate:
      dropout rate applied to the inputs; None disables input dropout.
    input_dense_type:
      dense_type passed to get_dense_out in the dense branch.
    emb_dense_type:
      dense_type of the first MLP layer.
    mlp_dense_type:
      dense_type of the remaining MLP layers and of the logits layer.
    use_gbdt:
      when True, only the "gbdt_sparse" features are embedded.

  Returns:
    dict {"output": logits}.

  Raises:
    ValueError: if params.task_name is not one of "Sent", "HeavyRankPosition",
      "HeavyRankProbability".
  """
  # --------------------------------------------------------
  #            Initial Parameter Checking
  # --------------------------------------------------------
  if base_activation is None:
    base_activation = tf.keras.layers.LeakyReLU()

  if label is not None:
    # NOTE(review): the message names deepnorm_gbdt rather than this function.
    warnings.warn(
      "Label is unused in deepnorm_gbdt. Stop using this argument.",
      DeprecationWarning,
    )

  with tf.variable_scope("helper_layers"):
    full_sparse_layer = FullSparse(
      output_size=params.sparse_embedding_size,
      activation=base_activation,
      use_sparse_grads=is_training,
      use_binary_values=False,
      dtype=tf.float32,
    )
    input_normalizing_layer = ZscoreNormalization(decay=decay, name="input_normalizing_layer")

  # --------------------------------------------------------
  #            Feature Selection & Embedding
  # --------------------------------------------------------
  if use_gbdt:
    sparse_gbdt_features = _sparse_feature_fixup(features["gbdt_sparse"], params.input_size_bits)
    if input_dropout_rate is not None:
      sparse_gbdt_features = tensor_dropout(
        sparse_gbdt_features, input_dropout_rate, is_training, sparse_tensor=True
      )

    total_embed = full_sparse_layer(sparse_gbdt_features, use_binary_values=True)

    # When input dropout is used during training, divide the embedding by (1 - rate).
    if (input_dropout_rate is not None) and is_training:
      total_embed = total_embed / (1 - input_dropout_rate)

  else:
    with tf.variable_scope("dense_branch"):
      dense_continuous_features = filter_nans_and_infs(features["continuous"])

      if params.use_missing_sub_branch:
        # Entries equal to MISSING_VALUE_MARKER are zeroed before normalization, and
        # the mask of the remaining entries is passed to the normalizing layer.
        is_missing = tf.equal(dense_continuous_features, MISSING_VALUE_MARKER)
        continuous_features_filled = tf.where(
          is_missing,
          tf.zeros_like(dense_continuous_features),
          dense_continuous_features,
        )
        normalized_features = input_normalizing_layer(
          continuous_features_filled, is_training, tf.math.logical_not(is_missing)
        )

        # The missing-value mask itself is embedded as a separate input.
        with tf.variable_scope("missing_sub_branch"):
          missing_feature_embed = get_dense_out(
            tf.cast(is_missing, tf.float32),
            dense_emb_size,
            activation=base_activation,
            dense_type=input_dense_type,
          )

      else:
        continuous_features_filled = dense_continuous_features
        normalized_features = input_normalizing_layer(continuous_features_filled, is_training)

      with tf.variable_scope("continuous_sub_branch"):
        normalized_features = adaptive_transformation(
          normalized_features, is_training, func_type="tiny"
        )

        if input_dropout_rate is not None:
          normalized_features = tensor_dropout(
            normalized_features,
            input_dropout_rate,
            is_training,
            sparse_tensor=False,
          )
        filled_feature_embed = get_dense_out(
          normalized_features,
          dense_emb_size,
          activation=base_activation,
          dense_type=input_dense_type,
        )

      if params.use_missing_sub_branch:
        dense_embed = tf.concat(
          [filled_feature_embed, missing_feature_embed], axis=1, name="merge_dense_emb"
        )
      else:
        dense_embed = filled_feature_embed

    with tf.variable_scope("sparse_branch"):
      sparse_discrete_features = _sparse_feature_fixup(
        features["sparse_no_continuous"], params.input_size_bits
      )
      if input_dropout_rate is not None:
        sparse_discrete_features = tensor_dropout(
          sparse_discrete_features, input_dropout_rate, is_training, sparse_tensor=True
        )

      discrete_features_embed = full_sparse_layer(sparse_discrete_features, use_binary_values=True)

      # Same (1 - rate) division as in the gbdt path above.
      if (input_dropout_rate is not None) and is_training:
        discrete_features_embed = discrete_features_embed / (1 - input_dropout_rate)

    # Dense and sparse embeddings are joined along axis 1.
    total_embed = tf.concat(
      [dense_embed, discrete_features_embed],
      axis=1,
      name="total_embed",
    )

  total_embed = tf.layers.batch_normalization(
    total_embed,
    training=is_training,
    renorm_momentum=decay,
    momentum=decay,
    renorm=is_training,
    trainable=True,
  )

  # --------------------------------------------------------
  #                MLP Layers
  # --------------------------------------------------------
  with tf.variable_scope("MLP_branch"):

    assert params.num_mlp_layers >= 0
    # embed_list[0] is the input embedding; embed_list[xl] is the output of layer xl.
    embed_list = [total_embed] + [None for _ in range(params.num_mlp_layers)]
    dense_types = [emb_dense_type] + [mlp_dense_type for _ in range(params.num_mlp_layers - 1)]

    for xl in range(1, params.num_mlp_layers + 1):
      # Layer xl has mlp_neuron_scale ** (num_mlp_layers + 1 - xl) units.
      neurons = params.mlp_neuron_scale ** (params.num_mlp_layers + 1 - xl)
      embed_list[xl] = get_dense_out(
        embed_list[xl - 1], neurons, activation=base_activation, dense_type=dense_types[xl - 1]
      )

    if params.task_name in ["Sent", "HeavyRankPosition", "HeavyRankProbability"]:
      # A single output unit without activation produces the logits.
      logits = get_dense_out(embed_list[-1], 1, activation=None, dense_type=mlp_dense_type)

    else:
      raise ValueError("Invalid Task Name !")

  output_dict = {"output": logits}
  return output_dict
+ "discovery-common/src/main/scala/com/twitter/discovery/common/environment", + "discovery-common/src/main/scala/com/twitter/discovery/common/fatigue", + "discovery-common/src/main/scala/com/twitter/discovery/common/nackwarmupfilter", + "discovery-common/src/main/scala/com/twitter/discovery/common/server", + "discovery-ds/src/main/thrift/com/twitter/dds/scio/searcher_aggregate_history_srp:searcher_aggregate_history_srp-scala", + "escherbird/src/scala/com/twitter/escherbird/util/metadatastitch", + "escherbird/src/scala/com/twitter/escherbird/util/uttclient", + "escherbird/src/thrift/com/twitter/escherbird/utt:strato-columns-scala", + "eventbus/client", + "eventdetection/event_context/src/main/scala/com/twitter/eventdetection/event_context/util", + "events-recos/events-recos-service/src/main/thrift:events-recos-thrift-scala", + "explore/explore-ranker/thrift/src/main/thrift:thrift-scala", + "featureswitches/featureswitches-core/src/main/scala", + "featureswitches/featureswitches-core/src/main/scala:dynmap", + "featureswitches/featureswitches-core/src/main/scala:recipient", + "featureswitches/featureswitches-core/src/main/scala:useragent", + "featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", + "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/server", + "finagle-internal/ostrich-stats", + "finagle/finagle-core/src/main", + "finagle/finagle-http/src/main/scala", + "finagle/finagle-memcached/src/main/scala", + "finagle/finagle-stats", + "finagle/finagle-thriftmux", + "finagle/finagle-tunable/src/main/scala", + "finagle/finagle-zipkin-scribe", + "finatra-internal/abdecider", + "finatra-internal/decider", + "finatra-internal/mtls-http/src/main/scala", + "finatra-internal/mtls-thriftmux/src/main/scala", + "finatra/http-client/src/main/scala", + "finatra/http-core/src/main/java/com/twitter/finatra/http", + 
"finatra/http-core/src/main/scala/com/twitter/finatra/http/response", + "finatra/http-server/src/main/scala/com/twitter/finatra/http", + "finatra/http-server/src/main/scala/com/twitter/finatra/http/filters", + "finatra/inject/inject-app/src/main/java/com/twitter/inject/annotations", + "finatra/inject/inject-app/src/main/scala", + "finatra/inject/inject-core/src/main/scala", + "finatra/inject/inject-server/src/main/scala", + "finatra/inject/inject-slf4j/src/main/scala/com/twitter/inject", + "finatra/inject/inject-thrift-client/src/main/scala", + "finatra/inject/inject-utils/src/main/scala", + "finatra/utils/src/main/java/com/twitter/finatra/annotations", + "fleets/fleets-proxy/thrift/src/main/thrift:fleet-scala", + "fleets/fleets-proxy/thrift/src/main/thrift/service:baseservice-scala", + "flock-client/src/main/scala", + "flock-client/src/main/thrift:thrift-scala", + "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", + "frigate/frigate-common:base", + "frigate/frigate-common:config", + "frigate/frigate-common:debug", + "frigate/frigate-common:entity_graph_client", + "frigate/frigate-common:history", + "frigate/frigate-common:logger", + "frigate/frigate-common:ml-base", + "frigate/frigate-common:ml-feature", + "frigate/frigate-common:ml-prediction", + "frigate/frigate-common:ntab", + "frigate/frigate-common:predicate", + "frigate/frigate-common:rec_types", + "frigate/frigate-common:score_summary", + "frigate/frigate-common:util", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/experiments", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/filter", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/modules/store:semantic_core_stores", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/deviceinfo", + 
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests", + "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", + "frigate/push-mixer/thrift/src/main/thrift:thrift-scala", + "geo/geo-prediction/src/main/thrift:local-viral-tweets-thrift-scala", + "geoduck/service/src/main/scala/com/twitter/geoduck/service/common/clientmodules", + "geoduck/util/country", + "gizmoduck/client/src/main/scala/com/twitter/gizmoduck/testusers/client", + "hermit/hermit-core:model-user_state", + "hermit/hermit-core:predicate", + "hermit/hermit-core:predicate-gizmoduck", + "hermit/hermit-core:predicate-scarecrow", + "hermit/hermit-core:predicate-socialgraph", + "hermit/hermit-core:predicate-tweetypie", + "hermit/hermit-core:store-labeled_push_recs", + "hermit/hermit-core:store-metastore", + "hermit/hermit-core:store-timezone", + "hermit/hermit-core:store-tweetypie", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/constants", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/model", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/gizmoduck", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/scarecrow", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/semantic_core", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/user_htl_session_store", + "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/user_interest", + "hmli/hss/src/main/thrift/com/twitter/hss:thrift-scala", + "ibis2/service/src/main/scala/com/twitter/ibis2/lib", + "ibis2/service/src/main/thrift/com/twitter/ibis2/service:ibis2-service-scala", + "interests-service/thrift/src/main/thrift:thrift-scala", + "interests_discovery/thrift/src/main/thrift:batch-thrift-scala", + "interests_discovery/thrift/src/main/thrift:service-thrift-scala", + 
"kujaku/thrift/src/main/thrift:domain-scala", + "live-video-timeline/client/src/main/scala/com/twitter/livevideo/timeline/client/v2", + "live-video-timeline/domain/src/main/scala/com/twitter/livevideo/timeline/domain", + "live-video-timeline/domain/src/main/scala/com/twitter/livevideo/timeline/domain/v2", + "live-video-timeline/thrift/src/main/thrift/com/twitter/livevideo/timeline:thrift-scala", + "live-video/common/src/main/scala/com/twitter/livevideo/common/domain/v2", + "live-video/common/src/main/scala/com/twitter/livevideo/common/ids", + "notifications-platform/inbound-notifications/src/main/thrift/com/twitter/inbound_notifications:exception-scala", + "notifications-platform/inbound-notifications/src/main/thrift/com/twitter/inbound_notifications:thrift-scala", + "notifications-platform/platform-lib/src/main/thrift/com/twitter/notifications/platform:custom-notification-actions-scala", + "notifications-platform/platform-lib/src/main/thrift/com/twitter/notifications/platform:thrift-scala", + "notifications-relevance/src/scala/com/twitter/nrel/heavyranker", + "notifications-relevance/src/scala/com/twitter/nrel/hydration/base", + "notifications-relevance/src/scala/com/twitter/nrel/hydration/frigate", + "notifications-relevance/src/scala/com/twitter/nrel/hydration/push", + "notifications-relevance/src/scala/com/twitter/nrel/lightranker", + "notificationservice/common/src/main/scala/com/twitter/notificationservice/genericfeedbackstore", + "notificationservice/common/src/main/scala/com/twitter/notificationservice/model:alias", + "notificationservice/common/src/main/scala/com/twitter/notificationservice/model/service", + "notificationservice/common/src/test/scala/com/twitter/notificationservice/mocks", + "notificationservice/scribe/src/main/scala/com/twitter/notificationservice/scribe/manhattan:mh_wrapper", + "notificationservice/thrift/src/main/thrift/com/twitter/notificationservice/api:thrift-scala", + 
"notificationservice/thrift/src/main/thrift/com/twitter/notificationservice/badgecount-api:thrift-scala", + "notificationservice/thrift/src/main/thrift/com/twitter/notificationservice/generic_notifications:thrift-scala", + "notifinfra/ni-lib/src/main/scala/com/twitter/ni/lib/logged_out_transform", + "observability/observability-manhattan-client/src/main/scala", + "onboarding/service/src/main/scala/com/twitter/onboarding/task/service/models/external", + "onboarding/service/thrift/src/main/thrift:thrift-scala", + "people-discovery/api/thrift/src/main/thrift:thrift-scala", + "periscope/api-proxy-thrift/thrift/src/main/thrift:thrift-scala", + "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module", + "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module/stringcenter", + "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", + "qig-ranker/thrift/src/main/thrift:thrift-scala", + "rux-ds/src/main/thrift/com/twitter/ruxds/jobs/user_past_aggregate:user_past_aggregate-scala", + "rux/common/src/main/scala/com/twitter/rux/common/encode", + "rux/common/thrift/src/main/thrift/rux-context:rux-context-scala", + "rux/common/thrift/src/main/thrift/strato:strato-scala", + "scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers", + "scrooge/scrooge-core", + "scrooge/scrooge-serializer/src/main/scala", + "sensitive-ds/src/main/thrift/com/twitter/scio/nsfw_user_segmentation:nsfw_user_segmentation-scala", + "servo/decider/src/main/scala", + "servo/request/src/main/scala", + "servo/util/src/main/scala", + "src/java/com/twitter/ml/api:api-base", + "src/java/com/twitter/ml/prediction/core", + "src/scala/com/twitter/frigate/data_pipeline/common", + "src/scala/com/twitter/frigate/data_pipeline/embedding_cg:embedding_cg-test-user-ids", + "src/scala/com/twitter/frigate/data_pipeline/features_common", + "src/scala/com/twitter/frigate/news_article_recs/news_articles_metadata:thrift-scala", + 
"src/scala/com/twitter/frontpage/stream/util", + "src/scala/com/twitter/language/normalization", + "src/scala/com/twitter/ml/api/embedding", + "src/scala/com/twitter/ml/api/util:datarecord", + "src/scala/com/twitter/ml/featurestore/catalog/entities/core", + "src/scala/com/twitter/ml/featurestore/catalog/entities/magicrecs", + "src/scala/com/twitter/ml/featurestore/catalog/features/core:aggregate", + "src/scala/com/twitter/ml/featurestore/catalog/features/cuad:aggregate", + "src/scala/com/twitter/ml/featurestore/catalog/features/embeddings", + "src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:aggregate", + "src/scala/com/twitter/ml/featurestore/catalog/features/topic_signals:aggregate", + "src/scala/com/twitter/ml/featurestore/lib", + "src/scala/com/twitter/ml/featurestore/lib/data", + "src/scala/com/twitter/ml/featurestore/lib/dynamic", + "src/scala/com/twitter/ml/featurestore/lib/entity", + "src/scala/com/twitter/ml/featurestore/lib/online", + "src/scala/com/twitter/recommendation/interests/discovery/core/config", + "src/scala/com/twitter/recommendation/interests/discovery/core/deploy", + "src/scala/com/twitter/recommendation/interests/discovery/core/model", + "src/scala/com/twitter/recommendation/interests/discovery/popgeo/deploy", + "src/scala/com/twitter/simclusters_v2/common", + "src/scala/com/twitter/storehaus_internal/manhattan", + "src/scala/com/twitter/storehaus_internal/manhattan/config", + "src/scala/com/twitter/storehaus_internal/memcache", + "src/scala/com/twitter/storehaus_internal/memcache/config", + "src/scala/com/twitter/storehaus_internal/util", + "src/scala/com/twitter/taxi/common", + "src/scala/com/twitter/taxi/config", + "src/scala/com/twitter/taxi/deploy", + "src/scala/com/twitter/taxi/trending/common", + "src/thrift/com/twitter/ads/adserver:adserver_rpc-scala", + "src/thrift/com/twitter/clientapp/gen:clientapp-scala", + "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", + 
"src/thrift/com/twitter/escherbird/common:constants-scala", + "src/thrift/com/twitter/escherbird/metadata:megadata-scala", + "src/thrift/com/twitter/escherbird/metadata:metadata-service-scala", + "src/thrift/com/twitter/escherbird/search:search-service-scala", + "src/thrift/com/twitter/expandodo:only-scala", + "src/thrift/com/twitter/frigate:frigate-common-thrift-scala", + "src/thrift/com/twitter/frigate:frigate-ml-thrift-scala", + "src/thrift/com/twitter/frigate:frigate-notification-thrift-scala", + "src/thrift/com/twitter/frigate:frigate-secondary-accounts-thrift-scala", + "src/thrift/com/twitter/frigate:frigate-thrift-scala", + "src/thrift/com/twitter/frigate:frigate-user-media-representation-thrift-scala", + "src/thrift/com/twitter/frigate/data_pipeline:frigate-user-history-thrift-scala", + "src/thrift/com/twitter/frigate/dau_model:frigate-dau-thrift-scala", + "src/thrift/com/twitter/frigate/magic_events:frigate-magic-events-thrift-scala", + "src/thrift/com/twitter/frigate/magic_events/scribe:thrift-scala", + "src/thrift/com/twitter/frigate/pushcap:frigate-pushcap-thrift-scala", + "src/thrift/com/twitter/frigate/pushservice:frigate-pushservice-thrift-scala", + "src/thrift/com/twitter/frigate/scribe:frigate-scribe-thrift-scala", + "src/thrift/com/twitter/frigate/subscribed_search:frigate-subscribed-search-thrift-scala", + "src/thrift/com/twitter/frigate/user_states:frigate-userstates-thrift-scala", + "src/thrift/com/twitter/geoduck:geoduck-scala", + "src/thrift/com/twitter/gizmoduck:thrift-scala", + "src/thrift/com/twitter/gizmoduck:user-thrift-scala", + "src/thrift/com/twitter/hermit:hermit-scala", + "src/thrift/com/twitter/hermit/pop_geo:hermit-pop-geo-scala", + "src/thrift/com/twitter/hermit/stp:hermit-stp-scala", + "src/thrift/com/twitter/ibis:service-scala", + "src/thrift/com/twitter/manhattan:v1-scala", + "src/thrift/com/twitter/manhattan:v2-scala", + "src/thrift/com/twitter/ml/api:data-java", + "src/thrift/com/twitter/ml/api:data-scala", + 
"src/thrift/com/twitter/ml/featurestore/timelines:ml-features-timelines-scala", + "src/thrift/com/twitter/ml/featurestore/timelines:ml-features-timelines-strato", + "src/thrift/com/twitter/ml/prediction_service:prediction_service-java", + "src/thrift/com/twitter/permissions_storage:thrift-scala", + "src/thrift/com/twitter/pink-floyd/thrift:thrift-scala", + "src/thrift/com/twitter/recos:recos-common-scala", + "src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala", + "src/thrift/com/twitter/recos/user_user_graph:user_user_graph-scala", + "src/thrift/com/twitter/relevance/feature_store:feature_store-scala", + "src/thrift/com/twitter/search:earlybird-scala", + "src/thrift/com/twitter/search/common:features-scala", + "src/thrift/com/twitter/search/query_interaction_graph:query_interaction_graph-scala", + "src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala", + "src/thrift/com/twitter/service/metastore/gen:thrift-scala", + "src/thrift/com/twitter/service/scarecrow/gen:scarecrow-scala", + "src/thrift/com/twitter/service/scarecrow/gen:tiered-actions-scala", + "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", + "src/thrift/com/twitter/socialgraph:thrift-scala", + "src/thrift/com/twitter/spam/rtf:safety-level-scala", + "src/thrift/com/twitter/timelinemixer:thrift-scala", + "src/thrift/com/twitter/timelinemixer/server/internal:thrift-scala", + "src/thrift/com/twitter/timelines/author_features/user_health:thrift-scala", + "src/thrift/com/twitter/timelines/real_graph:real_graph-scala", + "src/thrift/com/twitter/timelinescorer:thrift-scala", + "src/thrift/com/twitter/timelinescorer/server/internal:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/internal:thrift-scala", + "src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala", + "src/thrift/com/twitter/trends/common:common-scala", + "src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala", + 
"src/thrift/com/twitter/tweetypie:service-scala", + "src/thrift/com/twitter/tweetypie:tweet-scala", + "src/thrift/com/twitter/user_session_store:thrift-scala", + "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", + "src/thrift/com/twitter/wtf/interest:interest-thrift-scala", + "src/thrift/com/twitter/wtf/scalding/common:thrift-scala", + "stitch/stitch-core", + "stitch/stitch-gizmoduck", + "stitch/stitch-socialgraph/src/main/scala", + "stitch/stitch-storehaus/src/main/scala", + "stitch/stitch-tweetypie/src/main/scala", + "storage/clients/manhattan/client/src/main/scala", + "strato/config/columns/clients:clients-strato-client", + "strato/config/columns/geo/user:user-strato-client", + "strato/config/columns/globe/curation:curation-strato-client", + "strato/config/columns/interests:interests-strato-client", + "strato/config/columns/ml/featureStore:featureStore-strato-client", + "strato/config/columns/notifications:notifications-strato-client", + "strato/config/columns/notifinfra:notifinfra-strato-client", + "strato/config/columns/periscope:periscope-strato-client", + "strato/config/columns/rux", + "strato/config/columns/rux:rux-strato-client", + "strato/config/columns/rux/open-app:open-app-strato-client", + "strato/config/columns/socialgraph/graphs:graphs-strato-client", + "strato/config/columns/socialgraph/service/soft_users:soft_users-strato-client", + "strato/config/columns/translation/service:service-strato-client", + "strato/config/columns/translation/service/platform:platform-strato-client", + "strato/config/columns/trends/trip:trip-strato-client", + "strato/config/src/thrift/com/twitter/strato/columns/frigate:logged-out-web-notifications-scala", + "strato/config/src/thrift/com/twitter/strato/columns/notifications:thrift-scala", + "strato/src/main/scala/com/twitter/strato/config", + "strato/src/main/scala/com/twitter/strato/response", + "thrift-web-forms", + "timeline-training-service/service/thrift/src/main/thrift:thrift-scala", + 
"timelines/src/main/scala/com/twitter/timelines/features/app", + "topic-social-proof/server/src/main/thrift:thrift-scala", + "topiclisting/topiclisting-core/src/main/scala/com/twitter/topiclisting", + "topiclisting/topiclisting-utt/src/main/scala/com/twitter/topiclisting/utt", + "trends/common/src/main/thrift/com/twitter/trends/common:thrift-scala", + "tweetypie/src/scala/com/twitter/tweetypie/tweettext", + "twitter-context/src/main/scala", + "twitter-server-internal", + "twitter-server/server/src/main/scala", + "twitter-text/lib/java/src/main/java/com/twitter/twittertext", + "twml/runtime/src/main/scala/com/twitter/deepbird/runtime/prediction_engine:prediction_engine_mkl", + "ubs/common/src/main/thrift/com/twitter/ubs:broadcast-thrift-scala", + "ubs/common/src/main/thrift/com/twitter/ubs:seller_application-thrift-scala", + "user_session_store/src/main/scala/com/twitter/user_session_store/impl/manhattan/readwrite", + "util-internal/scribe", + "util-internal/tunable/src/main/scala/com/twitter/util/tunable", + "util/util-app", + "util/util-hashing/src/main/scala", + "util/util-slf4j-api/src/main/scala", + "util/util-stats/src/main/scala", + "visibility/lib/src/main/scala/com/twitter/visibility/builder", + "visibility/lib/src/main/scala/com/twitter/visibility/interfaces/push_service", + "visibility/lib/src/main/scala/com/twitter/visibility/interfaces/spaces", + "visibility/lib/src/main/scala/com/twitter/visibility/util", + ], + exports = [ + "strato/config/src/thrift/com/twitter/strato/columns/frigate:logged-out-web-notifications-scala", + ], +) diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushMixerThriftServerWarmupHandler.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushMixerThriftServerWarmupHandler.scala new file mode 100644 index 000000000..b13d3b093 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushMixerThriftServerWarmupHandler.scala @@ -0,0 +1,93 @@ +package 
com.twitter.frigate.pushservice + +import com.google.inject.Inject +import com.google.inject.Singleton +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.thrift.ClientId +import com.twitter.finatra.thrift.routing.ThriftWarmup +import com.twitter.util.logging.Logging +import com.twitter.inject.utils.Handler +import com.twitter.frigate.pushservice.{thriftscala => t} +import com.twitter.frigate.thriftscala.NotificationDisplayLocation +import com.twitter.util.Stopwatch +import com.twitter.scrooge.Request +import com.twitter.scrooge.Response +import com.twitter.util.Return +import com.twitter.util.Throw +import com.twitter.util.Try + +/** + * Warms up the refresh request path. + * If service is running as pushservice-send then the warmup does nothing. + * + * When making the warmup refresh requests we + * - Set skipFilters to true to execute as much of the request path as possible + * - Set darkWrite to true to prevent sending a push + */ +@Singleton +class PushMixerThriftServerWarmupHandler @Inject() ( + warmup: ThriftWarmup, + serviceIdentifier: ServiceIdentifier) + extends Handler + with Logging { + + private val clientId = ClientId("thrift-warmup-client") + + def handle(): Unit = { + val refreshServices = Set( + "frigate-pushservice", + "frigate-pushservice-canary", + "frigate-pushservice-canary-control", + "frigate-pushservice-canary-treatment" + ) + val isRefresh = refreshServices.contains(serviceIdentifier.service) + if (isRefresh && !serviceIdentifier.isLocal) refreshWarmup() + } + + def refreshWarmup(): Unit = { + val elapsed = Stopwatch.start() + val testIds = Seq( + 1, + 2, + 3 + ) + try { + clientId.asCurrent { + testIds.foreach { id => + val warmupReq = warmupQuery(id) + info(s"Sending warm-up request to service with query: $warmupReq") + warmup.sendRequest( + method = t.PushService.Refresh, + req = Request(t.PushService.Refresh.Args(warmupReq)))(assertWarmupResponse) + } + } + } catch { + case e: Throwable => + 
error(e.getMessage, e) + } + info(s"Warm up complete. Time taken: ${elapsed().toString}") + } + + private def warmupQuery(userId: Long): t.RefreshRequest = { + t.RefreshRequest( + userId = userId, + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice, + context = Some( + t.PushContext( + skipFilters = Some(true), + darkWrite = Some(true) + )) + ) + } + + private def assertWarmupResponse( + result: Try[Response[t.PushService.Refresh.SuccessType]] + ): Unit = { + result match { + case Return(_) => // ok + case Throw(exception) => + warn("Error performing warm-up request.") + error(exception.getMessage, exception) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushServiceMain.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushServiceMain.scala new file mode 100644 index 000000000..c60f6e352 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/PushServiceMain.scala @@ -0,0 +1,193 @@ +package com.twitter.frigate.pushservice + +import com.twitter.discovery.common.environment.modules.EnvironmentModule +import com.twitter.finagle.Filter +import com.twitter.finatra.annotations.DarkTrafficFilterType +import com.twitter.finatra.decider.modules.DeciderModule +import com.twitter.finatra.http.HttpServer +import com.twitter.finatra.http.filters.CommonFilters +import com.twitter.finatra.http.routing.HttpRouter +import com.twitter.finatra.mtls.http.{Mtls => HttpMtls} +import com.twitter.finatra.mtls.thriftmux.{Mtls => ThriftMtls} +import com.twitter.finatra.mtls.thriftmux.filters.MtlsServerSessionTrackerFilter +import com.twitter.finatra.thrift.ThriftServer +import com.twitter.finatra.thrift.filters.ExceptionMappingFilter +import com.twitter.finatra.thrift.filters.LoggingMDCFilter +import com.twitter.finatra.thrift.filters.StatsFilter +import com.twitter.finatra.thrift.filters.ThriftMDCFilter +import com.twitter.finatra.thrift.filters.TraceIdMDCFilter +import 
com.twitter.finatra.thrift.routing.ThriftRouter +import com.twitter.frigate.common.logger.MRLoggerGlobalVariables +import com.twitter.frigate.pushservice.controller.PushServiceController +import com.twitter.frigate.pushservice.module._ +import com.twitter.inject.TwitterModule +import com.twitter.inject.annotations.Flags +import com.twitter.inject.thrift.modules.ThriftClientIdModule +import com.twitter.logging.BareFormatter +import com.twitter.logging.Level +import com.twitter.logging.LoggerFactory +import com.twitter.logging.{Logging => JLogging} +import com.twitter.logging.QueueingHandler +import com.twitter.logging.ScribeHandler +import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule +import com.twitter.product_mixer.core.module.ABDeciderModule +import com.twitter.product_mixer.core.module.FeatureSwitchesModule +import com.twitter.product_mixer.core.module.StratoClientModule + +object PushServiceMain extends PushServiceFinatraServer + +class PushServiceFinatraServer + extends ThriftServer + with ThriftMtls + with HttpServer + with HttpMtls + with JLogging { + + override val name = "PushService" + + override val modules: Seq[TwitterModule] = { + Seq( + ABDeciderModule, + DeciderModule, + FeatureSwitchesModule, + FilterModule, + FlagModule, + EnvironmentModule, + ThriftClientIdModule, + DeployConfigModule, + ProductMixerFlagModule, + StratoClientModule, + PushHandlerModule, + PushTargetUserBuilderModule, + PushServiceDarkTrafficModule, + LoggedOutPushTargetUserBuilderModule, + new ThriftWebFormsModule(this), + ) + } + + override def configureThrift(router: ThriftRouter): Unit = { + router + .filter[ExceptionMappingFilter] + .filter[LoggingMDCFilter] + .filter[TraceIdMDCFilter] + .filter[ThriftMDCFilter] + .filter[MtlsServerSessionTrackerFilter] + .filter[StatsFilter] + .filter[Filter.TypeAgnostic, DarkTrafficFilterType] + .add[PushServiceController] + } + + override def configureHttp(router: HttpRouter): Unit = + router + 
.filter[CommonFilters] + + override protected def start(): Unit = { + MRLoggerGlobalVariables.setRequiredFlags( + traceLogFlag = injector.instance[Boolean](Flags.named(FlagModule.mrLoggerIsTraceAll.name)), + nthLogFlag = injector.instance[Boolean](Flags.named(FlagModule.mrLoggerNthLog.name)), + nthLogValFlag = injector.instance[Long](Flags.named(FlagModule.mrLoggerNthVal.name)) + ) + } + + override protected def warmup(): Unit = { + handle[PushMixerThriftServerWarmupHandler]() + } + + override protected def configureLoggerFactories(): Unit = { + loggerFactories.foreach { _() } + } + + override def loggerFactories: List[LoggerFactory] = { + val scribeScope = statsReceiver.scope("scribe") + List( + LoggerFactory( + level = Some(levelFlag()), + handlers = handlers + ), + LoggerFactory( + node = "request_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 10000, + handler = ScribeHandler( + category = "frigate_pushservice_log", + formatter = BareFormatter, + statsReceiver = scribeScope.scope("frigate_pushservice_log") + ) + ) :: Nil + ), + LoggerFactory( + node = "notification_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 10000, + handler = ScribeHandler( + category = "frigate_notifier", + formatter = BareFormatter, + statsReceiver = scribeScope.scope("frigate_notifier") + ) + ) :: Nil + ), + LoggerFactory( + node = "push_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 10000, + handler = ScribeHandler( + category = "test_frigate_push", + formatter = BareFormatter, + statsReceiver = scribeScope.scope("test_frigate_push") + ) + ) :: Nil + ), + LoggerFactory( + node = "push_subsample_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 2500, + handler = ScribeHandler( + category = "magicrecs_candidates_subsample_scribe", + maxMessagesPerTransaction = 
250, + maxMessagesToBuffer = 2500, + formatter = BareFormatter, + statsReceiver = scribeScope.scope("magicrecs_candidates_subsample_scribe") + ) + ) :: Nil + ), + LoggerFactory( + node = "mr_request_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 2500, + handler = ScribeHandler( + category = "mr_request_scribe", + maxMessagesPerTransaction = 250, + maxMessagesToBuffer = 2500, + formatter = BareFormatter, + statsReceiver = scribeScope.scope("mr_request_scribe") + ) + ) :: Nil + ), + LoggerFactory( + node = "high_quality_candidates_scribe", + level = Some(Level.INFO), + useParents = false, + handlers = QueueingHandler( + maxQueueSize = 2500, + handler = ScribeHandler( + category = "frigate_high_quality_candidates_log", + maxMessagesPerTransaction = 250, + maxMessagesToBuffer = 2500, + formatter = BareFormatter, + statsReceiver = scribeScope.scope("high_quality_candidates_scribe") + ) + ) :: Nil + ), + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ContentRecommenderMixerAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ContentRecommenderMixerAdaptor.scala new file mode 100644 index 000000000..946923fb9 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ContentRecommenderMixerAdaptor.scala @@ -0,0 +1,323 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.contentrecommender.thriftscala.MetricTag +import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest +import com.twitter.cr_mixer.thriftscala.NotificationsContext +import com.twitter.cr_mixer.thriftscala.Product +import com.twitter.cr_mixer.thriftscala.ProductContext +import com.twitter.cr_mixer.thriftscala.{MetricTag => CrMixerMetricTag} +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.AlgorithmScore +import 
com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.CrMixerCandidate +import com.twitter.frigate.common.base.TopicCandidate +import com.twitter.frigate.common.base.TopicProofTweetCandidate +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutInNetworkTweets +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.store.CrMixerTweetStore +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.util.AdaptorUtils +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.pushservice.util.TopicsUtil +import com.twitter.frigate.pushservice.util.TweetWithTopicProof +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.product_mixer.core.thriftscala.ClientContext +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.topiclisting.utt.LocalizedEntity +import com.twitter.tsp.thriftscala.TopicSocialProofRequest +import com.twitter.tsp.thriftscala.TopicSocialProofResponse +import com.twitter.util.Future +import scala.collection.Map + +case class ContentRecommenderMixerAdaptor( + crMixerTweetStore: CrMixerTweetStore, + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + edgeStore: ReadableStore[RelationEdge, Boolean], + topicSocialProofServiceStore: 
ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + uttEntityHydrationStore: UttEntityHydrationStore, + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override val name: String = this.getClass.getSimpleName + + private[this] val stats = globalStats.scope("ContentRecommenderMixerAdaptor") + private[this] val numOfValidAuthors = stats.stat("num_of_valid_authors") + private[this] val numOutOfMaximumDropped = stats.stat("dropped_due_out_of_maximum") + private[this] val totalInputRecs = stats.counter("input_recs") + private[this] val totalOutputRecs = stats.stat("output_recs") + private[this] val totalRequests = stats.counter("total_requests") + private[this] val nonReplyTweetsCounter = stats.counter("non_reply_tweets") + private[this] val totalOutNetworkRecs = stats.counter("out_network_tweets") + private[this] val totalInNetworkRecs = stats.counter("in_network_tweets") + + /** + * Builds OON raw candidates based on input OON Tweets + */ + def buildOONRawCandidates( + inputTarget: Target, + oonTweets: Seq[TweetyPieResult], + tweetScoreMap: Map[Long, Double], + tweetIdToTagsMap: Map[Long, Seq[CrMixerMetricTag]], + maxNumOfCandidates: Int + ): Option[Seq[RawCandidate]] = { + val cands = oonTweets.flatMap { tweetResult => + val tweetId = tweetResult.tweet.id + generateOONRawCandidate( + inputTarget, + tweetId, + Some(tweetResult), + tweetScoreMap, + tweetIdToTagsMap + ) + } + + val candidates = restrict( + maxNumOfCandidates, + cands, + numOutOfMaximumDropped, + totalOutputRecs + ) + + Some(candidates) + } + + /** + * Builds a single RawCandidate With TopicProofTweetCandidate + */ + def buildTopicTweetRawCandidate( + inputTarget: Target, + tweetWithTopicProof: TweetWithTopicProof, + localizedEntity: LocalizedEntity, + tags: Option[Seq[MetricTag]], + ): RawCandidate with TopicProofTweetCandidate = { + new RawCandidate with TopicProofTweetCandidate { + override def 
target: Target = inputTarget + override def topicListingSetting: Option[String] = Some( + tweetWithTopicProof.topicListingSetting) + override def tweetId: Long = tweetWithTopicProof.tweetId + override def tweetyPieResult: Option[TweetyPieResult] = Some( + tweetWithTopicProof.tweetyPieResult) + override def semanticCoreEntityId: Option[Long] = Some(tweetWithTopicProof.topicId) + override def localizedUttEntity: Option[LocalizedEntity] = Some(localizedEntity) + override def algorithmCR: Option[String] = tweetWithTopicProof.algorithmCR + override def tagsCR: Option[Seq[MetricTag]] = tags + override def isOutOfNetwork: Boolean = tweetWithTopicProof.isOON + } + } + + /** + * Takes a group of TopicTweets and transforms them into RawCandidates + */ + def buildTopicTweetRawCandidates( + inputTarget: Target, + topicProofCandidates: Seq[TweetWithTopicProof], + tweetIdToTagsMap: Map[Long, Seq[CrMixerMetricTag]], + maxNumberOfCands: Int + ): Future[Option[Seq[RawCandidate]]] = { + val semanticCoreEntityIds = topicProofCandidates + .map(_.topicId) + .toSet + + TopicsUtil + .getLocalizedEntityMap(inputTarget, semanticCoreEntityIds, uttEntityHydrationStore) + .map { localizedEntityMap => + val rawCandidates = topicProofCandidates.collect { + case topicSocialProof: TweetWithTopicProof + if localizedEntityMap.contains(topicSocialProof.topicId) => + // Once we deprecate CR calls, we should replace this code to use the CrMixerMetricTag + val tags = tweetIdToTagsMap.get(topicSocialProof.tweetId).map { + _.flatMap { tag => MetricTag.get(tag.value) } + } + buildTopicTweetRawCandidate( + inputTarget, + topicSocialProof, + localizedEntityMap(topicSocialProof.topicId), + tags + ) + } + + val candResult = restrict( + maxNumberOfCands, + rawCandidates, + numOutOfMaximumDropped, + totalOutputRecs + ) + + Some(candResult) + } + } + + private def generateOONRawCandidate( + inputTarget: Target, + id: Long, + result: Option[TweetyPieResult], + tweetScoreMap: Map[Long, Double], + tweetIdToTagsMap: 
Map[Long, Seq[CrMixerMetricTag]] + ): Option[RawCandidate with TweetCandidate] = { + val tagsFromCR = tweetIdToTagsMap.get(id).map { _.flatMap { tag => MetricTag.get(tag.value) } } + val candidate = new RawCandidate with CrMixerCandidate with TopicCandidate with AlgorithmScore { + override val tweetId = id + override val target = inputTarget + override val tweetyPieResult = result + override val localizedUttEntity = None + override val semanticCoreEntityId = None + override def commonRecType = + getMediaBasedCRT( + CommonRecommendationType.TwistlyTweet, + CommonRecommendationType.TwistlyPhoto, + CommonRecommendationType.TwistlyVideo) + override def tagsCR = tagsFromCR + override def algorithmScore = tweetScoreMap.get(id) + override def algorithmCR = None + } + Some(candidate) + } + + private def restrict( + maxNumToReturn: Int, + candidates: Seq[RawCandidate], + numOutOfMaximumDropped: Stat, + totalOutputRecs: Stat + ): Seq[RawCandidate] = { + val newCandidates = candidates.take(maxNumToReturn) + val numDropped = candidates.length - newCandidates.length + numOutOfMaximumDropped.add(numDropped) + totalOutputRecs.add(newCandidates.size) + newCandidates + } + + private def buildCrMixerRequest( + target: Target, + countryCode: Option[String], + language: Option[String], + seenTweets: Seq[Long] + ): CrMixerTweetRequest = { + CrMixerTweetRequest( + clientContext = ClientContext( + userId = Some(target.targetId), + countryCode = countryCode, + languageCode = language + ), + product = Product.Notifications, + productContext = Some(ProductContext.NotificationsContext(NotificationsContext())), + excludedTweetIds = Some(seenTweets) + ) + } + + private def selectCandidatesToSendBasedOnSettings( + isRecommendationsEligible: Boolean, + isTopicsEligible: Boolean, + oonRawCandidates: Option[Seq[RawCandidate]], + topicTweetCandidates: Option[Seq[RawCandidate]] + ): Option[Seq[RawCandidate]] = { + if (isRecommendationsEligible && isTopicsEligible) { + 
Some(topicTweetCandidates.getOrElse(Seq.empty) ++ oonRawCandidates.getOrElse(Seq.empty)) + } else if (isRecommendationsEligible) { + oonRawCandidates + } else if (isTopicsEligible) { + topicTweetCandidates + } else None + } + + override def get(target: Target): Future[Option[Seq[RawCandidate]]] = { + Future + .join( + target.seenTweetIds, + target.countryCode, + target.inferredUserDeviceLanguage, + PushDeviceUtil.isTopicsEligible(target), + PushDeviceUtil.isRecommendationsEligible(target) + ).flatMap { + case (seenTweets, countryCode, language, isTopicsEligible, isRecommendationsEligible) => + val request = buildCrMixerRequest(target, countryCode, language, seenTweets) + crMixerTweetStore.getTweetRecommendations(request).flatMap { + case Some(response) => + totalInputRecs.incr(response.tweets.size) + totalRequests.incr() + AdaptorUtils + .getTweetyPieResults( + response.tweets.map(_.tweetId).toSet, + tweetyPieStore).flatMap { tweetyPieResultMap => + filterOutInNetworkTweets( + target, + filterOutReplyTweet(tweetyPieResultMap.toMap, nonReplyTweetsCounter), + edgeStore, + numOfValidAuthors).flatMap { + outNetworkTweetsWithId: Seq[(Long, TweetyPieResult)] => + totalOutNetworkRecs.incr(outNetworkTweetsWithId.size) + totalInNetworkRecs.incr(response.tweets.size - outNetworkTweetsWithId.size) + val outNetworkTweets: Seq[TweetyPieResult] = outNetworkTweetsWithId.map { + case (_, tweetyPieResult) => tweetyPieResult + } + + val tweetIdToTagsMap = response.tweets.map { tweet => + tweet.tweetId -> tweet.metricTags.getOrElse(Seq.empty) + }.toMap + + val tweetScoreMap = response.tweets.map { tweet => + tweet.tweetId -> tweet.score + }.toMap + + val maxNumOfCandidates = + target.params(PushFeatureSwitchParams.NumberOfMaxCrMixerCandidatesParam) + + val oonRawCandidates = + buildOONRawCandidates( + target, + outNetworkTweets, + tweetScoreMap, + tweetIdToTagsMap, + maxNumOfCandidates) + + TopicsUtil + .getTopicSocialProofs( + target, + outNetworkTweets, + 
topicSocialProofServiceStore, + edgeStore, + PushFeatureSwitchParams.TopicProofTweetCandidatesTopicScoreThreshold).flatMap { + tweetsWithTopicProof => + buildTopicTweetRawCandidates( + target, + tweetsWithTopicProof, + tweetIdToTagsMap, + maxNumOfCandidates) + }.map { topicTweetCandidates => + selectCandidatesToSendBasedOnSettings( + isRecommendationsEligible, + isTopicsEligible, + oonRawCandidates, + topicTweetCandidates) + } + } + } + case _ => Future.None + } + } + } + + /** + * The source is available when the target is eligible for recommendations or topics and PushParams.ContentRecommenderMixerAdaptorDecider is enabled + */ + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + Future + .join( + PushDeviceUtil.isRecommendationsEligible(target), + PushDeviceUtil.isTopicsEligible(target) + ).map { + case (isRecommendationsEligible, isTopicsEligible) => + (isRecommendationsEligible || isTopicsEligible) && + target.params(PushParams.ContentRecommenderMixerAdaptorDecider) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/EarlyBirdFirstDegreeCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/EarlyBirdFirstDegreeCandidateAdaptor.scala new file mode 100644 index 000000000..ab631841a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/EarlyBirdFirstDegreeCandidateAdaptor.scala @@ -0,0 +1,293 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.candidate._ +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import
com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.hermit.store.tweetypie.UserTweet +import com.twitter.recos.recos_common.thriftscala.SocialProofType +import com.twitter.search.common.features.thriftscala.ThriftSearchResultFeatures +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.Param +import com.twitter.util.Future +import com.twitter.util.Time +import scala.collection.Map + +case class EarlyBirdFirstDegreeCandidateAdaptor( + earlyBirdFirstDegreeCandidates: CandidateSource[ + EarlybirdCandidateSource.Query, + EarlybirdCandidate + ], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + userTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult], + maxResultsParam: Param[Int], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + type EBCandidate = EarlybirdCandidate with TweetDetails + private val stats = globalStats.scope("EarlyBirdFirstDegreeAdaptor") + private val earlyBirdCandsStat: Stat = stats.stat("early_bird_cands_dist") + private val emptyEarlyBirdCands = stats.counter("empty_early_bird_candidates") + private val seedSetEmpty = stats.counter("empty_seedset") + private val seenTweetsStat = stats.stat("filtered_by_seen_tweets") + private val emptyTweetyPieResult = stats.stat("empty_tweetypie_result") + private val nonReplyTweetsCounter = stats.counter("non_reply_tweets") + private val enableRetweets = stats.counter("enable_retweets") + private val f1withoutSocialContexts = stats.counter("f1_without_social_context") + private val userTweetTweetyPieStoreCounter = stats.counter("user_tweet_tweetypie_store") + + override val name: String = earlyBirdFirstDegreeCandidates.name + + private def getAllSocialContextActions( + socialProofTypes: Seq[(SocialProofType, Seq[Long])] + ): 
Seq[SocialContextAction] = { + socialProofTypes.flatMap { + case (SocialProofType.Favorite, scIds) => + scIds.map { scId => + SocialContextAction( + scId, + Time.now.inMilliseconds, + socialContextActionType = Some(SocialContextActionType.Favorite) + ) + } + case (SocialProofType.Retweet, scIds) => + scIds.map { scId => + SocialContextAction( + scId, + Time.now.inMilliseconds, + socialContextActionType = Some(SocialContextActionType.Retweet) + ) + } + case (SocialProofType.Reply, scIds) => + scIds.map { scId => + SocialContextAction( + scId, + Time.now.inMilliseconds, + socialContextActionType = Some(SocialContextActionType.Reply) + ) + } + case (SocialProofType.Tweet, scIds) => + scIds.map { scId => + SocialContextAction( + scId, + Time.now.inMilliseconds, + socialContextActionType = Some(SocialContextActionType.Tweet) + ) + } + case _ => Nil + } + } + + private def generateRetweetCandidate( + inputTarget: Target, + candidate: EBCandidate, + scIds: Seq[Long], + socialProofTypes: Seq[(SocialProofType, Seq[Long])] + ): RawCandidate = { + val scActions = scIds.map { scId => SocialContextAction(scId, Time.now.inMilliseconds) } + new RawCandidate with TweetRetweetCandidate with EarlybirdTweetFeatures { + override val socialContextActions = scActions + override val socialContextAllTypeActions = getAllSocialContextActions(socialProofTypes) + override val tweetId = candidate.tweetId + override val target = inputTarget + override val tweetyPieResult = candidate.tweetyPieResult + override val features = candidate.features + } + } + + private def generateF1CandidateWithoutSocialContext( + inputTarget: Target, + candidate: EBCandidate + ): RawCandidate = { + f1withoutSocialContexts.incr() + new RawCandidate with F1FirstDegree with EarlybirdTweetFeatures { + override val tweetId = candidate.tweetId + override val target = inputTarget + override val tweetyPieResult = candidate.tweetyPieResult + override val features = candidate.features + } + } + + private def 
generateEarlyBirdCandidate( + id: Long, + result: Option[TweetyPieResult], + ebFeatures: Option[ThriftSearchResultFeatures] + ): EBCandidate = { + new EarlybirdCandidate with TweetDetails { + override val tweetyPieResult: Option[TweetyPieResult] = result + override val tweetId: Long = id + override val features: Option[ThriftSearchResultFeatures] = ebFeatures + } + } + + private def filterOutSeenTweets(seenTweetIds: Seq[Long], inputTweetIds: Seq[Long]): Seq[Long] = { + inputTweetIds.filterNot(seenTweetIds.toSet[Long]) // set is built once: O(1) membership per tweet instead of a Seq scan + } + + private def filterInvalidTweets( + tweetIds: Seq[Long], + target: Target + ): Future[Seq[(Long, TweetyPieResult)]] = { + + val resMap = { + if (target.params(PushFeatureSwitchParams.EnableF1FromProtectedTweetAuthors)) { + userTweetTweetyPieStoreCounter.incr() + val keys = tweetIds.map { tweetId => + UserTweet(tweetId, Some(target.targetId)) + } + + userTweetTweetyPieStore + .multiGet(keys.toSet).map { + case (userTweet, resultFut) => + userTweet.tweetId -> resultFut + }.toMap + } else { + (target.params(PushFeatureSwitchParams.EnableVFInTweetypie) match { + case true => tweetyPieStore + case false => tweetyPieStoreNoVF + }).multiGet(tweetIds.toSet) + } + } + Future.collect(resMap).map { tweetyPieResultMap => + val cands = filterOutReplyTweet(tweetyPieResultMap, nonReplyTweetsCounter).collect { + case (id: Long, Some(result)) => + id -> result + } + + emptyTweetyPieResult.add(tweetyPieResultMap.size - cands.size) + cands.toSeq + } + } + + private def getEBRetweetCandidates( + inputTarget: Target, + retweets: Seq[(Long, TweetyPieResult)] + ): Seq[RawCandidate] = { + retweets.flatMap { + case (_, tweetypieResult) => + tweetypieResult.tweet.coreData.flatMap { coreData => + tweetypieResult.sourceTweet.map { sourceTweet => + val tweetId = sourceTweet.id + val scId = coreData.userId + val socialProofTypes = Seq((SocialProofType.Retweet, Seq(scId))) + val candidate = generateEarlyBirdCandidate( + tweetId, + Some(TweetyPieResult(sourceTweet, None, None)),
+ None + ) + generateRetweetCandidate( + inputTarget, + candidate, + Seq(scId), + socialProofTypes + ) + } + } + } + } + + private def getEBFirstDegreeCands( + tweets: Seq[(Long, TweetyPieResult)], + ebTweetIdMap: Map[Long, Option[ThriftSearchResultFeatures]] + ): Seq[EBCandidate] = { + tweets.map { + case (id, tweetypieResult) => + val features = ebTweetIdMap.getOrElse(id, None) + generateEarlyBirdCandidate(id, Some(tweetypieResult), features) + } + } + + /** + * Returns the combined raw candidates: F1 recs (hydrated without social context) followed by the retweet candidates + */ + def buildRawCandidates( + inputTarget: Target, + firstDegreeCandidates: Seq[EBCandidate], + retweetCandidates: Seq[RawCandidate] + ): Seq[RawCandidate] = { + val hydratedF1Recs = + firstDegreeCandidates.map(generateF1CandidateWithoutSocialContext(inputTarget, _)) + hydratedF1Recs ++ retweetCandidates + } + + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate]]] = { + inputTarget.seedsWithWeight.flatMap { seedsetOpt => + val seedsetMap = seedsetOpt.getOrElse(Map.empty) + + if (seedsetMap.isEmpty) { + seedSetEmpty.incr() + Future.None + } else { + val maxResultsToReturn = inputTarget.params(maxResultsParam) + val maxTweetAge = inputTarget.params(PushFeatureSwitchParams.F1CandidateMaxTweetAgeParam) + val earlybirdQuery = EarlybirdCandidateSource.Query( + maxNumResultsToReturn = maxResultsToReturn, + seedset = seedsetMap, + maxConsecutiveResultsByTheSameUser = Some(1), + maxTweetAge = maxTweetAge, + disableTimelinesMLModel = false, + searcherId = Some(inputTarget.targetId), + isProtectTweetsEnabled = + inputTarget.params(PushFeatureSwitchParams.EnableF1FromProtectedTweetAuthors), + followedUserIds = Some(seedsetMap.keySet.toSeq) + ) + + Future + .join(inputTarget.seenTweetIds, earlyBirdFirstDegreeCandidates.get(earlybirdQuery)) + .flatMap { + case (seenTweetIds, Some(candidates)) => + earlyBirdCandsStat.add(candidates.size) + + val ebTweetIdMap = candidates.map { cand =>
cand.tweetId -> cand.features }.toMap + + val ebTweetIds = ebTweetIdMap.keys.toSeq + + val tweetIds = filterOutSeenTweets(seenTweetIds, ebTweetIds) + seenTweetsStat.add(ebTweetIds.size - tweetIds.size) + + filterInvalidTweets(tweetIds, inputTarget) + .map { validTweets => + val (retweets, tweets) = validTweets.partition { + case (_, tweetypieResult) => + tweetypieResult.sourceTweet.isDefined + } + + val firstDegreeCandidates = getEBFirstDegreeCands(tweets, ebTweetIdMap) + + val retweetCandidates = { + if (inputTarget.params(PushParams.EarlyBirdSCBasedCandidatesParam) && + inputTarget.params(PushParams.MRTweetRetweetRecsParam)) { + enableRetweets.incr() + getEBRetweetCandidates(inputTarget, retweets) + } else Nil + } + + Some( + buildRawCandidates( + inputTarget, + firstDegreeCandidates, + retweetCandidates + )) + } + + case _ => + emptyEarlyBirdCands.incr() + Future.None + } + } + } + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil.isRecommendationsEligible(target) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ExploreVideoTweetCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ExploreVideoTweetCandidateAdaptor.scala new file mode 100644 index 000000000..345fdbd3c --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ExploreVideoTweetCandidateAdaptor.scala @@ -0,0 +1,120 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.explore_ranker.thriftscala.ExploreRankerProductResponse +import com.twitter.explore_ranker.thriftscala.ExploreRankerRequest +import com.twitter.explore_ranker.thriftscala.ExploreRankerResponse +import com.twitter.explore_ranker.thriftscala.ExploreRecommendation +import com.twitter.explore_ranker.thriftscala.ImmersiveRecsResponse +import com.twitter.explore_ranker.thriftscala.ImmersiveRecsResult +import com.twitter.explore_ranker.thriftscala.NotificationsVideoRecs 
+import com.twitter.explore_ranker.thriftscala.Product +import com.twitter.explore_ranker.thriftscala.ProductContext +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.AdaptorUtils +import com.twitter.frigate.pushservice.util.MediaCRT +import com.twitter.frigate.pushservice.util.PushAdaptorUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.product_mixer.core.thriftscala.ClientContext +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +case class ExploreVideoTweetCandidateAdaptor( + exploreRankerStore: ReadableStore[ExploreRankerRequest, ExploreRankerResponse], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override def name: String = this.getClass.getSimpleName + private[this] val stats = globalStats.scope("ExploreVideoTweetCandidateAdaptor") + private[this] val totalInputRecs = stats.stat("input_recs") + private[this] val totalRequests = stats.counter("total_requests") + private[this] val totalEmptyResponse = stats.counter("total_empty_response") + + private def buildExploreRankerRequest( + target: Target, + countryCode: Option[String], + language: Option[String], + ): ExploreRankerRequest = { + ExploreRankerRequest( + clientContext = ClientContext( + userId = Some(target.targetId), + countryCode = countryCode, 
+ languageCode = language, + ), + product = Product.NotificationsVideoRecs, + productContext = Some(ProductContext.NotificationsVideoRecs(NotificationsVideoRecs())), + maxResults = Some(target.params(PushFeatureSwitchParams.MaxExploreVideoTweets)) + ) + } + + override def get(target: Target): Future[Option[Seq[RawCandidate]]] = { + Future + .join( + target.countryCode, + target.inferredUserDeviceLanguage + ).flatMap { + case (countryCode, language) => + val request = buildExploreRankerRequest(target, countryCode, language) + exploreRankerStore.get(request).flatMap { + case Some(response) => + val exploreResonseTweetIds = response match { + case ExploreRankerResponse(ExploreRankerProductResponse + .ImmersiveRecsResponse(ImmersiveRecsResponse(immersiveRecsResult))) => + immersiveRecsResult.collect { + case ImmersiveRecsResult(ExploreRecommendation + .ExploreTweetRecommendation(exploreTweetRecommendation)) => + exploreTweetRecommendation.tweetId + } + case _ => + Seq.empty + } + + totalInputRecs.add(exploreResonseTweetIds.size) + totalRequests.incr() + AdaptorUtils + .getTweetyPieResults(exploreResonseTweetIds.toSet, tweetyPieStore).map { + tweetyPieResultMap => + val candidates = tweetyPieResultMap.values.flatten + .map(buildVideoRawCandidates(target, _)) + Some(candidates.toSeq) + } + case _ => + totalEmptyResponse.incr() + Future.None + } + case _ => + Future.None + } + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil.isRecommendationsEligible(target).map { userRecommendationsEligible => + userRecommendationsEligible && target.params(PushFeatureSwitchParams.EnableExploreVideoTweets) + } + } + private def buildVideoRawCandidates( + target: Target, + tweetyPieResult: TweetyPieResult + ): RawCandidate with OutOfNetworkTweetCandidate = { + PushAdaptorUtil.generateOutOfNetworkTweetCandidates( + inputTarget = target, + id = tweetyPieResult.tweet.id, + mediaCRT = MediaCRT( + CommonRecommendationType.ExploreVideoTweet, + 
CommonRecommendationType.ExploreVideoTweet, + CommonRecommendationType.ExploreVideoTweet + ), + result = Some(tweetyPieResult), + localizedEntity = None + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/FRSTweetCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/FRSTweetCandidateAdaptor.scala new file mode 100644 index 000000000..49610c645 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/FRSTweetCandidateAdaptor.scala @@ -0,0 +1,272 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.cr_mixer.thriftscala.FrsTweetRequest +import com.twitter.cr_mixer.thriftscala.NotificationsContext +import com.twitter.cr_mixer.thriftscala.Product +import com.twitter.cr_mixer.thriftscala.ProductContext +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.store.CrMixerTweetStore +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.util.MediaCRT +import com.twitter.frigate.pushservice.util.PushAdaptorUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.pushservice.util.TopicsUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.constants.AlgorithmFeedbackTokens +import com.twitter.hermit.model.Algorithm.Algorithm +import 
com.twitter.hermit.model.Algorithm.CrowdSearchAccounts +import com.twitter.hermit.model.Algorithm.ForwardEmailBook +import com.twitter.hermit.model.Algorithm.ForwardPhoneBook +import com.twitter.hermit.model.Algorithm.ReverseEmailBookIbis +import com.twitter.hermit.model.Algorithm.ReversePhoneBook +import com.twitter.hermit.store.tweetypie.UserTweet +import com.twitter.product_mixer.core.thriftscala.ClientContext +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.tsp.thriftscala.TopicSocialProofRequest +import com.twitter.tsp.thriftscala.TopicSocialProofResponse +import com.twitter.util.Future + +object FRSAlgorithmFeedbackTokenUtil { + private val crtsByAlgoToken = Map( + getAlgorithmToken(ReverseEmailBookIbis) -> CommonRecommendationType.ReverseAddressbookTweet, + getAlgorithmToken(ReversePhoneBook) -> CommonRecommendationType.ReverseAddressbookTweet, + getAlgorithmToken(ForwardEmailBook) -> CommonRecommendationType.ForwardAddressbookTweet, + getAlgorithmToken(ForwardPhoneBook) -> CommonRecommendationType.ForwardAddressbookTweet, + getAlgorithmToken(CrowdSearchAccounts) -> CommonRecommendationType.CrowdSearchTweet + ) + + def getAlgorithmToken(algorithm: Algorithm): Int = { + AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap(algorithm) + } + + def getCRTForAlgoToken(algorithmToken: Int): Option[CommonRecommendationType] = { + crtsByAlgoToken.get(algorithmToken) + } +} + +case class FRSTweetCandidateAdaptor( + crMixerTweetStore: CrMixerTweetStore, + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + userTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult], + uttEntityHydrationStore: UttEntityHydrationStore, + topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with 
CandidateSourceEligible[Target, RawCandidate] { + + private val stats = globalStats.scope(this.getClass.getSimpleName) + private val crtStats = stats.scope("CandidateDistribution") + private val totalRequests = stats.counter("total_requests") + + // Candidate Distribution stats + private val reverseAddressbookCounter = crtStats.counter("reverse_addressbook") + private val forwardAddressbookCounter = crtStats.counter("forward_addressbook") + private val frsTweetCounter = crtStats.counter("frs_tweet") + private val nonReplyTweetsCounter = stats.counter("non_reply_tweets") + private val crtToCounterMapping: Map[CommonRecommendationType, Counter] = Map( + CommonRecommendationType.ReverseAddressbookTweet -> reverseAddressbookCounter, + CommonRecommendationType.ForwardAddressbookTweet -> forwardAddressbookCounter, + CommonRecommendationType.FrsTweet -> frsTweetCounter + ) + + private val emptyTweetyPieResult = stats.stat("empty_tweetypie_result") + + private[this] val numberReturnedCandidates = stats.stat("returned_candidates_from_earlybird") + private[this] val numberCandidateWithTopic: Counter = stats.counter("num_can_with_topic") + private[this] val numberCandidateWithoutTopic: Counter = stats.counter("num_can_without_topic") + + private val userTweetTweetyPieStoreCounter = stats.counter("user_tweet_tweetypie_store") + + override val name: String = this.getClass.getSimpleName + + private def filterInvalidTweets( + tweetIds: Seq[Long], + target: Target + ): Future[Map[Long, TweetyPieResult]] = { + val resMap = { + if (target.params(PushFeatureSwitchParams.EnableF1FromProtectedTweetAuthors)) { + userTweetTweetyPieStoreCounter.incr() + val keys = tweetIds.map { tweetId => + UserTweet(tweetId, Some(target.targetId)) + } + userTweetTweetyPieStore + .multiGet(keys.toSet).map { + case (userTweet, resultFut) => + userTweet.tweetId -> resultFut + }.toMap + } else { + (if (target.params(PushFeatureSwitchParams.EnableVFInTweetypie)) { + tweetyPieStore + } else { + 
tweetyPieStoreNoVF + }).multiGet(tweetIds.toSet) + } + } + + Future.collect(resMap).map { tweetyPieResultMap => + // Filter out replies and generate earlybird candidates only for non-empty tweetypie result + val cands = filterOutReplyTweet(tweetyPieResultMap, nonReplyTweetsCounter).collect { + case (id: Long, Some(result)) => + id -> result + } + + emptyTweetyPieResult.add(tweetyPieResultMap.size - cands.size) + cands + } + } + + private def buildRawCandidates( + target: Target, + ebCandidates: Seq[FRSTweetCandidate] + ): Future[Option[Seq[RawCandidate with TweetCandidate]]] = { + + val enableTopic = target.params(PushFeatureSwitchParams.EnableFrsTweetCandidatesTopicAnnotation) + val topicScoreThre = + target.params(PushFeatureSwitchParams.FrsTweetCandidatesTopicScoreThreshold) + + val ebTweets = ebCandidates.map { ebCandidate => + ebCandidate.tweetId -> ebCandidate.tweetyPieResult + }.toMap + + val tweetIdLocalizedEntityMapFut = TopicsUtil.getTweetIdLocalizedEntityMap( + target, + ebTweets, + uttEntityHydrationStore, + topicSocialProofServiceStore, + enableTopic, + topicScoreThre + ) + + Future.join(target.deviceInfo, tweetIdLocalizedEntityMapFut).map { + case (Some(deviceInfo), tweetIdLocalizedEntityMap) => + val candidates = ebCandidates + .map { ebCandidate => + val crt = ebCandidate.commonRecType + crtToCounterMapping.get(crt).foreach(_.incr()) + + val tweetId = ebCandidate.tweetId + val localizedEntityOpt = { + // Only attach a topic entity when one exists for this tweet and the device is topics-eligible + if (tweetIdLocalizedEntityMap.contains(tweetId) && + deviceInfo.isTopicsEligible) { + tweetIdLocalizedEntityMap(tweetId) + } else { + None + } + } + + PushAdaptorUtil.generateOutOfNetworkTweetCandidates( + inputTarget = target, + id = ebCandidate.tweetId, + mediaCRT = MediaCRT( + crt, + crt, + crt + ), + result = ebCandidate.tweetyPieResult, + localizedEntity = localizedEntityOpt) + }.filter { candidate => + // If user only has the topic setting enabled, filter out all non-topic cands +
deviceInfo.isRecommendationsEligible || (deviceInfo.isTopicsEligible && candidate.semanticCoreEntityId.nonEmpty) + } + + candidates.foreach { candidate => + if (candidate.semanticCoreEntityId.nonEmpty) { + numberCandidateWithTopic.incr() + } else { + numberCandidateWithoutTopic.incr() + } + } + + numberReturnedCandidates.add(candidates.length) + Some(candidates) + case _ => Some(Seq.empty) + } + } + + def getTweetCandidatesFromCrMixer( + inputTarget: Target, + showAllResultsFromFrs: Boolean, + ): Future[Option[Seq[RawCandidate with TweetCandidate]]] = { + Future + .join( + inputTarget.seenTweetIds, + inputTarget.pushRecItems, + inputTarget.countryCode, + inputTarget.targetLanguage).flatMap { + case (seenTweetIds, pastRecItems, countryCode, language) => + val pastUserRecs = pastRecItems.userIds.toSeq + val request = FrsTweetRequest( + clientContext = ClientContext( + userId = Some(inputTarget.targetId), + countryCode = countryCode, + languageCode = language + ), + product = Product.Notifications, + productContext = Some(ProductContext.NotificationsContext(NotificationsContext())), + excludedUserIds = Some(pastUserRecs), + excludedTweetIds = Some(seenTweetIds) + ) + crMixerTweetStore.getFRSTweetCandidates(request).flatMap { + case Some(response) => + val tweetIds = response.tweets.map(_.tweetId) + val validTweets = filterInvalidTweets(tweetIds, inputTarget) + validTweets.flatMap { tweetypieMap => + val ebCandidates = response.tweets + .map { frsTweet => + val candidateTweetId = frsTweet.tweetId + val resultFromTweetyPie = tweetypieMap.get(candidateTweetId) + new FRSTweetCandidate { + override val tweetId = candidateTweetId + override val features = None + override val tweetyPieResult = resultFromTweetyPie + override val feedbackToken = frsTweet.frsPrimarySource + override val commonRecType: CommonRecommendationType = feedbackToken + .flatMap(token => + FRSAlgorithmFeedbackTokenUtil.getCRTForAlgoToken(token)).getOrElse( + CommonRecommendationType.FrsTweet) + } + }.filter
{ ebCandidate => + showAllResultsFromFrs || ebCandidate.commonRecType == CommonRecommendationType.ReverseAddressbookTweet + } + + numberReturnedCandidates.add(ebCandidates.length) + buildRawCandidates( + inputTarget, + ebCandidates + ) + } + case _ => Future.None + } + } + } + + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate with TweetCandidate]]] = { + totalRequests.incr() + val enableResultsFromFrs = + inputTarget.params(PushFeatureSwitchParams.EnableResultFromFrsCandidates) + getTweetCandidatesFromCrMixer(inputTarget, enableResultsFromFrs) + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + lazy val enableFrsCandidates = target.params(PushFeatureSwitchParams.EnableFrsCandidates) + PushDeviceUtil.isRecommendationsEligible(target).flatMap { isEnabledForRecosSetting => + PushDeviceUtil.isTopicsEligible(target).map { topicSettingEnabled => + val isEnabledForTopics = + topicSettingEnabled && target.params( + PushFeatureSwitchParams.EnableFrsTweetCandidatesTopicSetting) + (isEnabledForRecosSetting || isEnabledForTopics) && enableFrsCandidates + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/GenericCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/GenericCandidateAdaptor.scala new file mode 100644 index 000000000..24d0cb64a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/GenericCandidateAdaptor.scala @@ -0,0 +1,107 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.candidate._ +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import 
com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object GenericCandidates { + type Target = + TargetUser + with UserDetails + with TargetDecider + with TargetABDecider + with TweetImpressionHistory + with HTLVisitHistory + with MaxTweetAge + with NewUserDetails + with FrigateHistory + with TargetWithSeedUsers +} + +case class GenericCandidateAdaptor( + genericCandidates: CandidateSource[GenericCandidates.Target, Candidate], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + stats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override val name: String = genericCandidates.name + + private def generateTweetFavCandidate( + _target: Target, + _tweetId: Long, + _socialContextActions: Seq[SocialContextAction], + socialContextActionsAllTypes: Seq[SocialContextAction], + _tweetyPieResult: Option[TweetyPieResult] + ): RawCandidate = { + new RawCandidate with TweetFavoriteCandidate { + override val socialContextActions = _socialContextActions + override val socialContextAllTypeActions = + socialContextActionsAllTypes + val tweetId = _tweetId + val target = _target + val tweetyPieResult = _tweetyPieResult + } + } + + private def generateTweetRetweetCandidate( + _target: Target, + _tweetId: Long, + _socialContextActions: Seq[SocialContextAction], + socialContextActionsAllTypes: Seq[SocialContextAction], + _tweetyPieResult: Option[TweetyPieResult] + ): RawCandidate = { + new RawCandidate with TweetRetweetCandidate { + override val socialContextActions = _socialContextActions + override val socialContextAllTypeActions = socialContextActionsAllTypes + val tweetId = _tweetId + val target = _target + val tweetyPieResult = _tweetyPieResult + } + } + + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate]]] = { + 
genericCandidates.get(inputTarget).map { candidatesOpt => + candidatesOpt + .map { candidates => + val candidatesSeq = + candidates.collect { + case tweetRetweet: TweetRetweetCandidate + if inputTarget.params(PushParams.MRTweetRetweetRecsParam) => + generateTweetRetweetCandidate( + inputTarget, + tweetRetweet.tweetId, + tweetRetweet.socialContextActions, + tweetRetweet.socialContextAllTypeActions, + tweetRetweet.tweetyPieResult) + case tweetFavorite: TweetFavoriteCandidate + if inputTarget.params(PushParams.MRTweetFavRecsParam) => + generateTweetFavCandidate( + inputTarget, + tweetFavorite.tweetId, + tweetFavorite.socialContextActions, + tweetFavorite.socialContextAllTypeActions, + tweetFavorite.tweetyPieResult) + } + candidatesSeq.foreach { candidate => + stats.counter(s"${candidate.commonRecType}_count").incr() + } + candidatesSeq + } + } + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil.isRecommendationsEligible(target).map { isAvailable => + isAvailable && target.params(PushParams.GenericCandidateAdaptorDecider) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/HighQualityTweetsAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/HighQualityTweetsAdaptor.scala new file mode 100644 index 000000000..37d11535f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/HighQualityTweetsAdaptor.scala @@ -0,0 +1,280 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import 
com.twitter.frigate.pushservice.params.HighQualityCandidateGroupEnum +import com.twitter.frigate.pushservice.params.HighQualityCandidateGroupEnum._ +import com.twitter.frigate.pushservice.params.PushConstants.targetUserAgeFeatureName +import com.twitter.frigate.pushservice.params.PushConstants.targetUserPreferredLanguage +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.pushservice.predicate.TargetPredicates +import com.twitter.frigate.pushservice.util.MediaCRT +import com.twitter.frigate.pushservice.util.PushAdaptorUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.pushservice.util.TopicsUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.interests.thriftscala.InterestId.SemanticCore +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.language.normalization.UserDisplayLanguage +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets +import com.twitter.util.Future + +object HighQualityTweetsHelper { + def getFollowedTopics( + target: Target, + interestsWithLookupContextStore: ReadableStore[ + InterestsLookupRequestWithContext, + UserInterests + ], + followedTopicsStats: Stat + ): Future[Seq[Long]] = { + TopicsUtil + .getTopicsFollowedByUser(target, interestsWithLookupContextStore, followedTopicsStats).map { + userInterestsOpt => + val userInterests = userInterestsOpt.getOrElse(Seq.empty) + val extractedTopicIds = userInterests.flatMap { + _.interestId match { + case SemanticCore(semanticCore) => Some(semanticCore.id) + case _ => None + } + } + extractedTopicIds + } + } + + def getTripQueries( + target: Target, + enabledGroups: 
Set[HighQualityCandidateGroupEnum.Value], + interestsWithLookupContextStore: ReadableStore[ + InterestsLookupRequestWithContext, + UserInterests + ], + sourceIds: Seq[String], + stat: Stat + ): Future[Set[TripDomain]] = { + + val followedTopicIdsSetFut: Future[Set[Long]] = if (enabledGroups.contains(Topic)) { + getFollowedTopics(target, interestsWithLookupContextStore, stat).map(topicIds => + topicIds.toSet) + } else { + Future.value(Set.empty) + } + + Future + .join(target.featureMap, target.inferredUserDeviceLanguage, followedTopicIdsSetFut).map { + case ( + featureMap, + deviceLanguageOpt, + followedTopicIds + ) => + val ageBucketOpt = if (enabledGroups.contains(AgeBucket)) { + featureMap.categoricalFeatures.get(targetUserAgeFeatureName) + } else { + None + } + + val languageOptions: Set[Option[String]] = if (enabledGroups.contains(Language)) { + val userPreferredLanguages = featureMap.sparseBinaryFeatures + .getOrElse(targetUserPreferredLanguage, Set.empty[String]) + if (userPreferredLanguages.nonEmpty) { + userPreferredLanguages.map(lang => Some(UserDisplayLanguage.toTweetLanguage(lang))) + } else { + Set(deviceLanguageOpt.map(UserDisplayLanguage.toTweetLanguage)) + } + } else Set(None) + + val followedTopicOptions: Set[Option[Long]] = if (followedTopicIds.nonEmpty) { + followedTopicIds.map(topic => Some(topic)) + } else Set(None) + + val tripQueries = followedTopicOptions.flatMap { topicOption => + languageOptions.flatMap { languageOption => + sourceIds.map { sourceId => + TripDomain( + sourceId = sourceId, + language = languageOption, + placeId = None, + topicId = topicOption, + gender = None, + ageBucket = ageBucketOpt + ) + } + } + } + + tripQueries + } + } +} + +case class HighQualityTweetsAdaptor( + tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets], + interestsWithLookupContextStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: 
ReadableStore[Long, TweetyPieResult], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + /* Candidate source for CommonRecommendationType.TripHqTweet pushes, backed by the trip tweet store and TweetyPie. */ + override def name: String = this.getClass.getSimpleName + + private val stats = globalStats.scope("HighQualityCandidateAdaptor") + private val followedTopicsStats = stats.stat("followed_topics") + private val missingResponseCounter = stats.counter("missing_respond_counter") + private val crtFatigueCounter = stats.counter("fatigue_by_crt") + private val fallbackRequestsCounter = stats.counter("fallback_requests") + /** Available when the target is eligible for recommendations and FS.HighQualityCandidatesEnableCandidateSource is on. */ + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil.isRecommendationsEligible(target).map { + _ && target.params(FS.HighQualityCandidatesEnableCandidateSource) + } + } + /** Fatigue predicate for TripHqTweet pushes, parameterised by the push interval and max-pushes-per-interval feature switches. */ + private val highQualityCandidateFrequencyPredicate = { + TargetPredicates + .pushRecTypeFatiguePredicate( + CommonRecommendationType.TripHqTweet, + FS.HighQualityTweetsPushInterval, + FS.MaxHighQualityTweetsPushGivenInterval, + stats + ) + } + /** Returns tweet id -> trip domains that surfaced it. Queries the primary domains first; when fallback is enabled and no domain returned at least HighQualityCandidatesMinNumOfCandidatesToFallback tweets, the fallback domains are queried instead. */ + private def getTripCandidatesStrato( + target: Target + ): Future[Map[Long, Set[TripDomain]]] = { + val tripQueriesF: Future[Set[TripDomain]] = HighQualityTweetsHelper.getTripQueries( + target = target, + enabledGroups = target.params(FS.HighQualityCandidatesEnableGroups).toSet, + interestsWithLookupContextStore = interestsWithLookupContextStore, + sourceIds = target.params(FS.TripTweetCandidateSourceIds), + stat = followedTopicsStats + ) + /* Lazy so that the fallback queries are only built when the primary domains come back short. */ + lazy val fallbackTripQueriesFut: Future[Set[TripDomain]] = + if (target.params(FS.HighQualityCandidatesEnableFallback)) + HighQualityTweetsHelper.getTripQueries( + target = target, + enabledGroups = target.params(FS.HighQualityCandidatesFallbackEnabledGroups).toSet, + interestsWithLookupContextStore = interestsWithLookupContextStore, + sourceIds = target.params(FS.HighQualityCandidatesFallbackSourceIds), + stat = followedTopicsStats + ) + else Future.value(Set.empty) + + val initialTweetsFut: 
Future[Map[TripDomain, Seq[TripTweet]]] = tripQueriesF.flatMap { + tripQueries => getTripTweetsByDomains(tripQueries) + } + + val tweetsByDomainFut: Future[Map[TripDomain, Seq[TripTweet]]] = + if (target.params(FS.HighQualityCandidatesEnableFallback)) { + initialTweetsFut.flatMap { candidates => + val minCandidatesForFallback: Int = + target.params(FS.HighQualityCandidatesMinNumOfCandidatesToFallback) + val validCandidates = candidates.filter(_._2.size >= minCandidatesForFallback) + + if (validCandidates.nonEmpty) { + Future.value(validCandidates) + } else { + fallbackTripQueriesFut.flatMap { fallbackTripDomains => + fallbackRequestsCounter.incr(fallbackTripDomains.size) + getTripTweetsByDomains(fallbackTripDomains) + } + } + } + } else { + initialTweetsFut + } + + val numOfCandidates: Int = target.params(FS.HighQualityCandidatesNumberOfCandidates) + tweetsByDomainFut.map(tweetsByDomain => reformatDomainTweetMap(tweetsByDomain, numOfCandidates)) + } + /** Fetches the trip tweets of every queried domain, dropping domains whose response is missing or empty. */ + private def getTripTweetsByDomains( + tripQueries: Set[TripDomain] + ): Future[Map[TripDomain, Seq[TripTweet]]] = { + Future.collect(tripTweetCandidateStore.multiGet(tripQueries)).map { response => + response + .filter(p => p._2.exists(_.tweets.nonEmpty)) + .mapValues(_.map(_.tweets).getOrElse(Seq.empty)) + } + } + /** Keeps the numOfCandidates highest-scored tweets of each domain and inverts the mapping to tweet id -> domains. The map is flattened through a Seq on purpose: flatMap-ing a Map into (tweetId, domain) pairs would rebuild a Map keyed by tweet id and silently keep only one domain per tweet. */ + private def reformatDomainTweetMap( + tweetsByDomain: Map[TripDomain, Seq[TripTweet]], + numOfCandidates: Int + ): Map[Long, Set[TripDomain]] = tweetsByDomain.toSeq + .flatMap { + case (tripDomain, tripTweets) => + tripTweets + .sortBy(_.score)(Ordering[Double].reverse) + .take(numOfCandidates) + .map { tweet => (tweet.tweetId, tripDomain) } + }.groupBy(_._1).mapValues(_.map(_._2).toSet) + /** Wraps a hydrated tweet as an out-of-network candidate whose three MediaCRT slots are all TripHqTweet, carrying the trip domains that surfaced it. */ + private def buildRawCandidate( + target: Target, + tweetyPieResult: TweetyPieResult, + tripDomain: Option[scala.collection.Set[TripDomain]] + ): RawCandidate = { + PushAdaptorUtil.generateOutOfNetworkTweetCandidates( + inputTarget = target, + id = tweetyPieResult.tweet.id, + mediaCRT = MediaCRT( 
CommonRecommendationType.TripHqTweet, + CommonRecommendationType.TripHqTweet, + CommonRecommendationType.TripHqTweet + ), + result = Some(tweetyPieResult), + tripTweetDomain = tripDomain + ) + } + /** Hydrates the candidate tweet ids through tweetyPieStore when FS.EnableVFInTweetypie is set, otherwise through tweetyPieStoreNoVF. */ + private def getTweetyPieResults( + target: Target, + tweetToTripDomain: Map[Long, Set[TripDomain]] + ): Future[Map[Long, Option[TweetyPieResult]]] = { + Future.collect((if (target.params(FS.EnableVFInTweetypie)) { + tweetyPieStore + } else { + tweetyPieStoreNoVF + }).multiGet(tweetToTripDomain.keySet)) + } + + /** + * Hydrates the trip tweets and returns them unless the target is fatigued for TripHqTweet pushes. The fatigue check is sequenced into the returned Future, so a fatigued target yields None; no hydrated tweets also yields None. + */ + override def get(target: Target): Future[Option[Seq[RawCandidate]]] = { + for { + tweetsToTripDomainMap <- getTripCandidatesStrato(target) + tweetyPieResults <- getTweetyPieResults(target, tweetsToTripDomainMap) + candidates = tweetyPieResults.flatMap { + case (tweetId, tweetyPieResultOpt) => + tweetyPieResultOpt.map(buildRawCandidate(target, _, tweetsToTripDomainMap.get(tweetId))) + }.toSeq + candidatesOpt <- if (candidates.nonEmpty) { + highQualityCandidateFrequencyPredicate(Seq(target)) + .map(_.head) + .map { isTargetFatigueEligible => + if (isTargetFatigueEligible) Some(candidates) + else { + crtFatigueCounter.incr() + None + } + } + } else { + missingResponseCounter.incr() + Future.None + } + } yield candidatesOpt + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ListsToRecommendCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ListsToRecommendCandidateAdaptor.scala new file mode 100644 index 000000000..59744b375 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/ListsToRecommendCandidateAdaptor.scala @@ -0,0 +1,152 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.ListPushCandidate +import 
com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.TargetPredicates +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.interests_discovery.thriftscala.DisplayLocation +import com.twitter.interests_discovery.thriftscala.NonPersonalizedRecommendedLists +import com.twitter.interests_discovery.thriftscala.RecommendedListsRequest +import com.twitter.interests_discovery.thriftscala.RecommendedListsResponse +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future +/** Candidate source that recommends lists to a target, combining IDS recommendations with lists stored per geohash. */ +case class ListsToRecommendCandidateAdaptor( + listRecommendationsStore: ReadableStore[String, NonPersonalizedRecommendedLists], + geoDuckV2Store: ReadableStore[Long, LocationResponse], + idsStore: ReadableStore[RecommendedListsRequest, RecommendedListsResponse], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override val name: String = this.getClass.getSimpleName + + private[this] val stats = globalStats.scope(name) + private[this] val noLocationCodeCounter = stats.counter("no_location_code") + private[this] val noCandidatesCounter = stats.counter("no_candidates_for_geo") + private[this] val disablePopGeoListsCounter = stats.counter("disable_pop_geo_lists") + private[this] val disableIDSListsCounter = stats.counter("disable_ids_lists") + /** Builds a List-type raw candidate for the given list id. */ + private def getListCandidate( + targetUser: Target, + _listId: Long + ): RawCandidate with ListPushCandidate = { + new RawCandidate with ListPushCandidate { + override val listId: Long = _listId + + override val commonRecType: CommonRecommendationType = CommonRecommendationType.List + + override val target: Target = 
targetUser + } + } + /** Collects the list ids of the List-type notifications in the target's push history. The guard must name CommonRecommendationType.List explicitly: a bare `List` resolves to the Scala collection companion and never matches. */ + private def getListsRecommendedFromHistory( + target: Target + ): Future[Seq[Long]] = { + target.history.map { history => + history.sortedHistory.flatMap { + case (_, notif) if notif.commonRecommendationType == CommonRecommendationType.List => + notif.listNotification.map(_.listId) + case _ => None + } + } + } + /** Requests list recommendations from idsStore, passing the historical list ids in the request. Returns Nil (and counts it) when EnableIDSListRecommendations is off. */ + private def getIDSListRecs( + target: Target, + historicalListIds: Seq[Long] + ): Future[Seq[Long]] = { + val request = RecommendedListsRequest( + target.targetId, + DisplayLocation.ListDiscoveryPage, + Some(historicalListIds) + ) + if (target.params(PushFeatureSwitchParams.EnableIDSListRecommendations)) { + idsStore.get(request).map { + case Some(response) => + response.channels.map(_.id) + case _ => Nil + } + } else { + disableIDSListsCounter.incr() + Future.Nil + } + } + /** Looks up the target's geohash, truncates it to ListRecommendationsGeoHashLength characters and returns the lists stored under that geohash prefix that are not in historicalListIds. Returns Nil when the feature is off or no geohash is known. */ + private def getPopGeoLists( + target: Target, + historicalListIds: Seq[Long] + ): Future[Seq[Long]] = { + if (target.params(PushFeatureSwitchParams.EnablePopGeoListRecommendations)) { + geoDuckV2Store.get(target.targetId).flatMap { + case Some(locationResponse) if locationResponse.geohash.isDefined => + val geoHashLength = + target.params(PushFeatureSwitchParams.ListRecommendationsGeoHashLength) + val geoHash = locationResponse.geohash.get.take(geoHashLength) + listRecommendationsStore + .get(s"geohash_$geoHash") + .map { + case Some(recommendedLists) => + recommendedLists.recommendedListsByAlgo.flatMap { topLists => + topLists.lists.collect { + case list if !historicalListIds.contains(list.listId) => list.listId + } + } + case _ => Nil + } + case _ => + noLocationCodeCounter.incr() + Future.Nil + } + } else { + disablePopGeoListsCounter.incr() + Future.Nil + } + } + /** Combines the IDS and pop-geo list recommendations (IDS first) into List candidates; returns None when there are none. */ + override def get(target: Target): Future[Option[Seq[RawCandidate]]] = { + getListsRecommendedFromHistory(target).flatMap { historicalListIds => + Future + .join( + getPopGeoLists(target, historicalListIds), + getIDSListRecs(target, historicalListIds) + ) + .map { + case (popGeoListsIds, idsListIds) => + val candidates = 
(idsListIds ++ popGeoListsIds).map(getListCandidate(target, _)) + /* Future.join always yields a pair, so a catch-all case here would be unreachable; + * an empty result is counted and reported as None explicitly. */ + if (candidates.isEmpty) noCandidatesCounter.incr() + Some(candidates).filter(_.nonEmpty) + } + } + } + + private val pushCapFatiguePredicate = TargetPredicates.pushRecTypeFatiguePredicate( + CommonRecommendationType.List, + PushFeatureSwitchParams.ListRecommendationsPushInterval, + PushFeatureSwitchParams.MaxListRecommendationsPushGivenInterval, + stats, + ) + /** Available when the target is eligible for recommendations, passes the List fatigue predicate and EnableListRecommendations is on. */ override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + + val isNotFatigued = pushCapFatiguePredicate.apply(Seq(target)).map(_.head) + + Future + .join( + PushDeviceUtil.isRecommendationsEligible(target), + isNotFatigued + ).map { + case (userRecommendationsEligible, isUnderCAP) => + userRecommendationsEligible && isUnderCAP && target.params( + PushFeatureSwitchParams.EnableListRecommendations) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/LoggedOutPushCandidateSourceGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/LoggedOutPushCandidateSourceGenerator.scala new file mode 100644 index 000000000..e5ac0b516 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/LoggedOutPushCandidateSourceGenerator.scala @@ -0,0 +1,54 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets +import com.twitter.content_mixer.thriftscala.ContentMixerRequest 
+import com.twitter.content_mixer.thriftscala.ContentMixerResponse +import com.twitter.geoduck.common.thriftscala.Location +import com.twitter.hermit.pop_geo.thriftscala.PopTweetsInPlace +import com.twitter.recommendation.interests.discovery.core.model.InterestDomain +/** Wires the injected stores into the two candidate sources exposed as `sources` (presumably the ones used for logged-out targets, going by the class name; confirm against the caller). */ +class LoggedOutPushCandidateSourceGenerator( + tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets], + geoDuckV2Store: ReadableStore[Long, LocationResponse], + safeCachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult], + cachedTweetyPieStoreV2NoVF: ReadableStore[Long, TweetyPieResult], + cachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult], + contentMixerStore: ReadableStore[ContentMixerRequest, ContentMixerResponse], + softUserLocationStore: ReadableStore[Long, Location], + topTweetsByGeoStore: ReadableStore[InterestDomain[String], Map[String, List[(Long, Double)]]], + topTweetsByGeoV2VersionedStore: ReadableStore[String, PopTweetsInPlace], +)( + implicit val globalStats: StatsReceiver) { + /** The candidate sources, in declaration order: TripGeoCandidatesAdaptor followed by TopTweetsByGeoAdaptor. */ val sources: Seq[CandidateSource[Target, RawCandidate] with CandidateSourceEligible[ + Target, + RawCandidate + ]] = { + Seq( + TripGeoCandidatesAdaptor( + tripTweetCandidateStore, + contentMixerStore, + safeCachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats + ), + TopTweetsByGeoAdaptor( + geoDuckV2Store, + softUserLocationStore, + topTweetsByGeoStore, + topTweetsByGeoV2VersionedStore, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats + ) + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/OnboardingPushCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/OnboardingPushCandidateAdaptor.scala new file mode 100644 index 000000000..98568e9dc --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/OnboardingPushCandidateAdaptor.scala @@ -0,0 +1,101 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.StatsReceiver 
+import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.DiscoverTwitterCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.pushservice.predicate.DiscoverTwitterPredicate +import com.twitter.frigate.pushservice.predicate.TargetPredicates +import com.twitter.frigate.pushservice.util.PushAppPermissionUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.util.Future +/** Candidate source that emits at most one onboarding push (AddressBookUploadPush or CompleteOnboardingPush) per request. */ +class OnboardingPushCandidateAdaptor( + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override val name: String = this.getClass.getSimpleName + + private[this] val stats = globalStats.scope(name) + private[this] val requestNum = stats.counter("request_num") + private[this] val addressBookCandNum = stats.counter("address_book_cand_num") + private[this] val completeOnboardingCandNum = stats.counter("complete_onboarding_cand_num") + /** Builds a DiscoverTwitter raw candidate of the given recommendation type for the target. */ + private def generateOnboardingPushRawCandidate( + _target: Target, + _commonRecType: CRT + ): RawCandidate = { + new RawCandidate with DiscoverTwitterCandidate { + override val target = _target + override val commonRecType = _commonRecType + } + } + /** Picks at most one onboarding push. AddressBookUploadPush is chosen when the address book has not been uploaded, its fatigue predicate passes and FS.EnableAddressBookPush is on; otherwise CompleteOnboardingPush is chosen when the address book has not been uploaded, its fatigue predicate passes (or FS.DisableOnboardingPushFatigue is on) and FS.EnableCompleteOnboardingPush is on. */ + private def getEligibleCandsForTarget( + target: Target + ): Future[Option[Seq[RawCandidate]]] = { + val addressBookFatigue = + TargetPredicates + .pushRecTypeFatiguePredicate( + CRT.AddressBookUploadPush, + FS.FatigueForOnboardingPushes, + FS.MaxOnboardingPushInInterval, + stats)(Seq(target)).map(_.head) + val completeOnboardingFatigue = + TargetPredicates + .pushRecTypeFatiguePredicate( + CRT.CompleteOnboardingPush, 
FS.FatigueForOnboardingPushes, + FS.MaxOnboardingPushInInterval, + stats)(Seq(target)).map(_.head) + + Future + .join( + target.appPermissions, + addressBookFatigue, + completeOnboardingFatigue + ).map { + case (appPermissionOpt, addressBookPredicate, completeOnboardingPredicate) => + val addressBookUploaded = + PushAppPermissionUtil.hasTargetUploadedAddressBook(appPermissionOpt) + val abUploadCandidate = + if (!addressBookUploaded && addressBookPredicate && target.params( + FS.EnableAddressBookPush)) { + addressBookCandNum.incr() + Some(generateOnboardingPushRawCandidate(target, CRT.AddressBookUploadPush)) + } else if (!addressBookUploaded && (completeOnboardingPredicate || + target.params(FS.DisableOnboardingPushFatigue)) && target.params( + FS.EnableCompleteOnboardingPush)) { + completeOnboardingCandNum.incr() + Some(generateOnboardingPushRawCandidate(target, CRT.CompleteOnboardingPush)) + } else None + + /* At most one candidate is selected above, so wrap it directly: + * an absent candidate is reported as None rather than as an empty Seq. */ + abUploadCandidate.map(candidate => Seq(candidate)) + } + } + /** Returns the eligible onboarding candidate, or None when the minimum duration since the last MR push has not yet elapsed. */ + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate]]] = { + requestNum.incr() + val minDurationForMRElapsed = + DiscoverTwitterPredicate + .minDurationElapsedSinceLastMrPushPredicate( + name, + FS.MrMinDurationSincePushForOnboardingPushes, + stats)(Seq(inputTarget)).map(_.head) + minDurationForMRElapsed.flatMap { minDurationElapsed => + if (minDurationElapsed) getEligibleCandsForTarget(inputTarget) else Future.None + } + } + /** Available when the target is eligible for recommendations and FS.EnableOnboardingPushes is on. */ + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil + .isRecommendationsEligible(target).map(_ && target.params(FS.EnableOnboardingPushes)) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/PushCandidateSourceGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/PushCandidateSourceGenerator.scala new file mode 100644 index 000000000..ea2dcd008 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/PushCandidateSourceGenerator.scala @@ -0,0 +1,162 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.content_mixer.thriftscala.ContentMixerRequest +import com.twitter.content_mixer.thriftscala.ContentMixerResponse +import com.twitter.explore_ranker.thriftscala.ExploreRankerRequest +import com.twitter.explore_ranker.thriftscala.ExploreRankerResponse +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.candidate._ +import com.twitter.frigate.common.store.RecentTweetsQuery +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.store._ +import com.twitter.geoduck.common.thriftscala.Location +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.hermit.pop_geo.thriftscala.PopTweetsInPlace +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.hermit.store.tweetypie.UserTweet +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.interests_discovery.thriftscala.NonPersonalizedRecommendedLists +import com.twitter.interests_discovery.thriftscala.RecommendedListsRequest +import com.twitter.interests_discovery.thriftscala.RecommendedListsResponse +import com.twitter.recommendation.interests.discovery.core.model.InterestDomain +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets +import com.twitter.tsp.thriftscala.TopicSocialProofRequest +import 
com.twitter.tsp.thriftscala.TopicSocialProofResponse + +/** + * PushCandidateSourceGenerator wires the injected stores and upstream candidate sources into the candidate-source adaptors and exposes them, in a fixed order, as `sources`. + */ +class PushCandidateSourceGenerator( + earlybirdCandidates: CandidateSource[EarlybirdCandidateSource.Query, EarlybirdCandidate], + userTweetEntityGraphCandidates: CandidateSource[UserTweetEntityGraphCandidates.Target, Candidate], + cachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult], + safeCachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult], + userTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult], + safeUserTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult], + cachedTweetyPieStoreV2NoVF: ReadableStore[Long, TweetyPieResult], + edgeStore: ReadableStore[RelationEdge, Boolean], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore, + geoDuckV2Store: ReadableStore[Long, LocationResponse], + topTweetsByGeoStore: ReadableStore[InterestDomain[String], Map[String, List[(Long, Double)]]], + topTweetsByGeoV2VersionedStore: ReadableStore[String, PopTweetsInPlace], + tweetImpressionsStore: TweetImpressionsStore, + recommendedTrendsCandidateSource: RecommendedTrendsCandidateSource, + recentTweetsByAuthorStore: ReadableStore[RecentTweetsQuery, Seq[Seq[Long]]], + topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + crMixerStore: CrMixerTweetStore, + contentMixerStore: ReadableStore[ContentMixerRequest, ContentMixerResponse], + exploreRankerStore: ReadableStore[ExploreRankerRequest, ExploreRankerResponse], + softUserLocationStore: ReadableStore[Long, Location], + tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets], + listRecsStore: ReadableStore[String, NonPersonalizedRecommendedLists], + idsStore: ReadableStore[RecommendedListsRequest, RecommendedListsResponse] +)( + implicit val globalStats: StatsReceiver) { + /* Adaptors built once as private vals; the remaining adaptors are constructed inline inside `sources`. */ + private val 
earlyBirdFirstDegreeCandidateAdaptor = EarlyBirdFirstDegreeCandidateAdaptor( + earlybirdCandidates, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + userTweetTweetyPieStore, + PushFeatureSwitchParams.NumberOfMaxEarlybirdInNetworkCandidatesParam, + globalStats + ) + + private val frsTweetCandidateAdaptor = FRSTweetCandidateAdaptor( + crMixerStore, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + userTweetTweetyPieStore, + uttEntityHydrationStore, + topicSocialProofServiceStore, + globalStats + ) + + private val contentRecommenderMixerAdaptor = ContentRecommenderMixerAdaptor( + crMixerStore, + safeCachedTweetyPieStoreV2, + edgeStore, + topicSocialProofServiceStore, + uttEntityHydrationStore, + globalStats + ) + + private val tripGeoCandidatesAdaptor = TripGeoCandidatesAdaptor( + tripTweetCandidateStore, + contentMixerStore, + safeCachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats + ) + /* All candidate sources of this generator; each element is both a CandidateSource and a CandidateSourceEligible for Target. */ + val sources: Seq[ + CandidateSource[Target, RawCandidate] with CandidateSourceEligible[ + Target, + RawCandidate + ] + ] = { + Seq( + earlyBirdFirstDegreeCandidateAdaptor, + GenericCandidateAdaptor( + userTweetEntityGraphCandidates, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats.scope("UserTweetEntityGraphCandidates") + ), + new OnboardingPushCandidateAdaptor(globalStats), + TopTweetsByGeoAdaptor( + geoDuckV2Store, + softUserLocationStore, + topTweetsByGeoStore, + topTweetsByGeoV2VersionedStore, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats + ), + frsTweetCandidateAdaptor, + TopTweetImpressionsCandidateAdaptor( + recentTweetsByAuthorStore, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + tweetImpressionsStore, + globalStats + ), + TrendsCandidatesAdaptor( + softUserLocationStore, + recommendedTrendsCandidateSource, + safeCachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + safeUserTweetTweetyPieStore, + globalStats + ), + contentRecommenderMixerAdaptor, + tripGeoCandidatesAdaptor, + 
HighQualityTweetsAdaptor( + tripTweetCandidateStore, + interestsLookupStore, + cachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + globalStats + ), + ExploreVideoTweetCandidateAdaptor( + exploreRankerStore, + cachedTweetyPieStoreV2, + globalStats + ), + ListsToRecommendCandidateAdaptor( + listRecsStore, + geoDuckV2Store, + idsStore, + globalStats + ) + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetImpressionsCandidateAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetImpressionsCandidateAdaptor.scala new file mode 100644 index 000000000..25ab31e85 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetImpressionsCandidateAdaptor.scala @@ -0,0 +1,326 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.TopTweetImpressionsCandidate +import com.twitter.frigate.common.store.RecentTweetsQuery +import com.twitter.frigate.common.util.SnowflakeUtils +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.pushservice.store.TweetImpressionsStore +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future +/** Pairs a tweet id with its optional TweetyPie hydration result and its optional impressions count. */ +case class TweetImpressionsCandidate( + tweetId: Long, + tweetyPieResultOpt: Option[TweetyPieResult], + impressionsCountOpt: Option[Long]) + +case class TopTweetImpressionsCandidateAdaptor( + recentTweetsFromTflockStore: 
ReadableStore[RecentTweetsQuery, Seq[Seq[Long]]], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + tweetImpressionsStore: TweetImpressionsStore, + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + private val stats = globalStats.scope("TopTweetImpressionsAdaptor") + private val tweetImpressionsCandsStat = stats.stat("top_tweet_impressions_cands_dist") + + private val eligibleUsersCounter = stats.counter("eligible_users") + private val noneligibleUsersCounter = stats.counter("noneligible_users") + private val meetsMinTweetsRequiredCounter = stats.counter("meets_min_tweets_required") + private val belowMinTweetsRequiredCounter = stats.counter("below_min_tweets_required") + private val aboveMaxInboundFavoritesCounter = stats.counter("above_max_inbound_favorites") + private val meetsImpressionsRequiredCounter = stats.counter("meets_impressions_required") + private val belowImpressionsRequiredCounter = stats.counter("below_impressions_required") + private val meetsFavoritesThresholdCounter = stats.counter("meets_favorites_threshold") + private val aboveFavoritesThresholdCounter = stats.counter("above_favorites_threshold") + private val emptyImpressionsMapCounter = stats.counter("empty_impressions_map") + + private val tflockResultsStat = stats.stat("tflock", "results") + private val emptyTflockResult = stats.counter("tflock", "empty_result") + private val nonEmptyTflockResult = stats.counter("tflock", "non_empty_result") + + private val originalTweetsStat = stats.stat("tweets", "original_tweets") + private val retweetsStat = stats.stat("tweets", "retweets") + private val allRetweetsOnlyCounter = stats.counter("tweets", "all_retweets_only") + private val allOriginalTweetsOnlyCounter = stats.counter("tweets", "all_original_tweets_only") + + private val emptyTweetypieMap = stats.counter("", "empty_tweetypie_map") + 
private val emptyTweetyPieResult = stats.stat("", "empty_tweetypie_result") + private val allEmptyTweetypieResults = stats.counter("", "all_empty_tweetypie_results") + + private val eligibleUsersAfterImpressionsFilter = + stats.counter("eligible_users_after_impressions_filter") + private val eligibleUsersAfterFavoritesFilter = + stats.counter("eligible_users_after_favorites_filter") + private val eligibleUsersWithEligibleTweets = + stats.counter("eligible_users_with_eligible_tweets") + + private val eligibleTweetCands = stats.stat("eligible_tweet_cands") + private val getCandsRequestCounter = + stats.counter("top_tweet_impressions_get_request") + + override val name: String = this.getClass.getSimpleName + + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate]]] = { + getCandsRequestCounter.incr() + val eligibleCandidatesFut = getTweetImpressionsCandidates(inputTarget) + eligibleCandidatesFut.map { eligibleCandidates => + if (eligibleCandidates.nonEmpty) { + eligibleUsersWithEligibleTweets.incr() + eligibleTweetCands.add(eligibleCandidates.size) + val candidate = getMostImpressionsTweet(eligibleCandidates) + Some( + Seq( + generateTopTweetImpressionsCandidate( + inputTarget, + candidate.tweetId, + candidate.tweetyPieResultOpt, + candidate.impressionsCountOpt.getOrElse(0L)))) + } else None + } + } + + private def getTweetImpressionsCandidates( + inputTarget: Target + ): Future[Seq[TweetImpressionsCandidate]] = { + val originalTweets = getRecentOriginalTweetsForUser(inputTarget) + originalTweets.flatMap { tweetyPieResultsMap => + val numDaysSearchForOriginalTweets = + inputTarget.params(FS.TopTweetImpressionsOriginalTweetsNumDaysSearch) + val moreRecentTweetIds = + getMoreRecentTweetIds(tweetyPieResultsMap.keySet.toSeq, numDaysSearchForOriginalTweets) + val isEligible = isEligibleUser(inputTarget, tweetyPieResultsMap, moreRecentTweetIds) + if (isEligible) filterByEligibility(inputTarget, tweetyPieResultsMap, moreRecentTweetIds) + else Future.Nil + } 
+ } + + private def getRecentOriginalTweetsForUser( + targetUser: Target + ): Future[Map[Long, TweetyPieResult]] = { + val tweetyPieResultsMapFut = getTflockStoreResults(targetUser).flatMap { recentTweetIds => + FutureOps.mapCollect((targetUser.params(FS.EnableVFInTweetypie) match { + case true => tweetyPieStore + case false => tweetyPieStoreNoVF + }).multiGet(recentTweetIds.toSet)) + } + tweetyPieResultsMapFut.map { tweetyPieResultsMap => + if (tweetyPieResultsMap.isEmpty) { + emptyTweetypieMap.incr() + Map.empty + } else removeRetweets(tweetyPieResultsMap) + } + } + + private def getTflockStoreResults(targetUser: Target): Future[Seq[Long]] = { + val maxResults = targetUser.params(FS.TopTweetImpressionsRecentTweetsByAuthorStoreMaxResults) + val maxAge = targetUser.params(FS.TopTweetImpressionsTotalFavoritesLimitNumDaysSearch) + val recentTweetsQuery = + RecentTweetsQuery( + userIds = Seq(targetUser.targetId), + maxResults = maxResults, + maxAge = maxAge.days + ) + recentTweetsFromTflockStore + .get(recentTweetsQuery).map { + case Some(tweetIdsAll) => + val tweetIds = tweetIdsAll.headOption.getOrElse(Seq.empty) + val numTweets = tweetIds.size + if (numTweets > 0) { + tflockResultsStat.add(numTweets) + nonEmptyTflockResult.incr() + } else emptyTflockResult.incr() + tweetIds + case _ => Nil + } + } + + private def removeRetweets( + tweetyPieResultsMap: Map[Long, Option[TweetyPieResult]] + ): Map[Long, TweetyPieResult] = { + val nonEmptyTweetyPieResults: Map[Long, TweetyPieResult] = tweetyPieResultsMap.collect { + case (key, Some(value)) => (key, value) + } + emptyTweetyPieResult.add(tweetyPieResultsMap.size - nonEmptyTweetyPieResults.size) + + if (nonEmptyTweetyPieResults.nonEmpty) { + val originalTweets = nonEmptyTweetyPieResults.filter { + case (_, tweetyPieResult) => + tweetyPieResult.sourceTweet.isEmpty + } + val numOriginalTweets = originalTweets.size + val numRetweets = nonEmptyTweetyPieResults.size - originalTweets.size + 
originalTweetsStat.add(numOriginalTweets) + retweetsStat.add(numRetweets) + if (numRetweets == 0) allOriginalTweetsOnlyCounter.incr() + if (numOriginalTweets == 0) allRetweetsOnlyCounter.incr() + originalTweets + } else { + allEmptyTweetypieResults.incr() + Map.empty + } + } + + private def getMoreRecentTweetIds( + tweetIds: Seq[Long], + numDays: Int + ): Seq[Long] = { + tweetIds.filter { tweetId => + SnowflakeUtils.isRecent(tweetId, numDays.days) + } + } + + private def isEligibleUser( + inputTarget: Target, + tweetyPieResults: Map[Long, TweetyPieResult], + recentTweetIds: Seq[Long] + ): Boolean = { + val minNumTweets = inputTarget.params(FS.TopTweetImpressionsMinNumOriginalTweets) + lazy val totalFavoritesLimit = + inputTarget.params(FS.TopTweetImpressionsTotalInboundFavoritesLimit) + if (recentTweetIds.size >= minNumTweets) { + meetsMinTweetsRequiredCounter.incr() + val isUnderLimit = isUnderTotalInboundFavoritesLimit(tweetyPieResults, totalFavoritesLimit) + if (isUnderLimit) eligibleUsersCounter.incr() + else { + aboveMaxInboundFavoritesCounter.incr() + noneligibleUsersCounter.incr() + } + isUnderLimit + } else { + belowMinTweetsRequiredCounter.incr() + noneligibleUsersCounter.incr() + false + } + } + + private def getFavoriteCounts( + tweetyPieResult: TweetyPieResult + ): Long = tweetyPieResult.tweet.counts.flatMap(_.favoriteCount).getOrElse(0L) + + private def isUnderTotalInboundFavoritesLimit( + tweetyPieResults: Map[Long, TweetyPieResult], + totalFavoritesLimit: Long + ): Boolean = { + val favoritesIterator = tweetyPieResults.valuesIterator.map(getFavoriteCounts) + val totalInboundFavorites = favoritesIterator.sum + totalInboundFavorites <= totalFavoritesLimit + } + + def filterByEligibility( + inputTarget: Target, + tweetyPieResults: Map[Long, TweetyPieResult], + tweetIds: Seq[Long] + ): Future[Seq[TweetImpressionsCandidate]] = { + lazy val minNumImpressions: Long = inputTarget.params(FS.TopTweetImpressionsMinRequired) + lazy val maxNumLikes: Long = 
inputTarget.params(FS.TopTweetImpressionsMaxFavoritesPerTweet) + for { + filteredImpressionsMap <- getFilteredImpressionsMap(tweetIds, minNumImpressions) + tweetIdsFilteredByFavorites <- + getTweetIdsFilteredByFavorites(filteredImpressionsMap.keySet, tweetyPieResults, maxNumLikes) + } yield { + if (filteredImpressionsMap.nonEmpty) eligibleUsersAfterImpressionsFilter.incr() + if (tweetIdsFilteredByFavorites.nonEmpty) eligibleUsersAfterFavoritesFilter.incr() + + val candidates = tweetIdsFilteredByFavorites.map { tweetId => + TweetImpressionsCandidate( + tweetId, + tweetyPieResults.get(tweetId), + filteredImpressionsMap.get(tweetId)) + } + tweetImpressionsCandsStat.add(candidates.length) + candidates + } + } + + private def getFilteredImpressionsMap( + tweetIds: Seq[Long], + minNumImpressions: Long + ): Future[Map[Long, Long]] = { + getImpressionsCounts(tweetIds).map { impressionsMap => + if (impressionsMap.isEmpty) emptyImpressionsMapCounter.incr() + impressionsMap.filter { + case (_, numImpressions) => + val isValid = numImpressions >= minNumImpressions + if (isValid) { + meetsImpressionsRequiredCounter.incr() + } else { + belowImpressionsRequiredCounter.incr() + } + isValid + } + } + } + + private def getTweetIdsFilteredByFavorites( + filteredTweetIds: Set[Long], + tweetyPieResults: Map[Long, TweetyPieResult], + maxNumLikes: Long + ): Future[Seq[Long]] = { + val filteredByFavoritesTweetIds = filteredTweetIds.filter { tweetId => + val tweetyPieResultOpt = tweetyPieResults.get(tweetId) + val isValid = tweetyPieResultOpt.exists { tweetyPieResult => + getFavoriteCounts(tweetyPieResult) <= maxNumLikes + } + if (isValid) meetsFavoritesThresholdCounter.incr() + else aboveFavoritesThresholdCounter.incr() + isValid + } + Future(filteredByFavoritesTweetIds.toSeq) + } + + private def getMostImpressionsTweet( + filteredResults: Seq[TweetImpressionsCandidate] + ): TweetImpressionsCandidate = { + val maxImpressions: Long = filteredResults.map { + 
_.impressionsCountOpt.getOrElse(0L) + }.max + + val mostImpressionsCandidates: Seq[TweetImpressionsCandidate] = + filteredResults.filter(_.impressionsCountOpt.getOrElse(0L) == maxImpressions) + + mostImpressionsCandidates.maxBy(_.tweetId) + } + + private def getImpressionsCounts( + tweetIds: Seq[Long] + ): Future[Map[Long, Long]] = { + val impressionCountMap = tweetIds.map { tweetId => + tweetId -> tweetImpressionsStore + .getCounts(tweetId).map(_.getOrElse(0L)) + }.toMap + Future.collect(impressionCountMap) + } + + private def generateTopTweetImpressionsCandidate( + inputTarget: Target, + _tweetId: Long, + result: Option[TweetyPieResult], + _impressionsCount: Long + ): RawCandidate = { + new RawCandidate with TopTweetImpressionsCandidate { + override val target: Target = inputTarget + override val tweetId: Long = _tweetId + override val tweetyPieResult: Option[TweetyPieResult] = result + override val impressionsCount: Long = _impressionsCount + } + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + val enabledTopTweetImpressionsNotification = + target.params(FS.EnableTopTweetImpressionsNotification) + + PushDeviceUtil + .isRecommendationsEligible(target).map(_ && enabledTopTweetImpressionsNotification) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetsByGeoAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetsByGeoAdaptor.scala new file mode 100644 index 000000000..3228760fd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TopTweetsByGeoAdaptor.scala @@ -0,0 +1,413 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import 
com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.pushservice.params.PopGeoTweetVersion +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.params.TopTweetsForGeoCombination +import com.twitter.frigate.pushservice.params.TopTweetsForGeoRankingFunction +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.pushservice.predicate.DiscoverTwitterPredicate +import com.twitter.frigate.pushservice.predicate.TargetPredicates +import com.twitter.frigate.pushservice.util.MediaCRT +import com.twitter.frigate.pushservice.util.PushAdaptorUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.geoduck.common.thriftscala.{Location => GeoLocation} +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.hermit.pop_geo.thriftscala.PopTweetsInPlace +import com.twitter.recommendation.interests.discovery.core.model.InterestDomain +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future +import com.twitter.util.Time +import scala.collection.Map + +case class PlaceTweetScore(place: String, tweetId: Long, score: Double) { + def toTweetScore: (Long, Double) = (tweetId, score) +} +case class TopTweetsByGeoAdaptor( + geoduckStoreV2: ReadableStore[Long, LocationResponse], + softUserGeoLocationStore: ReadableStore[Long, GeoLocation], + topTweetsByGeoStore: 
ReadableStore[InterestDomain[String], Map[String, List[(Long, Double)]]], + topTweetsByGeoStoreV2: ReadableStore[String, PopTweetsInPlace], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + globalStats: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + + override def name: String = this.getClass.getSimpleName + + private[this] val stats = globalStats.scope("TopTweetsByGeoAdaptor") + private[this] val noGeohashUserCounter: Counter = stats.counter("users_with_no_geohash_counter") + private[this] val incomingRequestCounter: Counter = stats.counter("incoming_request_counter") + private[this] val incomingLoggedOutRequestCounter: Counter = + stats.counter("incoming_logged_out_request_counter") + private[this] val loggedOutRawCandidatesCounter = + stats.counter("logged_out_raw_candidates_counter") + private[this] val emptyLoggedOutRawCandidatesCounter = + stats.counter("logged_out_empty_raw_candidates") + private[this] val outputTopTweetsByGeoCounter: Stat = + stats.stat("output_top_tweets_by_geo_counter") + private[this] val loggedOutPopByGeoV2CandidatesCounter: Counter = + stats.counter("logged_out_pop_by_geo_candidates") + private[this] val dormantUsersSince14DaysCounter: Counter = + stats.counter("dormant_user_since_14_days_counter") + private[this] val dormantUsersSince30DaysCounter: Counter = + stats.counter("dormant_user_since_30_days_counter") + private[this] val nonDormantUsersSince14DaysCounter: Counter = + stats.counter("non_dormant_user_since_14_days_counter") + private[this] val topTweetsByGeoTake100Counter: Counter = + stats.counter("top_tweets_by_geo_take_100_counter") + private[this] val combinationRequestsCounter = + stats.scope("combination_method_request_counter") + private[this] val popGeoTweetVersionCounter = + stats.scope("popgeo_tweet_version_counter") + private[this] val nonReplyTweetsCounter = 
stats.counter("non_reply_tweets") + + val MaxGeoHashSize = 4 + + private def constructKeys( + geohash: Option[String], + accountCountryCode: Option[String], + keyLengths: Seq[Int], + version: PopGeoTweetVersion.Value + ): Set[String] = { + val geohashKeys = geohash match { + case Some(hash) => keyLengths.map { version + "_geohash_" + hash.take(_) } + case _ => Seq.empty + } + + val accountCountryCodeKeys = + accountCountryCode.toSeq.map(version + "_country_" + _.toUpperCase) + (geohashKeys ++ accountCountryCodeKeys).toSet + } + + def convertToPlaceTweetScore( + popTweetsInPlace: Seq[PopTweetsInPlace] + ): Seq[PlaceTweetScore] = { + popTweetsInPlace.flatMap { + case p => + p.popTweets.map { + case popTweet => PlaceTweetScore(p.place, popTweet.tweetId, popTweet.score) + } + } + } + + def sortGeoHashTweets( + placeTweetScores: Seq[PlaceTweetScore], + rankingFunction: TopTweetsForGeoRankingFunction.Value + ): Seq[PlaceTweetScore] = { + rankingFunction match { + case TopTweetsForGeoRankingFunction.Score => + placeTweetScores.sortBy(_.score)(Ordering[Double].reverse) + case TopTweetsForGeoRankingFunction.GeohashLengthAndThenScore => + placeTweetScores + .sortBy(row => (row.place.length, row.score))(Ordering[(Int, Double)].reverse) + } + } + + def getResultsForLambdaStore( + inputTarget: Target, + geohash: Option[String], + store: ReadableStore[String, PopTweetsInPlace], + topk: Int, + version: PopGeoTweetVersion.Value + ): Future[Seq[(Long, Double)]] = { + inputTarget.accountCountryCode.flatMap { countryCode => + val keys = { + if (inputTarget.params(FS.EnableCountryCodeBackoffTopTweetsByGeo)) + constructKeys(geohash, countryCode, inputTarget.params(FS.GeoHashLengthList), version) + else + constructKeys(geohash, None, inputTarget.params(FS.GeoHashLengthList), version) + } + FutureOps + .mapCollect(store.multiGet(keys)).map { + case geohashTweetMap => + val popTweets = + geohashTweetMap.values.flatten.toSeq + val results = sortGeoHashTweets( + 
convertToPlaceTweetScore(popTweets), + inputTarget.params(FS.RankingFunctionForTopTweetsByGeo)) + .map(_.toTweetScore).take(topk) + results + } + } + } + + def getPopGeoTweetsForLoggedOutUsers( + inputTarget: Target, + store: ReadableStore[String, PopTweetsInPlace] + ): Future[Seq[(Long, Double)]] = { + inputTarget.countryCode.flatMap { countryCode => + val keys = constructKeys(None, countryCode, Seq(4), PopGeoTweetVersion.Prod) + FutureOps.mapCollect(store.multiGet(keys)).map { + case tweetMap => + val tweets = tweetMap.values.flatten.toSeq + loggedOutPopByGeoV2CandidatesCounter.incr(tweets.size) + val popTweets = sortGeoHashTweets( + convertToPlaceTweetScore(tweets), + TopTweetsForGeoRankingFunction.Score).map(_.toTweetScore) + popTweets + } + } + } + + def getRankedTweets( + inputTarget: Target, + geohash: Option[String] + ): Future[Seq[(Long, Double)]] = { + val MaxTopTweetsByGeoCandidatesToTake = + inputTarget.params(FS.MaxTopTweetsByGeoCandidatesToTake) + val scoringFn: String = inputTarget.params(FS.ScoringFuncForTopTweetsByGeo) + val combinationMethod = inputTarget.params(FS.TopTweetsByGeoCombinationParam) + val popGeoTweetVersion = inputTarget.params(FS.PopGeoTweetVersionParam) + + inputTarget.isHeavyUserState.map { isHeavyUser => + stats + .scope(combinationMethod.toString).scope(popGeoTweetVersion.toString).scope( + "IsHeavyUser_" + isHeavyUser.toString).counter().incr() + } + combinationRequestsCounter.scope(combinationMethod.toString).counter().incr() + popGeoTweetVersionCounter.scope(popGeoTweetVersion.toString).counter().incr() + lazy val geoStoreResults = if (geohash.isDefined) { + val hash = geohash.get.take(MaxGeoHashSize) + topTweetsByGeoStore + .get( + InterestDomain[String](hash) + ) + .map { + case Some(scoringFnToTweetsMapOpt) => + val tweetsWithScore = scoringFnToTweetsMapOpt + .getOrElse(scoringFn, List.empty) + val sortedResults = sortGeoHashTweets( + tweetsWithScore.map { + case (tweetId, score) => PlaceTweetScore(hash, tweetId, score) + 
}, + TopTweetsForGeoRankingFunction.Score + ).map(_.toTweetScore).take( + MaxTopTweetsByGeoCandidatesToTake + ) + sortedResults + case _ => Seq.empty + } + } else Future.value(Seq.empty) + lazy val versionPopGeoTweetResults = + getResultsForLambdaStore( + inputTarget, + geohash, + topTweetsByGeoStoreV2, + MaxTopTweetsByGeoCandidatesToTake, + popGeoTweetVersion + ) + combinationMethod match { + case TopTweetsForGeoCombination.Default => geoStoreResults + case TopTweetsForGeoCombination.AccountsTweetFavAsBackfill => + Future.join(geoStoreResults, versionPopGeoTweetResults).map { + case (geoStoreTweets, versionPopGeoTweets) => + (geoStoreTweets ++ versionPopGeoTweets).take(MaxTopTweetsByGeoCandidatesToTake) + } + case TopTweetsForGeoCombination.AccountsTweetFavIntermixed => + Future.join(geoStoreResults, versionPopGeoTweetResults).map { + case (geoStoreTweets, versionPopGeoTweets) => + CandidateSource.interleaveSeqs(Seq(geoStoreTweets, versionPopGeoTweets)) + } + } + } + + override def get(inputTarget: Target): Future[Option[Seq[RawCandidate]]] = { + if (inputTarget.isLoggedOutUser) { + incomingLoggedOutRequestCounter.incr() + val rankedTweets = getPopGeoTweetsForLoggedOutUsers(inputTarget, topTweetsByGeoStoreV2) + val rawCandidates = { + rankedTweets.map { rt => + FutureOps + .mapCollect( + tweetyPieStore + .multiGet(rt.map { case (tweetId, _) => tweetId }.toSet)) + .map { tweetyPieResultMap => + val results = buildTopTweetsByGeoRawCandidates( + inputTarget, + None, + tweetyPieResultMap + ) + if (results.isEmpty) { + emptyLoggedOutRawCandidatesCounter.incr() + } + loggedOutRawCandidatesCounter.incr(results.size) + Some(results) + } + }.flatten + } + rawCandidates + } else { + incomingRequestCounter.incr() + getGeoHashForUsers(inputTarget).flatMap { geohash => + if (geohash.isEmpty) noGeohashUserCounter.incr() + getRankedTweets(inputTarget, geohash).map { rt => + if (rt.size == 100) { + topTweetsByGeoTake100Counter.incr(1) + } + FutureOps + 
.mapCollect((inputTarget.params(FS.EnableVFInTweetypie) match { + case true => tweetyPieStore + case false => tweetyPieStoreNoVF + }).multiGet(rt.map { case (tweetId, _) => tweetId }.toSet)) + .map { tweetyPieResultMap => + Some( + buildTopTweetsByGeoRawCandidates( + inputTarget, + None, + filterOutReplyTweet( + tweetyPieResultMap, + nonReplyTweetsCounter + ) + ) + ) + } + }.flatten + } + } + } + + private def getGeoHashForUsers( + inputTarget: Target + ): Future[Option[String]] = { + + inputTarget.targetUser.flatMap { + case Some(user) => + user.userType match { + case UserType.Soft => + softUserGeoLocationStore + .get(inputTarget.targetId) + .map(_.flatMap(_.geohash.flatMap(_.stringGeohash))) + + case _ => + geoduckStoreV2.get(inputTarget.targetId).map(_.flatMap(_.geohash)) + } + + case None => Future.None + } + } + + private def buildTopTweetsByGeoRawCandidates( + target: PushTypes.Target, + locationName: Option[String], + topTweets: Map[Long, Option[TweetyPieResult]] + ): Seq[RawCandidate with TweetCandidate] = { + val candidates = topTweets.map { tweetIdTweetyPieResultMap => + PushAdaptorUtil.generateOutOfNetworkTweetCandidates( + inputTarget = target, + id = tweetIdTweetyPieResultMap._1, + mediaCRT = MediaCRT( + CommonRecommendationType.GeoPopTweet, + CommonRecommendationType.GeoPopTweet, + CommonRecommendationType.GeoPopTweet + ), + result = tweetIdTweetyPieResultMap._2, + localizedEntity = None + ) + }.toSeq + outputTopTweetsByGeoCounter.add(candidates.length) + candidates + } + + private val topTweetsByGeoFrequencyPredicate = { + TargetPredicates + .pushRecTypeFatiguePredicate( + CommonRecommendationType.GeoPopTweet, + FS.TopTweetsByGeoPushInterval, + FS.MaxTopTweetsByGeoPushGivenInterval, + stats + ) + } + + def getAvailabilityForDormantUser(target: Target): Future[Boolean] = { + lazy val isDormantUserNotFatigued = topTweetsByGeoFrequencyPredicate(Seq(target)).map(_.head) + lazy val enableTopTweetsByGeoForDormantUsers = + 
target.params(FS.EnableTopTweetsByGeoCandidatesForDormantUsers) + + target.lastHTLVisitTimestamp.flatMap { + case Some(lastHTLTimestamp) => + val minTimeSinceLastLogin = + target.params(FS.MinimumTimeSinceLastLoginForGeoPopTweetPush).ago + val timeSinceInactive = target.params(FS.TimeSinceLastLoginForGeoPopTweetPush).ago + val lastActiveTimestamp = Time.fromMilliseconds(lastHTLTimestamp) + if (lastActiveTimestamp > minTimeSinceLastLogin) { + nonDormantUsersSince14DaysCounter.incr() + Future.False + } else { + dormantUsersSince14DaysCounter.incr() + isDormantUserNotFatigued.map { isUserNotFatigued => + lastActiveTimestamp < timeSinceInactive && + enableTopTweetsByGeoForDormantUsers && + isUserNotFatigued + } + } + case _ => + dormantUsersSince30DaysCounter.incr() + isDormantUserNotFatigued.map { isUserNotFatigued => + enableTopTweetsByGeoForDormantUsers && isUserNotFatigued + } + } + } + + def getAvailabilityForPlaybookSetUp(target: Target): Future[Boolean] = { + lazy val enableTopTweetsByGeoForNewUsers = target.params(FS.EnableTopTweetsByGeoCandidates) + val isTargetEligibleForMrFatigueCheck = target.isAccountAtleastNDaysOld( + target.params(FS.MrMinDurationSincePushForTopTweetsByGeoPushes)) + val isMrFatigueCheckEnabled = + target.params(FS.EnableMrMinDurationSinceMrPushFatigue) + val applyPredicateForTopTweetsByGeo = + if (isMrFatigueCheckEnabled) { + if (isTargetEligibleForMrFatigueCheck) { + DiscoverTwitterPredicate + .minDurationElapsedSinceLastMrPushPredicate( + name, + FS.MrMinDurationSincePushForTopTweetsByGeoPushes, + stats + ).andThen( + topTweetsByGeoFrequencyPredicate + )(Seq(target)).map(_.head) + } else { + Future.False + } + } else { + topTweetsByGeoFrequencyPredicate(Seq(target)).map(_.head) + } + applyPredicateForTopTweetsByGeo.map { predicateResult => + predicateResult && enableTopTweetsByGeoForNewUsers + } + } + + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + if (target.isLoggedOutUser) { + Future.True + } else { 
+ PushDeviceUtil + .isRecommendationsEligible(target).map( + _ && target.params(PushParams.PopGeoCandidatesDecider)).flatMap { isAvailable => + if (isAvailable) { + Future + .join(getAvailabilityForDormantUser(target), getAvailabilityForPlaybookSetUp(target)) + .map { + case (isAvailableForDormantUser, isAvailableForPlaybook) => + isAvailableForDormantUser || isAvailableForPlaybook + case _ => false + } + } else Future.False + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TrendsCandidatesAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TrendsCandidatesAdaptor.scala new file mode 100644 index 000000000..4e7ec3314 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TrendsCandidatesAdaptor.scala @@ -0,0 +1,215 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.events.recos.thriftscala.DisplayLocation +import com.twitter.events.recos.thriftscala.TrendsContext +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.CandidateSourceEligible +import com.twitter.frigate.common.base.TrendTweetCandidate +import com.twitter.frigate.common.base.TrendsCandidate +import com.twitter.frigate.common.candidate.RecommendedTrendsCandidateSource +import com.twitter.frigate.common.candidate.RecommendedTrendsCandidateSource.Query +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.adaptor.TrendsCandidatesAdaptor._ +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.predicate.TargetPredicates +import 
com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.geoduck.common.thriftscala.Location +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.hermit.store.tweetypie.UserTweet +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future +import scala.collection.Map + +object TrendsCandidatesAdaptor { + type TweetId = Long + type EventId = Long +} + +case class TrendsCandidatesAdaptor( + softUserGeoLocationStore: ReadableStore[Long, Location], + recommendedTrendsCandidateSource: RecommendedTrendsCandidateSource, + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult], + safeUserTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult], + statsReceiver: StatsReceiver) + extends CandidateSource[Target, RawCandidate] + with CandidateSourceEligible[Target, RawCandidate] { + override val name = this.getClass.getSimpleName + + private val trendAdaptorStats = statsReceiver.scope("TrendsCandidatesAdaptor") + private val trendTweetCandidateNumber = trendAdaptorStats.counter("trend_tweet_candidate") + private val nonReplyTweetsCounter = trendAdaptorStats.counter("non_reply_tweets") + + private def getQuery(target: Target): Future[Query] = { + def getUserCountryCode(target: Target): Future[Option[String]] = { + target.targetUser.flatMap { + case Some(user) if user.userType == UserType.Soft => + softUserGeoLocationStore + .get(user.id) + .map(_.flatMap(_.simpleRgcResult.flatMap(_.countryCodeAlpha2))) + + case _ => target.accountCountryCode + } + } + + for { + countryCode <- getUserCountryCode(target) + inferredLanguage <- target.inferredUserDeviceLanguage + } yield { + Query( + userId = target.targetId, + displayLocation = DisplayLocation.MagicRecs, + languageCode = inferredLanguage, + countryCode = countryCode, + maxResults = 
target.params(PushFeatureSwitchParams.MaxRecommendedTrendsToQuery) + ) + } + } + + /** + * Query candidates only if sent at most [[PushFeatureSwitchParams.MaxTrendTweetNotificationsInDuration]] + * trend tweet notifications in [[PushFeatureSwitchParams.TrendTweetNotificationsFatigueDuration]] + */ + val trendTweetFatiguePredicate = TargetPredicates.pushRecTypeFatiguePredicate( + CommonRecommendationType.TrendTweet, + PushFeatureSwitchParams.TrendTweetNotificationsFatigueDuration, + PushFeatureSwitchParams.MaxTrendTweetNotificationsInDuration, + trendAdaptorStats + ) + + private val recommendedTrendsWithTweetsCandidateSource: CandidateSource[ + Target, + RawCandidate with TrendsCandidate + ] = recommendedTrendsCandidateSource + .convert[Target, TrendsCandidate]( + getQuery, + recommendedTrendsCandidateSource.identityCandidateMapper + ) + .batchMapValues[Target, RawCandidate with TrendsCandidate]( + trendsCandidatesToTweetCandidates(_, _, getTweetyPieResults)) + + private def getTweetyPieResults( + tweetIds: Seq[TweetId], + target: Target + ): Future[Map[TweetId, TweetyPieResult]] = { + if (target.params(PushFeatureSwitchParams.EnableSafeUserTweetTweetypieStore)) { + Future + .collect( + safeUserTweetTweetyPieStore.multiGet( + tweetIds.toSet.map(UserTweet(_, Some(target.targetId))))).map { + _.collect { + case (userTweet, Some(tweetyPieResult)) => userTweet.tweetId -> tweetyPieResult + } + } + } else { + Future + .collect((target.params(PushFeatureSwitchParams.EnableVFInTweetypie) match { + case true => tweetyPieStore + case false => tweetyPieStoreNoVF + }).multiGet(tweetIds.toSet)).map { tweetyPieResultMap => + filterOutReplyTweet(tweetyPieResultMap, nonReplyTweetsCounter).collect { + case (tweetId, Some(tweetyPieResult)) => tweetId -> tweetyPieResult + } + } + } + } + + /** + * + * @param _target: [[Target]] object representing notification recipient user + * @param trendsCandidates: Sequence of [[TrendsCandidate]] returned from ERS + * @return: Seq of trends
candidates expanded to associated tweets. + */ + private def trendsCandidatesToTweetCandidates( + _target: Target, + trendsCandidates: Seq[TrendsCandidate], + getTweetyPieResults: (Seq[TweetId], Target) => Future[Map[TweetId, TweetyPieResult]] + ): Future[Seq[RawCandidate with TrendsCandidate]] = { + + def generateTrendTweetCandidates( + trendCandidate: TrendsCandidate, + tweetyPieResults: Map[TweetId, TweetyPieResult] + ) = { + val tweetIds = trendCandidate.context.curatedRepresentativeTweets.getOrElse(Seq.empty) ++ + trendCandidate.context.algoRepresentativeTweets.getOrElse(Seq.empty) + + tweetIds.flatMap { tweetId => + tweetyPieResults.get(tweetId).map { _tweetyPieResult => + new RawCandidate with TrendTweetCandidate { + override val trendId: String = trendCandidate.trendId + override val trendName: String = trendCandidate.trendName + override val landingUrl: String = trendCandidate.landingUrl + override val timeBoundedLandingUrl: Option[String] = + trendCandidate.timeBoundedLandingUrl + override val context: TrendsContext = trendCandidate.context + override val tweetyPieResult: Option[TweetyPieResult] = Some(_tweetyPieResult) + override val tweetId: TweetId = _tweetyPieResult.tweet.id + override val target: Target = _target + } + } + } + } + + // collect all tweet ids associated with all trends + val allTweetIds = trendsCandidates.flatMap { trendsCandidate => + val context = trendsCandidate.context + context.curatedRepresentativeTweets.getOrElse(Seq.empty) ++ + context.algoRepresentativeTweets.getOrElse(Seq.empty) + } + + getTweetyPieResults(allTweetIds, _target) + .map { tweetIdToTweetyPieResult => + val trendTweetCandidates = trendsCandidates.flatMap { trendCandidate => + val allTrendTweetCandidates = generateTrendTweetCandidates( + trendCandidate, + tweetIdToTweetyPieResult + ) + + val (tweetCandidatesFromCuratedTrends, tweetCandidatesFromNonCuratedTrends) = + allTrendTweetCandidates.partition(_.isCuratedTrend) + + tweetCandidatesFromCuratedTrends.filter( + 
_.target.params(PushFeatureSwitchParams.EnableCuratedTrendTweets)) ++ + tweetCandidatesFromNonCuratedTrends.filter( + _.target.params(PushFeatureSwitchParams.EnableNonCuratedTrendTweets)) + } + + trendTweetCandidateNumber.incr(trendTweetCandidates.size) + trendTweetCandidates + } + } + + /** + * + * @param target: [[Target]] user + * @return: true if customer is eligible to receive trend tweet notifications + * + */ + override def isCandidateSourceAvailable(target: Target): Future[Boolean] = { + PushDeviceUtil + .isRecommendationsEligible(target) + .map(target.params(PushParams.TrendsCandidateDecider) && _) + } + + override def get(target: Target): Future[Option[Seq[RawCandidate with TrendsCandidate]]] = { + recommendedTrendsWithTweetsCandidateSource + .get(target) + .flatMap { + case Some(candidates) if candidates.nonEmpty => + trendTweetFatiguePredicate(Seq(target)) + .map(_.head) + .map { isTargetFatigueEligible => + if (isTargetFatigueEligible) Some(candidates) + else None + } + + case _ => Future.None + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TripGeoCandidatesAdaptor.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TripGeoCandidatesAdaptor.scala new file mode 100644 index 000000000..2bdef162c --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/adaptor/TripGeoCandidatesAdaptor.scala @@ -0,0 +1,188 @@ +package com.twitter.frigate.pushservice.adaptor + +import com.twitter.content_mixer.thriftscala.ContentMixerProductResponse +import com.twitter.content_mixer.thriftscala.ContentMixerRequest +import com.twitter.content_mixer.thriftscala.ContentMixerResponse +import com.twitter.content_mixer.thriftscala.NotificationsTripTweetsProductContext +import com.twitter.content_mixer.thriftscala.Product +import com.twitter.content_mixer.thriftscala.ProductContext +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateSource +import 
com.twitter.frigate.common.base.CandidateSourceEligible
+import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.filterOutReplyTweet
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.frigate.pushservice.util.MediaCRT
+import com.twitter.frigate.pushservice.util.PushAdaptorUtil
+import com.twitter.frigate.pushservice.util.PushDeviceUtil
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.geoduck.util.country.CountryInfo
+import com.twitter.product_mixer.core.thriftscala.ClientContext
+import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
+import com.twitter.storehaus.ReadableStore
+import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
+import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
+import com.twitter.util.Future
+
+/**
+ * Candidate source producing [[CommonRecommendationType.TripGeoTweet]] tweet candidates.
+ *
+ * Logged-out targets: tweet ids are read from `tripTweetCandidateStore` for the target's
+ * language and country, then hydrated through `tweetyPieStoreNoVF`.
+ *
+ * All other targets: tweet ids are requested from `contentMixerStore`, hydrated through
+ * `tweetyPieStore` or `tweetyPieStoreNoVF` depending on
+ * [[PushFeatureSwitchParams.EnableVFInTweetypie]], and reply tweets are filtered out.
+ *
+ * @param tripTweetCandidateStore store queried for logged-out targets
+ * @param contentMixerStore store queried for all other targets
+ * @param tweetyPieStore tweet hydration store used when EnableVFInTweetypie is on
+ * @param tweetyPieStoreNoVF tweet hydration store used otherwise, and for logged-out targets
+ * @param statsReceiver receiver under which this adaptor's counters are scoped
+ */
+case class TripGeoCandidatesAdaptor(
+  tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets],
+  contentMixerStore: ReadableStore[ContentMixerRequest, ContentMixerResponse],
+  tweetyPieStore: ReadableStore[Long, TweetyPieResult],
+  tweetyPieStoreNoVF: ReadableStore[Long, TweetyPieResult],
+  statsReceiver: StatsReceiver)
+    extends CandidateSource[Target, RawCandidate]
+    with CandidateSourceEligible[Target, RawCandidate] {
+
+  override def name: String = this.getClass.getSimpleName
+
+  private val stats = statsReceiver.scope(name.stripSuffix("$"))
+
+  private val contentMixerRequests = stats.counter("getTripCandidatesContentMixerRequests")
+  private val loggedOutTripTweetIds = stats.counter("logged_out_trip_tweet_ids_count")
+  private val loggedOutRawCandidates = stats.counter("logged_out_raw_candidates_count")
+  private val rawCandidates = stats.counter("raw_candidates_count")
+  private val loggedOutEmptyPlaceId = stats.counter("logged_out_empty_place_id_count")
+  private val loggedOutPlaceId = stats.counter("logged_out_place_id_count")
+  private val nonReplyTweetsCounter = stats.counter("non_reply_tweets")
+
+  /**
+   * Logged-out targets are always eligible. Any other target must be eligible for
+   * recommendations, have a non-empty inferred device language, and have
+   * [[PushParams.TripGeoTweetCandidatesDecider]] enabled.
+   */
+  override def isCandidateSourceAvailable(target: Target): Future[Boolean] = {
+    if (target.isLoggedOutUser) {
+      Future.True
+    } else {
+      for {
+        isRecommendationsSettingEnabled <- PushDeviceUtil.isRecommendationsEligible(target)
+        inferredLanguage <- target.inferredUserDeviceLanguage
+      } yield {
+        isRecommendationsSettingEnabled &&
+        inferredLanguage.nonEmpty &&
+        target.params(PushParams.TripGeoTweetCandidatesDecider)
+      }
+    }
+  }
+
+  /** Wraps a hydrated tweet as an out-of-network candidate whose three CRT slots are all TripGeoTweet. */
+  private def buildRawCandidate(target: Target, tweetyPieResult: TweetyPieResult): RawCandidate = {
+    PushAdaptorUtil.generateOutOfNetworkTweetCandidates(
+      inputTarget = target,
+      id = tweetyPieResult.tweet.id,
+      mediaCRT = MediaCRT(
+        CommonRecommendationType.TripGeoTweet,
+        CommonRecommendationType.TripGeoTweet,
+        CommonRecommendationType.TripGeoTweet
+      ),
+      result = Some(tweetyPieResult),
+      localizedEntity = None
+    )
+  }
+
+  /**
+   * Returns the hydrated trip tweet candidates for `target`, or None when there are none.
+   *
+   * For logged-in targets the candidates are additionally withheld unless
+   * [[PushFeatureSwitchParams.TripTweetCandidateReturnEnable]] is on. That check happens after
+   * the fetch, so the upstream requests are still issued when the switch is off.
+   */
+  override def get(target: Target): Future[Option[Seq[RawCandidate]]] = {
+    if (target.isLoggedOutUser) {
+      for {
+        tripTweetIds <- getTripCandidatesForLoggedOutTarget(target)
+        tweetyPieResults <- Future.collect(tweetyPieStoreNoVF.multiGet(tripTweetIds))
+      } yield {
+        val candidates = tweetyPieResults.values.flatten.map(buildRawCandidate(target, _))
+        if (candidates.nonEmpty) {
+          loggedOutRawCandidates.incr(candidates.size)
+          Some(candidates.toSeq)
+        } else None
+      }
+    } else {
+      for {
+        tripTweetIds <- getTripCandidatesContentMixer(target)
+        tweetyPieResults <- {
+          // Plain if/else instead of pattern matching on a Boolean.
+          val hydrationStore =
+            if (target.params(PushFeatureSwitchParams.EnableVFInTweetypie)) tweetyPieStore
+            else tweetyPieStoreNoVF
+          Future.collect(hydrationStore.multiGet(tripTweetIds))
+        }
+      } yield {
+        val nonReplyTweets = filterOutReplyTweet(tweetyPieResults, nonReplyTweetsCounter)
+        val candidates = nonReplyTweets.values.flatten.map(buildRawCandidate(target, _))
+        if (candidates.nonEmpty && target.params(
+            PushFeatureSwitchParams.TripTweetCandidateReturnEnable)) {
+          rawCandidates.incr(candidates.size)
+          Some(candidates.toSeq)
+        } else None
+      }
+    }
+  }
+
+  /**
+   * Requests trip tweets from Content Mixer for a logged-in target and returns their tweet ids.
+   * A missing store response yields an empty set.
+   */
+  private def getTripCandidatesContentMixer(
+    target: Target
+  ): Future[Set[Long]] = {
+    contentMixerRequests.incr()
+    Future
+      .join(
+        target.inferredUserDeviceLanguage,
+        target.deviceInfo
+      )
+      .flatMap {
+        case (languageOpt, deviceInfoOpt) =>
+          contentMixerStore
+            .get(
+              ContentMixerRequest(
+                clientContext = ClientContext(
+                  userId = Some(target.targetId),
+                  languageCode = languageOpt,
+                  userAgent = deviceInfoOpt.flatMap(_.guessedPrimaryDeviceUserAgent.map(_.toString))
+                ),
+                product = Product.NotificationsTripTweets,
+                productContext = Some(
+                  ProductContext.NotificationsTripTweetsProductContext(
+                    NotificationsTripTweetsProductContext()
+                  )),
+                cursor = None,
+                maxResults =
+                  Some(target.params(PushFeatureSwitchParams.TripTweetMaxTotalCandidates))
+              )
+            ).map {
+              _.map { rawResponse =>
+                // NOTE(review): unchecked cast - a response carrying any other product variant
+                // fails this Future with a ClassCastException. Presumably the store only ever
+                // answers with the requested product; confirm before relaxing this.
+                val tripResponse =
+                  rawResponse.contentMixerProductResponse
+                    .asInstanceOf[
+                      ContentMixerProductResponse.NotificationsTripTweetsProductResponse]
+                    .notificationsTripTweetsProductResponse
+
+                tripResponse.results.map(_.tweetResult.tweetId).toSet
+              }.getOrElse(Set.empty)
+            }
+      }
+  }
+
+  /**
+   * Looks up trip tweet ids for a logged-out target from its language and country.
+   * Returns an empty set when either is unknown or the store has no entry for the query.
+   */
+  private def getTripCandidatesForLoggedOutTarget(
+    target: Target
+  ): Future[Set[Long]] = {
+    Future.join(target.targetLanguage, target.countryCode).flatMap {
+      case (Some(lang), Some(country)) =>
+        val placeId = CountryInfo.lookupByCode(country).map(_.placeIdLong)
+        if (placeId.nonEmpty) {
+          loggedOutPlaceId.incr()
+        } else {
+          loggedOutEmptyPlaceId.incr()
+        }
+        val tripSource = "TOP_GEO_V3_LR"
+        val tripQuery = TripDomain(
+          sourceId = tripSource,
+          language = Some(lang),
+          placeId = placeId,
+          topicId = None
+        )
+        tripTweetCandidateStore.get(tripQuery).map { tripTweetsOpt =>
+          // NOTE(review): the descending-score ordering is not guaranteed to survive `toSet`
+          // beyond a handful of elements - confirm whether callers rely on it.
+          val tripTweetIds = tripTweetsOpt
+            .map(_.tweets.sortBy(_.score)(Ordering[Double].reverse).map(_.tweetId).toSet)
+            .getOrElse(Set.empty[Long])
+          // Count inside the same transformation rather than on a second, discarded Future.
+          loggedOutTripTweetIds.incr(tripTweetIds.size)
+          tripTweetIds
+        }
+
+      case (_, _) => Future.value(Set.empty)
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/Config.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/Config.scala
new file mode 100644
index 000000000..3a0e1dc70
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/Config.scala
@@ -0,0 +1,461 @@
+package com.twitter.frigate.pushservice.config
+
+import com.twitter.abdecider.LoggingABDecider
+import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest
+import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse
+import com.twitter.audience_rewards.thriftscala.HasSuperFollowingRelationshipRequest
+import com.twitter.channels.common.thriftscala.ApiList
+import com.twitter.datatools.entityservice.entities.sports.thriftscala._
+import com.twitter.decider.Decider
+import com.twitter.discovery.common.configapi.ConfigParamsBuilder
+import com.twitter.escherbird.common.thriftscala.QualifiedId
+import com.twitter.escherbird.metadata.thriftscala.EntityMegadata
+import com.twitter.eventbus.client.EventBusPublisher
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.finagle.thrift.ClientId
+import com.twitter.frigate.common.base._
+import com.twitter.frigate.common.candidate._
+import com.twitter.frigate.common.history._
+import com.twitter.frigate.common.ml.base._
+import com.twitter.frigate.common.ml.feature._
+import com.twitter.frigate.common.store._
+import com.twitter.frigate.common.store.deviceinfo.DeviceInfo
+import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext
+import com.twitter.frigate.common.store.interests.UserId
+import com.twitter.frigate.common.util._
+import
com.twitter.frigate.data_pipeline.features_common._ +import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryKey +import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue +import com.twitter.frigate.dau_model.thriftscala.DauProbability +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.pushcap.thriftscala.PushcapUserHistory +import com.twitter.frigate.pushservice.ml._ +import com.twitter.frigate.pushservice.params.DeciderKey +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushFeatureSwitches +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.send_handler.SendHandlerPushCandidateHydrator +import com.twitter.frigate.pushservice.refresh_handler.PushCandidateHydrator +import com.twitter.frigate.pushservice.store._ +import com.twitter.frigate.pushservice.store.{Ibis2Store => PushIbis2Store} +import com.twitter.frigate.pushservice.take.NotificationServiceRequest +import com.twitter.frigate.pushservice.thriftscala.PushRequestScribe +import com.twitter.frigate.scribe.thriftscala.NotificationScribe +import com.twitter.frigate.thriftscala._ +import com.twitter.frigate.user_states.thriftscala.MRUserHmmState +import com.twitter.geoduck.common.thriftscala.{Location => GeoLocation} +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.hermit.pop_geo.thriftscala.PopTweetsInPlace +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.hermit.predicate.tweetypie.Perspective +import com.twitter.hermit.predicate.tweetypie.UserTweet +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.hermit.store.tweetypie.{UserTweet => TweetyPieUserTweet} +import com.twitter.hermit.stp.thriftscala.STPResult +import com.twitter.hss.api.thriftscala.UserHealthSignalResponse +import 
com.twitter.interests.thriftscala.InterestId +import com.twitter.interests.thriftscala.{UserInterests => Interests} +import com.twitter.interests_discovery.thriftscala.NonPersonalizedRecommendedLists +import com.twitter.interests_discovery.thriftscala.RecommendedListsRequest +import com.twitter.interests_discovery.thriftscala.RecommendedListsResponse +import com.twitter.livevideo.timeline.domain.v2.{Event => LiveEvent} +import com.twitter.ml.api.thriftscala.{DataRecord => ThriftDataRecord} +import com.twitter.ml.featurestore.lib.dynamic.DynamicFeatureStoreClient +import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue +import com.twitter.notificationservice.genericfeedbackstore.GenericFeedbackStore +import com.twitter.notificationservice.scribe.manhattan.GenericNotificationsFeedbackRequest +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse +import com.twitter.nrel.heavyranker.CandidateFeatureHydrator +import com.twitter.nrel.heavyranker.{FeatureHydrator => MRFeatureHydrator} +import com.twitter.nrel.heavyranker.{TargetFeatureHydrator => RelevanceTargetFeatureHydrator} +import com.twitter.onboarding.task.service.thriftscala.FatigueFlowEnrollment +import com.twitter.permissions_storage.thriftscala.AppPermission +import com.twitter.recommendation.interests.discovery.core.model.InterestDomain +import com.twitter.recos.user_tweet_entity_graph.thriftscala.RecommendTweetEntityRequest +import com.twitter.recos.user_tweet_entity_graph.thriftscala.RecommendTweetEntityResponse +import com.twitter.recos.user_user_graph.thriftscala.RecommendUserRequest +import com.twitter.recos.user_user_graph.thriftscala.RecommendUserResponse +import com.twitter.rux.common.strato.thriftscala.UserTargetingProperty +import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWProducer +import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWUserSegmentation 
+import com.twitter.search.common.features.thriftscala.ThriftSearchResultFeatures
+import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
+import com.twitter.search.earlybird.thriftscala.ThriftSearchResult
+import com.twitter.service.gen.scarecrow.thriftscala.Event
+import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult
+import com.twitter.service.metastore.gen.thriftscala.Location
+import com.twitter.service.metastore.gen.thriftscala.UserLanguages
+import com.twitter.servo.decider.DeciderGateBuilder
+import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities
+import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.columns.frigate.logged_out_web_notifications.thriftscala.LOWebNotificationMetadata
+import com.twitter.strato.columns.notifications.thriftscala.SourceDestUserRequest
+import com.twitter.strato.client.{UserId => StratoUserId}
+import com.twitter.timelines.configapi
+import com.twitter.timelines.configapi.CompositeConfig
+import com.twitter.timelinescorer.thriftscala.v1.ScoredTweet
+import com.twitter.topiclisting.TopicListing
+import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
+import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
+import com.twitter.tsp.thriftscala.TopicSocialProofRequest
+import com.twitter.tsp.thriftscala.TopicSocialProofResponse
+import com.twitter.ubs.thriftscala.SellerTrack
+import com.twitter.ubs.thriftscala.AudioSpace
+import com.twitter.ubs.thriftscala.Participants
+import com.twitter.ubs.thriftscala.SellerApplicationState
+import com.twitter.user_session_store.thriftscala.UserSession
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.wtf.scalding.common.thriftscala.UserFeatures
+
+/**
+ * Declares the dependencies of the push service: stores, model scorers, feature builders,
+ * hydrators, deciders and scribe hooks.
+ *
+ * Almost every member is abstract and must be supplied by an implementation. The `lazy val`s,
+ * the two decider-key accessors and `init()` have default definitions built from the abstract
+ * members.
+ */
+trait Config {
+  // Alias for this instance.
+  self =>
+
+  def isServiceLocal: Boolean
+
+  def localConfigRepoPath: String
+
+  def inMemCacheOff: Boolean
+
+  def historyStore: PushServiceHistoryStore
+
+  def emailHistoryStore: PushServiceHistoryStore
+
+  def strongTiesStore: ReadableStore[Long, STPResult]
+
+  def safeUserStore: ReadableStore[Long, User]
+
+  def deviceInfoStore: ReadableStore[Long, DeviceInfo]
+
+  def edgeStore: ReadableStore[RelationEdge, Boolean]
+
+  def socialGraphServiceProcessStore: ReadableStore[RelationEdge, Boolean]
+
+  def userUtcOffsetStore: ReadableStore[Long, Duration]
+
+  def cachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult]
+
+  def safeCachedTweetyPieStoreV2: ReadableStore[Long, TweetyPieResult]
+
+  def userTweetTweetyPieStore: ReadableStore[TweetyPieUserTweet, TweetyPieResult]
+
+  def safeUserTweetTweetyPieStore: ReadableStore[TweetyPieUserTweet, TweetyPieResult]
+
+  def cachedTweetyPieStoreV2NoVF: ReadableStore[Long, TweetyPieResult]
+
+  def tweetContentFeatureCacheStore: ReadableStore[Long, ThriftDataRecord]
+
+  def scarecrowCheckEventStore: ReadableStore[Event, TieredActionResult]
+
+  def userTweetPerspectiveStore: ReadableStore[UserTweet, Perspective]
+
+  def userCountryStore: ReadableStore[Long, Location]
+
+  def pushInfoStore: ReadableStore[Long, UserForPushTargeting]
+
+  def loggedOutPushInfoStore: ReadableStore[Long, LOWebNotificationMetadata]
+
+  def tweetImpressionStore: ReadableStore[Long, Seq[Long]]
+
+  def audioSpaceStore: ReadableStore[String, AudioSpace]
+
+  def basketballGameScoreStore: ReadableStore[QualifiedId, BasketballGameLiveUpdate]
+
+  def baseballGameScoreStore: ReadableStore[QualifiedId, BaseballGameLiveUpdate]
+
+  def cricketMatchScoreStore: ReadableStore[QualifiedId, CricketMatchLiveUpdate]
+
+  def soccerMatchScoreStore: ReadableStore[QualifiedId, SoccerMatchLiveUpdate]
+
+  def nflGameScoreStore: ReadableStore[QualifiedId, NflFootballGameLiveUpdate]
+
+  def topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse]
+
+  def spaceDeviceFollowStore: ReadableStore[SourceDestUserRequest, Boolean]
+
+  def audioSpaceParticipantsStore: ReadableStore[String, Participants]
+
+  def notificationServiceSender: ReadableStore[
+    NotificationServiceRequest,
+    CreateGenericNotificationResponse
+  ]
+
+  def ocfFatigueStore: ReadableStore[OCFHistoryStoreKey, FatigueFlowEnrollment]
+
+  def dauProbabilityStore: ReadableStore[Long, DauProbability]
+
+  def hydratedLabeledPushRecsStore: ReadableStore[UserHistoryKey, UserHistoryValue]
+
+  def userHTLLastVisitStore: ReadableStore[Long, Seq[Long]]
+
+  def userLanguagesStore: ReadableStore[Long, UserLanguages]
+
+  def topTweetsByGeoStore: ReadableStore[InterestDomain[String], Map[String, List[
+    (Long, Double)
+  ]]]
+
+  def topTweetsByGeoV2VersionedStore: ReadableStore[String, PopTweetsInPlace]
+
+  // Derived store: built from `hydratedLabeledPushRecsStore`.
+  lazy val pushRecItemStore: ReadableStore[PushRecItemsKey, RecItems] = PushRecItemStore(
+    hydratedLabeledPushRecsStore
+  )
+
+  // Derived store: built from `hydratedLabeledPushRecsStore` and `historyStore`.
+  lazy val labeledPushRecsVerifyingStore: ReadableStore[
+    LabeledPushRecsVerifyingStoreKey,
+    LabeledPushRecsVerifyingStoreResponse
+  ] =
+    LabeledPushRecsVerifyingStore(
+      hydratedLabeledPushRecsStore,
+      historyStore
+    )
+
+  // Derived store: wraps `labeledPushRecsVerifyingStore` together with the two decider key
+  // names defined further down in this trait.
+  lazy val labeledPushRecsDecideredStore: ReadableStore[LabeledPushRecsStoreKey, UserHistoryValue] =
+    LabeledPushRecsDecideredStore(
+      labeledPushRecsVerifyingStore,
+      useHydratedLabeledSendsForFeaturesDeciderKey,
+      verifyHydratedLabeledSendsForFeaturesDeciderKey
+    )
+
+  def onlineUserHistoryStore: ReadableStore[OnlineUserHistoryKey, UserHistoryValue]
+
+  def nsfwConsumerStore: ReadableStore[Long, NSFWUserSegmentation]
+
+  def nsfwProducerStore: ReadableStore[Long, NSFWProducer]
+
+  def popGeoLists: ReadableStore[String, NonPersonalizedRecommendedLists]
+
+  def listAPIStore: ReadableStore[Long, ApiList]
+
+  def openedPushByHourAggregatedStore: ReadableStore[Long, Map[Int, Int]]
+
+  def userHealthSignalStore: ReadableStore[Long, UserHealthSignalResponse]
+
+  def reactivatedUserInfoStore: ReadableStore[Long, String]
+
+  def weightedOpenOrNtabClickModelScorer: PushMLModelScorer
+
+  def optoutModelScorer: PushMLModelScorer
+
+  def filteringModelScorer: PushMLModelScorer
+
+  def recentFollowsStore: ReadableStore[Long, Seq[Long]]
+
+  def geoDuckV2Store: ReadableStore[UserId, LocationResponse]
+
+  def realGraphScoresTop500InStore: ReadableStore[Long, Map[Long, Double]]
+
+  def tweetEntityGraphStore: ReadableStore[
+    RecommendTweetEntityRequest,
+    RecommendTweetEntityResponse
+  ]
+
+  def userUserGraphStore: ReadableStore[RecommendUserRequest, RecommendUserResponse]
+
+  def userFeaturesStore: ReadableStore[Long, UserFeatures]
+
+  def userTargetingPropertyStore: ReadableStore[Long, UserTargetingProperty]
+
+  def timelinesUserSessionStore: ReadableStore[Long, UserSession]
+
+  def optOutUserInterestsStore: ReadableStore[UserId, Seq[InterestId]]
+
+  def ntabCaretFeedbackStore: ReadableStore[GenericNotificationsFeedbackRequest, Seq[
+    CaretFeedbackDetails
+  ]]
+
+  def genericFeedbackStore: ReadableStore[FeedbackRequest, Seq[
+    FeedbackPromptValue
+  ]]
+
+  def genericNotificationFeedbackStore: GenericFeedbackStore
+
+  def semanticCoreMegadataStore: ReadableStore[
+    SemanticEntityForQuery,
+    EntityMegadata
+  ]
+
+  def tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse]
+
+  def earlybirdFeatureStore: ReadableStore[Long, ThriftSearchResultFeatures]
+
+  def earlybirdFeatureBuilder: FeatureBuilder[Long]
+
+  // Feature builders
+
+  def tweetAuthorLocationFeatureBuilder: FeatureBuilder[Location]
+
+  def tweetAuthorLocationFeatureBuilderById: FeatureBuilder[Long]
+
+  def socialContextActionsFeatureBuilder: FeatureBuilder[SocialContextActions]
+
+  def tweetContentFeatureBuilder: FeatureBuilder[Long]
+
+  def tweetAuthorRecentRealGraphFeatureBuilder: FeatureBuilder[RealGraphEdge]
+
+  def socialContextRecentRealGraphFeatureBuilder: FeatureBuilder[Set[RealGraphEdge]]
+
+  def tweetSocialProofFeatureBuilder: FeatureBuilder[TweetSocialProofKey]
+
+  def targetUserFullRealGraphFeatureBuilder: FeatureBuilder[TargetFullRealGraphFeatureKey]
+
+  def postProcessingFeatureBuilder: PostProcessingFeatureBuilder
+
+  def mrOfflineUserCandidateSparseAggregatesFeatureBuilder: FeatureBuilder[
+    OfflineSparseAggregateKey
+  ]
+
+  def mrOfflineUserAggregatesFeatureBuilder: FeatureBuilder[Long]
+
+  def mrOfflineUserCandidateAggregatesFeatureBuilder: FeatureBuilder[OfflineAggregateKey]
+
+  def tweetAnnotationsFeatureBuilder: FeatureBuilder[Long]
+
+  def targetUserMediaRepresentationFeatureBuilder: FeatureBuilder[Long]
+
+  def targetLevelFeatureBuilder: FeatureBuilder[MrRequestContextForFeatureStore]
+
+  def candidateLevelFeatureBuilder: FeatureBuilder[EntityRequestContextForFeatureStore]
+
+  def targetFeatureHydrator: RelevanceTargetFeatureHydrator
+
+  // Decider key names passed to `labeledPushRecsDecideredStore`; both default to the string
+  // form of the corresponding `DeciderKey` value.
+  def useHydratedLabeledSendsForFeaturesDeciderKey: String =
+    DeciderKey.useHydratedLabeledSendsForFeaturesDeciderKey.toString
+
+  def verifyHydratedLabeledSendsForFeaturesDeciderKey: String =
+    DeciderKey.verifyHydratedLabeledSendsForFeaturesDeciderKey.toString
+
+  def lexServiceStore: ReadableStore[EventRequest, LiveEvent]
+
+  def userMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation]
+
+  def producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation]
+
+  def mrUserStatePredictionStore: ReadableStore[Long, MRUserHmmState]
+
+  def pushcapDynamicPredictionStore: ReadableStore[Long, PushcapUserHistory]
+
+  def earlybirdCandidateSource: EarlybirdCandidateSource
+
+  def earlybirdSearchStore: ReadableStore[EarlybirdRequest, Seq[ThriftSearchResult]]
+
+  def earlybirdSearchDest: String
+
+  def pushserviceThriftClientId: ClientId
+
+  def simClusterToEntityStore: ReadableStore[Int, SimClustersInferredEntities]
+
+  def fanoutMetadataStore: ReadableStore[(Long, Long), FanoutEvent]
+
+  /**
+   * PostRanking Feature Store Client
+   */
+  def postRankingFeatureStoreClient: DynamicFeatureStoreClient[MrRequestContextForFeatureStore]
+
+  /**
+   * ReadableStore to fetch [[Interests]] (the thrift `UserInterests`, imported under that
+   * alias) from INTS service
+   */
+  def interestsWithLookupContextStore: ReadableStore[InterestsLookupRequestWithContext, Interests]
+
+  /**
+   *
+   * @return: [[TopicListing]] object to fetch paused topics and scope from productId
+   */
+  def topicListing: TopicListing
+
+  /**
+   *
+   * @return: [[UttEntityHydrationStore]] object
+   */
+  def uttEntityHydrationStore: UttEntityHydrationStore
+
+  def appPermissionStore: ReadableStore[(Long, (String, String)), AppPermission]
+
+  // Derived candidate source: built from `cachedTweetyPieStoreV2`, `tweetEntityGraphStore` and
+  // the listed params, with `statsReceiver` passed explicitly as the second argument list.
+  lazy val userTweetEntityGraphCandidates: UserTweetEntityGraphCandidates =
+    UserTweetEntityGraphCandidates(
+      cachedTweetyPieStoreV2,
+      tweetEntityGraphStore,
+      PushParams.UTEGTweetCandidateSourceParam,
+      PushFeatureSwitchParams.NumberOfMaxUTEGCandidatesQueriedParam,
+      PushParams.AllowOneSocialProofForTweetInUTEGParam,
+      PushParams.OutNetworkTweetsOnlyForUTEGParam,
+      PushFeatureSwitchParams.MaxTweetAgeParam
+    )(statsReceiver)
+
+  def pushSendEventBusPublisher: EventBusPublisher[NotificationScribe]
+
+  // Miscellaneous
+
+  def isProd: Boolean
+
+  implicit def statsReceiver: StatsReceiver
+
+  def decider: Decider
+
+  def abDecider: LoggingABDecider
+
+  def casLock: CasLock
+
+  def pushIbisV2Store: PushIbis2Store
+
+  // Scribe hooks
+  def notificationScribe(data: NotificationScribe): Unit
+
+  def requestScribe(data: PushRequestScribe): Unit
+
+  // Initialization hook; the default does nothing and returns `Future.Done`.
+  def init(): Future[Unit] = Future.Done
+
+  def configParamsBuilder: ConfigParamsBuilder
+
+  def candidateFeatureHydrator: CandidateFeatureHydrator
+
+  def featureHydrator: MRFeatureHydrator
+
+  def candidateHydrator: PushCandidateHydrator
+
+  def sendHandlerCandidateHydrator: SendHandlerPushCandidateHydrator
+
+  // Config produced by `PushFeatureSwitches` (built from `decider` and `statsReceiver`),
+  // wrapped in a single-element `CompositeConfig`.
+  lazy val overridesConfig: configapi.Config = {
+    val pushFeatureSwitchConfigs: configapi.Config = PushFeatureSwitches(
+      deciderGateBuilder = new DeciderGateBuilder(decider),
+      statsReceiver = statsReceiver
+    ).config
+
+    new CompositeConfig(Seq(pushFeatureSwitchConfigs))
+  }
+
+  def realTimeClientEventStore: RealTimeClientEventStore
+
+  def inlineActionHistoryStore: ReadableStore[Long, Seq[(Long, String)]]
+
+  def softUserGeoLocationStore: ReadableStore[Long, GeoLocation]
+
+  def tweetTranslationStore: ReadableStore[TweetTranslationStore.Key, TweetTranslationStore.Value]
+
+  def tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets]
+
+  def softUserFollowingStore: ReadableStore[User, Seq[Long]]
+
+  def superFollowEligibilityUserStore: ReadableStore[Long, Boolean]
+
+  def superFollowCreatorTweetCountStore: ReadableStore[StratoUserId, Int]
+
+  def hasSuperFollowingRelationshipStore: ReadableStore[
+    HasSuperFollowingRelationshipRequest,
+    Boolean
+  ]
+
+  def superFollowApplicationStatusStore: ReadableStore[(Long, SellerTrack), SellerApplicationState]
+
+  def recentHistoryCacheClient: RecentHistoryCacheClient
+
+  def openAppUserStore: ReadableStore[Long, Boolean]
+
+  def loggedOutHistoryStore: PushServiceHistoryStore
+
+  def idsStore: ReadableStore[RecommendedListsRequest, RecommendedListsResponse]
+
+  // Unlike the other stores this one is obtained per user id.
+  def htlScoreStore(userId: Long): ReadableStore[Long, ScoredTweet]
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/DeployConfig.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/DeployConfig.scala
new file mode 100644
index 000000000..8d6e95a67
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/DeployConfig.scala
@@ -0,0 +1,2150 @@
+package com.twitter.frigate.pushservice.config
+
+import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest
+import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse
+import com.twitter.audience_rewards.thriftscala.HasSuperFollowingRelationshipRequest
+import com.twitter.bijection.scrooge.BinaryScalaCodec
+import com.twitter.bijection.scrooge.CompactScalaCodec
+import com.twitter.channels.common.thriftscala.ApiList
+import com.twitter.channels.common.thriftscala.ApiListDisplayLocation
+import com.twitter.channels.common.thriftscala.ApiListView
+import com.twitter.content_mixer.thriftscala.ContentMixer
+import
com.twitter.conversions.DurationOps._ +import com.twitter.cortex.deepbird.thriftjava.DeepbirdPredictionService +import com.twitter.cr_mixer.thriftscala.CrMixer +import com.twitter.datatools.entityservice.entities.sports.thriftscala.BaseballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.BasketballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.CricketMatchLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.NflFootballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SoccerMatchLiveUpdate +import com.twitter.discovery.common.configapi.ConfigParamsBuilder +import com.twitter.discovery.common.configapi.FeatureContextBuilder +import com.twitter.discovery.common.environment.{Environment => NotifEnvironment} +import com.twitter.escherbird.common.thriftscala.Domains +import com.twitter.escherbird.common.thriftscala.QualifiedId +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.escherbird.metadata.thriftscala.MetadataService +import com.twitter.escherbird.util.metadatastitch.MetadataStitchClient +import com.twitter.escherbird.util.uttclient +import com.twitter.escherbird.util.uttclient.CacheConfigV2 +import com.twitter.escherbird.util.uttclient.CachedUttClientV2 +import com.twitter.escherbird.utt.strato.thriftscala.Environment +import com.twitter.eventbus.client.EventBusPublisherBuilder +import com.twitter.events.recos.thriftscala.EventsRecosService +import com.twitter.explore_ranker.thriftscala.ExploreRanker +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.Memcached +import com.twitter.finagle.ThriftMux +import com.twitter.finagle.client.BackupRequestFilter +import com.twitter.finagle.client.ClientRegistry +import com.twitter.finagle.loadbalancer.Balancers +import com.twitter.finagle.memcached.Client +import 
com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.mtls.client.MtlsStackClient._ +import com.twitter.finagle.mux.transport.OpportunisticTls +import com.twitter.finagle.service.Retries +import com.twitter.finagle.service.RetryPolicy +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.finagle.thrift.RichClientParam +import com.twitter.finagle.util.DefaultTimer +import com.twitter.flockdb.client._ +import com.twitter.flockdb.client.thriftscala.FlockDB +import com.twitter.frigate.common.base.RandomRanker +import com.twitter.frigate.common.candidate._ +import com.twitter.frigate.common.config.RateLimiterGenerator +import com.twitter.frigate.common.entity_graph_client.RecommendedTweetEntitiesStore +import com.twitter.frigate.common.filter.DynamicRequestMeterFilter +import com.twitter.frigate.common.history._ +import com.twitter.frigate.common.ml.feature._ +import com.twitter.frigate.common.store._ +import com.twitter.frigate.common.store.deviceinfo.DeviceInfoStore +import com.twitter.frigate.common.store.deviceinfo.MobileSdkStore +import com.twitter.frigate.common.store.interests._ +import com.twitter.frigate.common.store.strato.StratoFetchableStore +import com.twitter.frigate.common.store.strato.StratoScannableStore +import com.twitter.frigate.common.util.Finagle.readOnlyThriftService +import com.twitter.frigate.common.util._ +import com.twitter.frigate.data_pipeline.features_common.FeatureStoreUtil +import com.twitter.frigate.data_pipeline.features_common._ +import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryKey +import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue +import com.twitter.frigate.dau_model.thriftscala.DauProbability +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.pushcap.thriftscala.PushcapUserHistory +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import 
com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.adaptor.LoggedOutPushCandidateSourceGenerator +import com.twitter.frigate.pushservice.adaptor.PushCandidateSourceGenerator +import com.twitter.frigate.pushservice.config.mlconfig.DeepbirdV2ModelConfig +import com.twitter.frigate.pushservice.ml._ +import com.twitter.frigate.pushservice.params._ +import com.twitter.frigate.pushservice.rank.LoggedOutRanker +import com.twitter.frigate.pushservice.rank.RFPHLightRanker +import com.twitter.frigate.pushservice.rank.RFPHRanker +import com.twitter.frigate.pushservice.rank.SubscriptionCreatorRanker +import com.twitter.frigate.pushservice.refresh_handler._ +import com.twitter.frigate.pushservice.refresh_handler.cross.CandidateCopyExpansion +import com.twitter.frigate.pushservice.send_handler.SendHandlerPushCandidateHydrator +import com.twitter.frigate.pushservice.store._ +import com.twitter.frigate.pushservice.take.CandidateNotifier +import com.twitter.frigate.pushservice.take.NotificationSender +import com.twitter.frigate.pushservice.take.NotificationServiceRequest +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.frigate.pushservice.take.NtabOnlyChannelSelector +import com.twitter.frigate.pushservice.take.history.EventBusWriter +import com.twitter.frigate.pushservice.take.history.HistoryWriter +import com.twitter.frigate.pushservice.take.sender.Ibis2Sender +import com.twitter.frigate.pushservice.take.sender.NtabSender +import com.twitter.frigate.pushservice.take.LoggedOutRefreshForPushNotifier +import com.twitter.frigate.pushservice.util.RFPHTakeStepUtil +import com.twitter.frigate.pushservice.util.SendHandlerPredicateUtil +import com.twitter.frigate.scribe.thriftscala.NotificationScribe +import com.twitter.frigate.thriftscala._ +import com.twitter.frigate.user_states.thriftscala.MRUserHmmState +import com.twitter.geoduck.backend.hydration.thriftscala.Hydration +import 
com.twitter.geoduck.common.thriftscala.PlaceQueryFields +import com.twitter.geoduck.common.thriftscala.PlaceType +import com.twitter.geoduck.common.thriftscala.{Location => GeoLocation} +import com.twitter.geoduck.service.common.clientmodules.GeoduckUserLocate +import com.twitter.geoduck.service.common.clientmodules.GeoduckUserLocateModule +import com.twitter.geoduck.service.thriftscala.LocationResponse +import com.twitter.geoduck.thriftscala.LocationService +import com.twitter.gizmoduck.context.thriftscala.ReadConfig +import com.twitter.gizmoduck.context.thriftscala.TestUserConfig +import com.twitter.gizmoduck.testusers.client.TestUserClientBuilder +import com.twitter.gizmoduck.thriftscala.LookupContext +import com.twitter.gizmoduck.thriftscala.QueryFields +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.gizmoduck.thriftscala.UserService +import com.twitter.hermit.pop_geo.thriftscala.PopTweetsInPlace +import com.twitter.hermit.predicate.socialgraph.SocialGraphPredicate +import com.twitter.hermit.predicate.tweetypie.PerspectiveReadableStore +import com.twitter.hermit.store._ +import com.twitter.hermit.store.common._ +import com.twitter.hermit.store.gizmoduck.GizmoduckUserStore +import com.twitter.hermit.store.metastore.UserCountryStore +import com.twitter.hermit.store.metastore.UserLanguagesStore +import com.twitter.hermit.store.scarecrow.ScarecrowCheckEventStore +import com.twitter.hermit.store.semantic_core.MetaDataReadableStore +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.hermit.store.timezone.GizmoduckUserUtcOffsetStore +import com.twitter.hermit.store.timezone.UtcOffsetStore +import com.twitter.hermit.store.tweetypie.TweetyPieStore +import com.twitter.hermit.store.tweetypie.UserTweet +import com.twitter.hermit.store.user_htl_session_store.UserHTLLastVisitReadableStore +import com.twitter.hermit.stp.thriftscala.STPResult +import com.twitter.hss.api.thriftscala.UserHealthSignal +import 
com.twitter.hss.api.thriftscala.UserHealthSignal._ +import com.twitter.hss.api.thriftscala.UserHealthSignalResponse +import com.twitter.interests.thriftscala.InterestId +import com.twitter.interests.thriftscala.InterestsThriftService +import com.twitter.interests.thriftscala.{UserInterests => Interests} +import com.twitter.interests_discovery.thriftscala.InterestsDiscoveryService +import com.twitter.interests_discovery.thriftscala.NonPersonalizedRecommendedLists +import com.twitter.interests_discovery.thriftscala.RecommendedListsRequest +import com.twitter.interests_discovery.thriftscala.RecommendedListsResponse +import com.twitter.kujaku.domain.thriftscala.MachineTranslationResponse +import com.twitter.livevideo.timeline.client.v2.LiveVideoTimelineClient +import com.twitter.livevideo.timeline.domain.v2.{Event => LiveEvent} +import com.twitter.livevideo.timeline.thrift.thriftscala.TimelineService +import com.twitter.logging.Logger +import com.twitter.ml.api.thriftscala.{DataRecord => ThriftDataRecord} +import com.twitter.ml.featurestore.catalog.entities.core.{Author => TweetAuthorEntity} +import com.twitter.ml.featurestore.catalog.entities.core.{User => TargetUserEntity} +import com.twitter.ml.featurestore.catalog.entities.core.{UserAuthor => UserAuthorEntity} +import com.twitter.ml.featurestore.catalog.entities.magicrecs.{SocialContext => SocialContextEntity} +import com.twitter.ml.featurestore.catalog.entities.magicrecs.{UserSocialContext => TargetUserSocialContextEntity} +import com.twitter.ml.featurestore.timelines.thriftscala.TimelineScorerScoreView +import com.twitter.notificationservice.api.thriftscala.DeleteCurrentTimelineForUserRequest +import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue +import com.twitter.notificationservice.genericfeedbackstore.GenericFeedbackStore +import com.twitter.notificationservice.genericfeedbackstore.GenericFeedbackStoreBuilder +import 
com.twitter.notificationservice.scribe.manhattan.FeedbackSignalManhattanClient +import com.twitter.notificationservice.scribe.manhattan.GenericNotificationsFeedbackRequest +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse +import com.twitter.notificationservice.thriftscala.DeleteGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.GenericNotificationOverrideKey +import com.twitter.notificationservice.thriftscala.NotificationService$FinagleClient +import com.twitter.nrel.heavyranker.CandidateFeatureHydrator +import com.twitter.nrel.heavyranker.FeatureHydrator +import com.twitter.nrel.heavyranker.{PushPredictionServiceStore => RelevancePushPredictionServiceStore} +import com.twitter.nrel.heavyranker.{TargetFeatureHydrator => RelevanceTargetFeatureHydrator} +import com.twitter.nrel.lightranker.MagicRecsServeDataRecordLightRanker +import com.twitter.nrel.lightranker.{Config => LightRankerConfig} +import com.twitter.onboarding.task.service.thriftscala.FatigueFlowEnrollment +import com.twitter.periscope.api.thriftscala.AudioSpacesLookupContext +import com.twitter.permissions_storage.thriftscala.AppPermission +import com.twitter.recommendation.interests.discovery.core.config.{DeployConfig => InterestDeployConfig} +import com.twitter.recommendation.interests.discovery.popgeo.deploy.PopGeoInterestProvider +import com.twitter.recos.user_tweet_entity_graph.thriftscala.UserTweetEntityGraph +import com.twitter.recos.user_user_graph.thriftscala.UserUserGraph +import com.twitter.rux.common.strato.thriftscala.UserTargetingProperty +import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWProducer +import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWUserSegmentation +import com.twitter.search.earlybird.thriftscala.EarlybirdService +import 
com.twitter.service.gen.scarecrow.thriftscala.ScarecrowService +import com.twitter.service.metastore.gen.thriftscala.Location +import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities +import com.twitter.socialgraph.thriftscala.SocialGraphService +import com.twitter.spam.rtf.thriftscala.SafetyLevel +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storage.client.manhattan.kv.Guarantee +import com.twitter.storage.client.manhattan.kv.ManhattanKVClient +import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams +import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint +import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpointBuilder +import com.twitter.storehaus.ReadableStore +import com.twitter.storehaus_internal.manhattan.Apollo +import com.twitter.storehaus_internal.manhattan.Athena +import com.twitter.storehaus_internal.manhattan.Dataset +import com.twitter.storehaus_internal.manhattan.ManhattanStore +import com.twitter.storehaus_internal.manhattan.Nash +import com.twitter.storehaus_internal.manhattan.Omega +import com.twitter.storehaus_internal.memcache.MemcacheStore +import com.twitter.storehaus_internal.util.ClientName +import com.twitter.storehaus_internal.util.ZkEndPoint +import com.twitter.strato.catalog.Scan.Slice +import com.twitter.strato.client.Strato +import com.twitter.strato.client.UserId +import com.twitter.strato.columns.frigate.logged_out_web_notifications.thriftscala.LOWebNotificationMetadata +import com.twitter.strato.columns.notifications.thriftscala.SourceDestUserRequest +import com.twitter.strato.generated.client.geo.user.FrequentSoftUserLocationClientColumn +import com.twitter.strato.generated.client.ml.featureStore.TimelineScorerTweetScoresV1ClientColumn +import com.twitter.strato.generated.client.notifications.space_device_follow_impl.SpaceDeviceFollowingClientColumn +import com.twitter.strato.generated.client.periscope.CoreOnAudioSpaceClientColumn +import 
com.twitter.strato.generated.client.periscope.ParticipantsOnAudioSpaceClientColumn +import com.twitter.strato.generated.client.rux.TargetingPropertyOnUserClientColumn +import com.twitter.strato.generated.client.socialgraph.graphs.creatorSubscriptionTimeline.{CountEdgesBySourceClientColumn => CreatorSubscriptionNumTweetsColumn} +import com.twitter.strato.generated.client.translation.service.IsTweetTranslatableClientColumn +import com.twitter.strato.generated.client.translation.service.platform.MachineTranslateTweetClientColumn +import com.twitter.strato.generated.client.trends.trip.TripTweetsAirflowProdClientColumn +import com.twitter.strato.thrift.ScroogeConvImplicits._ +import com.twitter.taxi.common.AppId +import com.twitter.taxi.deploy.Cluster +import com.twitter.taxi.deploy.Env +import com.twitter.topiclisting.TopicListing +import com.twitter.topiclisting.TopicListingBuilder +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets +import com.twitter.tsp.thriftscala.TopicSocialProofRequest +import com.twitter.tsp.thriftscala.TopicSocialProofResponse +import com.twitter.tweetypie.thriftscala.GetTweetOptions +import com.twitter.tweetypie.thriftscala.Tweet.VisibleTextRangeField +import com.twitter.tweetypie.thriftscala.TweetService +import com.twitter.ubs.thriftscala.AudioSpace +import com.twitter.ubs.thriftscala.Participants +import com.twitter.ubs.thriftscala.SellerApplicationState +import com.twitter.user_session_store.thriftscala.UserSession +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.util.Timer +import com.twitter.util.tunable.TunableMap +import com.twitter.wtf.scalding.common.thriftscala.UserFeatures +import org.apache.thrift.protocol.TCompactProtocol +import com.twitter.timelinescorer.thriftscala.v1.ScoredTweet +import com.twitter.ubs.thriftscala.SellerTrack +import com.twitter.wtf.candidate.thriftscala.CandidateSeq + +trait 
DeployConfig extends Config { + // Any finagle clients should not be defined as lazy. If defined lazy, + // ClientRegistry.expAllRegisteredClientsResolved() call in init will not ensure that the clients + // are active before thrift endpoint is active. We want the clients to be active, because zookeeper + // resolution triggered by first request(s) might result in the request(s) failing. + + def serviceIdentifier: ServiceIdentifier + + def tunableMap: TunableMap + + def featureSwitches: FeatureSwitches + + override val isProd: Boolean = + serviceIdentifier.environment == PushConstants.ServiceProdEnvironmentName + + def shardParams: ShardParams + + def log: Logger + + implicit def statsReceiver: StatsReceiver + + implicit val timer: Timer = DefaultTimer + + def notifierThriftClientId: ClientId + + def loggedOutNotifierThriftClientId: ClientId + + def pushserviceThriftClientId: ClientId + + def deepbirdv2PredictionServiceDest: String + + def featureStoreUtil: FeatureStoreUtil + + def targetLevelFeaturesConfig: PushFeaturesConfig + + private val manhattanClientMtlsParams = ManhattanKVClientMtlsParams( + serviceIdentifier = serviceIdentifier, + opportunisticTls = OpportunisticTls.Required + ) + + // Commonly used clients + val gizmoduckClient = { + + val client = ThriftMux.client + .withMutualTls(serviceIdentifier) + .withClientId(pushserviceThriftClientId) + .build[UserService.MethodPerEndpoint]( + dest = "/s/gizmoduck/gizmoduck" + ) + + /** + * RequestContext test user config to allow reading test user accounts on pushservice for load + * testing + */ + val GizmoduckTestUserConfig = TestUserConfig( + clientId = Some(pushserviceThriftClientId.name), + readConfig = Some(ReadConfig(includeTestUsers = true)) + ) + + TestUserClientBuilder[UserService.MethodPerEndpoint] + .withClient(client) + .withConfig(GizmoduckTestUserConfig) + .build() + } + + val sgsClient = { + val service = readOnlyThriftService( + "", + "/s/socialgraph/socialgraph", + statsReceiver, + 
pushserviceThriftClientId, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + new SocialGraphService.FinagledClient(service) + } + + val tweetyPieClient = { + val service = readOnlyThriftService( + "", + "/s/tweetypie/tweetypie", + statsReceiver, + notifierThriftClientId, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + new TweetService.FinagledClient(service) + } + + lazy val geoduckHydrationClient: Hydration.MethodPerEndpoint = { + val servicePerEndpoint = ThriftMux.client + .withLabel("geoduck_hydration") + .withClientId(pushserviceThriftClientId) + .withMutualTls(serviceIdentifier) + .methodBuilder("/s/geo/hydration") + .withTimeoutPerRequest(10.seconds) + .withTimeoutTotal(10.seconds) + .idempotent(maxExtraLoad = 0.0) + .servicePerEndpoint[Hydration.ServicePerEndpoint] + Hydration.MethodPerEndpoint(servicePerEndpoint) + } + + lazy val geoduckLocationClient: LocationService.MethodPerEndpoint = { + val servicePerEndpoint = ThriftMux.client + .withLabel("geoduck_location") + .withClientId(pushserviceThriftClientId) + .withMutualTls(serviceIdentifier) + .methodBuilder("/s/geo/geoduck_locationservice") + .withTimeoutPerRequest(10.seconds) + .withTimeoutTotal(10.seconds) + .idempotent(maxExtraLoad = 0.0) + .servicePerEndpoint[LocationService.ServicePerEndpoint] + LocationService.MethodPerEndpoint(servicePerEndpoint) + } + + override val geoDuckV2Store: ReadableStore[Long, LocationResponse] = { + val geoduckLocate: GeoduckUserLocate = GeoduckUserLocateModule.providesGeoduckUserLocate( + locationServiceClient = geoduckLocationClient, + hydrationClient = geoduckHydrationClient, + unscopedStatsReceiver = statsReceiver + ) + + val store: ReadableStore[Long, LocationResponse] = ReadableStore + .convert[GeoduckRequest, Long, LocationResponse, LocationResponse]( + GeoduckStoreV2(geoduckLocate))({ userId: Long => + GeoduckRequest( + userId, + placeTypes = Set( + PlaceType.City, + PlaceType.Metro, + PlaceType.Country, + PlaceType.ZipCode, + PlaceType.Admin0, + 
PlaceType.Admin1), + placeFields = Set(PlaceQueryFields.PlaceNames), + includeCountryCode = true + ) + })({ locationResponse: LocationResponse => Future.value(locationResponse) }) + + val _cacheName = "geoduckv2_in_memory_cache" + ObservedCachedReadableStore.from( + store, + ttl = 20.seconds, + maxKeys = 1000, + cacheName = _cacheName, + windowSize = 10000L + )(statsReceiver.scope(_cacheName)) + } + + private val deepbirdServiceBase = ThriftMux.client + .withClientId(pushserviceThriftClientId) + .withMutualTls(serviceIdentifier) + .withLoadBalancer(Balancers.p2c()) + .newService(deepbirdv2PredictionServiceDest, "DeepbirdV2PredictionService") + val deepbirdPredictionServiceClient = new DeepbirdPredictionService.ServiceToClient( + Finagle + .retryReadFilter( + tries = 3, + statsReceiver = statsReceiver.scope("DeepbirdV2PredictionService")) + .andThen(Finagle.timeoutFilter(timeout = 10.seconds)) + .andThen(deepbirdServiceBase), + RichClientParam(serviceName = "DeepbirdV2PredictionService", clientStats = statsReceiver) + ) + + val manhattanStarbuckAppId = "frigate_pushservice_starbuck" + val metastoreLocationAppId = "frigate_notifier_metastore_location" + val manhattanMetastoreAppId = "frigate_pushservice_penguin" + + def pushServiceMHCacheDest: String + def pushServiceCoreSvcsCacheDest: String + def poptartImpressionsCacheDest: String = "/srv#/prod/local/cache/poptart_impressions" + def entityGraphCacheDest: String + + val pushServiceCacheClient: Client = MemcacheStore.memcachedClient( + name = ClientName("memcache-pushservice"), + dest = ZkEndPoint(pushServiceMHCacheDest), + statsReceiver = statsReceiver, + timeout = 2.seconds, + serviceIdentifier = serviceIdentifier + ) + + val pushServiceCoreSvcsCacheClient: Client = + MemcacheStore.memcachedClient( + name = ClientName("memcache-pushservice-core-svcs"), + dest = ZkEndPoint(pushServiceCoreSvcsCacheDest), + statsReceiver = statsReceiver, + serviceIdentifier = serviceIdentifier, + timeout = 2.seconds, + ) + + val 
poptartImpressionsCacheClient: Client = + MemcacheStore.memcachedClient( + name = ClientName("memcache-pushservice-poptart-impressions"), + dest = ZkEndPoint(poptartImpressionsCacheDest), + statsReceiver = statsReceiver, + serviceIdentifier = serviceIdentifier, + timeout = 2.seconds + ) + + val entityGraphCacheClient: Client = MemcacheStore.memcachedClient( + name = ClientName("memcache-pushservice-entity-graph"), + dest = ZkEndPoint(entityGraphCacheDest), + statsReceiver = statsReceiver, + serviceIdentifier = serviceIdentifier, + timeout = 2.seconds + ) + + val stratoClient = { + val pushserviceThriftClient = ThriftMux.client.withClientId(pushserviceThriftClientId) + val baseBuilder = Strato + .Client(pushserviceThriftClient) + .withMutualTls(serviceIdentifier) + val finalBuilder = if (isServiceLocal) { + baseBuilder.withRequestTimeout(Duration.fromSeconds(15)) + } else { + baseBuilder.withRequestTimeout(Duration.fromSeconds(3)) + } + finalBuilder.build() + } + + val interestThriftServiceClient = ThriftMux.client + .withClientId(pushserviceThriftClientId) + .withMutualTls(serviceIdentifier) + .withRequestTimeout(3.seconds) + .configured(Retries.Policy(RetryPolicy.tries(1))) + .configured(BackupRequestFilter.Configured(maxExtraLoad = 0.0, sendInterrupts = false)) + .withStatsReceiver(statsReceiver) + .build[InterestsThriftService.MethodPerEndpoint]( + dest = "/s/interests-thrift-service/interests-thrift-service", + label = "interests-lookup" + ) + + def memcacheCASDest: String + + override val casLock: CasLock = { + val magicrecsCasMemcacheClient = Memcached.client + .withMutualTls(serviceIdentifier) + .withLabel("mr-cas-memcache-client") + .withRequestTimeout(3.seconds) + .withStatsReceiver(statsReceiver) + .configured(Retries.Policy(RetryPolicy.tries(3))) + .newTwemcacheClient(memcacheCASDest) + .withStrings + + MemcacheCasLock(magicrecsCasMemcacheClient) + } + + override val pushInfoStore: ReadableStore[Long, UserForPushTargeting] = { + 
StratoFetchableStore.withUnitView[Long, UserForPushTargeting]( + stratoClient, + "frigate/magicrecs/pushRecsTargeting.User") + } + + override val loggedOutPushInfoStore: ReadableStore[Long, LOWebNotificationMetadata] = { + StratoFetchableStore.withUnitView[Long, LOWebNotificationMetadata]( + stratoClient, + "frigate/magicrecs/web/loggedOutWebUserStoreMh" + ) + } + + // Setting up model stores + override val dauProbabilityStore: ReadableStore[Long, DauProbability] = { + StratoFetchableStore + .withUnitView[Long, DauProbability](stratoClient, "frigate/magicrecs/dauProbability.User") + } + + override val nsfwConsumerStore = { + StratoFetchableStore.withUnitView[Long, NSFWUserSegmentation]( + stratoClient, + "frigate/nsfw-user-segmentation/nsfwUserSegmentation.User") + } + + override val nsfwProducerStore = { + StratoFetchableStore.withUnitView[Long, NSFWProducer]( + stratoClient, + "frigate/nsfw-user-segmentation/nsfwProducer.User" + ) + } + + override val idsStore: ReadableStore[RecommendedListsRequest, RecommendedListsResponse] = { + val service = Finagle.readOnlyThriftService( + name = "interests-discovery-service", + dest = "/s/interests_discovery/interests_discovery", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 4.seconds, + tries = 2, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + val client = new InterestsDiscoveryService.FinagledClient( + service = service, + RichClientParam(serviceName = "interests-discovery-service") + ) + + InterestDiscoveryStore(client) + } + + override val popGeoLists = { + StratoFetchableStore.withUnitView[String, NonPersonalizedRecommendedLists]( + stratoClient, + column = "recommendations/interests_discovery/recommendations_mh/OrganicPopgeoLists" + ) + } + + override val listAPIStore = { + val fetcher = stratoClient + .fetcher[Long, ApiListView, ApiList]("channels/hydration/apiList.List") + StratoFetchableStore.withView[Long, ApiListView, ApiList]( + fetcher, + 
ApiListView(ApiListDisplayLocation.Recommendations) + ) + } + + override val reactivatedUserInfoStore = { + val stratoFetchableStore = StratoFetchableStore + .withUnitView[Long, String](stratoClient, "ml/featureStore/recentReactivationTime.User") + + ObservedReadableStore( + stratoFetchableStore + )(statsReceiver.scope("RecentReactivationTime")) + } + + override val openedPushByHourAggregatedStore: ReadableStore[Long, Map[Int, Int]] = { + StratoFetchableStore + .withUnitView[Long, Map[Int, Int]]( + stratoClient, + "frigate/magicrecs/opendPushByHourAggregated.User") + } + + private val lexClient: LiveVideoTimelineClient = { + val lexService = + new TimelineService.FinagledClient( + readOnlyThriftService( + name = "lex", + dest = lexServiceDest, + statsReceiver = statsReceiver.scope("lex-service"), + thriftClientId = pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "lex") + ) + new LiveVideoTimelineClient(lexService) + } + + override val lexServiceStore = { + ObservedCachedReadableStore.from[EventRequest, LiveEvent]( + buildStore(LexServiceStore(lexClient), "lexServiceStore"), + ttl = 1.hour, + maxKeys = 1000, + cacheName = "lexServiceStore_cache", + windowSize = 10000L + )(statsReceiver.scope("lexServiceStore_cache")) + } + + val inferredEntitiesFromInterestedInKeyedByClusterColumn = + "recommendations/simclusters_v2/inferred_entities/inferredEntitiesFromInterestedInKeyedByCluster" + override val simClusterToEntityStore: ReadableStore[Int, SimClustersInferredEntities] = { + val store = StratoFetchableStore + .withUnitView[Int, SimClustersInferredEntities]( + stratoClient, + inferredEntitiesFromInterestedInKeyedByClusterColumn) + ObservedCachedReadableStore.from[Int, SimClustersInferredEntities]( + buildStore(store, "simcluster_entity_store_cache"), + ttl = 6.hours, + maxKeys = 1000, + cacheName = "simcluster_entity_store_cache", + windowSize = 10000L + 
)(statsReceiver.scope("simcluster_entity_store_cache")) + } + + def fanoutMetadataColumn: String + + override val fanoutMetadataStore: ReadableStore[(Long, Long), FanoutEvent] = { + val store = StratoFetchableStore + .withUnitView[(Long, Long), FanoutEvent](stratoClient, fanoutMetadataColumn) + ObservedCachedReadableStore.from[(Long, Long), FanoutEvent]( + buildStore(store, "fanoutMetadataStore"), + ttl = 10.minutes, + maxKeys = 1000, + cacheName = "fanoutMetadataStore_cache", + windowSize = 10000L + )(statsReceiver.scope("fanoutMetadataStore_cache")) + } + + /** + * PostRanking Feature Store Client + */ + override def postRankingFeatureStoreClient = { + val clientStats = statsReceiver.scope("post_ranking_feature_store_client") + val clientConfig = + FeatureStoreClientBuilder.getClientConfig(PostRankingFeaturesConfig(), featureStoreUtil) + + FeatureStoreClientBuilder.getDynamicFeatureStoreClient(clientConfig, clientStats) + } + + /** + * Interests lookup store + */ + override val interestsWithLookupContextStore = { + ObservedCachedReadableStore.from[InterestsLookupRequestWithContext, Interests]( + buildStore( + new InterestsWithLookupContextStore(interestThriftServiceClient, statsReceiver), + "InterestsWithLookupContextStore" + ), + ttl = 1.minute, + maxKeys = 1000, + cacheName = "interestsWithLookupContextStore_cache", + windowSize = 10000L + ) + } + + /** + * OptOutInterestsStore + */ + override lazy val optOutUserInterestsStore: ReadableStore[Long, Seq[InterestId]] = { + buildStore( + InterestsOptOutwithLookUpContextStore(interestThriftServiceClient), + "InterestsOptOutStore" + ) + } + + override val topicListing: TopicListing = + if (isServiceLocal) { + new TopicListingBuilder(statsReceiver.scope("topiclisting"), Some(localConfigRepoPath)).build + } else { + new TopicListingBuilder(statsReceiver.scope("topiclisting"), None).build + } + + val cachedUttClient = { + val DefaultUttCacheConfig = CacheConfigV2(capacity = 100) + val uttClientCacheConfigs = 
uttclient.UttClientCacheConfigsV2( + DefaultUttCacheConfig, + DefaultUttCacheConfig, + DefaultUttCacheConfig, + DefaultUttCacheConfig + ) + new CachedUttClientV2(stratoClient, Environment.Prod, uttClientCacheConfigs, statsReceiver) + } + + override val uttEntityHydrationStore = + new UttEntityHydrationStore(cachedUttClient, statsReceiver, log) + + private lazy val dbv2PredictionServiceScoreStore: RelevancePushPredictionServiceStore = + DeepbirdV2ModelConfig.buildPredictionServiceScoreStore( + deepbirdPredictionServiceClient, + "deepbirdv2_magicrecs" + ) + + // Customized model to PredictionServiceStoreMap + // It is used to specify the predictionServiceStore for the models not in the default dbv2PredictionServiceScoreStore + private lazy val modelToPredictionServiceStoreMap: Map[ + WeightedOpenOrNtabClickModel.ModelNameType, + RelevancePushPredictionServiceStore + ] = Map() + + override lazy val weightedOpenOrNtabClickModelScorer = new PushMLModelScorer( + PushMLModel.WeightedOpenOrNtabClickProbability, + modelToPredictionServiceStoreMap, + dbv2PredictionServiceScoreStore, + statsReceiver.scope("weighted_oonc_scoring") + ) + + override lazy val optoutModelScorer = new PushMLModelScorer( + PushMLModel.OptoutProbability, + Map.empty, + dbv2PredictionServiceScoreStore, + statsReceiver.scope("optout_scoring") + ) + + override lazy val filteringModelScorer = new PushMLModelScorer( + PushMLModel.FilteringProbability, + Map.empty, + dbv2PredictionServiceScoreStore, + statsReceiver.scope("filtering_scoring") + ) + + private val queryFields: Set[QueryFields] = Set( + QueryFields.Profile, + QueryFields.Account, + QueryFields.Roles, + QueryFields.Discoverability, + QueryFields.Safety, + QueryFields.Takedowns, + QueryFields.Labels, + QueryFields.Counts, + QueryFields.ExtendedProfile + ) + + // Setting up safeUserStore + override val safeUserStore = + // in-memory cache + ObservedCachedReadableStore.from[Long, User]( + ObservedReadableStore( + GizmoduckUserStore.safeStore( + 
client = gizmoduckClient, + queryFields = queryFields, + safetyLevel = SafetyLevel.FilterNone, + statsReceiver = statsReceiver + ) + )(statsReceiver.scope("SafeUserStore")), + ttl = 1.minute, + maxKeys = 5e4.toInt, + cacheName = "safeUserStore_cache", + windowSize = 10000L + )(statsReceiver.scope("safeUserStore_cache")) + + val mobileSdkStore = MobileSdkStore( + "frigate_mobile_sdk_version_apollo", + "mobile_sdk_versions_scalding", + manhattanClientMtlsParams, + Apollo + ) + + val deviceUserStore = ObservedReadableStore( + GizmoduckUserStore( + client = gizmoduckClient, + queryFields = Set(QueryFields.Devices), + context = LookupContext(includeSoftUsers = true), + statsReceiver = statsReceiver + ) + )(statsReceiver.scope("devicesUserStore")) + + override val deviceInfoStore = DeviceInfoStore( + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = ObservedReadableStore( + mobileSdkStore + )(statsReceiver.scope("uncachedMobileSdkVersionsStore")), + cacheClient = pushServiceCacheClient, + ttl = 12.hours + )( + valueInjection = BinaryScalaCodec(SdkVersionValue), + statsReceiver = statsReceiver.scope("MobileSdkVersionsStore"), + keyToString = { + case SdkVersionKey(Some(userId), Some(clientId)) => + s"DeviceInfoStore/$userId/$clientId" + case SdkVersionKey(Some(userId), None) => s"DeviceInfoStore/$userId/_" + case SdkVersionKey(None, Some(clientId)) => + s"DeviceInfoStore/_/$clientId" + case SdkVersionKey(None, None) => s"DeviceInfoStore/_" + } + ), + deviceUserStore + ) + + // Setting up edgeStore + override val edgeStore = SocialGraphPredicate.buildEdgeStore(sgsClient) + + override val socialGraphServiceProcessStore = SocialGraphServiceProcessStore(edgeStore) + + def userTweetEntityGraphDest: String + def userUserGraphDest: String + def lexServiceDest: String + + // Setting up the history store + def frigateHistoryCacheDest: String + + val notificationHistoryStore: NotificationHistoryStore = { + + val manhattanStackBasedClient = ThriftMux.client + 
.withClientId(notifierThriftClientId) + .withOpportunisticTls(OpportunisticTls.Required) + .withMutualTls( + serviceIdentifier + ) + + val manhattanHistoryMethodBuilder = manhattanStackBasedClient + .withLabel("manhattan_history_v2") + .withRequestTimeout(10.seconds) + .withStatsReceiver(statsReceiver) + .methodBuilder(Omega.wilyName) + .withMaxRetries(3) + + NotificationHistoryStore.build( + "frigate_notifier", + "frigate_notifications_v2", + manhattanHistoryMethodBuilder, + maxRetryCount = 3 + ) + } + + val emailNotificationHistoryStore: ReadOnlyHistoryStore = { + val client = ManhattanKVClient( + appId = "frigate_email_history", + dest = "/s/manhattan/omega.native-thrift", + mtlsParams = ManhattanKVClientMtlsParams( + serviceIdentifier = serviceIdentifier, + opportunisticTls = OpportunisticTls.Required + ) + ) + val endpoint = ManhattanKVEndpointBuilder(client) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .statsReceiver(statsReceiver) + .build() + + ReadOnlyHistoryStore(ManhattanKVHistoryStore(endpoint, dataset = "frigate_email_history"))( + statsReceiver) + } + + val manhattanKVLoggedOutHistoryStoreEndpoint: ManhattanKVEndpoint = { + val mhClient = ManhattanKVClient( + "frigate_notification_logged_out_history", + Nash.wilyName, + manhattanClientMtlsParams) + ManhattanKVEndpointBuilder(mhClient) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(5.seconds) + .maxRetryCount(3) + .statsReceiver(statsReceiver) + .build() + } + + val manhattanKVNtabHistoryStoreEndpoint: ManhattanKVEndpoint = { + val mhClient = ManhattanKVClient("frigate_ntab", Omega.wilyName, manhattanClientMtlsParams) + ManhattanKVEndpointBuilder(mhClient) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(5.seconds) + .maxRetryCount(3) + .statsReceiver(statsReceiver) + .build() + } + + val nTabHistoryStore: ReadableWritableStore[(Long, String), GenericNotificationOverrideKey] = { + ObservedReadableWritableStore( + 
NTabHistoryStore(manhattanKVNtabHistoryStoreEndpoint, "frigate_ntab_generic_notif_history") + )(statsReceiver.scope("NTabHistoryStore")) + } + + override lazy val ocfFatigueStore: ReadableStore[OCFHistoryStoreKey, FatigueFlowEnrollment] = + new OCFPromptHistoryStore( + manhattanAppId = "frigate_pushservice_ocf_fatigue_store", + dataset = "fatigue_v1", + manhattanClientMtlsParams + ) + + def historyStore: PushServiceHistoryStore + + def emailHistoryStore: PushServiceHistoryStore + + def loggedOutHistoryStore: PushServiceHistoryStore + + override val hydratedLabeledPushRecsStore: ReadableStore[UserHistoryKey, UserHistoryValue] = { + val labeledHistoryMemcacheClient = { + MemcacheStore.memcachedClient( + name = ClientName("history-memcache"), + dest = ZkEndPoint(frigateHistoryCacheDest), + statsReceiver = statsReceiver, + timeout = 2.seconds, + serviceIdentifier = serviceIdentifier + ) + } + + implicit val keyCodec = CompactScalaCodec(UserHistoryKey) + implicit val valueCodec = CompactScalaCodec(UserHistoryValue) + val dataset: Dataset[UserHistoryKey, UserHistoryValue] = + Dataset( + "", + "frigate_data_pipeline_pushservice", + "labeled_push_recs_aggregated_hydrated", + Athena + ) + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = ObservedReadableStore(buildManhattanStore(dataset))( + statsReceiver.scope("UncachedHydratedLabeledPushRecsStore") + ), + cacheClient = labeledHistoryMemcacheClient, + ttl = 6.hours + )( + valueInjection = valueCodec, + statsReceiver = statsReceiver.scope("HydratedLabeledPushRecsStore"), + keyToString = { + case UserHistoryKey.UserId(userId) => s"HLPRS/$userId" + case unknownKey => + throw new IllegalArgumentException(s"Unknown userHistoryStore cache key $unknownKey") + } + ) + } + + override val realTimeClientEventStore: RealTimeClientEventStore = { + val client = ManhattanKVClient( + "frigate_eventstream", + "/s/manhattan/omega.native-thrift", + manhattanClientMtlsParams + ) + val endpoint = + 
ManhattanKVEndpointBuilder(client) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(3.seconds) + .statsReceiver(statsReceiver) + .build() + + ManhattanRealTimeClientEventStore(endpoint, "realtime_client_events", statsReceiver, None) + } + + override val onlineUserHistoryStore: ReadableStore[OnlineUserHistoryKey, UserHistoryValue] = { + OnlineUserHistoryStore(realTimeClientEventStore) + } + + override val userMediaRepresentationStore = UserMediaRepresentationStore( + "user_media_representation", + "user_media_representation_dataset", + manhattanClientMtlsParams + ) + + override val producerMediaRepresentationStore = ObservedMemcachedReadableStore.fromCacheClient( + backingStore = UserMediaRepresentationStore( + "user_media_representation", + "producer_media_representation_dataset", + manhattanClientMtlsParams + )(statsReceiver.scope("UncachedProducerMediaRepStore")), + cacheClient = pushServiceCacheClient, + ttl = 4.hours + )( + valueInjection = BinaryScalaCodec(UserMediaRepresentation), + keyToString = { k: Long => s"ProducerMediaRepStore/$k" }, + statsReceiver.scope("ProducerMediaRepStore") + ) + + override val mrUserStatePredictionStore = { + StratoFetchableStore.withUnitView[Long, MRUserHmmState]( + stratoClient, + "frigate/magicrecs/mrUserStatePrediction.User") + } + + override val userHTLLastVisitStore = + UserHTLLastVisitReadableStore( + "pushservice_htl_user_session", + "tls_user_session_store", + statsReceiver.scope("userHTLLastVisitStore"), + manhattanClientMtlsParams + ) + + val crMixerClient: CrMixer.MethodPerEndpoint = new CrMixer.FinagledClient( + readOnlyThriftService( + "cr-mixer", + "/s/cr-mixer/cr-mixer-plus", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "cr-mixer") + ) + + val crMixerStore = CrMixerTweetStore(crMixerClient)(statsReceiver.scope("CrMixerTweetStore")) + + val contentMixerClient: 
ContentMixer.MethodPerEndpoint = new ContentMixer.FinagledClient( + readOnlyThriftService( + "content-mixer", + "/s/corgi-shared/content-mixer", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "content-mixer") + ) + + val exploreRankerClient: ExploreRanker.MethodPerEndpoint = + new ExploreRanker.FinagledClient( + readOnlyThriftService( + "explore-ranker", + "/s/explore-ranker/explore-ranker", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "explore-ranker") + ) + + val contentMixerStore = { + ObservedReadableStore(ContentMixerStore(contentMixerClient))( + statsReceiver.scope("ContentMixerStore")) + } + + val exploreRankerStore = { + ObservedReadableStore(ExploreRankerStore(exploreRankerClient))( + statsReceiver.scope("ExploreRankerStore") + ) + } + + val gizmoduckUtcOffsetStore = ObservedReadableStore( + GizmoduckUserUtcOffsetStore.fromUserStore(safeUserStore) + )(statsReceiver.scope("GizmoUserUtcOffsetStore")) + + override val userUtcOffsetStore = + UtcOffsetStore + .makeMemcachedUtcOffsetStore( + gizmoduckUtcOffsetStore, + pushServiceCoreSvcsCacheClient, + ReadableStore.empty, + manhattanStarbuckAppId, + manhattanClientMtlsParams + )(statsReceiver) + .mapValues(Duration.fromSeconds) + + override val cachedTweetyPieStoreV2 = { + val getTweetOptions = Some( + GetTweetOptions( + safetyLevel = Some(SafetyLevel.MagicRecsV2), + includeRetweetCount = true, + includeReplyCount = true, + includeFavoriteCount = true, + includeQuotedTweet = true, + additionalFieldIds = Seq(VisibleTextRangeField.id) + ) + ) + buildCachedTweetyPieStore(getTweetOptions, "tp_v2") + } + + override val cachedTweetyPieStoreV2NoVF = { + val getTweetOptions = Some( + GetTweetOptions( + safetyLevel = Some(SafetyLevel.FilterDefault), + 
includeRetweetCount = true, + includeReplyCount = true, + includeFavoriteCount = true, + includeQuotedTweet = true, + additionalFieldIds = Seq(VisibleTextRangeField.id), + ) + ) + buildCachedTweetyPieStore(getTweetOptions, "tp_v2_noVF") + } + + override val safeCachedTweetyPieStoreV2 = { + val getTweetOptions = Some( + GetTweetOptions( + safetyLevel = Some(SafetyLevel.MagicRecsAggressiveV2), + includeRetweetCount = true, + includeReplyCount = true, + includeFavoriteCount = true, + includeQuotedTweet = true, + additionalFieldIds = Seq(VisibleTextRangeField.id) + ) + ) + buildCachedTweetyPieStore(getTweetOptions, "sftp_v2") + } + + override val userTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult] = { + val getTweetOptions = Some( + GetTweetOptions( + safetyLevel = Some(SafetyLevel.MagicRecsV2), + includeRetweetCount = true, + includeReplyCount = true, + includeFavoriteCount = true, + includeQuotedTweet = true, + additionalFieldIds = Seq(VisibleTextRangeField.id) + ) + ) + TweetyPieStore.buildUserTweetStore( + client = tweetyPieClient, + options = getTweetOptions + ) + } + + override val safeUserTweetTweetyPieStore: ReadableStore[UserTweet, TweetyPieResult] = { + val getTweetOptions = Some( + GetTweetOptions( + safetyLevel = Some(SafetyLevel.MagicRecsAggressiveV2), + includeRetweetCount = true, + includeReplyCount = true, + includeFavoriteCount = true, + includeQuotedTweet = true, + additionalFieldIds = Seq(VisibleTextRangeField.id) + ) + ) + TweetyPieStore.buildUserTweetStore( + client = tweetyPieClient, + options = getTweetOptions + ) + } + + override val tweetContentFeatureCacheStore: ReadableStore[Long, ThriftDataRecord] = { + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = TweetContentFeatureReadableStore(stratoClient), + cacheClient = poptartImpressionsCacheClient, + ttl = 12.hours + )( + valueInjection = BinaryScalaCodec(ThriftDataRecord), + statsReceiver = statsReceiver.scope("TweetContentFeaturesCacheStore"), + keyToString = { 
k: Long => s"tcf/$k" } + ) + } + + lazy val tweetTranslationStore: ReadableStore[ + TweetTranslationStore.Key, + TweetTranslationStore.Value + ] = { + val isTweetTranslatableStore = + StratoFetchableStore + .withUnitView[IsTweetTranslatableClientColumn.Key, Boolean]( + fetcher = new IsTweetTranslatableClientColumn(stratoClient).fetcher + ) + + val translateTweetStore = + StratoFetchableStore + .withUnitView[MachineTranslateTweetClientColumn.Key, MachineTranslationResponse]( + fetcher = new MachineTranslateTweetClientColumn(stratoClient).fetcher + ) + + ObservedReadableStore( + TweetTranslationStore(translateTweetStore, isTweetTranslatableStore, statsReceiver) + )(statsReceiver.scope("tweetTranslationStore")) + } + + val scarecrowClient = new ScarecrowService.FinagledClient( + readOnlyThriftService( + "", + "/s/abuse/scarecrow", + statsReceiver, + notifierThriftClientId, + requestTimeout = 5.second, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "") + ) + + // Setting up scarecrow store + override val scarecrowCheckEventStore = { + ScarecrowCheckEventStore(scarecrowClient) + } + + // setting up the perspective store + override val userTweetPerspectiveStore = { + val service = new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.TweetPerspectiveStoreQpsLimit), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 40), + PushQPSLimitConstants.PerspectiveStoreQPS)(timer) + .andThen( + readOnlyThriftService( + "tweetypie_perspective_service", + "/s/tweetypie/tweetypie", + statsReceiver, + notifierThriftClientId, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + ) + + val client = new TweetService.FinagledClient( + service, + clientParam = RichClientParam(serviceName = "tweetypie_perspective_client")) + ObservedReadableStore( + PerspectiveReadableStore(client) + )(statsReceiver.scope("TweetPerspectiveStore")) + } + + //user country code store, used in RecsWithheldContentPredicate - wrapped by 
memcache based cache + override val userCountryStore = + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = ObservedReadableStore( + UserCountryStore(metastoreLocationAppId, manhattanClientMtlsParams) + )(statsReceiver.scope("userCountryStore")), + cacheClient = pushServiceCacheClient, + ttl = 12.hours + )( + valueInjection = BinaryScalaCodec(Location), + statsReceiver = statsReceiver.scope("UserCountryStore"), + keyToString = { k: Long => s"UserCountryStore/$k" } + ) + + override val audioSpaceParticipantsStore: ReadableStore[String, Participants] = { + val store = StratoFetchableStore + .DefaultStratoFetchableStore( + fetcher = new ParticipantsOnAudioSpaceClientColumn(stratoClient).fetcher + ).composeKeyMapping[String](broadcastId => + (broadcastId, AudioSpacesLookupContext(forUserId = None))) + + ObservedCachedReadableStore + .from( + store = buildStore(store, "AudioSpaceParticipantsStore"), + ttl = 20.seconds, + maxKeys = 200, + cacheName = "AudioSpaceParticipantsStore", + windowSize = 200 + ) + + } + + override val topicSocialProofServiceStore: ReadableStore[ + TopicSocialProofRequest, + TopicSocialProofResponse + ] = { + StratoFetchableStore.withUnitView[TopicSocialProofRequest, TopicSocialProofResponse]( + stratoClient, + "topic-signals/tsp/topic-social-proof") + } + + override val spaceDeviceFollowStore: ReadableStore[SourceDestUserRequest, Boolean] = { + StratoFetchableStore.withUnitView( + fetcher = new SpaceDeviceFollowingClientColumn(stratoClient).fetcher + ) + } + + override val audioSpaceStore: ReadableStore[String, AudioSpace] = { + val store = StratoFetchableStore + .DefaultStratoFetchableStore( + fetcher = new CoreOnAudioSpaceClientColumn(stratoClient).fetcher + ).composeKeyMapping[String] { broadcastId => + (broadcastId, AudioSpacesLookupContext(forUserId = None)) + } + + ObservedCachedReadableStore + .from( + store = buildStore(store, "AudioSpaceVisibilityStore"), + ttl = 1.minute, + maxKeys = 5000, + cacheName = 
"AudioSpaceVisibilityStore", + windowSize = 10000L) + } + + override val userLanguagesStore = UserLanguagesStore( + manhattanMetastoreAppId, + manhattanClientMtlsParams, + statsReceiver.scope("user_languages_store") + ) + + val tflockClient: TFlockClient = new TFlockClient( + new FlockDB.FinagledClient( + readOnlyThriftService( + "tflockClient", + "/s/tflock/tflock", + statsReceiver, + pushserviceThriftClientId, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + serviceName = "tflock", + stats = statsReceiver + ), + defaultPageSize = 1000 + ) + + val rawFlockClient = ThriftMux.client + .withClientId(pushserviceThriftClientId) + .withMutualTls(serviceIdentifier) + .build[FlockDB.MethodPerEndpoint]("/s/flock/flock") + + val flockClient: FlockClient = new FlockClient( + rawFlockClient, + defaultPageSize = 100 + ) + + override val recentFollowsStore: FlockFollowStore = { + val dStats = statsReceiver.scope("FlockRecentFollowsStore") + FlockFollowStore(flockClient, dStats) + } + + def notificationServiceClient: NotificationService$FinagleClient + + def notificationServiceSend( + target: Target, + request: CreateGenericNotificationRequest + ): Future[CreateGenericNotificationResponse] + + def notificationServiceDelete( + request: DeleteGenericNotificationRequest + ): Future[Unit] + + def notificationServiceDeleteTimeline( + request: DeleteCurrentTimelineForUserRequest + ): Future[Unit] + + override val notificationServiceSender: ReadableStore[ + NotificationServiceRequest, + CreateGenericNotificationResponse + ] = { + new NotificationServiceSender( + notificationServiceSend, + PushParams.EnableWritesToNotificationServiceParam, + PushParams.EnableWritesToNotificationServiceForAllEmployeesParam, + PushParams.EnableWritesToNotificationServiceForEveryoneParam + ) + } + + val eventRecosServiceClient = { + val dest = "/s/events-recos/events-recos-service" + new EventsRecosService.FinagledClient( + readOnlyThriftService( + "EventRecosService", + dest, + statsReceiver, + 
pushserviceThriftClientId, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "EventRecosService") + ) + } + + lazy val recommendedTrendsCandidateSource = RecommendedTrendsCandidateSource( + TrendsRecommendationStore(eventRecosServiceClient, statsReceiver)) + + override val softUserGeoLocationStore: ReadableStore[Long, GeoLocation] = + StratoFetchableStore.withUnitView[Long, GeoLocation](fetcher = + new FrequentSoftUserLocationClientColumn(stratoClient).fetcher) + + lazy val candidateSourceGenerator = new PushCandidateSourceGenerator( + earlybirdCandidateSource, + userTweetEntityGraphCandidates, + cachedTweetyPieStoreV2, + safeCachedTweetyPieStoreV2, + userTweetTweetyPieStore, + safeUserTweetTweetyPieStore, + cachedTweetyPieStoreV2NoVF, + edgeStore, + interestsWithLookupContextStore, + uttEntityHydrationStore, + geoDuckV2Store, + topTweetsByGeoStore, + topTweetsByGeoV2VersionedStore, + ruxTweetImpressionsStore, + recommendedTrendsCandidateSource, + recentTweetsByAuthorsStore, + topicSocialProofServiceStore, + crMixerStore, + contentMixerStore, + exploreRankerStore, + softUserGeoLocationStore, + tripTweetCandidateStore, + popGeoLists, + idsStore + ) + + lazy val loCandidateSourceGenerator = new LoggedOutPushCandidateSourceGenerator( + tripTweetCandidateStore, + geoDuckV2Store, + safeCachedTweetyPieStoreV2, + cachedTweetyPieStoreV2NoVF, + cachedTweetyPieStoreV2, + contentMixerStore, + softUserGeoLocationStore, + topTweetsByGeoStore, + topTweetsByGeoV2VersionedStore + ) + + lazy val rfphStatsRecorder = new RFPHStatsRecorder() + + lazy val rfphRestrictStep = new RFPHRestrictStep() + + lazy val rfphTakeStepUtil = new RFPHTakeStepUtil()(statsReceiver) + + lazy val rfphPrerankFilter = new RFPHPrerankFilter()(statsReceiver) + + lazy val rfphLightRanker = new RFPHLightRanker(lightRanker, statsReceiver) + + lazy val sendHandlerPredicateUtil = new SendHandlerPredicateUtil()(statsReceiver) + + lazy val ntabSender = + new 
NtabSender( + notificationServiceSender, + nTabHistoryStore, + notificationServiceDelete, + notificationServiceDeleteTimeline + ) + + lazy val ibis2Sender = new Ibis2Sender(pushIbisV2Store, tweetTranslationStore, statsReceiver) + + lazy val historyWriter = new HistoryWriter(historyStore, statsReceiver) + + lazy val loggedOutHistoryWriter = new HistoryWriter(loggedOutHistoryStore, statsReceiver) + + lazy val eventBusWriter = new EventBusWriter(pushSendEventBusPublisher, statsReceiver) + + lazy val ntabOnlyChannelSelector = new NtabOnlyChannelSelector + + lazy val notificationSender = + new NotificationSender( + ibis2Sender, + ntabSender, + statsReceiver, + notificationScribe + ) + + lazy val candidateNotifier = + new CandidateNotifier( + notificationSender, + casLock = casLock, + historyWriter = historyWriter, + eventBusWriter = eventBusWriter, + ntabOnlyChannelSelector = ntabOnlyChannelSelector + )(statsReceiver) + + lazy val loggedOutCandidateNotifier = new CandidateNotifier( + notificationSender, + casLock = casLock, + historyWriter = loggedOutHistoryWriter, + eventBusWriter = null, + ntabOnlyChannelSelector = ntabOnlyChannelSelector + )(statsReceiver) + + lazy val rfphNotifier = + new RefreshForPushNotifier(rfphStatsRecorder, candidateNotifier)(statsReceiver) + + lazy val loRfphNotifier = + new LoggedOutRefreshForPushNotifier(rfphStatsRecorder, loggedOutCandidateNotifier)( + statsReceiver) + + lazy val rfphRanker = { + val randomRanker = RandomRanker[Target, PushCandidate]() + val subscriptionCreatorRanker = + new SubscriptionCreatorRanker(superFollowEligibilityUserStore, statsReceiver) + new RFPHRanker( + randomRanker, + weightedOpenOrNtabClickModelScorer, + subscriptionCreatorRanker, + userHealthSignalStore, + producerMediaRepresentationStore, + statsReceiver + ) + } + + lazy val rfphFeatureHydrator = new RFPHFeatureHydrator(featureHydrator) + lazy val loggedOutRFPHRanker = new LoggedOutRanker(cachedTweetyPieStoreV2, statsReceiver) + + override val 
userFeaturesStore: ReadableStore[Long, UserFeatures] = { + implicit val valueCodec = new BinaryScalaCodec(UserFeatures) + val dataset: Dataset[Long, UserFeatures] = + Dataset( + "", + "user_features_pushservice_apollo", + "recommendations_user_features_apollo", + Apollo) + + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = ObservedReadableStore(buildManhattanStore(dataset))( + statsReceiver.scope("UncachedUserFeaturesStore") + ), + cacheClient = pushServiceCacheClient, + ttl = 24.hours + )( + valueInjection = valueCodec, + statsReceiver = statsReceiver.scope("UserFeaturesStore"), + keyToString = { k: Long => s"ufts/$k" } + ) + } + + override def htlScoreStore(userId: Long): ReadableStore[Long, ScoredTweet] = { + val fetcher = new TimelineScorerTweetScoresV1ClientColumn(stratoClient).fetcher + val htlStore = buildStore( + StratoFetchableStore.withView[Long, TimelineScorerScoreView, ScoredTweet]( + fetcher, + TimelineScorerScoreView(Some(userId)) + ), + "htlScoreStore" + ) + htlStore + } + + override val userTargetingPropertyStore: ReadableStore[Long, UserTargetingProperty] = { + val name = "userTargetingPropertyStore" + val store = StratoFetchableStore + .withUnitView(new TargetingPropertyOnUserClientColumn(stratoClient).fetcher) + buildStore(store, name) + } + + override val timelinesUserSessionStore: ReadableStore[Long, UserSession] = { + implicit val valueCodec = new CompactScalaCodec(UserSession) + val dataset: Dataset[Long, UserSession] = Dataset[Long, UserSession]( + "", + "frigate_realgraph", + "real_graph_user_features", + Apollo + ) + + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = ObservedReadableStore(buildManhattanStore(dataset))( + statsReceiver.scope("UncachedTimelinesUserSessionStore") + ), + cacheClient = pushServiceCacheClient, + ttl = 6.hours + )( + valueInjection = valueCodec, + statsReceiver = statsReceiver.scope("timelinesUserSessionStore"), + keyToString = { k: Long => s"tluss/$k" } + ) + } + + lazy val 
recentTweetsFromTflockStore: ReadableStore[Long, Seq[Long]] = + ObservedReadableStore( + RecentTweetsByAuthorsStore.usingRecentTweetsConfig( + tflockClient, + RecentTweetsConfig(maxResults = 1, maxAge = 3.days) + ) + )(statsReceiver.scope("RecentTweetsFromTflockStore")) + + lazy val recentTweetsByAuthorsStore: ReadableStore[RecentTweetsQuery, Seq[Seq[Long]]] = + ObservedReadableStore( + RecentTweetsByAuthorsStore(tflockClient) + )(statsReceiver.scope("RecentTweetsByAuthorsStore")) + + val jobConfig = PopGeoInterestProvider + .getPopularTweetsJobConfig( + InterestDeployConfig( + AppId("PopularTweetsByInterestProd"), + Cluster.ATLA, + Env.Prod, + serviceIdentifier, + manhattanClientMtlsParams + )) + .withManhattanAppId("frigate_pop_by_geo_tweets") + + override val topTweetsByGeoStore = TopTweetsStore.withMemCache( + jobConfig, + pushServiceCacheClient, + 10.seconds + )(statsReceiver) + + override val topTweetsByGeoV2VersionedStore: ReadableStore[String, PopTweetsInPlace] = { + StratoFetchableStore.withUnitView[String, PopTweetsInPlace]( + stratoClient, + "recommendations/popgeo/popGeoTweetsVersioned") + } + + override lazy val pushcapDynamicPredictionStore: ReadableStore[Long, PushcapUserHistory] = { + StratoFetchableStore.withUnitView[Long, PushcapUserHistory]( + stratoClient, + "frigate/magicrecs/pushcapDynamicPrediction.User") + } + + override val tweetAuthorLocationFeatureBuilder = + UserLocationFeatureBuilder(Some("TweetAuthor")) + .withStats() + + override val tweetAuthorLocationFeatureBuilderById = + UserLocationFeatureBuilderById( + userCountryStore, + tweetAuthorLocationFeatureBuilder + ).withStats() + + override val socialContextActionsFeatureBuilder = + SocialContextActionsFeatureBuilder().withStats() + + override val tweetContentFeatureBuilder = + TweetContentFeatureBuilder(tweetContentFeatureCacheStore).withStats() + + override val tweetAuthorRecentRealGraphFeatureBuilder = + RecentRealGraphFeatureBuilder( + stratoClient, + UserAuthorEntity, + 
TargetUserEntity, + TweetAuthorEntity, + TweetAuthorRecentRealGraphFeatures(statsReceiver.scope("TweetAuthorRecentRealGraphFeatures")) + ).withStats() + + override val socialContextRecentRealGraphFeatureBuilder = + SocialContextRecentRealGraphFeatureBuilder( + RecentRealGraphFeatureBuilder( + stratoClient, + TargetUserSocialContextEntity, + TargetUserEntity, + SocialContextEntity, + SocialContextRecentRealGraphFeatures( + statsReceiver.scope("SocialContextRecentRealGraphFeatures")) + )(statsReceiver + .scope("SocialContextRecentRealGraphFeatureBuilder").scope("RecentRealGraphFeatureBuilder")) + ).withStats() + + override val tweetSocialProofFeatureBuilder = + TweetSocialProofFeatureBuilder(Some("TargetUser")).withStats() + + override val targetUserFullRealGraphFeatureBuilder = + TargetFullRealGraphFeatureBuilder(Some("TargetUser")).withStats() + + override val postProcessingFeatureBuilder: PostProcessingFeatureBuilder = + PostProcessingFeatureBuilder() + + override val mrOfflineUserCandidateSparseAggregatesFeatureBuilder = + MrOfflineUserCandidateSparseAggregatesFeatureBuilder(stratoClient, featureStoreUtil).withStats() + + override val mrOfflineUserAggregatesFeatureBuilder = + MrOfflineUserAggregatesFeatureBuilder(stratoClient, featureStoreUtil).withStats() + + override val mrOfflineUserCandidateAggregatesFeatureBuilder = + MrOfflineUserCandidateAggregatesFeatureBuilder(stratoClient, featureStoreUtil).withStats() + + override val tweetAnnotationsFeatureBuilder = + TweetAnnotationsFeatureBuilder(stratoClient).withStats() + + override val targetUserMediaRepresentationFeatureBuilder = + UserMediaRepresentationFeatureBuilder(userMediaRepresentationStore).withStats() + + override val targetLevelFeatureBuilder = + TargetLevelFeatureBuilder(featureStoreUtil, targetLevelFeaturesConfig).withStats() + + override val candidateLevelFeatureBuilder = + CandidateLevelFeatureBuilder(featureStoreUtil).withStats() + + override lazy val targetFeatureHydrator = 
RelevanceTargetFeatureHydrator( + targetUserFullRealGraphFeatureBuilder, + postProcessingFeatureBuilder, + targetUserMediaRepresentationFeatureBuilder, + targetLevelFeatureBuilder + ) + + override lazy val featureHydrator = + FeatureHydrator(targetFeatureHydrator, candidateFeatureHydrator) + + val pushServiceLightRankerConfig: LightRankerConfig = new LightRankerConfig( + pushserviceThriftClientId, + serviceIdentifier, + statsReceiver.scope("lightRanker"), + deepbirdv2PredictionServiceDest, + "DeepbirdV2PredictionService" + ) + val lightRanker: MagicRecsServeDataRecordLightRanker = + pushServiceLightRankerConfig.lightRanker + + override val tweetImpressionStore: ReadableStore[Long, Seq[Long]] = { + val name = "htl_impression_store" + val store = buildStore( + HtlTweetImpressionStore.createStoreWithTweetIds( + requestTimeout = 6.seconds, + label = "htl_tweet_impressions", + serviceIdentifier = serviceIdentifier, + statsReceiver = statsReceiver + ), + name + ) + val numTweetsReturned = + statsReceiver.scope(name).stat("num_tweets_returned_per_user") + new TransformedReadableStore(store)((userId: Long, tweetIds: Seq[Long]) => { + numTweetsReturned.add(tweetIds.size) + Future.value(Some(tweetIds)) + }) + } + + val ruxTweetImpressionsStore = new TweetImpressionsStore(stratoClient) + + override val strongTiesStore: ReadableStore[Long, STPResult] = { + implicit val valueCodec = new BinaryScalaCodec(STPResult) + val strongTieScoringDataset: Dataset[Long, STPResult] = + Dataset("", "frigate_stp", "stp_result_rerank", Athena) + buildManhattanStore(strongTieScoringDataset) + } + + override lazy val earlybirdFeatureStore = ObservedReadableStore( + EarlybirdFeatureStore( + clientId = pushserviceThriftClientId.name, + earlybirdSearchStore = earlybirdSearchStore + ) + )(statsReceiver.scope("EarlybirdFeatureStore")) + + override lazy val earlybirdFeatureBuilder = EarlybirdFeatureBuilder(earlybirdFeatureStore) + + override lazy val earlybirdSearchStore = { + val earlybirdClientName: 
String = "earlybird" + val earlybirdSearchStoreName: String = "EarlybirdSearchStore" + + val earlybirdClient = new EarlybirdService.FinagledClient( + readOnlyThriftService( + earlybirdClientName, + earlybirdSearchDest, + statsReceiver, + pushserviceThriftClientId, + tries = 1, + requestTimeout = 3.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(protocolFactory = new TCompactProtocol.Factory) + ) + + ObservedReadableStore( + EarlybirdSearchStore(earlybirdClient)(statsReceiver.scope(earlybirdSearchStoreName)) + )(statsReceiver.scope(earlybirdSearchStoreName)) + } + + override lazy val earlybirdCandidateSource: EarlybirdCandidateSource = EarlybirdCandidateSource( + clientId = pushserviceThriftClientId.name, + earlybirdSearchStore = earlybirdSearchStore + ) + + override val realGraphScoresTop500InStore: RealGraphScoresTop500InStore = { + val stratoRealGraphInStore = + StratoFetchableStore + .withUnitView[Long, CandidateSeq]( + stratoClient, + "frigate/magicrecs/fanoutCoi500pRealGraphV2") + + RealGraphScoresTop500InStore( + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = stratoRealGraphInStore, + cacheClient = entityGraphCacheClient, + ttl = 24.hours + )( + valueInjection = BinaryScalaCodec(CandidateSeq), + statsReceiver = statsReceiver.scope("CachedRealGraphScoresTop500InStore"), + keyToString = { k: Long => s"500p_test/$k" } + ) + ) + } + + override val tweetEntityGraphStore = { + val tweetEntityGraphClient = new UserTweetEntityGraph.FinagledClient( + Finagle.readOnlyThriftService( + "user_tweet_entity_graph", + userTweetEntityGraphDest, + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + ) + ObservedReadableStore( + RecommendedTweetEntitiesStore( + tweetEntityGraphClient, + statsReceiver.scope("RecommendedTweetEntitiesStore") + ) + )(statsReceiver.scope("RecommendedTweetEntitiesStore")) + } + + override val 
userUserGraphStore = { + val userUserGraphClient = new UserUserGraph.FinagledClient( + Finagle.readOnlyThriftService( + "user_user_graph", + userUserGraphDest, + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 5.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ), + clientParam = RichClientParam(serviceName = "user_user_graph") + ) + ObservedReadableStore( + UserUserGraphStore(userUserGraphClient, statsReceiver.scope("UserUserGraphStore")) + )(statsReceiver.scope("UserUserGraphStore")) + } + + override val ntabCaretFeedbackStore: ReadableStore[GenericNotificationsFeedbackRequest, Seq[ + CaretFeedbackDetails + ]] = { + val client = ManhattanKVClient( + "pushservice_ntab_caret_feedback_omega", + Omega.wilyName, + manhattanClientMtlsParams + ) + val endpoint = ManhattanKVEndpointBuilder(client) + .defaultGuarantee(Guarantee.SoftDcReadMyWrites) + .defaultMaxTimeout(3.seconds) + .maxRetryCount(2) + .statsReceiver(statsReceiver) + .build() + + val feedbackSignalManhattanClient = + FeedbackSignalManhattanClient(endpoint, statsReceiver.scope("FeedbackSignalManhattanClient")) + NtabCaretFeedbackStore(feedbackSignalManhattanClient) + } + + override val genericFeedbackStore: ReadableStore[FeedbackRequest, Seq[ + FeedbackPromptValue + ]] = { + FeedbackStore( + GenericFeedbackStoreBuilder.build( + manhattanKVClientAppId = "frigate_pushservice_ntabfeedback_prompt", + environment = NotifEnvironment.apply(serviceIdentifier.environment), + svcIdentifier = serviceIdentifier, + statsReceiver = statsReceiver + )) + } + + override val genericNotificationFeedbackStore: GenericFeedbackStore = { + + GenericFeedbackStoreBuilder.build( + manhattanKVClientAppId = "frigate_pushservice_ntabfeedback_prompt", + environment = NotifEnvironment.apply(serviceIdentifier.environment), + svcIdentifier = serviceIdentifier, + statsReceiver = statsReceiver + ) + } + + override val earlybirdSearchDest = "/s/earlybird-root-superroot/root-superroot" + + // low latency as compared to 
default `semanticCoreMetadataClient` + private val lowLatencySemanticCoreMetadataClient: MetadataService.MethodPerEndpoint = + new MetadataService.FinagledClient( + Finagle.readOnlyThriftService( + name = "semantic_core_metadata_service", + dest = "/s/escherbird/metadataservice", + statsReceiver = statsReceiver, + thriftClientId = pushserviceThriftClientId, + tries = 2, // total number of tries. number of retries = tries - 1 + requestTimeout = 2.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + ) + + private val semanticCoreMetadataStitchClient = new MetadataStitchClient( + lowLatencySemanticCoreMetadataClient + ) + + override val semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata] = { + val name = "semantic_core_megadata_store_cached" + val store = MetaDataReadableStore.getMegadataReadableStore( + metadataStitchClient = semanticCoreMetadataStitchClient, + typedMetadataDomains = Some(Set(Domains.EventsEntityService)) + ) + ObservedCachedReadableStore + .from( + store = ObservedReadableStore(store)( + statsReceiver + .scope("store") + .scope("semantic_core_megadata_store") + ), + ttl = 1.hour, + maxKeys = 1000, + cacheName = "semantic_core_megadata_cache", + windowSize = 10000L + )(statsReceiver.scope("store", name)) + } + + override val basketballGameScoreStore: ReadableStore[QualifiedId, BasketballGameLiveUpdate] = { + StratoFetchableStore.withUnitView[QualifiedId, BasketballGameLiveUpdate]( + stratoClient, + "semanticCore/basketballGameScore.Entity") + } + + override val baseballGameScoreStore: ReadableStore[QualifiedId, BaseballGameLiveUpdate] = { + StratoFetchableStore.withUnitView[QualifiedId, BaseballGameLiveUpdate]( + stratoClient, + "semanticCore/baseballGameScore.Entity") + } + + override val cricketMatchScoreStore: ReadableStore[QualifiedId, CricketMatchLiveUpdate] = { + StratoFetchableStore.withUnitView[QualifiedId, CricketMatchLiveUpdate]( + stratoClient, + "semanticCore/cricketMatchScore.Entity") + } + + 
override val soccerMatchScoreStore: ReadableStore[QualifiedId, SoccerMatchLiveUpdate] = { + ObservedCachedReadableStore + .from( + store = StratoFetchableStore.withUnitView[QualifiedId, SoccerMatchLiveUpdate]( + stratoClient, + "semanticCore/soccerMatchScore.Entity"), + ttl = 10.seconds, + maxKeys = 100, + cacheName = "SoccerMatchCachedStore", + windowSize = 100L + )(statsReceiver.scope("SoccerMatchCachedStore")) + + } + + override val nflGameScoreStore: ReadableStore[QualifiedId, NflFootballGameLiveUpdate] = { + ObservedCachedReadableStore + .from( + store = StratoFetchableStore.withUnitView[QualifiedId, NflFootballGameLiveUpdate]( + stratoClient, + "semanticCore/nflFootballGameScore.Entity"), + ttl = 10.seconds, + maxKeys = 100, + cacheName = "NFLMatchCachedStore", + windowSize = 100L + )(statsReceiver.scope("NFLMatchCachedStore")) + + } + + override val userHealthSignalStore: ReadableStore[Long, UserHealthSignalResponse] = { + val userHealthSignalFetcher = + stratoClient.fetcher[Long, Seq[UserHealthSignal], UserHealthSignalResponse]( + "hss/user_signals/api/healthSignals.User" + ) + + val store = buildStore( + StratoFetchableStore.withView[Long, Seq[UserHealthSignal], UserHealthSignalResponse]( + userHealthSignalFetcher, + Seq( + AgathaRecentAbuseStrikeDouble, + AgathaCalibratedNsfwDouble, + AgathaCseDouble, + NsfwTextUserScoreDouble, + NsfwConsumerScoreDouble)), + "UserHealthSignalFetcher" + ) + if (!inMemCacheOff) { + ObservedCachedReadableStore + .from( + store = ObservedReadableStore(store)( + statsReceiver.scope("store").scope("user_health_model_score_store")), + ttl = 12.hours, + maxKeys = 16777215, + cacheName = "user_health_model_score_store_cache", + windowSize = 10000L + )(statsReceiver.scope("store", "user_health_model_score_store_cached")) + } else { + store + } + } + + override val tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] = { + val tweetHealthScoreFetcher = + stratoClient.fetcher[TweetScoringRequest, Unit, 
TweetScoringResponse]( + "abuse/detection/tweetHealthModelScore" + ) + + val store = buildStore( + StratoFetchableStore.withUnitView(tweetHealthScoreFetcher), + "TweetHealthScoreFetcher" + ) + + ObservedCachedReadableStore + .from( + store = ObservedReadableStore(store)( + statsReceiver.scope("store").scope("tweet_health_model_score_store")), + ttl = 30.minutes, + maxKeys = 1000, + cacheName = "tweet_health_model_score_store_cache", + windowSize = 10000L + )(statsReceiver.scope("store", "tweet_health_model_score_store_cached")) + } + + override val appPermissionStore: ReadableStore[(Long, (String, String)), AppPermission] = { + val store = StratoFetchableStore + .withUnitView[(Long, (String, String)), AppPermission]( + stratoClient, + "clients/permissionsState") + ObservedCachedReadableStore.from[(Long, (String, String)), AppPermission]( + buildStore(store, "mr_app_permission_store"), + ttl = 30.minutes, + maxKeys = 1000, + cacheName = "mr_app_permission_store_cache", + windowSize = 10000L + )(statsReceiver.scope("mr_app_permission_store_cached")) + } + + def pushSendEventStreamName: String + + override val pushSendEventBusPublisher = EventBusPublisherBuilder() + .clientId("frigate_pushservice") + .streamName(pushSendEventStreamName) + .thriftStruct(NotificationScribe) + .statsReceiver(statsReceiver.scope("push_send_eventbus")) + .build() + + override lazy val candidateFeatureHydrator: CandidateFeatureHydrator = + CandidateFeatureHydrator( + socialContextActionsFeatureBuilder = Some(socialContextActionsFeatureBuilder), + tweetSocialProofFeatureBuilder = Some(tweetSocialProofFeatureBuilder), + earlybirdFeatureBuilder = Some(earlybirdFeatureBuilder), + tweetContentFeatureBuilder = Some(tweetContentFeatureBuilder), + tweetAuthorRecentRealGraphFeatureBuilder = Some(tweetAuthorRecentRealGraphFeatureBuilder), + socialContextRecentRealGraphFeatureBuilder = Some(socialContextRecentRealGraphFeatureBuilder), + tweetAnnotationsFeatureBuilder = 
Some(tweetAnnotationsFeatureBuilder), + mrOfflineUserCandidateSparseAggregatesFeatureBuilder = + Some(mrOfflineUserCandidateSparseAggregatesFeatureBuilder), + candidateLevelFeatureBuilder = Some(candidateLevelFeatureBuilder) + )(statsReceiver.scope("push_feature_hydrator")) + + private val candidateCopyCross = + new CandidateCopyExpansion(statsReceiver.scope("refresh_handler/cross")) + + override lazy val candidateHydrator: PushCandidateHydrator = + PushCandidateHydrator( + this.socialGraphServiceProcessStore, + safeUserStore, + listAPIStore, + candidateCopyCross)( + statsReceiver.scope("push_candidate_hydrator"), + weightedOpenOrNtabClickModelScorer) + + override lazy val sendHandlerCandidateHydrator: SendHandlerPushCandidateHydrator = + SendHandlerPushCandidateHydrator( + lexServiceStore, + fanoutMetadataStore, + semanticCoreMegadataStore, + safeUserStore, + simClusterToEntityStore, + audioSpaceStore, + interestsWithLookupContextStore, + uttEntityHydrationStore, + superFollowCreatorTweetCountStore + )( + statsReceiver.scope("push_candidate_hydrator"), + weightedOpenOrNtabClickModelScorer + ) + + def mrRequestScriberNode: String + def loggedOutMrRequestScriberNode: String + + override lazy val configParamsBuilder: ConfigParamsBuilder = ConfigParamsBuilder( + config = overridesConfig, + featureContextBuilder = FeatureContextBuilder(featureSwitches), + statsReceiver = statsReceiver + ) + + def buildStore[K, V](store: ReadableStore[K, V], name: String): ReadableStore[K, V] = { + ObservedReadableStore(store)(statsReceiver.scope("store").scope(name)) + } + + def buildManhattanStore[K, V](dataset: Dataset[K, V]): ReadableStore[K, V] = { + val manhattanKVClientParams = ManhattanKVClientMtlsParams( + serviceIdentifier = serviceIdentifier, + opportunisticTls = OpportunisticTls.Required + ) + ManhattanStore + .fromDatasetWithMtls[K, V]( + dataset, + mtlsParams = manhattanKVClientParams, + statsReceiver = statsReceiver.scope(dataset.datasetName)) + } + + def 
buildCachedTweetyPieStore( + getTweetOptions: Option[GetTweetOptions], + keyPrefix: String + ): ReadableStore[Long, TweetyPieResult] = { + def discardAdditionalMediaInfo(tweetypieResult: TweetyPieResult) = { + val updatedMedia = tweetypieResult.tweet.media.map { mediaSeq => + mediaSeq.map { media => media.copy(additionalMetadata = None, sizes = Nil.toSet) } + } + val updatedTweet = tweetypieResult.tweet.copy(media = updatedMedia) + tweetypieResult.copy(tweet = updatedTweet) + } + + val tweetypieStoreWithoutAdditionalMediaInfo = TweetyPieStore( + tweetyPieClient, + getTweetOptions, + transformTweetypieResult = discardAdditionalMediaInfo + )(statsReceiver.scope("tweetypie_without_additional_media_info")) + + ObservedMemcachedReadableStore.fromCacheClient( + backingStore = tweetypieStoreWithoutAdditionalMediaInfo, + cacheClient = pushServiceCoreSvcsCacheClient, + ttl = 12.hours + )( + valueInjection = TweetyPieResultInjection, + statsReceiver = statsReceiver.scope("TweetyPieStore"), + keyToString = { k: Long => s"$keyPrefix/$k" } + ) + } + + override def init(): Future[Unit] = + ClientRegistry.expAllRegisteredClientsResolved().map { clients => + log.info("Done resolving clients: " + clients.mkString("[", ", ", "]")) + } + + val InlineActionsMhColumn = + "frigate/magicrecs/inlineActionsMh" + + override val inlineActionHistoryStore: ReadableStore[Long, Seq[(Long, String)]] = + StratoScannableStore + .withUnitView[(Long, Slice[Long]), (Long, Long), String](stratoClient, InlineActionsMhColumn) + .composeKeyMapping[Long] { userId => + (userId, Slice[Long](from = None, to = None, limit = None)) + }.mapValues { response => + response.map { + case (key, value) => (key._2, value) + } + } + + override val tripTweetCandidateStore: ReadableStore[TripDomain, TripTweets] = { + StratoFetchableStore + .withUnitView[TripDomain, TripTweets]( + new TripTweetsAirflowProdClientColumn(stratoClient).fetcher) + } + + override val softUserFollowingStore: ReadableStore[User, Seq[Long]] = new 
SoftUserFollowingStore( + stratoClient) + + override val superFollowEligibilityUserStore: ReadableStore[Long, Boolean] = { + StratoFetchableStore.withUnitView[Long, Boolean]( + stratoClient, + "audiencerewards/audienceRewardsService/getSuperFollowEligibility.User") + } + + override val superFollowCreatorTweetCountStore: ReadableStore[UserId, Int] = { + ObservedCachedReadableStore + .from( + store = StratoFetchableStore + .withUnitView[UserId, Int](new CreatorSubscriptionNumTweetsColumn(stratoClient).fetcher), + ttl = 5.minutes, + maxKeys = 1000, + cacheName = "SuperFollowCreatorTweetCountStore", + windowSize = 10000L + )(statsReceiver.scope("SuperFollowCreatorTweetCountStore")) + + } + + override val hasSuperFollowingRelationshipStore: ReadableStore[ + HasSuperFollowingRelationshipRequest, + Boolean + ] = { + StratoFetchableStore.withUnitView[HasSuperFollowingRelationshipRequest, Boolean]( + stratoClient, + "audiencerewards/superFollows/hasSuperFollowingRelationshipV2") + } + + override val superFollowApplicationStatusStore: ReadableStore[ + (Long, SellerTrack), + SellerApplicationState + ] = { + StratoFetchableStore.withUnitView[(Long, SellerTrack), SellerApplicationState]( + stratoClient, + "periscope/eligibility/applicationStatus") + } + + def historyStoreMemcacheDest: String + + override lazy val recentHistoryCacheClient = { + RecentHistoryCacheClient.build(historyStoreMemcacheDest, serviceIdentifier, statsReceiver) + } + + override val openAppUserStore: ReadableStore[Long, Boolean] = { + buildStore(OpenAppUserStore(stratoClient), "OpenAppUserStore") + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ExperimentsWithStats.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ExperimentsWithStats.scala new file mode 100644 index 000000000..923a785ee --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ExperimentsWithStats.scala @@ -0,0 +1,16 @@ +package 
com.twitter.frigate.pushservice.config + +import com.twitter.frigate.common.util.Experiments + +object ExperimentsWithStats { + + /** + * Add an experiment here to collect detailed pushservice stats. + * + * ! Important ! + * Keep this set small and remove experiments when you don't need the stats anymore. + */ + final val PushExperiments: Set[String] = Set( + Experiments.MRAndroidInlineActionHoldback.exptName, + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ProdConfig.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ProdConfig.scala new file mode 100644 index 000000000..7edc8d46d --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/ProdConfig.scala @@ -0,0 +1,230 @@ +package com.twitter.frigate.pushservice.config + +import com.twitter.abdecider.LoggingABDecider +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.bijection.Base64String +import com.twitter.bijection.Injection +import com.twitter.conversions.DurationOps._ +import com.twitter.decider.Decider +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.finagle.thrift.RichClientParam +import com.twitter.finagle.util.DefaultTimer +import com.twitter.frigate.common.config.RateLimiterGenerator +import com.twitter.frigate.common.filter.DynamicRequestMeterFilter +import com.twitter.frigate.common.history.ManhattanHistoryStore +import com.twitter.frigate.common.history.InvalidatingAfterWritesPushServiceHistoryStore +import com.twitter.frigate.common.history.ManhattanKVHistoryStore +import com.twitter.frigate.common.history.PushServiceHistoryStore +import com.twitter.frigate.common.history.SimplePushServiceHistoryStore +import com.twitter.frigate.common.util._ +import 
com.twitter.frigate.data_pipeline.features_common.FeatureStoreUtil +import com.twitter.frigate.data_pipeline.features_common.TargetLevelFeaturesConfig +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.DeciderKey +import com.twitter.frigate.pushservice.params.PushQPSLimitConstants +import com.twitter.frigate.pushservice.params.PushServiceTunableKeys +import com.twitter.frigate.pushservice.params.ShardParams +import com.twitter.frigate.pushservice.store.PushIbis2Store +import com.twitter.frigate.pushservice.thriftscala.PushRequestScribe +import com.twitter.frigate.scribe.thriftscala.NotificationScribe +import com.twitter.ibis2.service.thriftscala.Ibis2Service +import com.twitter.logging.Logger +import com.twitter.notificationservice.api.thriftscala.DeleteCurrentTimelineForUserRequest +import com.twitter.notificationservice.api.thriftscala.NotificationApi +import com.twitter.notificationservice.api.thriftscala.NotificationApi$FinagleClient +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse +import com.twitter.notificationservice.thriftscala.DeleteGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.NotificationService +import com.twitter.notificationservice.thriftscala.NotificationService$FinagleClient +import com.twitter.servo.decider.DeciderGateBuilder +import com.twitter.util.tunable.TunableMap +import com.twitter.util.Future +import com.twitter.util.Timer + +case class ProdConfig( + override val isServiceLocal: Boolean, + override val localConfigRepoPath: String, + override val inMemCacheOff: Boolean, + override val decider: Decider, + override val abDecider: LoggingABDecider, + override val featureSwitches: FeatureSwitches, + override val shardParams: ShardParams, + override val serviceIdentifier: ServiceIdentifier, + override val tunableMap: TunableMap, +)( + 
implicit val statsReceiver: StatsReceiver) + extends { + // Due to trait initialization logic in Scala, any abstract members declared in Config or + // DeployConfig should be declared in this block. Otherwise the abstract member might initialize to + // null if invoked before object creation finishes. + + val log = Logger("ProdConfig") + + // Deciders + val isPushserviceCanaryDeepbirdv2CanaryClusterEnabled = decider + .feature(DeciderKey.enablePushserviceDeepbirdv2CanaryClusterDeciderKey.toString).isAvailable + + // Client ids + val notifierThriftClientId = ClientId("frigate-notifier.prod") + val loggedOutNotifierThriftClientId = ClientId("frigate-logged-out-notifier.prod") + val pushserviceThriftClientId: ClientId = ClientId("frigate-pushservice.prod") + + // Dests + val frigateHistoryCacheDest = "/s/cache/frigate_history" + val memcacheCASDest = "/s/cache/magic_recs_cas:twemcaches" + val historyStoreMemcacheDest = + "/srv#/prod/local/cache/magic_recs_history:twemcaches" + + val deepbirdv2PredictionServiceDest = + if (serviceIdentifier.service.equals("frigate-pushservice-canary") && + isPushserviceCanaryDeepbirdv2CanaryClusterEnabled) + "/s/frigate/deepbirdv2-magicrecs-canary" + else "/s/frigate/deepbirdv2-magicrecs" + + override val fanoutMetadataColumn = "frigate/magicfanout/prod/mh/fanoutMetadata" + + override val timer: Timer = DefaultTimer + override val featureStoreUtil = FeatureStoreUtil.withParams(Some(serviceIdentifier)) + override val targetLevelFeaturesConfig = TargetLevelFeaturesConfig() + val pushServiceMHCacheDest = "/s/cache/pushservice_mh" + + val pushServiceCoreSvcsCacheDest = "/srv#/prod/local/cache/pushservice_core_svcs" + + val userTweetEntityGraphDest = "/s/cassowary/user_tweet_entity_graph" + val userUserGraphDest = "/s/cassowary/user_user_graph" + val lexServiceDest = "/s/live-video/timeline-thrift" + val entityGraphCacheDest = "/s/cache/pushservice_entity_graph" + + override val pushIbisV2Store = { + val service =
Finagle.readOnlyThriftService( + "ibis-v2-service", + "/s/ibis2/ibis2", + statsReceiver, + notifierThriftClientId, + requestTimeout = 3.seconds, + tries = 3, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + + // according to ibis team, it is safe to retry on timeout, write & channel closed exceptions. + val pushIbisClient = new Ibis2Service.FinagledClient( + new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.IbisQpsLimitTunableKey), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 20), + PushQPSLimitConstants.IbisOrNTabQPSForRFPH + )(timer).andThen(service), + RichClientParam(serviceName = "ibis-v2-service") + ) + + PushIbis2Store(pushIbisClient) + } + + val notificationServiceClient: NotificationService$FinagleClient = { + val service = Finagle.readWriteThriftService( + "notificationservice", + "/s/notificationservice/notificationservice", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 10.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + + new NotificationService.FinagledClient( + new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.NtabQpsLimitTunableKey), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 20), + PushQPSLimitConstants.IbisOrNTabQPSForRFPH)(timer).andThen(service), + RichClientParam(serviceName = "notificationservice") + ) + } + + val notificationServiceApiClient: NotificationApi$FinagleClient = { + val service = Finagle.readWriteThriftService( + "notificationservice-api", + "/s/notificationservice/notificationservice-api:thrift", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 10.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + + new NotificationApi.FinagledClient( + new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.NtabQpsLimitTunableKey), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 20), + PushQPSLimitConstants.IbisOrNTabQPSForRFPH)(timer).andThen(service), + RichClientParam(serviceName = 
"notificationservice-api") + ) + } + + val mrRequestScriberNode = "mr_request_scribe" + val loggedOutMrRequestScriberNode = "lo_mr_request_scribe" + + override val pushSendEventStreamName = "frigate_pushservice_send_event_prod" +} with DeployConfig { + // Scribe + private val notificationScribeLog = Logger("notification_scribe") + private val notificationScribeInjection: Injection[NotificationScribe, String] = BinaryScalaCodec( + NotificationScribe + ) andThen Injection.connect[Array[Byte], Base64String, String] + + override def notificationScribe(data: NotificationScribe): Unit = { + val logEntry: String = notificationScribeInjection(data) + notificationScribeLog.info(logEntry) + } + + // History Store - Invalidates cached history after writes + override val historyStore = new InvalidatingAfterWritesPushServiceHistoryStore( + ManhattanHistoryStore(notificationHistoryStore, statsReceiver), + recentHistoryCacheClient, + new DeciderGateBuilder(decider) + .idGate(DeciderKey.enableInvalidatingCachedHistoryStoreAfterWrites) + ) + + override val emailHistoryStore: PushServiceHistoryStore = { + statsReceiver.scope("frigate_email_history").counter("request").incr() + new SimplePushServiceHistoryStore(emailNotificationHistoryStore) + } + + override val loggedOutHistoryStore = + new InvalidatingAfterWritesPushServiceHistoryStore( + ManhattanKVHistoryStore( + manhattanKVLoggedOutHistoryStoreEndpoint, + "frigate_notification_logged_out_history"), + recentHistoryCacheClient, + new DeciderGateBuilder(decider) + .idGate(DeciderKey.enableInvalidatingCachedLoggedOutHistoryStoreAfterWrites) + ) + + private val requestScribeLog = Logger("request_scribe") + private val requestScribeInjection: Injection[PushRequestScribe, String] = BinaryScalaCodec( + PushRequestScribe + ) andThen Injection.connect[Array[Byte], Base64String, String] + + override def requestScribe(data: PushRequestScribe): Unit = { + val logEntry: String = requestScribeInjection(data) + requestScribeLog.info(logEntry) + 
} + + // generic notification server + override def notificationServiceSend( + target: Target, + request: CreateGenericNotificationRequest + ): Future[CreateGenericNotificationResponse] = + notificationServiceClient.createGenericNotification(request) + + // generic notification server + override def notificationServiceDelete( + request: DeleteGenericNotificationRequest + ): Future[Unit] = notificationServiceClient.deleteGenericNotification(request) + + // NTab-api + override def notificationServiceDeleteTimeline( + request: DeleteCurrentTimelineForUserRequest + ): Future[Unit] = notificationServiceApiClient.deleteCurrentTimelineForUser(request) + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/StagingConfig.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/StagingConfig.scala new file mode 100644 index 000000000..c93ca0ea8 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/StagingConfig.scala @@ -0,0 +1,193 @@ +package com.twitter.frigate.pushservice.config + +import com.twitter.abdecider.LoggingABDecider +import com.twitter.conversions.DurationOps._ +import com.twitter.decider.Decider +import com.twitter.featureswitches.v2.FeatureSwitches +import com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.finagle.thrift.RichClientParam +import com.twitter.finagle.util.DefaultTimer +import com.twitter.frigate.common.config.RateLimiterGenerator +import com.twitter.frigate.common.filter.DynamicRequestMeterFilter +import com.twitter.frigate.common.history.InvalidatingAfterWritesPushServiceHistoryStore +import com.twitter.frigate.common.history.ManhattanHistoryStore +import com.twitter.frigate.common.history.ManhattanKVHistoryStore +import com.twitter.frigate.common.history.ReadOnlyHistoryStore +import com.twitter.frigate.common.history.PushServiceHistoryStore +import 
com.twitter.frigate.common.history.SimplePushServiceHistoryStore +import com.twitter.frigate.common.util.Finagle +import com.twitter.frigate.data_pipeline.features_common.FeatureStoreUtil +import com.twitter.frigate.data_pipeline.features_common.TargetLevelFeaturesConfig +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.DeciderKey +import com.twitter.frigate.pushservice.params.PushQPSLimitConstants +import com.twitter.frigate.pushservice.params.PushServiceTunableKeys +import com.twitter.frigate.pushservice.params.ShardParams +import com.twitter.frigate.pushservice.store._ +import com.twitter.frigate.pushservice.thriftscala.PushRequestScribe +import com.twitter.frigate.scribe.thriftscala.NotificationScribe +import com.twitter.ibis2.service.thriftscala.Ibis2Service +import com.twitter.logging.Logger +import com.twitter.notificationservice.api.thriftscala.DeleteCurrentTimelineForUserRequest +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponseType +import com.twitter.notificationservice.thriftscala.DeleteGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.NotificationService +import com.twitter.notificationservice.thriftscala.NotificationService$FinagleClient +import com.twitter.servo.decider.DeciderGateBuilder +import com.twitter.util.tunable.TunableMap +import com.twitter.util.Future +import com.twitter.util.Timer + +case class StagingConfig( + override val isServiceLocal: Boolean, + override val localConfigRepoPath: String, + override val inMemCacheOff: Boolean, + override val decider: Decider, + override val abDecider: LoggingABDecider, + override val featureSwitches: FeatureSwitches, + override val shardParams: ShardParams, + override val serviceIdentifier: ServiceIdentifier, + 
override val tunableMap: TunableMap, +)( + implicit val statsReceiver: StatsReceiver) + extends { + // Due to trait initialization logic in Scala, any abstract members declared in Config or + // DeployConfig should be declared in this block. Otherwise the abstract member might initialize to + // null if invoked before object creation finishes. + + val log = Logger("StagingConfig") + + // Client ids + val notifierThriftClientId = ClientId("frigate-notifier.dev") + val loggedOutNotifierThriftClientId = ClientId("frigate-logged-out-notifier.dev") + val pushserviceThriftClientId: ClientId = ClientId("frigate-pushservice.staging") + + override val fanoutMetadataColumn = "frigate/magicfanout/staging/mh/fanoutMetadata" + + // dest + val frigateHistoryCacheDest = "/srv#/test/local/cache/twemcache_frigate_history" + val memcacheCASDest = "/srv#/test/local/cache/twemcache_magic_recs_cas_dev:twemcaches" + val pushServiceMHCacheDest = "/srv#/test/local/cache/twemcache_pushservice_test" + val entityGraphCacheDest = "/srv#/test/local/cache/twemcache_pushservice_test" + val pushServiceCoreSvcsCacheDest = "/srv#/test/local/cache/twemcache_pushservice_core_svcs_test" + val historyStoreMemcacheDest = "/srv#/test/local/cache/twemcache_eventstream_test:twemcaches" + val userTweetEntityGraphDest = "/cluster/local/cassowary/staging/user_tweet_entity_graph" + val userUserGraphDest = "/cluster/local/cassowary/staging/user_user_graph" + val lexServiceDest = "/srv#/staging/local/live-video/timeline-thrift" + val deepbirdv2PredictionServiceDest = "/cluster/local/frigate/staging/deepbirdv2-magicrecs" + + override val featureStoreUtil = FeatureStoreUtil.withParams(Some(serviceIdentifier)) + override val targetLevelFeaturesConfig = TargetLevelFeaturesConfig() + val mrRequestScriberNode = "validation_mr_request_scribe" + val loggedOutMrRequestScriberNode = "lo_mr_request_scribe" + + override val timer: Timer = DefaultTimer + + override val pushSendEventStreamName =
"frigate_pushservice_send_event_staging" + + override val pushIbisV2Store = { + val service = Finagle.readWriteThriftService( + "ibis-v2-service", + "/s/ibis2/ibis2", + statsReceiver, + notifierThriftClientId, + requestTimeout = 6.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + + val pushIbisClient = new Ibis2Service.FinagledClient( + new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.IbisQpsLimitTunableKey), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 20), + PushQPSLimitConstants.IbisOrNTabQPSForRFPH + )(timer).andThen(service), + RichClientParam(serviceName = "ibis-v2-service") + ) + + StagingIbis2Store(PushIbis2Store(pushIbisClient)) + } + + val notificationServiceClient: NotificationService$FinagleClient = { + val service = Finagle.readWriteThriftService( + "notificationservice", + "/s/notificationservice/notificationservice", + statsReceiver, + pushserviceThriftClientId, + requestTimeout = 10.seconds, + mTLSServiceIdentifier = Some(serviceIdentifier) + ) + + new NotificationService.FinagledClient( + new DynamicRequestMeterFilter( + tunableMap(PushServiceTunableKeys.NtabQpsLimitTunableKey), + RateLimiterGenerator.asTuple(_, shardParams.numShards, 20), + PushQPSLimitConstants.IbisOrNTabQPSForRFPH)(timer).andThen(service), + RichClientParam(serviceName = "notificationservice") + ) + } +} with DeployConfig { + + // Scribe + private val notificationScribeLog = Logger("StagingNotificationScribe") + + override def notificationScribe(data: NotificationScribe): Unit = { + notificationScribeLog.info(data.toString) + } + private val requestScribeLog = Logger("StagingRequestScribe") + + override def requestScribe(data: PushRequestScribe): Unit = { + requestScribeLog.info(data.toString) + } + + // history store + override val historyStore = new InvalidatingAfterWritesPushServiceHistoryStore( + ReadOnlyHistoryStore( + ManhattanHistoryStore(notificationHistoryStore, statsReceiver) + ), + recentHistoryCacheClient, + new 
DeciderGateBuilder(decider) + .idGate(DeciderKey.enableInvalidatingCachedHistoryStoreAfterWrites) + ) + + override val emailHistoryStore: PushServiceHistoryStore = new SimplePushServiceHistoryStore( + emailNotificationHistoryStore) + + // history store + override val loggedOutHistoryStore = + new InvalidatingAfterWritesPushServiceHistoryStore( + ReadOnlyHistoryStore( + ManhattanKVHistoryStore( + manhattanKVLoggedOutHistoryStoreEndpoint, + "frigate_notification_logged_out_history")), + recentHistoryCacheClient, + new DeciderGateBuilder(decider) + .idGate(DeciderKey.enableInvalidatingCachedLoggedOutHistoryStoreAfterWrites) + ) + + override def notificationServiceSend( + target: Target, + request: CreateGenericNotificationRequest + ): Future[CreateGenericNotificationResponse] = + target.isTeamMember.flatMap { isTeamMember => + if (isTeamMember) { + notificationServiceClient.createGenericNotification(request) + } else { + log.info(s"Mock creating generic notification $request for user: ${target.targetId}") + Future.value( + CreateGenericNotificationResponse(CreateGenericNotificationResponseType.Success) + ) + } + } + + override def notificationServiceDelete( + request: DeleteGenericNotificationRequest + ): Future[Unit] = Future.Unit + + override def notificationServiceDeleteTimeline( + request: DeleteCurrentTimelineForUserRequest + ): Future[Unit] = Future.Unit +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/mlconfig/DeepbirdV2ModelConfig.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/mlconfig/DeepbirdV2ModelConfig.scala new file mode 100644 index 000000000..c45ccc72e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/config/mlconfig/DeepbirdV2ModelConfig.scala @@ -0,0 +1,23 @@ +package com.twitter.frigate.pushservice.config.mlconfig + +import com.twitter.cortex.deepbird.thriftjava.DeepbirdPredictionService +import com.twitter.finagle.stats.StatsReceiver +import 
com.twitter.frigate.common.ml.prediction.DeepbirdPredictionEngineServiceStore +import com.twitter.nrel.heavyranker.PushDBv2PredictionServiceStore + +object DeepbirdV2ModelConfig { + def buildPredictionServiceScoreStore( + predictionServiceClient: DeepbirdPredictionService.ServiceToClient, + serviceName: String + )( + implicit statsReceiver: StatsReceiver + ): PushDBv2PredictionServiceStore = { + + val stats = statsReceiver.scope(serviceName) + val serviceStats = statsReceiver.scope("dbv2PredictionServiceStore") + + new PushDBv2PredictionServiceStore( + DeepbirdPredictionEngineServiceStore(predictionServiceClient, batchSize = Some(32))(stats) + )(serviceStats) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/controller/PushServiceController.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/controller/PushServiceController.scala new file mode 100644 index 000000000..d271e5a57 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/controller/PushServiceController.scala @@ -0,0 +1,114 @@ +package com.twitter.frigate.pushservice.controller + +import com.google.inject.Inject +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.thrift.ClientId +import com.twitter.finatra.thrift.Controller +import com.twitter.frigate.pushservice.exception.DisplayLocationNotSupportedException +import com.twitter.frigate.pushservice.refresh_handler.RefreshForPushHandler +import com.twitter.frigate.pushservice.send_handler.SendHandler +import com.twitter.frigate.pushservice.refresh_handler.LoggedOutRefreshForPushHandler +import com.twitter.frigate.pushservice.thriftscala.PushService.Loggedout +import com.twitter.frigate.pushservice.thriftscala.PushService.Refresh +import com.twitter.frigate.pushservice.thriftscala.PushService.Send +import com.twitter.frigate.pushservice.{thriftscala => t} +import com.twitter.frigate.thriftscala.NotificationDisplayLocation +import com.twitter.util.logging.Logging 
+import com.twitter.util.Future + +class PushServiceController @Inject() ( + sendHandler: SendHandler, + refreshForPushHandler: RefreshForPushHandler, + loggedOutRefreshForPushHandler: LoggedOutRefreshForPushHandler, + statsReceiver: StatsReceiver) + extends Controller(t.PushService) + with Logging { + + private val stats: StatsReceiver = statsReceiver.scope(s"${this.getClass.getSimpleName}") + private val failureCount = stats.counter("failures") + private val failureStatsScope = stats.scope("failures") + private val uncaughtErrorCount = failureStatsScope.counter("uncaught") + private val uncaughtErrorScope = failureStatsScope.scope("uncaught") + private val clientIdScope = stats.scope("client_id") + + handle(t.PushService.Send) { request: Send.Args => + send(request) + } + + handle(t.PushService.Refresh) { args: Refresh.Args => + refresh(args) + } + + handle(t.PushService.Loggedout) { request: Loggedout.Args => + loggedOutRefresh(request) + } + + private def loggedOutRefresh( + request: t.PushService.Loggedout.Args + ): Future[t.PushService.Loggedout.SuccessType] = { + val fut = request.request.notificationDisplayLocation match { + case NotificationDisplayLocation.PushToMobileDevice => + loggedOutRefreshForPushHandler.refreshAndSend(request.request) + case _ => + Future.exception( + new DisplayLocationNotSupportedException( + "Specified notification display location is not supported")) + } + fut.onFailure { ex => + logger.error( + s"Failure in push service for logged out refresh request: $request - ${ex.getMessage} - ${ex.getStackTrace + .mkString(", \n\t")}", + ex) + failureCount.incr() + uncaughtErrorCount.incr() + uncaughtErrorScope.counter(ex.getClass.getCanonicalName).incr() + } + } + + private def refresh( + request: t.PushService.Refresh.Args + ): Future[t.PushService.Refresh.SuccessType] = { + + val fut = request.request.notificationDisplayLocation match { + case NotificationDisplayLocation.PushToMobileDevice => + val clientId: String = + ClientId.current 
+ .flatMap { cid => Option(cid.name) } + .getOrElse("none") + clientIdScope.counter(clientId).incr() + refreshForPushHandler.refreshAndSend(request.request) + case _ => + Future.exception( + new DisplayLocationNotSupportedException( + "Specified notification display location is not supported")) + } + fut.onFailure { ex => + logger.error( + s"Failure in push service for refresh request: $request - ${ex.getMessage} - ${ex.getStackTrace + .mkString(", \n\t")}", + ex + ) + + failureCount.incr() + uncaughtErrorCount.incr() + uncaughtErrorScope.counter(ex.getClass.getCanonicalName).incr() + } + + } + + private def send( + request: t.PushService.Send.Args + ): Future[t.PushService.Send.SuccessType] = { + sendHandler(request.request).onFailure { ex => + logger.error( + s"Failure in push service for send request: $request - ${ex.getMessage} - ${ex.getStackTrace + .mkString(", \n\t")}", + ex + ) + + failureCount.incr() + uncaughtErrorCount.incr() + uncaughtErrorScope.counter(ex.getClass.getCanonicalName).incr() + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/DisplayLocationNotSupportedException.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/DisplayLocationNotSupportedException.scala new file mode 100644 index 000000000..08399c934 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/DisplayLocationNotSupportedException.scala @@ -0,0 +1,12 @@ +package com.twitter.frigate.pushservice.exception + +import scala.util.control.NoStackTrace + +/** + * Throw exception if DisplayLocation is not supported + * + * @param message Exception message + */ +class DisplayLocationNotSupportedException(private val message: String) + extends Exception(message) + with NoStackTrace diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/InvalidSportDomainException.scala 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/InvalidSportDomainException.scala new file mode 100644 index 000000000..8f0d2b988 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/InvalidSportDomainException.scala @@ -0,0 +1,12 @@ +package com.twitter.frigate.pushservice.exception + +import scala.util.control.NoStackTrace + +/** + * Throw exception if the sport domain is not supported by MagicFanoutSports + * + * @param message Exception message + */ +class InvalidSportDomainException(private val message: String) + extends Exception(message) + with NoStackTrace diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/TweetNTabRequestHydratorException.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/TweetNTabRequestHydratorException.scala new file mode 100644 index 000000000..069e65d79 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/TweetNTabRequestHydratorException.scala @@ -0,0 +1,7 @@ +package com.twitter.frigate.pushservice.exception + +import scala.util.control.NoStackTrace + +class TweetNTabRequestHydratorException(private val message: String) + extends Exception(message) + with NoStackTrace diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UnsupportedCrtException.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UnsupportedCrtException.scala new file mode 100644 index 000000000..5ed6c1c28 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UnsupportedCrtException.scala @@ -0,0 +1,11 @@ +package com.twitter.frigate.pushservice.exception + +import scala.util.control.NoStackTrace + +/** + * Exception for CRT not expected in the scope + * @param message Exception message to log the UnsupportedCrt + */ +class UnsupportedCrtException(private val message: String) + extends Exception(message) + with NoStackTrace diff 
--git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UttEntityNotFoundException.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UttEntityNotFoundException.scala new file mode 100644 index 000000000..3ac069dac --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/exception/UttEntityNotFoundException.scala @@ -0,0 +1,12 @@ +package com.twitter.frigate.pushservice.exception + +import scala.util.control.NoStackTrace + +/** + * Throw exception if UttEntity is not found where it might be a required data field + * + * @param message Exception message + */ +class UttEntityNotFoundException(private val message: String) + extends Exception(message) + with NoStackTrace diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HealthFeatureGetter.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HealthFeatureGetter.scala new file mode 100644 index 000000000..addf5b438 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HealthFeatureGetter.scala @@ -0,0 +1,220 @@ +package com.twitter.frigate.pushservice.ml + +import com.twitter.abuse.detection.scoring.thriftscala.{Model => TweetHealthModel} +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse +import com.twitter.frigate.common.base.FeatureMap +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.HealthPredicates.userHealthSignalValueToDouble +import 
/**
 * Assembles the health-related feature map (NSFW scores, media-category rates,
 * dislike/report rates and a set of boolean flags) for a tweet push candidate.
 */
object HealthFeatureGetter {

  /** Returns `numerator / denominator` when the denominator is positive, otherwise 0.0. */
  private def ratioOrZero(numerator: Double, denominator: Double): Double =
    if (denominator > 0) numerator / denominator else 0.0

  /**
   * Builds the health [[FeatureMap]] for `pushCandidate`.
   *
   * An empty `FeatureMap()` is returned when the candidate is not a
   * `TweetCandidate with TweetAuthor with TweetAuthorDetails`, or when it has no author id.
   *
   * @param pushCandidate candidate to compute features for
   * @param producerMediaRepresentationStore store queried with the author id for the
   *                                         author's media representation
   * @param userHealthScoreStore store queried with the author id for user health signals
   * @param tweetHealthScoreStoreOpt optional tweet scoring store; when absent both tweet-level
   *                                 scores fall back to 0.0
   * @return a future of the assembled feature map
   */
  def getFeatures(
    pushCandidate: PushCandidate,
    producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation],
    userHealthScoreStore: ReadableStore[Long, UserHealthSignalResponse],
    tweetHealthScoreStoreOpt: Option[ReadableStore[TweetScoringRequest, TweetScoringResponse]] =
      None
  ): Future[FeatureMap] = {

    pushCandidate match {
      case cand: PushCandidate with TweetCandidate with TweetAuthor with TweetAuthorDetails =>
        val mediaNsfwRequest =
          TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore4)
        val tweetTextNsfwRequest =
          TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore1)

        // Small lookup helpers; every missing feature defaults to 0.0 / false.
        def numeric(name: String): Double = cand.numericFeatures.getOrElse(name, 0.0)
        def flag(name: String): Boolean = cand.booleanFeatures.getOrElse(name, false)
        def tweetScore(request: TweetScoringRequest) =
          tweetHealthScoreStoreOpt.map(_.get(request)).getOrElse(Future.None)

        cand.authorId match {
          case Some(authorId) =>
            Future
              .join(
                userHealthScoreStore.get(authorId),
                producerMediaRepresentationStore.get(authorId),
                tweetScore(mediaNsfwRequest),
                tweetScore(tweetTextNsfwRequest),
                cand.tweetAuthor
              ).map {
                case (
                      userSignalsOpt,
                      mediaRepresentationOpt,
                      mediaNsfwOpt,
                      tweetTextNsfwOpt,
                      authorOpt) =>
                  // --- user-level health signals (defaults: 0.5 and 0.15) ---
                  val signalValues = userSignalsOpt
                    .map(_.signalValues).getOrElse(Map.empty[UserHealthSignal, SignalValue])
                  val agathaNsfwScore = userHealthSignalValueToDouble(
                    signalValues
                      .getOrElse(AgathaCalibratedNsfwDouble, SignalValue.DoubleValue(0.5)))
                  val userTextNsfwScore = userHealthSignalValueToDouble(
                    signalValues
                      .getOrElse(NsfwTextUserScoreDouble, SignalValue.DoubleValue(0.15)))

                  // --- tweet-level scores ---
                  val mediaNsfwScore = mediaNsfwOpt.map(_.score).getOrElse(0.0)
                  val tweetTextNsfwScore = tweetTextNsfwOpt.map(_.score).getOrElse(0.0)

                  // --- share of each media category in the author's media representation ---
                  val categoryScores =
                    mediaRepresentationOpt
                      .map(_.mediaRepresentation).getOrElse(Map.empty[String, Double])
                  val totalCategoryScore: Double = categoryScores.values.sum
                  def categoryRate(categoryId: String): Double =
                    ratioOrZero(categoryScores.getOrElse(categoryId, 0.0), totalCategoryScore)

                  val nudityRate = categoryRate(MediaAnnotationsUtil.nudityCategoryId)
                  val beautyRate = categoryRate(MediaAnnotationsUtil.beautyCategoryId)
                  val singlePersonRate = categoryRate(MediaAnnotationsUtil.singlePersonCategoryId)

                  // --- tweet-level dislike rate ---
                  val dislikeCt = numeric(
                    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_ntab_disliked.any_feature.Duration.Top.count")
                  val sentCt = numeric(
                    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_sent.any_feature.Duration.Top.count")
                  val dislikeRate = ratioOrZero(dislikeCt, sentCt)

                  // --- author-level dislike / report rates ---
                  val authorDislikeCt = numeric(
                    "tweet_author_aggregate.pair.label.ntab.isDisliked.any_feature.28.days.count")
                  val authorReportCt = numeric(
                    "tweet_author_aggregate.pair.label.reportTweetDone.any_feature.28.days.count")
                  val authorSentCt =
                    numeric("tweet_author_aggregate.pair.any_label.any_feature.28.days.count")
                  val authorDislikeRate = ratioOrZero(authorDislikeCt, authorSentCt)
                  val authorReportRate = ratioOrZero(authorReportCt, authorSentCt)

                  // Without a hydrated author the account is treated as non-NSFW with age 0.
                  val (isNsfwAccount, authorAccountAge) = authorOpt match {
                    case Some(tweetAuthor) =>
                      (
                        CandidateHydrationUtil.isNsfwAccount(
                          tweetAuthor,
                          cand.target.params(PushFeatureSwitchParams.NsfwTokensParam)),
                        (Time.now - Time.fromMilliseconds(tweetAuthor.createdAtMsec)).inHours
                      )
                    case _ => (false, 0)
                  }

                  val tweetSemanticCoreIds = cand.sparseBinaryFeatures
                    .getOrElse(PushConstants.TweetSemanticCoreIdFeature, Set.empty[String])

                  val continuousFeatures = Map[String, Double](
                    "agathaNsfwScore" -> agathaNsfwScore,
                    "textNsfwScore" -> userTextNsfwScore,
                    "pMediaNsfwScore" -> mediaNsfwScore,
                    "pTweetTextNsfwScore" -> tweetTextNsfwScore,
                    "nudityRate" -> nudityRate,
                    "beautyRate" -> beautyRate,
                    "singlePersonRate" -> singlePersonRate,
                    "numSources" -> CandidateUtil.getTagsCRCount(cand),
                    "favCount" -> numeric("tweet.core.tweet_counts.favorite_count"),
                    "activeFollowers" -> numeric("RecTweetAuthor.User.ActiveFollowers"),
                    "favorsRcvd28Days" -> numeric("RecTweetAuthor.User.FavorsRcvd28Days"),
                    "tweets28Days" -> numeric("RecTweetAuthor.User.Tweets28Days"),
                    "dislikeCount" -> dislikeCt,
                    "dislikeRate" -> dislikeRate,
                    "sentCount" -> sentCt,
                    "authorDislikeCount" -> authorDislikeCt,
                    "authorDislikeRate" -> authorDislikeRate,
                    "authorReportCount" -> authorReportCt,
                    "authorReportRate" -> authorReportRate,
                    "authorSentCount" -> authorSentCt,
                    "authorAgeInHour" -> authorAccountAge.toDouble
                  )

                  val recType = cand.commonRecType
                  val booleanFeatures = Map[String, Boolean](
                    "isSimclusterBased" -> RecTypes.simclusterBasedTweets.contains(recType),
                    "isTopicTweet" -> RecTypes.isTopicTweetType(recType),
                    "isHashSpace" -> RecTypes.tagspaceTypes.contains(recType),
                    "isFRS" -> RecTypes.frsTypes.contains(recType),
                    "isModelingBased" -> RecTypes.mrModelingBasedTypes.contains(recType),
                    "isGeoPop" -> RecTypes.GeoPopTweetTypes.contains(recType),
                    "hasPhoto" -> flag("RecTweet.TweetyPieResult.HasPhoto"),
                    "hasVideo" -> flag("RecTweet.TweetyPieResult.HasVideo"),
                    "hasUrl" -> flag("RecTweet.TweetyPieResult.HasUrl"),
                    "isMrTwistly" -> CandidateUtil.isMrTwistlyCandidate(cand),
                    "abuseStrikeTop2Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top2Percent_Id),
                    "abuseStrikeTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top1Percent_Id),
                    "abuseStrikeTop05Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top05Percent_Id),
                    "abuseStrikeTop025Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top025Percent_Id),
                    "allSpamReportsPerFavTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AllSpamReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.ReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop2Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.ReportsPerFav_Top2Percent_Id),
                    "isNudity" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_Nudity_Id),
                    "beautyStyleFashion" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_Beauty_Id),
                    "singlePerson" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_SinglePerson_Id),
                    "pornList" -> tweetSemanticCoreIds.contains(PushConstants.PornList_Id),
                    "pornographyAndNsfwContent" -> tweetSemanticCoreIds.contains(
                      PushConstants.PornographyAndNsfwContent_Id),
                    "sexLife" -> tweetSemanticCoreIds.contains(PushConstants.SexLife_Id),
                    "sexLifeOrSexualOrientation" -> tweetSemanticCoreIds.contains(
                      PushConstants.SexLifeOrSexualOrientation_Id),
                    "profanity" -> tweetSemanticCoreIds.contains(PushConstants.ProfanityFilter_Id),
                    "isVerified" -> flag("RecTweetAuthor.User.IsVerified"),
                    "hasNsfwToken" -> isNsfwAccount
                  )

                  val stringFeatures = Map[String, String](
                    "tweetLanguage" -> cand.categoricalFeatures
                      .getOrElse("tweet.core.tweet_text.language", "")
                  )

                  FeatureMap(
                    booleanFeatures = booleanFeatures,
                    numericFeatures = continuousFeatures,
                    categoricalFeatures = stringFeatures)
              }
          case _ => Future.value(FeatureMap())
        }
      case _ => Future.value(FeatureMap())
    }
  }
}
/**
 * Builds the [[HydrationContext]] (target id plus a set of feature inputs) for either a
 * push candidate or a target.
 */
object HydrationContextBuilder {

  /** A single `RecUser` input for user candidates; empty for everything else. */
  private def getRecUserInputs(
    pushCandidate: PushCandidate
  ): Set[FI.RecUser] =
    pushCandidate match {
      case user: UserCandidate => Set(FI.RecUser(user.userId))
      case _ => Set.empty
    }

  /** A single `RecTweet` input (tweet id + optional author id) for tweet candidates with author details. */
  private def getRecTweetInputs(
    pushCandidate: PushCandidate
  ): Set[FI.RecTweet] =
    pushCandidate match {
      case tweet: TweetCandidate with TweetAuthor with TweetAuthorDetails =>
        Set(FI.RecTweet(tweet.tweetId, tweet.authorId))
      case _ => Set.empty
    }

  /** A single `Media` input when the tweet candidate has media keys; empty otherwise. */
  private def getMediaInputs(
    pushCandidate: PushCandidate
  ): Set[FI.Media] =
    pushCandidate match {
      case tweetWithDetails: TweetCandidate with TweetDetails =>
        tweetWithDetails.mediaKeys
          .map { keys =>
            Set(FI.Media(keys))
          }.getOrElse(Set.empty)
      case _ => Set.empty
    }

  /** A single `Event` input for event candidates and magic-fanout event candidates. */
  private def getEventInputs(
    pushCandidate: PushCandidate
  ): Set[FI.Event] =
    pushCandidate match {
      case event: EventCandidate => Set(FI.Event(event.eventId))
      case fanoutEvent: MagicFanoutEventCandidate => Set(FI.Event(fanoutEvent.eventId))
      case _ => Set.empty
    }

  /** A single `Topic` input when a topic candidate carries a semantic core entity id. */
  private def getTopicInputs(
    pushCandidate: PushCandidate
  ): Set[FI.Topic] =
    pushCandidate match {
      case topic: TopicCandidate =>
        topic.semanticCoreEntityId match {
          case Some(topicId) => Set(FI.Topic(topicId))
          case _ => Set.empty
        }
      case _ => Set.empty
    }

  /**
   * Social-proof key built from the target's seeds-with-weight (empty map when absent) and
   * the candidate's social context actions; empty for candidates without social context.
   */
  private def getTweetSocialProofKey(
    pushCandidate: PushCandidate
  ): Future[Set[FI.SocialProofKey]] = {
    pushCandidate match {
      case tweetWithSC: TweetCandidate with SocialContextActions =>
        pushCandidate.target.seedsWithWeight.map { seedsOpt =>
          Set(
            FI.SocialProofKey(
              TweetSocialProofKey(
                seedsOpt.getOrElse(Map.empty),
                tweetWithSC.socialContextAllTypeActions
              ))
          )
        }
      case _ => Future.value(Set.empty)
    }
  }

  /** Social-context users, actions and (for tweets) the social-proof key, flattened into one set. */
  private def getSocialContextInputs(
    pushCandidate: PushCandidate
  ): Future[Set[FeatureInput]] =
    pushCandidate match {
      case candidateWithSC: Candidate with SocialContextActions =>
        getTweetSocialProofKey(pushCandidate).map { socialProofKeys =>
          val users = FI.SocialContextUsers(candidateWithSC.socialContextUserIds.toSet)
          val actions = FI.SocialContextActions(candidateWithSC.socialContextAllTypeActions)
          Set(Set(users), Set(actions), socialProofKeys).flatten
        }
      case _ => Future.value(Set.empty)
    }

  /** Push string group of the candidate's push copy, or the empty string when unavailable. */
  private def getPushStringGroupInputs(
    pushCandidate: PushCandidate
  ): Set[FI.PushStringGroup] = {
    val groupName =
      pushCandidate.getPushCopy.flatMap(_.pushStringGroup).map(_.toString).getOrElse("")
    Set(FI.PushStringGroup(groupName))
  }

  /** The candidate's common recommendation type. */
  private def getCRTInputs(
    pushCandidate: PushCandidate
  ): Set[FI.CommonRecommendationType] =
    Set(FI.CommonRecommendationType(pushCandidate.commonRecType))

  /** The candidate's frigate notification. */
  private def getFrigateNotification(
    pushCandidate: PushCandidate
  ): Set[FI.CandidateFrigateNotification] =
    Set(FI.CandidateFrigateNotification(pushCandidate.frigateNotification))

  /** The candidate's push and ntab copy ids. */
  private def getCopyId(
    pushCandidate: PushCandidate
  ): Set[FI.CopyId] =
    Set(FI.CopyId(pushCandidate.pushCopyId, pushCandidate.ntabCopyId))

  /**
   * Builds the hydration context of a candidate: the target id plus the union of every
   * candidate-level feature input produced by the helpers above.
   */
  def build(candidate: PushCandidate): Future[HydrationContext] =
    getSocialContextInputs(candidate).map { socialContextInputs =>
      val featureInputs: Set[FeatureInput] =
        socialContextInputs ++
          getRecUserInputs(candidate) ++
          getRecTweetInputs(candidate) ++
          getEventInputs(candidate) ++
          getTopicInputs(candidate) ++
          getCRTInputs(candidate) ++
          getPushStringGroupInputs(candidate) ++
          getMediaInputs(candidate) ++
          getFrigateNotification(candidate) ++
          getCopyId(candidate)

      HydrationContext(candidate.target.targetId, featureInputs)
    }

  /**
   * Builds the hydration context of a target from its real-graph features, DAU probability,
   * MR user state (name, or "unknown") and, when the feature switch is on, its online
   * labeled push history. The lookups run one after another, as in a for-comprehension.
   */
  def build(target: Target): Future[HydrationContext] = {
    for {
      realGraphOpt <- target.realGraphFeatures
      dauProbability <- PDauCohortUtil.getDauProb(target)
      userStateOpt <- target.targetMrUserState
      historyOpt <-
        if (target.params(PushFeatureSwitchParams.EnableHydratingOnlineMRHistoryFeatures)) {
          target.onlineLabeledPushRecs.map { historyValueOpt =>
            historyValueOpt.map(FI.MrHistory)
          }
        } else Future.None
    } yield {
      val realGraphInputOpt = realGraphOpt.map(FI.TargetRealGraphFeatures(_))
      val dauProbInput = FI.DauProb(dauProbability)
      val userStateInput = FI.MrUserState(userStateOpt.map(_.name).getOrElse("unknown"))
      HydrationContext(
        target.targetId,
        Seq(
          realGraphInputOpt,
          historyOpt,
          Some(dauProbInput),
          Some(userStateInput)
        ).flatten.toSet
      )
    }
  }
}
/**
 * PushMLModelScorer scores the Candidates and populates their ML scores
 *
 * @param pushMLModel Enum to specify which model to use for scoring the Candidates
 * @param modelToPredictionServiceStoreMap Specifies model ID -> prediction service store
 * @param defaultDBv2PredictionServiceStore Store used for model versions that are not present
 *                                          in `modelToPredictionServiceStoreMap`
 * @param scoringStats StatsReceiver for scoping stats
 */
class PushMLModelScorer(
  pushMLModel: PushMLModel.Value,
  modelToPredictionServiceStoreMap: Map[
    WeightedOpenOrNtabClickModel.ModelNameType,
    PushPredictionServiceStore
  ],
  defaultDBv2PredictionServiceStore: PushPredictionServiceStore,
  scoringStats: StatsReceiver) {

  val queriesOutsideTheModelMaps: StatsReceiver =
    scoringStats.scope("queries_outside_the_model_maps")
  val totalQueriesOutsideTheModelMaps: Counter =
    queriesOutsideTheModelMaps.counter("total")

  /** Builds the selector whose id is the `PushModelName(model, modelVersion)` string. */
  private def modelSelectorFor(
    model: PushMLModel.Value,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
  ): ModelSelector = {
    val selector = new ModelSelector()
    selector.setId(PushModelName(model, modelVersion).toString)
    selector
  }

  /**
   * Resolves the store serving `modelVersion` with a single map lookup. When the version is
   * not in the map, the "outside the model maps" counters are incremented and the default
   * store is returned (the fallback block is by-name, so it only runs on a miss).
   */
  private def predictionServiceStoreFor(
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
  ): PushPredictionServiceStore =
    modelToPredictionServiceStoreMap.getOrElse(
      modelVersion, {
        totalQueriesOutsideTheModelMaps.incr()
        queriesOutsideTheModelMaps.counter(modelVersion).incr()
        defaultDBv2PredictionServiceStore
      }
    )

  /**
   * Requests batch predictions from `predictionServiceStore` and populates each candidate's
   * quality-model score. The first candidate's target supplies the target-level context.
   * Returns the input sequence unchanged; an empty input issues no request.
   */
  private def scoreByBatchPredictionForModelFromMultiModelService(
    predictionServiceStore: PushPredictionServiceStore,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType,
    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
    useCommonFeatures: Boolean,
    overridePushMLModel: PushMLModel.Value
  ): Seq[CandidateDetails[PushCandidate]] = {
    val modelSelector = modelSelectorFor(overridePushMLModel, modelVersion)

    candidatesDetails.headOption.foreach { firstDetails =>
      val candidateHydrationWithFeaturesMap = candidatesDetails.map { details =>
        (
          details.candidate.candidateHydrationContext,
          details.candidate.candidateFeatureMap())
      }
      val target = firstDetails.candidate.target
      val candidatesWithScore = predictionServiceStore.getBatchPredictionsForModel(
        target.targetHydrationContext,
        target.featureMap,
        candidateHydrationWithFeaturesMap,
        Some(modelSelector),
        useCommonFeatures
      )
      // Scores are paired with candidates positionally.
      candidatesDetails.zip(candidatesWithScore).foreach {
        case (details, (_, scoreOptFut)) =>
          details.candidate.populateQualityModelScore(
            overridePushMLModel,
            modelVersion,
            scoreOptFut
          )
      }
    }

    candidatesDetails
  }

  /** Batch-scores with the store registered for `modelVersion`, or the default store. */
  private def scoreByBatchPrediction(
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType,
    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
    useCommonFeaturesForDBv2Service: Boolean,
    overridePushMLModel: PushMLModel.Value
  ): Seq[CandidateDetails[PushCandidate]] =
    scoreByBatchPredictionForModelFromMultiModelService(
      predictionServiceStoreFor(modelVersion),
      modelVersion,
      candidatesDetails,
      useCommonFeaturesForDBv2Service,
      overridePushMLModel
    )

  /**
   * Batch-scores `candidatesDetails` with the model version read from `modelVersionParam`
   * on the target's params.
   *
   * @param overridePushMLModelOpt model to use instead of this scorer's `pushMLModel`
   * @return the same candidate details, with their quality-model scores populated
   */
  def scoreByBatchPredictionForModelVersion(
    target: Target,
    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
    modelVersionParam: FSParam[WeightedOpenOrNtabClickModel.ModelNameType],
    useCommonFeaturesForDBv2Service: Boolean = true,
    overridePushMLModelOpt: Option[PushMLModel.Value] = None
  ): Seq[CandidateDetails[PushCandidate]] = {
    scoreByBatchPrediction(
      target.params(modelVersionParam),
      candidatesDetails,
      useCommonFeaturesForDBv2Service,
      overridePushMLModelOpt.getOrElse(pushMLModel)
    )
  }

  /**
   * Scores a single candidate with `modelVersion`, using the store registered for that
   * version or the default store when none is registered.
   */
  def singlePredicationForModelVersion(
    modelVersion: String,
    candidate: PushCandidate,
    overridePushMLModelOpt: Option[PushMLModel.Value] = None
  ): Future[Option[Double]] = {
    val modelSelector =
      modelSelectorFor(overridePushMLModelOpt.getOrElse(pushMLModel), modelVersion)
    val store = predictionServiceStoreFor(modelVersion)
    store.get(
      PushCandidateHydrationContextWithModel(
        candidate.target.targetHydrationContext,
        candidate.target.featureMap,
        candidate.candidateHydrationContext,
        candidate.candidateFeatureMap(),
        Some(modelSelector)
      )
    )
  }

  /** Target-level prediction; always served by the default store with this scorer's model. */
  def singlePredictionForTargetLevel(
    modelVersion: String,
    targetId: Long,
    featureMap: Future[FeatureMap]
  ): Future[Option[Double]] = {
    val modelSelector = modelSelectorFor(pushMLModel, modelVersion)
    defaultDBv2PredictionServiceStore.getForTargetLevel(
      TargetFeatureMapWithModel(targetId, featureMap, Some(modelSelector))
    )
  }

  /**
   * Creates `ceil(1.0 / histogramBinSize) + 1` counters under `stats.scope(scopeName)`.
   * Index 0 is "candidates_with_scores_zero"; index k >= 1 is named after the interval from
   * `histogramBinSize * (k - 1)` to `min(1.0, histogramBinSize * k)`, each bound formatted
   * with two decimals and the dot removed.
   */
  def getScoreHistogramCounters(
    stats: StatsReceiver,
    scopeName: String,
    histogramBinSize: Double
  ): IndexedSeq[Counter] = {
    val histogramScopedStatsReceiver = stats.scope(scopeName)
    val numBins = math.ceil(1.0 / histogramBinSize).toInt

    (0 to numBins) map { k =>
      if (k == 0)
        histogramScopedStatsReceiver.counter("candidates_with_scores_zero")
      else {
        val counterName = "candidates_with_scores_from_%s_to_%s".format(
          "%.2f".format(histogramBinSize * (k - 1)).replace(".", ""),
          "%.2f".format(math.min(1.0, histogramBinSize * k)).replace(".", ""))
        histogramScopedStatsReceiver.counter(counterName)
      }
    }
  }
}
/**
 * Push candidate for "discover Twitter" recommendations. Every member simply forwards to
 * the wrapped raw candidate, the copy ids or the implicitly supplied collaborators.
 */
class DiscoverTwitterPushCandidate(
  candidate: RawCandidate with DiscoverTwitterCandidate,
  copyIds: CopyIds,
)(
  implicit val statsScoped: StatsReceiver,
  pushModelScorer: PushMLModelScorer)
    extends PushCandidate
    with DiscoverTwitterCandidate
    with DiscoverTwitterPushIbis2Hydrator
    with DiscoverTwitterNtabRequestHydrator {

  // Forwarded from the raw candidate.
  override val target: Target = candidate.target
  override lazy val commonRecType: CommonRecommendationType = candidate.commonRecType

  // Forwarded from the copy ids.
  override val pushCopyId: Option[Int] = copyIds.pushCopyId
  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
  override val copyAggregationId: Option[String] = copyIds.aggregationId

  // Scoring and stats collaborators.
  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
  override val statsReceiver: StatsReceiver =
    statsScoped.scope("DiscoverTwitterPushCandidate")
}

/** Predicates for address-book pushes: gated only by `EnableAddressBookPush`. */
case class AddressBookPushCandidatePredicates(config: Config)
    extends BasicRFPHPredicates[DiscoverTwitterPushCandidate] {

  // Declared before `predicates` so it is initialised when the list is built.
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override val predicates: List[NamedPredicate[DiscoverTwitterPushCandidate]] =
    List(
      PredicatesForCandidate.paramPredicate(PushFeatureSwitchParams.EnableAddressBookPush)
    )
}

/** Predicates for complete-onboarding pushes: gated only by `EnableCompleteOnboardingPush`. */
case class CompleteOnboardingPushCandidatePredicates(config: Config)
    extends BasicRFPHPredicates[DiscoverTwitterPushCandidate] {

  // Declared before `predicates` so it is initialised when the list is built.
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override val predicates: List[NamedPredicate[DiscoverTwitterPushCandidate]] =
    List(
      PredicatesForCandidate.paramPredicate(PushFeatureSwitchParams.EnableCompleteOnboardingPush)
    )
}

/**
 * Out-of-network tweet predicates for pop-geo tweets; adds an HTL fatigue predicate
 * parameterised by `NewUserPlaybookAllowedLastLoginHours` as the post-candidate check.
 */
case class PopGeoTweetCandidatePredicates(override val config: Config)
    extends OutOfNetworkTweetPredicates[OutOfNetworkTweetPushCandidate] {

  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override def postCandidateSpecificPredicates: List[
    NamedPredicate[OutOfNetworkTweetPushCandidate]
  ] =
    List(
      PredicatesForCandidate.htlFatiguePredicate(
        PushFeatureSwitchParams.NewUserPlaybookAllowedLastLoginHours)
    )
}
/**
 * Push candidate for F1 (first-degree) tweets. Wraps a raw tweet candidate together with
 * its author future, the social-graph relationship results and the selected copy ids.
 */
class F1TweetPushCandidate(
  candidate: RawCandidate with TweetWithSocialContextTraits,
  author: Future[Option[User]],
  socialGraphServiceResultMap: Map[RelationEdge, Boolean],
  copyIds: CopyIds
)(
  implicit stats: StatsReceiver,
  pushModelScorer: PushMLModelScorer)
    extends PushCandidate
    with F1FirstDegree
    with TweetAuthorDetails
    with SocialGraphServiceRelationshipMap
    with F1FirstDegreeTweetNTabRequestHydrator
    with F1FirstDegreeTweetIbis2HydratorForCandidate {

  // Tweet and author data forwarded from the raw candidate.
  override val tweetId: Long = candidate.tweetId
  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] =
    candidate.tweetyPieResult
  override lazy val tweetAuthor: Future[Option[User]] = author
  override val target: PushTypes.Target = candidate.target
  override lazy val commonRecType: CommonRecommendationType =
    candidate.commonRecType

  // Social context.
  // NOTE(review): both members read `candidate.socialContextActions`; confirm that the
  // all-type variant is intentionally the same list for F1 candidates.
  override val socialContextActions: Seq[SocialContextAction] =
    candidate.socialContextActions
  override val socialContextAllTypeActions: Seq[SocialContextAction] =
    candidate.socialContextActions
  override val relationshipMap: Map[RelationEdge, Boolean] = socialGraphServiceResultMap

  // Copy ids.
  override val pushCopyId: Option[Int] = copyIds.pushCopyId
  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
  override val copyAggregationId: Option[String] = copyIds.aggregationId

  // Scoring and stats collaborators.
  override val statsReceiver: StatsReceiver = stats
  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
}

/** Predicates for F1 tweets: the basic RFPH tweet predicates without the SGS predicates. */
case class F1TweetCandidatePredicates(override val config: Config)
    extends BasicTweetPredicatesForRFPHWithoutSGSPredicates[F1TweetPushCandidate] {
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
}
/**
 * Push candidate recommending a list. The list itself is fetched lazily from
 * `apiListStore` by `listId`; its name and owner id are derived from that lookup.
 */
class ListRecommendationPushCandidate(
  val apiListStore: ReadableStore[Long, ApiList],
  candidate: RawCandidate with ListPushCandidate,
  copyIds: CopyIds
)(
  implicit stats: StatsReceiver,
  pushModelScorer: PushMLModelScorer)
    extends PushCandidate
    with ListPushCandidate
    with ListIbis2Hydrator
    with ListCandidateNTabRequestHydrator {

  // Forwarded from the raw candidate.
  override val commonRecType: CommonRecommendationType = candidate.commonRecType
  override val target: PushTypes.Target = candidate.target
  override val listId: Long = candidate.listId

  // Forwarded from the copy ids.
  override val pushCopyId: Option[Int] = copyIds.pushCopyId
  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
  override val copyAggregationId: Option[String] = copyIds.aggregationId

  // Scoring and stats collaborators.
  override val statsReceiver: StatsReceiver = stats
  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer

  /** The list looked up by `listId`; the store is queried on first access only. */
  lazy val apiList: Future[Option[ApiList]] = apiListStore.get(listId)

  /** Name of the list, when the lookup found one. */
  lazy val listName: Future[Option[String]] = apiList.map(_.map(_.name))

  /** Owner id of the list, when the lookup found one. */
  lazy val listOwnerId: Future[Option[Long]] = apiList.map(_.map(_.ownerId))

}

/** Predicates a list recommendation must pass before it is sent. */
case class ListRecommendationPredicates(config: Config)
    extends BasicRFPHPredicates[ListRecommendationPushCandidate] {

  // Declared before `predicates` so it is initialised when the list is built.
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override val predicates: List[NamedPredicate[ListRecommendationPushCandidate]] =
    List(
      ListPredicates.listNameExistsPredicate(),
      ListPredicates.listAuthorExistsPredicate(),
      ListPredicates.listAuthorAcceptableToTargetUser(config.edgeStore),
      ListPredicates.listAcceptablePredicate(),
      ListPredicates.listSubscriberCountPredicate()
    )
}
/** Raised when a creator-event candidate cannot be hydrated; carries no stack trace. */
class MagicFanoutCreatorEventPushCandidateHydratorException(private val message: String)
    extends Exception(message)
    with NoStackTrace

/**
 * Push candidate for magic-fanout creator events. Members forward to the wrapped raw
 * candidate, the hydrated creator user, the copy ids or the implicit collaborators.
 */
class MagicFanoutCreatorEventPushCandidate(
  candidate: RawCandidate with MagicFanoutCreatorEventCandidate,
  creatorUser: Option[User],
  copyIds: CopyIds,
  creatorTweetCountStore: ReadableStore[UserId, Int]
)(
  implicit val statsScoped: StatsReceiver,
  pushModelScorer: PushMLModelScorer)
    extends PushCandidate
    with MagicFanoutCreatorEventIbis2Hydrator
    with MagicFanoutCreatorEventNtabRequestHydrator
    with MagicFanoutCreatorEventCandidate
    with HydratedMagicFanoutCreatorEventCandidate {
  override def creatorId: Long = candidate.creatorId

  override def hydratedCreator: Option[User] = creatorUser

  /** Tweet count of the creator, read lazily from `creatorTweetCountStore`. */
  override lazy val numberOfTweetsFut: Future[Option[Int]] =
    creatorTweetCountStore.get(UserId(creatorId))

  /**
   * Profile of the hydrated creator. Evaluated on first access; throws
   * [[MagicFanoutCreatorEventPushCandidateHydratorException]] when the creator or its
   * profile is missing.
   */
  lazy val userProfile = hydratedCreator
    .flatMap(_.profile).getOrElse(
      throw new MagicFanoutCreatorEventPushCandidateHydratorException(
        s"Unable to get user profile to generate tapThrough for userId: $creatorId"))

  override val frigateNotification: FrigateNotification = candidate.frigateNotification

  override def subscriberId: Option[Long] = candidate.subscriberId

  override def creatorFanoutType: CreatorFanoutType = candidate.creatorFanoutType

  override def target: PushTypes.Target = candidate.target

  override def pushId: Long = candidate.pushId

  override def candidateMagicEventsReasons: Seq[MagicEventsReason] =
    candidate.candidateMagicEventsReasons

  override def statsReceiver: StatsReceiver = statsScoped

  override def pushCopyId: Option[Int] = copyIds.pushCopyId

  override def ntabCopyId: Option[Int] = copyIds.ntabCopyId

  override def copyAggregationId: Option[String] = copyIds.aggregationId

  override def commonRecType: CommonRecommendationType = candidate.commonRecType

  override def weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer

}

/**
 * Send-handler predicates for creator-subscription pushes.
 *
 * NOTE(review): the class name is spelled "MagicFanou..." (missing "t"); it is kept as-is
 * because renaming would break callers.
 */
case class MagicFanouCreatorSubscriptionEventPushPredicates(config: Config)
    extends BasicSendHandlerPredicates[MagicFanoutCreatorEventPushCandidate] {

  // Declared first: the predicate lists below are built during construction.
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override val preCandidateSpecificPredicates: List[
    NamedPredicate[MagicFanoutCreatorEventPushCandidate]
  ] =
    List(
      PredicatesForCandidate.paramPredicate(
        PushFeatureSwitchParams.EnableCreatorSubscriptionPush
      ),
      PredicatesForCandidate.isDeviceEligibleForCreatorPush,
      MagicFanoutPredicatesForCandidate.creatorPushTargetIsNotCreator(),
      MagicFanoutPredicatesForCandidate.duplicateCreatorPredicate,
      MagicFanoutPredicatesForCandidate.magicFanoutCreatorPushFatiguePredicate(),
    )

  override val postCandidateSpecificPredicates: List[
    NamedPredicate[MagicFanoutCreatorEventPushCandidate]
  ] =
    List(
      MagicFanoutNtabCaretFatiguePredicate(),
      // `.flip` inverts the super-following check.
      MagicFanoutPredicatesForCandidate.isSuperFollowingCreator()(config, statsReceiver).flip
    )
}

/** Send-handler predicates for new-creator pushes. */
case class MagicFanoutNewCreatorEventPushPredicates(config: Config)
    extends BasicSendHandlerPredicates[MagicFanoutCreatorEventPushCandidate] {

  // Declared first: the predicate lists below are built during construction.
  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)

  override val preCandidateSpecificPredicates: List[
    NamedPredicate[MagicFanoutCreatorEventPushCandidate]
  ] =
    List(
      PredicatesForCandidate.paramPredicate(
        PushFeatureSwitchParams.EnableNewCreatorPush
      ),
      PredicatesForCandidate.isDeviceEligibleForCreatorPush,
      MagicFanoutPredicatesForCandidate.duplicateCreatorPredicate,
      // Called with explicit parentheses, matching the subscription predicates above,
      // instead of relying on auto-application of an empty-paren method.
      MagicFanoutPredicatesForCandidate.magicFanoutCreatorPushFatiguePredicate(),
    )

  override val postCandidateSpecificPredicates: List[
    NamedPredicate[MagicFanoutCreatorEventPushCandidate]
  ] =
    List(
      MagicFanoutNtabCaretFatiguePredicate(),
      // `.flip` inverts the super-following check.
      MagicFanoutPredicatesForCandidate.isSuperFollowingCreator()(config, statsReceiver).flip
    )
}
com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesUtil +import com.twitter.frigate.pushservice.store.EventRequest +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.frigate.pushservice.util.TopicsUtil +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.MagicFanoutEventNotificationDetails +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.interests.thriftscala.InterestId.SemanticCore +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.livevideo.common.ids.CountryId +import com.twitter.livevideo.common.ids.UserId +import com.twitter.livevideo.timeline.domain.v2.Event +import com.twitter.livevideo.timeline.domain.v2.HydrationOptions +import com.twitter.livevideo.timeline.domain.v2.LookupContext +import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities +import com.twitter.storehaus.ReadableStore +import com.twitter.topiclisting.utt.LocalizedEntity +import com.twitter.util.Future + +abstract class MagicFanoutEventPushCandidate( + candidate: RawCandidate with MagicFanoutEventCandidate with RecommendationType, + copyIds: CopyIds, + override val fanoutEvent: Option[FanoutEvent], + override val semanticEntityResults: Map[SemanticEntityForQuery, Option[EntityMegadata]], + simClusterToEntities: Map[Int, Option[SimClustersInferredEntities]], + lexServiceStore: ReadableStore[EventRequest, Event], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore +)( + implicit statsScoped: StatsReceiver, + pushModelScorer: PushMLModelScorer) + extends PushCandidate + with MagicFanoutEventHydratedCandidate + with MagicFanoutEventCandidate + with EventNTabRequestHydrator + with 
RecommendationType + with Ibis2HydratorForCandidate { + + override lazy val eventFut: Future[Option[Event]] = { + eventRequestFut.flatMap { + case Some(eventRequest) => lexServiceStore.get(eventRequest) + case _ => Future.None + } + } + + override val frigateNotification: FrigateNotification = candidate.frigateNotification + + override val pushId: Long = candidate.pushId + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + candidate.candidateMagicEventsReasons + + override val eventId: Long = candidate.eventId + + override val momentId: Option[Long] = candidate.momentId + + override val target: Target = candidate.target + + override val eventLanguage: Option[String] = candidate.eventLanguage + + override val details: Option[MagicFanoutEventNotificationDetails] = candidate.details + + override lazy val stats: StatsReceiver = statsScoped.scope("MagicFanoutEventPushCandidate") + + override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer + + override val pushCopyId: Option[Int] = copyIds.pushCopyId + + override val ntabCopyId: Option[Int] = copyIds.ntabCopyId + + override val copyAggregationId: Option[String] = copyIds.aggregationId + + override val statsReceiver: StatsReceiver = statsScoped.scope("MagicFanoutEventPushCandidate") + + override val effectiveMagicEventsReasons: Option[Seq[MagicEventsReason]] = Some( + candidateMagicEventsReasons) + + lazy val newsForYouMetadata: Option[NewsForYouMetadata] = + fanoutEvent.flatMap { event => + { + event.fanoutMetadata.collect { + case FanoutMetadata.NewsForYouMetadata(nfyMetadata) => nfyMetadata + } + } + } + + val reverseIndexedTopicIds = candidate.candidateMagicEventsReasons + .filter(_.source.contains(ReasonSource.UttTopicFollowGraph)) + .map(_.reason).collect { + case TargetID.SemanticCoreID(semanticCoreID) => semanticCoreID.entityId + }.toSet + + val ergSemanticCoreIds = candidate.candidateMagicEventsReasons + 
.filter(_.source.contains(ReasonSource.ErgShortTermInterestSemanticCore)).map( + _.reason).collect { + case TargetID.SemanticCoreID(semanticCoreID) => semanticCoreID.entityId + }.toSet + + override lazy val ergLocalizedEntities = TopicsUtil + .getLocalizedEntityMap(target, ergSemanticCoreIds, uttEntityHydrationStore) + .map { localizedEntityMap => + ergSemanticCoreIds.collect { + case topicId if localizedEntityMap.contains(topicId) => localizedEntityMap(topicId) + } + } + + val eventSemanticCoreEntityIds: Seq[Long] = { + val entityIds = for { + event <- fanoutEvent + targets <- event.targets + } yield { + targets.flatMap { + _.whitelist.map { + _.collect { + case TargetID.SemanticCoreID(semanticCoreID) => semanticCoreID.entityId + } + } + } + } + + entityIds.map(_.flatten).getOrElse(Seq.empty) + } + + val eventSemanticCoreDomainIds: Seq[Long] = { + val domainIds = for { + event <- fanoutEvent + targets <- event.targets + } yield { + targets.flatMap { + _.whitelist.map { + _.collect { + case TargetID.SemanticCoreID(semanticCoreID) => semanticCoreID.domainId + } + } + } + } + + domainIds.map(_.flatten).getOrElse(Seq.empty) + } + + override lazy val followedTopicLocalizedEntities: Future[Set[LocalizedEntity]] = { + + val isNewSignupTargetingReason = candidateMagicEventsReasons.size == 1 && + candidateMagicEventsReasons.headOption.exists(_.source.contains(ReasonSource.NewSignup)) + + val shouldFetchTopicFollows = reverseIndexedTopicIds.nonEmpty || isNewSignupTargetingReason + + val topicFollows = if (shouldFetchTopicFollows) { + TopicsUtil + .getTopicsFollowedByUser( + candidate.target, + interestsLookupStore, + stats.stat("followed_topics") + ).map { _.getOrElse(Seq.empty) }.map { + _.flatMap { + _.interestId match { + case SemanticCore(semanticCore) => Some(semanticCore.id) + case _ => None + } + } + } + } else Future.Nil + + topicFollows.flatMap { followedTopicIds => + val topicIds = if (isNewSignupTargetingReason) { + // if new signup is the only targeting reason 
then we check the event targeting reason + // against realtime topic follows. + eventSemanticCoreEntityIds.toSet.intersect(followedTopicIds.toSet) + } else { + // check against the fanout reason of topics + followedTopicIds.toSet.intersect(reverseIndexedTopicIds) + } + + TopicsUtil + .getLocalizedEntityMap(target, topicIds, uttEntityHydrationStore) + .map { localizedEntityMap => + topicIds.collect { + case topicId if localizedEntityMap.contains(topicId) => localizedEntityMap(topicId) + } + } + } + } + + lazy val simClusterToEntityMapping: Map[Int, Seq[Long]] = + simClusterToEntities.flatMap { + case (clusterId, Some(inferredEntities)) => + statsReceiver.counter("with_cluster_to_entity_mapping").incr() + Some( + ( + clusterId, + inferredEntities.entities + .map(_.entityId))) + case _ => + statsReceiver.counter("without_cluster_to_entity_mapping").incr() + None + } + + lazy val annotatedAndInferredSemanticCoreEntities: Seq[Long] = + (simClusterToEntityMapping, eventFanoutReasonEntities) match { + case (entityMapping, eventFanoutReasons) => + entityMapping.values.flatten.toSeq ++ + eventFanoutReasons.semanticCoreIds.map(_.entityId) + } + + lazy val shouldHydrateSquareImage = target.deviceInfo.map { deviceInfo => + (PushDeviceUtil.isPrimaryDeviceIOS(deviceInfo) && + target.params(PushFeatureSwitchParams.EnableEventSquareMediaIosMagicFanoutNewsEvent)) || + (PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfo) && + target.params(PushFeatureSwitchParams.EnableEventSquareMediaAndroid)) + } + + lazy val shouldHydratePrimaryImage: Future[Boolean] = target.deviceInfo.map { deviceInfo => + (PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfo) && + target.params(PushFeatureSwitchParams.EnableEventPrimaryMediaAndroid)) + } + + lazy val eventRequestFut: Future[Option[EventRequest]] = + Future + .join( + target.inferredUserDeviceLanguage, + target.accountCountryCode, + shouldHydrateSquareImage, + shouldHydratePrimaryImage).map { + case ( + inferredUserDeviceLanguage, + 
accountCountryCode, + shouldHydrateSquareImage, + shouldHydratePrimaryImage) => + if (shouldHydrateSquareImage || shouldHydratePrimaryImage) { + Some( + EventRequest( + eventId, + lookupContext = LookupContext( + hydrationOptions = HydrationOptions( + includeSquareImage = shouldHydrateSquareImage, + includePrimaryImage = shouldHydratePrimaryImage + ), + language = inferredUserDeviceLanguage, + countryCode = accountCountryCode, + userId = Some(UserId(target.targetId)) + ) + )) + } else { + Some( + EventRequest( + eventId, + lookupContext = LookupContext( + language = inferredUserDeviceLanguage, + countryCode = accountCountryCode + ) + )) + } + case _ => None + } + + lazy val isHighPriorityEvent: Future[Boolean] = target.accountCountryCode.map { countryCodeOpt => + val isHighPriorityPushOpt = for { + countryCode <- countryCodeOpt + nfyMetadata <- newsForYouMetadata + eventContext <- nfyMetadata.eventContextScribe + } yield { + val highPriorityLocales = HighPriorityLocaleUtil.getHighPriorityLocales( + eventContext = eventContext, + defaultLocalesOpt = nfyMetadata.locales) + val highPriorityGeos = HighPriorityLocaleUtil.getHighPriorityGeos( + eventContext = eventContext, + defaultGeoPlaceIdsOpt = nfyMetadata.placeIds) + val isHighPriorityLocalePush = + highPriorityLocales.flatMap(_.country).map(CountryId(_)).contains(CountryId(countryCode)) + val isHighPriorityGeoPush = MagicFanoutPredicatesUtil + .geoPlaceIdsFromReasons(candidateMagicEventsReasons) + .intersect(highPriorityGeos.toSet) + .nonEmpty + stats.scope("is_high_priority_locale_push").counter(s"$isHighPriorityLocalePush").incr() + stats.scope("is_high_priority_geo_push").counter(s"$isHighPriorityGeoPush").incr() + isHighPriorityLocalePush || isHighPriorityGeoPush + } + isHighPriorityPushOpt.getOrElse(false) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutHydratedCandidate.scala 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutHydratedCandidate.scala new file mode 100644 index 000000000..36196120b --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutHydratedCandidate.scala @@ -0,0 +1,147 @@ +package com.twitter.frigate.pushservice.model + +import com.twitter.escherbird.common.thriftscala.QualifiedId +import com.twitter.escherbird.metadata.thriftscala.BasicMetadata +import com.twitter.escherbird.metadata.thriftscala.EntityIndexFields +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.MagicFanoutCandidate +import com.twitter.frigate.common.base.MagicFanoutEventCandidate +import com.twitter.frigate.common.base.RichEventFutCandidate +import com.twitter.frigate.magic_events.thriftscala +import com.twitter.frigate.magic_events.thriftscala.AnnotationAlg +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.magic_events.thriftscala.SemanticCoreID +import com.twitter.frigate.magic_events.thriftscala.SimClusterID +import com.twitter.frigate.magic_events.thriftscala.TargetID +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.livevideo.timeline.domain.v2.Event +import com.twitter.topiclisting.utt.LocalizedEntity +import com.twitter.util.Future + +case class FanoutReasonEntities( + userIds: Set[Long], + placeIds: Set[Long], + semanticCoreIds: Set[SemanticCoreID], + simclusterIds: Set[SimClusterID]) { + val qualifiedIds: Set[QualifiedId] = + semanticCoreIds.map(e => QualifiedId(e.domainId, e.entityId)) +} + +object FanoutReasonEntities { + val empty = FanoutReasonEntities( + userIds = Set.empty, + placeIds = Set.empty, + semanticCoreIds = Set.empty, + simclusterIds 
= Set.empty + ) + + def from(reasons: Seq[TargetID]): FanoutReasonEntities = { + val userIds: Set[Long] = reasons.collect { + case TargetID.UserID(userId) => userId.id + }.toSet + val placeIds: Set[Long] = reasons.collect { + case TargetID.PlaceID(placeId) => placeId.id + }.toSet + val semanticCoreIds: Set[SemanticCoreID] = reasons.collect { + case TargetID.SemanticCoreID(semanticCoreID) => semanticCoreID + }.toSet + val simclusterIds: Set[SimClusterID] = reasons.collect { + case TargetID.SimClusterID(simClusterID) => simClusterID + }.toSet + + FanoutReasonEntities( + userIds = userIds, + placeIds, + semanticCoreIds = semanticCoreIds, + simclusterIds = simclusterIds + ) + } +} + +trait MagicFanoutHydratedCandidate extends PushCandidate with MagicFanoutCandidate { + lazy val fanoutReasonEntities: FanoutReasonEntities = + FanoutReasonEntities.from(candidateMagicEventsReasons.map(_.reason)) +} + +trait MagicFanoutEventHydratedCandidate + extends MagicFanoutHydratedCandidate + with MagicFanoutEventCandidate + with RichEventFutCandidate { + + def target: PushTypes.Target + + def stats: StatsReceiver + + def fanoutEvent: Option[FanoutEvent] + + def eventFut: Future[Option[Event]] + + def semanticEntityResults: Map[SemanticEntityForQuery, Option[EntityMegadata]] + + def effectiveMagicEventsReasons: Option[Seq[MagicEventsReason]] + + def followedTopicLocalizedEntities: Future[Set[LocalizedEntity]] + + def ergLocalizedEntities: Future[Set[LocalizedEntity]] + + lazy val entityAnnotationAlg: Map[TargetID, Set[AnnotationAlg]] = + fanoutEvent + .flatMap { metadata => + metadata.eventAnnotationInfo.map { eventAnnotationInfo => + eventAnnotationInfo.map { + case (target, annotationInfoSet) => target -> annotationInfoSet.map(_.alg).toSet + }.toMap + } + }.getOrElse(Map.empty) + + lazy val eventSource: Option[String] = fanoutEvent.map { metadata => + val source = metadata.eventSource.getOrElse("undefined") + stats.scope("eventSource").counter(source).incr() + source + } + + lazy 
val semanticCoreEntityTags: Map[(Long, Long), Set[String]] = + semanticEntityResults.flatMap { + case (semanticEntityForQuery, entityMegadataOpt: Option[EntityMegadata]) => + for { + entityMegadata <- entityMegadataOpt + basicMetadata: BasicMetadata <- entityMegadata.basicMetadata + indexableFields: EntityIndexFields <- basicMetadata.indexableFields + tags <- indexableFields.tags + } yield { + ((semanticEntityForQuery.domainId, semanticEntityForQuery.entityId), tags.toSet) + } + } + + lazy val owningTwitterUserIds: Seq[Long] = semanticEntityResults.values.flatten + .flatMap { + _.basicMetadata.flatMap(_.twitter.flatMap(_.owningTwitterUserIds)) + }.flatten + .toSeq + .distinct + + lazy val eventFanoutReasonEntities: FanoutReasonEntities = + fanoutEvent match { + case Some(fanout) => + fanout.targets + .map { targets: Seq[thriftscala.Target] => + FanoutReasonEntities.from(targets.flatMap(_.whitelist).flatten) + }.getOrElse(FanoutReasonEntities.empty) + case _ => FanoutReasonEntities.empty + } + + override lazy val eventResultFut: Future[Event] = eventFut.map { + case Some(eventResult) => eventResult + case _ => + throw new IllegalArgumentException("event is None for MagicFanoutEventHydratedCandidate") + } + override val rankScore: Option[Double] = None + override val predictionScore: Option[Double] = None +} + +case class MagicFanoutEventHydratedInfo( + fanoutEvent: Option[FanoutEvent], + semanticEntityResults: Map[SemanticEntityForQuery, Option[EntityMegadata]]) diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutNewsEvent.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutNewsEvent.scala new file mode 100644 index 000000000..61ead1f22 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutNewsEvent.scala @@ -0,0 +1,99 @@ +package com.twitter.frigate.pushservice.model + +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import 
com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.MagicFanoutNewsEventCandidate +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.frigate.pushservice.model.ibis.MagicFanoutNewsEventIbis2Hydrator +import com.twitter.frigate.pushservice.model.ntab.MagicFanoutNewsEventNTabRequestHydrator +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.event.EventPredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutTargetingPredicateWrappersForCandidate +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.MagicFanoutNtabCaretFatiguePredicate +import com.twitter.frigate.pushservice.store.EventRequest +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.take.predicates.BasicSendHandlerPredicates +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.livevideo.timeline.domain.v2.Event +import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities +import com.twitter.storehaus.ReadableStore + +class MagicFanoutNewsEventPushCandidate( + candidate: RawCandidate with MagicFanoutNewsEventCandidate, + copyIds: CopyIds, + override val fanoutEvent: Option[FanoutEvent], + override val semanticEntityResults: Map[SemanticEntityForQuery, Option[EntityMegadata]], + 
simClusterToEntities: Map[Int, Option[SimClustersInferredEntities]], + lexServiceStore: ReadableStore[EventRequest, Event], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore +)( + implicit statsScoped: StatsReceiver, + pushModelScorer: PushMLModelScorer) + extends MagicFanoutEventPushCandidate( + candidate, + copyIds, + fanoutEvent, + semanticEntityResults, + simClusterToEntities, + lexServiceStore, + interestsLookupStore, + uttEntityHydrationStore + )(statsScoped, pushModelScorer) + with MagicFanoutNewsEventCandidate + with MagicFanoutNewsEventIbis2Hydrator + with MagicFanoutNewsEventNTabRequestHydrator { + + override lazy val stats: StatsReceiver = statsScoped.scope("MagicFanoutNewsEventPushCandidate") + override val statsReceiver: StatsReceiver = statsScoped.scope("MagicFanoutNewsEventPushCandidate") +} + +case class MagicFanoutNewsEventCandidatePredicates(config: Config) + extends BasicSendHandlerPredicates[MagicFanoutNewsEventPushCandidate] { + + implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName) + + override val preCandidateSpecificPredicates: List[ + NamedPredicate[MagicFanoutNewsEventPushCandidate] + ] = + List( + EventPredicatesForCandidate.accountCountryPredicateWithAllowlist, + PredicatesForCandidate.isDeviceEligibleForNewsOrSports, + MagicFanoutPredicatesForCandidate.inferredUserDeviceLanguagePredicate, + PredicatesForCandidate.secondaryDormantAccountPredicate(statsReceiver), + MagicFanoutPredicatesForCandidate.highPriorityNewsEventExceptedPredicate( + MagicFanoutTargetingPredicateWrappersForCandidate + .magicFanoutTargetingPredicate(statsReceiver, config) + )(config), + MagicFanoutPredicatesForCandidate.geoOptOutPredicate(config.safeUserStore), + EventPredicatesForCandidate.isNotDuplicateWithEventId, + MagicFanoutPredicatesForCandidate.highPriorityNewsEventExceptedPredicate( + 
MagicFanoutPredicatesForCandidate.newsNotificationFatigue() + )(config), + MagicFanoutPredicatesForCandidate.highPriorityNewsEventExceptedPredicate( + MagicFanoutNtabCaretFatiguePredicate() + )(config), + MagicFanoutPredicatesForCandidate.escherbirdMagicfanoutEventParam()(statsReceiver), + MagicFanoutPredicatesForCandidate.hasCustomTargetingForNewsEventsParam( + statsReceiver + ) + ) + + override val postCandidateSpecificPredicates: List[ + NamedPredicate[MagicFanoutNewsEventPushCandidate] + ] = + List( + MagicFanoutPredicatesForCandidate.magicFanoutNoOptoutInterestPredicate, + MagicFanoutPredicatesForCandidate.geoTargetingHoldback(), + MagicFanoutPredicatesForCandidate.userGeneratedEventsPredicate, + EventPredicatesForCandidate.hasTitle, + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutProductLaunchPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutProductLaunchPushCandidate.scala new file mode 100644 index 000000000..4dc569e2b --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutProductLaunchPushCandidate.scala @@ -0,0 +1,95 @@ +package com.twitter.frigate.pushservice.model + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.MagicFanoutProductLaunchCandidate +import com.twitter.frigate.common.util.{FeatureSwitchParams => FS} +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.magic_events.thriftscala.ProductType +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesUtil +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import 
com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.frigate.pushservice.model.ibis.MagicFanoutProductLaunchIbis2Hydrator +import com.twitter.frigate.pushservice.model.ntab.MagicFanoutProductLaunchNtabRequestHydrator +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.MagicFanoutNtabCaretFatiguePredicate +import com.twitter.frigate.pushservice.take.predicates.BasicSendHandlerPredicates +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.hermit.predicate.NamedPredicate + +class MagicFanoutProductLaunchPushCandidate( + candidate: RawCandidate with MagicFanoutProductLaunchCandidate, + copyIds: CopyIds +)( + implicit val statsScoped: StatsReceiver, + pushModelScorer: PushMLModelScorer) + extends PushCandidate + with MagicFanoutProductLaunchCandidate + with MagicFanoutProductLaunchIbis2Hydrator + with MagicFanoutProductLaunchNtabRequestHydrator { + + override val frigateNotification: FrigateNotification = candidate.frigateNotification + + override val pushCopyId: Option[Int] = copyIds.pushCopyId + + override val ntabCopyId: Option[Int] = copyIds.ntabCopyId + + override val pushId: Long = candidate.pushId + + override val productLaunchType: ProductType = candidate.productLaunchType + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + candidate.candidateMagicEventsReasons + + override val copyAggregationId: Option[String] = copyIds.aggregationId + + override val target: Target = candidate.target + + override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer + + override val statsReceiver: StatsReceiver = + statsScoped.scope("MagicFanoutProductLaunchPushCandidate") +} + +case class MagicFanoutProductLaunchPushCandidatePredicates(config: Config) + extends 
BasicSendHandlerPredicates[MagicFanoutProductLaunchPushCandidate] { + + implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName) + + override val preCandidateSpecificPredicates: List[ + NamedPredicate[MagicFanoutProductLaunchPushCandidate] + ] = + List( + PredicatesForCandidate.isDeviceEligibleForCreatorPush, + PredicatesForCandidate.exceptedPredicate( + "excepted_is_target_blue_verified", + MagicFanoutPredicatesUtil.shouldSkipBlueVerifiedCheckForCandidate, + PredicatesForCandidate.isTargetBlueVerified.flip + ), // no need to send if target is already Blue Verified + PredicatesForCandidate.exceptedPredicate( + "excepted_is_target_legacy_verified", + MagicFanoutPredicatesUtil.shouldSkipLegacyVerifiedCheckForCandidate, + PredicatesForCandidate.isTargetLegacyVerified.flip + ), // no need to send if target is already Legacy Verified + PredicatesForCandidate.exceptedPredicate( + "excepted_is_target_super_follow_creator", + MagicFanoutPredicatesUtil.shouldSkipSuperFollowCreatorCheckForCandidate, + PredicatesForCandidate.isTargetSuperFollowCreator.flip + ), // no need to send if target is already Super Follow Creator + PredicatesForCandidate.paramPredicate( + FS.EnableMagicFanoutProductLaunch + ), + MagicFanoutPredicatesForCandidate.magicFanoutProductLaunchFatigue(), + ) + + override val postCandidateSpecificPredicates: List[ + NamedPredicate[MagicFanoutProductLaunchPushCandidate] + ] = + List( + MagicFanoutNtabCaretFatiguePredicate(), + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutSportsPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutSportsPushCandidate.scala new file mode 100644 index 000000000..84535e4c2 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/MagicFanoutSportsPushCandidate.scala @@ -0,0 +1,119 @@ +package com.twitter.frigate.pushservice.model + +import 
com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.BaseGameScore +import com.twitter.frigate.common.base.MagicFanoutSportsEventCandidate +import com.twitter.frigate.common.base.MagicFanoutSportsScoreInformation +import com.twitter.frigate.common.base.TeamInfo +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.frigate.pushservice.model.ibis.MagicFanoutSportsEventIbis2Hydrator +import com.twitter.frigate.pushservice.model.ntab.MagicFanoutSportsEventNTabRequestHydrator +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutTargetingPredicateWrappersForCandidate +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.MagicFanoutNtabCaretFatiguePredicate +import com.twitter.frigate.pushservice.store.EventRequest +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.take.predicates.BasicSendHandlerPredicates +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.livevideo.timeline.domain.v2.Event +import com.twitter.livevideo.timeline.domain.v2.HydrationOptions +import com.twitter.livevideo.timeline.domain.v2.LookupContext +import 
com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+/** Push candidate for a MagicFanout sports event: forwards its constructor arguments to [[MagicFanoutEventPushCandidate]] and copies the score-update flag, game scores and team info from `candidate`. */
+class MagicFanoutSportsPushCandidate(
+  candidate: RawCandidate
+    with MagicFanoutSportsEventCandidate
+    with MagicFanoutSportsScoreInformation,
+  copyIds: CopyIds,
+  override val fanoutEvent: Option[FanoutEvent],
+  override val semanticEntityResults: Map[SemanticEntityForQuery, Option[EntityMegadata]],
+  simClusterToEntities: Map[Int, Option[SimClustersInferredEntities]],
+  lexServiceStore: ReadableStore[EventRequest, Event],
+  interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests],
+  uttEntityHydrationStore: UttEntityHydrationStore
+)(
+  implicit statsScoped: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends MagicFanoutEventPushCandidate(
+      candidate,
+      copyIds,
+      fanoutEvent,
+      semanticEntityResults,
+      simClusterToEntities,
+      lexServiceStore,
+      interestsLookupStore,
+      uttEntityHydrationStore)(statsScoped, pushModelScorer)
+    with MagicFanoutSportsEventCandidate
+    with MagicFanoutSportsScoreInformation
+    with MagicFanoutSportsEventNTabRequestHydrator
+    with MagicFanoutSportsEventIbis2Hydrator {
+
+  override val isScoreUpdate: Boolean = candidate.isScoreUpdate
+  override val gameScores: Future[Option[BaseGameScore]] = candidate.gameScores
+  override val homeTeamInfo: Future[Option[TeamInfo]] = candidate.homeTeamInfo
+  override val awayTeamInfo: Future[Option[TeamInfo]] = candidate.awayTeamInfo
+
+  override lazy val stats: StatsReceiver = statsScoped.scope("MagicFanoutSportsPushCandidate") // same scope name as statsReceiver on the next line
+  override val statsReceiver: StatsReceiver = statsScoped.scope("MagicFanoutSportsPushCandidate")
+  // Builds the event lookup once both target futures resolve, asking for square and primary images. NOTE(review): `target` and `eventId` are not defined in this class - presumably inherited; confirm.
+  override lazy val eventRequestFut: Future[Option[EventRequest]] = {
+    Future.join(target.inferredUserDeviceLanguage, target.accountCountryCode).map {
+      case (inferredUserDeviceLanguage, accountCountryCode) =>
+        Some(
+          EventRequest(
+            eventId,
+            lookupContext = LookupContext(
+              hydrationOptions = HydrationOptions(
+                includeSquareImage = true,
+                includePrimaryImage = true
+              ),
+              language = inferredUserDeviceLanguage,
+              countryCode = accountCountryCode
+            )
+          ))
+    }
+  }
+}
+/** Predicate lists plugged into [[BasicSendHandlerPredicates]] for [[MagicFanoutSportsPushCandidate]]; the stats receiver is scoped by this class's simple name. */
+case class MagicFanoutSportsEventCandidatePredicates(config: Config)
+    extends BasicSendHandlerPredicates[MagicFanoutSportsPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates: List[
+    NamedPredicate[MagicFanoutSportsPushCandidate]
+  ] =
+    List(
+      PredicatesForCandidate.paramPredicate(PushFeatureSwitchParams.EnableScoreFanoutNotification)
+    )
+
+  override val postCandidateSpecificPredicates: List[
+    NamedPredicate[MagicFanoutSportsPushCandidate]
+  ] =
+    List(
+      PredicatesForCandidate.isDeviceEligibleForNewsOrSports,
+      MagicFanoutPredicatesForCandidate.inferredUserDeviceLanguagePredicate,
+      MagicFanoutPredicatesForCandidate.highPriorityEventExceptedPredicate( // wraps the targeting predicate
+        MagicFanoutTargetingPredicateWrappersForCandidate
+          .magicFanoutTargetingPredicate(statsReceiver, config)
+      )(config),
+      PredicatesForCandidate.secondaryDormantAccountPredicate(
+        statsReceiver
+      ),
+      MagicFanoutPredicatesForCandidate.highPriorityEventExceptedPredicate( // wraps the NTab caret-fatigue predicate
+        MagicFanoutNtabCaretFatiguePredicate()
+      )(config),
+    )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/OutOfNetworkTweetPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/OutOfNetworkTweetPushCandidate.scala
new file mode 100644
index 000000000..0b8c533ea
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/OutOfNetworkTweetPushCandidate.scala
@@ -0,0 +1,68 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.contentrecommender.thriftscala.MetricTag
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate
+import com.twitter.frigate.common.base.TopicCandidate
+import
com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.OutOfNetworkTweetIbis2HydratorForCandidate
+import com.twitter.frigate.pushservice.model.ntab.OutOfNetworkTweetNTabRequestHydrator
+import com.twitter.frigate.pushservice.predicate.HealthPredicates
+import com.twitter.frigate.pushservice.take.predicates.OutOfNetworkTweetPredicates
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.topiclisting.utt.LocalizedEntity
+import com.twitter.util.Future
+/** Hydrated push candidate for an out-of-network tweet: every overridden member is read from `candidate`, `author`, `copyIds` or the implicit stats / scorer arguments. */
+class OutOfNetworkTweetPushCandidate(
+  candidate: RawCandidate with OutOfNetworkTweetCandidate with TopicCandidate,
+  author: Future[Option[User]],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with OutOfNetworkTweetCandidate
+    with TopicCandidate
+    with TweetAuthorDetails
+    with OutOfNetworkTweetNTabRequestHydrator
+    with OutOfNetworkTweetIbis2HydratorForCandidate {
+  override val statsReceiver: StatsReceiver = stats // the implicit receiver is used as-is, without extra scoping
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val tweetId: Long = candidate.tweetId
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] =
+    candidate.tweetyPieResult
+  override lazy val tweetAuthor: Future[Option[User]] = author
+  override val target: PushTypes.Target = candidate.target
+  override lazy val commonRecType: CommonRecommendationType =
+    candidate.commonRecType
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+  override lazy val semanticCoreEntityId: Option[Long] = candidate.semanticCoreEntityId
+  override lazy val localizedUttEntity: Option[LocalizedEntity] = candidate.localizedUttEntity
+  override lazy val algorithmCR: Option[String] = candidate.algorithmCR
+  override lazy val isMrBackfillCR: Option[Boolean] = candidate.isMrBackfillCR
+  override lazy val tagsCR: Option[Seq[MetricTag]] = candidate.tagsCR
+}
+/** [[OutOfNetworkTweetPredicates]] for [[OutOfNetworkTweetPushCandidate]], with a single post-candidate predicate taken from [[HealthPredicates]]. */
+case class OutOfNetworkTweetCandidatePredicates(override val config: Config)
+    extends OutOfNetworkTweetPredicates[OutOfNetworkTweetPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override def postCandidateSpecificPredicates: List[ // a `def`, so the list is rebuilt on every access
+    NamedPredicate[OutOfNetworkTweetPushCandidate]
+  ] =
+    List(
+      HealthPredicates.agathaAbusiveTweetAuthorPredicateMrTwistly(),
+    )
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/PushTypes.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/PushTypes.scala
new file mode 100644
index 000000000..83d5b67c3
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/PushTypes.scala
@@ -0,0 +1,61 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.frigate.common.base._
+import com.twitter.frigate.common.candidate.UserLanguage
+import com.twitter.frigate.common.candidate._
+import com.twitter.frigate.data_pipeline.features_common.RequestContextForFeatureStore
+import com.twitter.frigate.pushservice.model.candidate.CopyInfo
+import com.twitter.frigate.pushservice.model.candidate.MLScores
+import com.twitter.frigate.pushservice.model.candidate.QualityScribing
+import com.twitter.frigate.pushservice.model.candidate.Scriber
+import com.twitter.frigate.pushservice.model.ibis.Ibis2HydratorForCandidate
+import
com.twitter.frigate.pushservice.model.ntab.NTabRequest
+import com.twitter.frigate.pushservice.take.ChannelForCandidate
+import com.twitter.frigate.pushservice.target._
+import com.twitter.util.Time
+/** Holder for the three composite traits used across the push service: [[PushTypes.Target]], [[PushTypes.RawCandidate]] and [[PushTypes.PushCandidate]]. */
+object PushTypes {
+  /** The push target: a pure mixin of the traits listed below; it declares no members of its own. */
+  trait Target
+      extends TargetUser
+      with UserDetails
+      with TargetWithPushContext
+      with TargetDecider
+      with TargetABDecider
+      with FrigateHistory
+      with PushTargeting
+      with TargetScoringDetails
+      with TweetImpressionHistory
+      with CustomConfigForExpt
+      with CaretFeedbackHistory
+      with NotificationFeedbackHistory
+      with PromptFeedbackHistory
+      with HTLVisitHistory
+      with MaxTweetAge
+      with NewUserDetails
+      with ResurrectedUserDetails
+      with TargetWithSeedUsers
+      with MagicFanoutHistory
+      with OptOutUserInterests
+      with RequestContextForFeatureStore
+      with TargetAppPermissions
+      with UserLanguage
+      with InlineActionHistory
+      with TargetPlaces
+  /** A candidate tied to a [[PushTypes.Target]] and a recommendation type; it also records when it was created. */
+  trait RawCandidate extends Candidate with TargetInfo[PushTypes.Target] with RecommendationType {
+
+    val createdAt: Time = Time.now // strict val: Time.now is read once, when the candidate instance is initialised
+  }
+  /** A [[RawCandidate]] additionally mixed with the traits listed below; it declares no members of its own. */
+  trait PushCandidate
+      extends RawCandidate
+      with CandidateScoringDetails
+      with MLScores
+      with QualityScribing
+      with CopyInfo
+      with Scriber
+      with Ibis2HydratorForCandidate
+      with NTabRequest
+      with ChannelForCandidate
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSpeaker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSpeaker.scala
new file mode 100644
index 000000000..6da10ed77
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSpeaker.scala
@@ -0,0 +1,85 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.ScheduledSpaceSpeakerCandidate
+import com.twitter.frigate.common.base.SpaceCandidateFanoutDetails
+import com.twitter.frigate.common.util.FeatureSwitchParams
+import com.twitter.frigate.magic_events.thriftscala.SpaceMetadata
+import
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.ScheduledSpaceSpeakerIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.ScheduledSpaceSpeakerNTabRequestHydrator
+import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate
+import com.twitter.frigate.pushservice.predicate.SpacePredicate
+import com.twitter.frigate.pushservice.take.predicates.BasicSendHandlerPredicates
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.storehaus.ReadableStore
+import com.twitter.ubs.thriftscala.AudioSpace
+import com.twitter.util.Future
+/** Push candidate for a scheduled-Space speaker: space fields are copied from `candidate`, the hydrated host is `hostUser`, and the audio space is fetched lazily from `audioSpaceStore` by `spaceId`. */
+class ScheduledSpaceSpeakerPushCandidate(
+  candidate: RawCandidate with ScheduledSpaceSpeakerCandidate,
+  hostUser: Option[User],
+  copyIds: CopyIds,
+  audioSpaceStore: ReadableStore[String, AudioSpace]
+)(
+  implicit val statsScoped: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with ScheduledSpaceSpeakerCandidate
+    with ScheduledSpaceSpeakerIbis2Hydrator
+    with SpaceCandidateFanoutDetails
+    with ScheduledSpaceSpeakerNTabRequestHydrator {
+
+  override val startTime: Long = candidate.startTime
+
+  override val hydratedHost: Option[User] = hostUser
+
+  override val spaceId: String = candidate.spaceId
+
+  override val hostId: Option[Long] = candidate.hostId
+
+  override val speakerIds: Option[Seq[Long]] = candidate.speakerIds
+
+  override val listenerIds: Option[Seq[Long]] = candidate.listenerIds
+
+  override val frigateNotification: FrigateNotification = candidate.frigateNotification
+
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+
+  override val target: Target = candidate.target
+
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+
+  override lazy val audioSpaceFut: Future[Option[AudioSpace]] = audioSpaceStore.get(spaceId) // lazy: the store is only queried on first access
+
+  override val spaceFanoutMetadata: Option[SpaceMetadata] = None // always None for this candidate type
+
+  override val statsReceiver: StatsReceiver =
+    statsScoped.scope("ScheduledSpaceSpeakerCandidate")
+}
+/** Pre-candidate predicates for [[ScheduledSpaceSpeakerPushCandidate]]: `SpacePredicate.scheduledSpaceStarted` over `config.audioSpaceStore`, then the EnableScheduledSpaceSpeakers param gate. */
+case class ScheduledSpaceSpeakerCandidatePredicates(config: Config)
+    extends BasicSendHandlerPredicates[ScheduledSpaceSpeakerPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates: List[
+    NamedPredicate[ScheduledSpaceSpeakerPushCandidate]
+  ] = List(
+    SpacePredicate.scheduledSpaceStarted(
+      config.audioSpaceStore
+    ),
+    PredicatesForCandidate.paramPredicate(FeatureSwitchParams.EnableScheduledSpaceSpeakers)
+  )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSubscriber.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSubscriber.scala
new file mode 100644
index 000000000..78977ab5d
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ScheduledSpaceSubscriber.scala
@@ -0,0 +1,86 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.ScheduledSpaceSubscriberCandidate
+import com.twitter.frigate.common.base.SpaceCandidateFanoutDetails
+import com.twitter.frigate.common.util.FeatureSwitchParams
+import com.twitter.frigate.magic_events.thriftscala.SpaceMetadata
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import
com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.ScheduledSpaceSubscriberIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.ScheduledSpaceSubscriberNTabRequestHydrator
+import com.twitter.frigate.pushservice.predicate._
+import com.twitter.frigate.pushservice.take.predicates.BasicSendHandlerPredicates
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.storehaus.ReadableStore
+import com.twitter.ubs.thriftscala.AudioSpace
+import com.twitter.util.Future
+/** Push candidate for a scheduled-Space subscriber: space fields are copied from `candidate`, the hydrated host is `hostUser`, and the audio space is fetched lazily from `audioSpaceStore` by `spaceId`. */
+class ScheduledSpaceSubscriberPushCandidate(
+  candidate: RawCandidate with ScheduledSpaceSubscriberCandidate,
+  hostUser: Option[User],
+  copyIds: CopyIds,
+  audioSpaceStore: ReadableStore[String, AudioSpace]
+)(
+  implicit val statsScoped: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with ScheduledSpaceSubscriberCandidate
+    with SpaceCandidateFanoutDetails
+    with ScheduledSpaceSubscriberIbis2Hydrator
+    with ScheduledSpaceSubscriberNTabRequestHydrator {
+
+  override val startTime: Long = candidate.startTime
+
+  override val hydratedHost: Option[User] = hostUser
+
+  override val spaceId: String = candidate.spaceId
+
+  override val hostId: Option[Long] = candidate.hostId
+
+  override val speakerIds: Option[Seq[Long]] = candidate.speakerIds
+
+  override val listenerIds: Option[Seq[Long]] = candidate.listenerIds
+
+  override val frigateNotification: FrigateNotification = candidate.frigateNotification
+
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+
+  override val target: Target = candidate.target
+
+  override lazy val audioSpaceFut: Future[Option[AudioSpace]] = audioSpaceStore.get(spaceId) // lazy: the store is only queried on first access
+
+  override val spaceFanoutMetadata: Option[SpaceMetadata] = None // always None for this candidate type
+
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+
+  override val statsReceiver: StatsReceiver =
+    statsScoped.scope("ScheduledSpaceSubscriberCandidate")
+}
+/** Pre-candidate predicates for [[ScheduledSpaceSubscriberPushCandidate]]: the EnableScheduledSpaceSubscribers param gate first, followed by four Space-related predicates. */
+case class ScheduledSpaceSubscriberCandidatePredicates(config: Config)
+    extends BasicSendHandlerPredicates[ScheduledSpaceSubscriberPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates: List[
+    NamedPredicate[ScheduledSpaceSubscriberPushCandidate]
+  ] =
+    List(
+      PredicatesForCandidate.paramPredicate(FeatureSwitchParams.EnableScheduledSpaceSubscribers),
+      SpacePredicate.narrowCastSpace,
+      SpacePredicate.targetInSpace(config.audioSpaceParticipantsStore),
+      SpacePredicate.spaceHostTargetUserBlocking(config.edgeStore),
+      PredicatesForCandidate.duplicateSpacesPredicate
+    )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/SubscribedSearchTweetPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/SubscribedSearchTweetPushCandidate.scala
new file mode 100644
index 000000000..4d71a0c75
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/SubscribedSearchTweetPushCandidate.scala
@@ -0,0 +1,56 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.SubscribedSearchTweetCandidate
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.SubscribedSearchTweetIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.SubscribedSearchTweetNtabRequestHydrator
+import com.twitter.frigate.pushservice.take.predicates.BasicTweetPredicatesForRFPH
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.util.Future
+/** Push candidate for a subscribed-search tweet. Every override here is a `def`, so each access re-reads `candidate` / `copyIds` instead of caching a value. */
+class SubscribedSearchTweetPushCandidate(
+  candidate: RawCandidate with SubscribedSearchTweetCandidate,
+  author: Option[User],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with SubscribedSearchTweetCandidate
+    with TweetAuthorDetails
+    with SubscribedSearchTweetIbis2Hydrator
+    with SubscribedSearchTweetNtabRequestHydrator {
+  override def tweetAuthor: Future[Option[User]] = Future.value(author) // wraps the already-resolved author in a new Future on every call
+
+  override def weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+
+  override def tweetId: Long = candidate.tweetId
+
+  override def pushCopyId: Option[Int] = copyIds.pushCopyId
+
+  override def ntabCopyId: Option[Int] = copyIds.ntabCopyId
+
+  override def copyAggregationId: Option[String] = copyIds.aggregationId
+
+  override def target: PushTypes.Target = candidate.target
+
+  override def searchTerm: String = candidate.searchTerm
+
+  override def timeBoundedLandingUrl: Option[String] = None // always None for this candidate type
+
+  override def statsReceiver: StatsReceiver = stats
+
+  override def tweetyPieResult: Option[TweetyPie.TweetyPieResult] = candidate.tweetyPieResult
+}
+/** [[BasicTweetPredicatesForRFPH]] for [[SubscribedSearchTweetPushCandidate]]; adds nothing beyond a stats receiver scoped by this class's simple name. */
+case class SubscribedSearchTweetCandidatePredicates(override val config: Config)
+    extends BasicTweetPredicatesForRFPH[SubscribedSearchTweetPushCandidate] {
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+}
diff --git
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopTweetImpressionsPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopTweetImpressionsPushCandidate.scala
new file mode 100644
index 000000000..b04a16ac3
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopTweetImpressionsPushCandidate.scala
@@ -0,0 +1,70 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.TopTweetImpressionsCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.TopTweetImpressionsCandidateIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.TopTweetImpressionsNTabRequestHydrator
+import com.twitter.frigate.pushservice.predicate.TopTweetImpressionsPredicates
+import com.twitter.frigate.pushservice.take.predicates.BasicTweetPredicatesForRFPH
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.notificationservice.thriftscala.StoryContext
+import com.twitter.notificationservice.thriftscala.StoryContextValue
+import com.twitter.stitch.tweetypie.TweetyPie
+
+/**
+ * This class defines a hydrated [[TopTweetImpressionsCandidate]]
+ *
+ * @param candidate: [[TopTweetImpressionsCandidate]] for the candidate representing the user's Tweet with the most impressions
+ * @param copyIds: push and ntab notification copy
+ * @param stats: finagle scoped stats receiver
+ * @param pushModelScorer: ML model score object for fetching prediction scores
+ */
+class TopTweetImpressionsPushCandidate(
+  candidate: RawCandidate with TopTweetImpressionsCandidate,
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TopTweetImpressionsCandidate
+    with TopTweetImpressionsNTabRequestHydrator
+    with TopTweetImpressionsCandidateIbis2Hydrator {
+  override val target: PushTypes.Target = candidate.target
+  override val commonRecType: CommonRecommendationType = candidate.commonRecType
+  override val tweetId: Long = candidate.tweetId
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] =
+    candidate.tweetyPieResult
+  override val impressionsCount: Long = candidate.impressionsCount
+
+  override val statsReceiver: StatsReceiver = stats.scope(getClass.getSimpleName) // scoped by the runtime class's simple name
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val storyContext: Option[StoryContext] = // story context holding only this tweet's id, with empty alt text
+    Some(StoryContext(altText = "", value = Some(StoryContextValue.Tweets(Seq(tweetId)))))
+}
+/** [[BasicTweetPredicatesForRFPH]] for [[TopTweetImpressionsPushCandidate]]: one pre-candidate and one post-candidate predicate, both from [[TopTweetImpressionsPredicates]]. */
+case class TopTweetImpressionsPushCandidatePredicates(config: Config)
+    extends BasicTweetPredicatesForRFPH[TopTweetImpressionsPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates: List[
+    NamedPredicate[TopTweetImpressionsPushCandidate]
+  ] = List(
+    TopTweetImpressionsPredicates.topTweetImpressionsFatiguePredicate
+  )
+
+  override val postCandidateSpecificPredicates: List[
+    NamedPredicate[TopTweetImpressionsPushCandidate]
+  ] = List(
+    TopTweetImpressionsPredicates.topTweetImpressionsThreshold()
+  )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopicProofTweetPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopicProofTweetPushCandidate.scala
new file mode
100644
index 000000000..f89eb28bf
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TopicProofTweetPushCandidate.scala
@@ -0,0 +1,71 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.TopicProofTweetCandidate
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.TopicProofTweetIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.TopicProofTweetNtabRequestHydrator
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate
+import com.twitter.frigate.pushservice.take.predicates.BasicTweetPredicatesForRFPH
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.util.Future
+
+/**
+ * This class defines a hydrated [[TopicProofTweetCandidate]]
+ *
+ * @param candidate       : [[TopicProofTweetCandidate]] for the candidate representing a Tweet recommendation for a followed Topic
+ * @param author          : Tweet author represented as a Gizmoduck user object
+ * @param copyIds         : push and ntab notification copy
+ * @param stats           : finagle scoped stats receiver
+ * @param pushModelScorer : ML model score object for fetching prediction scores
+ */
+class TopicProofTweetPushCandidate(
+  candidate: RawCandidate with TopicProofTweetCandidate,
+  author: Option[User],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TopicProofTweetCandidate
+    with TweetAuthorDetails
+    with TopicProofTweetNtabRequestHydrator
+    with TopicProofTweetIbis2Hydrator {
+  override val statsReceiver: StatsReceiver = stats
+  override val target: PushTypes.Target = candidate.target
+  override val tweetId: Long = candidate.tweetId
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] = candidate.tweetyPieResult
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+  override val semanticCoreEntityId = candidate.semanticCoreEntityId // no type annotation here or on several members below: the types are inferred
+  override val localizedUttEntity = candidate.localizedUttEntity
+  override val tweetAuthor = Future.value(author) // strict val: the already-resolved author is wrapped once at construction
+  override val topicListingSetting = candidate.topicListingSetting
+  override val algorithmCR = candidate.algorithmCR
+  override val commonRecType: CommonRecommendationType = candidate.commonRecType
+  override val tagsCR = candidate.tagsCR
+  override val isOutOfNetwork = candidate.isOutOfNetwork
+}
+/** [[BasicTweetPredicatesForRFPH]] for [[TopicProofTweetPushCandidate]]; the only pre-candidate predicate is the EnableTopicProofTweetRecs param gate. */
+case class TopicProofTweetCandidatePredicates(override val config: Config)
+    extends BasicTweetPredicatesForRFPH[TopicProofTweetPushCandidate] {
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates: List[NamedPredicate[TopicProofTweetPushCandidate]] =
+    List(
+      PredicatesForCandidate.paramPredicate(
+        PushFeatureSwitchParams.EnableTopicProofTweetRecs
+      ),
+    )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TrendTweetPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TrendTweetPushCandidate.scala
new file mode 100644
index 000000000..ec580f629
--- /dev/null
+++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TrendTweetPushCandidate.scala
@@ -0,0 +1,50 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.events.recos.thriftscala.TrendsContext
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.TrendTweetCandidate
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.TrendTweetIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.TrendTweetNtabHydrator
+import com.twitter.frigate.pushservice.take.predicates.BasicTweetPredicatesForRFPH
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.util.Future
+/** Hydrated push candidate for a trend tweet: tweet, trend and landing-URL fields are copied from `candidate`, copy ids from `copyIds`, and the author from the pre-resolved `author` option. */
+class TrendTweetPushCandidate(
+  candidate: RawCandidate with TrendTweetCandidate,
+  author: Option[User],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TrendTweetCandidate
+    with TweetAuthorDetails
+    with TrendTweetIbis2Hydrator
+    with TrendTweetNtabHydrator {
+  override val statsReceiver: StatsReceiver = stats
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val tweetId: Long = candidate.tweetId
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] = candidate.tweetyPieResult
+  override lazy val tweetAuthor: Future[Option[User]] = Future.value(author) // lazy: the Future wrapper is created on first access
+  override val target: PushTypes.Target = candidate.target
+  override val landingUrl: String = candidate.landingUrl
+  override val timeBoundedLandingUrl: Option[String] = candidate.timeBoundedLandingUrl
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val trendId: String = candidate.trendId
+  override val trendName: String = candidate.trendName
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+  override val context: TrendsContext = candidate.context
+}
+/** [[BasicTweetPredicatesForRFPH]] for [[TrendTweetPushCandidate]]; adds nothing beyond a stats receiver scoped by this class's simple name. */
+case class TrendTweetPredicates(override val config: Config)
+    extends BasicTweetPredicatesForRFPH[TrendTweetPushCandidate] {
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TripTweetPushCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TripTweetPushCandidate.scala
new file mode 100644
index 000000000..1981e7bb5
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TripTweetPushCandidate.scala
@@ -0,0 +1,60 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate
+import com.twitter.frigate.common.base.TopicCandidate
+import com.twitter.frigate.common.base.TripCandidate
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.OutOfNetworkTweetIbis2HydratorForCandidate
+import com.twitter.frigate.pushservice.model.ntab.OutOfNetworkTweetNTabRequestHydrator
+import com.twitter.frigate.pushservice.take.predicates.OutOfNetworkTweetPredicates
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.gizmoduck.thriftscala.User
+import
com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.topiclisting.utt.LocalizedEntity
+import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
+import com.twitter.util.Future
+/** Hydrated push candidate for a Trip tweet. It reuses the out-of-network NTab / Ibis2 hydrators and fixes the topic-related members (`semanticCoreEntityId`, `localizedUttEntity`, `algorithmCR`) to None. */
+class TripTweetPushCandidate(
+  candidate: RawCandidate with OutOfNetworkTweetCandidate with TripCandidate,
+  author: Future[Option[User]],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TripCandidate
+    with TopicCandidate
+    with OutOfNetworkTweetCandidate
+    with TweetAuthorDetails
+    with OutOfNetworkTweetNTabRequestHydrator
+    with OutOfNetworkTweetIbis2HydratorForCandidate {
+  override val statsReceiver: StatsReceiver = stats
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val tweetId: Long = candidate.tweetId
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] =
+    candidate.tweetyPieResult
+  override lazy val tweetAuthor: Future[Option[User]] = author
+  override val target: PushTypes.Target = candidate.target
+  override lazy val commonRecType: CommonRecommendationType =
+    candidate.commonRecType
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+  override lazy val semanticCoreEntityId: Option[Long] = None // not read from candidate (its declared type has no TopicCandidate)
+  override lazy val localizedUttEntity: Option[LocalizedEntity] = None
+  override lazy val algorithmCR: Option[String] = None
+  override val tripDomain: Option[collection.Set[TripDomain]] = candidate.tripDomain
+}
+/** [[OutOfNetworkTweetPredicates]] for [[TripTweetPushCandidate]]; adds nothing beyond a stats receiver scoped by this class's simple name. */
+case class TripTweetCandidatePredicates(override val config: Config)
+    extends OutOfNetworkTweetPredicates[TripTweetPushCandidate] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetAction.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetAction.scala
new file mode 100644
index 000000000..72453224d
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetAction.scala
@@ -0,0 +1,26 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.SocialContextActions
+import com.twitter.frigate.common.base.TweetCandidate
+import com.twitter.frigate.common.base.TweetDetails
+import com.twitter.frigate.pushservice.model.PushTypes._
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.predicate._
+import com.twitter.frigate.pushservice.take.predicates.BasicTweetPredicatesForRFPH
+/** [[BasicTweetPredicatesForRFPH]] shared by any push candidate that is a tweet with details and social-context actions: `minSocialContext(1)` before, three `config.edgeStore`-backed social-context predicates after. */
+case class TweetActionCandidatePredicates(override val config: Config)
+    extends BasicTweetPredicatesForRFPH[
+      PushCandidate with TweetCandidate with TweetDetails with SocialContextActions
+    ] {
+
+  implicit val statsReceiver: StatsReceiver = config.statsReceiver.scope(getClass.getSimpleName)
+
+  override val preCandidateSpecificPredicates = List(PredicatesForCandidate.minSocialContext(1)) // list type is inferred, not annotated
+
+  override val postCandidateSpecificPredicates = List(
+    PredicatesForCandidate.socialContextBeingFollowed(config.edgeStore),
+    PredicatesForCandidate.socialContextBlockingOrMuting(config.edgeStore),
+    PredicatesForCandidate.socialContextNotRetweetFollowing(config.edgeStore)
+  )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetFavorite.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetFavorite.scala
new file mode 100644
index 000000000..ae31ddc6c
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetFavorite.scala
@@ -0,0 +1,53 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.SocialContextAction
+import
com.twitter.frigate.common.base.SocialContextUserDetails
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.common.base.TweetFavoriteCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.TweetFavoriteCandidateIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.TweetFavoriteNTabRequestHydrator
+import com.twitter.frigate.pushservice.util.CandidateHydrationUtil.TweetWithSocialContextTraits
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.util.Future
+
+/**
+ * Push candidate for a tweet-favorite recommendation with social context.
+ *
+ * All members are forwarded from the constructor arguments.
+ *
+ * @param candidate            raw candidate supplying tweet id, social context actions, target, CRT
+ *                             and TweetyPie result
+ * @param socialContextUserMap future map from user id to the hydrated social context user, if any
+ * @param author               future resolving to the tweet author, if any
+ * @param copyIds              push / ntab copy ids and the copy aggregation id
+ */
+class TweetFavoritePushCandidate(
+  candidate: RawCandidate with TweetWithSocialContextTraits,
+  socialContextUserMap: Future[Map[Long, Option[User]]],
+  author: Future[Option[User]],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TweetFavoriteCandidate
+    with SocialContextUserDetails
+    with TweetAuthorDetails
+    with TweetFavoriteNTabRequestHydrator
+    with TweetFavoriteCandidateIbis2Hydrator {
+  override val statsReceiver: StatsReceiver = stats
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val tweetId: Long = candidate.tweetId
+  override val socialContextActions: Seq[SocialContextAction] =
+    candidate.socialContextActions
+
+  override val socialContextAllTypeActions: Seq[SocialContextAction] =
+    candidate.socialContextAllTypeActions
+
+  override lazy val scUserMap: Future[Map[Long, Option[User]]] = socialContextUserMap
+  override lazy val tweetAuthor: Future[Option[User]] = author
+  override lazy val commonRecType: CommonRecommendationType =
+    candidate.commonRecType
+  override val target: PushTypes.Target = candidate.target
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] =
+    candidate.tweetyPieResult
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetRetweet.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetRetweet.scala
new file mode 100644
index 000000000..61c8c6526
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/TweetRetweet.scala
@@ -0,0 +1,51 @@
+package com.twitter.frigate.pushservice.model
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.SocialContextAction
+import com.twitter.frigate.common.base.SocialContextUserDetails
+import com.twitter.frigate.common.base.TweetAuthorDetails
+import com.twitter.frigate.common.base.TweetRetweetCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.model.candidate.CopyIds
+import com.twitter.frigate.pushservice.model.ibis.TweetRetweetCandidateIbis2Hydrator
+import com.twitter.frigate.pushservice.model.ntab.TweetRetweetNTabRequestHydrator
+import com.twitter.frigate.pushservice.util.CandidateHydrationUtil.TweetWithSocialContextTraits
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.stitch.tweetypie.TweetyPie
+import com.twitter.util.Future
+
+/**
+ * Push candidate for a tweet-retweet recommendation with social context.
+ *
+ * Mirrors [[TweetFavoritePushCandidate]]: all members are forwarded from the constructor
+ * arguments; only the mixed-in candidate / hydrator traits differ.
+ *
+ * @param candidate            raw candidate supplying tweet id, social context actions, target, CRT
+ *                             and TweetyPie result
+ * @param socialContextUserMap future map from user id to the hydrated social context user, if any
+ * @param author               future resolving to the tweet author, if any
+ * @param copyIds              push / ntab copy ids and the copy aggregation id
+ */
+class TweetRetweetPushCandidate(
+  candidate: RawCandidate with TweetWithSocialContextTraits,
+  socialContextUserMap: Future[Map[Long, Option[User]]],
+  author: Future[Option[User]],
+  copyIds: CopyIds
+)(
+  implicit stats: StatsReceiver,
+  pushModelScorer: PushMLModelScorer)
+    extends PushCandidate
+    with TweetRetweetCandidate
+    with SocialContextUserDetails
+    with TweetAuthorDetails
+    with TweetRetweetNTabRequestHydrator
+    with TweetRetweetCandidateIbis2Hydrator {
+  override val statsReceiver: StatsReceiver = stats
+  override val weightedOpenOrNtabClickModelScorer: PushMLModelScorer = pushModelScorer
+  override val tweetId: Long = candidate.tweetId
+  override val socialContextActions: Seq[SocialContextAction] =
+    candidate.socialContextActions
+
+  override val socialContextAllTypeActions: Seq[SocialContextAction] =
+    candidate.socialContextAllTypeActions
+
+  override lazy val scUserMap: Future[Map[Long, Option[User]]] = socialContextUserMap
+  override lazy val tweetAuthor: Future[Option[User]] = author
+  override lazy val commonRecType: CommonRecommendationType = candidate.commonRecType
+  override val target: PushTypes.Target = candidate.target
+  override lazy val tweetyPieResult: Option[TweetyPie.TweetyPieResult] = candidate.tweetyPieResult
+  override val pushCopyId: Option[Int] = copyIds.pushCopyId
+  override val ntabCopyId: Option[Int] = copyIds.ntabCopyId
+  override val copyAggregationId: Option[String] = copyIds.aggregationId
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/CopyInfo.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/CopyInfo.scala
new file mode 100644
index 000000000..11cc0617a
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/CopyInfo.scala
@@ -0,0 +1,33 @@
+package com.twitter.frigate.pushservice.model.candidate
+
+import com.twitter.frigate.common.util.MRPushCopy
+import com.twitter.frigate.common.util.MrPushCopyObjects
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.util.CandidateUtil
+
+/**
+ * Optional copy identifiers attached to a candidate; every field defaults to `None`.
+ *
+ * @param pushCopyId    id of the push copy, if one was selected
+ * @param ntabCopyId    id of the ntab copy, if one was selected
+ * @param aggregationId copy aggregation id, if any
+ */
+case class CopyIds(
+  pushCopyId: Option[Int] = None,
+  ntabCopyId: Option[Int] = None,
+  aggregationId: Option[String] = None)
+
+/** Copy-related accessors mixed into a [[PushCandidate]]. */
+trait CopyInfo {
+  self: PushCandidate =>
+
+  import com.twitter.frigate.data_pipeline.common.FrigateNotificationUtil._
+
+  /**
+   * Resolves the push copy: looks it up by `pushCopyId` when one is set, otherwise derives it
+   * from the CRT and the number of social context actions on this candidate.
+   */
+  def getPushCopy: Option[MRPushCopy] =
+    pushCopyId match {
+      // Bound under a distinct name so the pattern variable does not shadow the member.
+      case Some(copyId) => MrPushCopyObjects.getCopyFromId(copyId)
+      case _ =>
+        crt2PushCopy(
+          commonRecType,
+          CandidateUtil.getSocialContextActionsFromCandidate(self).size
+        )
+    }
+
+  def pushCopyId: Option[Int]
+
+  def ntabCopyId: Option[Int]
+
+  def copyAggregationId: Option[String]
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/MLScores.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/MLScores.scala
new file mode 100644
index 000000000..4ba79f485
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/MLScores.scala
@@ -0,0 +1,307 @@
+package com.twitter.frigate.pushservice.model.candidate
+
+import com.twitter.frigate.common.base.FeatureMap
+import com.twitter.frigate.common.rec_types.RecTypes
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.ml.HydrationContextBuilder
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushMLModel
+import com.twitter.frigate.pushservice.params.WeightedOpenOrNtabClickModel
+import com.twitter.nrel.hydration.push.HydrationContext
+import com.twitter.timelines.configapi.FSParam
+import com.twitter.util.Future
+import java.util.concurrent.ConcurrentHashMap
+import scala.collection.concurrent.{Map => CMap}
+import scala.collection.convert.decorateAsScala._
+
+/**
+ * Per-candidate cache of ML model scores plus the helpers that assemble the score maps
+ * attached to a candidate when it is scribed.
+ */
+trait MLScores {
+
+  self: PushCandidate =>
+
+  lazy val candidateHydrationContext: Future[HydrationContext] = HydrationContextBuilder.build(self)
+
+  def weightedOpenOrNtabClickModelScorer: PushMLModelScorer
+
+  // Used to store the scores and avoid duplicate prediction
+  private val qualityModelScores: CMap[
+    (PushMLModel.Value, WeightedOpenOrNtabClickModel.ModelNameType),
+    Future[Option[Double]]
+  ] =
+    new ConcurrentHashMap[(PushMLModel.Value, WeightedOpenOrNtabClickModel.ModelNameType), Future[
+      Option[Double]
+    ]]().asScala
+
+  /**
+   * Caches an externally computed score for `(pushMLModel, modelVersion)`.
+   * The first value stored for a key wins; later calls for the same key are ignored.
+   */
+  def populateQualityModelScore(
+    pushMLModel: PushMLModel.Value,
+    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType,
+    prob: Future[Option[Double]]
+  ): Unit = {
+    // putIfAbsent is a single atomic operation on the backing ConcurrentHashMap, whereas the
+    // former contains-then-insert let two racing callers both write.
+    qualityModelScores.putIfAbsent((pushMLModel, modelVersion), prob)
+    ()
+  }
+
+  // The ML scores that also depend on other candidates and are only available after all candidates are processed
+  // For example, the likelihood info for Importance Sampling
+  private lazy val crossCandidateMlScores: CMap[String, Double] =
+    new ConcurrentHashMap[String, Double]().asScala
+
+  /**
+   * Stores a cross-candidate score under `scoreName`.
+   *
+   * @throws Exception if a score with that name has already been stored
+   */
+  def populateCrossCandidateMlScores(scoreName: String, score: Double): Unit = {
+    // Atomic insert: a defined result means the name was already taken and the map is unchanged.
+    if (crossCandidateMlScores.putIfAbsent(scoreName, score).isDefined) {
+      throw new Exception(
+        s"$scoreName has been populated in the CrossCandidateMlScores!\n" +
+          s"Existing crossCandidateMlScores are ${crossCandidateMlScores}\n"
+      )
+    }
+  }
+
+  /**
+   * Returns the cached score future for `(pushMLModel, modelVersion)`; when none is cached,
+   * requests a prediction from `weightedOpenOrNtabClickModelScorer` and caches that future.
+   */
+  def getMLModelScore(
+    pushMLModel: PushMLModel.Value,
+    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
+  ): Future[Option[Double]] = {
+    qualityModelScores.getOrElseUpdate(
+      (pushMLModel, modelVersion),
+      weightedOpenOrNtabClickModelScorer
+        .singlePredicationForModelVersion(modelVersion, self, Some(pushMLModel))
+    )
+  }
+
+  /**
+   * Returns the cached score future for `(pushMLModel, modelVersion)`, or `Future.None` when
+   * nothing is cached. Never triggers a prediction.
+   */
+  def getMLModelScoreWithoutUpdate(
+    pushMLModel: PushMLModel.Value,
+    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
+  ): Future[Option[Double]] = {
+    qualityModelScores.getOrElse(
+      (pushMLModel, modelVersion),
+      Future.None
+    )
+  }
+
+  /** WeightedOpenOrNtabClickProbability score for the model version selected by the given param. */
+  def getWeightedOpenOrNtabClickModelScore(
+    weightedOONCModelParam: FSParam[WeightedOpenOrNtabClickModel.ModelNameType]
+  ): Future[Option[Double]] = {
+    getMLModelScore(
+      PushMLModel.WeightedOpenOrNtabClickProbability,
+      target.params(weightedOONCModelParam)
+    )
+  }
+
+  /* After we unify the ranking and filtering models, we follow the iteration process below
+     When improving the WeightedOONC model,
+     1) Run an experiment that only replaces the ranking model
+     2) Make decisions according to the experiment results
+     3) Use the ranking model for filtering
+     4) Adjust percentile thresholds if necessary
+   */
+  lazy val mrWeightedOpenOrNtabClickRankingProbability: Future[Option[Double]] =
+    target.rankingModelParam.flatMap { modelParam =>
+      getWeightedOpenOrNtabClickModelScore(modelParam)
+    }
+
+  /**
+   * Score of a BigFiltering model. The ranking score is merged into this candidate's features
+   * first; when no ranking score is available the result is `Future.None` and the model is
+   * not queried.
+   */
+  def getBigFilteringScore(
+    pushMLModel: PushMLModel.Value,
+    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
+  ): Future[Option[Double]] = {
+    mrWeightedOpenOrNtabClickRankingProbability.flatMap {
+      case Some(rankingScore) =>
+        // Adds ranking score to feature map (we must ensure the feature key is also in the feature context)
+        mergeFeatures(
+          FeatureMap(
+            numericFeatures = Map("scribe.WeightedOpenOrNtabClickProbability" -> rankingScore)
+          )
+        )
+        getMLModelScore(pushMLModel, modelVersion)
+      case _ => Future.None
+    }
+  }
+
+  /**
+   * Two single-entry maps holding the ranking score: one keyed by the model enum name, one
+   * keyed by the resolved ranking model version. Either map is empty when the score is absent.
+   */
+  def getWeightedOpenOrNtabClickScoreForScribing(): Seq[Future[Map[String, Double]]] = {
+    Seq(
+      mrWeightedOpenOrNtabClickRankingProbability.map {
+        case Some(score) => Map(PushMLModel.WeightedOpenOrNtabClickProbability.toString -> score)
+        case _ => Map.empty[String, Double]
+      },
+      Future
+        .join(
+          target.rankingModelParam,
+          mrWeightedOpenOrNtabClickRankingProbability
+        ).map {
+          case (rankingModelParam, Some(score)) =>
+            Map(target.params(rankingModelParam).toString -> score)
+          case _ => Map.empty[String, Double]
+        }
+    )
+  }
+
+  /** NSFW score for scribing; read from the cache only, so it never triggers a prediction. */
+  def getNsfwScoreForScribing(): Seq[Future[Map[String, Double]]] = {
+    val nsfwScoreFut = getMLModelScoreWithoutUpdate(
+      PushMLModel.HealthNsfwProbability,
+      target.params(PushFeatureSwitchParams.BqmlHealthModelTypeParam))
+    Seq(nsfwScoreFut.map {
+      case Some(nsfwScore) => Map(PushMLModel.HealthNsfwProbability.toString -> nsfwScore)
+      case _ => Map.empty[String, Double]
+    })
+  }
+
+  /** BigFiltering supervised scores, or an empty Seq when their scribing is switched off. */
+  def getBigFilteringSupervisedScoresForScribing(): Seq[Future[Map[String, Double]]] = {
+    if (target.params(
+        PushFeatureSwitchParams.EnableMrRequestScribingBigFilteringSupervisedScores)) {
+      Seq(
+        mrBigFilteringSupervisedSendingScore.map {
+          case Some(score) =>
+            Map(PushMLModel.BigFilteringSupervisedSendingModel.toString -> score)
+          case _ => Map.empty[String, Double]
+        },
+        mrBigFilteringSupervisedWithoutSendingScore.map {
+          case Some(score) =>
+            Map(PushMLModel.BigFilteringSupervisedWithoutSendingModel.toString -> score)
+          case _ => Map.empty[String, Double]
+        }
+      )
+    } else Seq.empty[Future[Map[String, Double]]]
+  }
+
+  /** BigFiltering RL scores, or an empty Seq when their scribing is switched off. */
+  def getBigFilteringRLScoresForScribing(): Seq[Future[Map[String, Double]]] = {
+    if (target.params(PushFeatureSwitchParams.EnableMrRequestScribingBigFilteringRLScores)) {
+      Seq(
+        mrBigFilteringRLSendingScore.map {
+          case Some(score) => Map(PushMLModel.BigFilteringRLSendingModel.toString -> score)
+          case _ => Map.empty[String, Double]
+        },
+        mrBigFilteringRLWithoutSendingScore.map {
+          case Some(score) => Map(PushMLModel.BigFilteringRLWithoutSendingModel.toString -> score)
+          case _ => Map.empty[String, Double]
+        }
+      )
+    } else Seq.empty[Future[Map[String, Double]]]
+  }
+
+  /** All per-candidate score maps that get merged for scribing. */
+  def buildModelScoresSeqForScribing(): Seq[Future[Map[String, Double]]] = {
+    getWeightedOpenOrNtabClickScoreForScribing() ++
+      getBigFilteringSupervisedScoresForScribing() ++
+      getBigFilteringRLScoresForScribing() ++
+      getNsfwScoreForScribing()
+  }
+
+  lazy val mrBigFilteringSupervisedSendingScore: Future[Option[Double]] =
+    getBigFilteringScore(
+      PushMLModel.BigFilteringSupervisedSendingModel,
+      target.params(PushFeatureSwitchParams.BigFilteringSupervisedSendingModelParam)
+    )
+
+  lazy val mrBigFilteringSupervisedWithoutSendingScore: Future[Option[Double]] =
+    getBigFilteringScore(
+      PushMLModel.BigFilteringSupervisedWithoutSendingModel,
+      target.params(PushFeatureSwitchParams.BigFilteringSupervisedWithoutSendingModelParam)
+    )
+
+  lazy val mrBigFilteringRLSendingScore: Future[Option[Double]] =
+    getBigFilteringScore(
+      PushMLModel.BigFilteringRLSendingModel,
+      target.params(PushFeatureSwitchParams.BigFilteringRLSendingModelParam)
+    )
+
+  lazy val mrBigFilteringRLWithoutSendingScore: Future[Option[Double]] =
+    getBigFilteringScore(
+      PushMLModel.BigFilteringRLWithoutSendingModel,
+      target.params(PushFeatureSwitchParams.BigFilteringRLWithoutSendingModelParam)
+    )
+
+  lazy val mrWeightedOpenOrNtabClickFilteringProbability: Future[Option[Double]] =
+    getWeightedOpenOrNtabClickModelScore(
+      target.filteringModelParam
+    )
+
+  lazy val mrQualityUprankingProbability: Future[Option[Double]] =
+    getMLModelScore(
+      PushMLModel.FilteringProbability,
+      target.params(PushFeatureSwitchParams.QualityUprankingModelTypeParam)
+    )
+
+  lazy val mrNsfwScore: Future[Option[Double]] =
+    getMLModelScoreWithoutUpdate(
+      PushMLModel.HealthNsfwProbability,
+      target.params(PushFeatureSwitchParams.BqmlHealthModelTypeParam)
+    )
+
+  // MR quality upranking param
+  private val qualityUprankingBoost: String = "QualityUprankingBoost"
+  private val producerQualityUprankingBoost: String = "ProducerQualityUprankingBoost"
+  private val qualityUprankingInfo: CMap[String, Double] =
+    new ConcurrentHashMap[String, Double]().asScala
+
+  // Both values are lazy: each reads the map once, on first access, and keeps that result even
+  // if the matching setter is called afterwards.
+  lazy val mrQualityUprankingBoost: Option[Double] =
+    qualityUprankingInfo.get(qualityUprankingBoost)
+  lazy val mrProducerQualityUprankingBoost: Option[Double] =
+    qualityUprankingInfo.get(producerQualityUprankingBoost)
+
+  /** Stores the quality upranking boost, replacing any previously stored value. */
+  def setQualityUprankingBoost(boost: Double): Unit = {
+    // The former update / += branches both wrote the value; a single put covers them.
+    qualityUprankingInfo.put(qualityUprankingBoost, boost)
+    ()
+  }
+
+  /** Stores the producer quality upranking boost, replacing any previously stored value. */
+  def setProducerQualityUprankingBoost(boost: Double): Unit = {
+    qualityUprankingInfo.put(producerQualityUprankingBoost, boost)
+    ()
+  }
+
+  private lazy val mrModelScoresFut: Future[Map[String, Double]] = {
+    if (self.target.isLoggedOutUser) {
+      Future.value(Map.empty[String, Double])
+    } else {
+      Future
+        .collectToTry {
+          buildModelScoresSeqForScribing()
+        }.map { scoreTrySeq =>
+          // Failed score futures are skipped. Folding from an empty map (instead of reduce)
+          // keeps this from throwing when every one of them failed.
+          scoreTrySeq
+            .collect {
+              case result if result.isReturn => result.get()
+            }.foldLeft(Map.empty[String, Double])(_ ++ _)
+        }
+    }
+  }
+
+  // Internal model scores (scores that are independent of other candidates) for scribing
+  lazy val modelScores: Future[Map[String, Double]] =
+    target.dauProbability.flatMap { dauProbabilityOpt =>
+      val dauProbScoreMap = dauProbabilityOpt
+        .map(_.probability).map { dauProb =>
+          PushMLModel.DauProbability.toString -> dauProb
+        }.toMap
+
+      // Avoid unnecessary MR model scribing: outside dark writes, send-handler CRTs that do not
+      // use the MR model only get the DAU probability.
+      val dauScoreOnly = !target.isDarkWrite &&
+        RecTypes.isSendHandlerType(commonRecType) &&
+        !RecTypes.sendHandlerTypesUsingMrModel(commonRecType)
+
+      if (dauScoreOnly) Future.value(dauProbScoreMap)
+      else mrModelScoresFut.map(dauProbScoreMap ++ _)
+    }
+
+  /**
+   * Internal model scores merged with the cross-candidate scores; empty for CRTs that are not
+   * eligible for model score tracking and for logged-out users.
+   *
+   * The returned future fails with an Exception when the two score sets share a key.
+   */
+  def getModelScoresforScribing(): Future[Map[String, Double]] = {
+    if (RecTypes.notEligibleForModelScoreTracking(commonRecType) || self.target.isLoggedOutUser) {
+      Future.value(Map.empty[String, Double])
+    } else {
+      modelScores.map { internalScores =>
+        if (internalScores.keySet.intersect(crossCandidateMlScores.keySet).nonEmpty) {
+          throw new Exception(
+            "crossCandidateMlScores overlap internalModelScores\n" +
+              s"internalScores keySet: ${internalScores.keySet}\n" +
+              s"crossCandidateScores keySet: ${crossCandidateMlScores.keySet}\n"
+          )
+        }
+
+        internalScores ++ crossCandidateMlScores
+      }
+    }
+  }
+}
diff --git
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/QualityScribing.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/QualityScribing.scala
new file mode 100644
index 000000000..283c3d97c
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/QualityScribing.scala
@@ -0,0 +1,104 @@
+package com.twitter.frigate.pushservice.model.candidate
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.params.HighQualityScribingScores
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushMLModel
+import com.twitter.util.Future
+import java.util.concurrent.ConcurrentHashMap
+import scala.collection.concurrent.{Map => CMap}
+import scala.collection.convert.decorateAsScala._
+
+/**
+ * Helpers for scribing quality scores: model scores resolved through [[MLScores]] and
+ * externally computed scores cached by name on the candidate.
+ */
+trait QualityScribing {
+  self: PushCandidate with MLScores =>
+
+  // Used to store other scores (to avoid duplicate queries to other services, e.g. HSS)
+  private val externalCachedScores: CMap[String, Future[Option[Double]]] =
+    new ConcurrentHashMap[String, Future[Option[Double]]]().asScala
+
+  /**
+   * Retrieves the model version as specified by the corresponding FS param.
+   * This model version will be used for getting the cached score or triggering
+   * a prediction request.
+   *
+   * @param modelName The score we would like to scribe
+   */
+  private def getModelVersion(
+    modelName: HighQualityScribingScores.Name
+  ): String = {
+    modelName match {
+      case HighQualityScribingScores.HeavyRankingScore =>
+        target.params(PushFeatureSwitchParams.HighQualityCandidatesHeavyRankingModel)
+      case HighQualityScribingScores.NonPersonalizedQualityScoreUsingCnn =>
+        target.params(PushFeatureSwitchParams.HighQualityCandidatesNonPersonalizedQualityCnnModel)
+      case HighQualityScribingScores.BqmlNsfwScore =>
+        target.params(PushFeatureSwitchParams.HighQualityCandidatesBqmlNsfwModel)
+      case HighQualityScribingScores.BqmlReportScore =>
+        target.params(PushFeatureSwitchParams.HighQualityCandidatesBqmlReportModel)
+    }
+  }
+
+  /**
+   * Retrieves the score for scribing either from a cached value or
+   * by generating a prediction request. This will increase model QPS.
+   *
+   * @param pushMLModel This represents the prefix of the model name (i.e. [pushMLModel]_[version])
+   * @param scoreName   The name to be used when scribing this score
+   * @return `scoreName.toString` paired with the score, if any
+   */
+  def getScribingScore(
+    pushMLModel: PushMLModel.Value,
+    scoreName: HighQualityScribingScores.Name
+  ): Future[(String, Option[Double])] = {
+    getMLModelScore(
+      pushMLModel,
+      getModelVersion(scoreName)
+    ).map { scoreOpt =>
+      scoreName.toString -> scoreOpt
+    }
+  }
+
+  /**
+   * Retrieves the score for scribing only if it has been computed/cached before; it never
+   * triggers a prediction request.
+   *
+   * @param pushMLModel This represents the prefix of the model name (i.e. [pushMLModel]_[version])
+   * @param scoreName   The name to be used when scribing this score
+   * @return `scoreName.toString` paired with the cached score, or with `None` when nothing is cached
+   */
+  def getScribingScoreWithoutUpdate(
+    pushMLModel: PushMLModel.Value,
+    scoreName: HighQualityScribingScores.Name
+  ): Future[(String, Option[Double])] = {
+    getMLModelScoreWithoutUpdate(
+      pushMLModel,
+      getModelVersion(scoreName)
+    ).map { scoreOpt =>
+      scoreName.toString -> scoreOpt
+    }
+  }
+
+  /**
+   * Caches the given score future. The first future cached under a name wins; later calls
+   * for the same name are ignored.
+   *
+   * @param scoreName The name to be used when scribing this score
+   * @param scoreFut  Future holding the score to cache under `scoreName`
+   */
+  def cacheExternalScore(scoreName: String, scoreFut: Future[Option[Double]]): Unit = {
+    // putIfAbsent is atomic on the backing ConcurrentHashMap; the former contains-then-insert
+    // let two racing callers both write.
+    externalCachedScores.putIfAbsent(scoreName, scoreFut)
+    ()
+  }
+
+  /**
+   * Returns every cached external score as a future of (name, score) pairs
+   */
+  def getExternalCachedScores: Seq[Future[(String, Option[Double])]] = {
+    externalCachedScores.map {
+      case (modelName, scoreFut) =>
+        scoreFut.map { scoreOpt => modelName -> scoreOpt }
+    }.toSeq
+  }
+
+  /** Returns the external score cached under `name`, or `Future.None` when there is none. */
+  def getExternalCachedScoreByName(name: String): Future[Option[Double]] = {
+    externalCachedScores.getOrElse(name, Future.None)
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/Scriber.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/Scriber.scala
new file mode 100644
index 000000000..a43530b44
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/candidate/Scriber.scala
@@ -0,0 +1,277 @@
+package com.twitter.frigate.pushservice.model.candidate
+
+import com.twitter.frigate.data_pipeline.features_common.PushQualityModelFeatureContext.featureContext
+import com.twitter.frigate.data_pipeline.features_common.PushQualityModelUtil
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base._
+import com.twitter.frigate.common.rec_types.RecTypes
+import com.twitter.frigate.common.util.NotificationScribeUtil
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.OutOfNetworkTweetPushCandidate
+import com.twitter.frigate.pushservice.model.TopicProofTweetPushCandidate
+import com.twitter.frigate.pushservice.ml.HydrationContextBuilder
+import com.twitter.frigate.pushservice.predicate.quality_model_predicate.PDauCohort
+import com.twitter.frigate.pushservice.predicate.quality_model_predicate.PDauCohortUtil
+import com.twitter.frigate.pushservice.util.Candidate2FrigateNotification
+import com.twitter.frigate.pushservice.util.MediaAnnotationsUtil.sensitiveMediaCategoryFeatureName
+import com.twitter.frigate.scribe.thriftscala.FrigateNotificationScribeType
+import com.twitter.frigate.scribe.thriftscala.NotificationScribe
+import com.twitter.frigate.scribe.thriftscala.PredicateDetailedInfo
+import com.twitter.frigate.scribe.thriftscala.PushCapInfo
+import com.twitter.frigate.thriftscala.ChannelName
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.frigate.thriftscala.OverrideInfo
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.model.user_state.UserState.UserState
+import com.twitter.ibis2.service.thriftscala.Ibis2Response
+import com.twitter.ml.api.util.ScalaToJavaDataRecordConversions
+import com.twitter.nrel.heavyranker.FeatureHydrator
+import com.twitter.util.Future
+import java.util.UUID
+import java.util.concurrent.ConcurrentHashMap
+import scala.collection.concurrent.{Map => CMap}
+import scala.collection.Map
+import scala.collection.convert.decorateAsScala._
+
+/**
+ * Builds the scribe payloads for a candidate: the `FrigateNotification` that gets persisted
+ * and the `NotificationScribe` record, optionally with the ML features attached.
+ */
+trait Scriber {
+  self: PushCandidate =>
+
+  def statsReceiver: StatsReceiver
+
+  // A def, not a val: the notification is rebuilt on every access.
+  def frigateNotification: FrigateNotification = Candidate2FrigateNotification
+    .getFrigateNotification(self)(statsReceiver)
+    .copy(copyAggregationId = self.copyAggregationId)
+
+  // Random UUID without dashes. String.replace does the literal replacement directly, so the
+  // regex-based replaceAll is not needed.
+  lazy val impressionId: String = UUID.randomUUID.toString.replace("-", "")
+
+  // Used to store the score and threshold for predicates
+  // Map(predicate name, (score, threshold, filter?))
+  private val predicateScoreAndThreshold: CMap[String, PredicateDetailedInfo] =
+    new ConcurrentHashMap[String, PredicateDetailedInfo]().asScala
+
+  /**
+   * Caches the detailed outcome of a predicate under its name. The first entry stored for a
+   * name wins; later calls for the same name are ignored.
+   *
+   * @param predName              predicate name, used as the cache key
+   * @param predScore             score the predicate evaluated
+   * @param predThreshold         threshold the score was compared against
+   * @param predResult            predicate result
+   * @param additionalInformation optional extra numeric details
+   */
+  def cachePredicateInfo(
+    predName: String,
+    predScore: Double,
+    predThreshold: Double,
+    predResult: Boolean,
+    additionalInformation: Option[Map[String, Double]] = None
+  ): Unit = {
+    // putIfAbsent is atomic on the backing ConcurrentHashMap; the former contains-then-insert
+    // let two racing callers both write.
+    predicateScoreAndThreshold.putIfAbsent(
+      predName,
+      PredicateDetailedInfo(
+        predName,
+        predScore,
+        predThreshold,
+        predResult,
+        additionalInformation))
+    ()
+  }
+
+  /** All predicate details cached so far. */
+  def getCachedPredicateInfo(): Seq[PredicateDetailedInfo] = predicateScoreAndThreshold.values.toSeq
+
+  /**
+   * Builds the notification to persist: `frigateNotification` enriched with the impression id,
+   * silent-push flag, override info, ML model scores, MR user state and copy features.
+   * Also increments a counter named after the notification's display location.
+   *
+   * @param channels         accepted for callers, but not read by this method
+   * @param isSilentPush     stored on the notification
+   * @param overrideInfoOpt  stored on the notification
+   * @param copyFeaturesList stored on the notification as a Seq
+   */
+  def frigateNotificationForPersistence(
+    channels: Seq[ChannelName],
+    isSilentPush: Boolean,
+    overrideInfoOpt: Option[OverrideInfo] = None,
+    copyFeaturesList: Set[String]
+  ): Future[FrigateNotification] = {
+
+    // record display location for frigate notification
+    statsReceiver
+      .scope("FrigateNotificationForPersistence")
+      .scope("displayLocation")
+      .counter(frigateNotification.notificationDisplayLocation.name)
+      .incr()
+
+    val getModelScores = self.getModelScoresforScribing()
+
+    Future.join(getModelScores, self.target.targetMrUserState).map {
+      case (mlScores, mrUserState) =>
+        frigateNotification.copy(
+          impressionId = Some(impressionId),
+          isSilentPush = Some(isSilentPush),
+          overrideInfo = overrideInfoOpt,
+          mlModelScores = Some(mlScores),
+          mrUserState = mrUserState.map(_.name),
+          copyFeatures = Some(copyFeaturesList.toSeq)
+        )
+    }
+  }
+
+  // Assembles the SendMessage scribe record from values that are already resolved.
+  private def getNotificationScribe(
+    notifForPersistence: FrigateNotification,
+    userState: Option[UserState],
+    dauCohort: PDauCohort.Value,
+    ibis2Response: Option[Ibis2Response],
+    tweetAuthorId: Option[Long],
+    recUserId: Option[Long],
+    modelScoresMap: Option[Map[String, Double]],
+    primaryClient: Option[String],
+    isMrBackfillCR: Option[Boolean] = None,
+    tagsCR: Option[Seq[String]] = None,
+    gizmoduckTargetUser: Option[User],
+    predicateDetailedInfoList: Option[Seq[PredicateDetailedInfo]] = None,
+    pushCapInfoList: Option[Seq[PushCapInfo]] = None
+  ): NotificationScribe = {
+    NotificationScribe(
+      FrigateNotificationScribeType.SendMessage,
+      System.currentTimeMillis(),
+      targetUserId = Some(self.target.targetId),
+      timestampKeyForHistoryV2 = Some(createdAt.inSeconds),
+      sendType = NotificationScribeUtil.convertToScribeDisplayLocation(
+        self.frigateNotification.notificationDisplayLocation
+      ),
+      recommendationType = NotificationScribeUtil.convertToScribeRecommendationType(
+        self.frigateNotification.commonRecommendationType
+      ),
+      commonRecommendationType = Some(self.frigateNotification.commonRecommendationType),
+      fromPushService = Some(true),
+      frigateNotification = Some(notifForPersistence),
+      impressionId = Some(impressionId),
+      skipModelInfo = target.skipModelInfo,
+      ibis2Response = ibis2Response,
+      tweetAuthorId = tweetAuthorId,
+      scribeFeatures = Some(target.noSkipButScribeFeatures),
+      userState = userState.map(_.toString),
+      pDauCohort = Some(dauCohort.toString),
+      recommendedUserId = recUserId,
+      modelScores = modelScoresMap,
+      primaryClient = primaryClient,
+      isMrBackfillCR = isMrBackfillCR,
+      tagsCR = tagsCR,
+      targetUserType = gizmoduckTargetUser.map(_.userType),
+      predicateDetailedInfoList = predicateDetailedInfoList,
+      pushCapInfoList = pushCapInfoList
+    )
+  }
+
+  /**
+   * Builds the full scribe record for this candidate.
+   *
+   * Features are attached only when the target is flagged for model training data (or for
+   * scribing features without hydrating new ones) and the CRT is eligible for model score
+   * tracking; they are written either as a data record or as a feature list, depending on the
+   * target's params.
+   */
+  def scribeData(
+    ibis2Response: Option[Ibis2Response] = None,
+    isSilentPush: Boolean = false,
+    overrideInfoOpt: Option[OverrideInfo] = None,
+    copyFeaturesList: Set[String] = Set.empty,
+    channels: Seq[ChannelName] = Seq.empty
+  ): Future[NotificationScribe] = {
+
+    val recTweetAuthorId = self match {
+      case t: TweetCandidate with TweetAuthor => t.authorId
+      case _ => None
+    }
+
+    val recUserId = self match {
+      case u: UserCandidate => Some(u.userId)
+      case _ => None
+    }
+
+    val isMrBackfillCR = self match {
+      case t: OutOfNetworkTweetPushCandidate => t.isMrBackfillCR
+      case _ => None
+    }
+
+    val tagsCR = self match {
+      case t: OutOfNetworkTweetPushCandidate => t.tagsCR.map(_.map(_.toString))
+      case t: TopicProofTweetPushCandidate => t.tagsCR.map(_.map(_.toString))
+      case _ => None
+    }
+
+    Future
+      .join(
+        frigateNotificationForPersistence(
+          channels = channels,
+          isSilentPush = isSilentPush,
+          overrideInfoOpt = overrideInfoOpt,
+          copyFeaturesList = copyFeaturesList
+        ),
+        target.targetUserState,
+        PDauCohortUtil.getPDauCohort(target),
+        target.deviceInfo,
+        target.targetUser
+      )
+      .flatMap {
+        case (notifForPersistence, userState, dauCohort, deviceInfo, gizmoduckTargetUserOpt) =>
+          val primaryClient = deviceInfo.flatMap(_.guessedPrimaryClient).map(_.toString)
+          val cachedPredicateInfo =
+            if (self.target.params(PushParams.EnablePredicateDetailedInfoScribing)) {
+              Some(getCachedPredicateInfo())
+            } else None
+
+          val cachedPushCapInfo =
+            if (self.target
+                .params(PushParams.EnablePushCapInfoScribing)) {
+              Some(target.finalPushcapAndFatigue.values.toSeq)
+            } else None
+
+          val data = getNotificationScribe(
+            notifForPersistence,
+            userState,
+            dauCohort,
+            ibis2Response,
+            recTweetAuthorId,
+            recUserId,
+            notifForPersistence.mlModelScores,
+            primaryClient,
+            isMrBackfillCR,
+            tagsCR,
+            gizmoduckTargetUserOpt,
+            cachedPredicateInfo,
+            cachedPushCapInfo
+          )
+          // Don't scribe features for CRTs not eligible for ML Layer
+          if ((target.isModelTrainingData || target.scribeFeatureWithoutHydratingNewFeatures)
+            && !RecTypes.notEligibleForModelScoreTracking(self.commonRecType)) {
+            // scribe all the features for the model training data
+            self.getFeaturesForScribing.map { scribedFeatureMap =>
+              if (target.params(PushParams.EnableScribingMLFeaturesAsDataRecord) && !target.params(
+                  PushFeatureSwitchParams.EnableMrScribingMLFeaturesAsFeatureMapForStaging)) {
+                val scribedFeatureDataRecord =
+                  ScalaToJavaDataRecordConversions.javaDataRecord2ScalaDataRecord(
+                    PushQualityModelUtil.adaptToDataRecord(scribedFeatureMap, featureContext))
+                data.copy(
+                  featureDataRecord = Some(scribedFeatureDataRecord)
+                )
+              } else {
+                data.copy(features =
+                  Some(PushQualityModelUtil.convertFeatureMapToFeatures(scribedFeatureMap)))
+              }
+            }
+          } else Future.value(data)
+      }
+  }
+
+  /**
+   * Target and candidate features merged, with the sensitive media category removed from the
+   * sparse continuous features, then passed through `FeatureHydrator.getFeatures` together
+   * with the target and candidate hydration contexts.
+   */
+  def getFeaturesForScribing: Future[FeatureMap] = {
+    target.featureMap
+      .flatMap { targetFeatureMap =>
+        val onlineFeatureMap = targetFeatureMap ++ self
+          .candidateFeatureMap() // targetFeatureMap includes target core user history features
+
+        val filteredFeatureMap = onlineFeatureMap.copy(
+          sparseContinuousFeatures = onlineFeatureMap.sparseContinuousFeatures.filterKeys(
+            !_.equals(sensitiveMediaCategoryFeatureName))
+        )
+
+        val targetHydrationContext = HydrationContextBuilder.build(self.target)
+        val candidateHydrationContext = HydrationContextBuilder.build(self)
+
+        targetHydrationContext.join(candidateHydrationContext).flatMap {
+          case (targetContext, candidateContext) =>
+            FeatureHydrator.getFeatures(
+              candidateHydrationContext = candidateContext,
+              targetHydrationContext = targetContext,
+              onlineFeatures = filteredFeatureMap,
+              statsReceiver = statsReceiver)
+        }
+      }
+  }
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/CustomConfigurationMapForIbis.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/CustomConfigurationMapForIbis.scala
new file mode 100644
index 000000000..75b00e346
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/CustomConfigurationMapForIbis.scala
@@ -0,0 +1,25 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.ibis2.lib.util.JsonMarshal
+import com.twitter.util.Future
+
+/** Exposes the candidate's custom fields to Ibis as a JSON string under `custom_config`. */
+trait CustomConfigurationMapForIbis {
+  self: PushCandidate =>
+
+  /** The candidate's custom fields map serialized to JSON. */
+  lazy val customConfigMapsJsonFut: Future[String] = {
+    customFieldsMapFut.map { customFields =>
+      JsonMarshal.toJson(customFields)
+    }
+  }
+
+  /** `custom_config` -> JSON for logged-in targets; an empty map for logged-out users. */
+  lazy val customConfigMapsFut: Future[Map[String, String]] = {
+    if (self.target.isLoggedOutUser) {
+      Future.value(Map.empty[String, String])
+    } else {
+      customConfigMapsJsonFut.map { customConfigMapsJson =>
+        Map("custom_config" -> customConfigMapsJson)
+      }
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/DiscoverTwitterPushIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/DiscoverTwitterPushIbis2Hydrator.scala
new file mode 100644
index 000000000..a3a48ff28
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/DiscoverTwitterPushIbis2Hydrator.scala
@@ -0,0 +1,17 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.common.base.DiscoverTwitterCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeFutModelValues
+import com.twitter.util.Future
+
+/** Ibis2 hydrator for Discover Twitter candidates: adds `target_user` to the base model values. */
+trait DiscoverTwitterPushIbis2Hydrator extends Ibis2HydratorForCandidate {
+  self: PushCandidate with DiscoverTwitterCandidate =>
+
+  // The target's id rendered as a string.
+  private lazy val targetModelValues: Map[String, String] = Map(
+    "target_user" -> target.targetId.toString
+  )
+
+  override lazy val modelValues: Future[Map[String, String]] =
+    mergeFutModelValues(super.modelValues, Future.value(targetModelValues))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/F1FirstDegreeTweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/F1FirstDegreeTweetIbis2Hydrator.scala
new file mode 100644
index 000000000..6ddaa49d1
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/F1FirstDegreeTweetIbis2Hydrator.scala
@@ -0,0 +1,24 @@
+package
com.twitter.frigate.pushservice.model.ibis + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.F1FirstDegree +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.util.Future + +trait F1FirstDegreeTweetIbis2HydratorForCandidate + extends TweetCandidateIbis2Hydrator + with RankedSocialContextIbis2Hydrator { + self: PushCandidate with F1FirstDegree with TweetAuthorDetails => + + override lazy val scopedStats: StatsReceiver = statsReceiver.scope(getClass.getSimpleName) + + override lazy val tweetModelValues: Future[Map[String, String]] = { + for { + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ otherModelValues ++ mediaModelValue ++ tweetInlineModelValues ++ inlineVideoMediaMap + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/Ibis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/Ibis2Hydrator.scala new file mode 100644 index 000000000..fd7d26186 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/Ibis2Hydrator.scala @@ -0,0 +1,127 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.util.MRPushCopy +import com.twitter.frigate.common.util.MrPushCopyObjects +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.ibis2.service.thriftscala.Flags +import com.twitter.ibis2.service.thriftscala.Ibis2Request +import com.twitter.ibis2.service.thriftscala.RecipientSelector +import com.twitter.ibis2.service.thriftscala.ResponseFlags +import com.twitter.util.Future +import scala.util.control.NoStackTrace +import 
com.twitter.ni.lib.logged_out_transform.Ibis2RequestTransform
+
+class PushCopyIdNotFoundException(private val message: String)
+    extends Exception(message)
+    with NoStackTrace
+
+class InvalidPushCopyIdException(private val message: String)
+    extends Exception(message)
+    with NoStackTrace
+
+trait Ibis2HydratorForCandidate
+    extends CandidatePushCopy
+    with OverrideForIbis2Request
+    with CustomConfigurationMapForIbis {
+  self: PushCandidate =>
+
+  lazy val silentPushModelValue: Map[String, String] =
+    if (RecTypes.silentPushDefaultEnabledCrts.contains(commonRecType)) {
+      Map.empty
+    } else {
+      Map("is_silent_push" -> "true")
+    }
+
+  private def transformRelevanceScore(
+    mlScore: Double,
+    scoreRange: Seq[Double]
+  ): Double = {
+    val (lowerBound, upperBound) = (scoreRange.head, scoreRange.last)
+    (mlScore * (upperBound - lowerBound)) + lowerBound
+  }
+
+  private def getBoundedMlScore(mlScore: Double): Double = {
+    if (RecTypes.isMagicFanoutEventType(commonRecType)) {
+      val mfScoreRange = target.params(FS.MagicFanoutRelevanceScoreRange)
+      transformRelevanceScore(mlScore, mfScoreRange)
+    } else {
+      val mrScoreRange = target.params(FS.MagicRecsRelevanceScoreRange)
+      transformRelevanceScore(mlScore, mrScoreRange)
+    }
+  }
+
+  lazy val relevanceScoreMapFut: Future[Map[String, String]] = {
+    mrWeightedOpenOrNtabClickRankingProbability.map {
+      case Some(mlScore) if target.params(FS.IncludeRelevanceScoreInIbis2Payload) =>
+        val boundedMlScore = getBoundedMlScore(mlScore)
+        Map("relevance_score" -> boundedMlScore.toString)
+      case _ => Map.empty[String, String]
+    }
+  }
+
+  def customFieldsMapFut: Future[Map[String, String]] = relevanceScoreMapFut
+
+  //override is only enabled for RFPH CRT
+  def modelValues: Future[Map[String, String]] = {
+    Future.join(overrideModelValueFut, customConfigMapsFut).map {
+      case (overrideModelValue, customConfig) =>
+        overrideModelValue ++ silentPushModelValue ++ customConfig
+    }
+  }
+
+  def modelName: String = pushCopy.ibisPushModelName
+
+  def senderId: Option[Long] = None
+
+  /**
+   * Ibis2 request for this candidate: built through Ibis2RequestTransform when the target has
+   * logged-out metadata, otherwise addressed to target.targetId. Both shapes share one Flags.
+   */
+  def ibis2Request: Future[Option[Ibis2Request]] = {
+    Future.join(self.target.loggedOutMetadata, modelValues).map {
+      case (loggedOutMetadataOpt, modelVals) =>
+        val requestFlags = Some(
+          Flags(
+            darkWrite = Some(target.isDarkWrite),
+            skipDupcheck = target.pushContext.flatMap(_.useDebugHandler),
+            responseFlags = Some(ResponseFlags(stringTelemetry = Some(true)))
+          ))
+        val request = loggedOutMetadataOpt match {
+          case Some(metadata) =>
+            Ibis2RequestTransform
+              .apply(metadata, modelName, modelVals)
+              .copy(senderId = senderId, flags = requestFlags)
+          case None =>
+            Ibis2Request(
+              recipientSelector = RecipientSelector(Some(target.targetId)),
+              modelName = modelName,
+              modelValues = Some(modelVals),
+              senderId = senderId,
+              flags = requestFlags
+            )
+        }
+        Some(request)
+    }
+  }
+}
+
+trait CandidatePushCopy {
+  self: PushCandidate =>
+
+  final lazy val pushCopy: MRPushCopy =
+    pushCopyId match {
+      case Some(pushCopyId) =>
+        MrPushCopyObjects
+          .getCopyFromId(pushCopyId)
+          .getOrElse(
+            throw new InvalidPushCopyIdException(
+              s"Invalid push copy id: $pushCopyId for ${self.commonRecType}"))
+
+      case None =>
+        throw new PushCopyIdNotFoundException(
+          s"PushCopy not found in frigateNotification for ${self.commonRecType}"
+        )
+    }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/InlineActionIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/InlineActionIbis2Hydrator.scala
new file mode 100644
index 000000000..8e927254a
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/InlineActionIbis2Hydrator.scala
@@ -0,0 +1,12 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.util.InlineActionUtil
+import com.twitter.util.Future
+
+trait InlineActionIbis2Hydrator {
+  self: PushCandidate =>
+  // Delegates to InlineActionUtil.getTweetInlineActionValue for this candidate's target.
+  lazy val tweetInlineActionModelValue: Future[Map[String, String]] =
+    InlineActionUtil.getTweetInlineActionValue(target)
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ListIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ListIbis2Hydrator.scala
new file mode 100644
index 000000000..57483c8ba
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ListIbis2Hydrator.scala
@@ -0,0 +1,30 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.pushservice.model.ListRecommendationPushCandidate
+import com.twitter.util.Future
+
+/**
+ * Ibis2 hydration for list recommendation candidates: supplies the list id, list name and
+ * list owner as model values.
+ */
+trait ListIbis2Hydrator extends Ibis2HydratorForCandidate {
+  self: ListRecommendationPushCandidate =>
+
+  override lazy val senderId: Option[Long] = Some(0L)
+
+  // Replaces (does not merge with) the base hydrator's model values. A missing list name
+  // becomes "" and a missing owner id becomes 0.
+  override lazy val modelValues: Future[Map[String, String]] = {
+    val nameAndOwner = Future.join(listName, listOwnerId)
+    nameAndOwner.map {
+      case (maybeName, maybeOwnerId) =>
+        val displayName = maybeName.getOrElse("")
+        val ownerId = maybeOwnerId.getOrElse(0L)
+        Map(
+          "list" -> listId.toString,
+          "list_name" -> displayName,
+          "list_author" -> s"$ownerId"
+        )
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutCreatorEventIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutCreatorEventIbis2Hydrator.scala
new file mode 100644
index 000000000..edb0aa51e
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutCreatorEventIbis2Hydrator.scala
@@ -0,0 +1,29 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.magic_events.thriftscala.CreatorFanoutType
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.MagicFanoutCreatorEventPushCandidate
+import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeModelValues
+import com.twitter.util.Future
+
+trait MagicFanoutCreatorEventIbis2Hydrator
+    extends CustomConfigurationMapForIbis
+    with Ibis2HydratorForCandidate {
+  self: PushCandidate with MagicFanoutCreatorEventPushCandidate =>
+
+  // Lazy so userProfile/hydratedCreator/creatorFanoutType are not read during trait initialization.
+  lazy val userMap = Map(
+    "handle" -> userProfile.screenName,
+    "display_name" -> userProfile.name
+  )
+
+  override lazy val senderId = hydratedCreator.map(_.id)
+
+  override lazy val modelValues: Future[Map[String, String]] =
+    mergeModelValues(super.modelValues, userMap)
+  // UserSubscription fanouts produce no Ibis2 request; every other type uses the default one.
+  override lazy val ibis2Request = creatorFanoutType match {
+    case CreatorFanoutType.UserSubscription => Future.None
+    case _ => super.ibis2Request
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutNewsEventIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutNewsEventIbis2Hydrator.scala
new file mode 100644
index 000000000..a1b073a38
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutNewsEventIbis2Hydrator.scala
@@ -0,0 +1,103 @@
+package com.twitter.frigate.pushservice.model.ibis
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate
+import com.twitter.frigate.pushservice.params.PushConstants
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesUtil
+import com.twitter.frigate.pushservice.util.PushIbisUtil._
+import com.twitter.util.Future
+
+trait MagicFanoutNewsEventIbis2Hydrator extends Ibis2HydratorForCandidate {
+  self: PushCandidate with MagicFanoutEventHydratedCandidate =>
+
+  override lazy val senderId: Option[Long] = {
+    val isUgmMoment = self.semanticCoreEntityTags.values.flatten.toSet
+      .contains(MagicFanoutPredicatesUtil.UgmMomentTag)
+
+    owningTwitterUserIds.headOption match {
+      case
Some(owningTwitterUserId) + if isUgmMoment && target.params( + PushFeatureSwitchParams.MagicFanoutNewsUserGeneratedEventsEnable) => + Some(owningTwitterUserId) + case _ => None + } + } + + lazy val stats = self.statsReceiver.scope("MagicFanout") + lazy val defaultImageCounter = stats.counter("default_image") + lazy val requestImageCounter = stats.counter("request_num") + lazy val noneImageCounter = stats.counter("none_num") + + private def getModelValueMediaUrl( + urlOpt: Option[String], + mapKey: String + ): Option[(String, String)] = { + requestImageCounter.incr() + urlOpt match { + case Some(PushConstants.DefaultEventMediaUrl) => + defaultImageCounter.incr() + None + case Some(url) => Some(mapKey -> url) + case None => + noneImageCounter.incr() + None + } + } + + private lazy val eventModelValuesFut: Future[Map[String, String]] = { + for { + title <- eventTitleFut + squareImageUrl <- squareImageUrlFut + primaryImageUrl <- primaryImageUrlFut + eventDescriptionOpt <- eventDescriptionFut + } yield { + + val authorId = owningTwitterUserIds.headOption match { + case Some(author) + if target.params(PushFeatureSwitchParams.MagicFanoutNewsUserGeneratedEventsEnable) => + Some("author" -> author.toString) + case _ => None + } + + val eventDescription = eventDescriptionOpt match { + case Some(description) + if target.params(PushFeatureSwitchParams.MagicFanoutNewsEnableDescriptionCopy) => + Some("event_description" -> description) + case _ => + None + } + + Map( + "event_id" -> s"$eventId", + "event_title" -> title + ) ++ + getModelValueMediaUrl(squareImageUrl, "square_media_url") ++ + getModelValueMediaUrl(primaryImageUrl, "media_url") ++ + authorId ++ + eventDescription + } + } + + private lazy val topicValuesFut: Future[Map[String, String]] = { + if (target.params(PushFeatureSwitchParams.EnableTopicCopyForMF)) { + followedTopicLocalizedEntities.map(_.headOption).flatMap { + case Some(localizedEntity) => + Future.value(Map("topic_name" -> 
localizedEntity.localizedNameForDisplay)) + case _ => + ergLocalizedEntities.map(_.headOption).map { + case Some(localizedEntity) + if target.params(PushFeatureSwitchParams.EnableTopicCopyForImplicitTopics) => + Map("topic_name" -> localizedEntity.localizedNameForDisplay) + case _ => Map.empty[String, String] + } + } + } else { + Future.value(Map.empty[String, String]) + } + } + + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, mergeFutModelValues(eventModelValuesFut, topicValuesFut)) + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutProductLaunchIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutProductLaunchIbis2Hydrator.scala new file mode 100644 index 000000000..3062a66d0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutProductLaunchIbis2Hydrator.scala @@ -0,0 +1,54 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.MagicFanoutProductLaunchCandidate +import com.twitter.frigate.magic_events.thriftscala.ProductInfo +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeModelValues +import com.twitter.util.Future + +trait MagicFanoutProductLaunchIbis2Hydrator + extends CustomConfigurationMapForIbis + with Ibis2HydratorForCandidate { + self: PushCandidate with MagicFanoutProductLaunchCandidate => + + private def getProductInfoMap(productInfo: ProductInfo): Map[String, String] = { + val titleMap = productInfo.title + .map { title => + Map("title" -> title) + }.getOrElse(Map.empty) + val bodyMap = productInfo.body + .map { body => + Map("body" -> body) + }.getOrElse(Map.empty) + val deeplinkMap = productInfo.deeplink + .map { deeplink => + Map("deeplink" -> deeplink) + 
}.getOrElse(Map.empty) + + titleMap ++ bodyMap ++ deeplinkMap + } + + private lazy val landingPage: Map[String, String] = { + val urlFromFS = target.params(PushFeatureSwitchParams.ProductLaunchLandingPageDeepLink) + Map("push_land_url" -> urlFromFS) + } + + private lazy val customProductLaunchPushDetails: Map[String, String] = { + frigateNotification.magicFanoutProductLaunchNotification match { + case Some(productLaunchNotif) => + productLaunchNotif.productInfo match { + case Some(productInfo) => + getProductInfoMap(productInfo) + case _ => Map.empty + } + case _ => Map.empty + } + } + + override lazy val customFieldsMapFut: Future[Map[String, String]] = + mergeModelValues(super.customFieldsMapFut, customProductLaunchPushDetails) + + override lazy val modelValues: Future[Map[String, String]] = + mergeModelValues(super.modelValues, landingPage) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutSportsEventIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutSportsEventIbis2Hydrator.scala new file mode 100644 index 000000000..811caa993 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/MagicFanoutSportsEventIbis2Hydrator.scala @@ -0,0 +1,89 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.BaseGameScore +import com.twitter.frigate.common.base.MagicFanoutSportsEventCandidate +import com.twitter.frigate.common.base.MagicFanoutSportsScoreInformation +import com.twitter.frigate.common.base.NflGameScore +import com.twitter.frigate.common.base.SoccerGameScore +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutSportsUtil +import 
com.twitter.frigate.pushservice.util.PushIbisUtil._ +import com.twitter.util.Future + +trait MagicFanoutSportsEventIbis2Hydrator extends Ibis2HydratorForCandidate { + self: PushCandidate + with MagicFanoutEventHydratedCandidate + with MagicFanoutSportsEventCandidate + with MagicFanoutSportsScoreInformation => + + lazy val stats = self.statsReceiver.scope("MagicFanoutSportsEvent") + lazy val defaultImageCounter = stats.counter("default_image") + lazy val requestImageCounter = stats.counter("request_num") + lazy val noneImageCounter = stats.counter("none_num") + + override lazy val relevanceScoreMapFut = Future.value(Map.empty[String, String]) + + private def getModelValueMediaUrl( + urlOpt: Option[String], + mapKey: String + ): Option[(String, String)] = { + requestImageCounter.incr() + urlOpt match { + case Some(PushConstants.DefaultEventMediaUrl) => + defaultImageCounter.incr() + None + case Some(url) => Some(mapKey -> url) + case None => + noneImageCounter.incr() + None + } + } + + private lazy val eventModelValuesFut: Future[Map[String, String]] = { + for { + title <- eventTitleFut + squareImageUrl <- squareImageUrlFut + primaryImageUrl <- primaryImageUrlFut + } yield { + Map( + "event_id" -> s"$eventId", + "event_title" -> title + ) ++ + getModelValueMediaUrl(squareImageUrl, "square_media_url") ++ + getModelValueMediaUrl(primaryImageUrl, "media_url") + } + } + + private lazy val sportsScoreValues: Future[Map[String, String]] = { + for { + scores <- gameScores + homeName <- homeTeamInfo.map(_.map(_.name)) + awayName <- awayTeamInfo.map(_.map(_.name)) + } yield { + if (awayName.isDefined && homeName.isDefined && scores.isDefined) { + scores.get match { + case game: SoccerGameScore => + MagicFanoutSportsUtil.getSoccerIbisMap(game) ++ Map( + "away_team" -> awayName.get, + "home_team" -> homeName.get + ) + case game: NflGameScore => + MagicFanoutSportsUtil.getNflIbisMap(game) ++ Map( + "away_team" -> MagicFanoutSportsUtil.getNFLReadableName(awayName.get), + 
"home_team" -> MagicFanoutSportsUtil.getNFLReadableName(homeName.get) + ) + case baseGameScore: BaseGameScore => + Map.empty[String, String] + } + } else Map.empty[String, String] + } + } + + override lazy val customFieldsMapFut: Future[Map[String, String]] = + mergeFutModelValues(super.customFieldsMapFut, sportsScoreValues) + + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, eventModelValuesFut) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OutOfNetworkTweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OutOfNetworkTweetIbis2Hydrator.scala new file mode 100644 index 000000000..c7bd051b7 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OutOfNetworkTweetIbis2Hydrator.scala @@ -0,0 +1,90 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate +import com.twitter.frigate.common.base.TopicCandidate +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.rec_types.RecTypes._ +import com.twitter.frigate.common.util.MrPushCopyObjects +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.InlineActionUtil +import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeModelValues +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.util.Future + +trait OutOfNetworkTweetIbis2HydratorForCandidate extends TweetCandidateIbis2Hydrator { + self: PushCandidate with OutOfNetworkTweetCandidate with TopicCandidate with TweetAuthorDetails => + + private lazy val useNewOonCopyValue = + if (target.params(PushFeatureSwitchParams.EnableNewMROONCopyForPush)) { + Map( + "use_new_oon_copy" -> "true" + ) + } else Map.empty[String, String] + + override 
lazy val tweetDynamicInlineActionsModelValues = + if (target.params(PushFeatureSwitchParams.EnableOONGeneratedInlineActions)) { + val actions = target.params(PushFeatureSwitchParams.OONTweetDynamicInlineActionsList) + InlineActionUtil.getGeneratedTweetInlineActions(target, statsReceiver, actions) + } else Map.empty[String, String] + + private lazy val ibtModelValues: Map[String, String] = + Map( + "is_tweet" -> s"${!(hasPhoto || hasVideo)}", + "is_photo" -> s"$hasPhoto", + "is_video" -> s"$hasVideo" + ) + + private lazy val launchVideosInImmersiveExploreValue = + Map( + "launch_videos_in_immersive_explore" -> s"${hasVideo && target.params(PushFeatureSwitchParams.EnableLaunchVideosInImmersiveExplore)}" + ) + + private lazy val oonTweetModelValues = + useNewOonCopyValue ++ ibtModelValues ++ tweetDynamicInlineActionsModelValues ++ launchVideosInImmersiveExploreValue + + lazy val useTopicCopyForMBCGIbis = mrModelingBasedTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableMrModelingBasedCandidatesTopicCopy) + lazy val useTopicCopyForFrsIbis = frsTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableFrsTweetCandidatesTopicCopy) + lazy val useTopicCopyForTagspaceIbis = tagspaceTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableHashspaceCandidatesTopicCopy) + + override lazy val modelName: String = { + if (localizedUttEntity.isDefined && + (useTopicCopyForMBCGIbis || useTopicCopyForFrsIbis || useTopicCopyForTagspaceIbis)) { + MrPushCopyObjects.TopicTweet.ibisPushModelName // uses topic copy + } else super.modelName + } + + lazy val exploreVideoParams: Map[String, String] = { + if (self.commonRecType == CommonRecommendationType.ExploreVideoTweet) { + Map( + "is_explore_video" -> "true" + ) + } else Map.empty[String, String] + } + + override lazy val customFieldsMapFut: Future[Map[String, String]] = + mergeModelValues(super.customFieldsMapFut, exploreVideoParams) + + override lazy val 
tweetModelValues: Future[Map[String, String]] = + if (localizedUttEntity.isDefined && + (useTopicCopyForMBCGIbis || useTopicCopyForFrsIbis || useTopicCopyForTagspaceIbis)) { + lazy val topicTweetModelValues: Map[String, String] = + Map("topic_name" -> s"${localizedUttEntity.get.localizedNameForDisplay}") + for { + superModelValues <- super.tweetModelValues + tweetInlineModelValue <- tweetInlineActionModelValue + } yield { + superModelValues ++ topicTweetModelValues ++ tweetInlineModelValue + } + } else { + for { + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ mediaModelValue ++ oonTweetModelValues ++ tweetInlineModelValues ++ inlineVideoMediaMap + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OverrideForIbis2Request.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OverrideForIbis2Request.scala new file mode 100644 index 000000000..e802a6421 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/OverrideForIbis2Request.scala @@ -0,0 +1,210 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.store.deviceinfo.DeviceInfo +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FSParams} +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.ContinuousFunction +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.ContinuousFunctionParam +import com.twitter.frigate.pushservice.util.OverrideNotificationUtil +import com.twitter.frigate.pushservice.util.PushCapUtil +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import 
com.twitter.frigate.thriftscala.CommonRecommendationType.MagicFanoutSportsEvent +import com.twitter.ibis2.lib.util.JsonMarshal +import com.twitter.util.Future + +trait OverrideForIbis2Request { + self: PushCandidate => + + private lazy val overrideStats = self.statsReceiver.scope("override_for_ibis2") + + private lazy val addedOverrideAndroidCounter = + overrideStats.scope("android").counter("added_override_for_ibis2_request") + private lazy val addedSmartPushConfigAndroidCounter = + overrideStats.scope("android").counter("added_smart_push_config_for_ibis2_request") + private lazy val addedOverrideIosCounter = + overrideStats.scope("ios").counter("added_override_for_ibis2_request") + private lazy val noOverrideCounter = overrideStats.counter("no_override_for_ibis2_request") + private lazy val noOverrideDueToDeviceInfoCounter = + overrideStats.counter("no_override_due_to_device_info") + private lazy val addedMlScoreToPayloadAndroid = + overrideStats.scope("android").counter("added_ml_score") + private lazy val noMlScoreAddedToPayload = + overrideStats.counter("no_ml_score") + private lazy val addedNSlotsToPayload = + overrideStats.counter("added_n_slots") + private lazy val noNSlotsAddedToPayload = + overrideStats.counter("no_n_slots") + private lazy val addedCustomThreadIdToPayload = + overrideStats.counter("added_custom_thread_id") + private lazy val noCustomThreadIdAddedToPayload = + overrideStats.counter("no_custom_thread_id") + private lazy val enableTargetIdOverrideForMagicFanoutSportsEventCounter = + overrideStats.counter("enable_target_id_override_for_mf_sports_event") + + lazy val candidateModelScoreFut: Future[Option[Double]] = { + if (RecTypes.notEligibleForModelScoreTracking(commonRecType)) Future.None + else mrWeightedOpenOrNtabClickRankingProbability + } + + lazy val overrideModelValueFut: Future[Map[String, String]] = { + if (self.target.isLoggedOutUser) { + Future.value(Map.empty[String, String]) + } else { + Future + .join( + target.deviceInfo, + 
target.accountCountryCode, + OverrideNotificationUtil.getCollapseAndImpressionIdForOverride(self), + candidateModelScoreFut, + target.dynamicPushcap, + target.optoutAdjustedPushcap, + PushCapUtil.getDefaultPushCap(target) + ).map { + case ( + deviceInfoOpt, + countryCodeOpt, + Some((collapseId, impressionIds)), + mlScore, + dynamicPushcapOpt, + optoutAdjustedPushcapOpt, + defaultPushCap) => + val pushCap: Int = (dynamicPushcapOpt, optoutAdjustedPushcapOpt) match { + case (_, Some(optoutAdjustedPushcap)) => optoutAdjustedPushcap + case (Some(pushcapInfo), _) => pushcapInfo.pushcap + case _ => defaultPushCap + } + getClientSpecificOverrideModelValues( + target, + deviceInfoOpt, + countryCodeOpt, + collapseId, + impressionIds, + mlScore, + pushCap) + case _ => + noOverrideCounter.incr() + Map.empty[String, String] + } + } + } + + /** + * Determines the appropriate Override Notification model values based on the client + * @param target Target that will be receiving the push recommendation + * @param deviceInfoOpt Target's Device Info + * @param collapseId Collapse ID determined by OverrideNotificationUtil + * @param impressionIds Impression IDs of previously sent Override Notifications + * @param mlScore Open/NTab click ranking score of the current push candidate + * @param pushCap Push cap for the target + * @return Map consisting of the model values that need to be added to the Ibis2 Request + */ + def getClientSpecificOverrideModelValues( + target: Target, + deviceInfoOpt: Option[DeviceInfo], + countryCodeOpt: Option[String], + collapseId: String, + impressionIds: Seq[String], + mlScoreOpt: Option[Double], + pushCap: Int + ): Map[String, String] = { + + val primaryDeviceIos = PushDeviceUtil.isPrimaryDeviceIOS(deviceInfoOpt) + val primaryDeviceAndroid = PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfoOpt) + + if (primaryDeviceIos || + (primaryDeviceAndroid && + target.params(FSParams.EnableOverrideNotificationsSmartPushConfigForAndroid))) { + + if (primaryDeviceIos) 
addedOverrideIosCounter.incr() + else addedSmartPushConfigAndroidCounter.incr() + + val impressionIdsSeq = { + if (target.params(FSParams.EnableTargetIdsInSmartPushPayload)) { + if (target.params(FSParams.EnableOverrideNotificationsMultipleTargetIds)) + impressionIds + else Seq(impressionIds.head) + } + // Explicitly enable targetId-based override for MagicFanoutSportsEvent candidates (live sport update notifications) + else if (self.commonRecType == MagicFanoutSportsEvent && target.params( + FSParams.EnableTargetIdInSmartPushPayloadForMagicFanoutSportsEvent)) { + enableTargetIdOverrideForMagicFanoutSportsEventCounter.incr() + Seq(impressionIds.head) + } else Seq.empty[String] + } + + val mlScoreMap = mlScoreOpt match { + case Some(mlScore) + if target.params(FSParams.EnableOverrideNotificationsScoreBasedOverride) => + addedMlScoreToPayloadAndroid.incr() + Map("score" -> mlScore) + case _ => + noMlScoreAddedToPayload.incr() + Map.empty + } + + val nSlotsMap = { + if (target.params(FSParams.EnableOverrideNotificationsNSlots)) { + if (target.params(FSParams.EnableOverrideMaxSlotFn)) { + val nslotFnParam = ContinuousFunctionParam( + target + .params(PushFeatureSwitchParams.OverrideMaxSlotFnPushCapKnobs), + target + .params(PushFeatureSwitchParams.OverrideMaxSlotFnNSlotKnobs), + target + .params(PushFeatureSwitchParams.OverrideMaxSlotFnPowerKnobs), + target + .params(PushFeatureSwitchParams.OverrideMaxSlotFnWeight), + target.params(FSParams.OverrideNotificationsMaxNumOfSlots) + ) + val numOfSlots = ContinuousFunction.safeEvaluateFn( + pushCap, + nslotFnParam, + overrideStats.scope("max_nslot_fn")) + overrideStats.counter("max_notification_slots_num_" + numOfSlots.toString).incr() + addedNSlotsToPayload.incr() + Map("max_notification_slots" -> numOfSlots) + } else { + addedNSlotsToPayload.incr() + val numOfSlots = target.params(FSParams.OverrideNotificationsMaxNumOfSlots) + Map("max_notification_slots" -> numOfSlots) + } + } else { + noNSlotsAddedToPayload.incr() + 
Map.empty + } + } + + val baseActionDetailsMap = Map("target_ids" -> impressionIdsSeq) + + val actionDetailsMap = + Map("action_details" -> (baseActionDetailsMap ++ nSlotsMap)) + + val baseSmartPushConfigMap = Map("notification_action" -> "REPLACE") + + val customThreadId = { + if (target.params(FSParams.EnableCustomThreadIdForOverride)) { + addedCustomThreadIdToPayload.incr() + Map("custom_thread_id" -> impressionId) + } else { + noCustomThreadIdAddedToPayload.incr() + Map.empty + } + } + + val smartPushConfigMap = + JsonMarshal.toJson( + baseSmartPushConfigMap ++ actionDetailsMap ++ mlScoreMap ++ customThreadId) + + Map("smart_notification_configuration" -> smartPushConfigMap) + } else if (primaryDeviceAndroid) { + addedOverrideAndroidCounter.incr() + Map("notification_id" -> collapseId, "overriding_impression_id" -> impressionIds.head) + } else { + noOverrideDueToDeviceInfoCounter.incr() + Map.empty[String, String] + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/PushOverrideInfo.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/PushOverrideInfo.scala new file mode 100644 index 000000000..359b876a1 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/PushOverrideInfo.scala @@ -0,0 +1,246 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.OverrideInfo +import com.twitter.util.Duration +import com.twitter.util.Time + +object PushOverrideInfo { + + private val name: String = this.getClass.getSimpleName + + /** + * Gets all eligible time + override push notification pairs from a target's History + * + * @param history: history of push 
notifications + * @param lookbackDuration: duration to look back up in history for overriding notifications + * @return: list of notifications with send timestamps which are eligible for overriding + */ + def getOverrideEligibleHistory( + history: History, + lookbackDuration: Duration, + ): Seq[(Time, FrigateNotification)] = { + history.sortedHistory + .takeWhile { case (notifTimestamp, _) => lookbackDuration.ago < notifTimestamp } + .filter { + case (_, notification) => notification.overrideInfo.isDefined + } + } + + /** + * Gets all eligible override push notifications from a target's History + * + * @param history Target's History + * @param lookbackDuration Duration in which we would like to obtain the eligible push notifications + * @param stats StatsReceiver to track stats for this function; the size of the result is added to the "eligible_notifications_size_distribution" stat + * @return Returns the notifications of getOverrideEligibleHistory (newer than the lookback cutoff, overrideInfo defined) with their timestamps dropped + */ + def getOverrideEligiblePushNotifications( + history: History, + lookbackDuration: Duration, + stats: StatsReceiver, + ): Seq[FrigateNotification] = { + val eligibleNotificationsDistribution = + stats.scope(name).stat("eligible_notifications_size_distribution") + val eligibleNotificationsSeq = + getOverrideEligibleHistory(history, lookbackDuration) + .collect { + case (_, notification) => notification + } + + eligibleNotificationsDistribution.add(eligibleNotificationsSeq.size) + eligibleNotificationsSeq + } + + /** + * Gets the OverrideInfo for the last eligible Override Notification FrigateNotification, if it exists + * @param history Target's History + * @param lookbackDuration Duration in which we would like to obtain the last override notification + * @param stats StatsReceiver to track stats for this function; one of "override_info_exists_for_last_push" / "override_info_empty_of_last_push" is incremented per call + * @return Returns the OverrideInfo of the head of the eligible notifications (presumably the most recent push -- confirm sortedHistory ordering), or None when there are no eligible notifications + */ + def getOverrideInfoOfLastEligiblePushNotif( + history: History, + lookbackDuration: Duration, + stats: StatsReceiver + ): Option[OverrideInfo] = { + val overrideInfoEmptyOfLastPush = 
stats.scope(name).counter("override_info_empty_of_last_push") + val overrideInfoExistsForLastPush = + stats.scope(name).counter("override_info_exists_for_last_push") + val overrideHistory = + getOverrideEligiblePushNotifications(history, lookbackDuration, stats) + if (overrideHistory.isEmpty) { + overrideInfoEmptyOfLastPush.incr() + None + } else { + overrideInfoExistsForLastPush.incr() + overrideHistory.head.overrideInfo + } + } + + /** + * Gets all the MR Push Notifications in the specified override chain, scanning the target's whole sortedHistory (no lookback limit is applied) + * @param history Target's History + * @param overrideChainId Override Chain Identifier + * @param stats StatsReceiver to track stats for this function + * @return Returns a sequence of FrigateNotification that exist in the override chain + */ + def getMrPushNotificationsInOverrideChain( + history: History, + overrideChainId: String, + stats: StatsReceiver + ): Seq[FrigateNotification] = { + val notificationInOverrideChain = stats.scope(name).counter("notification_in_override_chain") + val notificationNotInOverrideChain = + stats.scope(name).counter("notification_not_in_override_chain") + history.sortedHistory.flatMap { + case (_, notification) + if isNotificationInOverrideChain(notification, overrideChainId, stats) => + notificationInOverrideChain.incr() + Some(notification) + case _ => + notificationNotInOverrideChain.incr() + None + } + } + + /** + * Gets the timestamp (in milliseconds) for the specified FrigateNotification + * @param notification The FrigateNotification that we would like the timestamp for + * @param history Target's History + * @param stats StatsReceiver to track stats for this function + * @return Returns the timestamp in milliseconds of the first history entry equal to the specified notification + * if it exists in History, else None + */ + def getTimestampInMillisForFrigateNotification( + notification: FrigateNotification, + history: History, + stats: StatsReceiver + ): Option[Long] = { + val foundTimestampOfNotificationInHistory = 
stats.scope(name).counter("found_timestamp_of_notification_in_history") + history.sortedHistory + .find(_._2.equals(notification)).map { + case (time, _) => + foundTimestampOfNotificationInHistory.incr() + time.inMilliseconds + } + } + + /** + * Gets the frigate notification stored under the smallest key of the map (presumably the oldest one; what the Long key represents is not visible here -- confirm with callers) + * @param overrideCandidatesMap All the NTab Notifications in the override chain; must be non-empty, because minBy throws on an empty map + * @return Returns the oldest frigate notification in the chain, i.e. the value with the minimum key + */ + def getOldestFrigateNotification( + overrideCandidatesMap: Map[Long, FrigateNotification], + ): FrigateNotification = { + overrideCandidatesMap.minBy(_._1)._2 + } + + /** + * Gets the impression ids of the previous eligible push notifications. + * @param history Target's History + * @param lookbackDuration Duration in which we would like to obtain previous impression ids + * @param stats StatsReceiver to track stats for this function + * @return Returns the impression ids of all eligible push notifs in the lookback window whose CRT is not in + * RecTypes.nonGenericOverrideTypes; an empty Seq if there are none. 
+ */ + def getImpressionIdsOfPrevEligiblePushNotif( + history: History, + lookbackDuration: Duration, + stats: StatsReceiver + ): Seq[String] = { + val foundImpressionIdOfLastEligiblePushNotif = + stats.scope(name).counter("found_impression_id_of_last_eligible_push_notif") + val overrideHistoryEmptyWhenFetchingImpressionId = + stats.scope(name).counter("override_history_empty_when_fetching_impression_id") + val overrideHistory = getOverrideEligiblePushNotifications(history, lookbackDuration, stats) + .filter(frigateNotification => + // Exclude notifications of nonGenericOverrideTypes from being overridden + !RecTypes.nonGenericOverrideTypes.contains(frigateNotification.commonRecommendationType)) + + if (overrideHistory.isEmpty) { + overrideHistoryEmptyWhenFetchingImpressionId.incr() + Seq.empty + } else { + foundImpressionIdOfLastEligiblePushNotif.incr() + overrideHistory.flatMap(_.impressionId) + } + } + + /** + * Gets the impression ids by eventId, for MagicFanoutEvent candidates. + * + * @param history Target's History + * @param lookbackDuration Duration in which we would like to obtain previous impression ids + * @param stats StatsReceiver to track stats for this function + * @param overridableType Specific MagicFanoutEvent CRT + * @param eventId Event identifier for MagicFanoutEventCandidate. + * @return Returns the impression identifiers of all eligible push notifications in the lookback window that match + * both overridableType and eventId; an empty Seq if there are none. 
+ */ + def getImpressionIdsForPrevEligibleMagicFanoutEventCandidates( + history: History, + lookbackDuration: Duration, + stats: StatsReceiver, + overridableType: CommonRecommendationType, + eventId: Long + ): Seq[String] = { + val foundImpressionIdOfMagicFanoutEventNotif = + stats.scope(name).counter("found_impression_id_of_magic_fanout_event_notif") + val overrideHistoryEmptyWhenFetchingImpressionId = + stats + .scope(name).counter( + "override_history_empty_when_fetching_impression_id_for_magic_fanout_event_notif") + + val overrideHistory = + getOverrideEligiblePushNotifications(history, lookbackDuration, stats) + .filter(frigateNotification => + // Only override notifications with same CRT and eventId + frigateNotification.commonRecommendationType == overridableType && + frigateNotification.magicFanoutEventNotification.exists(_.eventId == eventId)) + + if (overrideHistory.isEmpty) { + overrideHistoryEmptyWhenFetchingImpressionId.incr() + Seq.empty + } else { + foundImpressionIdOfMagicFanoutEventNotif.incr() + overrideHistory.flatMap(_.impressionId) + } + } + + /** + * Determines if the provided notification is part of the specified override chain + * @param notification FrigateNotification that we're trying to identify as within the override chain + * @param overrideChainId Override Chain Identifier + * @param stats StatsReceiver to track stats for this function; "notif_is_in_override_chain" or "notif_not_in_override_chain" is incremented per call + * @return Returns true if the notification has an overrideInfo whose collapseInfo.overrideChainId equals overrideChainId, else false (also false when overrideInfo is empty) + */ + private def isNotificationInOverrideChain( + notification: FrigateNotification, + overrideChainId: String, + stats: StatsReceiver + ): Boolean = { + val notifIsInOverrideChain = stats.scope(name).counter("notif_is_in_override_chain") + val notifNotInOverrideChain = stats.scope(name).counter("notif_not_in_override_chain") + notification.overrideInfo match { + case Some(overrideInfo) => + val isNotifInOverrideChain = overrideInfo.collapseInfo.overrideChainId == overrideChainId + if 
(isNotifInOverrideChain) { + notifIsInOverrideChain.incr() + true + } else { + notifNotInOverrideChain.incr() + false + } + case _ => + notifNotInOverrideChain.incr() + false + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/RankedSocialContextIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/RankedSocialContextIbis2Hydrator.scala new file mode 100644 index 000000000..479c230eb --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/RankedSocialContextIbis2Hydrator.scala @@ -0,0 +1,22 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.SocialContextAction +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.frigate.pushservice.util.PushIbisUtil +import com.twitter.util.Future +/** Mixin for candidates with social-context actions: ranks those actions and exposes Ibis2 model values built from the ranked user ids. */ +trait RankedSocialContextIbis2Hydrator { + self: PushCandidate with SocialContextActions => + /** Result of PushIbisUtil.getSocialContextModelValues applied to the user ids of the ranked actions, in ranked order. */ + lazy val socialContextModelValues: Future[Map[String, String]] = + rankedSocialContextActionsFut.map(rankedSocialContextActions => + PushIbisUtil.getSocialContextModelValues(rankedSocialContextActions.map(_.userId))) + /** socialContextActions ranked by CandidateUtil.getRankedSocialContext with the target's seedsWithWeight and defaultToRecency = false. */ + lazy val rankedSocialContextActionsFut: Future[Seq[SocialContextAction]] = + CandidateUtil.getRankedSocialContext( + socialContextActions, + target.seedsWithWeight, + defaultToRecency = false) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSpeakerIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSpeakerIbis2Hydrator.scala new file mode 100644 index 000000000..d1a439972 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSpeakerIbis2Hydrator.scala @@ -0,0 +1,34 @@ +package 
com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.pushservice.model.ScheduledSpaceSpeakerPushCandidate +import com.twitter.frigate.pushservice.util.PushIbisUtil._ +import com.twitter.frigate.thriftscala.SpaceNotificationType +import com.twitter.util.Future +/** Ibis2 hydrator for scheduled-space speaker candidates; no sender id is set. */ +trait ScheduledSpaceSpeakerIbis2Hydrator extends Ibis2HydratorForCandidate { + self: ScheduledSpaceSpeakerPushCandidate => + + override lazy val senderId: Option[Long] = None + /** host_id, space_id and is_start_now (true when the space notification type is AtSpaceBroadcast), plus space_title when the audio space has a title; a failed Future (IllegalStateException) when hostId is empty. */ + private lazy val targetModelValues: Future[Map[String, String]] = { + hostId match { + case Some(spaceHostId) => + audioSpaceFut.map { audioSpace => + val isStartNow = frigateNotification.spaceNotification.exists( + _.spaceNotificationType.contains(SpaceNotificationType.AtSpaceBroadcast)) + + Map( + "host_id" -> s"$spaceHostId", + "space_id" -> spaceId, + "is_start_now" -> s"$isStartNow" + ) ++ audioSpace.flatMap(_.title.map("space_title" -> _)) + } + case _ => + Future.exception( + new IllegalStateException("Unable to get host id for ScheduledSpaceSpeakerIbis2Hydrator")) + } + } + /** Parent model values merged with targetModelValues via mergeFutModelValues. */ + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, targetModelValues) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSubscriberIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSubscriberIbis2Hydrator.scala new file mode 100644 index 000000000..b1486de3f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/ScheduledSpaceSubscriberIbis2Hydrator.scala @@ -0,0 +1,29 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.pushservice.model.ScheduledSpaceSubscriberPushCandidate +import com.twitter.frigate.pushservice.util.PushIbisUtil._ +import com.twitter.util.Future +/** Ibis2 hydrator for scheduled-space subscriber candidates; the sender id is the space host id. */ +trait ScheduledSpaceSubscriberIbis2Hydrator extends Ibis2HydratorForCandidate { + self: ScheduledSpaceSubscriberPushCandidate => + + override lazy 
val senderId: Option[Long] = hostId + /** host_id and space_id, plus space_title when the audio space has a title; a failed Future (RuntimeException) when hostId is empty. */ + private lazy val targetModelValues: Future[Map[String, String]] = { + hostId match { + case Some(spaceHostId) => + audioSpaceFut.map { audioSpace => + Map( + "host_id" -> s"$spaceHostId", + "space_id" -> spaceId, + ) ++ audioSpace.flatMap(_.title.map("space_title" -> _)) + } + case _ => + Future.exception( + new RuntimeException("Unable to get host id for ScheduledSpaceSubscriberIbis2Hydrator")) + } + } + /** Parent model values merged with targetModelValues via mergeFutModelValues. */ + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, targetModelValues) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/SubscribedSearchTweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/SubscribedSearchTweetIbis2Hydrator.scala new file mode 100644 index 000000000..a61edc509 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/SubscribedSearchTweetIbis2Hydrator.scala @@ -0,0 +1,33 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.pushservice.model.SubscribedSearchTweetPushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.InlineActionUtil +import com.twitter.util.Future +/** Ibis2 hydrator for subscribed-search tweet candidates: adds search_term and search_url to the tweet model values. */ +trait SubscribedSearchTweetIbis2Hydrator extends TweetCandidateIbis2Hydrator { + self: SubscribedSearchTweetPushCandidate => + /** Generated inline actions, gated here by EnableOONGeneratedInlineActions; an empty map when that param is off. */ + override lazy val tweetDynamicInlineActionsModelValues = { + if (target.params(PushFeatureSwitchParams.EnableOONGeneratedInlineActions)) { + val actions = target.params(PushFeatureSwitchParams.TweetDynamicInlineActionsList) + InlineActionUtil.getGeneratedTweetInlineActions(target, statsReceiver, actions) + } else Map.empty[String, String] + } + + private lazy val searchTermValue: Map[String, String] = + Map( + "search_term" -> searchTerm, + "search_url" -> pushLandingUrl + ) + + private lazy val searchModelValues = searchTermValue ++ 
tweetDynamicInlineActionsModelValues + /** Parent tweet model values plus media, search, inline-action and inline-video values; with Map `++`, a key present on both sides takes the right-hand value. */ + override lazy val tweetModelValues: Future[Map[String, String]] = + for { + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ mediaModelValue ++ searchModelValues ++ tweetInlineModelValues ++ inlineVideoMediaMap + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopTweetImpressionsCandidateIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopTweetImpressionsCandidateIbis2Hydrator.scala new file mode 100644 index 000000000..e12733fb2 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopTweetImpressionsCandidateIbis2Hydrator.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.TopTweetImpressionsCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeFutModelValues +import com.twitter.util.Future +/** Ibis2 hydrator for top-tweet-impressions candidates: adds target_user, tweet and impressions_count to the model values. */ +trait TopTweetImpressionsCandidateIbis2Hydrator extends Ibis2HydratorForCandidate { + self: PushCandidate with TopTweetImpressionsCandidate => + + private lazy val targetModelValues: Map[String, String] = { + Map( + "target_user" -> target.targetId.toString, + "tweet" -> tweetId.toString, + "impressions_count" -> impressionsCount.toString + ) + } + /** Parent model values merged with the already-computed targetModelValues via mergeFutModelValues. */ + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, Future.value(targetModelValues)) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopicProofTweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopicProofTweetIbis2Hydrator.scala new file mode 100644 index 000000000..6a187dfeb --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TopicProofTweetIbis2Hydrator.scala @@ -0,0 
+1,32 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.pushservice.model.TopicProofTweetPushCandidate +import com.twitter.frigate.pushservice.exception.UttEntityNotFoundException +import com.twitter.util.Future +/** Ibis2 hydrator for topic-proof tweet candidates: adds topic_name and topic_id and takes the model name from pushCopy. */ +trait TopicProofTweetIbis2Hydrator extends TweetCandidateIbis2Hydrator { + self: TopicProofTweetPushCandidate => + /** topic_name and topic_id of the localized UTT entity; throws UttEntityNotFoundException when localizedUttEntity is empty. Being lazy, it is first evaluated inside the yield of tweetModelValues below. */ + private lazy val implicitTopicTweetModelValues: Map[String, String] = { + val uttEntity = localizedUttEntity.getOrElse( + throw new UttEntityNotFoundException( + s"${getClass.getSimpleName} UttEntity missing for $tweetId")) + + Map( + "topic_name" -> uttEntity.localizedNameForDisplay, + "topic_id" -> uttEntity.entityId.toString + ) + } + + override lazy val modelName: String = pushCopy.ibisPushModelName + + override lazy val tweetModelValues: Future[Map[String, String]] = + for { + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ + tweetInlineModelValues ++ + implicitTopicTweetModelValues + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TrendTweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TrendTweetIbis2Hydrator.scala new file mode 100644 index 000000000..1c3420df4 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TrendTweetIbis2Hydrator.scala @@ -0,0 +1,16 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.TrendTweetCandidate +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +/** Ibis2 hydrator for trend tweet candidates: adds trend_name and the inline-action values to the parent tweet model values. */ +trait TrendTweetIbis2Hydrator extends TweetCandidateIbis2Hydrator { + self: PushCandidate with TrendTweetCandidate with TweetAuthorDetails => + + lazy val trendNameModelValue = Map("trend_name" -> trendName) + + override lazy val tweetModelValues = for { + tweetValues <- super.tweetModelValues + 
inlineActionValues <- tweetInlineActionModelValue + } yield tweetValues ++ inlineActionValues ++ trendNameModelValue +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetCandidateIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetCandidateIbis2Hydrator.scala new file mode 100644 index 000000000..0b0a5db05 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetCandidateIbis2Hydrator.scala @@ -0,0 +1,166 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.SubtextForAndroidPushHeader +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.pushservice.util.CopyUtil +import com.twitter.frigate.pushservice.util.EmailLandingPageExperimentUtil +import com.twitter.frigate.pushservice.util.InlineActionUtil +import com.twitter.frigate.pushservice.util.PushToHomeUtil +import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeFutModelValues +import com.twitter.util.Future +/** Base Ibis2 hydrator for tweet candidates: provides the tweet, author, landing-page and inline-action model values that the tweet-specific hydrators extend. */ +trait TweetCandidateIbis2Hydrator + extends Ibis2HydratorForCandidate + with InlineActionIbis2Hydrator + with CustomConfigurationMapForIbis { + self: PushCandidate with TweetCandidate with TweetDetails with TweetAuthorDetails => + /** Stats receiver scoped by the simple name of the concrete runtime class. */ + lazy val scopedStats: StatsReceiver = statsReceiver.scope(getClass.getSimpleName) + + lazy val tweetIdModelValue: Map[String, String] = + Map( + "tweet" -> tweetId.toString + ) + /** "author" model value. Asserts that authorId is defined; the 0L fallback is only reachable when the assertion is not in effect (Scala assertions can be elided at compile time). */ + lazy val authorModelValue: Map[String, String] = { + assert(authorId.isDefined) + Map( + "author" -> 
authorId.getOrElse(0L).toString + ) + } + + lazy val otherModelValues: Map[String, String] = + Map( + "show_explanatory_text" -> "true", + "show_negative_feedback" -> "true" + ) + + lazy val mediaModelValue: Map[String, String] = + Map( + "show_media" -> "true" + ) + /** Inline-video flags taken from EnableInlineVideo and EnableAutoplayForInlineVideo; emitted only when the tweet has a video, otherwise an empty map. */ + lazy val inlineVideoMediaMap: Map[String, String] = { + if (hasVideo) { + val isInlineVideoEnabled = target.params(FS.EnableInlineVideo) + val isAutoplayEnabled = target.params(FS.EnableAutoplayForInlineVideo) + Map( + "enable_inline_video_for_ios" -> isInlineVideoEnabled.toString, + "enable_autoplay_for_inline_video_ios" -> isAutoplayEnabled.toString + ) + } else Map.empty + } + /** Landing-page model values: the PushToHomeUtil value when it returns one, otherwise the EmailLandingPageExperimentUtil value for this tweet. */ + lazy val landingPageModelValues: Future[Map[String, String]] = { + for { + deviceInfoOpt <- target.deviceInfo + } yield { + PushToHomeUtil.getIbis2ModelValue(deviceInfoOpt, target, scopedStats) match { + case Some(pushToHomeModelValues) => pushToHomeModelValues + case _ => + EmailLandingPageExperimentUtil.getIbis2ModelValue( + deviceInfoOpt, + target, + tweetId + ) + } + } + } + /** Generated inline actions, gated by EnableTweetDynamicInlineActions; an empty map when that param is off. */ + lazy val tweetDynamicInlineActionsModelValues = { + if (target.params(PushFeatureSwitchParams.EnableTweetDynamicInlineActions)) { + val actions = target.params(PushFeatureSwitchParams.TweetDynamicInlineActionsList) + InlineActionUtil.getGeneratedTweetInlineActions(target, statsReceiver, actions) + } else Map.empty[String, String] + } + /** Generated inline actions for web; always empty for logged-out users. */ + lazy val tweetDynamicInlineActionsModelValuesForWeb: Map[String, String] = { + if (target.isLoggedOutUser) { + Map.empty[String, String] + } else { + InlineActionUtil.getGeneratedTweetInlineActionsForWeb( + actions = target.params(PushFeatureSwitchParams.TweetDynamicInlineActionsListForWeb), + enableForDesktopWeb = + target.params(PushFeatureSwitchParams.EnableDynamicInlineActionsForDesktopWeb), + enableForMobileWeb = + target.params(PushFeatureSwitchParams.EnableDynamicInlineActionsForMobileWeb) + ) + } + } + + lazy val copyFeaturesFut: Future[Map[String, String]] = + CopyUtil.getCopyFeatures(self, scopedStats) 
+ /** Yields Map("is_author_verified" -> "true") only when the tweet author is present, its safety info marks it verified and EnablePushPresentationVerifiedSymbol is on; every other case yields an empty map. Each branch increments its own counter. */ + private def getVerifiedSymbolModelValue: Future[Map[String, String]] = { + self.tweetAuthor.map { + case Some(author) => + if (author.safety.exists(_.verified)) { + scopedStats.counter("is_verified").incr() + if (target.params(FS.EnablePushPresentationVerifiedSymbol)) { + scopedStats.counter("is_verified_and_add").incr() + Map("is_author_verified" -> "true") + } else { + scopedStats.counter("is_verified_and_NOT_add").incr() + Map.empty + } + } else { + scopedStats.counter("is_NOT_verified").incr() + Map.empty + } + case _ => + scopedStats.counter("none_author").incr() + Map.empty + } + } + /** Maps the SubtextInAndroidPushHeaderParam value to a single "subtext_..." -> "true" flag; None and any unlisted value produce an empty map. */ + private def subtextAndroidPushHeader: Map[String, String] = { + self.target.params(PushFeatureSwitchParams.SubtextInAndroidPushHeaderParam) match { + case SubtextForAndroidPushHeader.None => + Map.empty + case SubtextForAndroidPushHeader.TargetHandler => + Map("subtext_target_handler" -> "true") + case SubtextForAndroidPushHeader.TargetTagHandler => + Map("subtext_target_tag_handler" -> "true") + case SubtextForAndroidPushHeader.TargetName => + Map("subtext_target_name" -> "true") + case SubtextForAndroidPushHeader.AuthorTagHandler => + Map("subtext_author_tag_handler" -> "true") + case SubtextForAndroidPushHeader.AuthorName => + Map("subtext_author_name" -> "true") + case _ => + Map.empty + } + } + + lazy val bodyPushMap: Map[String, String] = { + if (self.target.params(PushFeatureSwitchParams.EnableEmptyBody)) { + Map("enable_empty_body" -> "true") + } else Map.empty[String, String] + } + /** Parent custom fields plus copy features, verified-symbol flag, Android subtext flag and empty-body flag; with Map `++`, a key present on both sides takes the right-hand value. */ + override def customFieldsMapFut: Future[Map[String, String]] = + for { + superModelValues <- super.customFieldsMapFut + copyFeaturesModelValues <- copyFeaturesFut + verifiedSymbolModelValue <- getVerifiedSymbolModelValue + } yield { + superModelValues ++ copyFeaturesModelValues ++ + verifiedSymbolModelValue ++ subtextAndroidPushHeader ++ bodyPushMap + } + + override lazy val senderId: Option[Long] = authorId + + def tweetModelValues: Future[Map[String, String]] = + 
landingPageModelValues.map { landingPageModelValues => + tweetIdModelValue ++ authorModelValue ++ landingPageModelValues ++ tweetDynamicInlineActionsModelValues ++ tweetDynamicInlineActionsModelValuesForWeb + } + /** Parent model values merged with tweetModelValues via mergeFutModelValues. */ + override lazy val modelValues: Future[Map[String, String]] = + mergeFutModelValues(super.modelValues, tweetModelValues) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetFavoriteIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetFavoriteIbis2Hydrator.scala new file mode 100644 index 000000000..ae4cd9174 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetFavoriteIbis2Hydrator.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetFavoriteCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.util.Future +/** Ibis2 hydrator for tweet-favorite candidates: parent tweet model values plus media, other, social-context and inline-action values. */ +trait TweetFavoriteCandidateIbis2Hydrator + extends TweetCandidateIbis2Hydrator + with RankedSocialContextIbis2Hydrator { + self: PushCandidate with TweetFavoriteCandidate with TweetAuthorDetails => + /** With Map `++`, a key present on both sides takes the right-hand value. */ + override lazy val tweetModelValues: Future[Map[String, String]] = + for { + socialContextModelValues <- socialContextModelValues + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ mediaModelValue ++ otherModelValues ++ socialContextModelValues ++ tweetInlineModelValues + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetRetweetIbis2Hydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetRetweetIbis2Hydrator.scala new file mode 100644 index 000000000..2b665a8fa --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ibis/TweetRetweetIbis2Hydrator.scala @@ -0,0 +1,32 @@ 
+package com.twitter.frigate.pushservice.model.ibis + +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetRetweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.PushIbisUtil.mergeModelValues + +import com.twitter.util.Future +/** Ibis2 hydrator for tweet-retweet candidates: parent tweet model values plus media, other, social-context, inline-action and inline-video values. */ +trait TweetRetweetCandidateIbis2Hydrator + extends TweetCandidateIbis2Hydrator + with RankedSocialContextIbis2Hydrator { + self: PushCandidate with TweetRetweetCandidate with TweetAuthorDetails => + + override lazy val tweetModelValues: Future[Map[String, String]] = + for { + socialContextModelValues <- socialContextModelValues + superModelValues <- super.tweetModelValues + tweetInlineModelValues <- tweetInlineActionModelValue + } yield { + superModelValues ++ mediaModelValue ++ otherModelValues ++ socialContextModelValues ++ tweetInlineModelValues ++ inlineVideoMediaMap + } + /** "enable_social_context_retweet" -> "true" when EnableSocialContextForRetweet is on, otherwise an empty map. */ + lazy val socialContextForRetweetMap: Map[String, String] = + if (self.target.params(PushFeatureSwitchParams.EnableSocialContextForRetweet)) { + Map("enable_social_context_retweet" -> "true") + } else Map.empty[String, String] + /** Parent custom fields merged with socialContextForRetweetMap via mergeModelValues. */ + override lazy val customFieldsMapFut: Future[Map[String, String]] = + mergeModelValues(super.customFieldsMapFut, socialContextForRetweetMap) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/CandidateNTabCopy.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/CandidateNTabCopy.scala new file mode 100644 index 000000000..ef80db5b5 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/CandidateNTabCopy.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.util.MRNtabCopy +import com.twitter.frigate.common.util.MrNtabCopyObjects +import 
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.take.InvalidNtabCopyIdException +import com.twitter.frigate.pushservice.take.NtabCopyIdNotFoundException +/** Resolves a candidate's ntabCopyId to its MRNtabCopy. */ +trait CandidateNTabCopy { + self: PushCandidate => + /** Throws NtabCopyIdNotFoundException when ntabCopyId is empty, and InvalidNtabCopyIdException when MrNtabCopyObjects has no copy for the id. */ + def ntabCopy: MRNtabCopy = + ntabCopyId + .map(getNtabCopyFromCopyId).getOrElse( + throw new NtabCopyIdNotFoundException(s"NtabCopyId not found for $commonRecType")) + + private def getNtabCopyFromCopyId(ntabCopyId: Int): MRNtabCopy = + MrNtabCopyObjects + .getCopyFromId(ntabCopyId).getOrElse( + throw new InvalidNtabCopyIdException(s"Unknown NTab Copy ID: $ntabCopyId")) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/DiscoverTwitterNtabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/DiscoverTwitterNtabRequestHydrator.scala new file mode 100644 index 000000000..4d6d67893 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/DiscoverTwitterNtabRequestHydrator.scala @@ -0,0 +1,58 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.notificationservice.thriftscala._ +import com.twitter.util.Future +import com.twitter.util.Time +/** NTab request hydrator for Discover Twitter pushes: sender id 0 and a tap-through chosen by commonRecType. */ +trait DiscoverTwitterNtabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate => + + override val senderIdFut: Future[Long] = Future.value(0L) + /** Tap-through selected by commonRecType; unlisted types fall back to ConnectTabPushTapThrough. NOTE(review): strict val evaluated during trait initialization, so commonRecType must already be initialized -- confirm. */ + override val tapThroughFut: Future[String] = + commonRecType match { + case CRT.AddressBookUploadPush => Future.value(PushConstants.AddressBookUploadTapThrough) + case CRT.InterestPickerPush => Future.value(PushConstants.InterestPickerTapThrough) + case CRT.CompleteOnboardingPush => + 
Future.value(PushConstants.CompleteOnboardingInterestAddressTapThrough) + case _ => + Future.value(PushConstants.ConnectTabPushTapThrough) + } + + override val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = Future.Nil + + override val facepileUsersFut: Future[Seq[Long]] = Future.Nil + + override val storyContext: Option[StoryContext] = None + + override val inlineCard: Option[InlineCard] = None + + override val socialProofDisplayText: Option[DisplayText] = Some(DisplayText()) + /** Builds a RefreshableNotification request only for ConnectTabPush or onboarding-flow rec types; resolves to None for every other type. The request timestamp is Time.now at build time. */ + override lazy val ntabRequest: Future[Option[CreateGenericNotificationRequest]] = + if (self.commonRecType == CRT.ConnectTabPush || RecTypes.isOnboardingFlowType( + self.commonRecType)) { + Future.join(senderIdFut, displayTextEntitiesFut, facepileUsersFut, tapThroughFut).map { + case (senderId, displayTextEntities, facepileUsers, tapThrough) => + Some( + CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableType + )) + } + } else Future.None +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/EventNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/EventNTabRequestHydrator.scala new file mode 100644 index 000000000..082bc1742 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/EventNTabRequestHydrator.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.notificationservice.thriftscala.DisplayText +import 
com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.util.Future +/** Shared defaults for event NTab entries: sender id 0, no facepile, no story context, no inline card, and an empty social-proof text. */ +trait EventNTabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate => + + override def senderIdFut: Future[Long] = Future.value(0L) + + override def facepileUsersFut: Future[Seq[Long]] = Future.Nil + + override val storyContext: Option[StoryContext] = None + + override val inlineCard: Option[InlineCard] = None + + override val socialProofDisplayText: Option[DisplayText] = Some(DisplayText()) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/F1FirstDegreeTweetNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/F1FirstDegreeTweetNTabRequestHydrator.scala new file mode 100644 index 000000000..18662e257 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/F1FirstDegreeTweetNTabRequestHydrator.scala @@ -0,0 +1,18 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.util.Future +/** Tweet NTab hydration where the Tweet author is the "author" display entity and the sender is the only facepile user. */ +trait F1FirstDegreeTweetNTabRequestHydrator extends TweetNTabRequestHydrator { + self: PushCandidate with TweetCandidate with TweetAuthorDetails => + /** Strict val, so `tweetAuthor` is read while this trait initializes. NOTE(review): confirm `tweetAuthor` is already available at that point. */ + override val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + NotificationServiceSender.getDisplayTextEntityFromUser(tweetAuthor, "author", true).map(_.toSeq) + + override lazy val facepileUsersFut: Future[Seq[Long]] = senderIdFut.map(Seq(_)) + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ListCandidateNTabRequestHydrator.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ListCandidateNTabRequestHydrator.scala new file mode 100644 index 000000000..8475256ad --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ListCandidateNTabRequestHydrator.scala @@ -0,0 +1,34 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.ListRecommendationPushCandidate +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future +/** NTab hydration for List recommendations: the sender is the list owner (0 when absent) and the tap-through opens the list. */ +trait ListCandidateNTabRequestHydrator extends NTabRequestHydrator { + + self: ListRecommendationPushCandidate => + + override lazy val senderIdFut: Future[Long] = + listOwnerId.map(_.getOrElse(0L)) + + override lazy val facepileUsersFut: Future[Seq[Long]] = Future.Nil + + override lazy val storyContext: Option[StoryContext] = None + + override lazy val inlineCard: Option[InlineCard] = None + + override lazy val tapThroughFut: Future[String] = Future.value(s"i/lists/${listId}") + /** A single "title" entity carrying the list name, or no entities when the name is missing. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = listName.map { + listNameOpt => + listNameOpt.toSeq.map { name => + DisplayTextEntity(name = "title", value = TextValue.Text(name)) + } + } + + override val socialProofDisplayText: Option[DisplayText] = Some(DisplayText()) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutCreatorEventNtabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutCreatorEventNtabRequestHydrator.scala new file mode 100644 index 000000000..a245769a6 --- /dev/null +++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutCreatorEventNtabRequestHydrator.scala @@ -0,0 +1,110 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.magic_events.thriftscala.CreatorFanoutType +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutCreatorEventPushCandidate +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.GenericType +import com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.notificationservice.thriftscala.TapThroughAction +import com.twitter.util.Future +import com.twitter.util.Time +/** NTab hydration for creator-event fanout pushes; the creator is both the sender and the sole facepile user. */ +trait MagicFanoutCreatorEventNtabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate with MagicFanoutCreatorEventPushCandidate => + + override val senderIdFut: Future[Long] = Future.value(creatorId) + + override lazy val tapThroughFut: Future[String] = + Future.value(s"/${userProfile.screenName}/superfollows/subscribe") + /** For UserSubscription fanouts, an emphasized "tweet_count" entity when the count is known and at least 10; otherwise None. */ + lazy val optionalTweetCountEntityFut: Future[Option[DisplayTextEntity]] = { + creatorFanoutType match { + case CreatorFanoutType.UserSubscription => + numberOfTweetsFut.map { + _.flatMap { + case numberOfTweets if numberOfTweets >= 10 => + Some( + DisplayTextEntity( + name = "tweet_count", + emphasis = true, + value = TextValue.Text(numberOfTweets.toString))) + case _ => None + } + } + case _ => Future.None + } + } + /** Creator display-name entity followed by the optional tweet-count entity. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + optionalTweetCountEntityFut + .map { tweetCountOpt => + Seq( +
NotificationServiceSender + .getDisplayTextEntityFromUser(hydratedCreator, "display_name", isBold = true), + tweetCountOpt).flatten + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = Future.value(Seq(creatorId)) + + override val storyContext: Option[StoryContext] = None + + override val inlineCard: Option[InlineCard] = None + /** Refreshable type used by `ntabRequest`. NOTE(review): for UserSubscription an unknown tweet count (None) yields None rather than `super.refreshableType` - confirm that is intended. */ + lazy val refreshableTypeFut = { + creatorFanoutType match { + case CreatorFanoutType.UserSubscription => + numberOfTweetsFut.map { + _.flatMap { + case numberOfTweets if numberOfTweets >= 10 => + Some("MagicFanoutCreatorSubscriptionWithTweets") + case _ => super.refreshableType + } + } + case _ => Future.value(super.refreshableType) + } + } + /** A "handle" entity with the profile screen name for UserSubscription fanouts; None for every other fanout type. */ + override lazy val socialProofDisplayText: Option[DisplayText] = { + creatorFanoutType match { + case CreatorFanoutType.UserSubscription => + Some( + DisplayText(values = Seq( + DisplayTextEntity(name = "handle", value = TextValue.Text(userProfile.screenName))))) + case CreatorFanoutType.NewCreator => None + case _ => None + } + } + /** Builds the request with the refreshable type taken from `refreshableTypeFut`. */ + override lazy val ntabRequest = { + Future + .join( + senderIdFut, + displayTextEntitiesFut, + facepileUsersFut, + tapThroughFut, + refreshableTypeFut).map { + case (senderId, displayTextEntities, facepileUsers, tapThrough, refreshableTypeOpt) => + Some( + CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableTypeOpt + )) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutNewsEventNTabRequestHydrator.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutNewsEventNTabRequestHydrator.scala new file mode 100644 index 000000000..202533e3c --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutNewsEventNTabRequestHydrator.scala @@ -0,0 +1,16 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future +/** NTab hydration for news events: the tap-through opens the event and the display text carries the event title. */ +trait MagicFanoutNewsEventNTabRequestHydrator extends EventNTabRequestHydrator { + self: PushCandidate with MagicFanoutEventHydratedCandidate => + override lazy val tapThroughFut: Future[String] = Future.value(s"i/events/$eventId") + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + eventTitleFut.map { eventTitle => + Seq(DisplayTextEntity(name = "title", value = TextValue.Text(eventTitle))) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutProductLaunchNtabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutProductLaunchNtabRequestHydrator.scala new file mode 100644 index 000000000..797dbe890 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutProductLaunchNtabRequestHydrator.scala @@ -0,0 +1,97 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.MagicFanoutProductLaunchCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.notificationservice.thriftscala._ +import com.twitter.util.Future +import com.twitter.util.Time +/** NTab hydration for product-launch fanouts; text, facepile and tap-through come from the notification's productInfo, and the entry is gated by a feature switch. */ +trait
MagicFanoutProductLaunchNtabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate with MagicFanoutProductLaunchCandidate => + + override val senderIdFut: Future[Long] = Future.value(0L) + + override lazy val tapThroughFut: Future[String] = Future.value(getProductLaunchTapThrough()) + /** A single "body" entity from productInfo.body, or Nil when any level of the notification is missing. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = { + Future.value( + frigateNotification.magicFanoutProductLaunchNotification + .flatMap { + _.productInfo.flatMap { + _.body.map { body => + Seq( + DisplayTextEntity(name = "body", value = TextValue.Text(body)), + ) + } + } + }.getOrElse(Nil)) + } + /** productInfo.facepileUsers, or Nil when absent. */ + override lazy val facepileUsersFut: Future[Seq[Long]] = { + Future.value( + frigateNotification.magicFanoutProductLaunchNotification + .flatMap { + _.productInfo.flatMap { + _.facepileUsers + } + }.getOrElse(Nil)) + } + + override val storyContext: Option[StoryContext] = None + + override val inlineCard: Option[InlineCard] = None + /** A "social_context" entity built from productInfo.title, when a title exists. */ + override lazy val socialProofDisplayText: Option[DisplayText] = { + frigateNotification.magicFanoutProductLaunchNotification.flatMap { + _.productInfo.flatMap { + _.title.map { title => + DisplayText(values = + Seq(DisplayTextEntity(name = "social_context", value = TextValue.Text(title)))) + } + } + } + } + + lazy val defaultTapThrough = target.params(PushFeatureSwitchParams.ProductLaunchTapThrough) + /** productInfo.tapThrough when present, otherwise `defaultTapThrough`. */ + private def getProductLaunchTapThrough(): String = { + frigateNotification.magicFanoutProductLaunchNotification match { + case Some(productLaunchNotif) => + productLaunchNotif.productInfo match { + case Some(productInfo) => productInfo.tapThrough.getOrElse(defaultTapThrough) + case _ => defaultTapThrough + } + case _ => defaultTapThrough + } + } + + private lazy val productLaunchNtabRequest: Future[Option[CreateGenericNotificationRequest]] = { + Future + .join(senderIdFut, displayTextEntitiesFut, facepileUsersFut, tapThroughFut) + .map { + case (senderId, displayTextEntities, facepileUsers, tapThrough) => + Some( +
CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableType + )) + } + } + /** Emits the request only when EnableNTabEntriesForProductLaunchNotifications is on; otherwise Future.None. */ + override lazy val ntabRequest: Future[Option[CreateGenericNotificationRequest]] = { + if (target.params(PushFeatureSwitchParams.EnableNTabEntriesForProductLaunchNotifications)) { + productLaunchNtabRequest + } else Future.None + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutSportsEventNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutSportsEventNTabRequestHydrator.scala new file mode 100644 index 000000000..ca3d9faf0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/MagicFanoutSportsEventNTabRequestHydrator.scala @@ -0,0 +1,95 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.MagicFanoutSportsEventCandidate +import com.twitter.frigate.common.base.MagicFanoutSportsScoreInformation +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.GenericType +import com.twitter.notificationservice.thriftscala.TextValue +import
com.twitter.notificationservice.thriftscala.TapThroughAction +import com.twitter.util.Future +import com.twitter.util.Time +/** NTab hydration for sports events, with an optional team facepile and a per-event check against push history. */ +trait MagicFanoutSportsEventNTabRequestHydrator extends EventNTabRequestHydrator { + self: PushCandidate + with MagicFanoutEventHydratedCandidate + with MagicFanoutSportsEventCandidate + with MagicFanoutSportsScoreInformation => + /** NOTE(review): the "pepole_who_dont_follow_me_counter" name below is misspelled; renaming it changes the emitted metric name, so coordinate before fixing. */ + lazy val stats = self.statsReceiver.scope("MagicFanoutSportsEventNtabHydrator") + lazy val inNetworkOnlyCounter = stats.counter("in_network_only") + lazy val facePilesEnabledCounter = stats.counter("face_piles_enabled") + lazy val facePilesDisabledCounter = stats.counter("face_piles_disabled") + lazy val filterPeopleWhoDontFollowMeCounter = stats.counter("pepole_who_dont_follow_me_counter") + + override lazy val tapThroughFut: Future[String] = { + Future.value(s"i/events/$eventId") + } + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + eventTitleFut.map { eventTitle => + Seq(DisplayTextEntity(name = "title", value = TextValue.Text(eventTitle))) + } + /** Away and home team user ids when the facepile switch is on and neither of the target's two notification filters is set; otherwise empty. */ + override lazy val facepileUsersFut: Future[Seq[Long]] = + if (target.params(FS.EnableNTabFacePileForSportsEventNotifications)) { + Future + .join( + target.notificationsFromOnlyPeopleIFollow, + target.filterNotificationsFromPeopleThatDontFollowMe, + awayTeamInfo, + homeTeamInfo).map { + case (inNetworkOnly, filterPeopleWhoDontFollowMe, away, home) + if !(inNetworkOnly || filterPeopleWhoDontFollowMe) => + val awayTeamId = away.flatMap(_.twitterUserId) + val homeTeamId = home.flatMap(_.twitterUserId) + facePilesEnabledCounter.incr + Seq(awayTeamId, homeTeamId).flatten + case (inNetworkOnly, filterPeopleWhoDontFollowMe, _, _) => + facePilesDisabledCounter.incr + if (inNetworkOnly) inNetworkOnlyCounter.incr + if (filterPeopleWhoDontFollowMe) filterPeopleWhoDontFollowMeCounter.incr + Seq.empty[Long] + } + } else Future.Nil + + private lazy val sportsNtabRequest: Future[Option[CreateGenericNotificationRequest]] = { + Future +
.join(senderIdFut, displayTextEntitiesFut, facepileUsersFut, tapThroughFut) + .map { + case (senderId, displayTextEntities, facepileUsers, tapThrough) => + Some( + CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableType + )) + } + } + /** Yields Future.None when the switch is off or the push history already holds a notification for this eventId. */ + override lazy val ntabRequest: Future[Option[CreateGenericNotificationRequest]] = { + if (target.params(FS.EnableNTabEntriesForSportsEventNotifications)) { + self.target.history.flatMap { pushHistory => + val prevEventHistoryExists = pushHistory.sortedHistory.exists { + case (_, notification) => + notification.magicFanoutEventNotification.exists(_.eventId == self.eventId) + } + if (prevEventHistoryExists) { + Future.None + } else sportsNtabRequest + } + } else Future.None + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequest.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequest.scala new file mode 100644 index 000000000..ea99ea68d --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequest.scala @@ -0,0 +1,10 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.util.Future +/** Anything that can produce an optional NTab generic-notification request. */ +trait NTabRequest { + + def ntabRequest: Future[Option[CreateGenericNotificationRequest]] + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequestHydrator.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequestHydrator.scala new file mode 100644 index 000000000..01df5365f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabRequestHydrator.scala @@ -0,0 +1,64 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationRequest +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.GenericType +import com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.TapThroughAction +import com.twitter.util.Future +import com.twitter.util.Time +/** Base NTab hydrator: implementors supply the members below and `ntabRequest` assembles them into a CreateGenericNotificationRequest. */ +trait NTabRequestHydrator extends NTabRequest with CandidateNTabCopy { + self: PushCandidate => + + // Represents the sender of the recommendation + def senderIdFut: Future[Long] + + // Consists of a sequence representing the social context user ids. + def facepileUsersFut: Future[Seq[Long]] + + // Story Context is required for Tweet Recommendations + // Contains the Tweet ID of the recommended Tweet + def storyContext: Option[StoryContext] + + // Inline card used to render a generic notification.
+ def inlineCard: Option[InlineCard] + + // Represents where the recommendation should land when clicked + def tapThroughFut: Future[String] + + // Hydration for fields that are used within the NTab copy + def displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] + + // Represents the social proof text that is needed for specific NTab copies + def socialProofDisplayText: Option[DisplayText] + + // MagicRecs NTab entries always use RefreshableType as the Generic Type + final val genericType: GenericType = GenericType.RefreshableNotification + /** Defaults to the refreshable type of the candidate's NTab copy. */ + def refreshableType: Option[String] = ntabCopy.refreshableType + /** Joins the sender, display-text, facepile and tap-through futures and wraps the resulting request in Some. */ + lazy val ntabRequest: Future[Option[CreateGenericNotificationRequest]] = { + Future.join(senderIdFut, displayTextEntitiesFut, facepileUsersFut, tapThroughFut).map { + case (senderId, displayTextEntities, facepileUsers, tapThrough) => + Some( + CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableType + )) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabSocialContext.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabSocialContext.scala new file mode 100644 index 000000000..17b43f457 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/NTabSocialContext.scala @@ -0,0 +1,46 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.common.base.SocialContextUserDetails +import
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.util.Future +/** Helpers that pick which social-context users an NTab entry names. */ +trait NTabSocialContext { + self: PushCandidate with SocialContextActions with SocialContextUserDetails => + + private def ntabDisplayUserIds: Seq[Long] = + socialContextUserIds.take(ntabDisplayUserIdsLength) + /** Two users are shown only when there are exactly two social-context users; otherwise one. */ + def ntabDisplayUserIdsLength: Int = + if (socialContextUserIds.size == 2) 2 else 1 + /** (name, id) pairs for the displayed users that have a profile. NOTE(review): if `userObjMap` is a plain Map, `userObjMap(id)` throws for a missing id - confirm scUserMap covers every id. */ + def ntabDisplayNamesAndIds: Future[Seq[(String, Long)]] = + scUserMap.map { userObjMap => + ntabDisplayUserIds.flatMap { id => + userObjMap(id).flatMap(_.profile.map(_.name)).map { name => (name, id) } + } + } + /** (name, id) pairs for the first users of the social context as ranked by CandidateUtil.getRankedSocialContext. */ + def rankedNtabDisplayNamesAndIds(defaultToRecency: Boolean): Future[Seq[(String, Long)]] = + scUserMap.flatMap { userObjMap => + val rankedSocialContextActivityFut = + CandidateUtil.getRankedSocialContext( + socialContextActions, + target.seedsWithWeight, + defaultToRecency) + rankedSocialContextActivityFut.map { rankedSocialContextActivity => + val ntabDisplayUserIds = + rankedSocialContextActivity.map(_.userId).take(ntabDisplayUserIdsLength) + ntabDisplayUserIds.flatMap { id => + userObjMap(id).flatMap(_.profile.map(_.name)).map { name => (name, id) } + } + } + } + /** Number of social-context users beyond those returned by `ntabDisplayNamesAndIds`, never negative. */ + def otherCount: Future[Int] = + ntabDisplayNamesAndIds.map { + case namesWithIdSeq => + Math.max(0, socialContextUserIds.length - namesWithIdSeq.size) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/OutOfNetworkTweetNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/OutOfNetworkTweetNTabRequestHydrator.scala new file mode 100644 index 000000000..a2b99d1af --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/OutOfNetworkTweetNTabRequestHydrator.scala @@ -0,0 +1,78 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.TopicCandidate +import
com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.common.rec_types.RecTypes._ +import com.twitter.frigate.common.util.MrNtabCopyObjects +import com.twitter.frigate.pushservice.exception.TweetNTabRequestHydratorException +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future +/** NTab hydration for out-of-network Tweets; uses the topic copy when a localized topic entity exists and the rec type's topic-copy switch is on. */ +trait OutOfNetworkTweetNTabRequestHydrator extends TweetNTabRequestHydrator { + self: PushCandidate + with TweetCandidate + with TweetAuthorDetails + with TopicCandidate + with TweetDetails => + /** Each flag is true when the rec type belongs to the named group and that group's topic-copy switch is enabled. */ + lazy val useTopicCopyForMBCGNtab = mrModelingBasedTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableMrModelingBasedCandidatesTopicCopy) + lazy val useTopicCopyForFrsNtab = frsTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableFrsTweetCandidatesTopicCopy) + lazy val useTopicCopyForTagspaceNtab = tagspaceTypes.contains(commonRecType) && target.params( + PushFeatureSwitchParams.EnableHashspaceCandidatesTopicCopy) + /** Immersive-timeline URL for videos when that switch is on, else screenName/status/tweetId; raises TweetNTabRequestHydratorException when the author or the author's profile is missing. */ + override lazy val tapThroughFut: Future[String] = { + if (hasVideo && self.target.params( + PushFeatureSwitchParams.EnableLaunchVideosInImmersiveExplore)) { + Future.value( + s"i/immersive_timeline?display_location=notification&include_pinned_tweet=true&pinned_tweet_id=${tweetId}&tl_type=imv") + } else { + tweetAuthor.map { + case Some(author) => + val authorProfile = author.profile.getOrElse( + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author profile for: ${author.id}")) +
s"${authorProfile.screenName}/status/${tweetId.toString}" + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author and target details to generate tap through for Tweet: $tweetId") + } + } + } + /** The author entity is named "tweetAuthorName" for the topic copy and "author" otherwise. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + if (localizedUttEntity.isDefined && + (useTopicCopyForMBCGNtab || useTopicCopyForFrsNtab || useTopicCopyForTagspaceNtab)) { + NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "tweetAuthorName", isBold = true).map(_.toSeq) + } else { + NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "author", isBold = true).map(_.toSeq) + } + /** TopicTweet's refreshable type for the topic copy, otherwise the candidate's own NTab copy type. */ + override lazy val refreshableType: Option[String] = { + if (localizedUttEntity.isDefined && + (useTopicCopyForMBCGNtab || useTopicCopyForFrsNtab || useTopicCopyForTagspaceNtab)) { + MrNtabCopyObjects.TopicTweet.refreshableType + } else ntabCopy.refreshableType + } + /** A "topic_name" entity with the localized topic name for the topic copy; None otherwise. */ + override def socialProofDisplayText: Option[DisplayText] = { + if (localizedUttEntity.isDefined && + (useTopicCopyForMBCGNtab || useTopicCopyForFrsNtab || useTopicCopyForTagspaceNtab)) { + localizedUttEntity.map(uttEntity => + DisplayText(values = + Seq(DisplayTextEntity("topic_name", TextValue.Text(uttEntity.localizedNameForDisplay))))) + } else None + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = senderIdFut.map(Seq(_)) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ScheduledSpaceNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ScheduledSpaceNTabRequestHydrator.scala new file mode 100644 index 000000000..4673a001e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/ScheduledSpaceNTabRequestHydrator.scala @@ -0,0 +1,106 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.SpaceCandidate +import com.twitter.frigate.common.util.MrNtabCopyObjects +import
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.ScheduledSpaceSpeakerPushCandidate +import com.twitter.frigate.pushservice.model.ScheduledSpaceSubscriberPushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.frigate.thriftscala.SpaceNotificationType +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.notificationservice.thriftscala._ +import com.twitter.util.Future +import com.twitter.util.Time +/** NTab hydration for the speaker of a scheduled Space: no facepile and an empty social-proof text. */ +trait ScheduledSpaceSpeakerNTabRequestHydrator extends ScheduledSpaceNTabRequestHydrator { + self: PushCandidate with ScheduledSpaceSpeakerPushCandidate => + /** Copy type chosen by notification type (PreSpaceBroadcast or AtSpaceBroadcast); throws IllegalStateException for any other type. */ + override def refreshableType: Option[String] = { + frigateNotification.spaceNotification.flatMap { spaceNotification => + spaceNotification.spaceNotificationType.flatMap { + case SpaceNotificationType.PreSpaceBroadcast => + MrNtabCopyObjects.ScheduledSpaceSpeakerSoon.refreshableType + case SpaceNotificationType.AtSpaceBroadcast => + MrNtabCopyObjects.ScheduledSpaceSpeakerNow.refreshableType + case _ => + throw new IllegalStateException("Unexpected SpaceNotificationType") + } + } + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = Future.Nil + + override val socialProofDisplayText: Option[DisplayText] = Some(DisplayText()) +} +/** NTab hydration for subscribers of a scheduled Space; the host is the only facepile user. */ +trait ScheduledSpaceSubscriberNTabRequestHydrator extends ScheduledSpaceNTabRequestHydrator { + self: PushCandidate with ScheduledSpaceSubscriberPushCandidate => + /** The host id as a one-element facepile; a failed future when there is no host id. */ + override lazy val facepileUsersFut: Future[Seq[Long]] = { + hostId match { + case Some(spaceHostId) => Future.value(Seq(spaceHostId)) + case _ => + Future.exception( + new IllegalStateException( + "Unable to get host id for ScheduledSpaceSubscriberNTabRequestHydrator")) + } + } + + override val socialProofDisplayText: Option[DisplayText] = None +} +/** Shared hydration for scheduled-Space entries: host as sender, Space deep link as tap-through, optional expiry. */ +trait ScheduledSpaceNTabRequestHydrator extends NTabRequestHydrator { +
self: PushCandidate with SpaceCandidate => + + def hydratedHost: Option[User] + /** Host id. A missing host now fails the future (as the subscriber facepile does) instead of throwing synchronously while the lazy val is evaluated. */ + override lazy val senderIdFut: Future[Long] = { + hostId match { + case Some(spaceHostId) => Future.value(spaceHostId) + case _ => Future.exception(new IllegalStateException("No Space Host Id")) + } + } + + override lazy val tapThroughFut: Future[String] = Future.value(s"i/spaces/$spaceId") + /** The hydrated host rendered as a bold "space_host_name" entity, when a host is available. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + NotificationServiceSender + .getDisplayTextEntityFromUser( + Future.value(hydratedHost), + fieldName = "space_host_name", + isBold = true + ).map(_.toSeq) + + override val storyContext: Option[StoryContext] = None + + override val inlineCard: Option[InlineCard] = None + /** Builds the request and sets expiryTimeMillis to now plus the configured TTL when EnableSpacesTtlForNtab is on. */ + override lazy val ntabRequest: Future[Option[CreateGenericNotificationRequest]] = { + Future.join(senderIdFut, displayTextEntitiesFut, facepileUsersFut, tapThroughFut).map { + case (senderId, displayTextEntities, facepileUsers, tapThrough) => + val expiryTimeMillis = if (target.params(PushFeatureSwitchParams.EnableSpacesTtlForNtab)) { + Some( + (Time.now + target.params( + PushFeatureSwitchParams.SpaceNotificationsTTLDurationForNTab)).inMillis) + } else None + + Some( + CreateGenericNotificationRequest( + userId = target.targetId, + senderId = senderId, + genericType = GenericType.RefreshableNotification, + displayText = DisplayText(values = displayTextEntities), + facepileUsers = facepileUsers, + timestampMillis = Time.now.inMillis, + tapThroughAction = Some(TapThroughAction(Some(tapThrough))), + impressionId = Some(impressionId), + socialProofText = socialProofDisplayText, + context = storyContext, + inlineCard = inlineCard, + refreshableType = refreshableType, + expiryTimeMillis = expiryTimeMillis + )) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/SubscribedSearchTweetNtabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/SubscribedSearchTweetNtabRequestHydrator.scala
new file mode 100644 index 000000000..caa2a8cd0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/SubscribedSearchTweetNtabRequestHydrator.scala @@ -0,0 +1,23 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.SubscribedSearchTweetPushCandidate +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future +/** NTab hydration for subscribed-search Tweets: the social proof carries the search term and the tap-through is the candidate's ntabLandingUrl. */ +trait SubscribedSearchTweetNtabRequestHydrator extends TweetNTabRequestHydrator { + self: SubscribedSearchTweetPushCandidate => + override def displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "tweetAuthor", isBold = true).map(_.toSeq) + /** A single "search_query" entity holding `searchTerm`. */ + override def socialProofDisplayText: Option[DisplayText] = { + Some(DisplayText(values = Seq(DisplayTextEntity("search_query", TextValue.Text(searchTerm))))) + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = senderIdFut.map(Seq(_)) + + override lazy val tapThroughFut: Future[String] = + Future.value(self.ntabLandingUrl) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopTweetImpressionsNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopTweetImpressionsNTabRequestHydrator.scala new file mode 100644 index 000000000..a67dee399 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopTweetImpressionsNTabRequestHydrator.scala @@ -0,0 +1,37 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.TopTweetImpressionsCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.notificationservice.thriftscala.DisplayText
+import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.StoryContextValue +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future +/** NTab request overrides for "top tweet impressions" candidates; mixed into a PushCandidate with TopTweetImpressionsCandidate (self-type). */ +trait TopTweetImpressionsNTabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate with TopTweetImpressionsCandidate => + /** Tap-through path of the form <targetId>/status/<tweetId>. */ + override lazy val tapThroughFut: Future[String] = + Future.value(s"${target.targetId}/status/$tweetId") + /** Constant sender id 0. */ + override val senderIdFut: Future[Long] = Future.value(0L) + /** No facepile users. */ + override val facepileUsersFut: Future[Seq[Long]] = Future.Nil + /** Story context referencing the candidate's tweet. Lazy on purpose: a strict val here would read the self-type member tweetId while this trait is being initialised, i.e. before a concrete class that defines tweetId as a plain val has assigned it, and would capture the default 0. */ + override lazy val storyContext: Option[StoryContext] = + Some(StoryContext(altText = "", value = Some(StoryContextValue.Tweets(Seq(tweetId))))) + + override val inlineCard: Option[InlineCard] = None + /** Single "num_impressions" number entity taken from impressionsCount. */ + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = { + Future.value( + Seq( + DisplayTextEntity(name = "num_impressions", value = TextValue.Number(self.impressionsCount)) + ) + ) + } + + override def socialProofDisplayText: Option[DisplayText] = None +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopicProofTweetNtabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopicProofTweetNtabRequestHydrator.scala new file mode 100644 index 000000000..17519efda --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TopicProofTweetNtabRequestHydrator.scala @@ -0,0 +1,60 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.pushservice.model.TopicProofTweetPushCandidate +import com.twitter.frigate.pushservice.exception.TweetNTabRequestHydratorException +import com.twitter.frigate.pushservice.exception.UttEntityNotFoundException +import 
com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.StoryContextValue +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future + +trait TopicProofTweetNtabRequestHydrator extends NTabRequestHydrator { + self: TopicProofTweetPushCandidate => + + override def displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "tweetAuthorName", true) + .map(_.toSeq) + + private lazy val uttEntity = localizedUttEntity.getOrElse( + throw new UttEntityNotFoundException( + s"${getClass.getSimpleName} UttEntity missing for $tweetId") + ) + + override lazy val tapThroughFut: Future[String] = { + tweetAuthor.map { + case Some(author) => + val authorProfile = author.profile.getOrElse( + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author profile for: ${author.id}")) + s"${authorProfile.screenName}/status/${tweetId.toString}" + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author and target details to generate tap through for Tweet: $tweetId") + } + } + + override lazy val socialProofDisplayText: Option[DisplayText] = { + Some( + DisplayText(values = + Seq(DisplayTextEntity("topic_name", TextValue.Text(uttEntity.localizedNameForDisplay)))) + ) + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = senderIdFut.map(Seq(_)) + + override val inlineCard = None + + override def storyContext: Option[StoryContext] = Some( + StoryContext("", Some(StoryContextValue.Tweets(Seq(tweetId))))) + + override def senderIdFut: Future[Long] = + tweetAuthor.map { + case Some(author) => author.id + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain Author ID 
for: $commonRecType") + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TrendTweetNtabHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TrendTweetNtabHydrator.scala new file mode 100644 index 000000000..07946a220 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TrendTweetNtabHydrator.scala @@ -0,0 +1,61 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.TrendTweetCandidate +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.exception.TweetNTabRequestHydratorException +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.frigate.pushservice.util.EmailLandingPageExperimentUtil +import com.twitter.notificationservice.thriftscala.DisplayText +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.notificationservice.thriftscala.TextValue +import com.twitter.util.Future + +trait TrendTweetNtabHydrator extends TweetNTabRequestHydrator { + self: PushCandidate with TrendTweetCandidate with TweetCandidate with TweetAuthorDetails => + + private lazy val trendTweetNtabStats = self.statsReceiver.scope("trend_tweet_ntab") + + private lazy val ruxLandingOnNtabCounter = + trendTweetNtabStats.counter("use_rux_landing_on_ntab") + + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = + NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, fieldName = "author_name", isBold = true) + .map( + _.toSeq :+ DisplayTextEntity( + name = "trend_name", + value = TextValue.Text(trendName), + emphasis = true) + ) + + override lazy val facepileUsersFut: Future[Seq[Long]] = 
senderIdFut.map(Seq(_)) + + override lazy val socialProofDisplayText: Option[DisplayText] = None + + override def refreshableType: Option[String] = ntabCopy.refreshableType + + override lazy val tapThroughFut: Future[String] = { + Future.join(tweetAuthor, target.deviceInfo).map { + case (Some(author), Some(deviceInfo)) => + val enableRuxLandingPage = deviceInfo.isRuxLandingPageEligible && target.params( + PushFeatureSwitchParams.EnableNTabRuxLandingPage) + val authorProfile = author.profile.getOrElse( + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author profile for: ${author.id}")) + + if (enableRuxLandingPage) { + ruxLandingOnNtabCounter.incr() + EmailLandingPageExperimentUtil.createNTabRuxLandingURI(authorProfile.screenName, tweetId) + } else { + s"${authorProfile.screenName}/status/${tweetId.toString}" + } + + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author and target details to generate tap through for Tweet: $tweetId") + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetFavoriteNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetFavoriteNTabRequestHydrator.scala new file mode 100644 index 000000000..52a643b84 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetFavoriteNTabRequestHydrator.scala @@ -0,0 +1,38 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.common.base.SocialContextUserDetails +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import 
com.twitter.util.Future + +trait TweetFavoriteNTabRequestHydrator extends TweetNTabRequestHydrator with NTabSocialContext { + self: PushCandidate + with TweetCandidate + with TweetAuthor + with TweetAuthorDetails + with SocialContextActions + with SocialContextUserDetails => + + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = { + Future + .join( + NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "tweetAuthorName", isBold = false), + NotificationServiceSender + .generateSocialContextTextEntities( + rankedNtabDisplayNamesAndIds(defaultToRecency = false), + otherCount) + ) + .map { + case (authorDisplay, socialContextDisplay) => + socialContextDisplay ++ authorDisplay + } + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = Future.value(socialContextUserIds) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetNTabRequestHydrator.scala new file mode 100644 index 000000000..bfa8507f0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetNTabRequestHydrator.scala @@ -0,0 +1,55 @@ +package com.twitter.frigate.pushservice.model.ntab + +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.exception.TweetNTabRequestHydratorException +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.notificationservice.thriftscala.InlineCard +import com.twitter.notificationservice.thriftscala.StoryContext +import com.twitter.notificationservice.thriftscala.StoryContextValue +import com.twitter.frigate.pushservice.util.EmailLandingPageExperimentUtil +import com.twitter.notificationservice.thriftscala._ +import com.twitter.util.Future + +trait 
TweetNTabRequestHydrator extends NTabRequestHydrator { + self: PushCandidate with TweetCandidate with TweetAuthorDetails => + + override def senderIdFut: Future[Long] = + tweetAuthor.map { + case Some(author) => author.id + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain Author ID for: $commonRecType") + } + + override def storyContext: Option[StoryContext] = Some( + StoryContext( + altText = "", + value = Some(StoryContextValue.Tweets(Seq(tweetId))), + details = None + )) + + override def inlineCard: Option[InlineCard] = Some(InlineCard.TweetCard(TweetCard(tweetId))) + + override lazy val tapThroughFut: Future[String] = { + Future.join(tweetAuthor, target.deviceInfo).map { + case (Some(author), Some(deviceInfo)) => + val enableRuxLandingPage = deviceInfo.isRuxLandingPageEligible && target.params( + PushFeatureSwitchParams.EnableNTabRuxLandingPage) + val authorProfile = author.profile.getOrElse( + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author profile for: ${author.id}")) + if (enableRuxLandingPage) { + EmailLandingPageExperimentUtil.createNTabRuxLandingURI(authorProfile.screenName, tweetId) + } else { + s"${authorProfile.screenName}/status/${tweetId.toString}" + } + case _ => + throw new TweetNTabRequestHydratorException( + s"Unable to obtain author and target details to generate tap through for Tweet: $tweetId") + } + } + + override def socialProofDisplayText: Option[DisplayText] = None +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetRetweetNTabRequestHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetRetweetNTabRequestHydrator.scala new file mode 100644 index 000000000..c142fbfba --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/model/ntab/TweetRetweetNTabRequestHydrator.scala @@ -0,0 +1,38 @@ +package com.twitter.frigate.pushservice.model.ntab + +import 
com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.common.base.SocialContextUserDetails +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.take.NotificationServiceSender +import com.twitter.notificationservice.thriftscala.DisplayTextEntity +import com.twitter.util.Future + +trait TweetRetweetNTabRequestHydrator extends TweetNTabRequestHydrator with NTabSocialContext { + self: PushCandidate + with TweetCandidate + with TweetAuthor + with TweetAuthorDetails + with SocialContextActions + with SocialContextUserDetails => + + override lazy val displayTextEntitiesFut: Future[Seq[DisplayTextEntity]] = { + Future + .join( + NotificationServiceSender + .getDisplayTextEntityFromUser(tweetAuthor, "tweetAuthorName", isBold = false), + NotificationServiceSender + .generateSocialContextTextEntities( + rankedNtabDisplayNamesAndIds(defaultToRecency = false), + otherCount) + ) + .map { + case (authorDisplay, socialContextDisplay) => + socialContextDisplay ++ authorDisplay + } + } + + override lazy val facepileUsersFut: Future[Seq[Long]] = Future.value(socialContextUserIds) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/DeployConfigModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/DeployConfigModule.scala new file mode 100644 index 000000000..238efe0bb --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/DeployConfigModule.scala @@ -0,0 +1,68 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Provides +import com.google.inject.Singleton +import com.twitter.abdecider.LoggingABDecider +import com.twitter.decider.Decider +import com.twitter.featureswitches.v2.FeatureSwitches +import 
com.twitter.finagle.mtls.authentication.ServiceIdentifier +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tunable.StandardTunableMap +import com.twitter.frigate.pushservice.config.DeployConfig +import com.twitter.frigate.pushservice.config.ProdConfig +import com.twitter.frigate.pushservice.config.StagingConfig +import com.twitter.frigate.pushservice.params.ShardParams +import com.twitter.inject.TwitterModule +import com.twitter.inject.annotations.Flag +import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule.ConfigRepoLocalPath +import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule.ServiceLocal + +object DeployConfigModule extends TwitterModule { + + @Provides + @Singleton + def providesDeployConfig( + @Flag(FlagName.numShards) numShards: Int, + @Flag(FlagName.shardId) shardId: Int, + @Flag(FlagName.isInMemCacheOff) inMemCacheOff: Boolean, + @Flag(ServiceLocal) isServiceLocal: Boolean, + @Flag(ConfigRepoLocalPath) localConfigRepoPath: String, + serviceIdentifier: ServiceIdentifier, + decider: Decider, + abDecider: LoggingABDecider, + featureSwitches: FeatureSwitches, + statsReceiver: StatsReceiver + ): DeployConfig = { + val tunableMap = if (serviceIdentifier.service.contains("canary")) { + StandardTunableMap(id = "frigate-pushservice-canary") + } else { StandardTunableMap(id = serviceIdentifier.service) } + val shardParams = ShardParams(numShards, shardId) + serviceIdentifier.environment match { + case "devel" | "staging" => + StagingConfig( + isServiceLocal = isServiceLocal, + localConfigRepoPath = localConfigRepoPath, + inMemCacheOff = inMemCacheOff, + decider = decider, + abDecider = abDecider, + featureSwitches = featureSwitches, + serviceIdentifier = serviceIdentifier, + tunableMap = tunableMap, + shardParams = shardParams + )(statsReceiver) + case "prod" => + ProdConfig( + isServiceLocal = isServiceLocal, + localConfigRepoPath = localConfigRepoPath, + inMemCacheOff = 
inMemCacheOff, + decider = decider, + abDecider = abDecider, + featureSwitches = featureSwitches, + serviceIdentifier = serviceIdentifier, + tunableMap = tunableMap, + shardParams = shardParams + )(statsReceiver) + case env => throw new Exception(s"Unknown environment $env") + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FilterModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FilterModule.scala new file mode 100644 index 000000000..579f65acf --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FilterModule.scala @@ -0,0 +1,16 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Provides +import javax.inject.Singleton +import com.twitter.discovery.common.nackwarmupfilter.NackWarmupFilter +import com.twitter.inject.TwitterModule +import com.twitter.inject.annotations.Flag +import com.twitter.util.Duration +/** Provides a singleton NackWarmupFilter constructed from the value of the FlagName.nackWarmupDuration flag. */ +object FilterModule extends TwitterModule { + @Singleton + @Provides + def providesNackWarmupFilter( + @Flag(FlagName.nackWarmupDuration) warmupDuration: Duration + ): NackWarmupFilter = new NackWarmupFilter(warmupDuration) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FlagModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FlagModule.scala new file mode 100644 index 000000000..4306e47ca --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/FlagModule.scala @@ -0,0 +1,56 @@ +package com.twitter.frigate.pushservice.module + +import com.twitter.app.Flag +import com.twitter.inject.TwitterModule +import com.twitter.util.Duration +import com.twitter.conversions.DurationOps._ +/** Flag names shared between the flag definitions and the @Flag injection points that read them. */ +object FlagName { + final val shardId = "service.shard" + final val numShards = "service.num_shards" + final val nackWarmupDuration = "service.nackWarmupDuration" + final val isInMemCacheOff = "service.isInMemCacheOff" +} + +object FlagModule extends TwitterModule { + 
+ val shardId: Flag[Int] = flag[Int]( + name = FlagName.shardId, + help = "Service shard id" + ) + + val numShards: Flag[Int] = flag[Int]( + name = FlagName.numShards, + help = "Number of shards" + ) + + val mrLoggerIsTraceAll: Flag[Boolean] = flag[Boolean]( + name = "service.isTraceAll", + help = "atraceflag", + default = false + ) + + val mrLoggerNthLog: Flag[Boolean] = flag[Boolean]( + name = "service.nthLog", + help = "nthlog", + default = false + ) + + val inMemCacheOff: Flag[Boolean] = flag[Boolean]( + name = FlagName.isInMemCacheOff, + help = "is inMemCache Off (currently only applies for user_health_model_score_store_cache)", + default = false + ) + + val mrLoggerNthVal: Flag[Long] = flag[Long]( + name = "service.nthVal", + help = "nthlogval", + default = 0, + ) + + val nackWarmupDuration: Flag[Duration] = flag[Duration]( + name = FlagName.nackWarmupDuration, + help = "duration to nack at startup", + default = 0.seconds + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/LoggedOutPushTargetUserBuilderModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/LoggedOutPushTargetUserBuilderModule.scala new file mode 100644 index 000000000..d4bceb549 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/LoggedOutPushTargetUserBuilderModule.scala @@ -0,0 +1,27 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Provides +import com.google.inject.Singleton +import com.twitter.decider.Decider +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.target.LoggedOutPushTargetUserBuilder +import com.twitter.frigate.pushservice.config.DeployConfig +import com.twitter.inject.TwitterModule + +object LoggedOutPushTargetUserBuilderModule extends TwitterModule { + + @Provides + @Singleton + def providesLoggedOutPushTargetUserBuilder( + decider: Decider, + config: DeployConfig, + statsReceiver: StatsReceiver + ): 
LoggedOutPushTargetUserBuilder = { + LoggedOutPushTargetUserBuilder( + historyStore = config.loggedOutHistoryStore, + inputDecider = decider, + inputAbDecider = config.abDecider, + loggedOutPushInfoStore = config.loggedOutPushInfoStore + )(statsReceiver) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushHandlerModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushHandlerModule.scala new file mode 100644 index 000000000..c71ff24dd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushHandlerModule.scala @@ -0,0 +1,78 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Provides +import com.google.inject.Singleton +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.target.LoggedOutPushTargetUserBuilder +import com.twitter.frigate.pushservice.refresh_handler.RefreshForPushHandler +import com.twitter.frigate.pushservice.config.DeployConfig +import com.twitter.frigate.pushservice.send_handler.SendHandler +import com.twitter.frigate.pushservice.take.candidate_validator.RFPHCandidateValidator +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPostCandidateValidator +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPreCandidateValidator +import com.twitter.frigate.pushservice.refresh_handler.LoggedOutRefreshForPushHandler +import com.twitter.frigate.pushservice.take.SendHandlerNotifier +import com.twitter.frigate.pushservice.target.PushTargetUserBuilder +import com.twitter.inject.TwitterModule + +object PushHandlerModule extends TwitterModule { + + @Provides + @Singleton + def providesRefreshForPushHandler( + pushTargetUserBuilder: PushTargetUserBuilder, + config: DeployConfig, + statsReceiver: StatsReceiver + ): RefreshForPushHandler = { + new RefreshForPushHandler( + pushTargetUserBuilder = pushTargetUserBuilder, + candSourceGenerator = 
config.candidateSourceGenerator, + rfphRanker = config.rfphRanker, + candidateHydrator = config.candidateHydrator, + candidateValidator = new RFPHCandidateValidator(config), + rfphTakeStepUtil = config.rfphTakeStepUtil, + rfphRestrictStep = config.rfphRestrictStep, + rfphNotifier = config.rfphNotifier, + rfphStatsRecorder = config.rfphStatsRecorder, + mrRequestScriberNode = config.mrRequestScriberNode, + rfphFeatureHydrator = config.rfphFeatureHydrator, + rfphPrerankFilter = config.rfphPrerankFilter, + rfphLightRanker = config.rfphLightRanker + )(statsReceiver) + } + + @Provides + @Singleton + def providesSendHandler( + pushTargetUserBuilder: PushTargetUserBuilder, + config: DeployConfig, + statsReceiver: StatsReceiver + ): SendHandler = { + new SendHandler( + pushTargetUserBuilder, + new SendHandlerPreCandidateValidator(config), + new SendHandlerPostCandidateValidator(config), + new SendHandlerNotifier(config.candidateNotifier, statsReceiver.scope("SendHandlerNotifier")), + config.sendHandlerCandidateHydrator, + config.featureHydrator, + config.sendHandlerPredicateUtil, + config.mrRequestScriberNode)(statsReceiver, config) + } + + @Provides + @Singleton + def providesLoggedOutRefreshForPushHandler( + loPushTargetUserBuilder: LoggedOutPushTargetUserBuilder, + config: DeployConfig, + statsReceiver: StatsReceiver + ): LoggedOutRefreshForPushHandler = { + new LoggedOutRefreshForPushHandler( + loPushTargetUserBuilder = loPushTargetUserBuilder, + loPushCandidateSourceGenerator = config.loCandidateSourceGenerator, + candidateHydrator = config.candidateHydrator, + loRanker = config.loggedOutRFPHRanker, + loRfphNotifier = config.loRfphNotifier, + loMrRequestScriberNode = config.loggedOutMrRequestScriberNode, + )(statsReceiver) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushServiceDarkTrafficModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushServiceDarkTrafficModule.scala new file mode 100644 index 
000000000..97e484492 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushServiceDarkTrafficModule.scala @@ -0,0 +1,33 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Singleton +import com.twitter.decider.Decider +import com.twitter.decider.RandomRecipient +import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient +import com.twitter.frigate.pushservice.thriftscala.PushService +import com.twitter.inject.Injector +import com.twitter.inject.thrift.modules.ReqRepDarkTrafficFilterModule + +/** + * The darkTraffic filter samples all requests by default + * and sets the diffy dest to nil for non-prod environments. + */ +@Singleton +object PushServiceDarkTrafficModule + extends ReqRepDarkTrafficFilterModule[PushService.ReqRepServicePerEndpoint] + with MtlsClient { + + override def label: String = "frigate-pushservice-diffy-proxy" + + /** + * Function to determine if the request should be "sampled", i.e. + * sent to the dark service. + * + * @param injector the [[com.twitter.inject.Injector]] for use in determining if a given request + * should be forwarded or not. 
+ */ + override protected def enableSampling(injector: Injector): Any => Boolean = { + val decider = injector.instance[Decider] + _ => decider.isAvailable("frigate_pushservice_dark_traffic_percent", Some(RandomRecipient)) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushTargetUserBuilderModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushTargetUserBuilderModule.scala new file mode 100644 index 000000000..ccdd1f110 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/PushTargetUserBuilderModule.scala @@ -0,0 +1,64 @@ +package com.twitter.frigate.pushservice.module + +import com.google.inject.Provides +import com.google.inject.Singleton +import com.twitter.decider.Decider +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.config.DeployConfig +import com.twitter.frigate.pushservice.target.PushTargetUserBuilder +import com.twitter.inject.TwitterModule + +object PushTargetUserBuilderModule extends TwitterModule { + + @Provides + @Singleton + def providesPushTargetUserBuilder( + decider: Decider, + config: DeployConfig, + statsReceiver: StatsReceiver + ): PushTargetUserBuilder = { + PushTargetUserBuilder( + historyStore = config.historyStore, + emailHistoryStore = config.emailHistoryStore, + labeledPushRecsStore = config.labeledPushRecsDecideredStore, + onlineUserHistoryStore = config.onlineUserHistoryStore, + pushRecItemsStore = config.pushRecItemStore, + userStore = config.safeUserStore, + pushInfoStore = config.pushInfoStore, + userCountryStore = config.userCountryStore, + userUtcOffsetStore = config.userUtcOffsetStore, + dauProbabilityStore = config.dauProbabilityStore, + nsfwConsumerStore = config.nsfwConsumerStore, + genericNotificationFeedbackStore = config.genericNotificationFeedbackStore, + userFeatureStore = config.userFeaturesStore, + mrUserStateStore = config.mrUserStatePredictionStore, + tweetImpressionStore = 
config.tweetImpressionStore, + timelinesUserSessionStore = config.timelinesUserSessionStore, + cachedTweetyPieStore = config.cachedTweetyPieStoreV2, + strongTiesStore = config.strongTiesStore, + userHTLLastVisitStore = config.userHTLLastVisitStore, + userLanguagesStore = config.userLanguagesStore, + inputDecider = decider, + inputAbDecider = config.abDecider, + realGraphScoresTop500InStore = config.realGraphScoresTop500InStore, + recentFollowsStore = config.recentFollowsStore, + resurrectedUserStore = config.reactivatedUserInfoStore, + configParamsBuilder = config.configParamsBuilder, + optOutUserInterestsStore = config.optOutUserInterestsStore, + deviceInfoStore = config.deviceInfoStore, + pushcapDynamicPredictionStore = config.pushcapDynamicPredictionStore, + appPermissionStore = config.appPermissionStore, + optoutModelScorer = config.optoutModelScorer, + userTargetingPropertyStore = config.userTargetingPropertyStore, + ntabCaretFeedbackStore = config.ntabCaretFeedbackStore, + genericFeedbackStore = config.genericFeedbackStore, + inlineActionHistoryStore = config.inlineActionHistoryStore, + featureHydrator = config.featureHydrator, + openAppUserStore = config.openAppUserStore, + openedPushByHourAggregatedStore = config.openedPushByHourAggregatedStore, + geoduckStoreV2 = config.geoDuckV2Store, + superFollowEligibilityUserStore = config.superFollowEligibilityUserStore, + superFollowApplicationStatusStore = config.superFollowApplicationStatusStore + )(statsReceiver) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/ThriftWebFormsModule.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/ThriftWebFormsModule.scala new file mode 100644 index 000000000..049d731a3 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/module/ThriftWebFormsModule.scala @@ -0,0 +1,9 @@ +package com.twitter.frigate.pushservice.module + +import 
com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule +import com.twitter.finatra.thrift.ThriftServer +import com.twitter.frigate.pushservice.thriftscala.PushService + +class ThriftWebFormsModule(server: ThriftServer) + extends MtlsThriftWebFormsModule[PushService.MethodPerEndpoint](server) { +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/DeciderKey.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/DeciderKey.scala new file mode 100644 index 000000000..9c17ea5f2 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/DeciderKey.scala @@ -0,0 +1,210 @@ +package com.twitter.frigate.pushservice.params + +import com.twitter.servo.decider.DeciderKeyEnum + +object DeciderKey extends DeciderKeyEnum { + val disableAllRelevance = Value("frigate_pushservice_disable_all_relevance") + val disableHeavyRanking = Value("frigate_pushservice_disable_heavy_ranking") + val restrictLightRanking = Value("frigate_pushservice_restrict_light_ranking") + val downSampleLightRankingScribeCandidates = Value( + "frigate_pushservice_down_sample_light_ranking_scribe_candidates") + val entityGraphTweetRecsDeciderKey = Value("user_tweet_entity_graph_tweet_recs") + val enablePushserviceWritesToNotificationServiceDeciderKey = Value( + "frigate_pushservice_enable_writes_to_notification_service") + val enablePushserviceWritesToNotificationServiceForAllEmployeesDeciderKey = Value( + "frigate_pushservice_enable_writes_to_notification_service_for_employees") + val enablePushserviceWritesToNotificationServiceForEveryoneDeciderKey = Value( + "frigate_pushservice_enable_writes_to_notification_service_for_everyone") + val enablePromptFeedbackFatigueResponseNoPredicateDeciderKey = Value( + "frigate_pushservice_enable_ntab_feedback_prompt_response_no_filter_predicate") + val enablePushserviceDeepbirdv2CanaryClusterDeciderKey = Value( + "frigate_pushservice_canary_enable_deepbirdv2_canary_cluster") + val 
enableUTEGSCForEarlybirdTweetsDecider = Value( + "frigate_pushservice_enable_uteg_sc_for_eb_tweets") + val enableTweetFavRecs = Value("frigate_pushservice_enable_tweet_fav_recs") + val enableTweetRetweetRecs = Value("frigate_pushservice_enable_tweet_retweet_recs") + val enablePushSendEventBus = Value("frigate_pushservice_enable_push_send_eventbus") + + val enableModelBasedPushcapAssignments = Value( + "frigate_pushservice_enable_model_based_pushcap_assignments") + + val enableTweetAnnotationFeatureHydration = Value( + "frigate_pushservice_enable_tweet_annotation_features_hydration") + val enableMrRequestScribing = Value("frigate_pushservice_enable_mr_request_scribing") + val enableHighQualityCandidateScoresScribing = Value( + "frigate_pushservice_enable_high_quality_candidate_scribing") + val enableHtlUserAuthorRealTimeAggregateFeatureHydration = Value( + "frigate_pushservice_enable_htl_new_user_user_author_rta_hydration") + val enableMrUserSemanticCoreFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_semantic_core_feature_hydration") + val enableMrUserSemanticCoreNoZeroFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_semantic_core_no_zero_feature_hydration") + val enableHtlOfflineUserAggregateExtendedFeaturesHydration = Value( + "frigate_pushservice_enable_htl_offline_user_aggregate_extended_features_hydration") + val enableNerErgFeaturesHydration = Value("frigate_pushservice_enable_ner_erg_features_hydration") + val enableDaysSinceRecentResurrectionFeatureHydration = Value( + "frigate_pushservice_enable_days_since_recent_resurrection_features_hydration") + val enableUserPastAggregatesFeatureHydration = Value( + "frigate_pushservice_enable_user_past_aggregates_features_hydration") + val enableUserSignalLanguageFeatureHydration = Value( + "frigate_pushservice_enable_user_signal_language_features_hydration") + val enableUserPreferredLanguageFeatureHydration = Value( + 
"frigate_pushservice_enable_user_preferred_language_features_hydration") + val enablePredicateDetailedInfoScribing = Value( + "frigate_pushservice_enable_predicate_detailed_info_scribing") + val enablePushCapInfoScribing = Value("frigate_pushservice_enable_push_cap_info_scribing") + val disableMLInFiltering = Value("frigate_pushservice_disable_ml_in_filtering") + val useHydratedLabeledSendsForFeaturesDeciderKey = Value( + "use_hydrated_labeled_sends_for_features") + val verifyHydratedLabeledSendsForFeaturesDeciderKey = Value( + "verify_hydrated_labeled_sends_for_features") + val trainingDataDeciderKey = Value("frigate_notifier_quality_model_training_data") + val skipMlModelPredicateDeciderKey = Value("skip_ml_model_predicate") + val scribeModelFeaturesDeciderKey = Value("scribe_model_features") + val scribeModelFeaturesWithoutHydratingNewFeaturesDeciderKey = Value( + "scribe_model_features_without_hydrating_new_features") + val scribeModelFeaturesForRequestScribe = Value("scribe_model_features_for_request_scribe") + val enableMrUserSimclusterV2020FeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_simcluster_v2020_hydration") + val enableMrUserSimclusterV2020NoZeroFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_simcluster_v2020_no_zero_feature_hydration") + val enableMrUserEngagedTweetTokensFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_engaged_tweet_tokens_feature_hydration") + val enableMrCandidateTweetTokensFeaturesHydration = Value( + "frigate_pushservice_enable_mr_candidate_tweet_tokens_feature_hydration") + val enableTopicEngagementRealTimeAggregatesFeatureHydration = Value( + "frigate_pushservice_enable_topic_engagement_real_time_aggregates_feature_hydration" + ) + val enableUserTopicAggregatesFeatureHydration = Value( + "frigate_pushservice_enable_user_topic_aggregates_feature_hydration" + ) + val enableDurationSinceLastVisitFeatureHydration = Value( + 
"frigate_pushservice_enable_duration_since_last_visit_features_hydration" + ) + val enableTwistlyAggregatesFeatureHydration = Value( + "frigate_pushservice_enable_twistly_agg_feature_hydration" + ) + val enableTwHINUserEngagementFeaturesHydration = Value( + "frigate_pushservice_enable_twhin_user_engagement_features_hydration" + ) + val enableTwHINUserFollowFeaturesHydration = Value( + "frigate_pushservice_enable_twhin_user_follow_features_hydration" + ) + val enableTwHINAuthorFollowFeaturesHydration = Value( + "frigate_pushservice_enable_twhin_author_follow_features_hydration" + ) + val enableTweetTwHINFavFeaturesHydration = Value( + "frigate_pushservice_enable_tweet_twhin_fav_features_hydration" + ) + val enableSpaceVisibilityLibraryFiltering = Value( + "frigate_pushservice_enable_space_visibility_library_filtering" + ) + val enableVfFeatureHydrationSpaceShim = Value( + "frigate_pushservice_enable_visibility_filtering_feature_hydration_in_space_shim") + val enableUserTopicFollowFeatureSet = Value( + "frigate_pushservice_enable_user_topic_follow_feature_hydration") + val enableOnboardingNewUserFeatureSet = Value( + "frigate_pushservice_enable_onboarding_new_user_feature_hydration") + val enableMrUserTopicSparseContFeatureSet = Value( + "frigate_pushservice_enable_mr_user_topic_sparse_cont_feature_hydration" + ) + val enableUserPenguinLanguageFeatureSet = Value( + "frigate_pushservice_enable_user_penguin_language_feature_hydration") + val enableMrUserHashspaceEmbeddingFeatureSet = Value( + "frigate_pushservice_enable_mr_user_hashspace_embedding_feature_hydration") + val enableMrUserAuthorSparseContFeatureSet = Value( + "frigate_pushservice_enable_mr_user_author_sparse_cont_feature_hydration" + ) + val enableMrTweetSentimentFeatureSet = Value( + "frigate_pushservice_enable_mr_tweet_sentiment_feature_hydration" + ) + val enableMrTweetAuthorAggregatesFeatureSet = Value( + "frigate_pushservice_enable_mr_tweet_author_aggregates_feature_hydration" + ) + val 
enableUserGeoFeatureSet = Value("frigate_pushservice_enable_user_geo_feature_hydration") + val enableAuthorGeoFeatureSet = Value("frigate_pushservice_enable_author_geo_feature_hydration") + + val rampupUserGeoFeatureSet = Value("frigate_pushservice_ramp_up_user_geo_feature_hydration") + val rampupAuthorGeoFeatureSet = Value("frigate_pushservice_ramp_up_author_geo_feature_hydration") + + val enablePopGeoTweets = Value("frigate_pushservice_enable_pop_geo_tweets") + val enableTrendsTweets = Value("frigate_pushservice_enable_trends_tweets") + val enableTripGeoTweetCandidates = Value("frigate_pushservice_enable_trip_geo_tweets") + val enableContentRecommenderMixerAdaptor = Value( + "frigate_pushservice_enable_content_recommender_mixer_adaptor") + val enableGenericCandidateAdaptor = Value("frigate_pushservice_enable_generic_candidate_adaptor") + val enableTripGeoTweetContentMixerDarkTraffic = Value( + "frigate_pushservice_enable_trip_geo_tweets_content_mixer_dark_traffic") + + val enableInsTraffic = Value("frigate_pushservice_enable_ins_traffic") + val enableIsTweetTranslatable = Value("frigate_pushservice_enable_is_tweet_translatable") + + val enableMrTweetSimClusterFeatureSet = Value( + "frigate_pushservice_enable_mr_tweet_simcluster_feature_hydration") + + val enableMrOfflineUserTweetTopicAggregate = Value( + "frigate_pushservice_enable_mr_offline_user_tweet_topic_aggregate_hydration") + + val enableMrOfflineUserTweetSimClusterAggregate = Value( + "frigate_pushservice_enable_mr_offline_user_tweet_simcluster_aggregate_hydration" + ) + val enableRealGraphV2FeatureHydration = Value( + "frigate_pushservice_enable_real_graph_v2_features_hydration") + + val enableTweetBeTFeatureHydration = Value( + "frigate_pushservice_enable_tweet_bet_features_hydration") + + val enableInvalidatingCachedHistoryStoreAfterWrites = Value( + "frigate_pushservice_enable_invalidating_cached_history_store_after_writes") + + val enableInvalidatingCachedLoggedOutHistoryStoreAfterWrites = Value( + 
"frigate_pushservice_enable_invalidating_cached_logged_out_history_store_after_writes") + + val enableUserSendTimeFeatureHydration = Value( + "frigate_pushservice_enable_user_send_time_feature_hydration" + ) + + val enablePnegMultimodalPredictionForF1Tweets = Value( + "frigate_pushservice_enable_pneg_multimodal_prediction_for_f1_tweets" + ) + + val enableScribingOonFavScoreForF1Tweets = Value( + "frigate_pushservice_enable_oon_fav_scribe_for_f1_tweets" + ) + + val enableMrUserUtcSendTimeAggregateFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_utc_send_time_aggregate_hydration" + ) + + val enableMrUserLocalSendTimeAggregateFeaturesHydration = Value( + "frigate_pushservice_enable_mr_user_local_send_time_aggregate_hydration" + ) + + val enableBqmlReportModelPredictionForF1Tweets = Value( + "frigate_pushservice_enable_bqml_report_model_prediction_for_f1_tweets" + ) + + val enableUserTwhinEmbeddingFeatureHydration = Value( + "frigate_pushservice_enable_user_twhin_embedding_feature_hydration" + ) + + val enableAuthorFollowTwhinEmbeddingFeatureHydration = Value( + "frigate_pushservice_enable_author_follow_twhin_embedding_feature_hydration" + ) + + val enableScribingMLFeaturesAsDataRecord = Value( + "frigate_pushservice_enable_scribing_ml_features_as_datarecord" + ) + + val enableDirectHydrationForUserFeatures = Value( + "frigate_pushservice_enable_direct_hydration_for_user_features" + ) + + val enableAuthorVerifiedFeatureHydration = Value( + "frigate_pushservice_enable_author_verified_feature_hydration" + ) + + val enableAuthorCreatorSubscriptionFeatureHydration = Value( + "frigate_pushservice_enable_author_creator_subscription_feature_hydration" + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushConstants.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushConstants.scala new file mode 100644 index 000000000..cd96b4934 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushConstants.scala @@ -0,0 +1,126 @@ +package com.twitter.frigate.pushservice.params + +import com.twitter.conversions.DurationOps._ +import com.twitter.frigate.user_states.thriftscala.UserState +import java.util.Locale + +object PushConstants { + + final val ServiceProdEnvironmentName = "prod" + + final val RestrictLightRankingCandidatesThreshold = 1 + + final val DownSampleLightRankingScribeCandidatesRate = 1 + + final val NewUserLookbackWindow = 1.days + + final val PushCapInactiveUserAndroid = 1 + final val PushCapInactiveUserIos = 1 + final val PushCapLightOccasionalOpenerUserAndroid = 1 + final val PushCapLightOccasionalOpenerUserIos = 1 + + final val UserStateToPushCapIos = Map( + UserState.Inactive.name -> PushCapInactiveUserIos, + UserState.LightOccasionalOpener.name -> PushCapLightOccasionalOpenerUserIos + ) + final val UserStateToPushCapAndroid = Map( + UserState.Inactive.name -> PushCapInactiveUserAndroid, + UserState.LightOccasionalOpener.name -> PushCapLightOccasionalOpenerUserAndroid + ) + + final val AcceptableTimeSinceLastNegativeResponse = 1.days + + final val DefaultLookBackForHistory = 1.hours + + final val DefaultEventMediaUrl = "" + + final val ConnectTabPushTapThrough = "i/connect_people" + + final val AddressBookUploadTapThrough = "i/flow/mr-address-book-upload" + final val InterestPickerTapThrough = "i/flow/mr-interest-picker" + final val CompleteOnboardingInterestAddressTapThrough = "i/flow/mr-interest-address" + + final val IndiaCountryCode = "IN" + final val JapanCountryCode = Locale.JAPAN.getCountry.toUpperCase + final val UKCountryCode = Locale.UK.getCountry.toUpperCase + + final val IndiaTimeZoneCode = "Asia/Kolkata" + final val JapanTimeZoneCode = "Asia/Tokyo" + final val UKTimeZoneCode = "Europe/London" + + final val countryCodeToTimeZoneMap = Map( + IndiaCountryCode -> IndiaTimeZoneCode, + JapanCountryCode -> JapanTimeZoneCode, + UKCountryCode -> UKTimeZoneCode + 
) + + final val AbuseStrike_Top2Percent_Id = "AbuseStrike_Top2Percent_Id" + final val AbuseStrike_Top1Percent_Id = "AbuseStrike_Top1Percent_Id" + final val AbuseStrike_Top05Percent_Id = "AbuseStrike_Top05Percent_Id" + final val AbuseStrike_Top025Percent_Id = "AbuseStrike_Top025Percent_Id" + final val AllSpamReportsPerFav_Top1Percent_Id = "AllSpamReportsPerFav_Top1Percent_Id" + final val ReportsPerFav_Top1Percent_Id = "ReportsPerFav_Top1Percent_Id" + final val ReportsPerFav_Top2Percent_Id = "ReportsPerFav_Top2Percent_Id" + final val MediaUnderstanding_Nudity_Id = "MediaUnderstanding_Nudity_Id" + final val MediaUnderstanding_Beauty_Id = "MediaUnderstanding_Beauty_Id" + final val MediaUnderstanding_SinglePerson_Id = "MediaUnderstanding_SinglePerson_Id" + final val PornList_Id = "PornList_Id" + final val PornographyAndNsfwContent_Id = "PornographyAndNsfwContent_Id" + final val SexLife_Id = "SexLife_Id" + final val SexLifeOrSexualOrientation_Id = "SexLifeOrSexualOrientation_Id" + final val ProfanityFilter_Id = "ProfanityFilter_Id" + final val TweetSemanticCoreIdFeature = "tweet.core.tweet.semantic_core_annotations" + final val targetUserGenderFeatureName = "Target.User.Gender" + final val targetUserAgeFeatureName = "Target.User.AgeBucket" + final val targetUserPreferredLanguage = "user.language.user.preferred_contents" + final val tweetAgeInHoursFeatureName = "RecTweet.TweetyPieResult.TweetAgeInHrs" + final val authorActiveFollowerFeatureName = "RecTweetAuthor.User.ActiveFollowers" + final val favFeatureName = "tweet.core.tweet_counts.favorite_count" + final val sentFeatureName = + "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_sent.any_feature.Duration.Top.count" + final val authorSendCountFeatureName = + "tweet_author_aggregate.pair.any_label.any_feature.28.days.count" + final val authorReportCountFeatureName = + "tweet_author_aggregate.pair.label.reportTweetDone.any_feature.28.days.count" + final val authorDislikeCountFeatureName = + 
"tweet_author_aggregate.pair.label.ntab.isDisliked.any_feature.28.days.count" + final val TweetLikesFeatureName = "tweet.core.tweet_counts.favorite_count" + final val TweetRepliesFeatureName = "tweet.core.tweet_counts.reply_count" + + final val EnableCopyFeaturesForIbis2ModelValues = "has_copy_features" + + final val EmojiFeatureNameForIbis2ModelValues = "emoji" + + final val TargetFeatureNameForIbis2ModelValues = "target" + + final val CopyBodyExpIbisModelValues = "enable_body_exp" + + final val TweetMediaEmbeddingBQKeyIds = Seq( + 230, 110, 231, 111, 232, 233, 112, 113, 234, 235, 114, 236, 115, 237, 116, 117, 238, 118, 239, + 119, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 240, 120, 241, 121, 242, 0, 1, 122, 243, 244, 123, + 2, 124, 245, 3, 4, 246, 125, 5, 126, 247, 127, 248, 6, 128, 249, 7, 8, 129, 9, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 250, 130, 251, 252, 131, 132, 253, 133, 254, 134, 255, 135, 136, 137, + 138, 139, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 140, 141, 142, 143, 144, 145, 146, 147, 148, + 149, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 60, + 61, 62, 63, 64, 65, 66, 67, 68, 69, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 70, 71, + 72, 73, 74, 75, 76, 77, 78, 79, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 80, 81, 82, + 83, 84, 85, 86, 87, 88, 89, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 90, 91, 92, 93, + 94, 95, 96, 97, 98, 99, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, + 214, 215, 216, 217, 218, 219, 220, 100, 221, 101, 222, 223, 102, 224, 103, 104, 225, 105, 226, + 227, 106, 107, 228, 108, 229, 109 + ) + + final val SportsEventDomainId = 6L + + final val OoncQualityCombinedScore = "OoncQualityCombinedScore" +} + +object PushQPSLimitConstants { + + final val PerspectiveStoreQPS = 100000 + + final val IbisOrNTabQPSForRFPH = 100000 + + final val 
SocialGraphServiceBatchSize = 100000 +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushEnums.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushEnums.scala new file mode 100644 index 000000000..370e40076 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushEnums.scala @@ -0,0 +1,135 @@ +package com.twitter.frigate.pushservice.params + +/** + * Enum for naming scores we will scribe for non-personalized high quality candidate generation + */ +object HighQualityScribingScores extends Enumeration { + type Name = Value + val HeavyRankingScore = Value + val NonPersonalizedQualityScoreUsingCnn = Value + val BqmlNsfwScore = Value + val BqmlReportScore = Value +} + +/** + * Enum for quality upranking transform + */ +object MrQualityUprankingTransformTypeEnum extends Enumeration { + val Linear = Value + val Sigmoid = Value +} + +/** + * Enum for quality partial upranking transform + */ +object MrQualityUprankingPartialTypeEnum extends Enumeration { + val All = Value + val Oon = Value +} + +/** + * Enum for bucket membership in DDG 10220 Mr Bold Title Favorite Retweet Notification experiment + */ +object MRBoldTitleFavoriteAndRetweetExperimentEnum extends Enumeration { + val ShortTitle = Value +} + +/** + * Enum for ML filtering predicates + */ +object QualityPredicateEnum extends Enumeration { + val WeightedOpenOrNtabClick = Value + val ExplicitOpenOrNtabClickFilter = Value + val AlwaysTrue = Value // Disable ML filtering +} + +/** + * Enum to specify normalization used in BigFiltering experiments + */ +object BigFilteringNormalizationEnum extends Enumeration { + val NormalizationDisabled = Value + val NormalizeByNotSendingScore = Value +} + +/** + * Enum for inline actions + */ +object InlineActionsEnum extends Enumeration { + val Favorite = Value + val Follow = Value + val Reply = Value + val Retweet = Value +} + +/** + * Enum for template format + */ +object 
IbisTemplateFormatEnum extends Enumeration { + val template1 = Value +} + +/** + * Enum for Store name for Top Tweets By Geo + */ +object TopTweetsForGeoCombination extends Enumeration { + val Default = Value + val AccountsTweetFavAsBackfill = Value + val AccountsTweetFavIntermixed = Value +} + +/** + * Enum for scoring function for Top Tweets By Geo + */ +object TopTweetsForGeoRankingFunction extends Enumeration { + val Score = Value + val GeohashLengthAndThenScore = Value +} + +/** + * Enum for which version of popgeo tweets to be using + */ +object PopGeoTweetVersion extends Enumeration { + val Prod = Value +} + +/** + * Enum for Subtext in Android header + */ +object SubtextForAndroidPushHeader extends Enumeration { + val None = Value + val TargetHandler = Value + val TargetTagHandler = Value + val TargetName = Value + val AuthorTagHandler = Value + val AuthorName = Value +} + +object NsfwTextDetectionModel extends Enumeration { + val ProdModel = Value + val RetrainedModel = Value +} + +object HighQualityCandidateGroupEnum extends Enumeration { + val AgeBucket = Value + val Language = Value + val Topic = Value + val Country = Value + val Admin0 = Value + val Admin1 = Value +} + +object CrtGroupEnum extends Enumeration { + val Twistly = Value + val Frs = Value + val F1 = Value + val Topic = Value + val Trip = Value + val GeoPop = Value + val Other = Value + val None = Value +} + +object SportGameEnum extends Enumeration { + val Soccer = Value + val Nfl = Value +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitchParams.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitchParams.scala new file mode 100644 index 000000000..262b3a8b7 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitchParams.scala @@ -0,0 +1,5043 @@ +package com.twitter.frigate.pushservice.params + +import com.twitter.conversions.DurationOps._ +import 
com.twitter.frigate.pushservice.params.InlineActionsEnum._ +import com.twitter.frigate.pushservice.params.HighQualityCandidateGroupEnum._ +import com.twitter.timelines.configapi.DurationConversion +import com.twitter.timelines.configapi.FSBoundedParam +import com.twitter.timelines.configapi.FSEnumParam +import com.twitter.timelines.configapi.FSEnumSeqParam +import com.twitter.timelines.configapi.FSParam +import com.twitter.timelines.configapi.HasDurationConversion +import com.twitter.util.Duration + +object PushFeatureSwitchParams { + + /** + * List of CRTs to uprank. Last CRT in sequence ends up on top of list + */ + object ListOfCrtsToUpRank + extends FSParam[Seq[String]]("rerank_candidates_crt_to_top", default = Seq.empty[String]) + + object ListOfCrtsForOpenApp + extends FSParam[Seq[String]]( + "open_app_allowed_crts", + default = Seq( + "f1firstdegreetweet", + "f1firstdegreephoto", + "f1firstdegreevideo", + "geopoptweet", + "frstweet", + "trendtweet", + "hermituser", + "triangularloopuser" + )) + + /** + * List of CRTs to downrank. 
Last CRT in sequence ends up on bottom of list + */ + object ListOfCrtsToDownRank + extends FSParam[Seq[String]]( + name = "rerank_candidates_crt_to_downrank", + default = Seq.empty[String]) + + /** + * Param to enable VF filtering in Tweetypie (vs using VisibilityLibrary) + */ + object EnableVFInTweetypie + extends FSParam[Boolean]( + name = "visibility_filtering_enable_vf_in_tweetypie", + default = true + ) + + /** + * Number of max earlybird candidates + */ + object NumberOfMaxEarlybirdInNetworkCandidatesParam + extends FSBoundedParam( + name = "frigate_push_max_earlybird_in_network_candidates", + default = 100, + min = 0, + max = 800 + ) + + /** + * Number of max UserTweetEntityGraph candidates to query + */ + object NumberOfMaxUTEGCandidatesQueriedParam + extends FSBoundedParam( + name = "frigate_push_max_uteg_candidates_queried", + default = 30, + min = 0, + max = 300 + ) + + /** + * Param to control the max tweet age for users + */ + object MaxTweetAgeParam + extends FSBoundedParam[Duration]( + name = "tweet_age_max_hours", + default = 24.hours, + min = 1.hours, + max = 72.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for modeling-based candidates + */ + object ModelingBasedCandidateMaxTweetAgeParam + extends FSBoundedParam[Duration]( + name = "tweet_age_candidate_generation_model_max_hours", + default = 24.hours, + min = 1.hours, + max = 72.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for geo-pop candidates + */ + object GeoPopTweetMaxAgeInHours + extends FSBoundedParam[Duration]( + name = "tweet_age_geo_pop_max_hours", + default = 24.hours, + min = 1.hours, + max = 120.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for
simcluster-based candidates + */ + object SimclusterBasedCandidateMaxTweetAgeParam + extends FSBoundedParam[Duration]( + name = "tweet_age_simcluster_max_hours", + default = 24.hours, + min = 24.hours, + max = 48.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for Detopic-based candidates + */ + object DetopicBasedCandidateMaxTweetAgeParam + extends FSBoundedParam[Duration]( + name = "tweet_age_detopic_max_hours", + default = 24.hours, + min = 24.hours, + max = 48.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for F1 candidates + */ + object F1CandidateMaxTweetAgeParam + extends FSBoundedParam[Duration]( + name = "tweet_age_f1_max_hours", + default = 24.hours, + min = 1.hours, + max = 96.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the max tweet age for Explore Video Tweet + */ + object ExploreVideoTweetAgeParam + extends FSBoundedParam[Duration]( + name = "explore_video_tweets_age_max_hours", + default = 48.hours, + min = 1.hours, + max = 336.hours // Two weeks + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to not send the new user playbook push if the user has logged in within the past hours + */ + object NewUserPlaybookAllowedLastLoginHours + extends FSBoundedParam[Duration]( + name = "new_user_playbook_allowed_last_login_hours", + default = 0.hours, + min = 0.hours, + max = 72.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * The batch size of RefreshForPushHandler's Take step + */ + object NumberOfMaxCandidatesToBatchInRFPHTakeStep + extends FSBoundedParam( + name = "frigate_push_rfph_batch_take_max_size", + default = 1, + min =
1, + max = 10 + ) + + /** + * The maximum number of candidates to batch for Importance Sampling + */ + object NumberOfMaxCandidatesToBatchForImportanceSampling + extends FSBoundedParam( + name = "frigate_push_rfph_max_candidates_to_batch_for_importance_sampling", + default = 65, + min = 1, + max = 500 + ) + + /** + * Maximum number of regular MR push in 24.hours/daytime/nighttime + */ + object MaxMrPushSends24HoursParam + extends FSBoundedParam( + name = "pushcap_max_sends_24hours", + default = 5, + min = 0, + max = 12 + ) + + /** + * Maximum number of regular MR ntab only channel in 24.hours/daytime/nighttime + */ + object MaxMrNtabOnlySends24HoursParamV3 + extends FSBoundedParam( + name = "pushcap_max_sends_24hours_ntabonly_v3", + default = 5, + min = 0, + max = 12 + ) + + /** + * Maximum number of regular MR ntab only in 24.hours/daytime/nighttime + */ + object MaxMrPushSends24HoursNtabOnlyUsersParam + extends FSBoundedParam( + name = "pushcap_max_sends_24hours_ntab_only", + default = 5, + min = 0, + max = 10 + ) + + /** + * Customized PushCap offset (e.g., to the predicted value) + */ + object CustomizedPushCapOffset + extends FSBoundedParam[Int]( + name = "pushcap_customized_offset", + default = 0, + min = -2, + max = 4 + ) + + /** + * Param to enable restricting minimum pushcap assigned with ML models + * */ + object EnableRestrictedMinModelPushcap + extends FSParam[Boolean]( + name = "pushcap_restricted_model_min_enable", + default = false + ) + + /** + * Param to specify the minimum pushcap allowed to be assigned with ML models + * */ + object RestrictedMinModelPushcap + extends FSBoundedParam[Int]( + name = "pushcap_restricted_model_min_value", + default = 1, + min = 0, + max = 9 + ) + + object EnablePushcapRefactor + extends FSParam[Boolean]( + name = "pushcap_enable_refactor", + default = false + ) + + /** + * Enables the restrict step in pushservice for a given user + * + * Setting this to false may cause a large number of candidates to be passed on to 
filtering/take + * step in RefreshForPushHandler, increasing the service latency significantly + */ + object EnableRestrictStep extends FSParam[Boolean]("frigate_push_rfph_restrict_step_enable", true) + + /** + * The number of candidates that are able to pass through the restrict step. + */ + object RestrictStepSize + extends FSBoundedParam( + name = "frigate_push_rfph_restrict_step_size", + default = 65, + min = 65, + max = 200 + ) + + /** + * Number of max crMixer candidates to send. + */ + object NumberOfMaxCrMixerCandidatesParam + extends FSBoundedParam( + name = "cr_mixer_migration_max_num_of_candidates_to_return", + default = 400, + min = 0, + max = 2000 + ) + + /** + * Duration between two MR pushes + */ + object MinDurationSincePushParam + extends FSBoundedParam[Duration]( + name = "pushcap_min_duration_since_push_hours", + default = 4.hours, + min = 0.hours, + max = 72.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Each Phase duration to gradually ramp up MagicRecs for new users + */ + object GraduallyRampUpPhaseDurationDays + extends FSBoundedParam[Duration]( + name = "pushcap_gradually_ramp_up_phase_duration_days", + default = 3.days, + min = 2.days, + max = 7.days + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to specify interval for target pushcap fatigue + */ + object TargetPushCapFatigueIntervalHours + extends FSBoundedParam[Duration]( + name = "pushcap_fatigue_interval_hours", + default = 24.hours, + min = 1.hour, + max = 240.hours + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to specify interval for target ntabOnly fatigue + */ + object TargetNtabOnlyCapFatigueIntervalHours + extends FSBoundedParam[Duration]( + name = "pushcap_ntabonly_fatigue_interval_hours", + default = 24.hours, + min = 1.hour, + max = 240.hours + ) + with 
HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to use completely explicit push cap instead of LTV/modeling-based + */ + object EnableExplicitPushCap + extends FSParam[Boolean]( + name = "pushcap_explicit_enable", + default = false + ) + + /** + * Param to control explicit push cap (non-LTV) + */ + object ExplicitPushCap + extends FSBoundedParam[Int]( + name = "pushcap_explicit_value", + default = 1, + min = 0, + max = 20 + ) + + /** + * Parameters for percentile thresholds of OpenOrNtabClick model in MR filtering model refreshing DDG + */ + object PercentileThresholdCohort1 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort1", + default = 0.65, + min = 0.0, + max = 1.0 + ) + + object PercentileThresholdCohort2 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort2", + default = 0.03, + min = 0.0, + max = 1.0 + ) + object PercentileThresholdCohort3 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort3", + default = 0.03, + min = 0.0, + max = 1.0 + ) + object PercentileThresholdCohort4 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort4", + default = 0.06, + min = 0.0, + max = 1.0 + ) + object PercentileThresholdCohort5 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort5", + default = 0.06, + min = 0.0, + max = 1.0 + ) + object PercentileThresholdCohort6 + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_percentile_threshold_cohort6", + default = 0.8, + min = 0.0, + max = 1.0 + ) + + /** + * Parameters for percentile threshold list of OpenOrNtabClick model in MR percentile grid search experiments + */ + object MrPercentileGridSearchThresholdsCohort1 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort1", + default = Seq(0.8, 0.75,
0.65, 0.55, 0.45, 0.35, 0.25) + ) + object MrPercentileGridSearchThresholdsCohort2 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort2", + default = Seq(0.15, 0.12, 0.1, 0.08, 0.06, 0.045, 0.03) + ) + object MrPercentileGridSearchThresholdsCohort3 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort3", + default = Seq(0.15, 0.12, 0.1, 0.08, 0.06, 0.045, 0.03) + ) + object MrPercentileGridSearchThresholdsCohort4 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort4", + default = Seq(0.15, 0.12, 0.1, 0.08, 0.06, 0.045, 0.03) + ) + object MrPercentileGridSearchThresholdsCohort5 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort5", + default = Seq(0.3, 0.2, 0.15, 0.1, 0.08, 0.06, 0.05) + ) + object MrPercentileGridSearchThresholdsCohort6 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_percentile_grid_search_thresholds_cohort6", + default = Seq(0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2) + ) + + /** + * Parameters for threshold list of OpenOrNtabClick model in MF grid search experiments + */ + object MfGridSearchThresholdsCohort1 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_mf_grid_search_thresholds_cohort1", + default = Seq(0.030, 0.040, 0.050, 0.062, 0.070, 0.080, 0.090) // default: 0.062 + ) + object MfGridSearchThresholdsCohort2 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_mf_grid_search_thresholds_cohort2", + default = Seq(0.005, 0.010, 0.015, 0.020, 0.030, 0.040, 0.050) // default: 0.020 + ) + object MfGridSearchThresholdsCohort3 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_mf_grid_search_thresholds_cohort3", + default = Seq(0.010, 0.015, 0.020, 0.025, 0.035, 0.045, 0.055) // default: 0.025 + ) + object MfGridSearchThresholdsCohort4 + extends FSParam[Seq[Double]]( + name = 
"frigate_push_modeling_mf_grid_search_thresholds_cohort4", + default = Seq(0.015, 0.020, 0.025, 0.030, 0.040, 0.050, 0.060) // default: 0.030 + ) + object MfGridSearchThresholdsCohort5 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_mf_grid_search_thresholds_cohort5", + default = Seq(0.035, 0.040, 0.045, 0.050, 0.060, 0.070, 0.080) // default: 0.050 + ) + object MfGridSearchThresholdsCohort6 + extends FSParam[Seq[Double]]( + name = "frigate_push_modeling_mf_grid_search_thresholds_cohort6", + default = Seq(0.040, 0.045, 0.050, 0.055, 0.065, 0.075, 0.085) // default: 0.055 + ) + + /** + * Param to specify which global optout models to use to first predict the global scores for users + */ + object GlobalOptoutModelParam + extends FSParam[Seq[OptoutModel.ModelNameType]]( + name = "optout_model_global_model_ids", + default = Seq.empty[OptoutModel.ModelNameType] + ) + + /** + * Param to specify which optout model to use according to the experiment bucket + */ + object BucketOptoutModelParam + extends FSParam[OptoutModel.ModelNameType]( + name = "optout_model_bucket_model_id", + default = OptoutModel.D0_has_realtime_features + ) + + /* + * Param to enable candidate generation model + * */ + object EnableCandidateGenerationModelParam + extends FSParam[Boolean]( + name = "candidate_generation_model_enable", + default = false + ) + + object EnableOverrideForSportsCandidates + extends FSParam[Boolean](name = "magicfanout_sports_event_enable_override", default = true) + + object EnableEventIdBasedOverrideForSportsCandidates + extends FSParam[Boolean]( + name = "magicfanout_sports_event_enable_event_id_based_override", + default = true) + + /** + * Param to specify the threshold to determine if a user’s optout score is high enough to enter the experiment. 
+ */ + object GlobalOptoutThresholdParam + extends FSParam[Seq[Double]]( + name = "optout_model_global_thresholds", + default = Seq(1.0, 1.0) + ) + + /** + * Param to specify the threshold to determine if a user’s optout score is high enough to be assigned + * with a reduced pushcap based on the bucket membership. + */ + object BucketOptoutThresholdParam + extends FSBoundedParam[Double]( + name = "optout_model_bucket_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to specify the reduced pushcap value if the optout probability predicted by the bucket + * optout model is higher than the specified bucket optout threshold. + */ + object OptoutExptPushCapParam + extends FSBoundedParam[Int]( + name = "optout_model_expt_push_cap", + default = 10, + min = 0, + max = 10 + ) + + /** + * Param to specify the thresholds to determine which push cap slot the user should be assigned to + * according to the optout score. For example,the slot thresholds are [0.1, 0.2, ..., 1.0], the user + * is assigned to the second slot if the optout score is in (0.1, 0.2]. + */ + object BucketOptoutSlotThresholdParam + extends FSParam[Seq[Double]]( + name = "optout_model_bucket_slot_thresholds", + default = Seq.empty[Double] + ) + + /** + * Param to specify the adjusted push cap of each slot. For example, if the slot push caps are [1, 2, ..., 10] + * and the user is assigned to the 2nd slot according to the optout score, the push cap of the user + * will be adjusted to 2. 
+ */ + object BucketOptoutSlotPushcapParam + extends FSParam[Seq[Int]]( + name = "optout_model_bucket_slot_pushcaps", + default = Seq.empty[Int] + ) + + /** + * Param to specify if the optout score based push cap adjustment is enabled + */ + object EnableOptoutAdjustedPushcap + extends FSParam[Boolean]( + "optout_model_enable_optout_adjusted_pushcap", + false + ) + + /** + * Param to specify which weighted open or ntab click model to use + */ + object WeightedOpenOrNtabClickRankingModelParam + extends FSParam[WeightedOpenOrNtabClickModel.ModelNameType]( + name = "frigate_push_modeling_oonc_ranking_model_id", + default = WeightedOpenOrNtabClickModel.Periodically_Refreshed_Prod_Model + ) + + /** + * Param to disable heavy ranker + */ + object DisableHeavyRankingModelFSParam + extends FSParam[Boolean]( + name = "frigate_push_modeling_disable_heavy_ranking", + default = false + ) + + /** + * Param to specify which weighted open or ntab click model to use for Android modelling experiment + */ + object WeightedOpenOrNtabClickRankingModelForAndroidParam + extends FSParam[WeightedOpenOrNtabClickModel.ModelNameType]( + name = "frigate_push_modeling_oonc_ranking_model_for_android_id", + default = WeightedOpenOrNtabClickModel.Periodically_Refreshed_Prod_Model + ) + + /** + * Param to specify which weighted open or ntab click model to use for FILTERING + */ + object WeightedOpenOrNtabClickFilteringModelParam + extends FSParam[WeightedOpenOrNtabClickModel.ModelNameType]( + name = "frigate_push_modeling_oonc_filtering_model_id", + default = WeightedOpenOrNtabClickModel.Periodically_Refreshed_Prod_Model + ) + + /** + * Param to specify which quality predicate to use for ML filtering + */ + object QualityPredicateIdParam + extends FSEnumParam[QualityPredicateEnum.type]( + name = "frigate_push_modeling_quality_predicate_id", + default = QualityPredicateEnum.WeightedOpenOrNtabClick, + enum = QualityPredicateEnum + ) + + /** + * Param to control threshold for any quality predicates 
using explicit thresholds + */ + object QualityPredicateExplicitThresholdParam + extends FSBoundedParam[Double]( + name = "frigate_push_modeling_quality_predicate_explicit_threshold", + default = 0.1, + min = 0, + max = 1) + + /** + * MagicFanout relaxed eventID fatigue interval (when we want to enable multiple updates for the same event) + */ + object MagicFanoutRelaxedEventIdFatigueIntervalInHours + extends FSBoundedParam[Int]( + name = "frigate_push_magicfanout_relaxed_event_id_fatigue_interval_in_hours", + default = 24, + min = 0, + max = 720 + ) + + /** + * MagicFanout DenyListed Countries + */ + object MagicFanoutDenyListedCountries + extends FSParam[Seq[String]]( + "frigate_push_magicfanout_denylisted_countries", + Seq.empty[String]) + + object MagicFanoutSportsEventDenyListedCountries + extends FSParam[Seq[String]]( + "magicfanout_sports_event_denylisted_countries", + Seq.empty[String]) + + /** + * MagicFanout maximum erg rank for a given push event for non heavy users + */ + object MagicFanoutRankErgThresholdNonHeavy + extends FSBoundedParam[Int]( + name = "frigate_push_magicfanout_erg_rank_threshold_non_heavy", + default = 25, + min = 1, + max = 50 + ) + + /** + * MagicFanout maximum erg rank for a given push event for heavy users + */ + object MagicFanoutRankErgThresholdHeavy + extends FSBoundedParam[Int]( + name = "frigate_push_magicfanout_erg_rank_threshold_heavy", + default = 20, + min = 1, + max = 50 + ) + + object EnablePushMixerReplacingAllSources + extends FSParam[Boolean]( + name = "push_mixer_enable_replacing_all_sources", + default = false + ) + + object EnablePushMixerReplacingAllSourcesWithControl + extends FSParam[Boolean]( + name = "push_mixer_enable_replacing_all_sources_with_control", + default = false + ) + + object EnablePushMixerReplacingAllSourcesWithExtra + extends FSParam[Boolean]( + name = "push_mixer_enable_replacing_all_sources_with_extra", + default = false + ) + + object EnablePushMixerSource + extends FSParam[Boolean]( + name 
= "push_mixer_enable_source", + default = false + ) + + object PushMixerMaxResults + extends FSBoundedParam[Int]( + name = "push_mixer_max_results", + default = 10, + min = 1, + max = 5000 + ) + + /** + * Enable tweets from trends that have been annotated by curators + */ + object EnableCuratedTrendTweets + extends FSParam[Boolean](name = "trend_tweet_curated_trends_enable", default = false) + + /** + * Enable tweets from trends that haven't been annotated by curators + */ + object EnableNonCuratedTrendTweets + extends FSParam[Boolean](name = "trend_tweet_non_curated_trends_enable", default = false) + + /** + * Maximum trend tweet notifications in fixed duration + */ + object MaxTrendTweetNotificationsInDuration + extends FSBoundedParam[Int]( + name = "trend_tweet_max_notifications_in_duration", + min = 0, + default = 0, + max = 20) + + /** + * Duration in days over which trend tweet notifications fatigue is applied + */ + object TrendTweetNotificationsFatigueDuration + extends FSBoundedParam[Duration]( + name = "trend_tweet_notifications_fatigue_in_days", + default = 1.day, + min = Duration.Bottom, + max = Duration.Top + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Maximum number of trends candidates to query from event-recos endpoint + */ + object MaxRecommendedTrendsToQuery + extends FSBoundedParam[Int]( + name = "trend_tweet_max_trends_to_query", + min = 0, + default = 0, + max = 100) + + /** + * Fix missing event-associated interests in MagicFanoutNoOptoutInterestsPredicate + */ + object MagicFanoutFixNoOptoutInterestsBugParam + extends FSParam[Boolean]("frigate_push_magicfanout_fix_no_optout_interests", default = true) + + object EnableSimclusterOfflineAggFeatureForExpt + extends FSParam[Boolean]("frigate_enable_simcluster_offline_agg_feature", false) + + /** + * Param to enable removal of UTT domain for + */ + object ApplyMagicFanoutBroadEntityInterestRankThresholdPredicate + extends 
FSParam[Boolean]( + "frigate_push_magicfanout_broad_entity_interest_rank_threshold_predicate", + false + ) + + object HydrateEventReasonsFeatures + extends FSParam[Boolean]( + name = "frigate_push_magicfanout_hydrate_event_reasons_features", + false + ) + + /** + * Param to enable online MR history features + */ + object EnableHydratingOnlineMRHistoryFeatures + extends FSParam[Boolean]( + name = "feature_hydration_online_mr_history", + default = false + ) + + /** + * Param to enable bold title on favorite and retweet push copy for Android in DDG 10220 + */ + object MRBoldTitleFavoriteAndRetweetParam + extends FSEnumParam[MRBoldTitleFavoriteAndRetweetExperimentEnum.type]( + name = "frigate_push_bold_title_favorite_and_retweet_id", + default = MRBoldTitleFavoriteAndRetweetExperimentEnum.ShortTitle, + enum = MRBoldTitleFavoriteAndRetweetExperimentEnum + ) + + /** + * Param to enable high priority push + */ + object EnableHighPriorityPush + extends FSParam[Boolean]("frigate_push_magicfanout_enable_high_priority_push", false) + + /** + * Param to redirect sports crt event to a custom url + */ + object EnableSearchURLRedirectForSportsFanout + extends FSParam[Boolean]("magicfanout_sports_event_enable_search_url_redirect", false) + + /** + * Param to enable score fanout notification for sports + */ + object EnableScoreFanoutNotification + extends FSParam[Boolean]("magicfanout_sports_event_enable_score_fanout", false) + + /** + * Param to add custom search url for sports crt event + */ + object SearchURLRedirectForSportsFanout + extends FSParam[String]( + name = "magicfanout_sports_event_search_url_redirect", + default = "https://twitter.com/explore/tabs/ipl", + ) + + /** + * Param to enable high priority sports push + */ + object EnableHighPrioritySportsPush + extends FSParam[Boolean]("magicfanout_sports_event_enable_high_priority_push", false) + + /** + * Param to control rank threshold for magicfanout user follow + */ + object MagicFanoutRealgraphRankThreshold + extends 
FSBoundedParam[Int]( + name = "magicfanout_realgraph_threshold", + default = 500, + max = 500, + min = 100 + ) + + /** + * Topic score threshold for topic proof tweet candidates topic annotations + * */ + object TopicProofTweetCandidatesTopicScoreThreshold + extends FSBoundedParam[Double]( + name = "topics_as_social_proof_topic_score_threshold", + default = 0.0, + min = 0.0, + max = 100.0 + ) + + /** + * Enable Topic Proof Tweet Recs + */ + object EnableTopicProofTweetRecs + extends FSParam[Boolean](name = "topics_as_social_proof_enable", default = true) + + /** + * Enable health filters for topic tweet notifications + */ + object EnableHealthFiltersForTopicProofTweet + extends FSParam[Boolean]( + name = "topics_as_social_proof_enable_health_filters", + default = false) + + /** + * Disable health filters for CrMixer candidates + */ + object DisableHealthFiltersForCrMixerCandidates + extends FSParam[Boolean]( + name = "health_and_quality_filter_disable_for_crmixer_candidates", + default = false) + + object EnableMagicFanoutNewsForYouNtabCopy + extends FSParam[Boolean](name = "send_handler_enable_nfy_ntab_copy", default = false) + + /** + * Param to enable semi-personalized high quality candidates in pushservice + * */ + object HighQualityCandidatesEnableCandidateSource + extends FSParam[Boolean]( + name = "high_quality_candidates_enable_candidate_source", + default = false + ) + + /** + * Param to decide semi-personalized high quality candidates + * */ + object HighQualityCandidatesEnableGroups + extends FSEnumSeqParam[HighQualityCandidateGroupEnum.type]( + name = "high_quality_candidates_enable_groups_ids", + default = Seq(AgeBucket, Language), + enum = HighQualityCandidateGroupEnum + ) + + /** + * Param to decide semi-personalized high quality candidates + * */ + object HighQualityCandidatesNumberOfCandidates + extends FSBoundedParam[Int]( + name = "high_quality_candidates_number_of_candidates", + default = 0, + min = 0, + max = Int.MaxValue + ) + + /** + * Param 
to enable small domain falling back to bigger domains for high quality candidates in pushservice + * */ + object HighQualityCandidatesEnableFallback + extends FSParam[Boolean]( + name = "high_quality_candidates_enable_fallback", + default = false + ) + + /** + * Param to decide whether to fallback to bigger domain for high quality candidates + * */ + object HighQualityCandidatesMinNumOfCandidatesToFallback + extends FSBoundedParam[Int]( + name = "high_quality_candidates_min_num_of_candidates_to_fallback", + default = 50, + min = 0, + max = Int.MaxValue + ) + + /** + * Param to specify source ids for high quality candidates + * */ + object HighQualityCandidatesFallbackSourceIds + extends FSParam[Seq[String]]( + name = "high_quality_candidates_fallback_source_ids", + default = Seq("HQ_C_COUNT_PASS_QUALITY_SCORES")) + + /** + * Param to decide groups for semi-personalized high quality candidates + * */ + object HighQualityCandidatesFallbackEnabledGroups + extends FSEnumSeqParam[HighQualityCandidateGroupEnum.type]( + name = "high_quality_candidates_fallback_enabled_groups_ids", + default = Seq(Country), + enum = HighQualityCandidateGroupEnum + ) + + /** + * Param to control what heavy ranker model to use for scribing scores + */ + object HighQualityCandidatesHeavyRankingModel + extends FSParam[String]( + name = "high_quality_candidates_heavy_ranking_model", + default = "Periodically_Refreshed_Prod_Model_V11" + ) + + /** + * Param to control what non personalized quality "Cnn" model to use for scribing scores + */ + object HighQualityCandidatesNonPersonalizedQualityCnnModel + extends FSParam[String]( + name = "high_quality_candidates_non_personalized_quality_cnn_model", + default = "Q1_2023_Mr_Tf_Quality_Model_cnn" + ) + + /** + * Param to control what nsfw health model to use for scribing scores + */ + object HighQualityCandidatesBqmlNsfwModel + extends FSParam[String]( + name = "high_quality_candidates_bqml_nsfw_model", + default =
"Q2_2022_Mr_Bqml_Health_Model_NsfwV0" + ) + + /** + * Param to control what report model to use for scribing scores + */ + object HighQualityCandidatesBqmlReportModel + extends FSParam[String]( + name = "high_quality_candidates_bqml_report_model", + default = "Q3_2022_15266_Mr_Bqml_Non_Personalized_Report_Model_with_Media_Embeddings" + ) + + /** + * Param to specify the threshold to determine if a tweet contains nudity media + */ + object TweetMediaSensitiveCategoryThresholdParam + extends FSBoundedParam[Double]( + name = "tweet_media_sensitive_category_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to boost candidates from subscription creators + */ + object BoostCandidatesFromSubscriptionCreators + extends FSParam[Boolean]( + name = "subscription_enable_boost_candidates_from_active_creators", + default = false + ) + + /** + * Param to soft rank candidates from subscription creators + */ + object SoftRankCandidatesFromSubscriptionCreators + extends FSParam[Boolean]( + name = "subscription_enable_soft_rank_candidates_from_active_creators", + default = false + ) + + /** + * Param as factor to control how much we want to boost creator tweets + */ + object SoftRankFactorForSubscriptionCreators + extends FSBoundedParam[Double]( + name = "subscription_soft_rank_factor_for_boost", + default = 1.0, + min = 0.0, + max = Double.MaxValue + ) + + /** + * Param to enable new OON copy for Push Notifications + */ + object EnableNewMROONCopyForPush + extends FSParam[Boolean]( + name = "mr_copy_enable_new_mr_oon_copy_push", + default = true + ) + + /** + * Param to enable generated inline actions on OON Notifications + */ + object EnableOONGeneratedInlineActions + extends FSParam[Boolean]( + name = "mr_inline_enable_oon_generated_actions", + default = false + ) + + /** + * Param to control dynamic inline actions for Out-of-Network copies + */ + object OONTweetDynamicInlineActionsList + extends FSEnumSeqParam[InlineActionsEnum.type]( + name =
"mr_inline_oon_tweet_dynamic_action_ids", + default = Seq(Follow, Retweet, Favorite), + enum = InlineActionsEnum + ) + + object HighOONCTweetFormat + extends FSEnumParam[IbisTemplateFormatEnum.type]( + name = "mr_copy_high_oonc_format_id", + default = IbisTemplateFormatEnum.template1, + enum = IbisTemplateFormatEnum + ) + + object LowOONCTweetFormat + extends FSEnumParam[IbisTemplateFormatEnum.type]( + name = "mr_copy_low_oonc_format_id", + default = IbisTemplateFormatEnum.template1, + enum = IbisTemplateFormatEnum + ) + + /** + * Param to enable dynamic inline actions based on FSParams for Tweet copies (not OON) + */ + object EnableTweetDynamicInlineActions + extends FSParam[Boolean]( + name = "mr_inline_enable_tweet_dynamic_actions", + default = false + ) + + /** + * Param to control dynamic inline actions for Tweet copies (not OON) + */ + object TweetDynamicInlineActionsList + extends FSEnumSeqParam[InlineActionsEnum.type]( + name = "mr_inline_tweet_dynamic_action_ids", + default = Seq(Reply, Retweet, Favorite), + enum = InlineActionsEnum + ) + + object UseInlineActionsV1 + extends FSParam[Boolean]( + name = "mr_inline_use_inline_action_v1", + default = true + ) + + object UseInlineActionsV2 + extends FSParam[Boolean]( + name = "mr_inline_use_inline_action_v2", + default = false + ) + + object EnableInlineFeedbackOnPush + extends FSParam[Boolean]( + name = "mr_inline_enable_inline_feedback_on_push", + default = false + ) + + object InlineFeedbackSubstitutePosition + extends FSBoundedParam[Int]( + name = "mr_inline_feedback_substitute_position", + min = 0, + max = 2, + default = 2, // default to substitute or append last inline action + ) + + /** + * Param to control dynamic inline actions for web notifications + */ + object EnableDynamicInlineActionsForDesktopWeb + extends FSParam[Boolean]( + name = "mr_inline_enable_dynamic_actions_for_desktop_web", + default = false + ) + + object EnableDynamicInlineActionsForMobileWeb + extends FSParam[Boolean]( + name = 
"mr_inline_enable_dynamic_actions_for_mobile_web", + default = false + ) + + /** + * Param to define dynamic inline action types for web notifications (both desktop web + mobile web) + */ + object TweetDynamicInlineActionsListForWeb + extends FSEnumSeqParam[InlineActionsEnum.type]( + name = "mr_inline_tweet_dynamic_action_for_web_ids", + default = Seq(Retweet, Favorite), + enum = InlineActionsEnum + ) + + /** + * Param to enable MR Override Notifications for Android + */ + object EnableOverrideNotificationsForAndroid + extends FSParam[Boolean]( + name = "mr_override_enable_override_notifications_for_android", + default = false + ) + + /** + * Param to enable MR Override Notifications for iOS + */ + object EnableOverrideNotificationsForIos + extends FSParam[Boolean]( + name = "mr_override_enable_override_notifications_for_ios", + default = false + ) + + /** + * Param to enable gradually ramp up notification + */ + object EnableGraduallyRampUpNotification + extends FSParam[Boolean]( + name = "pushcap_gradually_ramp_up_enable", + default = false + ) + + /** + * Param to control the minInrerval for fatigue between consecutive MFNFY pushes + */ + object MFMinIntervalFatigue + extends FSBoundedParam[Duration]( + name = "frigate_push_magicfanout_fatigue_min_interval_consecutive_pushes_minutes", + default = 240.minutes, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromMinutes + } + + /** + * Param to control the interval for MFNFY pushes + */ + object MFPushIntervalInHours + extends FSBoundedParam[Duration]( + name = "frigate_push_magicfanout_fatigue_push_interval_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of Sports MF pushes in a period of time + */ + object SportsMaxNumberOfPushesInInterval + extends 
FSBoundedParam[Int]( + name = "magicfanout_sports_event_fatigue_max_pushes_in_interval", + default = 2, + min = 0, + max = 6) + + /** + * Param to control the minInterval for fatigue between consecutive sports pushes + */ + object SportsMinIntervalFatigue + extends FSBoundedParam[Duration]( + name = "magicfanout_sports_event_fatigue_min_interval_consecutive_pushes_minutes", + default = 240.minutes, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromMinutes + } + + /** + * Param to control the interval for sports pushes + */ + object SportsPushIntervalInHours + extends FSBoundedParam[Duration]( + name = "magicfanout_sports_event_fatigue_push_interval_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of same event sports MF pushes in a period of time + */ + object SportsMaxNumberOfPushesInIntervalPerEvent + extends FSBoundedParam[Int]( + name = "magicfanout_sports_event_fatigue_max_pushes_in_per_event_interval", + default = 2, + min = 0, + max = 6) + + /** + * Param to control the minInterval for fatigue between consecutive same event sports pushes + */ + object SportsMinIntervalFatiguePerEvent + extends FSBoundedParam[Duration]( + name = "magicfanout_sports_event_fatigue_min_interval_consecutive_pushes_per_event_minutes", + default = 240.minutes, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromMinutes + } + + /** + * Param to control the interval for same event sports pushes + */ + object SportsPushIntervalInHoursPerEvent + extends FSBoundedParam[Duration]( + name = "magicfanout_sports_event_fatigue_push_interval_per_event_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with 
HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of MF pushes in a period of time + */ + object MFMaxNumberOfPushesInInterval + extends FSBoundedParam[Int]( + name = "frigate_push_magicfanout_fatigue_max_pushes_in_interval", + default = 2, + min = 0, + max = 6) + + /** + * Param to enable custom duration for fatiguing + */ + object GPEnableCustomMagicFanoutCricketFatigue + extends FSParam[Boolean]( + name = "global_participation_cricket_magicfanout_enable_custom_fatigue", + default = false + ) + + /** + * Param to enable e2e scribing for target filtering step + */ + object EnableMrRequestScribingForTargetFiltering + extends FSParam[Boolean]( + name = "mr_request_scribing_enable_for_target_filtering", + default = false + ) + + /** + * Param to enable e2e scribing for candidate filtering step + */ + object EnableMrRequestScribingForCandidateFiltering + extends FSParam[Boolean]( + name = "mr_request_scribing_enable_for_candidate_filtering", + default = false + ) + + /** + * Param to enable e2e scribing with feature hydrating + */ + object EnableMrRequestScribingWithFeatureHydrating + extends FSParam[Boolean]( + name = "mr_request_scribing_enable_with_feature_hydrating", + default = false + ) + + /* + * TargetLevel Feature list for Mr request scribing + */ + object TargetLevelFeatureListForMrRequestScribing + extends FSParam[Seq[String]]( + name = "mr_request_scribing_target_level_feature_list", + default = Seq.empty + ) + + /** + * Param to enable \eps-greedy exploration for BigFiltering/LTV-based filtering + */ + object EnableMrRequestScribingForEpsGreedyExploration + extends FSParam[Boolean]( + name = "mr_request_scribing_eps_greedy_exploration_enable", + default = false + ) + + /** + * Param to control epsilon in \eps-greedy exploration for BigFiltering/LTV-based filtering + */ + object MrRequestScribingEpsGreedyExplorationRatio + extends FSBoundedParam[Double]( + name = 
"mr_request_scribing_eps_greedy_exploration_ratio", + default = 0.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to enable scribing dismiss model score + */ + object EnableMrRequestScribingDismissScore + extends FSParam[Boolean]( + name = "mr_request_scribing_dismiss_score_enable", + default = false + ) + + /** + * Param to enable scribing BigFiltering supervised model(s) score(s) + */ + object EnableMrRequestScribingBigFilteringSupervisedScores + extends FSParam[Boolean]( + name = "mr_request_scribing_bigfiltering_supervised_scores_enable", + default = false + ) + + /** + * Param to enable scribing BigFiltering RL model(s) score(s) + */ + object EnableMrRequestScribingBigFilteringRLScores + extends FSParam[Boolean]( + name = "mr_request_scribing_bigfiltering_rl_scores_enable", + default = false + ) + + /** + * Param to flatten mr request scribe + */ + object EnableFlattenMrRequestScribing + extends FSParam[Boolean]( + name = "mr_request_scribing_enable_flatten", + default = false + ) + + /** + * Param to enable NSFW token based filtering + */ + object EnableNsfwTokenBasedFiltering + extends FSParam[Boolean]( + name = "health_and_quality_filter_enable_nsfw_token_based_filtering", + default = false + ) + + object NsfwTokensParam + extends FSParam[Seq[String]]( + name = "health_and_quality_filter_nsfw_tokens", + default = Seq("nsfw", "18+", "\uD83D\uDD1E")) + + object MinimumAllowedAuthorAccountAgeInHours + extends FSBoundedParam[Int]( + name = "health_and_quality_filter_minimum_allowed_author_account_age_in_hours", + default = 0, + min = 0, + max = 168 + ) + + /** + * Param to enable the profanity filter + */ + object EnableProfanityFilterParam + extends FSParam[Boolean]( + name = "health_and_quality_filter_enable_profanity_filter", + default = false + ) + + /** + * Param to enable query the author media representation store + */ + object EnableQueryAuthorMediaRepresentationStore + extends FSParam[Boolean]( + name = 
"health_and_quality_filter_enable_query_author_media_representation_store", + default = false + ) + + /** + * Threshold to filter a tweet based on the author sensitive media score + */ + object AuthorSensitiveMediaFilteringThreshold + extends FSBoundedParam[Double]( + name = "health_and_quality_filter_author_sensitive_media_filtering_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to filter a tweet based on the author sensitive media score + */ + object AuthorSensitiveMediaFilteringThresholdForMrTwistly + extends FSBoundedParam[Double]( + name = "health_and_quality_filter_author_sensitive_media_filtering_threshold_for_mrtwistly", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to enable filtering the SimCluster tweet if it has AbuseStrike_Top2Percent entitiy + */ + object EnableAbuseStrikeTop2PercentFilterSimCluster + extends FSParam[Boolean]( + name = "health_signal_store_enable_abuse_strike_top_2_percent_filter_sim_cluster", + default = false + ) + + /** + * Param to enable filtering the SimCluster tweet if it has AbuseStrike_Top1Percent entitiy + */ + object EnableAbuseStrikeTop1PercentFilterSimCluster + extends FSParam[Boolean]( + name = "health_signal_store_enable_abuse_strike_top_1_percent_filter_sim_cluster", + default = false + ) + + /** + * Param to enable filtering the SimCluster tweet if it has AbuseStrike_Top0.5Percent entitiy + */ + object EnableAbuseStrikeTop05PercentFilterSimCluster + extends FSParam[Boolean]( + name = "health_signal_store_enable_abuse_strike_top_05_percent_filter_sim_cluster", + default = false + ) + + object EnableAgathaUserHealthModelPredicate + extends FSParam[Boolean]( + name = "health_signal_store_enable_agatha_user_health_model_predicate", + default = false + ) + + /** + * Threshold to filter a tweet based on the agatha_calibrated_nsfw score of its author for MrTwistly + */ + object AgathaCalibratedNSFWThresholdForMrTwistly + extends FSBoundedParam[Double]( + name = 
"health_signal_store_agatha_calibrated_nsfw_threshold_for_mrtwistly", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to filter a tweet based on the agatha_calibrated_nsfw score of its author + */ + object AgathaCalibratedNSFWThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_agatha_calibrated_nsfw_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to filter a tweet based on the agatha_nsfw_text_user score of its author for MrTwistly + */ + object AgathaTextNSFWThresholdForMrTwistly + extends FSBoundedParam[Double]( + name = "health_signal_store_agatha_text_nsfw_threshold_for_mrtwistly", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to filter a tweet based on the agatha_nsfw_text_user score of its author + */ + object AgathaTextNSFWThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_agatha_text_nsfw_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to bucket a user based on the agatha_calibrated_nsfw score of the tweet author + */ + object AgathaCalibratedNSFWBucketThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_agatha_calibrated_nsfw_bucket_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold to bucket a user based on the agatha_nsfw_text_user score of the tweet author + */ + object AgathaTextNSFWBucketThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_agatha_text_nsfw_bucket_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to enable filtering using pnsfw_text_tweet model. + */ + object EnableHealthSignalStorePnsfwTweetTextPredicate + extends FSParam[Boolean]( + name = "health_signal_store_enable_pnsfw_tweet_text_predicate", + default = false + ) + + /** + * Threshold score for filtering based on pnsfw_text_tweet Model. 
+ */ + object PnsfwTweetTextThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_tweet_text_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold score for bucketing based on pnsfw_text_tweet Model. + */ + object PnsfwTweetTextBucketingThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_tweet_text_bucketing_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Enable filtering tweets with media based on pnsfw_media_tweet Model for OON tweets only. + */ + object PnsfwTweetMediaFilterOonOnly + extends FSParam[Boolean]( + name = "health_signal_store_pnsfw_tweet_media_filter_oon_only", + default = true + ) + + /** + * Threshold score for filtering tweets with media based on pnsfw_media_tweet Model. + */ + object PnsfwTweetMediaThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_tweet_media_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold score for filtering tweets with images based on pnsfw_media_tweet Model. + */ + object PnsfwTweetImageThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_tweet_image_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold score for filtering quote/reply tweets based on source tweet's media + */ + object PnsfwQuoteTweetThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_quote_tweet_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold score for bucketing based on pnsfw_media_tweet Model. 
+ */ + object PnsfwTweetMediaBucketingThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_pnsfw_tweet_media_bucketing_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to enable filtering using multilingual pnsfw predicate + */ + object EnableHealthSignalStoreMultilingualPnsfwTweetTextPredicate + extends FSParam[Boolean]( + name = "health_signal_store_enable_multilingual_pnsfw_tweet_text_predicate", + default = false + ) + + /** + * Language sequence we will query pnsfw scores for + */ + object MultilingualPnsfwTweetTextSupportedLanguages + extends FSParam[Seq[String]]( + name = "health_signal_store_multilingual_pnsfw_tweet_supported_languages", + default = Seq.empty[String], + ) + + /** + * Threshold score per language for bucketing based on pnsfw scores. + */ + object MultilingualPnsfwTweetTextBucketingThreshold + extends FSParam[Seq[Double]]( + name = "health_signal_store_multilingual_pnsfw_tweet_text_bucketing_thresholds", + default = Seq.empty[Double], + ) + + /** + * Threshold score per language for filtering based on pnsfw scores.
+ */ + object MultilingualPnsfwTweetTextFilteringThreshold + extends FSParam[Seq[Double]]( + name = "health_signal_store_multilingual_pnsfw_tweet_text_filtering_thresholds", + default = Seq.empty[Double], + ) + + /** + * List of models to threshold scores for bucketing purposes + */ + object MultilingualPnsfwTweetTextBucketingModelList + extends FSEnumSeqParam[NsfwTextDetectionModel.type]( + name = "health_signal_store_multilingual_pnsfw_tweet_text_bucketing_models_ids", + default = Seq(NsfwTextDetectionModel.ProdModel), + enum = NsfwTextDetectionModel + ) + + object MultilingualPnsfwTweetTextModel + extends FSEnumParam[NsfwTextDetectionModel.type]( + name = "health_signal_store_multilingual_pnsfw_tweet_text_model", + default = NsfwTextDetectionModel.ProdModel, + enum = NsfwTextDetectionModel + ) + + /** + * Param to determine media should be enabled for android + */ + object EnableEventSquareMediaAndroid + extends FSParam[Boolean]( + name = "mr_enable_event_media_square_android", + default = false + ) + + /** + * Param to determine expanded media should be enabled for android + */ + object EnableEventPrimaryMediaAndroid + extends FSParam[Boolean]( + name = "mr_enable_event_media_primary_android", + default = false + ) + + /** + * Param to determine media should be enabled for ios for MagicFanout + */ + object EnableEventSquareMediaIosMagicFanoutNewsEvent + extends FSParam[Boolean]( + name = "mr_enable_event_media_square_ios_mf", + default = false + ) + + /** + * Param to configure HTL Visit fatigue + */ + object HTLVisitFatigueTime + extends FSBoundedParam[Int]( + name = "frigate_push_htl_visit_fatigue_time", + default = 20, + min = 0, + max = 72) { + + // Fatigue duration for HTL visit + final val DefaultHoursToFatigueAfterHtlVisit = 20 + final val OldHoursToFatigueAfterHtlVisit = 8 + } + + object MagicFanoutNewsUserGeneratedEventsEnable + extends FSParam[Boolean]( + name = "magicfanout_news_user_generated_events_enable", + default = false) + + object 
MagicFanoutSkipAccountCountryPredicate + extends FSParam[Boolean]("magicfanout_news_skip_account_country_predicate", false) + + object MagicFanoutNewsEnableDescriptionCopy + extends FSParam[Boolean](name = "magicfanout_news_enable_description_copy", default = false) + + /** + * Enables Custom Targeting for MagicFanout News events in Pushservice + */ + object MagicFanoutEnableCustomTargetingNewsEvent + extends FSParam[Boolean]("magicfanout_news_event_custom_targeting_enable", false) + + /** + * Enable Topic Copy in MF + */ + object EnableTopicCopyForMF + extends FSParam[Boolean]( + name = "magicfanout_enable_topic_copy", + default = false + ) + + /** + * Enable Topic Copy in MF for implicit topics + */ + object EnableTopicCopyForImplicitTopics + extends FSParam[Boolean]( + name = "magicfanout_enable_topic_copy_erg_interests", + default = false + ) + + /** + * Enable NewCreator push + */ + object EnableNewCreatorPush + extends FSParam[Boolean]( + name = "new_creator_enable_push", + default = false + ) + + /** + * Enable CreatorSubscription push + */ + object EnableCreatorSubscriptionPush + extends FSParam[Boolean]( + name = "creator_subscription_enable_push", + default = false + ) + + /** + * Featureswitch param to enable/disable push recommendations + */ + object EnablePushRecommendationsParam + extends FSParam[Boolean](name = "push_recommendations_enabled", default = false) + + object DisableMlInFilteringFeatureSwitchParam + extends FSParam[Boolean]( + name = "frigate_push_modeling_disable_ml_in_filtering", + default = false + ) + + object EnableMinDurationModifier + extends FSParam[Boolean]( + name = "min_duration_modifier_enable_hour_modifier", + default = false + ) + + object EnableMinDurationModifierV2 + extends FSParam[Boolean]( + name = "min_duration_modifier_enable_hour_modifier_v2", + default = false + ) + + object MinDurationModifierStartHourList + extends FSParam[Seq[Int]]( + name = "min_duration_modifier_start_time_list", + default = Seq(), + ) + +
object MinDurationModifierEndHourList + extends FSParam[Seq[Int]]( + name = "min_duration_modifier_start_end_list", + default = Seq(), + ) + + object MinDurationTimeModifierConst + extends FSParam[Seq[Int]]( + name = "min_duration_modifier_const_list", + default = Seq(), + ) + + object EnableQueryUserOpenedHistory + extends FSParam[Boolean]( + name = "min_duration_modifier_enable_query_user_opened_history", + default = false + ) + + object EnableMinDurationModifierByUserHistory + extends FSParam[Boolean]( + name = "min_duration_modifier_enable_hour_modifier_by_user_history", + default = false + ) + + object EnableRandomHourForQuickSend + extends FSParam[Boolean]( + name = "min_duration_modifier_enable_random_hour_for_quick_send", + default = false + ) + + object SendTimeByUserHistoryMaxOpenedThreshold + extends FSBoundedParam[Int]( + name = "min_duration_modifier_max_opened_threshold", + default = 4, + min = 0, + max = 100) + + object SendTimeByUserHistoryNoSendsHours + extends FSBoundedParam[Int]( + name = "min_duration_modifier_no_sends_hours", + default = 1, + min = 0, + max = 24) + + object SendTimeByUserHistoryQuickSendBeforeHours + extends FSBoundedParam[Int]( + name = "min_duration_modifier_quick_send_before_hours", + default = 0, + min = 0, + max = 24) + + object SendTimeByUserHistoryQuickSendAfterHours + extends FSBoundedParam[Int]( + name = "min_duration_modifier_quick_send_after_hours", + default = 0, + min = 0, + max = 24) + + object SendTimeByUserHistoryQuickSendMinDurationInMinute + extends FSBoundedParam[Int]( + name = "min_duration_modifier_quick_send_min_duration", + default = 0, + min = 0, + max = 1440) + + object SendTimeByUserHistoryNoSendMinDuration + extends FSBoundedParam[Int]( + name = "min_duration_modifier_no_send_min_duration", + default = 24, + min = 0, + max = 24) + + object EnableMfGeoTargeting + extends FSParam[Boolean]( + name = "frigate_push_magicfanout_geo_targeting_enable", + default = false) + + /** + * Enable RUX Tweet landing 
page for push open. When this param is enabled, user will go to RUX + * landing page instead of Tweet details page when opening MagicRecs push. + */ + object EnableRuxLandingPage + extends FSParam[Boolean](name = "frigate_push_enable_rux_landing_page", default = false) + + /** + * Enable RUX Tweet landing page for Ntab Click. When this param is enabled, user will go to RUX + * landing page instead of Tweet details page when click MagicRecs entry on Ntab. + */ + object EnableNTabRuxLandingPage + extends FSParam[Boolean](name = "frigate_push_enable_ntab_rux_landing_page", default = false) + + /** + * Param to enable Onboarding Pushes + */ + object EnableOnboardingPushes + extends FSParam[Boolean]( + name = "onboarding_push_enable", + default = false + ) + + /** + * Param to enable Address Book Pushes + */ + object EnableAddressBookPush + extends FSParam[Boolean]( + name = "onboarding_push_enable_address_book_push", + default = false + ) + + /** + * Param to enable Complete Onboarding Pushes + */ + object EnableCompleteOnboardingPush + extends FSParam[Boolean]( + name = "onboarding_push_enable_complete_onboarding_push", + default = false + ) + + /** + * Param to enable Smart Push Config for MR Override Notifs on Android + */ + object EnableOverrideNotificationsSmartPushConfigForAndroid + extends FSParam[Boolean]( + name = "mr_override_enable_smart_push_config_for_android", + default = false) + + /** + * Param to control the min duration since last MR push for Onboarding Pushes + */ + object MrMinDurationSincePushForOnboardingPushes + extends FSBoundedParam[Duration]( + name = "onboarding_push_min_duration_since_push_days", + default = 4.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to control the push fatigue for Onboarding Pushes + */ + object FatigueForOnboardingPushes + extends FSBoundedParam[Duration]( + name = "onboarding_push_fatigue_days", 
+ default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to specify the maximum number of Onboarding Push Notifs in a specified period of time + */ + object MaxOnboardingPushInInterval + extends FSBoundedParam[Int]( + name = "onboarding_push_max_in_interval", + default = 1, + min = 0, + max = 10 + ) + + /** + * Param to disable the Onboarding Push Notif Fatigue + */ + object DisableOnboardingPushFatigue + extends FSParam[Boolean]( + name = "onboarding_push_disable_push_fatigue", + default = false + ) + + /** + * Param to control the interval for fatigue between consecutive TopTweetsByGeoPush + */ + object TopTweetsByGeoPushInterval + extends FSBoundedParam[Duration]( + name = "top_tweets_by_geo_interval_days", + default = 0.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to control the interval for fatigue between consecutive TripTweets + */ + object HighQualityTweetsPushInterval + extends FSBoundedParam[Duration]( + name = "high_quality_candidates_push_interval_days", + default = 1.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Expiry TTL duration for Tweet Notification types written to history store + */ + object FrigateHistoryTweetNotificationWriteTtl + extends FSBoundedParam[Duration]( + name = "frigate_notification_history_tweet_write_ttl_days", + default = 60.days, + min = Duration.Bottom, + max = Duration.Top + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Expiry TTL duration for Notification written to history store + */ + object FrigateHistoryOtherNotificationWriteTtl + extends FSBoundedParam[Duration]( + name =
"frigate_notification_history_other_write_ttl_days", + default = 90.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to control maximum number of TopTweetsByGeoPush pushes to receive in an interval + */ + object MaxTopTweetsByGeoPushGivenInterval + extends FSBoundedParam[Int]( + name = "top_tweets_by_geo_push_given_interval", + default = 1, + min = 0, + max = 10 + ) + + /** + * Param to control maximum number of HighQualityTweet pushes to receive in an interval + */ + object MaxHighQualityTweetsPushGivenInterval + extends FSBoundedParam[Int]( + name = "high_quality_candidates_max_push_given_interval", + default = 3, + min = 0, + max = 10 + ) + + /** + * Param to downrank/backfill top tweets by geo candidates + */ + object BackfillRankTopTweetsByGeoCandidates + extends FSParam[Boolean]( + name = "top_tweets_by_geo_backfill_rank", + default = false + ) + + /** + * Determine whether to use aggressive thresholds for Health filtering on SearchTweet + */ + object PopGeoTweetEnableAggressiveThresholds + extends FSParam[Boolean]( + name = "top_tweets_by_geo_enable_aggressive_health_thresholds", + default = false + ) + + /** + * Param to apply different scoring functions to select top tweets by geo candidates + */ + object ScoringFuncForTopTweetsByGeo + extends FSParam[String]( + name = "top_tweets_by_geo_scoring_function", + default = "Pop8H", + ) + + /** + * Param to query different stores in pop geo service. 
+ */ + object TopTweetsByGeoCombinationParam + extends FSEnumParam[TopTweetsForGeoCombination.type]( + name = "top_tweets_by_geo_combination_id", + default = TopTweetsForGeoCombination.Default, + enum = TopTweetsForGeoCombination + ) + + /** + * Param for popgeo tweet version + */ + object PopGeoTweetVersionParam + extends FSEnumParam[PopGeoTweetVersion.type]( + name = "top_tweets_by_geo_version_id", + default = PopGeoTweetVersion.Prod, + enum = PopGeoTweetVersion + ) + + /** + * Param to query what length of hash for geohash store + */ + object GeoHashLengthList + extends FSParam[Seq[Int]]( + name = "top_tweets_by_geo_hash_length_list", + default = Seq(4), + ) + + /** + * Param to include country code results as backoff. + */ + object EnableCountryCodeBackoffTopTweetsByGeo + extends FSParam[Boolean]( + name = "top_tweets_by_geo_enable_country_code_backoff", + default = false, + ) + + /** + * Param to decide ranking function for fetched top tweets by geo + */ + object RankingFunctionForTopTweetsByGeo + extends FSEnumParam[TopTweetsForGeoRankingFunction.type]( + name = "top_tweets_by_geo_ranking_function_id", + default = TopTweetsForGeoRankingFunction.Score, + enum = TopTweetsForGeoRankingFunction + ) + + /** + * Param to enable top tweets by geo candidates + */ + object EnableTopTweetsByGeoCandidates + extends FSParam[Boolean]( + name = "top_tweets_by_geo_enable_candidate_source", + default = false + ) + + /** + * Param to enable top tweets by geo candidates for dormant users + */ + object EnableTopTweetsByGeoCandidatesForDormantUsers + extends FSParam[Boolean]( + name = "top_tweets_by_geo_enable_candidate_source_dormant_users", + default = false + ) + + /** + * Param to specify the maximum number of Top Tweets by Geo candidates to take + */ + object MaxTopTweetsByGeoCandidatesToTake + extends FSBoundedParam[Int]( + name = "top_tweets_by_geo_candidates_to_take", + default = 10, + min = 0, + max = 100 + ) + + /** + * Param to control the min duration since last MR push for top
tweets by geo pushes + */ + object MrMinDurationSincePushForTopTweetsByGeoPushes + extends FSBoundedParam[Duration]( + name = "top_tweets_by_geo_min_duration_since_last_mr_days", + default = 3.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to enable FRS candidate tweets + */ + object EnableFrsCandidates + extends FSParam[Boolean]( + name = "frs_tweet_candidate_enable_adaptor", + default = false + ) + + /** + * Param to enable FRSTweet candidates for topic setting users + * */ + object EnableFrsTweetCandidatesTopicSetting + extends FSParam[Boolean]( + name = "frs_tweet_candidate_enable_adaptor_for_topic_setting", + default = false + ) + + /** + * Param to enable topic annotations for FRSTweet candidates tweets + * */ + object EnableFrsTweetCandidatesTopicAnnotation + extends FSParam[Boolean]( + name = "frs_tweet_candidate_enable_topic_annotation", + default = false + ) + + /** + * Param to enable topic copy for FRSTweet candidates tweets + * */ + object EnableFrsTweetCandidatesTopicCopy + extends FSParam[Boolean]( + name = "frs_tweet_candidate_enable_topic_copy", + default = false + ) + + /** + * Topic score threshold for FRSTweet candidates topic annotations + * */ + object FrsTweetCandidatesTopicScoreThreshold + extends FSBoundedParam[Double]( + name = "frs_tweet_candidate_topic_score_threshold", + default = 0.0, + min = 0.0, + max = 100.0 + ) + + /** + * Param to enable mr modeling-based candidates tweets + * */ + object EnableMrModelingBasedCandidates + extends FSParam[Boolean]( + name = "candidate_generation_model_enable_adaptor", + default = false + ) + + /** + Param to enable mr modeling-based candidates tweets for topic setting users + * */ + object EnableMrModelingBasedCandidatesTopicSetting + extends FSParam[Boolean]( + name = "candidate_generation_model_enable_adaptor_for_topic_setting", + default = false + ) + + /** + * Param to 
enable topic annotations for mr modeling-based candidates tweets + * */ + object EnableMrModelingBasedCandidatesTopicAnnotation + extends FSParam[Boolean]( + name = "candidate_generation_model_enable_adaptor_topic_annotation", + default = false + ) + + /** + * Topic score threshold for mr modeling based candidates topic annotations + * */ + object MrModelingBasedCandidatesTopicScoreThreshold + extends FSBoundedParam[Double]( + name = "candidate_generation_model_topic_score_threshold", + default = 0.0, + min = 0.0, + max = 100.0 + ) + + /** + * Param to enable topic copy for mr modeling-based candidates tweets + * */ + object EnableMrModelingBasedCandidatesTopicCopy + extends FSParam[Boolean]( + name = "candidate_generation_model_enable_topic_copy", + default = false + ) + + /** + * Number of max mr modeling based candidates + * */ + object NumberOfMaxMrModelingBasedCandidates + extends FSBoundedParam[Int]( + name = "candidate_generation_model_max_mr_modeling_based_candidates", + default = 200, + min = 0, + max = 1000 + ) + + /** + * Enable the traffic to use fav threshold + * */ + object EnableThresholdOfFavMrModelingBasedCandidates + extends FSParam[Boolean]( + name = "candidate_generation_model_enable_fav_threshold", + default = false + ) + + /** + * Threshold of fav for mr modeling based candidates + * */ + object ThresholdOfFavMrModelingBasedCandidates + extends FSBoundedParam[Int]( + name = "candidate_generation_model_fav_threshold", + default = 0, + min = 0, + max = 500 + ) + + /** + * Filtered threshold for mr modeling based candidates + * */ + object CandidateGenerationModelCosineThreshold + extends FSBoundedParam[Double]( + name = "candidate_generation_model_cosine_threshold", + default = 0.9, + min = 0.0, + max = 1.0 + ) + + /* + * ANN hyperparameters + * */ + object ANNEfQuery + extends FSBoundedParam[Int]( + name = "candidate_generation_model_ann_ef_query", + default = 300, + min = 50, + max = 1500 + ) + + /** + * Param to do real A/B impression for FRS
candidates to avoid dilution + */ + object EnableResultFromFrsCandidates + extends FSParam[Boolean]( + name = "frs_tweet_candidate_enable_returned_result", + default = false + ) + + /** + * Param to enable hashspace candidate tweets + */ + object EnableHashspaceCandidates + extends FSParam[Boolean]( + name = "hashspace_candidate_enable_adaptor", + default = false + ) + + /** + * Param to enable hashspace candidates tweets for topic setting users + * */ + object EnableHashspaceCandidatesTopicSetting + extends FSParam[Boolean]( + name = "hashspace_candidate_enable_adaptor_for_topic_setting", + default = false + ) + + /** + * Param to enable topic annotations for hashspace candidates tweets + * */ + object EnableHashspaceCandidatesTopicAnnotation + extends FSParam[Boolean]( + name = "hashspace_candidate_enable_topic_annotation", + default = false + ) + + /** + * Param to enable topic copy for hashspace candidates tweets + * */ + object EnableHashspaceCandidatesTopicCopy + extends FSParam[Boolean]( + name = "hashspace_candidate_enable_topic_copy", + default = false + ) + + /** + * Topic score threshold for hashspace candidates topic annotations + * */ + object HashspaceCandidatesTopicScoreThreshold + extends FSBoundedParam[Double]( + name = "hashspace_candidate_topic_score_threshold", + default = 0.0, + min = 0.0, + max = 100.0 + ) + + /** + * Param to do real A/B impression for hashspace candidates to avoid dilution + */ + object EnableResultFromHashspaceCandidates + extends FSParam[Boolean]( + name = "hashspace_candidate_enable_returned_result", + default = false + ) + + /** + * Param to enable detopic tweet candidates in adaptor + */ + object EnableDeTopicTweetCandidates + extends FSParam[Boolean]( + name = "detopic_tweet_candidate_enable_adaptor", + default = false + ) + + /** + * Param to enable detopic tweet candidates results (to avoid dilution) + */ + object EnableDeTopicTweetCandidateResults + extends FSParam[Boolean]( + name = 
"detopic_tweet_candidate_enable_results", + default = false + ) + + /** + * Param to specify whether to provide a custom list of topics in request + */ + object EnableDeTopicTweetCandidatesCustomTopics + extends FSParam[Boolean]( + name = "detopic_tweet_candidate_enable_custom_topics", + default = false + ) + + /** + * Param to specify whether to provide a custom language in request + */ + object EnableDeTopicTweetCandidatesCustomLanguages + extends FSParam[Boolean]( + name = "detopic_tweet_candidate_enable_custom_languages", + default = false + ) + + /** + * Number of detopic tweet candidates in the request + * */ + object NumberOfDeTopicTweetCandidates + extends FSBoundedParam[Int]( + name = "detopic_tweet_candidate_num_candidates_in_request", + default = 600, + min = 0, + max = 3000 + ) + + /** + * Max Number of detopic tweet candidates returned in adaptor + * */ + object NumberOfMaxDeTopicTweetCandidatesReturned + extends FSBoundedParam[Int]( + name = "detopic_tweet_candidate_max_num_candidates_returned", + default = 200, + min = 0, + max = 3000 + ) + + /** + * Param to enable F1 from protected Authors + */ + object EnableF1FromProtectedTweetAuthors + extends FSParam[Boolean]( + "f1_enable_protected_tweets", + false + ) + + /** + * Param to enable safe user tweet tweetypie store + */ + object EnableSafeUserTweetTweetypieStore + extends FSParam[Boolean]( + "mr_infra_enable_use_safe_user_tweet_tweetypie", + false + ) + + /** + * Param to enable the min-duration-since-last-MR-push fatigue for top tweets by geo pushes + */ + object EnableMrMinDurationSinceMrPushFatigue + extends FSParam[Boolean]( + name = "top_tweets_by_geo_enable_min_duration_since_mr_fatigue", + default = false + ) + + /** + * Param to check time since last time user logged in for geo top tweets by geo push + */ + object TimeSinceLastLoginForGeoPopTweetPush + extends FSBoundedParam[Duration]( + name = "top_tweets_by_geo_time_since_last_login_in_days", + default = 14.days, + min = Duration.Bottom, + max =
Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to check time since last time user logged in for geo top tweets by geo push + */ + object MinimumTimeSinceLastLoginForGeoPopTweetPush + extends FSBoundedParam[Duration]( + name = "top_tweets_by_geo_minimum_time_since_last_login_in_days", + default = 14.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** How long we wait after a user visited the app before sending them a space fanout rec */ + object SpaceRecsAppFatigueDuration + extends FSBoundedParam[Duration]( + name = "space_recs_app_fatigue_duration_hours", + default = 4.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** The fatigue time-window for OON space fanout recs, e.g. 1 push every 3 days */ + object OONSpaceRecsFatigueDuration + extends FSBoundedParam[Duration]( + name = "space_recs_oon_fatigue_duration_days", + default = 1.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** The global fatigue time-window for space fanout recs, e.g. 1 push every 3 days */ + object SpaceRecsGlobalFatigueDuration + extends FSBoundedParam[Duration]( + name = "space_recs_global_fatigue_duration_days", + default = 1.day, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** The min-interval between space fanout recs. 
+ * After receiving a space fanout rec, they must wait a minimum of this + * interval before being eligible for another */ + object SpaceRecsFatigueMinIntervalDuration + extends FSBoundedParam[Duration]( + name = "space_recs_fatigue_mininterval_duration_minutes", + default = 30.minutes, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromMinutes + } + + /** Space fanout user-follow rank threshold. + * Users targeted by a follow that is above this threshold will be filtered */ + object SpaceRecsRealgraphThreshold + extends FSBoundedParam[Int]( + name = "space_recs_realgraph_threshold", + default = 50, + max = 500, + min = 0 + ) + + object EnableHydratingRealGraphTargetUserFeatures + extends FSParam[Boolean]( + name = "frigate_push_modeling_enable_hydrating_real_graph_target_user_feature", + default = true + ) + + /** Param to reduce dilution when checking if a space is featured or not */ + object CheckFeaturedSpaceOON + extends FSParam[Boolean](name = "space_recs_check_if_its_featured_space", default = false) + + /** Enable Featured Spaces Rules for OON spaces */ + object EnableFeaturedSpacesOON + extends FSParam[Boolean](name = "space_recs_enable_featured_spaces_oon", default = false) + + /** Enable Geo Targeting */ + object EnableGeoTargetingForSpaces + extends FSParam[Boolean](name = "space_recs_enable_geo_targeting", default = false) + + /** Number of max pushes within the fatigue duration for OON Space Recs */ + object OONSpaceRecsPushLimit + extends FSBoundedParam[Int]( + name = "space_recs_oon_push_limit", + default = 1, + max = 3, + min = 0 + ) + + /** Space fanout recs, number of max pushes within the fatigue duration */ + object SpaceRecsGlobalPushLimit + extends FSBoundedParam[Int]( + name = "space_recs_global_push_limit", + default = 3, + max = 50, + min = 0 + ) + + /** + * Param to enable score based override.
+ */ + object EnableOverrideNotificationsScoreBasedOverride + extends FSParam[Boolean]( + name = "mr_override_enable_score_ranking", + default = false + ) + + /** + * Param to determine the lookback duration when searching for override info. + */ + object OverrideNotificationsLookbackDurationForOverrideInfo + extends FSBoundedParam[Duration]( + name = "mr_override_lookback_duration_override_info_in_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to determine the lookback duration when searching for impression ids. + */ + object OverrideNotificationsLookbackDurationForImpressionId + extends FSBoundedParam[Duration]( + name = "mr_override_lookback_duration_impression_id_in_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to enable sending multiple target ids in the payload. + */ + object EnableOverrideNotificationsMultipleTargetIds + extends FSParam[Boolean]( + name = "mr_override_enable_multiple_target_ids", + default = false + ) + + /** + * Param for MR Web Notifications holdback + */ + object MRWebHoldbackParam + extends FSParam[Boolean]( + name = "mr_web_notifications_holdback", + default = false + ) + + object CommonRecommendationTypeDenyListPushHoldbacks + extends FSParam[Seq[String]]( + name = "crt_to_exclude_from_holdbacks_push_holdbacks", + default = Seq.empty[String] + ) + + /** + * Param to enable sending number of slots to maintain in the payload. + */ + object EnableOverrideNotificationsNSlots + extends FSParam[Boolean]( + name = "mr_override_enable_n_slots", + default = false + ) + + /** + * Enable down ranking of NUPS and pop geo topic follow candidates for new user playbook. 
+ */ + object EnableDownRankOfNewUserPlaybookTopicFollowPush + extends FSParam[Boolean]( + name = "topic_follow_new_user_playbook_enable_down_rank", + default = false + ) + + /** + * Enable down ranking of NUPS and pop geo topic tweet candidates for new user playbook. + */ + object EnableDownRankOfNewUserPlaybookTopicTweetPush + extends FSParam[Boolean]( + name = "topic_tweet_new_user_playbook_enable_down_rank", + default = false + ) + + /** + * Param to enable/disable employee only spaces for fanout of notifications + */ + object EnableEmployeeOnlySpaceNotifications + extends FSParam[Boolean](name = "space_recs_employee_only_enable", default = false) + + /** + * NTab spaces ttl experiments + */ + object EnableSpacesTtlForNtab + extends FSParam[Boolean]( + name = "ntab_spaces_ttl_enable", + default = false + ) + + /** + * Param to determine the ttl duration for space notifications on NTab. + */ + object SpaceNotificationsTTLDurationForNTab + extends FSBoundedParam[Duration]( + name = "ntab_spaces_ttl_hours", + default = 1.hour, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /* + * NTab override experiments + * see go/ntab-override experiment brief for more details + */ + + /** + * Override notifications for Spaces on lockscreen. + */ + object EnableOverrideForSpaces + extends FSParam[Boolean]( + name = "mr_override_spaces", + default = false + ) + + /** + * Param to enable storing the Generic Notification Key. + */ + object EnableStoringNtabGenericNotifKey + extends FSParam[Boolean]( + name = "ntab_enable_storing_generic_notif_key", + default = false + ) + + /** + * Param to enable deleting the Target's timeline. 
+ */ + object EnableDeletingNtabTimeline + extends FSParam[Boolean]( + name = "ntab_enable_delete_timeline", + default = false + ) + + /** + * Param to enable sending the overrideId + * to NTab which enables override support in NTab-api + */ + object EnableOverrideIdNTabRequest + extends FSParam[Boolean]( + name = "ntab_enable_override_id_in_request", + default = false + ) + + /** + * [Override Workstream] Param to enable NTab override n-slot feature. + */ + object EnableNslotsForOverrideOnNtab + extends FSParam[Boolean]( + name = "ntab_enable_override_max_count", + default = false + ) + + /** + * Param to determine the lookback duration for override candidates on NTab. + */ + object OverrideNotificationsLookbackDurationForNTab + extends FSBoundedParam[Duration]( + name = "ntab_override_lookback_duration_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to determine the max count for candidates on NTab. + */ + object OverrideNotificationsMaxCountForNTab + extends FSBoundedParam[Int]( + name = "ntab_override_limit", + min = 0, + max = Int.MaxValue, + default = 4) + + //// end override experiments //// + /** + * Param to enable top tweet impressions notification + */ + object EnableTopTweetImpressionsNotification + extends FSParam[Boolean]( + name = "top_tweet_impressions_notification_enable", + default = false + ) + + /** + * Param to control the interval for fatigue between consecutive TweetImpressions + */ + object TopTweetImpressionsNotificationInterval + extends FSBoundedParam[Duration]( + name = "top_tweet_impressions_notification_interval_days", + default = 7.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * The min-interval between TweetImpressions notifications.
+ * After receiving a TweetImpressions notif, they must wait a minimum of this + * interval before being eligible for another + */ + object TopTweetImpressionsFatigueMinIntervalDuration + extends FSBoundedParam[Duration]( + name = "top_tweet_impressions_fatigue_mininterval_duration_days", + default = 1.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Maximum number of top tweet impressions notifications to receive in an interval + */ + object MaxTopTweetImpressionsNotifications + extends FSBoundedParam( + name = "top_tweet_impressions_fatigue_max_in_interval", + default = 0, + min = 0, + max = 10 + ) + + /** + * Param for min number of impressions counts to be eligible for lonely_birds_tweet_impressions model + */ + object TopTweetImpressionsMinRequired + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_min_required", + default = 25, + min = 0, + max = Int.MaxValue + ) + + /** + * Param for threshold of impressions counts to notify for lonely_birds_tweet_impressions model + */ + object TopTweetImpressionsThreshold + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_threshold", + default = 25, + min = 0, + max = Int.MaxValue + ) + + /** + * Param for the number of days to search up to for a user's original tweets + */ + object TopTweetImpressionsOriginalTweetsNumDaysSearch + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_original_tweets_num_days_search", + default = 3, + min = 0, + max = 21 + ) + + /** + * Param for the minimum number of original tweets a user needs to be considered an original author + */ + object TopTweetImpressionsMinNumOriginalTweets + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_num_original_tweets", + default = 3, + min = 0, + max = Int.MaxValue + ) + + /** + * Param for the max number of favorites any original Tweet can have + */ + object TopTweetImpressionsMaxFavoritesPerTweet 
+ extends FSBoundedParam[Int]( + name = "top_tweet_impressions_max_favorites_per_tweet", + default = 3, + min = 0, + max = Int.MaxValue + ) + + /** + * Param for the max number of total inbound favorites for a user's tweets + */ + object TopTweetImpressionsTotalInboundFavoritesLimit + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_total_inbound_favorites_limit", + default = 60, + min = 0, + max = Int.MaxValue + ) + + /** + * Param for the number of days to search for tweets to count the total inbound favorites + */ + object TopTweetImpressionsTotalFavoritesLimitNumDaysSearch + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_total_favorites_limit_num_days_search", + default = 7, + min = 0, + max = 21 + ) + + /** + * Param for the max number of recent tweets Tflock should return + */ + object TopTweetImpressionsRecentTweetsByAuthorStoreMaxResults + extends FSBoundedParam[Int]( + name = "top_tweet_impressions_recent_tweets_by_author_store_max_results", + default = 50, + min = 0, + max = 1000 + ) + + /* + * Param to represent the max number of slots to maintain for Override Notifications + */ + object OverrideNotificationsMaxNumOfSlots + extends FSBoundedParam[Int]( + name = "mr_override_max_num_slots", + default = 1, + max = 10, + min = 1 + ) + + object EnableOverrideMaxSlotFn + extends FSParam[Boolean]( + name = "mr_override_enable_max_num_slots_fn", + default = false + ) + + object OverrideMaxSlotFnPushCapKnobs + extends FSParam[Seq[Double]]("mr_override_fn_pushcap_knobs", default = Seq.empty[Double]) + + object OverrideMaxSlotFnNSlotKnobs + extends FSParam[Seq[Double]]("mr_override_fn_nslot_knobs", default = Seq.empty[Double]) + + object OverrideMaxSlotFnPowerKnobs + extends FSParam[Seq[Double]]("mr_override_fn_power_knobs", default = Seq.empty[Double]) + + object OverrideMaxSlotFnWeight + extends FSBoundedParam[Double]( + "mr_override_fn_weight", + default = 1.0, + min = 0.0, + max = Double.MaxValue) + + /** + * Use to enable sending 
target ids in the Smart Push Payload + */ + object EnableTargetIdsInSmartPushPayload + extends FSParam[Boolean](name = "mr_override_enable_target_ids", default = true) + + /** + * Param to enable override by target id for MagicFanoutSportsEvent candidates + */ + object EnableTargetIdInSmartPushPayloadForMagicFanoutSportsEvent + extends FSParam[Boolean]( + name = "mr_override_enable_target_id_for_magic_fanout_sports_event", + default = true) + + /** + * Param to enable secondary account predicate on MF NFY + */ + object EnableSecondaryAccountPredicateMF + extends FSParam[Boolean]( + name = "frigate_push_magicfanout_secondary_account_predicate", + default = false + ) + + /** + * Enables showing our customers videos on their notifications + */ + object EnableInlineVideo + extends FSParam[Boolean](name = "mr_inline_enable_inline_video", default = false) + + /** + * Enables autoplay for inline videos + */ + object EnableAutoplayForInlineVideo + extends FSParam[Boolean](name = "mr_inline_enable_autoplay_for_inline_video", default = false) + + /** + * Enable OON filtering based on MentionFilter. 
+ */ + object EnableOONFilteringBasedOnUserSettings + extends FSParam[Boolean](name = "oon_filtering_enable_based_on_user_settings", false) + + /** + * Enables Custom Thread Ids which is used to ungroup notifications for N-slots on iOS + */ + object EnableCustomThreadIdForOverride + extends FSParam[Boolean](name = "mr_override_enable_custom_thread_id", default = false) + + /** + * Enables showing verified symbol in the push presentation + */ + object EnablePushPresentationVerifiedSymbol + extends FSParam[Boolean](name = "push_presentation_enable_verified_symbol", default = false) + + /** + * Decide subtext in Android push header + */ + object SubtextInAndroidPushHeaderParam + extends FSEnumParam[SubtextForAndroidPushHeader.type]( + name = "push_presentation_subtext_in_android_push_header_id", + default = SubtextForAndroidPushHeader.None, + enum = SubtextForAndroidPushHeader) + + /** + * Enable SimClusters Targeting For Spaces. If false we just drop all candidates with such targeting reason + */ + object EnableSimClusterTargetingSpaces + extends FSParam[Boolean](name = "space_recs_send_simcluster_recommendations", default = false) + + /** + * Param to control threshold for dot product of simcluster based targeting on Spaces + */ + object SpacesTargetingSimClusterDotProductThreshold + extends FSBoundedParam[Double]( + "space_recs_simclusters_dot_product_threshold", + default = 0.0, + min = 0.0, + max = 10.0) + + /** + * Param to control top-k clusters simcluster based targeting on Spaces + */ + object SpacesTopKSimClusterCount + extends FSBoundedParam[Int]( + "space_recs_simclusters_top_k_count", + default = 1, + min = 1, + max = 50) + + /** SimCluster users host/speaker must meet this follower count minimum threshold to be considered for sends */ + object SpaceRecsSimClusterUserMinimumFollowerCount + extends FSBoundedParam[Int]( + name = "space_recs_simcluster_user_min_follower_count", + default = 5000, + max = Int.MaxValue, + min = 0 + ) + + /** + * Target has been 
bucketed into the Inline Action App Visit Fatigue Experiment + */ + object TargetInInlineActionAppVisitFatigue + extends FSParam[Boolean](name = "inline_action_target_in_app_visit_fatigue", default = false) + + /** + * Enables Inline Action App Visit Fatigue + */ + object EnableInlineActionAppVisitFatigue + extends FSParam[Boolean](name = "inline_action_enable_app_visit_fatigue", default = false) + + /** + * Determines the fatigue that we should apply when the target user has performed an inline action + */ + object InlineActionAppVisitFatigue + extends FSBoundedParam[Duration]( + name = "inline_action_app_visit_fatigue_hours", + default = 8.hours, + min = 1.hour, + max = 48.hours) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Weight for reranking(oonc - weight * nudityRate) + */ + object AuthorSensitiveScoreWeightInReranking + extends FSBoundedParam[Double]( + name = "rerank_candidates_author_sensitive_score_weight_in_reranking", + default = 0.0, + min = -100.0, + max = 100.0 + ) + + /** + * Param to control the last active space listener threshold to filter out based on that + */ + object SpaceParticipantHistoryLastActiveThreshold + extends FSBoundedParam[Duration]( + name = "space_recs_last_active_space_listener_threshold_in_hours", + default = 0.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /* + * Param to enable mr user simcluster feature set (v2020) hydration for modeling-based candidate generation + * */ + object HydrateMrUserSimclusterV2020InModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_mr_user_simcluster_v2020", + default = false) + + /* + * Param to enable mr semantic core feature set hydration for modeling-based candidate generation + * */ + object HydrateMrUserSemanticCoreInModelingBasedCG + extends FSParam[Boolean]( + name = 
"candidate_generation_model_hydrate_mr_user_semantic_core", + default = false) + + /* + * Param to enable onboarding feature set hydration for modeling-based candidate generation + * */ + object HydrateOnboardingInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_onboarding", + default = false) + + /* + * Param to enable mr topic follow feature set hydration for modeling-based candidate generation + * */ + object HydrateTopicFollowInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_topic_follow", + default = false) + + /* + * Param to enable mr user topic feature set hydration for modeling-based candidate generation + * */ + object HydrateMrUserTopicInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_mr_user_topic", + default = false) + + /* + * Param to enable mr user author feature set hydration for modeling-based candidate generation + * */ + object HydrateMrUserAuthorInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_mr_user_author", + default = false) + + /* + * Param to enable user penguin language feature set hydration for modeling-based candidate generation + * */ + object HydrateUserPenguinLanguageInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_user_penguin_language", + default = false) + /* + * Param to enable user geo feature set hydration for modeling-based candidate generation + * */ + object HydrateUseGeoInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_user_geo", + default = false) + + /* + * Param to enable mr user hashspace embedding feature set hydration for modeling-based candidate generation + * */ + object HydrateMrUserHashspaceEmbeddingInModelingBasedCG + extends FSParam[Boolean]( + name = "candidate_generation_model_hydrate_mr_user_hashspace_embedding", + default = false) + /* + * Param to enable user 
tweet text feature hydration + * */ + object EnableMrUserEngagedTweetTokensFeature + extends FSParam[Boolean]( + name = "feature_hydration_mr_user_engaged_tweet_tokens", + default = false) + + /** + * Params for CRT based see less often fatigue rules + */ + object EnableF1TriggerSeeLessOftenFatigue + extends FSParam[Boolean]( + name = "seelessoften_enable_f1_trigger_fatigue", + default = false + ) + + object EnableNonF1TriggerSeeLessOftenFatigue + extends FSParam[Boolean]( + name = "seelessoften_enable_nonf1_trigger_fatigue", + default = false + ) + + /** + * Adjust the NtabCaretClickFatigue for candidates if it is triggered by + * TripHqTweet candidates + */ + object AdjustTripHqTweetTriggeredNtabCaretClickFatigue + extends FSParam[Boolean]( + name = "seelessoften_adjust_trip_hq_tweet_triggered_fatigue", + default = false + ) + + object NumberOfDaysToFilterForSeeLessOftenForF1TriggerF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_f1_trigger_f1_tofiltermr_days", + default = 7.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToReducePushCapForSeeLessOftenForF1TriggerF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_f1_trigger_f1_toreduce_pushcap_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToFilterForSeeLessOftenForF1TriggerNonF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_f1_trigger_nonf1_tofiltermr_days", + default = 7.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToReducePushCapForSeeLessOftenForF1TriggerNonF1 + extends FSBoundedParam[Duration]( + name = 
"seelessoften_for_f1_trigger_non_f1_toreduce_pushcap_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToFilterForSeeLessOftenForNonF1TriggerF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_nonf1_trigger_f1_tofiltermr_days", + default = 7.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToReducePushCapForSeeLessOftenForNonF1TriggerF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_nonf1_trigger_f1_toreduce_pushcap_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToFilterForSeeLessOftenForNonF1TriggerNonF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_nonf1_trigger_nonf1_tofiltermr_days", + default = 7.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object NumberOfDaysToReducePushCapForSeeLessOftenForNonF1TriggerNonF1 + extends FSBoundedParam[Duration]( + name = "seelessoften_for_nonf1_trigger_nonf1_toreduce_pushcap_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + object EnableContFnF1TriggerSeeLessOftenFatigue + extends FSParam[Boolean]( + name = "seelessoften_fn_enable_f1_trigger_fatigue", + default = false + ) + + object EnableContFnNonF1TriggerSeeLessOftenFatigue + extends FSParam[Boolean]( + name = "seelessoften_fn_enable_nonf1_trigger_fatigue", + default = false + ) + + object SeeLessOftenListOfDayKnobs + extends FSParam[Seq[Double]]("seelessoften_fn_day_knobs", 
default = Seq.empty[Double]) + + object SeeLessOftenListOfPushCapWeightKnobs + extends FSParam[Seq[Double]]("seelessoften_fn_pushcap_knobs", default = Seq.empty[Double]) + + object SeeLessOftenListOfPowerKnobs + extends FSParam[Seq[Double]]("seelessoften_fn_power_knobs", default = Seq.empty[Double]) + + object SeeLessOftenF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_f1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_f1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenNonF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_nonf1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenNonF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_nonf1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenTripHqTweetTriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_trip_hq_tweet_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenTripHqTweetTriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_trip_hq_tweet_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenTripHqTweetTriggerTripHqTweetPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_trip_hq_tweet_trigger_trip_hq_tweet_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object SeeLessOftenTopicTriggerTopicPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_topic_trigger_topic_weight", + default = 1.0, + min = 0.0, + max = Double.MaxValue) + + object SeeLessOftenTopicTriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_topic_trigger_f1_weight", + default = 100000.0, + min = 0.0, + max = 
Double.MaxValue) + + object SeeLessOftenTopicTriggerOONPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_topic_trigger_oon_weight", + default = 100000.0, + min = 0.0, + max = Double.MaxValue) + + object SeeLessOftenF1TriggerTopicPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_f1_trigger_topic_weight", + default = 100000.0, + min = 0.0, + max = Double.MaxValue) + + object SeeLessOftenOONTriggerTopicPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_oon_trigger_topic_weight", + default = 1.0, + min = 0.0, + max = Double.MaxValue) + + object SeeLessOftenDefaultPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_default_weight", + default = 100000.0, + min = 0.0, + max = Double.MaxValue) + + object SeeLessOftenNtabOnlyNotifUserPushCapWeight + extends FSBoundedParam[Double]( + "seelessoften_fn_ntab_only_user_weight", + default = 1.0, + min = 0.0, + max = Double.MaxValue) + + // Params for inline feedback fatigue + object EnableContFnF1TriggerInlineFeedbackFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_enable_f1_trigger_fatigue", + default = false + ) + + object EnableContFnNonF1TriggerInlineFeedbackFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_enable_nonf1_trigger_fatigue", + default = false + ) + + object UseInlineDislikeForFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_use_dislike", + default = true + ) + object UseInlineDismissForFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_use_dismiss", + default = false + ) + object UseInlineSeeLessForFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_use_see_less", + default = false + ) + object UseInlineNotRelevantForFatigue + extends FSParam[Boolean]( + name = "feedback_inline_fn_use_not_relevant", + default = false + ) + object InlineFeedbackListOfDayKnobs + extends FSParam[Seq[Double]]("feedback_inline_fn_day_knobs", default = Seq.empty[Double]) + + object 
InlineFeedbackListOfPushCapWeightKnobs + extends FSParam[Seq[Double]]("feedback_inline_fn_pushcap_knobs", default = Seq.empty[Double]) + + object InlineFeedbackListOfPowerKnobs + extends FSParam[Seq[Double]]("feedback_inline_fn_power_knobs", default = Seq.empty[Double]) + + object InlineFeedbackF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_inline_fn_f1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object InlineFeedbackF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_inline_fn_f1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object InlineFeedbackNonF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_inline_fn_nonf1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object InlineFeedbackNonF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_inline_fn_nonf1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + // Params for prompt feedback + object EnableContFnF1TriggerPromptFeedbackFatigue + extends FSParam[Boolean]( + name = "feedback_prompt_fn_enable_f1_trigger_fatigue", + default = false + ) + + object EnableContFnNonF1TriggerPromptFeedbackFatigue + extends FSParam[Boolean]( + name = "feedback_prompt_fn_enable_nonf1_trigger_fatigue", + default = false + ) + object PromptFeedbackListOfDayKnobs + extends FSParam[Seq[Double]]("feedback_prompt_fn_day_knobs", default = Seq.empty[Double]) + + object PromptFeedbackListOfPushCapWeightKnobs + extends FSParam[Seq[Double]]("feedback_prompt_fn_pushcap_knobs", default = Seq.empty[Double]) + + object PromptFeedbackListOfPowerKnobs + extends FSParam[Seq[Double]]("feedback_prompt_fn_power_knobs", default = Seq.empty[Double]) + + object PromptFeedbackF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_prompt_fn_f1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object 
PromptFeedbackF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_prompt_fn_f1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object PromptFeedbackNonF1TriggerF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_prompt_fn_nonf1_trigger_f1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + object PromptFeedbackNonF1TriggerNonF1PushCapWeight + extends FSBoundedParam[Double]( + "feedback_prompt_fn_nonf1_trigger_nonf1_weight", + default = 1.0, + min = 0.0, + max = 10000000.0) + + /* + * Param to enable cohost join event notif + */ + object EnableSpaceCohostJoinEvent + extends FSParam[Boolean](name = "space_recs_cohost_join_enable", default = true) + + /* + * Param to bypass global push cap when target is device following host/speaker. + */ + object BypassGlobalSpacePushCapForSoftDeviceFollow + extends FSParam[Boolean](name = "space_recs_bypass_global_pushcap_for_soft_follow", false) + + /* + * Param to bypass active listener predicate when target is device following host/speaker. 
+ */ + object CheckActiveListenerPredicateForSoftDeviceFollow + extends FSParam[Boolean](name = "space_recs_check_active_listener_for_soft_follow", false) + + object SpreadControlRatioParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_ratio", + default = 1000.0, + min = 0.0, + max = 100000.0 + ) + + object FavOverSendThresholdParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_fav_over_send_threshold", + default = 0.14, + min = 0.0, + max = 1000.0 + ) + + object AuthorReportRateThresholdParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_author_report_rate_threshold", + default = 7.4e-6, + min = 0.0, + max = 1000.0 + ) + + object AuthorDislikeRateThresholdParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_author_dislike_rate_threshold", + default = 1.0, + min = 0.0, + max = 1000.0 + ) + + object MinTweetSendsThresholdParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_min_tweet_sends_threshold", + default = 10000000000.0, + min = 0.0, + max = 10000000000.0 + ) + + object MinAuthorSendsThresholdParam + extends FSBoundedParam[Double]( + name = "oon_spread_control_min_author_sends_threshold", + default = 10000000000.0, + min = 0.0, + max = 10000000000.0 + ) + + /* + * Tweet Ntab-dislike predicate related params + */ + object TweetNtabDislikeCountThresholdParam + extends FSBoundedParam[Double]( + name = "oon_tweet_ntab_dislike_count_threshold", + default = 10000.0, + min = 0.0, + max = 10000.0 + ) + object TweetNtabDislikeRateThresholdParam + extends FSBoundedParam[Double]( + name = "oon_tweet_ntab_dislike_rate_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param for tweet language feature name + */ + object TweetLanguageFeatureNameParam + extends FSParam[String]( + name = "language_tweet_language_feature_name", + default = "tweet.language.tweet.identified") + + /** + * Threshold for user inferred language filtering + */ + object 
UserInferredLanguageThresholdParam + extends FSBoundedParam[Double]( + name = "language_user_inferred_language_threshold", + default = 0.0, + min = 0.0, + max = 1.0 + ) + + /** + * Threshold for user device language filtering + */ + object UserDeviceLanguageThresholdParam + extends FSBoundedParam[Double]( + name = "language_user_device_language_threshold", + default = 0.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to enable/disable tweet language filter + */ + object EnableTweetLanguageFilter + extends FSParam[Boolean]( + name = "language_enable_tweet_language_filter", + default = false + ) + + /** + * Param to skip language filter for media tweets + */ + object SkipLanguageFilterForMediaTweets + extends FSParam[Boolean]( + name = "language_skip_language_filter_for_media_tweets", + default = false + ) + + /* + * Tweet Ntab-dislike predicate related params for MrTwistly + */ + object TweetNtabDislikeCountThresholdForMrTwistlyParam + extends FSBoundedParam[Double]( + name = "oon_tweet_ntab_dislike_count_threshold_for_mrtwistly", + default = 10000.0, + min = 0.0, + max = 10000.0 + ) + object TweetNtabDislikeRateThresholdForMrTwistlyParam + extends FSBoundedParam[Double]( + name = "oon_tweet_ntab_dislike_rate_threshold_for_mrtwistly", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + object TweetNtabDislikeCountBucketThresholdParam + extends FSBoundedParam[Double]( + name = "oon_tweet_ntab_dislike_count_bucket_threshold", + default = 10.0, + min = 0.0, + max = 10000.0 + ) + + /* + * Tweet engagement ratio predicate related params + */ + object TweetQTtoNtabClickRatioThresholdParam + extends FSBoundedParam[Double]( + name = "oon_tweet_engagement_filter_qt_to_ntabclick_ratio_threshold", + default = 0.0, + min = 0.0, + max = 100000.0 + ) + + /** + * Lower bound threshold to filter a tweet based on its reply to like ratio + */ + object TweetReplytoLikeRatioThresholdLowerBound + extends FSBoundedParam[Double]( + name = 
"oon_tweet_engagement_filter_reply_to_like_ratio_threshold_lower_bound", + default = Double.MaxValue, + min = 0.0, + max = Double.MaxValue + ) + + /** + * Upper bound threshold to filter a tweet based on its reply to like ratio + */ + object TweetReplytoLikeRatioThresholdUpperBound + extends FSBoundedParam[Double]( + name = "oon_tweet_engagement_filter_reply_to_like_ratio_threshold_upper_bound", + default = 0.0, + min = 0.0, + max = Double.MaxValue + ) + + /** + * Reply count threshold used when filtering a tweet based on its reply to like ratio + */ + object TweetReplytoLikeRatioReplyCountThreshold + extends FSBoundedParam[Int]( + name = "oon_tweet_engagement_filter_reply_count_threshold", + default = Int.MaxValue, + min = 0, + max = Int.MaxValue + ) + + /* + * oonTweetLengthBasedPrerankingPredicate related params + */ + object OonTweetLengthPredicateUpdatedMediaLogic + extends FSParam[Boolean]( + name = "oon_quality_filter_tweet_length_updated_media_logic", + default = false + ) + + object OonTweetLengthPredicateUpdatedQuoteTweetLogic + extends FSParam[Boolean]( + name = "oon_quality_filter_tweet_length_updated_quote_tweet_logic", + default = false + ) + + object OonTweetLengthPredicateMoreStrictForUndefinedLanguages + extends FSParam[Boolean]( + name = "oon_quality_filter_tweet_length_more_strict_for_undefined_languages", + default = false + ) + + object EnablePrerankingTweetLengthPredicate + extends FSParam[Boolean]( + name = "oon_quality_filter_enable_preranking_filter", + default = false + ) + + /* + * LengthLanguageBasedOONTweetCandidatesQualityPredicate related params + */ + object SautOonWithMediaTweetLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_length_threshold_for_saut_oon_with_media", + default = 0.0, + min = 0.0, + max = 70.0 + ) + object NonSautOonWithMediaTweetLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_length_threshold_for_non_saut_oon_with_media", + default = 0.0, 
min = 0.0, + max = 70.0 + ) + object SautOonWithoutMediaTweetLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_length_threshold_for_saut_oon_without_media", + default = 0.0, + min = 0.0, + max = 70.0 + ) + object NonSautOonWithoutMediaTweetLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_length_threshold_for_non_saut_oon_without_media", + default = 0.0, + min = 0.0, + max = 70.0 + ) + + object ArgfOonWithMediaTweetWordLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_word_length_threshold_for_argf_oon_with_media", + default = 0.0, + min = 0.0, + max = 18.0 + ) + object EsfthOonWithMediaTweetWordLengthThresholdParam + extends FSBoundedParam[Double]( + name = "oon_quality_filter_tweet_word_length_threshold_for_esfth_oon_with_media", + default = 0.0, + min = 0.0, + max = 10.0 + ) + + /** + * Param to enable/disable sentiment feature hydration + */ + object EnableMrTweetSentimentFeatureHydrationFS + extends FSParam[Boolean]( + name = "feature_hydration_enable_mr_tweet_sentiment_feature", + default = false + ) + + /** + * Param to enable/disable feature map scribing for staging test log + */ + object EnableMrScribingMLFeaturesAsFeatureMapForStaging + extends FSParam[Boolean]( + name = "frigate_pushservice_enable_scribing_ml_features_as_featuremap_for_staging", + default = false + ) + + /** + * Param to enable timeline health signal hydration + * */ + object EnableTimelineHealthSignalHydration + extends FSParam[Boolean]( + name = "timeline_health_signal_hydration", + default = false + ) + + /** + * Param to enable timeline health signal hydration for model training + * */ + object EnableTimelineHealthSignalHydrationForModelTraining + extends FSParam[Boolean]( + name = "timeline_health_signal_hydration_for_model_training", + default = false + ) + + /** + * Param to enable/disable mr user social context agg feature hydration + */ + object 
EnableMrUserSocialContextAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_social_context_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user semantic core agg feature hydration + */ + object EnableMrUserSemanticCoreAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_semantic_core_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user candidate sparse agg feature hydration + */ + object EnableMrUserCandidateSparseOfflineAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_candidate_sparse_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user candidate agg feature hydration + */ + object EnableMrUserCandidateOfflineAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_candidate_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user candidate compact agg feature hydration + */ + object EnableMrUserCandidateOfflineCompactAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_candidate_compact_agg_feature", + default = false + ) + + /** + * Param to enable/disable mr real graph user-author/social-context feature hydration + */ + object EnableRealGraphUserAuthorAndSocialContxtFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_real_graph_user_social_feature", + default = true + ) + + /** + * Param to enable/disable mr user author agg feature hydration + */ + object EnableMrUserAuthorOfflineAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_author_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user author compact agg feature hydration + */ + object EnableMrUserAuthorOfflineCompactAggregateFeatureHydration + extends 
FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_author_compact_agg_feature", + default = false + ) + + /** + * Param to enable/disable mr user compact agg feature hydration + */ + object EnableMrUserOfflineCompactAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_compact_agg_feature", + default = false + ) + + /** + * Param to enable/disable mr user simcluster agg feature hydration + */ + object EnableMrUserSimcluster2020AggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_simcluster_agg_feature", + default = true + ) + + /** + * Param to enable/disable mr user agg feature hydration + */ + object EnableMrUserOfflineAggregateFeatureHydration + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_agg_feature", + default = true + ) + + /** + * Param to enable/disable topic engagement RTA in the ranking model + */ + object EnableTopicEngagementRealTimeAggregatesFS + extends FSParam[Boolean]( + "feature_hydration_enable_htl_topic_engagement_real_time_agg_feature", + false) + + /* + * Param to enable mr user semantic core feature hydration for heavy ranker + * */ + object EnableMrUserSemanticCoreFeatureForExpt + extends FSParam[Boolean]( + name = "frigate_push_modeling_hydrate_mr_user_semantic_core", + default = false) + + /** + * Param to enable hydrating user duration since last visit features + */ + object EnableHydratingUserDurationSinceLastVisitFeatures + extends FSParam[Boolean]( + name = "feature_hydration_user_duration_since_last_visit", + default = false) + + /** + Param to enable/disable user-topic aggregates in the ranking model + */ + object EnableUserTopicAggregatesFS + extends FSParam[Boolean]("feature_hydration_enable_htl_topic_user_agg_feature", false) + + /* + * PNegMultimodalPredicate related params + */ + object EnablePNegMultimodalPredicateParam + extends FSParam[Boolean]( + name = 
"pneg_multimodal_filter_enable_param", + default = false + ) + object PNegMultimodalPredicateModelThresholdParam + extends FSBoundedParam[Double]( + name = "pneg_multimodal_filter_model_threshold_param", + default = 1.0, + min = 0.0, + max = 1.0 + ) + object PNegMultimodalPredicateBucketThresholdParam + extends FSBoundedParam[Double]( + name = "pneg_multimodal_filter_bucket_threshold_param", + default = 0.4, + min = 0.0, + max = 1.0 + ) + + /* + * NegativeKeywordsPredicate related params + */ + object EnableNegativeKeywordsPredicateParam + extends FSParam[Boolean]( + name = "negative_keywords_filter_enable_param", + default = false + ) + object NegativeKeywordsPredicateDenylist + extends FSParam[Seq[String]]( + name = "negative_keywords_filter_denylist", + default = Seq.empty[String] + ) + /* + * LightRanking related params + */ + object EnableLightRankingParam + extends FSParam[Boolean]( + name = "light_ranking_enable_param", + default = false + ) + object LightRankingNumberOfCandidatesParam + extends FSBoundedParam[Int]( + name = "light_ranking_number_of_candidates_param", + default = 100, + min = 0, + max = 1000 + ) + object LightRankingModelTypeParam + extends FSParam[String]( + name = "light_ranking_model_type_param", + default = "WeightedOpenOrNtabClickProbability_Q4_2021_13172_Mr_Light_Ranker_Dbv2_Top3") + object EnableRandomBaselineLightRankingParam + extends FSParam[Boolean]( + name = "light_ranking_random_baseline_enable_param", + default = false + ) + + object LightRankingScribeCandidatesDownSamplingParam + extends FSBoundedParam[Double]( + name = "light_ranking_scribe_candidates_down_sampling_param", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /* + * Quality Upranking related params + */ + object EnableProducersQualityBoostingForHeavyRankingParam + extends FSParam[Boolean]( + name = "quality_upranking_enable_producers_quality_boosting_for_heavy_ranking_param", + default = false + ) + + object QualityUprankingBoostForHighQualityProducersParam + 
extends FSBoundedParam[Double]( + name = "quality_upranking_boost_for_high_quality_producers_param", + default = 1.0, + min = 0.0, + max = 10000.0 + ) + + object QualityUprankingDownboostForLowQualityProducersParam + extends FSBoundedParam[Double]( + name = "quality_upranking_downboost_for_low_quality_producers_param", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + object EnableQualityUprankingForHeavyRankingParam + extends FSParam[Boolean]( + name = "quality_upranking_enable_for_heavy_ranking_param", + default = false + ) + object QualityUprankingModelTypeParam + extends FSParam[WeightedOpenOrNtabClickModel.ModelNameType]( + name = "quality_upranking_model_id", + default = "Q4_2022_Mr_Bqml_Quality_Model_wALL" + ) + object QualityUprankingTransformTypeParam + extends FSEnumParam[MrQualityUprankingTransformTypeEnum.type]( + name = "quality_upranking_transform_id", + default = MrQualityUprankingTransformTypeEnum.Sigmoid, + enum = MrQualityUprankingTransformTypeEnum + ) + + object QualityUprankingBoostForHeavyRankingParam + extends FSBoundedParam[Double]( + name = "quality_upranking_boost_for_heavy_ranking_param", + default = 1.0, + min = -10.0, + max = 10.0 + ) + object QualityUprankingSigmoidBiasForHeavyRankingParam + extends FSBoundedParam[Double]( + name = "quality_upranking_sigmoid_bias_for_heavy_ranking_param", + default = 0.0, + min = -10.0, + max = 10.0 + ) + object QualityUprankingSigmoidWeightForHeavyRankingParam + extends FSBoundedParam[Double]( + name = "quality_upranking_sigmoid_weight_for_heavy_ranking_param", + default = 1.0, + min = -10.0, + max = 10.0 + ) + object QualityUprankingLinearBarForHeavyRankingParam + extends FSBoundedParam[Double]( + name = "quality_upranking_linear_bar_for_heavy_ranking_param", + default = 1.0, + min = 0.0, + max = 10.0 + ) + object EnableQualityUprankingCrtScoreStatsForHeavyRankingParam + extends FSParam[Boolean]( + name = "quality_upranking_enable_crt_score_stats_for_heavy_ranking_param", + default = false + ) + /* + 
* BQML Health Model related params + */ + object EnableBqmlHealthModelPredicateParam + extends FSParam[Boolean]( + name = "bqml_health_model_filter_enable_param", + default = false + ) + + object EnableBqmlHealthModelPredictionForInNetworkCandidatesParam + extends FSParam[Boolean]( + name = "bqml_health_model_enable_prediction_for_in_network_candidates_param", + default = false + ) + + object BqmlHealthModelTypeParam + extends FSParam[HealthNsfwModel.ModelNameType]( + name = "bqml_health_model_id", + default = HealthNsfwModel.Q2_2022_Mr_Bqml_Health_Model_NsfwV0 + ) + object BqmlHealthModelPredicateFilterThresholdParam + extends FSBoundedParam[Double]( + name = "bqml_health_model_filter_threshold_param", + default = 1.0, + min = 0.0, + max = 1.0 + ) + object BqmlHealthModelPredicateBucketThresholdParam + extends FSBoundedParam[Double]( + name = "bqml_health_model_bucket_threshold_param", + default = 0.005, + min = 0.0, + max = 1.0 + ) + + object EnableBqmlHealthModelScoreHistogramParam + extends FSParam[Boolean]( + name = "bqml_health_model_score_histogram_enable_param", + default = false + ) + + /* + * BQML Quality Model related params + */ + object EnableBqmlQualityModelPredicateParam + extends FSParam[Boolean]( + name = "bqml_quality_model_filter_enable_param", + default = false + ) + object EnableBqmlQualityModelScoreHistogramParam + extends FSParam[Boolean]( + name = "bqml_quality_model_score_histogram_enable_param", + default = false + ) + object BqmlQualityModelTypeParam + extends FSParam[WeightedOpenOrNtabClickModel.ModelNameType]( + name = "bqml_quality_model_id", + default = "Q1_2022_13562_Mr_Bqml_Quality_Model_V2" + ) + + /** + * Param to specify which quality models to use to get the scores for determining + * whether to bucket a user for the DDG + */ + object BqmlQualityModelBucketModelIdListParam + extends FSParam[Seq[WeightedOpenOrNtabClickModel.ModelNameType]]( + name = "bqml_quality_model_bucket_model_id_list", + default = Seq( + 
"Q1_2022_13562_Mr_Bqml_Quality_Model_V2", + "Q2_2022_DDG14146_Mr_Personalised_BQML_Quality_Model", + "Q2_2022_DDG14146_Mr_NonPersonalised_BQML_Quality_Model" + ) + ) + + object BqmlQualityModelPredicateThresholdParam + extends FSBoundedParam[Double]( + name = "bqml_quality_model_filter_threshold_param", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to specify the thresholds (a list) used to determine if a user’s quality score is high enough to enter the experiment; presumably one threshold per model in BqmlQualityModelBucketModelIdListParam - TODO confirm. + */ + object BqmlQualityModelBucketThresholdListParam + extends FSParam[Seq[Double]]( + name = "bqml_quality_model_bucket_threshold_list", + default = Seq(0.7, 0.7, 0.7) + ) + + /* + * TweetAuthorAggregates related params + */ + object EnableTweetAuthorAggregatesFeatureHydrationParam + extends FSParam[Boolean]( + name = "tweet_author_aggregates_feature_hydration_enable_param", + default = false + ) + + /** + * Param to determine if we should include the relevancy score of candidates in the Ibis payload + */ + object IncludeRelevanceScoreInIbis2Payload + extends FSParam[Boolean]( + name = "relevance_score_include_in_ibis2_payload", + default = false + ) + + /** + * Param to specify supervised model to predict score by sending the notification + */ + object BigFilteringSupervisedSendingModelParam + extends FSParam[BigFilteringSupervisedModel.ModelNameType]( + name = "ltv_filtering_bigfiltering_supervised_sending_model_param", + default = BigFilteringSupervisedModel.V0_0_BigFiltering_Supervised_Sending_Model + ) + + /** + * Param to specify supervised model to predict score by not sending the notification + */ + object BigFilteringSupervisedWithoutSendingModelParam + extends FSParam[BigFilteringSupervisedModel.ModelNameType]( + name = "ltv_filtering_bigfiltering_supervised_without_sending_model_param", + default = BigFilteringSupervisedModel.V0_0_BigFiltering_Supervised_Without_Sending_Model + ) + + /** + * Param to specify RL model to predict score by sending the notification + */ + object 
BigFilteringRLSendingModelParam + extends FSParam[BigFilteringSupervisedModel.ModelNameType]( + name = "ltv_filtering_bigfiltering_rl_sending_model_param", + default = BigFilteringRLModel.V0_0_BigFiltering_Rl_Sending_Model + ) + + /** + * Param to specify RL model to predict score by not sending the notification. NOTE(review): like BigFilteringRLSendingModelParam, this is typed with BigFilteringSupervisedModel.ModelNameType while its default comes from BigFilteringRLModel - confirm this is intended. + */ + object BigFilteringRLWithoutSendingModelParam + extends FSParam[BigFilteringSupervisedModel.ModelNameType]( + name = "ltv_filtering_bigfiltering_rl_without_sending_model_param", + default = BigFilteringRLModel.V0_0_BigFiltering_Rl_Without_Sending_Model + ) + + /** + * Param to specify the threshold (send notification if score >= threshold) + */ + object BigFilteringThresholdParam + extends FSBoundedParam[Double]( + name = "ltv_filtering_bigfiltering_threshold_param", + default = 0.0, + min = Double.MinValue, + max = Double.MaxValue + ) + + /** + * Param to specify normalization used for BigFiltering + */ + object BigFilteringNormalizationTypeIdParam + extends FSEnumParam[BigFilteringNormalizationEnum.type]( + name = "ltv_filtering_bigfiltering_normalization_type_id", + default = BigFilteringNormalizationEnum.NormalizationDisabled, + enum = BigFilteringNormalizationEnum + ) + + /** + * Param to enable histograms of model scores in BigFiltering + */ + object BigFilteringEnableHistogramsParam + extends FSParam[Boolean]( + name = "ltv_filtering_bigfiltering_enable_histograms_param", + default = false + ) + + /* + * Param to enable sending requests to Ins Sender + */ + object EnableInsSender extends FSParam[Boolean](name = "ins_enable_dark_traffic", default = false) + + /** + * Param to specify the range of relevance scores for MagicFanout types. + */ + object MagicFanoutRelevanceScoreRange + extends FSParam[Seq[Double]]( + name = "relevance_score_mf_range", + default = Seq(0.75, 1.0) + ) + + /** + * Param to specify the range of relevance scores for MR types. 
+ */ + object MagicRecsRelevanceScoreRange + extends FSParam[Seq[Double]]( + name = "relevance_score_mr_range", + default = Seq(0.25, 0.5) + ) + + /** + * Param to enable backfilling OON candidates if number of F1 candidates is greater than a threshold K. + */ + object EnableOONBackfillBasedOnF1Candidates + extends FSParam[Boolean](name = "oon_enable_backfill_based_on_f1", default = false) + + /** + * Threshold for the minimum number of F1 candidates required to enable backfill of OON candidates. + */ + object NumberOfF1CandidatesThresholdForOONBackfill + extends FSBoundedParam[Int]( + name = "oon_enable_backfill_f1_threshold", + min = 0, + default = 5000, + max = 5000) + + /** + * Event ID allowlist to skip account country predicate + */ + object MagicFanoutEventAllowlistToSkipAccountCountryPredicate + extends FSParam[Seq[Long]]( + name = "magicfanout_event_allowlist_skip_account_country_predicate", + default = Seq.empty[Long] + ) + + /** + * MagicFanout Event Semantic Core Domain Ids + */ + object ListOfEventSemanticCoreDomainIds + extends FSParam[Seq[Long]]( + name = "magicfanout_automated_events_semantic_core_domain_ids", + default = Seq()) + + /** + * Adhoc ids for detailed rank flow stats + */ + object ListOfAdhocIdsForStatsTracking + extends FSParam[Set[Long]]( + name = "stats_enable_detailed_stats_tracking_ids", + default = Set.empty[Long] + ) + + object EnableGenericCRTBasedFatiguePredicate + extends FSParam[Boolean]( + name = "seelessoften_enable_generic_crt_based_fatigue_predicate", + default = false) + + /** + * Param to enable copy features such as Emojis and Target Name for F1 + */ + object EnableCopyFeaturesForF1 + extends FSParam[Boolean](name = "mr_copy_enable_features_f1", default = false) + + /** + * Param to enable copy features such as Emojis and Target Name for OON + */ + object EnableCopyFeaturesForOon + extends FSParam[Boolean](name = "mr_copy_enable_features_oon", default = false) + + /** + * Param to enable Emoji in F1 Copy + */ + object 
EnableEmojiInF1Copy + extends FSParam[Boolean](name = "mr_copy_enable_f1_emoji", default = false) + + /** + * Param to enable Target in F1 Copy + */ + object EnableTargetInF1Copy + extends FSParam[Boolean](name = "mr_copy_enable_f1_target", default = false) + + /** + * Param to enable Emoji in OON Copy + */ + object EnableEmojiInOonCopy + extends FSParam[Boolean](name = "mr_copy_enable_oon_emoji", default = false) + + /** + * Param to enable Target in OON Copy + */ + object EnableTargetInOonCopy + extends FSParam[Boolean](name = "mr_copy_enable_oon_target", default = false) + + /** + * Param to enable split fatigue for Target and Emoji copy for OON and F1 + */ + object EnableTargetAndEmojiSplitFatigue + extends FSParam[Boolean](name = "mr_copy_enable_target_emoji_split_fatigue", default = false) + + /** + * Param to enable experimenting with a string on the body + */ + object EnableF1CopyBody extends FSParam[Boolean](name = "mr_copy_f1_enable_body", default = false) + + object EnableOONCopyBody + extends FSParam[Boolean](name = "mr_copy_oon_enable_body", default = false) + + object EnableIosCopyBodyTruncate + extends FSParam[Boolean](name = "mr_copy_enable_body_truncate", default = false) + + object EnableNsfwCopy extends FSParam[Boolean](name = "mr_copy_enable_nsfw", default = false) + + /** + * Param to determine F1 candidate nsfw score threshold + */ + object NsfwScoreThresholdForF1Copy + extends FSBoundedParam[Double]( + name = "mr_copy_nsfw_threshold_f1", + default = 0.3, + min = 0.0, + max = 1.0 + ) + + /** + * Param to determine OON candidate nsfw score threshold + */ + object NsfwScoreThresholdForOONCopy + extends FSBoundedParam[Double]( + name = "mr_copy_nsfw_threshold_oon", + default = 0.2, + min = 0.0, + max = 1.0 + ) + + /** + * Param to determine the lookback duration when searching for prev copy features. 
+ */ + object CopyFeaturesHistoryLookbackDuration + extends FSBoundedParam[Duration]( + name = "mr_copy_history_lookback_duration_in_days", + default = 30.days, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to determine the F1 emoji copy fatigue in # of hours. + */ + object F1EmojiCopyFatigueDuration + extends FSBoundedParam[Duration]( + name = "mr_copy_f1_emoji_copy_fatigue_in_hours", + default = 24.hours, + min = 0.hours, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to determine the F1 target copy fatigue in # of hours. + */ + object F1TargetCopyFatigueDuration + extends FSBoundedParam[Duration]( + name = "mr_copy_f1_target_copy_fatigue_in_hours", + default = 24.hours, + min = 0.hours, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to determine the OON emoji copy fatigue in # of hours. + */ + object OonEmojiCopyFatigueDuration + extends FSBoundedParam[Duration]( + name = "mr_copy_oon_emoji_copy_fatigue_in_hours", + default = 24.hours, + min = 0.hours, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to determine the OON target copy fatigue in # of hours. 
+ */ + object OonTargetCopyFatigueDuration + extends FSBoundedParam[Duration]( + name = "mr_copy_oon_target_copy_fatigue_in_hours", + default = 24.hours, + min = 0.hours, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to turn on/off home timeline based fatigue rule, where once last home timeline visit + * is larger than the specified value, the rule will evaluate to not fatigue + */ + object EnableHTLBasedFatigueBasicRule + extends FSParam[Boolean]( + name = "mr_copy_enable_htl_based_fatigue_basic_rule", + default = false) + + /** + * Param to determine f1 emoji copy fatigue in # of pushes + */ + object F1EmojiCopyNumOfPushesFatigue + extends FSBoundedParam[Int]( + name = "mr_copy_f1_emoji_copy_number_of_pushes_fatigue", + default = 0, + min = 0, + max = 200 + ) + + /** + * Param to determine oon emoji copy fatigue in # of pushes + */ + object OonEmojiCopyNumOfPushesFatigue + extends FSBoundedParam[Int]( + name = "mr_copy_oon_emoji_copy_number_of_pushes_fatigue", + default = 0, + min = 0, + max = 200 + ) + + /** + * If a user hasn't visited the home timeline for a certain duration, we will + * exempt the user from feature copy fatigue. This param is used to control + * how long it is before we enter exemption. + */ + object MinFatigueDurationSinceLastHTLVisit + extends FSBoundedParam[Duration]( + name = "mr_copy_min_duration_since_last_htl_visit_hours", + default = Duration.Top, + min = 0.hour, + max = Duration.Top, + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * If a user hasn't visited the home timeline for a very long time, the user will return + * to fatigue state under the home timeline based fatigue rule. There will + * only be a window, where the user is out of fatigue state under the rule. + * This param controls the length of the non-fatigue period. 
+ */ + object LastHTLVisitBasedNonFatigueWindow + extends FSBoundedParam[Duration]( + name = "mr_copy_last_htl_visit_based_non_fatigue_window_hours", + default = 48.hours, + min = 0.hour, + max = Duration.Top, + ) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + object EnableOONCBasedCopy + extends FSParam[Boolean]( + name = "mr_copy_enable_oonc_based_copy", + default = false + ) + + object HighOONCThresholdForCopy + extends FSBoundedParam[Double]( + name = "mr_copy_high_oonc_threshold_for_copy", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + object LowOONCThresholdForCopy + extends FSBoundedParam[Double]( + name = "mr_copy_low_oonc_threshold_for_copy", + default = 0.0, + min = 0.0, + max = 1.0 + ) + + object EnableTweetTranslation + extends FSParam[Boolean](name = "tweet_translation_enable", default = false) + + object TripTweetCandidateReturnEnable + extends FSParam[Boolean](name = "trip_tweet_candidate_enable", default = false) + + object TripTweetCandidateSourceIds + extends FSParam[Seq[String]]( + name = "trip_tweet_candidate_source_ids", + default = Seq("TOP_GEO_V3")) + + object TripTweetMaxTotalCandidates + extends FSBoundedParam[Int]( + name = "trip_tweet_max_total_candidates", + default = 500, + min = 10, + max = 1000) + + object EnableEmptyBody + extends FSParam[Boolean](name = "push_presentation_enable_empty_body", default = false) + + object EnableSocialContextForRetweet + extends FSParam[Boolean](name = "push_presentation_social_context_retweet", default = false) + + /** + * Param to enable/disable simcluster feature hydration + */ + object EnableMrTweetSimClusterFeatureHydrationFS + extends FSParam[Boolean]( + name = "feature_hydration_enable_mr_tweet_simcluster_feature", + default = false + ) + + /** + * Param to disable OON candidates based on tweetAuthor + */ + object DisableOutNetworkTweetCandidatesFS + extends FSParam[Boolean](name = "oon_filtering_disable_oon_candidates", default = false) 
+ + /** + * Param to enable Local Viral Tweets + */ + object EnableLocalViralTweets + extends FSParam[Boolean](name = "local_viral_tweets_enable", default = true) + + /** + * Param to enable Explore Video Tweets + */ + object EnableExploreVideoTweets + extends FSParam[Boolean](name = "explore_video_tweets_enable", default = false) + + /** + * Param to enable List Recommendations + */ + object EnableListRecommendations + extends FSParam[Boolean](name = "list_recommendations_enable", default = false) + + /** + * Param to enable IDS List Recommendations + */ + object EnableIDSListRecommendations + extends FSParam[Boolean](name = "list_recommendations_ids_enable", default = false) + + /** + * Param to enable PopGeo List Recommendations + */ + object EnablePopGeoListRecommendations + extends FSParam[Boolean](name = "list_recommendations_pop_geo_enable", default = false) + + /** + * Param to control the interval for fatigue between consecutive ListRecommendations. NOTE(review): the value is converted with DurationConversion.FromDays (key ends in _days) while the default is written as 24.hours - confirm the intended unit. + */ + object ListRecommendationsPushInterval + extends FSBoundedParam[Duration]( + name = "list_recommendations_interval_days", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromDays + } + + /** + * Param to control the granularity of GeoHash for ListRecommendations + */ + object ListRecommendationsGeoHashLength + extends FSBoundedParam[Int]( + name = "list_recommendations_geo_hash_length", + default = 5, + min = 3, + max = 5) + + /** + * Param to control maximum number of ListRecommendation pushes to receive in an interval + */ + object MaxListRecommendationsPushGivenInterval + extends FSBoundedParam[Int]( + name = "list_recommendations_push_given_interval", + default = 1, + min = 0, + max = 10 + ) + + /** + * Param to control the subscriber count for list recommendation + */ + object ListRecommendationsSubscriberCount + extends FSBoundedParam[Int]( + name = "list_recommendations_subscriber_count", + 
default = 0, + min = 0, + max = Integer.MAX_VALUE) + + /** + * Param to specify the bucket for Local Viral Tweets (a String, default "high") + */ + object LocalViralTweetsBucket + extends FSParam[String]( + name = "local_viral_tweets_bucket", + default = "high", + ) + + /** + * List of CrTags to disable + */ + object OONCandidatesDisabledCrTagParam + extends FSParam[Seq[String]]( + name = "oon_enable_oon_candidates_disabled_crtag", + default = Seq.empty[String] + ) + + /** + * List of Crt groups to disable + */ + object OONCandidatesDisabledCrtGroupParam + extends FSEnumSeqParam[CrtGroupEnum.type]( + name = "oon_enable_oon_candidates_disabled_crt_group_ids", + default = Seq.empty[CrtGroupEnum.Value], + enum = CrtGroupEnum + ) + + /** + * Param to enable launching video tweets in the Immersive Explore timeline + */ + object EnableLaunchVideosInImmersiveExplore + extends FSParam[Boolean](name = "launch_videos_in_immersive_explore", default = false) + + /** + * Param to enable Ntab Entries for Sports Event Notifications + */ + object EnableNTabEntriesForSportsEventNotifications + extends FSParam[Boolean]( + name = "magicfanout_sports_event_enable_ntab_entries", + default = false) + + /** + * Param to enable Ntab Facepiles for teams in Sport Notifs + */ + object EnableNTabFacePileForSportsEventNotifications + extends FSParam[Boolean]( + name = "magicfanout_sports_event_enable_ntab_facepiles", + default = false) + + /** + * Param to enable Ntab Override for Sports Event Notifications + */ + object EnableNTabOverrideForSportsEventNotifications + extends FSParam[Boolean]( + name = "magicfanout_sports_event_enable_ntab_override", + default = false) + + /** + * Param to control the interval for MF Product Launch Notifs + */ + object ProductLaunchPushIntervalInHours + extends FSBoundedParam[Duration]( + name = "product_launch_fatigue_push_interval_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion 
{ + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of MF Product Launch Notifs in a period of time + */ + object ProductLaunchMaxNumberOfPushesInInterval + extends FSBoundedParam[Int]( + name = "product_launch_fatigue_max_pushes_in_interval", + default = 1, + min = 0, + max = 10) + + /** + * Param to control the minInterval for fatigue between consecutive MF Product Launch Notifs + */ + object ProductLaunchMinIntervalFatigue + extends FSBoundedParam[Duration]( + name = "product_launch_fatigue_min_interval_consecutive_pushes_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the interval for MF New Creator Notifs + */ + object NewCreatorPushIntervalInHours + extends FSBoundedParam[Duration]( + name = "new_creator_fatigue_push_interval_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of MF New Creator Notifs in a period of time + */ + object NewCreatorPushMaxNumberOfPushesInInterval + extends FSBoundedParam[Int]( + name = "new_creator_fatigue_max_pushes_in_interval", + default = 1, + min = 0, + max = 10) + + /** + * Param to control the minInterval for fatigue between consecutive MF New Creator Notifs + */ + object NewCreatorPushMinIntervalFatigue + extends FSBoundedParam[Duration]( + name = "new_creator_fatigue_min_interval_consecutive_pushes_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the interval for MF Creator Subscription Notifs + */ + object CreatorSubscriptionPushIntervalInHours + extends 
FSBoundedParam[Duration]( + name = "creator_subscription_fatigue_push_interval_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to control the maximum number of MF Creator Subscription Notifs in a period of time + */ + object CreatorSubscriptionPushMaxNumberOfPushesInInterval + extends FSBoundedParam[Int]( + name = "creator_subscription_fatigue_max_pushes_in_interval", + default = 1, + min = 0, + max = 10) + + /** + * Param to control the minInterval for fatigue between consecutive MF Creator Subscription Notifs + */ + object CreatorSubscriptionPushhMinIntervalFatigue + extends FSBoundedParam[Duration]( + name = "creator_subscription_fatigue_min_interval_consecutive_pushes_in_hours", + default = 24.hours, + min = Duration.Bottom, + max = Duration.Top) + with HasDurationConversion { + override val durationConversion = DurationConversion.FromHours + } + + /** + * Param to define the landing page deeplink of product launch notifications + */ + object ProductLaunchLandingPageDeepLink + extends FSParam[String]( + name = "product_launch_landing_page_deeplink", + default = "" + ) + + /** + * Param to define the tap through of product launch notifications + */ + object ProductLaunchTapThrough + extends FSParam[String]( + name = "product_launch_tap_through", + default = "" + ) + + /** + * Param to skip checking isTargetBlueVerified + */ + object DisableIsTargetBlueVerifiedPredicate + extends FSParam[Boolean]( + name = "product_launch_disable_is_target_blue_verified_predicate", + default = false + ) + + /** + * Param to enable Ntab Entries for Product Launch Notifications + */ + object EnableNTabEntriesForProductLaunchNotifications + extends FSParam[Boolean](name = "product_launch_enable_ntab_entry", default = true) + + /** + * Param to skip checking isTargetLegacyVerified + */ + object DisableIsTargetLegacyVerifiedPredicate + extends 
FSParam[Boolean]( + name = "product_launch_disable_is_target_legacy_verified_predicate", + default = false + ) + + /** + * Param to enable checking isTargetSuperFollowCreator + */ + object EnableIsTargetSuperFollowCreatorPredicate + extends FSParam[Boolean]( + name = "product_launch_is_target_super_follow_creator_predicate_enabled", + default = false + ) + + /** + * Param to enable Spammy Tweet filter + */ + object EnableSpammyTweetFilter + extends FSParam[Boolean]( + name = "health_signal_store_enable_spammy_tweet_filter", + default = false) + + /** + * Param to enable Push to Home Android + */ + object EnableTweetPushToHomeAndroid + extends FSParam[Boolean](name = "push_to_home_tweet_recs_android", default = false) + + /** + * Param to enable Push to Home iOS + */ + object EnableTweetPushToHomeiOS + extends FSParam[Boolean](name = "push_to_home_tweet_recs_iOS", default = false) + + /** + * Param to set Spammy Tweet score threshold for OON candidates + */ + object SpammyTweetOonThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_spammy_tweet_oon_threshold", + default = 1.1, + min = 0.0, + max = 1.1 + ) + + object NumFollowerThresholdForHealthAndQualityFilters + extends FSBoundedParam[Double]( + name = "health_signal_store_num_follower_threshold_for_health_and_quality_filters", + default = 10000000000.0, + min = 0.0, + max = 10000000000.0 + ) + + object NumFollowerThresholdForHealthAndQualityFiltersPreranking + extends FSBoundedParam[Double]( + name = + "health_signal_store_num_follower_threshold_for_health_and_quality_filters_preranking", + default = 10000000.0, + min = 0.0, + max = 10000000000.0 + ) + + /** + * Param to set Spammy Tweet score threshold for IN candidates + */ + object SpammyTweetInThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_spammy_tweet_in_threshold", + default = 1.1, + min = 0.0, + max = 1.1 + ) + + /** + * Param to control bucketing for the Spammy Tweet score + */ + object 
SpammyTweetBucketingThreshold + extends FSBoundedParam[Double]( + name = "health_signal_store_spammy_tweet_bucketing_threshold", + default = 1.0, + min = 0.0, + max = 1.0 + ) + + /** + * Param to specify the maximum number of Explore Video Tweets to request + */ + object MaxExploreVideoTweets + extends FSBoundedParam[Int]( + name = "explore_video_tweets_max_candidates", + default = 100, + min = 0, + max = 500 + ) + + /** + * Param to enable the bounded feature set for user social context + */ + object EnableBoundedFeatureSetForSocialContext + extends FSParam[Boolean]( + name = "feature_hydration_user_social_context_bounded_feature_set_enable", + default = true) + + /** + * Param to enable the stp bounded feature set for user social context + */ + object EnableStpBoundedFeatureSetForUserSocialContext + extends FSParam[Boolean]( + name = "feature_hydration_stp_social_context_bounded_feature_set_enable", + default = true) + + /** + * Param to enable the core user history bounded feature set for social context + */ + object EnableCoreUserHistoryBoundedFeatureSetForSocialContext + extends FSParam[Boolean]( + name = "feature_hydration_core_user_history_social_context_bounded_feature_set_enable", + default = true) + + /** + * Param to enable skipping post-ranking filters + */ + object SkipPostRankingFilters + extends FSParam[Boolean]( + name = "frigate_push_modeling_skip_post_ranking_filters", + default = false) + + object MagicFanoutSimClusterDotProductNonHeavyUserThreshold + extends FSBoundedParam[Double]( + name = "frigate_push_magicfanout_simcluster_non_heavy_user_dot_product_threshold", + default = 0.0, + min = 0.0, + max = 100.0 + ) + + object MagicFanoutSimClusterDotProductHeavyUserThreshold + extends FSBoundedParam[Double]( + name = "frigate_push_magicfanout_simcluster_heavy_user_dot_product_threshold", + default = 10.0, + min = 0.0, + max = 100.0 + ) + + object EnableReducedFatigueRulesForSeeLessOften + extends FSParam[Boolean]( + name = "seelessoften_enable_reduced_fatigue", + default = false + ) +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitches.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitches.scala new file mode 100644 index 000000000..96167c134 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushFeatureSwitches.scala @@ -0,0 +1,751 @@ +package com.twitter.frigate.pushservice.params + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.{FeatureSwitchParams => Common} +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => Pushservice} +import com.twitter.logging.Logger +import com.twitter.servo.decider.DeciderGateBuilder +import com.twitter.timelines.configapi.BaseConfigBuilder +import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil +import com.twitter.timelines.configapi.decider.DeciderUtils + +case class PushFeatureSwitches( + deciderGateBuilder: DeciderGateBuilder, + statsReceiver: StatsReceiver) { + + private[this] val logger = Logger(classOf[PushFeatureSwitches]) + private[this] val stat = statsReceiver.scope("PushFeatureSwitches") + + private val booleanDeciderOverrides = DeciderUtils.getBooleanDeciderOverrides( + deciderGateBuilder, + PushParams.DisableAllRelevanceParam, + PushParams.DisableHeavyRankingParam, + PushParams.RestrictLightRankingParam, + PushParams.UTEGTweetCandidateSourceParam, + PushParams.EnableWritesToNotificationServiceParam, + PushParams.EnableWritesToNotificationServiceForAllEmployeesParam, + PushParams.EnableWritesToNotificationServiceForEveryoneParam, + PushParams.EnablePromptFeedbackFatigueResponseNoPredicate, + PushParams.EarlyBirdSCBasedCandidatesParam, + PushParams.MRTweetFavRecsParam, + PushParams.MRTweetRetweetRecsParam, + PushParams.EnablePushSendEventBus, + PushParams.DisableMlInFilteringParam, + PushParams.DownSampleLightRankingScribeCandidatesParam, + PushParams.EnableMrRequestScribing, + 
PushParams.EnableHighQualityCandidateScoresScribing, + PushParams.EnablePnegMultimodalPredictionForF1Tweets, + PushParams.EnableScribeOonFavScoreForF1Tweets, + PushParams.EnableMrUserSemanticCoreFeaturesHydration, + PushParams.EnableMrUserSemanticCoreNoZeroFeaturesHydration, + PushParams.EnableHtlOfflineUserAggregatesExtendedHydration, + PushParams.EnableNerErgFeatureHydration, + PushParams.EnableDaysSinceRecentResurrectionFeatureHydration, + PushParams.EnableUserPastAggregatesFeatureHydration, + PushParams.EnableMrUserSimclusterV2020FeaturesHydration, + PushParams.EnableMrUserSimclusterV2020NoZeroFeaturesHydration, + PushParams.EnableTopicEngagementRealTimeAggregatesFeatureHydration, + PushParams.EnableUserTopicAggregatesFeatureHydration, + PushParams.EnableHtlUserAuthorRTAFeaturesFromFeatureStoreHydration, + PushParams.EnableDurationSinceLastVisitFeatures, + PushParams.EnableTweetAnnotationFeaturesHydration, + PushParams.EnableSpaceVisibilityLibraryFiltering, + PushParams.EnableUserTopicFollowFeatureSetHydration, + PushParams.EnableOnboardingNewUserFeatureSetHydration, + PushParams.EnableMrUserAuthorSparseContFeatureSetHydration, + PushParams.EnableMrUserTopicSparseContFeatureSetHydration, + PushParams.EnableUserPenguinLanguageFeatureSetHydration, + PushParams.EnableMrUserHashspaceEmbeddingFeatureHydration, + PushParams.EnableMrUserEngagedTweetTokensFeatureHydration, + PushParams.EnableMrCandidateTweetTokensFeatureHydration, + PushParams.EnableMrTweetSentimentFeatureHydration, + PushParams.EnableMrTweetAuthorAggregatesFeatureHydration, + PushParams.EnableUserGeoFeatureSetHydration, + PushParams.EnableAuthorGeoFeatureSetHydration, + PushParams.EnableTwHINUserEngagementFeaturesHydration, + PushParams.EnableTwHINUserFollowFeaturesHydration, + PushParams.EnableTwHINAuthorFollowFeaturesHydration, + PushParams.EnableAuthorFollowTwhinEmbeddingFeatureHydration, + PushParams.RampupUserGeoFeatureSetHydration, + PushParams.RampupAuthorGeoFeatureSetHydration, + 
PushParams.EnablePredicateDetailedInfoScribing, + PushParams.EnablePushCapInfoScribing, + PushParams.EnableUserSignalLanguageFeatureHydration, + PushParams.EnableUserPreferredLanguageFeatureHydration, + PushParams.PopGeoCandidatesDecider, + PushParams.TrendsCandidateDecider, + PushParams.EnableInsTrafficDecider, + PushParams.EnableModelBasedPushcapAssignments, + PushParams.TripGeoTweetCandidatesDecider, + PushParams.ContentRecommenderMixerAdaptorDecider, + PushParams.GenericCandidateAdaptorDecider, + PushParams.TripGeoTweetContentMixerDarkTrafficDecider, + PushParams.EnableIsTweetTranslatableCheck, + PushParams.EnableMrTweetSimClusterFeatureHydration, + PushParams.EnableTwistlyAggregatesFeatureHydration, + PushParams.EnableTweetTwHINFavFeatureHydration, + PushParams.EnableRealGraphV2FeatureHydration, + PushParams.EnableTweetBeTFeatureHydration, + PushParams.EnableMrOfflineUserTweetTopicAggregateHydration, + PushParams.EnableMrOfflineUserTweetSimClusterAggregateHydration, + PushParams.EnableUserSendTimeFeatureHydration, + PushParams.EnableMrUserUtcSendTimeAggregateFeaturesHydration, + PushParams.EnableMrUserLocalSendTimeAggregateFeaturesHydration, + PushParams.EnableBqmlReportModelPredictionForF1Tweets, + PushParams.EnableUserTwhinEmbeddingFeatureHydration, + PushParams.EnableScribingMLFeaturesAsDataRecord, + PushParams.EnableAuthorVerifiedFeatureHydration, + PushParams.EnableAuthorCreatorSubscriptionFeatureHydration, + PushParams.EnableDirectHydrationForUserFeatures + ) + + private val intFeatureSwitchOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( + Pushservice.SportsMaxNumberOfPushesInIntervalPerEvent, + Pushservice.SportsMaxNumberOfPushesInInterval, + Pushservice.PushMixerMaxResults, + Pushservice.MaxTrendTweetNotificationsInDuration, + Pushservice.MaxRecommendedTrendsToQuery, + Pushservice.NumberOfMaxEarlybirdInNetworkCandidatesParam, + Pushservice.NumberOfMaxCandidatesToBatchInRFPHTakeStep, + Pushservice.MaxMrPushSends24HoursParam, + 
Pushservice.MaxMrPushSends24HoursNtabOnlyUsersParam, + Pushservice.NumberOfMaxCrMixerCandidatesParam, + Pushservice.RestrictStepSize, + Pushservice.MagicFanoutRankErgThresholdHeavy, + Pushservice.MagicFanoutRankErgThresholdNonHeavy, + Pushservice.MagicFanoutRelaxedEventIdFatigueIntervalInHours, + Pushservice.NumberOfMaxUTEGCandidatesQueriedParam, + Pushservice.HTLVisitFatigueTime, + Pushservice.MaxOnboardingPushInInterval, + Pushservice.MaxTopTweetsByGeoPushGivenInterval, + Pushservice.MaxHighQualityTweetsPushGivenInterval, + Pushservice.MaxTopTweetsByGeoCandidatesToTake, + Pushservice.SpaceRecsRealgraphThreshold, + Pushservice.SpaceRecsGlobalPushLimit, + Pushservice.OptoutExptPushCapParam, + Pushservice.MaxTopTweetImpressionsNotifications, + Pushservice.TopTweetImpressionsMinRequired, + Pushservice.TopTweetImpressionsThreshold, + Pushservice.TopTweetImpressionsOriginalTweetsNumDaysSearch, + Pushservice.TopTweetImpressionsMinNumOriginalTweets, + Pushservice.TopTweetImpressionsMaxFavoritesPerTweet, + Pushservice.TopTweetImpressionsTotalInboundFavoritesLimit, + Pushservice.TopTweetImpressionsTotalFavoritesLimitNumDaysSearch, + Pushservice.TopTweetImpressionsRecentTweetsByAuthorStoreMaxResults, + Pushservice.ANNEfQuery, + Pushservice.NumberOfMaxMrModelingBasedCandidates, + Pushservice.ThresholdOfFavMrModelingBasedCandidates, + Pushservice.LightRankingNumberOfCandidatesParam, + Pushservice.NumberOfDeTopicTweetCandidates, + Pushservice.NumberOfMaxDeTopicTweetCandidatesReturned, + Pushservice.OverrideNotificationsMaxNumOfSlots, + Pushservice.OverrideNotificationsMaxCountForNTab, + Pushservice.MFMaxNumberOfPushesInInterval, + Pushservice.SpacesTopKSimClusterCount, + Pushservice.SpaceRecsSimClusterUserMinimumFollowerCount, + Pushservice.OONSpaceRecsPushLimit, + Pushservice.MagicFanoutRealgraphRankThreshold, + Pushservice.CustomizedPushCapOffset, + Pushservice.NumberOfF1CandidatesThresholdForOONBackfill, + Pushservice.MinimumAllowedAuthorAccountAgeInHours, + 
Pushservice.RestrictedMinModelPushcap, + Pushservice.ListRecommendationsGeoHashLength, + Pushservice.ListRecommendationsSubscriberCount, + Pushservice.MaxListRecommendationsPushGivenInterval, + Pushservice.SendTimeByUserHistoryMaxOpenedThreshold, + Pushservice.SendTimeByUserHistoryNoSendsHours, + Pushservice.SendTimeByUserHistoryQuickSendBeforeHours, + Pushservice.SendTimeByUserHistoryQuickSendAfterHours, + Pushservice.SendTimeByUserHistoryQuickSendMinDurationInMinute, + Pushservice.SendTimeByUserHistoryNoSendMinDuration, + Pushservice.F1EmojiCopyNumOfPushesFatigue, + Pushservice.OonEmojiCopyNumOfPushesFatigue, + Pushservice.TripTweetMaxTotalCandidates, + Pushservice.InlineFeedbackSubstitutePosition, + Pushservice.HighQualityCandidatesNumberOfCandidates, + Pushservice.HighQualityCandidatesMinNumOfCandidatesToFallback, + Pushservice.ProductLaunchMaxNumberOfPushesInInterval, + Pushservice.CreatorSubscriptionPushMaxNumberOfPushesInInterval, + Pushservice.NewCreatorPushMaxNumberOfPushesInInterval, + Pushservice.TweetReplytoLikeRatioReplyCountThreshold, + Pushservice.MaxExploreVideoTweets, + ) + + private val doubleFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( + Pushservice.PercentileThresholdCohort1, + Pushservice.PercentileThresholdCohort2, + Pushservice.PercentileThresholdCohort3, + Pushservice.PercentileThresholdCohort4, + Pushservice.PercentileThresholdCohort5, + Pushservice.PercentileThresholdCohort6, + Pushservice.PnsfwTweetTextThreshold, + Pushservice.PnsfwTweetTextBucketingThreshold, + Pushservice.PnsfwTweetMediaThreshold, + Pushservice.PnsfwTweetImageThreshold, + Pushservice.PnsfwQuoteTweetThreshold, + Pushservice.PnsfwTweetMediaBucketingThreshold, + Pushservice.AgathaCalibratedNSFWThreshold, + Pushservice.AgathaCalibratedNSFWThresholdForMrTwistly, + Pushservice.AgathaTextNSFWThreshold, + Pushservice.AgathaTextNSFWThresholdForMrTwistly, + Pushservice.AgathaCalibratedNSFWBucketThreshold, + 
Pushservice.AgathaTextNSFWBucketThreshold, + Pushservice.BucketOptoutThresholdParam, + Pushservice.TweetMediaSensitiveCategoryThresholdParam, + Pushservice.CandidateGenerationModelCosineThreshold, + Pushservice.MrModelingBasedCandidatesTopicScoreThreshold, + Pushservice.HashspaceCandidatesTopicScoreThreshold, + Pushservice.FrsTweetCandidatesTopicScoreThreshold, + Pushservice.TopicProofTweetCandidatesTopicScoreThreshold, + Pushservice.SpacesTargetingSimClusterDotProductThreshold, + Pushservice.SautOonWithMediaTweetLengthThresholdParam, + Pushservice.NonSautOonWithMediaTweetLengthThresholdParam, + Pushservice.SautOonWithoutMediaTweetLengthThresholdParam, + Pushservice.NonSautOonWithoutMediaTweetLengthThresholdParam, + Pushservice.ArgfOonWithMediaTweetWordLengthThresholdParam, + Pushservice.EsfthOonWithMediaTweetWordLengthThresholdParam, + Pushservice.BqmlQualityModelPredicateThresholdParam, + Pushservice.LightRankingScribeCandidatesDownSamplingParam, + Pushservice.QualityUprankingBoostForHeavyRankingParam, + Pushservice.QualityUprankingSigmoidBiasForHeavyRankingParam, + Pushservice.QualityUprankingSigmoidWeightForHeavyRankingParam, + Pushservice.QualityUprankingLinearBarForHeavyRankingParam, + Pushservice.QualityUprankingBoostForHighQualityProducersParam, + Pushservice.QualityUprankingDownboostForLowQualityProducersParam, + Pushservice.BqmlHealthModelPredicateFilterThresholdParam, + Pushservice.BqmlHealthModelPredicateBucketThresholdParam, + Pushservice.PNegMultimodalPredicateModelThresholdParam, + Pushservice.PNegMultimodalPredicateBucketThresholdParam, + Pushservice.SeeLessOftenF1TriggerF1PushCapWeight, + Pushservice.SeeLessOftenF1TriggerNonF1PushCapWeight, + Pushservice.SeeLessOftenNonF1TriggerF1PushCapWeight, + Pushservice.SeeLessOftenNonF1TriggerNonF1PushCapWeight, + Pushservice.SeeLessOftenTripHqTweetTriggerF1PushCapWeight, + Pushservice.SeeLessOftenTripHqTweetTriggerNonF1PushCapWeight, + Pushservice.SeeLessOftenTripHqTweetTriggerTripHqTweetPushCapWeight, + 
Pushservice.SeeLessOftenNtabOnlyNotifUserPushCapWeight, + Pushservice.PromptFeedbackF1TriggerF1PushCapWeight, + Pushservice.PromptFeedbackF1TriggerNonF1PushCapWeight, + Pushservice.PromptFeedbackNonF1TriggerF1PushCapWeight, + Pushservice.PromptFeedbackNonF1TriggerNonF1PushCapWeight, + Pushservice.InlineFeedbackF1TriggerF1PushCapWeight, + Pushservice.InlineFeedbackF1TriggerNonF1PushCapWeight, + Pushservice.InlineFeedbackNonF1TriggerF1PushCapWeight, + Pushservice.InlineFeedbackNonF1TriggerNonF1PushCapWeight, + Pushservice.TweetNtabDislikeCountThresholdParam, + Pushservice.TweetNtabDislikeRateThresholdParam, + Pushservice.TweetNtabDislikeCountThresholdForMrTwistlyParam, + Pushservice.TweetNtabDislikeRateThresholdForMrTwistlyParam, + Pushservice.TweetNtabDislikeCountBucketThresholdParam, + Pushservice.MinAuthorSendsThresholdParam, + Pushservice.MinTweetSendsThresholdParam, + Pushservice.AuthorDislikeRateThresholdParam, + Pushservice.AuthorReportRateThresholdParam, + Pushservice.FavOverSendThresholdParam, + Pushservice.SpreadControlRatioParam, + Pushservice.TweetQTtoNtabClickRatioThresholdParam, + Pushservice.TweetReplytoLikeRatioThresholdLowerBound, + Pushservice.TweetReplytoLikeRatioThresholdUpperBound, + Pushservice.AuthorSensitiveMediaFilteringThreshold, + Pushservice.AuthorSensitiveMediaFilteringThresholdForMrTwistly, + Pushservice.MrRequestScribingEpsGreedyExplorationRatio, + Pushservice.SeeLessOftenTopicTriggerTopicPushCapWeight, + Pushservice.SeeLessOftenTopicTriggerF1PushCapWeight, + Pushservice.SeeLessOftenTopicTriggerOONPushCapWeight, + Pushservice.SeeLessOftenF1TriggerTopicPushCapWeight, + Pushservice.SeeLessOftenOONTriggerTopicPushCapWeight, + Pushservice.SeeLessOftenDefaultPushCapWeight, + Pushservice.OverrideMaxSlotFnWeight, + Pushservice.QualityPredicateExplicitThresholdParam, + Pushservice.AuthorSensitiveScoreWeightInReranking, + Pushservice.BigFilteringThresholdParam, + Pushservice.NsfwScoreThresholdForF1Copy, + 
Pushservice.NsfwScoreThresholdForOONCopy, + Pushservice.HighOONCThresholdForCopy, + Pushservice.LowOONCThresholdForCopy, + Pushservice.UserDeviceLanguageThresholdParam, + Pushservice.UserInferredLanguageThresholdParam, + Pushservice.SpammyTweetOonThreshold, + Pushservice.SpammyTweetInThreshold, + Pushservice.SpammyTweetBucketingThreshold, + Pushservice.NumFollowerThresholdForHealthAndQualityFilters, + Pushservice.NumFollowerThresholdForHealthAndQualityFiltersPreranking, + Pushservice.SoftRankFactorForSubscriptionCreators, + Pushservice.MagicFanoutSimClusterDotProductHeavyUserThreshold, + Pushservice.MagicFanoutSimClusterDotProductNonHeavyUserThreshold + ) + + private val doubleSeqFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getDoubleSeqFSOverrides( + Pushservice.MfGridSearchThresholdsCohort1, + Pushservice.MfGridSearchThresholdsCohort2, + Pushservice.MfGridSearchThresholdsCohort3, + Pushservice.MfGridSearchThresholdsCohort4, + Pushservice.MfGridSearchThresholdsCohort5, + Pushservice.MfGridSearchThresholdsCohort6, + Pushservice.MrPercentileGridSearchThresholdsCohort1, + Pushservice.MrPercentileGridSearchThresholdsCohort2, + Pushservice.MrPercentileGridSearchThresholdsCohort3, + Pushservice.MrPercentileGridSearchThresholdsCohort4, + Pushservice.MrPercentileGridSearchThresholdsCohort5, + Pushservice.MrPercentileGridSearchThresholdsCohort6, + Pushservice.GlobalOptoutThresholdParam, + Pushservice.BucketOptoutSlotThresholdParam, + Pushservice.BqmlQualityModelBucketThresholdListParam, + Pushservice.SeeLessOftenListOfDayKnobs, + Pushservice.SeeLessOftenListOfPushCapWeightKnobs, + Pushservice.SeeLessOftenListOfPowerKnobs, + Pushservice.PromptFeedbackListOfDayKnobs, + Pushservice.PromptFeedbackListOfPushCapWeightKnobs, + Pushservice.PromptFeedbackListOfPowerKnobs, + Pushservice.InlineFeedbackListOfDayKnobs, + Pushservice.InlineFeedbackListOfPushCapWeightKnobs, + Pushservice.InlineFeedbackListOfPowerKnobs, + Pushservice.OverrideMaxSlotFnPushCapKnobs, + 
Pushservice.OverrideMaxSlotFnPowerKnobs, + Pushservice.OverrideMaxSlotFnPushCapKnobs, /* NOTE(review): duplicate - OverrideMaxSlotFnPushCapKnobs is already passed earlier in this same argument list (just before OverrideMaxSlotFnPowerKnobs) */ + Pushservice.MagicRecsRelevanceScoreRange, + Pushservice.MagicFanoutRelevanceScoreRange, + Pushservice.MultilingualPnsfwTweetTextBucketingThreshold, + Pushservice.MultilingualPnsfwTweetTextFilteringThreshold, + ) + + private val booleanFeatureSwitchOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( + Pushservice.EnablePushRecommendationsParam, + Pushservice.DisableHeavyRankingModelFSParam, + Pushservice.EnablePushMixerReplacingAllSources, + Pushservice.EnablePushMixerReplacingAllSourcesWithControl, + Pushservice.EnablePushMixerReplacingAllSourcesWithExtra, + Pushservice.EnablePushMixerSource, + Common.EnableScheduledSpaceSpeakers, + Common.EnableScheduledSpaceSubscribers, + Pushservice.MagicFanoutNewsUserGeneratedEventsEnable, + Pushservice.MagicFanoutSkipAccountCountryPredicate, + Pushservice.MagicFanoutNewsEnableDescriptionCopy, + Pushservice.EnableF1TriggerSeeLessOftenFatigue, + Pushservice.EnableNonF1TriggerSeeLessOftenFatigue, + Pushservice.AdjustTripHqTweetTriggeredNtabCaretClickFatigue, + Pushservice.EnableCuratedTrendTweets, + Pushservice.EnableNonCuratedTrendTweets, + Pushservice.DisableMlInFilteringFeatureSwitchParam, + Pushservice.EnableTopicCopyForMF, + Pushservice.EnableTopicCopyForImplicitTopics, + Pushservice.EnableRestrictStep, + Pushservice.EnableHighPriorityPush, + Pushservice.BoostCandidatesFromSubscriptionCreators, + Pushservice.SoftRankCandidatesFromSubscriptionCreators, + Pushservice.EnableNewMROONCopyForPush, + Pushservice.EnableQueryAuthorMediaRepresentationStore, + Pushservice.EnableProfanityFilterParam, + Pushservice.EnableAbuseStrikeTop2PercentFilterSimCluster, + Pushservice.EnableAbuseStrikeTop1PercentFilterSimCluster, + Pushservice.EnableAbuseStrikeTop05PercentFilterSimCluster, + Pushservice.EnableAgathaUserHealthModelPredicate, + Pushservice.PnsfwTweetMediaFilterOonOnly, + Pushservice.EnableHealthSignalStorePnsfwTweetTextPredicate, + 
Pushservice.EnableHealthSignalStoreMultilingualPnsfwTweetTextPredicate, + Pushservice.DisableHealthFiltersForCrMixerCandidates, + Pushservice.EnableOverrideNotificationsForAndroid, + Pushservice.EnableOverrideNotificationsForIos, + Pushservice.EnableMrRequestScribingForTargetFiltering, + Pushservice.EnableMrRequestScribingForCandidateFiltering, + Pushservice.EnableMrRequestScribingWithFeatureHydrating, + Pushservice.EnableFlattenMrRequestScribing, + Pushservice.EnableMrRequestScribingForEpsGreedyExploration, + Pushservice.EnableMrRequestScribingDismissScore, + Pushservice.EnableMrRequestScribingBigFilteringSupervisedScores, + Pushservice.EnableMrRequestScribingBigFilteringRLScores, + Pushservice.EnableEventPrimaryMediaAndroid, + Pushservice.EnableEventSquareMediaIosMagicFanoutNewsEvent, + Pushservice.EnableEventSquareMediaAndroid, + Pushservice.EnableMagicFanoutNewsForYouNtabCopy, + Pushservice.EnableMfGeoTargeting, + Pushservice.EnableRuxLandingPage, + Pushservice.EnableNTabRuxLandingPage, + Pushservice.EnableGraduallyRampUpNotification, + Pushservice.EnableOnboardingPushes, + Pushservice.EnableAddressBookPush, + Pushservice.EnableCompleteOnboardingPush, + Pushservice.EnableOverrideNotificationsSmartPushConfigForAndroid, + Pushservice.DisableOnboardingPushFatigue, + Pushservice.EnableTopTweetsByGeoCandidates, + Pushservice.BackfillRankTopTweetsByGeoCandidates, + Pushservice.PopGeoTweetEnableAggressiveThresholds, + Pushservice.EnableMrMinDurationSinceMrPushFatigue, + Pushservice.EnableF1FromProtectedTweetAuthors, + Pushservice.MagicFanoutEnableCustomTargetingNewsEvent, + Pushservice.EnableSafeUserTweetTweetypieStore, + Pushservice.EnableMrMinDurationSinceMrPushFatigue, /* NOTE(review): duplicate - EnableMrMinDurationSinceMrPushFatigue is already passed four entries earlier in this same argument list */ + Pushservice.EnableHydratingOnlineMRHistoryFeatures, + Common.SpaceRecsEnableHostNotifs, + Common.SpaceRecsEnableSpeakerNotifs, + Common.SpaceRecsEnableListenerNotifs, + Common.EnableMagicFanoutProductLaunch, + Pushservice.EnableTopTweetsByGeoCandidatesForDormantUsers, + 
Pushservice.EnableOverrideNotificationsScoreBasedOverride, + Pushservice.EnableOverrideNotificationsMultipleTargetIds, + Pushservice.EnableMinDurationModifier, + Pushservice.EnableMinDurationModifierV2, + Pushservice.EnableMinDurationModifierByUserHistory, + Pushservice.EnableQueryUserOpenedHistory, + Pushservice.EnableRandomHourForQuickSend, + Pushservice.EnableFrsCandidates, + Pushservice.EnableFrsTweetCandidatesTopicSetting, + Pushservice.EnableFrsTweetCandidatesTopicAnnotation, + Pushservice.EnableFrsTweetCandidatesTopicCopy, + Pushservice.EnableCandidateGenerationModelParam, + Pushservice.EnableOverrideForSportsCandidates, + Pushservice.EnableEventIdBasedOverrideForSportsCandidates, + Pushservice.EnableMrModelingBasedCandidates, + Pushservice.EnableMrModelingBasedCandidatesTopicSetting, + Pushservice.EnableMrModelingBasedCandidatesTopicAnnotation, + Pushservice.EnableMrModelingBasedCandidatesTopicCopy, + Pushservice.EnableResultFromFrsCandidates, + Pushservice.EnableHashspaceCandidates, + Pushservice.EnableHashspaceCandidatesTopicSetting, + Pushservice.EnableHashspaceCandidatesTopicAnnotation, + Pushservice.EnableHashspaceCandidatesTopicCopy, + Pushservice.EnableResultFromHashspaceCandidates, + Pushservice.EnableDownRankOfNewUserPlaybookTopicFollowPush, + Pushservice.EnableDownRankOfNewUserPlaybookTopicTweetPush, + Pushservice.EnableTopTweetImpressionsNotification, + Pushservice.EnableLightRankingParam, + Pushservice.EnableRandomBaselineLightRankingParam, + Pushservice.EnableQualityUprankingForHeavyRankingParam, + Pushservice.EnableQualityUprankingCrtScoreStatsForHeavyRankingParam, + Pushservice.EnableProducersQualityBoostingForHeavyRankingParam, + Pushservice.EnableMrScribingMLFeaturesAsFeatureMapForStaging, + Pushservice.EnableMrTweetSentimentFeatureHydrationFS, + Pushservice.EnableTimelineHealthSignalHydration, + Pushservice.EnableTopicEngagementRealTimeAggregatesFS, + Pushservice.EnableMrUserSemanticCoreFeatureForExpt, + 
Pushservice.EnableHydratingRealGraphTargetUserFeatures, + Pushservice.EnableHydratingUserDurationSinceLastVisitFeatures, + Pushservice.EnableRealGraphUserAuthorAndSocialContxtFeatureHydration, + Pushservice.EnableUserTopicAggregatesFS, + Pushservice.EnableTimelineHealthSignalHydrationForModelTraining, + Pushservice.EnableMrUserSocialContextAggregateFeatureHydration, + Pushservice.EnableMrUserSemanticCoreAggregateFeatureHydration, + Pushservice.EnableMrUserCandidateSparseOfflineAggregateFeatureHydration, + Pushservice.EnableMrUserCandidateOfflineAggregateFeatureHydration, + Pushservice.EnableMrUserCandidateOfflineCompactAggregateFeatureHydration, + Pushservice.EnableMrUserAuthorOfflineAggregateFeatureHydration, + Pushservice.EnableMrUserAuthorOfflineCompactAggregateFeatureHydration, + Pushservice.EnableMrUserOfflineCompactAggregateFeatureHydration, + Pushservice.EnableMrUserSimcluster2020AggregateFeatureHydration, + Pushservice.EnableMrUserOfflineAggregateFeatureHydration, + Pushservice.EnableBqmlQualityModelPredicateParam, + Pushservice.EnableBqmlQualityModelScoreHistogramParam, + Pushservice.EnableBqmlHealthModelPredicateParam, + Pushservice.EnableBqmlHealthModelPredictionForInNetworkCandidatesParam, + Pushservice.EnableBqmlHealthModelScoreHistogramParam, + Pushservice.EnablePNegMultimodalPredicateParam, + Pushservice.EnableNegativeKeywordsPredicateParam, + Pushservice.EnableTweetAuthorAggregatesFeatureHydrationParam, + Pushservice.OonTweetLengthPredicateUpdatedMediaLogic, + Pushservice.OonTweetLengthPredicateUpdatedQuoteTweetLogic, + Pushservice.OonTweetLengthPredicateMoreStrictForUndefinedLanguages, + Pushservice.EnablePrerankingTweetLengthPredicate, + Pushservice.EnableDeTopicTweetCandidates, + Pushservice.EnableDeTopicTweetCandidateResults, + Pushservice.EnableDeTopicTweetCandidatesCustomTopics, + Pushservice.EnableDeTopicTweetCandidatesCustomLanguages, + Pushservice.EnableMrTweetSimClusterFeatureHydrationFS, + Pushservice.DisableOutNetworkTweetCandidatesFS, + 
Pushservice.EnableLaunchVideosInImmersiveExplore, + Pushservice.EnableStoringNtabGenericNotifKey, + Pushservice.EnableDeletingNtabTimeline, + Pushservice.EnableOverrideNotificationsNSlots, + Pushservice.EnableNslotsForOverrideOnNtab, + Pushservice.EnableOverrideMaxSlotFn, + Pushservice.EnableTargetIdInSmartPushPayloadForMagicFanoutSportsEvent, + Pushservice.EnableOverrideIdNTabRequest, + Pushservice.EnableOverrideForSpaces, + Pushservice.EnableTopicProofTweetRecs, + Pushservice.EnableHealthFiltersForTopicProofTweet, + Pushservice.EnableTargetIdsInSmartPushPayload, + Pushservice.EnableSecondaryAccountPredicateMF, + Pushservice.EnableInlineVideo, + Pushservice.EnableAutoplayForInlineVideo, + Pushservice.EnableOONGeneratedInlineActions, + Pushservice.EnableInlineFeedbackOnPush, + Pushservice.UseInlineActionsV1, + Pushservice.UseInlineActionsV2, + Pushservice.EnableFeaturedSpacesOON, + Pushservice.CheckFeaturedSpaceOON, + Pushservice.EnableGeoTargetingForSpaces, + Pushservice.EnableEmployeeOnlySpaceNotifications, + Pushservice.EnableSpacesTtlForNtab, + Pushservice.EnableCustomThreadIdForOverride, + Pushservice.EnableSimClusterTargetingSpaces, + Pushservice.TargetInInlineActionAppVisitFatigue, + Pushservice.EnableInlineActionAppVisitFatigue, + Pushservice.EnableThresholdOfFavMrModelingBasedCandidates, + Pushservice.HydrateMrUserSimclusterV2020InModelingBasedCG, + Pushservice.HydrateMrUserSemanticCoreInModelingBasedCG, + Pushservice.HydrateOnboardingInModelingBasedCG, + Pushservice.HydrateTopicFollowInModelingBasedCG, + Pushservice.HydrateMrUserTopicInModelingBasedCG, + Pushservice.HydrateMrUserAuthorInModelingBasedCG, + Pushservice.HydrateUserPenguinLanguageInModelingBasedCG, + Pushservice.EnableMrUserEngagedTweetTokensFeature, + Pushservice.HydrateMrUserHashspaceEmbeddingInModelingBasedCG, + Pushservice.HydrateUseGeoInModelingBasedCG, + Pushservice.EnableSpaceCohostJoinEvent, + Pushservice.EnableOONFilteringBasedOnUserSettings, + 
Pushservice.EnableContFnF1TriggerSeeLessOftenFatigue, + Pushservice.EnableContFnNonF1TriggerSeeLessOftenFatigue, + Pushservice.EnableContFnF1TriggerPromptFeedbackFatigue, + Pushservice.EnableContFnNonF1TriggerPromptFeedbackFatigue, + Pushservice.EnableContFnF1TriggerInlineFeedbackFatigue, + Pushservice.EnableContFnNonF1TriggerInlineFeedbackFatigue, + Pushservice.UseInlineDislikeForFatigue, + Pushservice.UseInlineDismissForFatigue, + Pushservice.UseInlineSeeLessForFatigue, + Pushservice.UseInlineNotRelevantForFatigue, + Pushservice.GPEnableCustomMagicFanoutCricketFatigue, + Pushservice.IncludeRelevanceScoreInIbis2Payload, + Pushservice.BypassGlobalSpacePushCapForSoftDeviceFollow, + Pushservice.EnableCountryCodeBackoffTopTweetsByGeo, + Pushservice.EnableNewCreatorPush, + Pushservice.EnableCreatorSubscriptionPush, + Pushservice.EnableInsSender, + Pushservice.EnableOptoutAdjustedPushcap, + Pushservice.EnableOONBackfillBasedOnF1Candidates, + Pushservice.EnableVFInTweetypie, + Pushservice.EnablePushPresentationVerifiedSymbol, + Pushservice.EnableHighPrioritySportsPush, + Pushservice.EnableSearchURLRedirectForSportsFanout, + Pushservice.EnableScoreFanoutNotification, + Pushservice.EnableExplicitPushCap, + Pushservice.EnableNsfwTokenBasedFiltering, + Pushservice.EnableRestrictedMinModelPushcap, + Pushservice.EnableGenericCRTBasedFatiguePredicate, + Pushservice.EnableCopyFeaturesForF1, + Pushservice.EnableEmojiInF1Copy, + Pushservice.EnableTargetInF1Copy, + Pushservice.EnableCopyFeaturesForOon, + Pushservice.EnableEmojiInOonCopy, + Pushservice.EnableTargetInOonCopy, + Pushservice.EnableF1CopyBody, + Pushservice.EnableOONCopyBody, + Pushservice.EnableIosCopyBodyTruncate, + Pushservice.EnableHTLBasedFatigueBasicRule, + Pushservice.EnableTargetAndEmojiSplitFatigue, + Pushservice.EnableNsfwCopy, + Pushservice.EnableOONCopyBody, /* NOTE(review): duplicate - EnableOONCopyBody is already passed five entries earlier in this same argument list */ + Pushservice.EnableTweetDynamicInlineActions, + Pushservice.EnablePushcapRefactor, + Pushservice.BigFilteringEnableHistogramsParam, + 
Pushservice.EnableTweetTranslation, + Pushservice.TripTweetCandidateReturnEnable, + Pushservice.EnableSocialContextForRetweet, + Pushservice.EnableEmptyBody, + Pushservice.EnableLocalViralTweets, + Pushservice.EnableExploreVideoTweets, + Pushservice.EnableDynamicInlineActionsForDesktopWeb, + Pushservice.EnableDynamicInlineActionsForMobileWeb, + Pushservice.EnableNTabEntriesForSportsEventNotifications, + Pushservice.EnableNTabFacePileForSportsEventNotifications, + Pushservice.DisableIsTargetBlueVerifiedPredicate, + Pushservice.EnableNTabEntriesForProductLaunchNotifications, + Pushservice.DisableIsTargetLegacyVerifiedPredicate, + Pushservice.EnableNTabOverrideForSportsEventNotifications, + Pushservice.EnableOONCBasedCopy, + Pushservice.HighQualityCandidatesEnableCandidateSource, + Pushservice.HighQualityCandidatesEnableFallback, + Pushservice.EnableTweetLanguageFilter, + Pushservice.EnableListRecommendations, + Pushservice.EnableIDSListRecommendations, + Pushservice.EnablePopGeoListRecommendations, + Pushservice.SkipLanguageFilterForMediaTweets, + Pushservice.EnableSpammyTweetFilter, + Pushservice.EnableTweetPushToHomeAndroid, + Pushservice.EnableTweetPushToHomeiOS, + Pushservice.EnableBoundedFeatureSetForSocialContext, + Pushservice.EnableStpBoundedFeatureSetForUserSocialContext, + Pushservice.EnableCoreUserHistoryBoundedFeatureSetForSocialContext, + Pushservice.SkipPostRankingFilters, + Pushservice.MRWebHoldbackParam, + Pushservice.EnableIsTargetSuperFollowCreatorPredicate + ) + + private val longSeqFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getLongSeqFSOverrides( + Pushservice.MagicFanoutEventAllowlistToSkipAccountCountryPredicate + ) + + private val longSetFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getLongSetFSOverrides( + Pushservice.ListOfAdhocIdsForStatsTracking + ) + + private val stringSeqFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getStringSeqFSOverrides( + Pushservice.ListOfCrtsForOpenApp, + Pushservice.ListOfCrtsToUpRank, + 
Pushservice.OONCandidatesDisabledCrTagParam, + Pushservice.ListOfCrtsToDownRank, + Pushservice.MagicFanoutDenyListedCountries, + Pushservice.GlobalOptoutModelParam, + Pushservice.BqmlQualityModelBucketModelIdListParam, + Pushservice.CommonRecommendationTypeDenyListPushHoldbacks, + Pushservice.TargetLevelFeatureListForMrRequestScribing, + Pushservice.MagicFanoutSportsEventDenyListedCountries, + Pushservice.MultilingualPnsfwTweetTextSupportedLanguages, + Pushservice.NegativeKeywordsPredicateDenylist, + Pushservice.TripTweetCandidateSourceIds, + Pushservice.NsfwTokensParam, + Pushservice.HighQualityCandidatesFallbackSourceIds + ) + + private val intSeqFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getIntSeqFSOverrides( + Pushservice.BucketOptoutSlotPushcapParam, + Pushservice.GeoHashLengthList, + Pushservice.MinDurationModifierStartHourList, + Pushservice.MinDurationModifierEndHourList, + Pushservice.MinDurationTimeModifierConst + ) + + private val enumFeatureSwitchOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( + stat, + logger, + Pushservice.MRBoldTitleFavoriteAndRetweetParam, + Pushservice.QualityUprankingTransformTypeParam, + Pushservice.QualityPredicateIdParam, + Pushservice.BigFilteringNormalizationTypeIdParam, + Common.PushcapModelType, + Common.MFCricketTargetingPredicate, + Pushservice.RankingFunctionForTopTweetsByGeo, + Pushservice.TopTweetsByGeoCombinationParam, + Pushservice.PopGeoTweetVersionParam, + Pushservice.SubtextInAndroidPushHeaderParam, + Pushservice.HighOONCTweetFormat, + Pushservice.LowOONCTweetFormat, + ) + + private val enumSeqFeatureSwitchOverrides = FeatureSwitchOverrideUtil.getEnumSeqFSOverrides( + stat, + logger, + Pushservice.OONTweetDynamicInlineActionsList, + Pushservice.TweetDynamicInlineActionsList, + Pushservice.TweetDynamicInlineActionsListForWeb, + Pushservice.HighQualityCandidatesEnableGroups, + Pushservice.HighQualityCandidatesFallbackEnabledGroups, + Pushservice.OONCandidatesDisabledCrtGroupParam, + 
Pushservice.MultilingualPnsfwTweetTextBucketingModelList, + ) + + private val stringFeatureSwitchOverrides = FeatureSwitchOverrideUtil.getStringFSOverrides( + Common.PushcapModelPredictionVersion, + Pushservice.WeightedOpenOrNtabClickRankingModelParam, + Pushservice.WeightedOpenOrNtabClickFilteringModelParam, + Pushservice.BucketOptoutModelParam, + Pushservice.ScoringFuncForTopTweetsByGeo, + Pushservice.LightRankingModelTypeParam, + Pushservice.BigFilteringSupervisedSendingModelParam, + Pushservice.BigFilteringSupervisedWithoutSendingModelParam, + Pushservice.BigFilteringRLSendingModelParam, + Pushservice.BigFilteringRLWithoutSendingModelParam, + Pushservice.BqmlQualityModelTypeParam, + Pushservice.BqmlHealthModelTypeParam, + Pushservice.QualityUprankingModelTypeParam, + Pushservice.SearchURLRedirectForSportsFanout, + Pushservice.LocalViralTweetsBucket, + Pushservice.HighQualityCandidatesHeavyRankingModel, + Pushservice.HighQualityCandidatesNonPersonalizedQualityCnnModel, + Pushservice.HighQualityCandidatesBqmlNsfwModel, + Pushservice.HighQualityCandidatesBqmlReportModel, + Pushservice.ProductLaunchLandingPageDeepLink, + Pushservice.ProductLaunchTapThrough, + Pushservice.TweetLanguageFeatureNameParam + ) + + private val durationFeatureSwitchOverrides = + FeatureSwitchOverrideUtil.getBoundedDurationFSOverrides( + Common.NumberOfDaysToFilterMRForSeeLessOften, + Common.NumberOfDaysToReducePushCapForSeeLessOften, + Pushservice.NumberOfDaysToFilterForSeeLessOftenForF1TriggerF1, + Pushservice.NumberOfDaysToReducePushCapForSeeLessOftenForF1TriggerF1, + Pushservice.NumberOfDaysToFilterForSeeLessOftenForF1TriggerNonF1, + Pushservice.NumberOfDaysToReducePushCapForSeeLessOftenForF1TriggerNonF1, + Pushservice.NumberOfDaysToFilterForSeeLessOftenForNonF1TriggerF1, + Pushservice.NumberOfDaysToReducePushCapForSeeLessOftenForNonF1TriggerF1, + Pushservice.NumberOfDaysToFilterForSeeLessOftenForNonF1TriggerNonF1, + 
Pushservice.NumberOfDaysToReducePushCapForSeeLessOftenForNonF1TriggerNonF1, + Pushservice.TrendTweetNotificationsFatigueDuration, + Pushservice.MinDurationSincePushParam, + Pushservice.MFMinIntervalFatigue, + Pushservice.SimclusterBasedCandidateMaxTweetAgeParam, + Pushservice.DetopicBasedCandidateMaxTweetAgeParam, + Pushservice.F1CandidateMaxTweetAgeParam, + Pushservice.MaxTweetAgeParam, + Pushservice.ModelingBasedCandidateMaxTweetAgeParam, + Pushservice.GeoPopTweetMaxAgeInHours, + Pushservice.MinDurationSincePushParam, /* NOTE(review): duplicate - MinDurationSincePushParam is already passed eight entries earlier in this same argument list */ + Pushservice.GraduallyRampUpPhaseDurationDays, + Pushservice.MrMinDurationSincePushForOnboardingPushes, + Pushservice.FatigueForOnboardingPushes, + Pushservice.FrigateHistoryOtherNotificationWriteTtl, + Pushservice.FrigateHistoryTweetNotificationWriteTtl, + Pushservice.TopTweetsByGeoPushInterval, + Pushservice.HighQualityTweetsPushInterval, + Pushservice.MrMinDurationSincePushForTopTweetsByGeoPushes, + Pushservice.TimeSinceLastLoginForGeoPopTweetPush, + Pushservice.NewUserPlaybookAllowedLastLoginHours, + Pushservice.SpaceRecsAppFatigueDuration, + Pushservice.OONSpaceRecsFatigueDuration, + Pushservice.SpaceRecsFatigueMinIntervalDuration, + Pushservice.SpaceRecsGlobalFatigueDuration, + Pushservice.MinimumTimeSinceLastLoginForGeoPopTweetPush, + Pushservice.MinFatigueDurationSinceLastHTLVisit, + Pushservice.LastHTLVisitBasedNonFatigueWindow, + Pushservice.SpaceNotificationsTTLDurationForNTab, + Pushservice.OverrideNotificationsLookbackDurationForOverrideInfo, + Pushservice.OverrideNotificationsLookbackDurationForImpressionId, + Pushservice.OverrideNotificationsLookbackDurationForNTab, + Pushservice.TopTweetImpressionsNotificationInterval, + Pushservice.TopTweetImpressionsFatigueMinIntervalDuration, + Pushservice.MFPushIntervalInHours, + Pushservice.InlineActionAppVisitFatigue, + Pushservice.SpaceParticipantHistoryLastActiveThreshold, + Pushservice.SportsMinIntervalFatigue, + Pushservice.SportsPushIntervalInHours, + 
Pushservice.SportsMinIntervalFatiguePerEvent, + Pushservice.SportsPushIntervalInHoursPerEvent, + Pushservice.TargetNtabOnlyCapFatigueIntervalHours, + Pushservice.TargetPushCapFatigueIntervalHours, + Pushservice.CopyFeaturesHistoryLookbackDuration, + Pushservice.F1EmojiCopyFatigueDuration, + Pushservice.F1TargetCopyFatigueDuration, + Pushservice.OonEmojiCopyFatigueDuration, + Pushservice.OonTargetCopyFatigueDuration, + Pushservice.ProductLaunchPushIntervalInHours, + Pushservice.ExploreVideoTweetAgeParam, + Pushservice.ListRecommendationsPushInterval, + Pushservice.ProductLaunchMinIntervalFatigue, + Pushservice.NewCreatorPushIntervalInHours, + Pushservice.NewCreatorPushMinIntervalFatigue, + Pushservice.CreatorSubscriptionPushIntervalInHours, + Pushservice.CreatorSubscriptionPushhMinIntervalFatigue + ) + + private[params] val allFeatureSwitchOverrides = + booleanDeciderOverrides ++ + booleanFeatureSwitchOverrides ++ + intFeatureSwitchOverrides ++ + doubleFeatureSwitchOverrides ++ + doubleSeqFeatureSwitchOverrides ++ + enumFeatureSwitchOverrides ++ + stringSeqFeatureSwitchOverrides ++ + stringFeatureSwitchOverrides ++ + durationFeatureSwitchOverrides ++ + intSeqFeatureSwitchOverrides ++ + longSeqFeatureSwitchOverrides ++ + enumSeqFeatureSwitchOverrides ++ + longSetFeatureSwitchOverrides + + val config = BaseConfigBuilder(allFeatureSwitchOverrides).build() +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushMLModelParams.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushMLModelParams.scala new file mode 100644 index 000000000..c451a61bc --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushMLModelParams.scala @@ -0,0 +1,60 @@ +package com.twitter.frigate.pushservice.params + +/** + * This enum defines ML models for push + */ +object PushMLModel extends Enumeration { + type PushMLModel = Value + + val WeightedOpenOrNtabClickProbability = Value + val DauProbability = Value + 
val OptoutProbability = Value + val FilteringProbability = Value + val BigFilteringSupervisedSendingModel = Value + val BigFilteringSupervisedWithoutSendingModel = Value + val BigFilteringRLSendingModel = Value + val BigFilteringRLWithoutSendingModel = Value + val HealthNsfwProbability = Value +} + +object WeightedOpenOrNtabClickModel { + type ModelNameType = String + + // MR models + val Periodically_Refreshed_Prod_Model = + "Periodically_Refreshed_Prod_Model" // used in DBv2 service; needed to migrate gradually via feature switch +} + + +object OptoutModel { + type ModelNameType = String + val D0_has_realtime_features = "D0_has_realtime_features" + val D0_no_realtime_features = "D0_no_realtime_features" +} + +object HealthNsfwModel { + type ModelNameType = String + val Q2_2022_Mr_Bqml_Health_Model_NsfwV0 = "Q2_2022_Mr_Bqml_Health_Model_NsfwV0" +} + +object BigFilteringSupervisedModel { + type ModelNameType = String + val V0_0_BigFiltering_Supervised_Sending_Model = "Q3_2022_bigfiltering_supervised_send_model_v0" + val V0_0_BigFiltering_Supervised_Without_Sending_Model = + "Q3_2022_bigfiltering_supervised_not_send_model_v0" +} + +object BigFilteringRLModel { + type ModelNameType = String + val V0_0_BigFiltering_Rl_Sending_Model = "Q3_2022_bigfiltering_rl_send_model_dqn_dau_15_open" + val V0_0_BigFiltering_Rl_Without_Sending_Model = + "Q3_2022_bigfiltering_rl_not_send_model_dqn_dau_15_open" +} + +case class PushModelName( + modelType: PushMLModel.Value, + version: WeightedOpenOrNtabClickModel.ModelNameType /* ModelNameType is an alias for String here, as in every model object of this file */) { + override def toString: String = { + modelType.toString + "_" + version + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushParams.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushParams.scala new file mode 100644 index 000000000..5e5f6af6a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushParams.scala @@ -0,0 +1,534 @@ +package 
com.twitter.frigate.pushservice.params + +import com.twitter.rux.common.context.thriftscala.ExperimentKey +import com.twitter.timelines.configapi.Param +import com.twitter.timelines.configapi.decider.BooleanDeciderParam + +object PushParams { + + /** + * Disable ML models in filtering + */ + object DisableMlInFilteringParam extends BooleanDeciderParam(DeciderKey.disableMLInFiltering) + + /** + * Disable ML models in ranking, use random ranking instead + * This param is used for ML holdback and training data collection + */ + object UseRandomRankingParam extends Param(false) + + /** + * Disable feature hydration, ML ranking, and ML filtering + * Use default order from candidate source + * This param is for service continuity + */ + object DisableAllRelevanceParam extends BooleanDeciderParam(DeciderKey.disableAllRelevance) + + /** + * Disable ML heavy ranking + * Use default order from candidate source + * This param is for service continuity + */ + object DisableHeavyRankingParam extends BooleanDeciderParam(DeciderKey.disableHeavyRanking) + + /** + * Restrict ML light ranking by selecting top3 candidates + * Use default order from candidate source + * This param is for service continuity + */ + object RestrictLightRankingParam extends BooleanDeciderParam(DeciderKey.restrictLightRanking) + + /** + * Downsample ML light ranking scribed candidates + */ + object DownSampleLightRankingScribeCandidatesParam + extends BooleanDeciderParam(DeciderKey.downSampleLightRankingScribeCandidates) + + /** + * Set it to true only for Android only ranking experiments + */ + object AndroidOnlyRankingExperimentParam extends Param(false) + + /** + * Enable the user_tweet_entity_graph tweet candidate source. 
+ */ + object UTEGTweetCandidateSourceParam + extends BooleanDeciderParam(DeciderKey.entityGraphTweetRecsDeciderKey) + + /** + * Enable writes to Notification Service + */ + object EnableWritesToNotificationServiceParam + extends BooleanDeciderParam(DeciderKey.enablePushserviceWritesToNotificationServiceDeciderKey) + + /** + * Enable writes to Notification Service for all employees + */ + object EnableWritesToNotificationServiceForAllEmployeesParam + extends BooleanDeciderParam( + DeciderKey.enablePushserviceWritesToNotificationServiceForAllEmployeesDeciderKey) + + /** + * Enable writes to Notification Service for everyone + */ + object EnableWritesToNotificationServiceForEveryoneParam + extends BooleanDeciderParam( + DeciderKey.enablePushserviceWritesToNotificationServiceForEveryoneDeciderKey) + + /** + * Enable fatiguing MR for Ntab caret click + */ + object EnableFatigueNtabCaretClickingParam extends Param(true) + + /** + * Param for disabling in-network Tweet candidates + */ + object DisableInNetworkTweetCandidatesParam extends Param(false) + + /** + * Decider controlled param to enable prompt feedback response NO predicate + */ + object EnablePromptFeedbackFatigueResponseNoPredicate + extends BooleanDeciderParam( + DeciderKey.enablePromptFeedbackFatigueResponseNoPredicateDeciderKey) + + /** + * Enable hydration and generation of Social context (TF, TR) based candidates for Earlybird Tweets + */ + object EarlyBirdSCBasedCandidatesParam + extends BooleanDeciderParam(DeciderKey.enableUTEGSCForEarlybirdTweetsDecider) + + /** + * Param to allow reduce to one social proof for tweet param in UTEG + */ + object AllowOneSocialProofForTweetInUTEGParam extends Param(true) + + /** + * Param to query UTEG for out network tweets only + */ + object OutNetworkTweetsOnlyForUTEGParam extends Param(false) + + object EnablePushSendEventBus extends BooleanDeciderParam(DeciderKey.enablePushSendEventBus) + + /** + * Enable RUX Tweet landing page for push open on iOS + */ + object 
EnableRuxLandingPageIOSParam extends Param[Boolean](true) + + /** + * Enable RUX Tweet landing page for push open on Android + */ + object EnableRuxLandingPageAndroidParam extends Param[Boolean](true) + + /** + * Param to decide which ExperimentKey to be encoded into Rux landing page context object. + * The context object is sent to rux-api and rux-api applies logic (e.g. show reply module on + * rux landing page or not) accordingly based on the experiment key. + */ + object RuxLandingPageExperimentKeyIOSParam extends Param[Option[ExperimentKey]](None) + object RuxLandingPageExperimentKeyAndroidParam extends Param[Option[ExperimentKey]](None) + + /** + * Param to enable MR Tweet Fav Recs + */ + object MRTweetFavRecsParam extends BooleanDeciderParam(DeciderKey.enableTweetFavRecs) + + /** + * Param to enable MR Tweet Retweet Recs + */ + object MRTweetRetweetRecsParam extends BooleanDeciderParam(DeciderKey.enableTweetRetweetRecs) + + /** + * Param to disable writing to NTAB + * */ + object DisableWritingToNTAB extends Param[Boolean](default = false) + + /** + * Param to show RUX landing page as a modal on iOS + */ + object ShowRuxLandingPageAsModalOnIOS extends Param[Boolean](default = false) + + /** + * Param to enable mr end to end scribing + */ + object EnableMrRequestScribing extends BooleanDeciderParam(DeciderKey.enableMrRequestScribing) + + /** + * Param to enable scribing of high quality candidate scores + */ + object EnableHighQualityCandidateScoresScribing + extends BooleanDeciderParam(DeciderKey.enableHighQualityCandidateScoresScribing) + + /** + * Decider controlled param to pNeg multimodal predictions for F1 tweets + */ + object EnablePnegMultimodalPredictionForF1Tweets + extends BooleanDeciderParam(DeciderKey.enablePnegMultimodalPredictionForF1Tweets) + + /** + * Decider controlled param to scribe oonFav score for F1 tweets + */ + object EnableScribeOonFavScoreForF1Tweets + extends BooleanDeciderParam(DeciderKey.enableScribingOonFavScoreForF1Tweets) + + 
/** + * Param to enable htl user aggregates extended hydration + */ + object EnableHtlOfflineUserAggregatesExtendedHydration + extends BooleanDeciderParam(DeciderKey.enableHtlOfflineUserAggregateExtendedFeaturesHydration) + + /** + * Param to enable predicate detailed info scribing + */ + object EnablePredicateDetailedInfoScribing + extends BooleanDeciderParam(DeciderKey.enablePredicateDetailedInfoScribing) + + /** + * Param to enable push cap info scribing (controlled by the same decider key as predicate detailed info scribing) + */ + object EnablePushCapInfoScribing + extends BooleanDeciderParam(DeciderKey.enablePredicateDetailedInfoScribing) + + /** + * Param to enable user signal language feature hydration + */ + object EnableUserSignalLanguageFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserSignalLanguageFeatureHydration) + + /** + * Param to enable user preferred language feature hydration + */ + object EnableUserPreferredLanguageFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserPreferredLanguageFeatureHydration) + + /** + * Param to enable ner erg feature hydration + */ + object EnableNerErgFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableNerErgFeaturesHydration) + + /** + * Param to enable inline action on push copy for Android + */ + object MRAndroidInlineActionOnPushCopyParam extends Param[Boolean](default = true) + + /** + * Param to enable hydrating mr user semantic core embedding features + * */ + object EnableMrUserSemanticCoreFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserSemanticCoreFeaturesHydration) + + /** + * Param to enable hydrating mr user semantic core embedding features filtered by 0.0000001 + * */ + object EnableMrUserSemanticCoreNoZeroFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserSemanticCoreNoZeroFeaturesHydration) + + /* + * Param to enable days since user's recent resurrection features hydration + */ + object EnableDaysSinceRecentResurrectionFeatureHydration + extends
BooleanDeciderParam(DeciderKey.enableDaysSinceRecentResurrectionFeatureHydration) + + /* + * Param to enable user past aggregates features hydration + */ + object EnableUserPastAggregatesFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserPastAggregatesFeatureHydration) + + /* + * Param to enable mr user simcluster features (v2020) hydration + * */ + object EnableMrUserSimclusterV2020FeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserSimclusterV2020FeaturesHydration) + + /* + * Param to enable mr user simcluster no-zero features (v2020) hydration + * */ + object EnableMrUserSimclusterV2020NoZeroFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserSimclusterV2020NoZeroFeaturesHydration) + + /* + * Param to enable HTL topic engagement realtime aggregate features + * */ + object EnableTopicEngagementRealTimeAggregatesFeatureHydration + extends BooleanDeciderParam( + DeciderKey.enableTopicEngagementRealTimeAggregatesFeatureHydration) + + object EnableUserTopicAggregatesFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserTopicAggregatesFeatureHydration) + + /** + * Param to enable user author RTA feature hydration + */ + object EnableHtlUserAuthorRTAFeaturesFromFeatureStoreHydration + extends BooleanDeciderParam(DeciderKey.enableHtlUserAuthorRealTimeAggregateFeatureHydration) + + /** + * Param to enable duration since last visit features + */ + object EnableDurationSinceLastVisitFeatures + extends BooleanDeciderParam(DeciderKey.enableDurationSinceLastVisitFeatureHydration) + + object EnableTweetAnnotationFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableTweetAnnotationFeatureHydration) + + /** + * Param to Enable visibility filtering through SpaceVisibilityLibrary from SpacePredicate + */ + object EnableSpaceVisibilityLibraryFiltering + extends BooleanDeciderParam(DeciderKey.enableSpaceVisibilityLibraryFiltering) + + /* + * Param to enable user topic follow feature set hydration + *
*/ + object EnableUserTopicFollowFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableUserTopicFollowFeatureSet) + + /* + * Param to enable onboarding new user feature set hydration + * */ + object EnableOnboardingNewUserFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableOnboardingNewUserFeatureSet) + + /* + * Param to enable mr user author sparse continuous feature set hydration + * */ + object EnableMrUserAuthorSparseContFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserAuthorSparseContFeatureSet) + + /* + * Param to enable mr user topic sparse continuous feature set hydration + * */ + object EnableMrUserTopicSparseContFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserTopicSparseContFeatureSet) + + /* + * Param to enable penguin language feature set hydration + * */ + object EnableUserPenguinLanguageFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableUserPenguinLanguageFeatureSet) + + /* + * Param to enable user engaged tweet tokens feature hydration + * */ + object EnableMrUserEngagedTweetTokensFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserEngagedTweetTokensFeaturesHydration) + + /* + * Param to enable candidate tweet tokens feature hydration + * */ + object EnableMrCandidateTweetTokensFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableMrCandidateTweetTokensFeaturesHydration) + + /* + * Param to enable mr user hashspace embedding feature set hydration + * */ + object EnableMrUserHashspaceEmbeddingFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserHashspaceEmbeddingFeatureSet) + + /* + * Param to enable mr tweet sentiment feature set hydration + * */ + object EnableMrTweetSentimentFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableMrTweetSentimentFeatureSet) + + /* + * Param to enable mr tweet_author aggregates feature set hydration + * */ + object EnableMrTweetAuthorAggregatesFeatureHydration + extends 
BooleanDeciderParam(DeciderKey.enableMrTweetAuthorAggregatesFeatureSet) + + /** + * Param to enable twistly aggregated features + */ + object EnableTwistlyAggregatesFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableTwistlyAggregatesFeatureHydration) + + /** + * Param to enable tweet twhin favorite features + */ + object EnableTweetTwHINFavFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableTweetTwHINFavFeaturesHydration) + + /* + * Param to enable mr user geo feature set hydration + * */ + object EnableUserGeoFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableUserGeoFeatureSet) + + /* + * Param to enable mr author geo feature set hydration + * */ + object EnableAuthorGeoFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.enableAuthorGeoFeatureSet) + + /* + * Param to ramp up mr user geo feature set hydration + * */ + object RampupUserGeoFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.rampupUserGeoFeatureSet) + + /* + * Param to ramp up mr author geo feature set hydration + * */ + object RampupAuthorGeoFeatureSetHydration + extends BooleanDeciderParam(DeciderKey.rampupAuthorGeoFeatureSet) + + /* + * Decider controlled param to enable Pop Geo Tweets + * */ + object PopGeoCandidatesDecider extends BooleanDeciderParam(DeciderKey.enablePopGeoTweets) + + /** + * Decider controlled param to enable Trip Geo Tweets + */ + object TripGeoTweetCandidatesDecider + extends BooleanDeciderParam(DeciderKey.enableTripGeoTweetCandidates) + + /** + * Decider controlled param to enable ContentRecommenderMixerAdaptor + */ + object ContentRecommenderMixerAdaptorDecider + extends BooleanDeciderParam(DeciderKey.enableContentRecommenderMixerAdaptor) + + /** + * Decider controlled param to enable GenericCandidateAdaptor + */ + object GenericCandidateAdaptorDecider + extends BooleanDeciderParam(DeciderKey.enableGenericCandidateAdaptor) + + /** + * Decider controlled param to enable dark traffic to ContentMixer for Trip Geo
Tweets + */ + object TripGeoTweetContentMixerDarkTrafficDecider + extends BooleanDeciderParam(DeciderKey.enableTripGeoTweetContentMixerDarkTraffic) + + /* + * Decider controlled param to enable Trends Tweets + * */ + object TrendsCandidateDecider extends BooleanDeciderParam(DeciderKey.enableTrendsTweets) + + /* + * Decider controlled param to enable INS Traffic + **/ + object EnableInsTrafficDecider extends BooleanDeciderParam(DeciderKey.enableInsTraffic) + + /** + * Param to enable assigning pushcap with ML predictions (read from MH table). + * Disabling will fallback to only use heuristics and default values. + */ + object EnableModelBasedPushcapAssignments + extends BooleanDeciderParam(DeciderKey.enableModelBasedPushcapAssignments) + + /** + * Param to enable twhin user engagement feature hydration + */ + object EnableTwHINUserEngagementFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableTwHINUserEngagementFeaturesHydration) + + /** + * Param to enable twhin user follow feature hydration + */ + object EnableTwHINUserFollowFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableTwHINUserFollowFeaturesHydration) + + /** + * Param to enable twhin author follow feature hydration + */ + object EnableTwHINAuthorFollowFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableTwHINAuthorFollowFeaturesHydration) + + /** + * Param to enable calls to the IsTweetTranslatable strato column + */ + object EnableIsTweetTranslatableCheck + extends BooleanDeciderParam(DeciderKey.enableIsTweetTranslatable) + + /** + * Decider controlled param to enable mr tweet simcluster feature set hydration + */ + object EnableMrTweetSimClusterFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableMrTweetSimClusterFeatureSet) + + /** + * Decider controlled param to enable real graph v2 feature set hydration + */ + object EnableRealGraphV2FeatureHydration + extends BooleanDeciderParam(DeciderKey.enableRealGraphV2FeatureHydration) + + /** + * Decider
controlled param to enable Tweet BeT feature set hydration + */ + object EnableTweetBeTFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableTweetBeTFeatureHydration) + + /** + * Decider controlled param to enable mr user tweet topic feature set hydration + */ + object EnableMrOfflineUserTweetTopicAggregateHydration + extends BooleanDeciderParam(DeciderKey.enableMrOfflineUserTweetTopicAggregate) + + /** + * Decider controlled param to enable mr tweet simcluster feature set hydration + */ + object EnableMrOfflineUserTweetSimClusterAggregateHydration + extends BooleanDeciderParam(DeciderKey.enableMrOfflineUserTweetSimClusterAggregate) + + /** + * Decider controlled param to enable user send time features + */ + object EnableUserSendTimeFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserSendTimeFeatureHydration) + + /** + * Decider controlled param to enable mr user utc send time aggregate features + */ + object EnableMrUserUtcSendTimeAggregateFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserUtcSendTimeAggregateFeaturesHydration) + + /** + * Decider controlled param to enable mr user local send time aggregate features + */ + object EnableMrUserLocalSendTimeAggregateFeaturesHydration + extends BooleanDeciderParam(DeciderKey.enableMrUserLocalSendTimeAggregateFeaturesHydration) + + /** + * Decider controlled param to enable BQML report model predictions for F1 tweets + */ + object EnableBqmlReportModelPredictionForF1Tweets + extends BooleanDeciderParam(DeciderKey.enableBqmlReportModelPredictionForF1Tweets) + + /** + * Decider controlled param to enable user Twhin embedding feature hydration + */ + object EnableUserTwhinEmbeddingFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableUserTwhinEmbeddingFeatureHydration) + + /** + * Decider controlled param to enable author follow Twhin embedding feature hydration + */ + object EnableAuthorFollowTwhinEmbeddingFeatureHydration + extends 
BooleanDeciderParam(DeciderKey.enableAuthorFollowTwhinEmbeddingFeatureHydration) + + object EnableScribingMLFeaturesAsDataRecord + extends BooleanDeciderParam(DeciderKey.enableScribingMLFeaturesAsDataRecord) + + /** + * Decider controlled param to enable feature hydration for Verified related feature + */ + object EnableAuthorVerifiedFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableAuthorVerifiedFeatureHydration) + + /** + * Decider controlled param to enable feature hydration for creator subscription related feature + */ + object EnableAuthorCreatorSubscriptionFeatureHydration + extends BooleanDeciderParam(DeciderKey.enableAuthorCreatorSubscriptionFeatureHydration) + + /** + * Decider controlled param to direct MH+Memcache hydration for the UserFeaturesDataset + */ + object EnableDirectHydrationForUserFeatures + extends BooleanDeciderParam(DeciderKey.enableDirectHydrationForUserFeatures) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushServiceTunableKeys.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushServiceTunableKeys.scala new file mode 100644 index 000000000..7920bb6cd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/PushServiceTunableKeys.scala @@ -0,0 +1,9 @@ +package com.twitter.frigate.pushservice.params + +import com.twitter.util.tunable.TunableMap + +object PushServiceTunableKeys { + final val IbisQpsLimitTunableKey = TunableMap.Key[Int]("ibis2.qps.limit") + final val NtabQpsLimitTunableKey = TunableMap.Key[Int]("ntab.qps.limit") + final val TweetPerspectiveStoreQpsLimit = TunableMap.Key[Int]("tweetperspective.qps.limit") +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/ShardParams.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/ShardParams.scala new file mode 100644 index 000000000..c0a68c939 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/params/ShardParams.scala @@ -0,0 +1,3 @@ +package com.twitter.frigate.pushservice.params + +case class ShardParams(numShards: Int, shardId: Int) diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BigFilteringEpsilonGreedyExplorationPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BigFilteringEpsilonGreedyExplorationPredicate.scala new file mode 100644 index 000000000..67a117cc5 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BigFilteringEpsilonGreedyExplorationPredicate.scala @@ -0,0 +1,58 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tracing.Trace +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hashing.KeyHasher +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +/* + * A predicate for epsilon-greedy exploration; + * We defined it as a candidate level predicate to avoid changing the predicate and scribing pipeline, + * but it is actually a post-ranking target level predicate: + * if a target user IS ENABLED for \epsilon-greedy exploration, + * then with probability epsilon, the user (and thus all candidates) will be blocked + */ +object BigFilteringEpsilonGreedyExplorationPredicate { + + val name = "BigFilteringEpsilonGreedyExplorationPredicate" + + private def shouldFilterBasedOnEpsilonGreedyExploration( + target: Target + ): Boolean = { + val seed = KeyHasher.FNV1A_64.hashKey(s"${target.targetId}".getBytes("UTF8")) + val hashKey = KeyHasher.FNV1A_64 + .hashKey( + s"${Trace.id.traceId.toString}:${seed.toString}".getBytes("UTF8") + ) + + 
math.abs(hashKey).toDouble / Long.MaxValue < + target.params(PushFeatureSwitchParams.MrRequestScribingEpsGreedyExplorationRatio) + } + + def apply()(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = { + val stats = statsReceiver.scope(s"predicate_$name") + + val enabledForEpsilonGreedyCounter = stats.counter("enabled_for_eps_greedy") + + new Predicate[PushCandidate] { + def apply(candidates: Seq[PushCandidate]): Future[Seq[Boolean]] = { + val results = candidates.map { candidate => + if (!candidate.target.skipFilters && candidate.target.params( + PushFeatureSwitchParams.EnableMrRequestScribingForEpsGreedyExploration)) { + enabledForEpsilonGreedyCounter.incr() + !shouldFilterBasedOnEpsilonGreedyExploration(candidate.target) + } else { + true + } + } + Future.value(results) + } + }.withStats(stats) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlHealthModelPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlHealthModelPredicates.scala new file mode 100644 index 000000000..f7ff95c9b --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlHealthModelPredicates.scala @@ -0,0 +1,129 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.ml.HealthFeatureGetter +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import 
com.twitter.frigate.pushservice.params.PushMLModel +import com.twitter.util.Future +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.frigate.thriftscala.UserMediaRepresentation +import com.twitter.hss.api.thriftscala.UserHealthSignalResponse +import com.twitter.storehaus.ReadableStore + +object BqmlHealthModelPredicates { + + def healthModelOonPredicate( + bqmlHealthModelScorer: PushMLModelScorer, + producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation], + userHealthScoreStore: ReadableStore[Long, UserHealthSignalResponse], + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType with TweetAuthor + ] = { + val name = "bqml_health_model_based_predicate" + val scopedStatsReceiver = stats.scope(name) + + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + val oonCandidatesCounter = scopedStatsReceiver.counter("oon_candidates") + val filteredOonCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + val emptyScoreCandidatesCounter = scopedStatsReceiver.counter("empty_score_candidates") + val healthScoreStat = scopedStatsReceiver.stat("health_model_dist") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType) || + RecTypes.outOfNetworkTopicTweetTypes.contains(candidate.commonRecType) + + lazy val enableBqmlHealthModelPredicateParam = + target.params(PushFeatureSwitchParams.EnableBqmlHealthModelPredicateParam) + lazy val enableBqmlHealthModelPredictionForInNetworkCandidates = + target.params( + PushFeatureSwitchParams.EnableBqmlHealthModelPredictionForInNetworkCandidatesParam) + lazy val bqmlHealthModelPredicateFilterThresholdParam = + 
target.params(PushFeatureSwitchParams.BqmlHealthModelPredicateFilterThresholdParam) + lazy val healthModelId = target.params(PushFeatureSwitchParams.BqmlHealthModelTypeParam) + lazy val enableBqmlHealthModelScoreHistogramParam = + target.params(PushFeatureSwitchParams.EnableBqmlHealthModelScoreHistogramParam) + val healthModelScoreFeature = "bqml_health_model_score" + + val histogramBinSize = 0.05 + lazy val healthCandidateScoreHistogramCounters = + bqmlHealthModelScorer.getScoreHistogramCounters( + scopedStatsReceiver, + "health_score_histogram", + histogramBinSize) + + candidate match { + case candidate: PushCandidate with TweetAuthor with TweetAuthorDetails + if enableBqmlHealthModelPredicateParam && (isOonCandidate || enableBqmlHealthModelPredictionForInNetworkCandidates) => + HealthFeatureGetter + .getFeatures( + candidate, + producerMediaRepresentationStore, + userHealthScoreStore, + Some(tweetHealthScoreStore)) + .flatMap { healthFeatures => + allCandidatesCounter.incr() + candidate.mergeFeatures(healthFeatures) + + val healthModelScoreFutOpt = + if (candidate.numericFeatures.contains(healthModelScoreFeature)) { + Future.value(candidate.numericFeatures.get(healthModelScoreFeature)) + } else + bqmlHealthModelScorer.singlePredicationForModelVersion( + healthModelId, + candidate + ) + + candidate.populateQualityModelScore( + PushMLModel.HealthNsfwProbability, + healthModelId, + healthModelScoreFutOpt + ) + + healthModelScoreFutOpt.map { + case Some(healthModelScore) => + healthScoreStat.add((healthModelScore * 10000).toFloat) + if (enableBqmlHealthModelScoreHistogramParam) { + healthCandidateScoreHistogramCounters( + math.ceil(healthModelScore / histogramBinSize).toInt).incr() + } + + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && isOonCandidate) { + oonCandidatesCounter.incr() + val threshold = bqmlHealthModelPredicateFilterThresholdParam + candidate.cachePredicateInfo( + name, + healthModelScore, + threshold, + healthModelScore > 
threshold) + if (healthModelScore > threshold) { + filteredOonCandidatesCounter.incr() + false + } else true + } else true + case _ => + emptyScoreCandidatesCounter.incr() + true + } + } + case _ => Future.True + } + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlQualityModelPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlQualityModelPredicates.scala new file mode 100644 index 000000000..76d52992b --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/BqmlQualityModelPredicates.scala @@ -0,0 +1,141 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.params.PushConstants.TweetMediaEmbeddingBQKeyIds +import com.twitter.frigate.pushservice.params.PushMLModel +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.util.Future +import com.twitter.frigate.pushservice.util.CandidateUtil._ + +object BqmlQualityModelPredicates { + + def ingestExtraFeatures(cand: PushCandidate): Unit = { + val tagsCRCountFeature = "tagsCR_count" + val hasPushOpenOrNtabClickFeature = "has_PushOpenOrNtabClick" + val onlyPushOpenOrNtabClickFeature = "only_PushOpenOrNtabClick" + val firstTweetMediaEmbeddingFeature = "media_embedding_0" + val tweetMediaEmbeddingFeature = + "media.mediaunderstanding.media_embeddings.twitter_clip_as_sparse_continuous_feature" + + if 
(!cand.numericFeatures.contains(tagsCRCountFeature)) { + cand.numericFeatures(tagsCRCountFeature) = getTagsCRCount(cand) + } + if (!cand.booleanFeatures.contains(hasPushOpenOrNtabClickFeature)) { + cand.booleanFeatures(hasPushOpenOrNtabClickFeature) = isRelatedToMrTwistlyCandidate(cand) + } + if (!cand.booleanFeatures.contains(onlyPushOpenOrNtabClickFeature)) { + cand.booleanFeatures(onlyPushOpenOrNtabClickFeature) = isMrTwistlyCandidate(cand) + } + if (!cand.numericFeatures.contains(firstTweetMediaEmbeddingFeature)) { + val tweetMediaEmbedding = cand.sparseContinuousFeatures + .getOrElse(tweetMediaEmbeddingFeature, Map.empty[String, Double]) + Seq.range(0, TweetMediaEmbeddingBQKeyIds.size).foreach { i => + cand.numericFeatures(s"media_embedding_$i") = + tweetMediaEmbedding.getOrElse(TweetMediaEmbeddingBQKeyIds(i).toString, 0.0) + } + } + } + + def BqmlQualityModelOonPredicate( + bqmlQualityModelScorer: PushMLModelScorer + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType + ] = { + + val name = "bqml_quality_model_based_predicate" + val scopedStatsReceiver = stats.scope(name) + val oonCandidatesCounter = scopedStatsReceiver.counter("oon_candidates") + val inCandidatesCounter = scopedStatsReceiver.counter("in_candidates") + val filteredOonCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + val bucketedCandidatesCounter = scopedStatsReceiver.counter("bucketed_oon_candidates") + val emptyScoreCandidatesCounter = scopedStatsReceiver.counter("empty_score_candidates") + val histogramBinSize = 0.05 + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val enableBqmlQualityModelScoreHistogramParam = + 
target.params(PushFeatureSwitchParams.EnableBqmlQualityModelScoreHistogramParam) + + lazy val qualityCandidateScoreHistogramCounters = + bqmlQualityModelScorer.getScoreHistogramCounters( + scopedStatsReceiver, + "quality_score_histogram", + histogramBinSize) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && (isOonCandidate || target + .params(PushParams.EnableBqmlReportModelPredictionForF1Tweets)) + && target.params(PushFeatureSwitchParams.EnableBqmlQualityModelPredicateParam)) { + ingestExtraFeatures(candidate) + + lazy val shouldFilterFutSeq = + target + .params(PushFeatureSwitchParams.BqmlQualityModelBucketModelIdListParam) + .zip(target.params(PushFeatureSwitchParams.BqmlQualityModelBucketThresholdListParam)) + .map { + case (modelId, bucketThreshold) => + val scoreFutOpt = + bqmlQualityModelScorer.singlePredicationForModelVersion(modelId, candidate) + + candidate.populateQualityModelScore( + PushMLModel.FilteringProbability, + modelId, + scoreFutOpt + ) + + if (isOonCandidate) { + oonCandidatesCounter.incr() + scoreFutOpt.map { + case Some(score) => + if (score >= bucketThreshold) { + bucketedCandidatesCounter.incr() + if (modelId == target.params( + PushFeatureSwitchParams.BqmlQualityModelTypeParam)) { + if (enableBqmlQualityModelScoreHistogramParam) { + val scoreHistogramBinId = + math.ceil(score / histogramBinSize).toInt + qualityCandidateScoreHistogramCounters(scoreHistogramBinId).incr() + } + if (score >= target.params( + PushFeatureSwitchParams.BqmlQualityModelPredicateThresholdParam)) { + filteredOonCandidatesCounter.incr() + true + } else false + } else false + } else false + case _ => + emptyScoreCandidatesCounter.incr() + false + } + } else { + inCandidatesCounter.incr() + Future.False + } + } + + Future.collect(shouldFilterFutSeq).flatMap { shouldFilterSeq => + if (shouldFilterSeq.contains(true)) { + Future.False + } else Future.True + } + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CaretFeedbackHistoryFilter.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CaretFeedbackHistoryFilter.scala new file mode 100644 index 000000000..8ccccd14d --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CaretFeedbackHistoryFilter.scala @@ -0,0 +1,99 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.CaretFeedbackHistory +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.util.MrNtabCopyObjects +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.notificationservice.thriftscala.GenericNotificationMetadata +import com.twitter.notificationservice.thriftscala.GenericType + +object CaretFeedbackHistoryFilter { + + def caretFeedbackHistoryFilter( + categories: Seq[String] + ): TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[CaretFeedbackDetails] => Seq[ + CaretFeedbackDetails + ] = { target => caretFeedbackDetailsSeq => + caretFeedbackDetailsSeq.filter { caretFeedbackDetails => + caretFeedbackDetails.genericNotificationMetadata match { + case Some(genericNotificationMetadata) => + isFeedbackSupportedGenericType(genericNotificationMetadata) + case None => false + } + } + } + + private def filterCriteria( + caretFeedbackDetails: CaretFeedbackDetails, + genericTypes: Seq[GenericType] + ): Boolean = { + caretFeedbackDetails.genericNotificationMetadata match { + case Some(genericNotificationMetadata) => + genericTypes.contains(genericNotificationMetadata.genericType) + case None => false + } + } + + def caretFeedbackHistoryFilterByGenericType( + genericTypes: Seq[GenericType] + ): TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[CaretFeedbackDetails] => Seq[ + CaretFeedbackDetails + ] = { target => caretFeedbackDetailsSeq 
=> + caretFeedbackDetailsSeq.filter { caretFeedbackDetails => + filterCriteria(caretFeedbackDetails, genericTypes) + } + } + + def caretFeedbackHistoryFilterByGenericTypeDenyList( + genericTypes: Seq[GenericType] + ): TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[CaretFeedbackDetails] => Seq[ + CaretFeedbackDetails + ] = { target => caretFeedbackDetailsSeq => + caretFeedbackDetailsSeq.filterNot { caretFeedbackDetails => + filterCriteria(caretFeedbackDetails, genericTypes) + } + } + + def caretFeedbackHistoryFilterByRefreshableType( + refreshableTypes: Set[Option[String]] + ): TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[CaretFeedbackDetails] => Seq[ + CaretFeedbackDetails + ] = { target => caretFeedbackDetailsSeq => + caretFeedbackDetailsSeq.filter { caretFeedbackDetails => + caretFeedbackDetails.genericNotificationMetadata match { + case Some(genericNotificationMetadata) => + refreshableTypes.contains(genericNotificationMetadata.refreshableType) + case None => false + } + } + } + + def caretFeedbackHistoryFilterByRefreshableTypeDenyList( + refreshableTypes: Set[Option[String]] + ): TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[CaretFeedbackDetails] => Seq[ + CaretFeedbackDetails + ] = { target => caretFeedbackDetailsSeq => + caretFeedbackDetailsSeq.filter { caretFeedbackDetails => + caretFeedbackDetails.genericNotificationMetadata match { + case Some(genericNotificationMetadata) => + !refreshableTypes.contains(genericNotificationMetadata.refreshableType) + case None => true + } + } + } + + private def isFeedbackSupportedGenericType( + notificationMetadata: GenericNotificationMetadata + ): Boolean = { + val genericNotificationTypeName = + (notificationMetadata.genericType, notificationMetadata.refreshableType) match { + case (GenericType.RefreshableNotification, Some(refreshableType)) => refreshableType + case _ => notificationMetadata.genericType.name + } + + MrNtabCopyObjects.AllNtabCopyTypes + 
.flatMap(_.refreshableType) + .contains(genericNotificationTypeName) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CasLockPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CasLockPredicate.scala new file mode 100644 index 000000000..22067405a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CasLockPredicate.scala @@ -0,0 +1,45 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.CasLock +import com.twitter.frigate.common.util.CasSuccess +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Duration +import com.twitter.util.Future + +object CasLockPredicate { + def apply( + casLock: CasLock, + expiryDuration: Duration + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val stats = statsReceiver.scope("predicate_addcaslock_for_candidate") + Predicate + .fromAsync { candidate: PushCandidate => + if (candidate.target.pushContext.exists(_.darkWrite.exists(_ == true))) { + Future.True + } else if (candidate.commonRecType == CommonRecommendationType.MagicFanoutSportsEvent) { + Future.True + } else { + candidate.target.history flatMap { h => + val now = candidate.createdAt + val expiry = now + expiryDuration + val oldTimestamp = h.lastNotificationTime map { + _.inSeconds + } getOrElse 0 + casLock.cas(candidate.target.targetId, oldTimestamp, now.inSeconds, expiry) map { + casResult => + stats.counter(s"cas_$casResult").incr() + casResult == CasSuccess + } + } + } + } + .withStats(stats) + .withName("add_cas_lock") + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CrtDeciderPredicate.scala 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CrtDeciderPredicate.scala new file mode 100644 index 000000000..4b1abf221 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/CrtDeciderPredicate.scala @@ -0,0 +1,25 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.decider.Decider +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate + +object CrtDeciderPredicate { + val name = "crt_decider" + def apply( + decider: Decider + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + Predicate + .from { (candidate: PushCandidate) => + val prefix = "frigate_pushservice_" + val deciderKey = prefix + candidate.commonRecType + decider.feature(deciderKey).isAvailable + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/DiscoverTwitterPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/DiscoverTwitterPredicate.scala new file mode 100644 index 000000000..cb55be356 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/DiscoverTwitterPredicate.scala @@ -0,0 +1,47 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.FrigateHistory +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.predicate.FrigateHistoryFatiguePredicate +import com.twitter.frigate.common.predicate.{FatiguePredicate => TargetFatiguePredicate} +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.hermit.predicate.Predicate +import 
com.twitter.timelines.configapi.Param +import com.twitter.util.Duration + +object DiscoverTwitterPredicate { + + /** + * Predicate used to determine if a minimum duration has elapsed since the last MR push + * for a CRT to be valid. + * @param name Identifier of the caller (used for stats) + * @param intervalParam The minimum duration interval + * @param stats StatsReceiver + * @return Target Predicate + */ + def minDurationElapsedSinceLastMrPushPredicate( + name: String, + intervalParam: Param[Duration], + stats: StatsReceiver + ): Predicate[Target] = + Predicate + .fromAsync { target: Target => + val interval = + target.params(intervalParam) + FrigateHistoryFatiguePredicate( + minInterval = interval, + getSortedHistory = { h: History => + val magicRecsOnlyHistory = + TargetFatiguePredicate.magicRecsPushOnlyFilter(h.sortedPushDmHistory) + TargetFatiguePredicate.magicRecsNewUserPlaybookPushFilter(magicRecsOnlyHistory) + } + ).flatContraMap { target: TargetUser with FrigateHistory => + target.history + }.apply(Seq(target)).map { + _.head + } + }.withStats(stats.scope(s"${name}_predicate_mr_push_min_interval")) + .withName(s"${name}_predicate_mr_push_min_interval") +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/FatiguePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/FatiguePredicate.scala new file mode 100644 index 000000000..457dc879c --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/FatiguePredicate.scala @@ -0,0 +1,74 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.FatiguePredicate._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.{NotificationDisplayLocation => DisplayLocation} +import 
com.twitter.hermit.predicate.NamedPredicate +import com.twitter.util.Duration + +object FatiguePredicate { + + /** + * Predicate that operates on a candidate, and applies custom fatigue rules for the slice of history only + * corresponding to a given rec type. + * + * @param interval + * @param maxInInterval + * @param minInterval + * @param recommendationType + * @param statsReceiver + * @return + */ + def recTypeOnly( + interval: Duration, + maxInInterval: Int, + minInterval: Duration, + recommendationType: CommonRecommendationType, + notificationDisplayLocation: DisplayLocation = DisplayLocation.PushToMobileDevice + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + build( + interval = interval, + maxInInterval = maxInInterval, + minInterval = minInterval, + filterHistory = recOnlyFilter(recommendationType), + notificationDisplayLocation = notificationDisplayLocation + ).flatContraMap { candidate: PushCandidate => candidate.target.history } + .withStats(statsReceiver.scope(s"predicate_${recTypeOnlyFatigue}")) + .withName(recTypeOnlyFatigue) + } + + /** + * Predicate that operates on a candidate, and applies custom fatigue rules for the slice of history only + * corresponding to specified rec types + * + * @param interval + * @param maxInInterval + * @param minInterval + * @param statsReceiver + * @return + */ + def recTypeSetOnly( + interval: Duration, + maxInInterval: Int, + minInterval: Duration, + recTypes: Set[CommonRecommendationType], + notificationDisplayLocation: DisplayLocation = DisplayLocation.PushToMobileDevice + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "rec_type_set_fatigue" + build( + interval = interval, + maxInInterval = maxInInterval, + minInterval = minInterval, + filterHistory = recTypesOnlyFilter(recTypes), + notificationDisplayLocation = notificationDisplayLocation + ).flatContraMap { candidate: PushCandidate => candidate.target.history } + 
.withStats(statsReceiver.scope(s"${name}_predicate")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/HealthPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/HealthPredicates.scala new file mode 100644 index 000000000..f11ed1400 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/HealthPredicates.scala @@ -0,0 +1,740 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse +import com.twitter.abuse.detection.scoring.thriftscala.{Model => TweetHealthModel} +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.NsfwTextDetectionModel +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.CandidateHydrationUtil +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.frigate.pushservice.util.MediaAnnotationsUtil +import com.twitter.frigate.thriftscala.UserMediaRepresentation +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.hss.api.thriftscala.UserHealthSignal._ +import com.twitter.hss.api.thriftscala.SignalValue +import com.twitter.hss.api.thriftscala.UserHealthSignalResponse +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future +import com.twitter.util.Time + +object HealthPredicates { + + private val NsfwTextDetectionModelMap: Map[NsfwTextDetectionModel.Value, TweetHealthModel] = + Map( + 
NsfwTextDetectionModel.ProdModel -> TweetHealthModel.PnsfwTweetText, + NsfwTextDetectionModel.RetrainedModel -> TweetHealthModel.ExperimentalHealthModelScore1, + ) + + private def tweetIsSupportedLanguage( + candidate: PushCandidate, + supportedLanguages: Set[String] + ): Boolean = { + val tweetLanguage = + candidate.categoricalFeatures.getOrElse("RecTweet.TweetyPieResult.Language", "") + supportedLanguages.contains(tweetLanguage) + } + + def tweetHealthSignalScorePredicate( + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse], + applyToQuoteTweet: Boolean = false + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate with TweetDetails] = { + val name = "tweet_health_signal_store_applyToQuoteTweet_" + applyToQuoteTweet.toString + val scopedStatsReceiver = stats.scope(name) + val numCandidatesStats = scopedStatsReceiver.scope("num_candidates") + val numCandidatesMediaNsfwScoreStats = numCandidatesStats.scope("media_nsfw_score") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with TweetDetails => + numCandidatesStats.counter("all").incr() + val target = candidate.target + val tweetIdOpt = if (!applyToQuoteTweet) { + Some(candidate.tweetId) + } else candidate.tweetyPieResult.flatMap(_.quotedTweet.map(_.id)) + + tweetIdOpt match { + case Some(tweetId) => + val pMediaNsfwRequest = + TweetScoringRequest(tweetId, TweetHealthModel.ExperimentalHealthModelScore4) + tweetHealthScoreStore.get(pMediaNsfwRequest).map { + case Some(tweetScoringResponse) => + numCandidatesMediaNsfwScoreStats.counter("non_empty").incr() + val pMediaNsfwScore = tweetScoringResponse.score + + if (!applyToQuoteTweet) { + candidate + .cacheExternalScore("NsfwMediaProbability", Future.value(Some(pMediaNsfwScore))) + } + + val pMediaNsfwShouldBucket = + pMediaNsfwScore > target.params( + PushFeatureSwitchParams.PnsfwTweetMediaBucketingThreshold) + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && 
pMediaNsfwShouldBucket) { + numCandidatesMediaNsfwScoreStats.counter("bucketed").incr() + if (target.params(PushFeatureSwitchParams.PnsfwTweetMediaFilterOonOnly) + && !RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType)) { + true + } else { + val pMediaNsfwScoreThreshold = + if (applyToQuoteTweet) + target.params(PushFeatureSwitchParams.PnsfwQuoteTweetThreshold) + else if (candidate.hasPhoto) + target.params(PushFeatureSwitchParams.PnsfwTweetImageThreshold) + else target.params(PushFeatureSwitchParams.PnsfwTweetMediaThreshold) + candidate.cachePredicateInfo( + name + "_nsfwMedia", + pMediaNsfwScore, + pMediaNsfwScoreThreshold, + pMediaNsfwScore > pMediaNsfwScoreThreshold) + if (pMediaNsfwScore > pMediaNsfwScoreThreshold) { + numCandidatesMediaNsfwScoreStats.counter("filtered").incr() + false + } else true + } + } else true + case _ => + numCandidatesMediaNsfwScoreStats.counter("empty").incr() + if (candidate.hasPhoto || candidate.hasVideo) { + numCandidatesMediaNsfwScoreStats.counter("media_tweet_with_empty_score").incr() + } + true + } + case _ => Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def healthSignalScoreSpammyTweetPredicate( + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate with TweetDetails] = { + val name = "health_signal_store_spammy_tweet" + val statsScope = stats.scope(name) + val allCandidatesCounter = statsScope.counter("all_candidates") + val eligibleCandidatesCounter = statsScope.counter("eligible_candidates") + val oonCandidatesCounter = statsScope.counter("oon_candidates") + val inCandidatesCounter = statsScope.counter("in_candidates") + val bucketedCandidatesCounter = statsScope.counter("num_bucketed") + val nonEmptySpamScoreCounter = statsScope.counter("non_empty_spam_score") + val filteredOonCandidatesCounter = statsScope.counter("num_filtered_oon") + val 
filteredInCandidatesCounter = statsScope.counter("num_filtered_in") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with TweetDetails => + allCandidatesCounter.incr() + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + if (isOonCandidate) { + oonCandidatesCounter.incr() + } + val target = candidate.target + if (target.params(PushFeatureSwitchParams.EnableSpammyTweetFilter)) { + eligibleCandidatesCounter.incr() + val tweetSpamScore = + TweetScoringRequest(candidate.tweetId, TweetHealthModel.SpammyTweetContent) + tweetHealthScoreStore.get(tweetSpamScore).map { + case (Some(tweetScoringResponse)) => + nonEmptySpamScoreCounter.incr() + val candidateSpamScore = tweetScoringResponse.score + + candidate + .cacheExternalScore("SpammyTweetScore", Future.value(Some(candidateSpamScore))) + + val tweetSpamShouldBucket = + candidateSpamScore > target.params( + PushFeatureSwitchParams.SpammyTweetBucketingThreshold) + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && tweetSpamShouldBucket) { + bucketedCandidatesCounter.incr() + if (isOonCandidate) { + val spamScoreThreshold = + target.params(PushFeatureSwitchParams.SpammyTweetOonThreshold) + if (candidateSpamScore > spamScoreThreshold) { + filteredOonCandidatesCounter.incr() + false + } else true + } else { + inCandidatesCounter.incr() + val spamScoreThreshold = + target.params(PushFeatureSwitchParams.SpammyTweetInThreshold) + if (candidateSpamScore > spamScoreThreshold) { + filteredInCandidatesCounter.incr() + false + } else true + } + } else true + case _ => true + } + } else Future.True + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def healthSignalScorePnsfwTweetTextPredicate( + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { 
+ val name = "health_signal_store_pnsfw_tweet_text" + val statsScope = stats.scope(name) + val allCandidatesCounter = statsScope.counter("all_candidates") + val nonEmptyNsfwTextScoreNum = statsScope.counter("non_empty_nsfw_text_score") + val filteredCounter = statsScope.counter("num_filtered") + val lowScoreCounter = statsScope.counter("low_score_count") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate => + val target = candidate.target + val predEnabled = + target.params(PushFeatureSwitchParams.EnableHealthSignalStorePnsfwTweetTextPredicate) + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && predEnabled && tweetIsSupportedLanguage(candidate, Set(""))) { + allCandidatesCounter.incr() + val pnsfwTextRequest = + TweetScoringRequest(candidate.tweetId, TweetHealthModel.PnsfwTweetText) + tweetHealthScoreStore.get(pnsfwTextRequest).flatMap { + case Some(tweetScoringResponse) => { + nonEmptyNsfwTextScoreNum.incr() + if (tweetScoringResponse.score < 1e-8) { + lowScoreCounter.incr() + } + + candidate + .cacheExternalScore( + "NsfwTextProbability-en", + Future.value(Some(tweetScoringResponse.score))) + val threshold = target.params(PushFeatureSwitchParams.PnsfwTweetTextThreshold) + candidate.cachePredicateInfo( + name, + tweetScoringResponse.score, + threshold, + tweetScoringResponse.score > threshold) + if (tweetScoringResponse.score > threshold) { + filteredCounter.incr() + Future.False + } else Future.True + } + case _ => Future.True + } + } else Future.True + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def healthSignalScoreMultilingualPnsfwTweetTextPredicate( + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "health_signal_store_multilingual_pnsfw_tweet_text" + val statsScope = stats.scope(name) + + val allLanguagesIdentifier = "all" + val 
languagesSelectedForStats = + Set("") + allLanguagesIdentifier + + val candidatesCounterMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"candidates_$lang") + }.toMap + val nonEmptyHealthScoreMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"non_empty_health_score_$lang") + }.toMap + val emptyHealthScoreMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"empty_health_score_$lang") + }.toMap + val bucketedCounterMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"num_candidates_bucketed_$lang") + }.toMap + val filteredCounterMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"num_filtered_$lang") + }.toMap + val lowScoreCounterMap: Map[String, Counter] = languagesSelectedForStats.map { lang => + lang -> statsScope.counter(f"low_score_count_$lang") + }.toMap + + val wrongBucketingModelCounter = statsScope.counter("wrong_bucketing_model_count") + val wrongDetectionModelCounter = statsScope.counter("wrong_detection_model_count") + + def increaseCounterForLanguage(counterMap: Map[String, Counter], language: String): Unit = { + counterMap.get(allLanguagesIdentifier) match { + case Some(counter) => counter.incr() + case _ => + } + counterMap.get(language) match { + case Some(counter) => counter.incr() + case _ => + } + } + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate => + val target = candidate.target + + val languageFeatureName = "RecTweet.TweetyPieResult.Language" + + lazy val isPredicateEnabledForTarget = target.params( + PushFeatureSwitchParams.EnableHealthSignalStoreMultilingualPnsfwTweetTextPredicate) + + lazy val targetNsfwTextDetectionModel: NsfwTextDetectionModel.Value = + target.params(PushFeatureSwitchParams.MultilingualPnsfwTweetTextModel) + + lazy val targetPredicateSupportedLanguageSeq: 
Seq[String] = + target.params(PushFeatureSwitchParams.MultilingualPnsfwTweetTextSupportedLanguages) + + lazy val bucketingModelSeq: Seq[NsfwTextDetectionModel.Value] = + target.params(PushFeatureSwitchParams.MultilingualPnsfwTweetTextBucketingModelList) + + lazy val bucketingThresholdPerLanguageSeq: Seq[Double] = + target.params(PushFeatureSwitchParams.MultilingualPnsfwTweetTextBucketingThreshold) + + lazy val filteringThresholdPerLanguageSeq: Seq[Double] = + target.params(PushFeatureSwitchParams.MultilingualPnsfwTweetTextFilteringThreshold) + + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && isPredicateEnabledForTarget) { + val candidateLanguage = + candidate.categoricalFeatures.getOrElse(languageFeatureName, "") + + val indexOfCandidateLanguage = + targetPredicateSupportedLanguageSeq.indexOf(candidateLanguage) + + val isCandidateLanguageSupported = indexOfCandidateLanguage >= 0 + + if (isCandidateLanguageSupported) { + increaseCounterForLanguage(candidatesCounterMap, candidateLanguage) + + val bucketingModelScoreMap: Map[NsfwTextDetectionModel.Value, Future[Option[Double]]] = + bucketingModelSeq.map { modelName => + NsfwTextDetectionModelMap.get(modelName) match { + case Some(targetNsfwTextDetectionModel) => + val pnsfwTweetTextRequest: TweetScoringRequest = + TweetScoringRequest(candidate.tweetId, targetNsfwTextDetectionModel) + + val scoreOptFut: Future[Option[Double]] = + tweetHealthScoreStore.get(pnsfwTweetTextRequest).map(_.map(_.score)) + + candidate + .cacheExternalScore("NsfwTextProbability", scoreOptFut) + + modelName -> scoreOptFut + case _ => + wrongBucketingModelCounter.incr() + modelName -> Future.None + } + }.toMap + + val candidateLanguageBucketingThreshold = + bucketingThresholdPerLanguageSeq(indexOfCandidateLanguage) + + val userShouldBeBucketedFut: Future[Boolean] = + Future + .collect(bucketingModelScoreMap.map { + case (_, modelScoreOptFut) => + modelScoreOptFut.map { + case Some(score) => + 
increaseCounterForLanguage(nonEmptyHealthScoreMap, candidateLanguage) + score > candidateLanguageBucketingThreshold + case _ => + increaseCounterForLanguage(emptyHealthScoreMap, candidateLanguage) + false + } + }.toSeq).map(_.contains(true)) + + val candidateShouldBeFilteredFut: Future[Boolean] = userShouldBeBucketedFut.flatMap { + userShouldBeBucketed => + if (userShouldBeBucketed) { + increaseCounterForLanguage(bucketedCounterMap, candidateLanguage) + + val candidateLanguageFilteringThreshold = + filteringThresholdPerLanguageSeq(indexOfCandidateLanguage) + + bucketingModelScoreMap.get(targetNsfwTextDetectionModel) match { + case Some(scoreOptFut) => + scoreOptFut.map { + case Some(score) => + val candidateShouldBeFiltered = + score > candidateLanguageFilteringThreshold + if (candidateShouldBeFiltered) { + increaseCounterForLanguage(filteredCounterMap, candidateLanguage) + } + candidateShouldBeFiltered + case _ => false + } + case _ => + wrongDetectionModelCounter.incr() + Future.False + } + } else { + increaseCounterForLanguage(lowScoreCounterMap, candidateLanguage) + Future.False + } + } + candidateShouldBeFilteredFut.map(result => !result) + } else Future.True + } else Future.True + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def authorProfileBasedPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "author_profile" + val statsScope = stats.scope(name) + val filterByNsfwToken = statsScope.counter("filter_by_nsfw_token") + val filterByAccountAge = statsScope.counter("filter_by_account_age") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate => + val target = candidate.target + candidate match { + case cand: PushCandidate with TweetAuthorDetails => + cand.tweetAuthor.map { + case Some(author) => + val nsfwTokens = target.params(PushFeatureSwitchParams.NsfwTokensParam) + val accountAgeInHours = + (Time.now - 
Time.fromMilliseconds(author.createdAtMsec)).inHours + val isNsfwAccount = CandidateHydrationUtil.isNsfwAccount(author, nsfwTokens) + val isVerified = author.safety.map(_.verified).getOrElse(false) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && !isVerified) { + val enableNsfwTokenCheck = + target.params(PushFeatureSwitchParams.EnableNsfwTokenBasedFiltering) + val minimumAllowedAge = + target.params(PushFeatureSwitchParams.MinimumAllowedAuthorAccountAgeInHours) + cand.cachePredicateInfo( + name + "_nsfwToken", + if (isNsfwAccount) 1.0 else 0.0, + 0.0, + enableNsfwTokenCheck && isNsfwAccount) + cand.cachePredicateInfo( + name + "_authorAge", + accountAgeInHours, + minimumAllowedAge, + accountAgeInHours < minimumAllowedAge) + + if (enableNsfwTokenCheck && isNsfwAccount) { + filterByNsfwToken.incr() + false + } else if (accountAgeInHours < minimumAllowedAge) { + filterByAccountAge.incr() + false + } else true + } else true + case _ => true + } + case _ => Future.value(true) + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def authorSensitiveMediaPredicate( + producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + val name = "author_sensitive_media_mrtwistly" + val statsScope = stats.scope(name) + val enableQueryNum = statsScope.counter("enable_query") + val nonEmptyMediaRepresentationNum = statsScope.counter("non_empty_media_representation") + val filteredOON = statsScope.counter("filtered_oon") + + Predicate + .fromAsync { candidate: PushCandidate with TweetAuthor => + val target = candidate.target + val useAggressiveThresholds = CandidateUtil.useAggressiveHealthThresholds(candidate) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && + RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType) && + target.params(PushFeatureSwitchParams.EnableQueryAuthorMediaRepresentationStore)) { + 
enableQueryNum.incr() + + candidate.authorId match { + case Some(authorId) => + producerMediaRepresentationStore.get(authorId).map { + case Some(mediaRepresentation) => + nonEmptyMediaRepresentationNum.incr() + val sumScore: Double = mediaRepresentation.mediaRepresentation.values.sum + val nudityScore: Double = mediaRepresentation.mediaRepresentation + .getOrElse(MediaAnnotationsUtil.nudityCategoryId, 0.0) + val nudityRate = if (sumScore > 0) nudityScore / sumScore else 0.0 + + candidate + .cacheExternalScore("AuthorNudityScore", Future.value(Some(nudityScore))) + candidate.cacheExternalScore("AuthorNudityRate", Future.value(Some(nudityRate))) + + val threshold = if (useAggressiveThresholds) { + target.params( + PushFeatureSwitchParams.AuthorSensitiveMediaFilteringThresholdForMrTwistly) + } else { + target.params(PushFeatureSwitchParams.AuthorSensitiveMediaFilteringThreshold) + } + candidate.cachePredicateInfo( + name, + nudityRate, + threshold, + nudityRate > threshold, + Some(Map[String, Double]("sumScore" -> sumScore, "nudityScore" -> nudityScore))) + + if (nudityRate > threshold) { + filteredOON.incr() + false + } else true + case _ => true + } + case _ => Future.True + } + } else { + Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def sensitiveMediaCategoryPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "sensitive_media_category" + val tweetMediaAnnotationFeature = + "tweet.mediaunderstanding.tweet_annotations.sensitive_category_probabilities" + val scopedStatsReceiver = stats.scope(name) + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + val nonZeroNudityCandidatesCounter = scopedStatsReceiver.counter("non_zero_nudity_candidates") + val nudityScoreStats = scopedStatsReceiver.stat("nudity_scores") + + Predicate + .fromAsync { candidate: PushCandidate => + allCandidatesCounter.incr() + val target = candidate.target + val 
nudityScore = candidate.sparseContinuousFeatures + .getOrElse(tweetMediaAnnotationFeature, Map.empty[String, Double]).getOrElse( + MediaAnnotationsUtil.nudityCategoryId, + 0.0) + if (nudityScore > 0) nonZeroNudityCandidatesCounter.incr() + nudityScoreStats.add(nudityScore.toFloat) + val threshold = + target.params(PushFeatureSwitchParams.TweetMediaSensitiveCategoryThresholdParam) + candidate.cachePredicateInfo(name, nudityScore, threshold, nudityScore > threshold) + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && nudityScore > threshold) { + Future.False + } else { + Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def profanityPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "profanity_filter" + val scopedStatsReceiver = stats.scope(name) + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + + Predicate + .fromAsync { candidate: PushCandidate => + allCandidatesCounter.incr() + val target = candidate.target + + lazy val enableFilter = + target.params(PushFeatureSwitchParams.EnableProfanityFilterParam) + val tweetSemanticCoreIds = candidate.sparseBinaryFeatures + .getOrElse(PushConstants.TweetSemanticCoreIdFeature, Set.empty[String]) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && + tweetSemanticCoreIds.contains(PushConstants.ProfanityFilter_Id) && enableFilter) { + Future.False + } else { + Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def agathaAbusiveTweetAuthorPredicateMrTwistly( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with OutOfNetworkTweetCandidate] = { + val name = "agatha_abusive_tweet_author_mr_twistly" + val scopedStatsReceiver = stats.scope(name) + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + val isMrBackfillCRCandidateCounter = 
scopedStatsReceiver.counter("isMrBackfillCR_candidates") + Predicate + .fromAsync { cand: PushCandidate with OutOfNetworkTweetCandidate => + allCandidatesCounter.incr() + val target = cand.target + val tweetSemanticCoreIds = cand.sparseBinaryFeatures + .getOrElse(PushConstants.TweetSemanticCoreIdFeature, Set.empty[String]) + + val hasAbuseStrikeTop2Percent = + tweetSemanticCoreIds.contains(PushConstants.AbuseStrike_Top2Percent_Id) + val hasAbuseStrikeTop1Percent = + tweetSemanticCoreIds.contains(PushConstants.AbuseStrike_Top1Percent_Id) + val hasAbuseStrikeTop05Percent = + tweetSemanticCoreIds.contains(PushConstants.AbuseStrike_Top05Percent_Id) + + if (hasAbuseStrikeTop2Percent) { + scopedStatsReceiver.counter("abuse_strike_top_2_percent_candidates").incr() + } + if (hasAbuseStrikeTop1Percent) { + scopedStatsReceiver.counter("abuse_strike_top_1_percent_candidates").incr() + } + if (hasAbuseStrikeTop05Percent) { + scopedStatsReceiver.counter("abuse_strike_top_05_percent_candidates").incr() + } + + if (CandidateUtil.shouldApplyHealthQualityFilters(cand) && cand.isMrBackfillCR.getOrElse( + false)) { + isMrBackfillCRCandidateCounter.incr() + if (hasAbuseStrikeTop2Percent) { + if (target.params( + PushFeatureSwitchParams.EnableAbuseStrikeTop2PercentFilterSimCluster) && hasAbuseStrikeTop2Percent || + target.params( + PushFeatureSwitchParams.EnableAbuseStrikeTop1PercentFilterSimCluster) && hasAbuseStrikeTop1Percent || + target.params( + PushFeatureSwitchParams.EnableAbuseStrikeTop05PercentFilterSimCluster) && hasAbuseStrikeTop05Percent) { + Future.False + } else { + Future.True + } + } else { + Future.True + } + } else Future.True + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def userHealthSignalsPredicate( + userHealthSignalStore: ReadableStore[Long, UserHealthSignalResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetDetails] = { + val name = "agatha_user_health_model_score" + val scopedStatsReceiver = 
stats.scope(name) + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + val bucketedUserCandidatesCounter = + scopedStatsReceiver.counter("bucketed_user_candidates") + val filteredOON = scopedStatsReceiver.counter("filtered_oon") + + Predicate + .fromAsync { candidate: PushCandidate with TweetDetails => + allCandidatesCounter.incr() + val target = candidate.target + val useAggressiveThresholds = CandidateUtil.useAggressiveHealthThresholds(candidate) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && target.params( + PushFeatureSwitchParams.EnableAgathaUserHealthModelPredicate)) { + val healthSignalsResponseFutOpt: Future[Option[UserHealthSignalResponse]] = + candidate.authorId match { + case Some(authorId) => userHealthSignalStore.get(authorId) + case _ => Future.None + } + healthSignalsResponseFutOpt.map { + case Some(response) => + val agathaRecentAbuseStrikeScore: Double = userHealthSignalValueToDouble( + response.signalValues + .getOrElse(AgathaRecentAbuseStrikeDouble, SignalValue.DoubleValue(0.0))) + val agathaCalibratedNSFWScore: Double = userHealthSignalValueToDouble( + response.signalValues + .getOrElse(AgathaCalibratedNsfwDouble, SignalValue.DoubleValue(0.0))) + val agathaTextNSFWScore: Double = userHealthSignalValueToDouble(response.signalValues + .getOrElse(NsfwTextUserScoreDouble, SignalValue.DoubleValue(0.0))) + + candidate + .cacheExternalScore( + "agathaRecentAbuseStrikeScore", + Future.value(Some(agathaRecentAbuseStrikeScore))) + candidate + .cacheExternalScore( + "agathaCalibratedNSFWScore", + Future.value(Some(agathaCalibratedNSFWScore))) + candidate + .cacheExternalScore("agathaTextNSFWScore", Future.value(Some(agathaTextNSFWScore))) + + val NSFWShouldBucket = agathaCalibratedNSFWScore > target.params( + PushFeatureSwitchParams.AgathaCalibratedNSFWBucketThreshold) + val textNSFWShouldBucket = agathaTextNSFWScore > target.params( + PushFeatureSwitchParams.AgathaTextNSFWBucketThreshold) + + if 
(NSFWShouldBucket || textNSFWShouldBucket) { + bucketedUserCandidatesCounter.incr() + if (NSFWShouldBucket) { + scopedStatsReceiver.counter("calibrated_nsfw_bucketed_user_candidates").incr() + } + if (textNSFWShouldBucket) { + scopedStatsReceiver.counter("text_nsfw_bucketed_user_candidates").incr() + } + + val (thresholdAgathaNsfw, thresholdTextNsfw) = if (useAggressiveThresholds) { + ( + target.params( + PushFeatureSwitchParams.AgathaCalibratedNSFWThresholdForMrTwistly), + target + .params(PushFeatureSwitchParams.AgathaTextNSFWThresholdForMrTwistly)) + } else { + ( + target.params(PushFeatureSwitchParams.AgathaCalibratedNSFWThreshold), + target.params(PushFeatureSwitchParams.AgathaTextNSFWThreshold)) + } + candidate.cachePredicateInfo( + name + "_agathaNsfw", + agathaCalibratedNSFWScore, + thresholdAgathaNsfw, + agathaCalibratedNSFWScore > thresholdAgathaNsfw) + candidate.cachePredicateInfo( + name + "_authorTextNsfw", + agathaTextNSFWScore, + thresholdTextNsfw, + agathaTextNSFWScore > thresholdTextNsfw) + + if ((agathaCalibratedNSFWScore > thresholdAgathaNsfw) || + (agathaTextNSFWScore > thresholdTextNsfw)) { + filteredOON.incr() + false + } else true + } else { + true + } + case _ => true + } + } else { + Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } + + def userHealthSignalValueToDouble(signalValue: SignalValue): Double = { + signalValue match { + case SignalValue.DoubleValue(value) => value + case _ => throw new Exception(f"Could not convert signal value to double") + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/JointDauAndQualityModelPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/JointDauAndQualityModelPredicate.scala new file mode 100644 index 000000000..63095f4db --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/JointDauAndQualityModelPredicate.scala @@ -0,0 +1,39 @@ +package 
com.twitter.frigate.pushservice.predicate
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams.QualityPredicateIdParam
+import com.twitter.frigate.pushservice.predicate.quality_model_predicate._
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.hermit.predicate.Predicate
+import com.twitter.util.Future
+
+/**
+ * Predicate that delegates to the quality-model predicate selected by the target's
+ * `QualityPredicateIdParam`, unless the target is flagged to skip model predicates.
+ */
+object JointDauAndQualityModelPredicate {
+
+  val name = "JointDauAndQualityModelPredicate"
+
+  /**
+   * Builds the named predicate, with stats under `predicate_JointDauAndQualityModelPredicate`.
+   *
+   * For each candidate:
+   *  - when `target.skipModelPredicate` is set, the candidate passes;
+   *  - otherwise the predicate registered for the target's `QualityPredicateIdParam` is run on
+   *    that single candidate (falling back to `WeightedOpenOrNtabClickQualityPredicate` when
+   *    the id has no entry in `QualityPredicateMap`).
+   */
+  def apply()(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = {
+    val stats = statsReceiver.scope(s"predicate_$name")
+
+    val fallbackPredicate = WeightedOpenOrNtabClickQualityPredicate()
+    val predicatesById = QualityPredicateMap()
+
+    Predicate
+      .fromAsync { candidate: PushCandidate =>
+        val target = candidate.target
+        if (target.skipModelPredicate) Future.True
+        else {
+          val selectedPredicate =
+            predicatesById.getOrElse(target.params(QualityPredicateIdParam), fallbackPredicate)
+
+          // An empty result for the single submitted candidate counts as a rejection.
+          selectedPredicate.apply(Seq(candidate)).map(_.headOption.getOrElse(false))
+        }
+      }
+      .withStats(stats)
+      .withName(name)
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ListPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ListPredicates.scala
new file mode 100644
index 000000000..cbfb670d8
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ListPredicates.scala
@@ -0,0 +1,110 @@
+package com.twitter.frigate.pushservice.predicate
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.model.ListRecommendationPushCandidate
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.hermit.predicate.socialgraph.Edge
+import com.twitter.hermit.predicate.socialgraph.RelationEdge
+import
com.twitter.hermit.predicate.socialgraph.SocialGraphPredicate
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.hermit.predicate.Predicate
+import com.twitter.socialgraph.thriftscala.RelationshipType
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+object ListPredicates {
+
+  /** Passes when the candidate's `listName` future resolves to a defined value. */
+  def listNameExistsPredicate(
+  )(
+    implicit stats: StatsReceiver
+  ): NamedPredicate[ListRecommendationPushCandidate] = {
+    val hasListName = Predicate.fromAsync { candidate: ListRecommendationPushCandidate =>
+      candidate.listName.map(nameOpt => nameOpt.isDefined)
+    }
+    hasListName
+      .withStats(stats)
+      .withName("list_name_exists")
+  }
+
+  /** Passes when the candidate's `listOwnerId` future resolves to a defined value. */
+  def listAuthorExistsPredicate(
+  )(
+    implicit stats: StatsReceiver
+  ): NamedPredicate[ListRecommendationPushCandidate] = {
+    val hasListOwner = Predicate.fromAsync { candidate: ListRecommendationPushCandidate =>
+      candidate.listOwnerId.map(ownerIdOpt => ownerIdOpt.isDefined)
+    }
+    hasListOwner
+      .withStats(stats)
+      .withName("list_owner_exists")
+  }
+
+  /**
+   * Runs a social-graph "any relation exists" check (Blocking, BlockedBy, Muting) on the edge
+   * from the target user to the list owner and returns that check's result. Candidates without
+   * an owner id pass.
+   *
+   * NOTE(review): the social-graph predicate's result is returned as-is; confirm its polarity
+   * matches "acceptable" (i.e. that it does not yield `true` when a block/mute edge exists).
+   *
+   * @param edgeStore store answering whether a given relation edge exists
+   */
+  def listAuthorAcceptableToTargetUser(
+    edgeStore: ReadableStore[RelationEdge, Boolean]
+  )(
+    implicit statsReceiver: StatsReceiver
+  ): NamedPredicate[ListRecommendationPushCandidate] = {
+    val name = "list_author_acceptable_to_target_user"
+    val relationCheckName = "list_sgs_any_relation_exists"
+    val blockOrMuteRelationCheck = SocialGraphPredicate
+      .anyRelationExists(
+        edgeStore,
+        Set(
+          RelationshipType.Blocking,
+          RelationshipType.BlockedBy,
+          RelationshipType.Muting
+        )
+      )
+      .withStats(statsReceiver.scope(relationCheckName))
+      .withName(relationCheckName)
+
+    Predicate
+      .fromAsync { candidate: ListRecommendationPushCandidate =>
+        candidate.listOwnerId.flatMap { ownerIdOpt =>
+          ownerIdOpt match {
+            case Some(ownerId) =>
+              val targetToOwner = Edge(candidate.target.targetId, ownerId)
+              // Exactly one edge is submitted, so the first result is the answer for it.
+              blockOrMuteRelationCheck.apply(Seq(targetToOwner)).map(_.head)
+            case _ => Future.True
+          }
+        }
+      }
+      .withStats(statsReceiver.scope(s"predicate_$name"))
+      .withName(name)
+  }
+
+  /**
+   * Checks whether the list is acceptable to the target user. It passes only when the list
+   * is available and the target:
+   *    - is not following the list, and
+   *    - has not muted the list.
+   */
+  def listAcceptablePredicate(
+  )(
+    implicit stats: StatsReceiver
+  ):
NamedPredicate[ListRecommendationPushCandidate] = { + val name = "list_acceptable_to_target_user" + Predicate + .fromAsync { candidate: ListRecommendationPushCandidate => + candidate.apiList.map { + case Some(apiList) => + !(apiList.following.contains(true) || apiList.muting.contains(true)) + case _ => false + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def listSubscriberCountPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ListRecommendationPushCandidate] = { + val name = "list_subscribe_count" + Predicate + .fromAsync { candidate: ListRecommendationPushCandidate => + candidate.apiList.map { apiListOpt => + apiListOpt.exists { apiList => + apiList.subscriberCount >= candidate.target.params( + PushFeatureSwitchParams.ListRecommendationsSubscriberCount) + } + } + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutPreRankingPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutPreRankingPredicates.scala new file mode 100644 index 000000000..9ba1c9f6f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutPreRankingPredicates.scala @@ -0,0 +1,37 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.common.predicate.tweet._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate + +class LoggedOutPreRankingPredicatesBuilder(implicit statsReceiver: StatsReceiver) { + + private val TweetPredicates = List[NamedPredicate[PushCandidate]]( + TweetObjectExistsPredicate[ + TweetCandidate with TweetDetails + ].applyOnlyToTweetCandidatesWithTweetDetails 
+ .withName("tweet_object_exists"), + PredicatesForCandidate.oldTweetRecsPredicate.applyOnlyToTweetCandidateWithTargetAndABDeciderAndMaxTweetAge + .withName("old_tweet"), + PredicatesForCandidate.tweetIsNotAreply.applyOnlyToTweetCandidateWithoutSocialContextWithTweetDetails + .withName("tweet_candidate_not_a_reply"), + TweetAuthorPredicates + .recTweetAuthorUnsuitable[TweetCandidate with TweetAuthorDetails] + .applyOnlyToTweetCandidateWithTweetAuthorDetails + .withName("tweet_author_unsuitable") + ) + + final def build(): List[NamedPredicate[PushCandidate]] = { + TweetPredicates + } + +} + +object LoggedOutPreRankingPredicates { + def apply(statsReceiver: StatsReceiver): List[NamedPredicate[PushCandidate]] = + new LoggedOutPreRankingPredicatesBuilder()(statsReceiver).build() +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutTargetPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutTargetPredicates.scala new file mode 100644 index 000000000..085ad73e9 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/LoggedOutTargetPredicates.scala @@ -0,0 +1,53 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.abdecider.GuestRecipient +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.common.predicate.{FatiguePredicate => CommonFatiguePredicate} +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.conversions.DurationOps._ +import com.twitter.frigate.common.util.Experiments.LoggedOutRecsHoldback +import com.twitter.hermit.predicate.Predicate + +object LoggedOutTargetPredicates { + + def targetFatiguePredicate[T <: Target]( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "logged_out_target_min_duration_since_push" + CommonFatiguePredicate + .magicRecsPushTargetFatiguePredicate( + 
minInterval = 24.hours,
+        maxInInterval = 1
+      ).withStats(statsReceiver.scope(name))
+      .withName(name)
+  }
+
+  /**
+   * Holdback gate for logged-out recommendations.
+   *
+   * Looks up the target's bucket in the `LoggedOutRecsHoldback` experiment using its guest id:
+   * targets in the control bucket are rejected, every other target (including targets with no
+   * bucket) is allowed.
+   *
+   * Counters are emitted under the `logged_out` scope: `guest_id_not_found`,
+   * `holdback_control` and `allow_traffic`. The predicate itself is named
+   * `logged_out_recs_holdback` and reports stats under that scope, matching
+   * `targetFatiguePredicate`.
+   *
+   * Throws `IllegalStateException("guest_id_not_found")` from the predicate function when the
+   * target has no guest id.
+   */
+  def loggedOutRecsHoldbackPredicate[T <: Target](
+  )(
+    implicit statsReceiver: StatsReceiver
+  ): NamedPredicate[T] = {
+    val name = "logged_out_recs_holdback"
+    val loggedOutStats = statsReceiver.scope("logged_out")
+    val guestIdNotFoundCounter = loggedOutStats.counter("guest_id_not_found")
+    val controlBucketCounter = loggedOutStats.counter("holdback_control")
+    val allowTrafficCounter = loggedOutStats.counter("allow_traffic")
+
+    Predicate
+      .from { target: T =>
+        val guestId = target.targetGuestId match {
+          case Some(guest) => guest
+          case _ =>
+            guestIdNotFoundCounter.incr()
+            throw new IllegalStateException("guest_id_not_found")
+        }
+        val bucketName = target.abDecider
+          .bucket(LoggedOutRecsHoldback.exptName, GuestRecipient(guestId)).map(_.name)
+
+        bucketName match {
+          case Some(LoggedOutRecsHoldback.control) =>
+            controlBucketCounter.incr()
+            false
+          case _ =>
+            allowTrafficCounter.incr()
+            true
+        }
+      }
+      // Previously `name` was declared but never attached to the returned predicate.
+      .withStats(statsReceiver.scope(name))
+      .withName(name)
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/MlModelsHoldbackExperimentPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/MlModelsHoldbackExperimentPredicate.scala
new file mode 100644
index 000000000..014393870
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/MlModelsHoldbackExperimentPredicate.scala
@@ -0,0 +1,71 @@
+package com.twitter.frigate.pushservice.predicate
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.hermit.predicate.NamedPredicate
+import com.twitter.hermit.predicate.Predicate
+import com.twitter.util.Future
+
+object MlModelsHoldbackExperimentPredicate {
+
+  val name = "MlModelsHoldbackExperimentPredicate"
+
+  private val
alwaysTruePred = PredicatesForCandidate.alwaysTruePushCandidatePredicate
+
+  /**
+   * Picks the predicate to run for `pc`.
+   *
+   * Resolves to the always-true predicate when the target skips filters, is in the model
+   * exclusion list, or has any of the ML/relevance/heavy-ranking disable params set;
+   * otherwise resolves to `treatmentPred`.
+   *
+   * @param pc            candidate whose target is inspected
+   * @param treatmentPred predicate to use when no bypass condition holds
+   */
+  def getPredicateBasedOnCandidate(
+    pc: PushCandidate,
+    treatmentPred: Predicate[PushCandidate]
+  )(
+    implicit statsReceiver: StatsReceiver
+  ): Future[Predicate[PushCandidate]] = {
+    val target = pc.target
+
+    Future
+      .join(Future.value(target.skipFilters), target.isInModelExclusionList)
+      .map {
+        case (skipFilters, isInModelExclusionList) =>
+          // Short-circuit order is kept: params are only read when the earlier flags are false.
+          val bypassModel =
+            skipFilters ||
+              isInModelExclusionList ||
+              target.params(PushParams.DisableMlInFilteringParam) ||
+              target.params(PushFeatureSwitchParams.DisableMlInFilteringFeatureSwitchParam) ||
+              target.params(PushParams.DisableAllRelevanceParam) ||
+              target.params(PushParams.DisableHeavyRankingParam)
+
+          if (bypassModel) alwaysTruePred else treatmentPred
+      }
+  }
+
+  /**
+   * Builds the named predicate. Each item is evaluated independently with the predicate chosen
+   * by [[getPredicateBasedOnCandidate]], using `JointDauAndQualityModelPredicate` as treatment.
+   *
+   * Once an item's result is available, one counter is incremented: `holdback/all` when either
+   * "disable ML in filtering" param is set for the target, otherwise `prod/accepted` or
+   * `prod/rejected` according to the result.
+   */
+  def apply()(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = {
+    val stats = statsReceiver.scope(s"predicate_$name")
+    val prodStats = stats.scope("prod")
+    val acceptedCounter = prodStats.counter("accepted")
+    val rejectedCounter = prodStats.counter("rejected")
+    val holdbackCounter = stats.scope("holdback").counter("all")
+    val treatmentPredicate = JointDauAndQualityModelPredicate()
+
+    // Records the outcome of a single item once its prediction has completed.
+    def recordModelDecision(item: PushCandidate, accepted: Boolean): Unit = {
+      val mlFilteringDisabled =
+        item.target.params(PushParams.DisableMlInFilteringParam) ||
+          item.target.params(PushFeatureSwitchParams.DisableMlInFilteringFeatureSwitchParam)
+
+      if (mlFilteringDisabled) holdbackCounter.incr()
+      else if (accepted) acceptedCounter.incr()
+      else rejectedCounter.incr()
+    }
+
+    // Chooses the predicate for one item and runs it on that item alone.
+    def runSelectedPredicate(item: PushCandidate): Future[Boolean] =
+      getPredicateBasedOnCandidate(item, treatmentPredicate)(statsReceiver)
+        .flatMap { selected =>
+          // An empty result for the single submitted item counts as a rejection.
+          val decisionFut = selected.apply(Seq(item)).map(_.headOption.getOrElse(false))
+          decisionFut.foreach(accepted => recordModelDecision(item, accepted))
+          decisionFut
+        }
+
+    new Predicate[PushCandidate] {
+      def apply(items: Seq[PushCandidate]): Future[Seq[Boolean]] =
+        Future.collect(items.map(item => runSelectedPredicate(item)))
+    }.withStats(stats)
+      .withName(name)
+  }
+}
diff --git
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONSpreadControlPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONSpreadControlPredicate.scala new file mode 100644 index 000000000..bcd9e30d0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONSpreadControlPredicate.scala @@ -0,0 +1,116 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushConstants._ +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object OONSpreadControlPredicate { + + def oonTweetSpreadControlPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType + ] = { + val name = "oon_tweet_spread_control_predicate" + val scopedStatsReceiver = stats.scope(name) + val allOonCandidatesCounter = scopedStatsReceiver.counter("all_oon_candidates") + val filteredCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val minTweetSendsThreshold = + target.params(PushFeatureSwitchParams.MinTweetSendsThresholdParam) + lazy val spreadControlRatio = + target.params(PushFeatureSwitchParams.SpreadControlRatioParam) + lazy val favOverSendThreshold = + 
target.params(PushFeatureSwitchParams.FavOverSendThresholdParam) + + lazy val sentCount = candidate.numericFeatures.getOrElse(sentFeatureName, 0.0) + lazy val followerCount = + candidate.numericFeatures.getOrElse(authorActiveFollowerFeatureName, 0.0) + lazy val favCount = candidate.numericFeatures.getOrElse(favFeatureName, 0.0) + lazy val favOverSends = favCount / (sentCount + 1.0) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && isOonCandidate) { + allOonCandidatesCounter.incr() + if (sentCount > minTweetSendsThreshold && + sentCount > spreadControlRatio * followerCount && + favOverSends < favOverSendThreshold) { + filteredCandidatesCounter.incr() + Future.False + } else Future.True + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } + + def oonAuthorSpreadControlPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType + ] = { + val name = "oon_author_spread_control_predicate" + val scopedStatsReceiver = stats.scope(name) + val allOonCandidatesCounter = scopedStatsReceiver.counter("all_oon_candidates") + val filteredCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val minAuthorSendsThreshold = + target.params(PushFeatureSwitchParams.MinAuthorSendsThresholdParam) + lazy val spreadControlRatio = + target.params(PushFeatureSwitchParams.SpreadControlRatioParam) + lazy val reportRateThreshold = + target.params(PushFeatureSwitchParams.AuthorReportRateThresholdParam) + lazy val dislikeRateThreshold = + target.params(PushFeatureSwitchParams.AuthorDislikeRateThresholdParam) + + lazy val authorSentCount = + 
candidate.numericFeatures.getOrElse(authorSendCountFeatureName, 0.0) + lazy val authorReportCount = + candidate.numericFeatures.getOrElse(authorReportCountFeatureName, 0.0) + lazy val authorDislikeCount = + candidate.numericFeatures.getOrElse(authorDislikeCountFeatureName, 0.0) + lazy val followerCount = candidate.numericFeatures + .getOrElse(authorActiveFollowerFeatureName, 0.0) + lazy val reportRate = + authorReportCount / (authorSentCount + 1.0) + lazy val dislikeRate = + authorDislikeCount / (authorSentCount + 1.0) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && isOonCandidate) { + allOonCandidatesCounter.incr() + if (authorSentCount > minAuthorSendsThreshold && + authorSentCount > spreadControlRatio * followerCount && + (reportRate > reportRateThreshold || dislikeRate > dislikeRateThreshold)) { + filteredCandidatesCounter.incr() + Future.False + } else Future.True + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONTweetNegativeFeedbackBasedPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONTweetNegativeFeedbackBasedPredicate.scala new file mode 100644 index 000000000..3efb23d88 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OONTweetNegativeFeedbackBasedPredicate.scala @@ -0,0 +1,82 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object 
OONTweetNegativeFeedbackBasedPredicate { + + def ntabDislikeBasedPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType + ] = { + val name = "oon_tweet_dislike_based_predicate" + val scopedStatsReceiver = stats.scope(name) + val allOonCandidatesCounter = scopedStatsReceiver.counter("all_oon_candidates") + val oonCandidatesImpressedCounter = + scopedStatsReceiver.counter("oon_candidates_impressed") + val filteredCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + + val ntabDislikeCountFeature = + "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_ntab_disliked.any_feature.Duration.Top.count" + val sentFeature = + "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_sent.any_feature.Duration.Top.count" + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val ntabDislikeCountThreshold = + target.params(PushFeatureSwitchParams.TweetNtabDislikeCountThresholdParam) + lazy val ntabDislikeRateThreshold = + target.params(PushFeatureSwitchParams.TweetNtabDislikeRateThresholdParam) + lazy val ntabDislikeCountThresholdForMrTwistly = + target.params(PushFeatureSwitchParams.TweetNtabDislikeCountThresholdForMrTwistlyParam) + lazy val ntabDislikeRateThresholdForMrTwistly = + target.params(PushFeatureSwitchParams.TweetNtabDislikeRateThresholdForMrTwistlyParam) + + val isMrTwistly = CandidateUtil.isMrTwistlyCandidate(candidate) + + lazy val dislikeCount = candidate.numericFeatures.getOrElse(ntabDislikeCountFeature, 0.0) + lazy val sentCount = candidate.numericFeatures.getOrElse(sentFeature, 0.0) + lazy val dislikeRate = if (sentCount > 0) dislikeCount / sentCount else 0.0 + + if 
(CandidateUtil.shouldApplyHealthQualityFilters(candidate) && isOonCandidate) { + allOonCandidatesCounter.incr() + val (countThreshold, rateThreshold) = if (isMrTwistly) { + (ntabDislikeCountThresholdForMrTwistly, ntabDislikeRateThresholdForMrTwistly) + } else { + (ntabDislikeCountThreshold, ntabDislikeRateThreshold) + } + candidate.cachePredicateInfo( + name + "_count", + dislikeCount, + countThreshold, + dislikeCount > countThreshold) + candidate.cachePredicateInfo( + name + "_rate", + dislikeRate, + rateThreshold, + dislikeRate > rateThreshold) + if (dislikeCount > countThreshold && dislikeRate > rateThreshold) { + filteredCandidatesCounter.incr() + Future.False + } else Future.True + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OutOfNetworkCandidatesQualityPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OutOfNetworkCandidatesQualityPredicates.scala new file mode 100644 index 000000000..6f09df0c7 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/OutOfNetworkCandidatesQualityPredicates.scala @@ -0,0 +1,221 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.data_pipeline.features_common.MrRequestContextForFeatureStore +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.ml.featurestore.lib.dynamic.DynamicFeatureStoreClient +import com.twitter.util.Future +import com.twitter.frigate.pushservice.predicate.PostRankingPredicateHelper._ 
+import com.twitter.frigate.pushservice.util.CandidateUtil
+
+object OutOfNetworkCandidatesQualityPredicates {
+
+  /**
+   * Returns the character-length threshold for an out-of-network tweet.
+   *
+   * The threshold param is chosen by two flags: whether media thresholds apply, and whether
+   * the tweet language is in the SAUT set (or, when
+   * `OonTweetLengthPredicateMoreStrictForUndefinedLanguages` is on, the SAUT-or-undefined set).
+   * Only the selected param is read from `target.params`.
+   *
+   * @param target             target whose params supply the thresholds
+   * @param language           tweet language code
+   * @param useMediaThresholds whether the "with media" thresholds should be used
+   */
+  def getTweetCharLengthThreshold(
+    target: TargetUser with TargetABDecider,
+    language: String,
+    useMediaThresholds: Boolean
+  ): Double = {
+    val moreStrictForUndefinedLanguages =
+      target.params(PushFeatureSwitchParams.OonTweetLengthPredicateMoreStrictForUndefinedLanguages)
+    val isSautLanguage =
+      if (moreStrictForUndefinedLanguages) isTweetLanguageInSautOrUndefined(language)
+      else isTweetLanguageInSaut(language)
+
+    // The four cases cover every (Boolean, Boolean) pair, so no default arm is needed.
+    (useMediaThresholds, isSautLanguage) match {
+      case (true, true) =>
+        target.params(PushFeatureSwitchParams.SautOonWithMediaTweetLengthThresholdParam)
+      case (true, false) =>
+        target.params(PushFeatureSwitchParams.NonSautOonWithMediaTweetLengthThresholdParam)
+      case (false, true) =>
+        target.params(PushFeatureSwitchParams.SautOonWithoutMediaTweetLengthThresholdParam)
+      case (false, false) =>
+        target.params(PushFeatureSwitchParams.NonSautOonWithoutMediaTweetLengthThresholdParam)
+    }
+  }
+
+  def getTweetWordLengthThreshold(
+    target: TargetUser with TargetABDecider,
+    language: String,
+    useMediaThresholds: Boolean
+  ): Double = {
+    lazy val argfOonWithMediaTweetWordLengthThresholdParam =
+      target.params(PushFeatureSwitchParams.ArgfOonWithMediaTweetWordLengthThresholdParam)
+    lazy val esfthOonWithMediaTweetWordLengthThresholdParam =
+      target.params(PushFeatureSwitchParams.EsfthOonWithMediaTweetWordLengthThresholdParam)
+
+    lazy val argfOonCandidatesWithMediaCondition =
+      isTweetLanguageInArgf(language) && useMediaThresholds
+    lazy val
esfthOonCandidatesWithMediaCondition = + isTweetLanguageInEsfth(language) && useMediaThresholds + lazy val afirfOonCandidatesWithoutMediaCondition = + isTweetLanguageInAfirf(language) && !useMediaThresholds + + val afirfOonCandidatesWithoutMediaTweetWordLengthThreshold = 5 + if (argfOonCandidatesWithMediaCondition) { + argfOonWithMediaTweetWordLengthThresholdParam + } else if (esfthOonCandidatesWithMediaCondition) { + esfthOonWithMediaTweetWordLengthThresholdParam + } else if (afirfOonCandidatesWithoutMediaCondition) { + afirfOonCandidatesWithoutMediaTweetWordLengthThreshold + } else -1 + } + + def oonTweetLengthBasedPrerankingPredicate( + characterBased: Boolean + )( + implicit stats: StatsReceiver + ): NamedPredicate[OutOfNetworkTweetCandidate with TargetInfo[ + TargetUser with TargetABDecider + ]] = { + val name = "oon_tweet_length_based_preranking_predicate" + val scopedStats = stats.scope(s"${name}_charBased_$characterBased") + + Predicate + .fromAsync { + cand: OutOfNetworkTweetCandidate with TargetInfo[TargetUser with TargetABDecider] => + cand match { + case candidate: TweetAuthorDetails => + val target = candidate.target + val crt = candidate.commonRecType + + val updatedMediaLogic = + target.params(PushFeatureSwitchParams.OonTweetLengthPredicateUpdatedMediaLogic) + val updatedQuoteTweetLogic = + target.params(PushFeatureSwitchParams.OonTweetLengthPredicateUpdatedQuoteTweetLogic) + val useMediaThresholds = if (updatedMediaLogic || updatedQuoteTweetLogic) { + val hasMedia = updatedMediaLogic && (candidate.hasPhoto || candidate.hasVideo) + val hasQuoteTweet = updatedQuoteTweetLogic && candidate.quotedTweet.nonEmpty + hasMedia || hasQuoteTweet + } else RecTypes.isMediaType(crt) + val enableFilter = + target.params(PushFeatureSwitchParams.EnablePrerankingTweetLengthPredicate) + + val language = candidate.tweet.flatMap(_.language.map(_.language)).getOrElse("") + val tweetTextOpt = candidate.tweet.flatMap(_.coreData.map(_.text)) + + val (length: Double, 
threshold: Double) = if (characterBased) {
+              (
+                tweetTextOpt.map(_.size.toDouble).getOrElse(9999.0),
+                getTweetCharLengthThreshold(target, language, useMediaThresholds))
+            } else {
+              (
+                tweetTextOpt.map(getTweetWordLength).getOrElse(999.0),
+                getTweetWordLengthThreshold(target, language, useMediaThresholds))
+            }
+            scopedStats.counter("threshold_" + threshold.toString).incr()
+
+            CandidateUtil.shouldApplyHealthQualityFiltersForPrerankingPredicates(candidate).map {
+              case true if enableFilter =>
+                length > threshold
+              case _ => true
+            }
+          case _ =>
+            scopedStats.counter("author_is_not_hydrated").incr()
+            Future.True
+        }
+      }.withStats(scopedStats)
+      .withName(name)
+  }
+
+  private def isTweetLanguageInAfirf(candidateLanguage: String): Boolean = {
+    val setAFIRF: Set[String] = Set("")
+    setAFIRF.contains(candidateLanguage)
+  }
+  private def isTweetLanguageInEsfth(candidateLanguage: String): Boolean = {
+    val setESFTH: Set[String] = Set("")
+    setESFTH.contains(candidateLanguage)
+  }
+  private def isTweetLanguageInArgf(candidateLanguage: String): Boolean = {
+    val setARGF: Set[String] = Set("")
+    setARGF.contains(candidateLanguage)
+  }
+
+  private def isTweetLanguageInSaut(candidateLanguage: String): Boolean = {
+    val setSAUT = Set("")
+    setSAUT.contains(candidateLanguage)
+  }
+
+  private def isTweetLanguageInSautOrUndefined(candidateLanguage: String): Boolean = {
+    val setSautOrUndefined = Set("")
+    setSautOrUndefined.contains(candidateLanguage)
+  }
+
+  /**
+   * Returns true when the lower-cased `text` contains at least one entry of `denylist` as a
+   * substring; returns false for an empty denylist.
+   *
+   * Only `text` is lower-cased; denylist entries are compared as given.
+   * NOTE(review): entries containing upper-case characters can therefore never match --
+   * confirm the denylist is supplied in lower case.
+   *
+   * @param text     tweet text to scan
+   * @param denylist keywords to look for
+   */
+  def containTargetNegativeKeywords(text: String, denylist: Seq[String]): Boolean = {
+    // Lower-case once instead of once per keyword; lazy so an empty denylist never touches text.
+    lazy val loweredText = text.toLowerCase()
+    // exists short-circuits on the first hit and is false for an empty denylist.
+    denylist.exists(negativeKeyword => loweredText.contains(negativeKeyword))
+  }
+
+  def NegativeKeywordsPredicate(
+    postRankingFeatureStoreClient: DynamicFeatureStoreClient[MrRequestContextForFeatureStore]
+  )(
+    implicit stats: StatsReceiver
+  ): NamedPredicate[
+    PushCandidate with TweetCandidate with RecommendationType
+  ] = {
+
+    val name =
"negative_keywords_predicate" + val scopedStatsReceiver = stats.scope(name) + val allOonCandidatesCounter = scopedStatsReceiver.counter("all_oon_candidates") + val filteredOonCandidatesCounter = scopedStatsReceiver.counter("filtered_oon_candidates") + val tweetLanguageFeature = "RecTweet.TweetyPieResult.Language" + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isTwistlyCandidate = RecTypes.twistlyTweets.contains(crt) + + lazy val enableNegativeKeywordsPredicateParam = + target.params(PushFeatureSwitchParams.EnableNegativeKeywordsPredicateParam) + lazy val negativeKeywordsPredicateDenylist = + target.params(PushFeatureSwitchParams.NegativeKeywordsPredicateDenylist) + lazy val candidateLanguage = + candidate.categoricalFeatures.getOrElse(tweetLanguageFeature, "") + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && candidateLanguage.equals( + "en") && isTwistlyCandidate && enableNegativeKeywordsPredicateParam) { + allOonCandidatesCounter.incr() + + val tweetTextFuture: Future[String] = + getTweetText(candidate, postRankingFeatureStoreClient) + + tweetTextFuture.map { tweetText => + val containsNegativeWords = + containTargetNegativeKeywords(tweetText, negativeKeywordsPredicateDenylist) + candidate.cachePredicateInfo( + name, + if (containsNegativeWords) 1.0 else 0.0, + 0.0, + containsNegativeWords) + if (containsNegativeWords) { + filteredOonCandidatesCounter.incr() + false + } else true + } + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PNegMultimodalPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PNegMultimodalPredicates.scala new file mode 100644 index 000000000..f838d7ae6 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PNegMultimodalPredicates.scala @@ -0,0 +1,83 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.abuse.detection.scoring.thriftscala.Model +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest +import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object PNegMultimodalPredicates { + + def healthSignalScorePNegMultimodalPredicate( + tweetHealthScoreStore: ReadableStore[TweetScoringRequest, TweetScoringResponse] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "pneg_multimodal_predicate" + val statsScope = stats.scope(name) + val oonCandidatesCounter = statsScope.counter("oon_candidates") + val nonEmptyModelScoreCounter = statsScope.counter("non_empty_model_score") + val bucketedCounter = statsScope.counter("bucketed_oon_candidates") + val filteredCounter = statsScope.counter("filtered_oon_candidates") + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val enablePNegMultimodalPredicateParam = + target.params(PushFeatureSwitchParams.EnablePNegMultimodalPredicateParam) + 
lazy val pNegMultimodalPredicateModelThresholdParam = + target.params(PushFeatureSwitchParams.PNegMultimodalPredicateModelThresholdParam) + lazy val pNegMultimodalPredicateBucketThresholdParam = + target.params(PushFeatureSwitchParams.PNegMultimodalPredicateBucketThresholdParam) + val pNegMultimodalEnabledForF1Tweets = + target.params(PushParams.EnablePnegMultimodalPredictionForF1Tweets) + + if (CandidateUtil.shouldApplyHealthQualityFilters( + candidate) && (isOonCandidate || pNegMultimodalEnabledForF1Tweets) && enablePNegMultimodalPredicateParam) { + + val pNegMultimodalRequest = TweetScoringRequest(candidate.tweetId, Model.PNegMultimodal) + tweetHealthScoreStore.get(pNegMultimodalRequest).map { + case Some(tweetScoringResponse) => + nonEmptyModelScoreCounter.incr() + + val pNegMultimodalScore = 1.0 - tweetScoringResponse.score + + candidate + .cacheExternalScore("PNegMultimodalScore", Future.value(Some(pNegMultimodalScore))) + + if (isOonCandidate) { + oonCandidatesCounter.incr() + + if (pNegMultimodalScore > pNegMultimodalPredicateBucketThresholdParam) { + bucketedCounter.incr() + if (pNegMultimodalScore > pNegMultimodalPredicateModelThresholdParam) { + filteredCounter.incr() + false + } else true + } else true + } else { + true + } + case _ => true + } + } else { + Future.True + } + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PostRankingPredicateHelper.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PostRankingPredicateHelper.scala new file mode 100644 index 000000000..604f7b07c --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PostRankingPredicateHelper.scala @@ -0,0 +1,50 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.frigate.common.base._ +import com.twitter.frigate.data_pipeline.features_common.MrRequestContextForFeatureStore +import 
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.ml.featurestore.catalog.entities.core.Tweet +import com.twitter.ml.featurestore.catalog.features.core.Tweet.Text +import com.twitter.ml.featurestore.lib.TweetId +import com.twitter.ml.featurestore.lib.dynamic.DynamicFeatureStoreClient +import com.twitter.ml.featurestore.lib.online.FeatureStoreRequest +import com.twitter.util.Future + +object PostRankingPredicateHelper { + + val tweetTextFeature = "tweet.core.tweet.text" + + def getTweetText( + candidate: PushCandidate with TweetCandidate, + dynamicClient: DynamicFeatureStoreClient[MrRequestContextForFeatureStore] + ): Future[String] = { + if (candidate.categoricalFeatures.contains(tweetTextFeature)) { + Future.value(candidate.categoricalFeatures.getOrElse(tweetTextFeature, "")) + } else { + val candidateTweetEntity = Tweet.withId(TweetId(candidate.tweetId)) + val featureStoreRequests = Seq( + FeatureStoreRequest( + entityIds = Seq(candidateTweetEntity) + )) + val predictionRecords = dynamicClient( + featureStoreRequests, + requestContext = candidate.target.mrRequestContextForFeatureStore) + + predictionRecords.map { records => + val tweetText = records.head + .getFeatureValue(candidateTweetEntity, Text).getOrElse( + "" + ) + candidate.categoricalFeatures(tweetTextFeature) = tweetText + tweetText + } + } + } + + def getTweetWordLength(tweetText: String): Double = { + val tweetTextWithoutUrl: String = + tweetText.replaceAll("https?://\\S+\\s?", "").replaceAll("[\\s]+", " ") + tweetTextWithoutUrl.trim().split(" ").length.toDouble + } + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PreRankingPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PreRankingPredicates.scala new file mode 100644 index 000000000..4b61b23e3 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PreRankingPredicates.scala @@ -0,0 +1,158 @@ +package 
com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.common.base.SocialContextUserDetails +import com.twitter.frigate.common.base.TargetInfo +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.common.candidate.FrigateHistory +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.candidate.TweetImpressionHistory +import com.twitter.frigate.common.predicate.socialcontext.{Predicates => SocialContextPredicates, _} +import com.twitter.frigate.common.predicate.tweet._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.NtabCaretClickContFnFatiguePredicate +import com.twitter.hermit.predicate.NamedPredicate + +class PreRankingPredicatesBuilder( +)( + implicit statsReceiver: StatsReceiver) { + + private val SocialProofPredicates = List[NamedPredicate[PushCandidate]]( + SocialContextPredicates + .authorInSocialContext() + .applyOnlyToTweetAuthorWithSocialContextActions + .withName("author_social_context"), + SocialContextPredicates + .selfInSocialContext[TargetUser, SocialContextActions with TargetInfo[TargetUser]]() + .applyOnlyToSocialContextActionsWithTargetUser + .withName("self_social_context"), + SocialContextPredicates + .duplicateSocialContext[SocialContextActions]() + .applyOnlyToSocialContextActions + .withName("duplicate_social_context"), + SocialContextPredicates + .socialContextProtected[SocialContextUserDetails]() + .applyOnlyToSocialContextUserDetails + .withName("social_context_protected"), + SocialContextPredicates + 
.socialContextUnsuitable[SocialContextUserDetails]() + .applyOnlyToSocialContextUserDetails + .withName("social_context_unsuitable"), + SocialContextPredicates + .socialContextBlink[SocialContextUserDetails]() + .applyOnlyToSocialContextUserDetails + .withName("social_context_blink") + ) + + private val CommonPredicates = List[NamedPredicate[PushCandidate]]( + PredicatesForCandidate.candidateEnabledForEmailPredicate(), + PredicatesForCandidate.openAppExperimentUserCandidateAllowList(statsReceiver) + ) + + private val TweetPredicates = List[NamedPredicate[PushCandidate]]( + PredicatesForCandidate.tweetCandidateWithLessThan2SocialContextsIsAReply.applyOnlyToTweetCandidatesWithSocialContextActions + .withName("tweet_candidate_with_less_than_2_social_contexts_is_not_a_reply"), + PredicatesForCandidate.filterOONCandidatePredicate(), + PredicatesForCandidate.oldTweetRecsPredicate.applyOnlyToTweetCandidateWithTargetAndABDeciderAndMaxTweetAge + .withName("old_tweet"), + DuplicatePushTweetPredicate + .apply[ + TargetUser with FrigateHistory, + TweetCandidate with TargetInfo[TargetUser with FrigateHistory] + ] + .applyOnlyToTweetCandidateWithTargetAndFrigateHistory + .withName("duplicate_push_tweet"), + DuplicateEmailTweetPredicate + .apply[ + TargetUser with FrigateHistory, + TweetCandidate with TargetInfo[TargetUser with FrigateHistory] + ] + .applyOnlyToTweetCandidateWithTargetAndFrigateHistory + .withName("duplicate_email_tweet"), + TweetAuthorPredicates + .recTweetAuthorUnsuitable[TweetCandidate with TweetAuthorDetails] + .applyOnlyToTweetCandidateWithTweetAuthorDetails + .withName("tweet_author_unsuitable"), + TweetObjectExistsPredicate[ + TweetCandidate with TweetDetails + ].applyOnlyToTweetCandidatesWithTweetDetails + .withName("tweet_object_exists"), + TweetImpressionPredicate[ + TargetUser with TweetImpressionHistory, + TweetCandidate with TargetInfo[TargetUser with TweetImpressionHistory] + ].applyOnlyToTweetCandidateWithTargetAndTweetImpressionHistory + 
.withStats(statsReceiver.scope("tweet_impression")) + .withName("tweet_impression"), + SelfTweetPredicate[ + TargetUser, + TweetAuthor with TargetInfo[TargetUser]]().applyOnlyToTweetAuthorWithTargetInfo + .withName("self_author"), + PredicatesForCandidate.tweetIsNotAreply.applyOnlyToTweetCandidateWithoutSocialContextWithTweetDetails + .withName("tweet_candidate_not_a_reply"), + PredicatesForCandidate.f1CandidateIsNotAReply.applyOnlyToF1CandidateWithTargetAndABDecider + .withName("f1_candidate_is_not_a_reply"), + PredicatesForCandidate.outOfNetworkTweetCandidateIsNotAReply.applyOnlyToOutOfNetworkTweetCandidateWithTargetAndABDecider + .withName("out_of_network_tweet_candidate_is_not_a_reply"), + PredicatesForCandidate.outOfNetworkTweetCandidateEnabledCrTag.applyOnlyToOutOfNetworkTweetCandidateWithTargetAndABDecider + .withName("out_of_network_tweet_candidate_enabled_crtag"), + PredicatesForCandidate.outOfNetworkTweetCandidateEnabledCrtGroup.applyOnlyToOutOfNetworkTweetCandidateWithTargetAndABDecider + .withName("out_of_network_tweet_candidate_enabled_crt_group"), + OutOfNetworkCandidatesQualityPredicates + .oonTweetLengthBasedPrerankingPredicate(characterBased = true) + .applyOnlyToOutOfNetworkTweetCandidateWithTargetAndABDecider + .withName("oon_tweet_char_length_too_short"), + OutOfNetworkCandidatesQualityPredicates + .oonTweetLengthBasedPrerankingPredicate(characterBased = false) + .applyOnlyToOutOfNetworkTweetCandidateWithTargetAndABDecider + .withName("oon_tweet_word_length_too_short"), + PredicatesForCandidate + .protectedTweetF1ExemptPredicate[ + TargetUser with TargetABDecider, + TweetCandidate with TweetAuthorDetails with TargetInfo[ + TargetUser with TargetABDecider + ] + ] + .applyOnlyToTweetCandidateWithAuthorDetailsWithTargetABDecider + .withName("f1_exempt_tweet_author_protected"), + ) + + private val SgsPreRankingPredicates = List[NamedPredicate[PushCandidate]]( + SGSPredicatesForCandidate.authorBeingFollowed.applyOnlyToAuthorBeingFollowPredicates + 
.withName("author_not_being_followed"), + SGSPredicatesForCandidate.authorNotBeingDeviceFollowed.applyOnlyToBasicTweetPredicates + .withName("author_being_device_followed"), + SGSPredicatesForCandidate.recommendedTweetAuthorAcceptableToTargetUser.applyOnlyToBasicTweetPredicates + .withName("recommended_tweet_author_not_acceptable_to_target_user"), + SGSPredicatesForCandidate.disableInNetworkTweetPredicate.applyOnlyToBasicTweetPredicates + .withName("enable_in_network_tweet"), + SGSPredicatesForCandidate.disableOutNetworkTweetPredicate.applyOnlyToBasicTweetPredicates + .withName("enable_out_network_tweet") + ) + + private val SeeLessOftenPredicates = List[NamedPredicate[PushCandidate]]( + NtabCaretClickContFnFatiguePredicate + .ntabCaretClickContFnFatiguePredicates( + ) + .withName("seelessoften_cont_fn_fatigue") + ) + + final def build(): List[NamedPredicate[PushCandidate]] = { + TweetPredicates ++ + CommonPredicates ++ + SocialProofPredicates ++ + SgsPreRankingPredicates ++ + SeeLessOftenPredicates + } +} + +object PreRankingPredicates { + def apply( + statsReceiver: StatsReceiver + ): List[NamedPredicate[PushCandidate]] = + new PreRankingPredicatesBuilder()(statsReceiver).build() +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PredicatesForCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PredicatesForCandidate.scala new file mode 100644 index 000000000..e18667b51 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/PredicatesForCandidate.scala @@ -0,0 +1,874 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.candidate.MaxTweetAge +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.predicate.tweet.TweetAuthorPredicates +import com.twitter.frigate.common.predicate._ +import 
com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.util.SnowflakeUtils +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.frigate.thriftscala.ChannelName +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.hermit.predicate.gizmoduck._ +import com.twitter.hermit.predicate.socialgraph.Edge +import com.twitter.hermit.predicate.socialgraph.MultiEdge +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.hermit.predicate.socialgraph.SocialGraphPredicate +import com.twitter.service.metastore.gen.thriftscala.Location +import com.twitter.socialgraph.thriftscala.RelationshipType +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.Param +import com.twitter.util.Duration +import com.twitter.util.Future + +object PredicatesForCandidate { + + def oldTweetRecsPredicate(implicit stats: StatsReceiver): Predicate[ + TweetCandidate with RecommendationType with TargetInfo[ + TargetUser with TargetABDecider with MaxTweetAge + ] + ] = { + val name = "old_tweet" + Predicate + .from[TweetCandidate with RecommendationType with TargetInfo[ + TargetUser with TargetABDecider with MaxTweetAge + ]] { candidate => + { + val crt = candidate.commonRecType + val defaultAge = if (RecTypes.mrModelingBasedTypes.contains(crt)) { + candidate.target.params(PushFeatureSwitchParams.ModelingBasedCandidateMaxTweetAgeParam) + } else if (RecTypes.GeoPopTweetTypes.contains(crt)) { + 
candidate.target.params(PushFeatureSwitchParams.GeoPopTweetMaxAgeInHours) + } else if (RecTypes.simclusterBasedTweets.contains(crt)) { + candidate.target.params( + PushFeatureSwitchParams.SimclusterBasedCandidateMaxTweetAgeParam) + } else if (RecTypes.detopicTypes.contains(crt)) { + candidate.target.params(PushFeatureSwitchParams.DetopicBasedCandidateMaxTweetAgeParam) + } else if (RecTypes.f1FirstDegreeTypes.contains(crt)) { + candidate.target.params(PushFeatureSwitchParams.F1CandidateMaxTweetAgeParam) + } else if (crt == CommonRecommendationType.ExploreVideoTweet) { + candidate.target.params(PushFeatureSwitchParams.ExploreVideoTweetAgeParam) + } else + candidate.target.params(PushFeatureSwitchParams.MaxTweetAgeParam) + SnowflakeUtils.isRecent(candidate.tweetId, defaultAge) + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def tweetIsNotAreply( + implicit stats: StatsReceiver + ): NamedPredicate[TweetCandidate with TweetDetails] = { + val name = "tweet_candidate_not_a_reply" + Predicate + .from[TweetCandidate with TweetDetails] { c => + c.isReply match { + case Some(true) => false + case _ => true + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + /** + * Check if tweet contains any opted-out free form interests. 
+ * Currently, we use it for media categories and semantic core + * @param stats + * @return + */ + def noOptoutFreeFormInterestPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "free_form_interest_opt_out" + val tweetMediaAnnotationFeature = + "tweet.mediaunderstanding.tweet_annotations.safe_category_probabilities" + val tweetSemanticCoreFeature = + "tweet.core.tweet.semantic_core_annotations" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + val withOptOutFreeFormInterestsCounter = stats.counter("with_optout_interests") + val withoutOptOutInterestsCounter = stats.counter("without_optout_interests") + val withOptOutFreeFormInterestsFromMediaAnnotationCounter = + stats.counter("with_optout_interests_from_media_annotation") + val withOptOutFreeFormInterestsFromSemanticCoreCounter = + stats.counter("with_optout_interests_from_semantic_core") + Predicate + .fromAsync { candidate: PushCandidate => + val tweetSemanticCoreEntityIds = candidate.sparseBinaryFeatures + .getOrElse(tweetSemanticCoreFeature, Set.empty[String]).map { id => + id.split('.')(2) + }.toSet + val tweetMediaAnnotationIds = candidate.sparseContinuousFeatures + .getOrElse(tweetMediaAnnotationFeature, Map.empty[String, Double]).keys.toSet + + candidate.target.optOutFreeFormUserInterests.map { + case optOutUserInterests: Seq[String] => + withOptOutFreeFormInterestsCounter.incr() + val optOutUserInterestsSet = optOutUserInterests.toSet + val mediaAnnoIntersect = optOutUserInterestsSet.intersect(tweetMediaAnnotationIds) + val semanticCoreIntersect = optOutUserInterestsSet.intersect(tweetSemanticCoreEntityIds) + if (!mediaAnnoIntersect.isEmpty) { + withOptOutFreeFormInterestsFromMediaAnnotationCounter.incr() + } + if (!semanticCoreIntersect.isEmpty) { + withOptOutFreeFormInterestsFromSemanticCoreCounter.incr() + } + semanticCoreIntersect.isEmpty && mediaAnnoIntersect.isEmpty + case _ => + withoutOptOutInterestsCounter.incr() + true + } + } + 
.withStats(scopedStatsReceiver) + .withName(name) + } + + def tweetCandidateWithLessThan2SocialContextsIsAReply( + implicit stats: StatsReceiver + ): NamedPredicate[TweetCandidate with TweetDetails with SocialContextActions] = { + val name = "tweet_candidate_with_less_than_2_social_contexts_is_not_a_reply" + Predicate + .from[TweetCandidate with TweetDetails with SocialContextActions] { cand => + cand.isReply match { + case Some(true) if cand.socialContextTweetIds.size < 2 => false + case _ => true + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def f1CandidateIsNotAReply(implicit stats: StatsReceiver): NamedPredicate[F1Candidate] = { + val name = "f1_candidate_is_not_a_reply" + Predicate + .from[F1Candidate] { candidate => + candidate.isReply match { + case Some(true) => false + case _ => true + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def outOfNetworkTweetCandidateEnabledCrTag( + implicit stats: StatsReceiver + ): NamedPredicate[OutOfNetworkTweetCandidate with TargetInfo[TargetUser with TargetABDecider]] = { + val name = "out_of_network_tweet_candidate_enabled_crtag" + val scopedStats = stats.scope(name) + Predicate + .from[OutOfNetworkTweetCandidate with TargetInfo[TargetUser with TargetABDecider]] { cand => + val disabledCrTag = cand.target + .params(PushFeatureSwitchParams.OONCandidatesDisabledCrTagParam) + val candGeneratedByDisabledSignal = cand.tagsCR.exists { tagsCR => + val tagsCRSet = tagsCR.map(_.toString).toSet + tagsCRSet.nonEmpty && tagsCRSet.subsetOf(disabledCrTag.toSet) + } + if (candGeneratedByDisabledSignal) { + cand.tagsCR.getOrElse(Nil).foreach(tag => scopedStats.counter(tag.toString).incr()) + false + } else true + } + .withStats(scopedStats) + .withName(name) + } + + def outOfNetworkTweetCandidateEnabledCrtGroup( + implicit stats: StatsReceiver + ): NamedPredicate[OutOfNetworkTweetCandidate with TargetInfo[TargetUser with TargetABDecider]] = { + val name = 
"out_of_network_tweet_candidate_enabled_crt_group" + val scopedStats = stats.scope(name) + Predicate + .from[OutOfNetworkTweetCandidate with TargetInfo[TargetUser with TargetABDecider]] { cand => + val disabledCrtGroup = cand.target + .params(PushFeatureSwitchParams.OONCandidatesDisabledCrtGroupParam) + val crtGroup = CandidateUtil.getCrtGroup(cand.commonRecType) + val candGeneratedByDisabledCrt = disabledCrtGroup.contains(crtGroup) + if (candGeneratedByDisabledCrt) { + scopedStats.counter("filter_" + crtGroup.toString).incr() + false + } else true + } + .withStats(scopedStats) + .withName(name) + } + + def outOfNetworkTweetCandidateIsNotAReply( + implicit stats: StatsReceiver + ): NamedPredicate[OutOfNetworkTweetCandidate] = { + val name = "out_of_network_tweet_candidate_is_not_a_reply" + Predicate + .from[OutOfNetworkTweetCandidate] { cand => + cand.isReply match { + case Some(true) => false + case _ => true + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def recommendedTweetIsAuthoredBySelf( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = + Predicate + .from[PushCandidate] { + case tweetCandidate: PushCandidate with TweetDetails => + tweetCandidate.authorId match { + case Some(authorId) => authorId != tweetCandidate.target.targetId + case None => true + } + case _ => + true + } + .withStats(statsReceiver.scope("predicate_self_author")) + .withName("self_author") + + def authorInSocialContext(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = + Predicate + .from[PushCandidate] { + case tweetCandidate: PushCandidate with TweetDetails with SocialContextActions => + tweetCandidate.authorId match { + case Some(authorId) => + !tweetCandidate.socialContextUserIds.contains(authorId) + case None => true + } + case _ => true + } + .withStats(statsReceiver.scope("predicate_author_social_context")) + .withName("author_social_context") + + def selfInSocialContext(implicit statsReceiver: StatsReceiver): 
NamedPredicate[PushCandidate] = { + val name = "self_social_context" + Predicate + .from[PushCandidate] { + case candidate: PushCandidate with SocialContextActions => + !candidate.socialContextUserIds.contains(candidate.target.targetId) + case _ => + true + } + .withStats(statsReceiver.scope(s"${name}_predicate")) + .withName(name) + } + + def minSocialContext( + threshold: Int + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with SocialContextActions] = { + Predicate + .from { candidate: PushCandidate with SocialContextActions => + candidate.socialContextUserIds.size >= threshold + } + .withStats(statsReceiver.scope("predicate_min_social_context")) + .withName("min_social_context") + } + + private def anyWithheldContent( + userStore: ReadableStore[Long, User], + userCountryStore: ReadableStore[Long, Location] + )( + implicit statsReceiver: StatsReceiver + ): Predicate[TargetRecUser] = + GizmoduckUserPredicate.withheldContentPredicate( + userStore = userStore, + userCountryStore = userCountryStore, + statsReceiver = statsReceiver, + checkAllCountries = true + ) + + def targetUserExists(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = { + TargetUserPredicates + .targetUserExists()(statsReceiver) + .flatContraMap { candidate: PushCandidate => Future.value(candidate.target) } + .withName("target_user_exists") + } + + def secondaryDormantAccountPredicate( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "secondary_dormant_account" + TargetUserPredicates + .secondaryDormantAccountPredicate()(statsReceiver) + .on { candidate: PushCandidate => candidate.target } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + def socialContextBeingFollowed( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with SocialContextActions] = + SocialGraphPredicate + 
.allRelationEdgesExist(edgeStore, RelationshipType.Following) + .on { candidate: PushCandidate with SocialContextActions => + candidate.socialContextUserIds.map { u => Edge(candidate.target.targetId, u) } + } + .withStats(statsReceiver.scope("predicate_social_context_being_followed")) + .withName("social_context_being_followed") + + private def edgeFromCandidate(candidate: PushCandidate with TweetAuthor): Option[Edge] = { + candidate.authorId map { authorId => Edge(candidate.target.targetId, authorId) } + } + + def authorNotBeingDeviceFollowed( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + SocialGraphPredicate + .relationExists(edgeStore, RelationshipType.DeviceFollowing) + .optionalOn( + edgeFromCandidate, + missingResult = false + ) + .flip + .withStats(statsReceiver.scope("predicate_author_not_device_followed")) + .withName("author_not_device_followed") + } + + def authorBeingFollowed( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + SocialGraphPredicate + .relationExists(edgeStore, RelationshipType.Following) + .optionalOn( + edgeFromCandidate, + missingResult = false + ) + .withStats(statsReceiver.scope("predicate_author_being_followed")) + .withName("author_being_followed") + } + + def authorNotBeingFollowed( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + SocialGraphPredicate + .relationExists(edgeStore, RelationshipType.Following) + .optionalOn( + edgeFromCandidate, + missingResult = false + ) + .flip + .withStats(statsReceiver.scope("predicate_author_not_being_followed")) + .withName("author_not_being_followed") + } + + def recommendedTweetAuthorAcceptableToTargetUser( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit 
statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + val name = "recommended_tweet_author_acceptable_to_target_user" + SocialGraphPredicate + .anyRelationExists( + edgeStore, + Set( + RelationshipType.Blocking, + RelationshipType.BlockedBy, + RelationshipType.HideRecommendations, + RelationshipType.Muting + ) + ) + .flip + .optionalOn( + edgeFromCandidate, + missingResult = false + ) + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + def relationNotExistsPredicate( + edgeStore: ReadableStore[RelationEdge, Boolean], + relations: Set[RelationshipType] + ): Predicate[(Long, Iterable[Long])] = + SocialGraphPredicate + .anyRelationExistsForMultiEdge( + edgeStore, + relations + ) + .flip + .on { + case (targetUserId, userIds) => + MultiEdge(targetUserId, userIds.toSet) + } + + def blocking(edgeStore: ReadableStore[RelationEdge, Boolean]): Predicate[(Long, Iterable[Long])] = + relationNotExistsPredicate( + edgeStore, + Set(RelationshipType.BlockedBy, RelationshipType.Blocking) + ) + + def blockingOrMuting( + edgeStore: ReadableStore[RelationEdge, Boolean] + ): Predicate[(Long, Iterable[Long])] = + relationNotExistsPredicate( + edgeStore, + Set(RelationshipType.BlockedBy, RelationshipType.Blocking, RelationshipType.Muting) + ) + + def socialContextNotRetweetFollowing( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with SocialContextActions] = { + val name = "social_context_not_retweet_following" + relationNotExistsPredicate(edgeStore, Set(RelationshipType.NotRetweetFollowing)) + .optionalOn[PushCandidate with SocialContextActions]( + { + case candidate: PushCandidate with SocialContextActions + if RecTypes.isTweetRetweetType(candidate.commonRecType) => + Some((candidate.target.targetId, candidate.socialContextUserIds)) + case _ => + None + }, + missingResult = true + ) + .withStats(statsReceiver.scope(s"predicate_$name")) + 
.withName(name) + } + + def socialContextBlockingOrMuting( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with SocialContextActions] = + blockingOrMuting(edgeStore) + .on { candidate: PushCandidate with SocialContextActions => + (candidate.target.targetId, candidate.socialContextUserIds) + } + .withStats(statsReceiver.scope("predicate_social_context_blocking_or_muting")) + .withName("social_context_blocking_or_muting") + + /** + * Use hydrated Tweet object for F1 Protected experiment for checking null cast as Tweetypie hydration + * fails for protected Authors without passing in Target id. We do this specifically for + * F1 Protected Tweet Experiment in Earlybird Adaptor. + * For rest of the traffic refer to existing Nullcast Predicate + */ + def nullCastF1ProtectedExperientPredicate( + tweetypieStore: ReadableStore[Long, TweetyPieResult] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate with TweetDetails] = { + val name = "f1_exempted_null_cast_tweet" + val f1NullCastCheckCounter = statsReceiver.scope(name).counter("f1_null_cast_check") + Predicate + .fromAsync { tweetCandidate: PushCandidate with TweetCandidate with TweetDetails => + if (RecTypes.f1FirstDegreeTypes(tweetCandidate.commonRecType) && tweetCandidate.target + .params(PushFeatureSwitchParams.EnableF1FromProtectedTweetAuthors)) { + f1NullCastCheckCounter.incr() + tweetCandidate.tweet match { + case Some(tweetObj) => + baseNullCastTweet().apply(Seq(TweetyPieResult(tweetObj, None, None))).map(_.head) + case _ => Future.False + } + } else { + nullCastTweet(tweetypieStore).apply(Seq(tweetCandidate)).map(_.head) + } + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + private def baseNullCastTweet(): Predicate[TweetyPieResult] = + Predicate.from { t: TweetyPieResult => !t.tweet.coreData.exists { cd => cd.nullcast } } + + def nullCastTweet( + 
tweetyPieStore: ReadableStore[Long, TweetyPieResult] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "null_cast_tweet" + baseNullCastTweet() + .flatOptionContraMap[PushCandidate with TweetCandidate]( + f = (tweetCandidate: PushCandidate + with TweetCandidate) => tweetyPieStore.get(tweetCandidate.tweetId), + missingResult = false + ) + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + /** + * Apply `predicate` unless `fn` is true: the result is `fn` OR-ed with `predicate`, reported under `name`. + */ + def exceptedPredicate[T <: PushCandidate]( + name: String, + fn: T => Future[Boolean], + predicate: Predicate[T] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + Predicate + .fromAsync { e: T => fn(e) } + .or(predicate) + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * When DisableInNetworkTweetCandidatesParam is set for the target, defers to authorNotBeingFollowed; otherwise always passes. + * @param edgeStore [[ReadableStore[RelationEdge, Boolean]]] + * @return - allow only out-network tweets if in-network tweets are disabled + */ + def disableInNetworkTweetPredicate( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + val name = "disable_in_network_tweet" + Predicate + .fromAsync { candidate: PushCandidate with TweetAuthor => + if (candidate.target.params(PushParams.DisableInNetworkTweetCandidatesParam)) { + authorNotBeingFollowed(edgeStore) + .apply(Seq(candidate)) + .map(_.head) + } else Future.True + }.withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * When DisableOutNetworkTweetCandidatesFS is set for the target, defers to authorBeingFollowed; otherwise always passes. + * @param edgeStore [[ReadableStore[RelationEdge, Boolean]]] + * @return - allow only in-network tweets if out-network tweets are disabled + */ + def disableOutNetworkTweetPredicate( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor] = { + val name = "disable_out_network_tweet" + Predicate + .fromAsync { candidate: PushCandidate with TweetAuthor
=> + if (candidate.target.params(PushFeatureSwitchParams.DisableOutNetworkTweetCandidatesFS)) { + authorBeingFollowed(edgeStore) + .apply(Seq(candidate)) + .map(_.head) + } else Future.True + }.withStats(statsReceiver.scope(name)) + .withName(name) + } + + def alwaysTruePredicate: NamedPredicate[PushCandidate] = { + Predicate + .all[PushCandidate] + .withName("predicate_AlwaysTrue") + } + + def alwaysTruePushCandidatePredicate: NamedPredicate[PushCandidate] = { + Predicate + .all[PushCandidate] + .withName("predicate_AlwaysTrue") + } + + def alwaysFalsePredicate(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = { + val name = "predicate_AlwaysFalse" + val scopedStatsReceiver = statsReceiver.scope(name) + Predicate + .from { candidate: PushCandidate => false } + .withStats(scopedStatsReceiver) + .withName(name) + } + + def accountCountryPredicate( + allowedCountries: Set[String] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "AccountCountryPredicate" + val stats = statsReceiver.scope(name) + AccountCountryPredicate(allowedCountries) + .on { candidate: PushCandidate => candidate.target } + .withStats(stats) + .withName(name) + } + + def paramPredicate[T <: PushCandidate]( + param: Param[Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = param.getClass.getSimpleName.stripSuffix("$") + TargetPredicates + .paramPredicate(param) + .on { candidate: PushCandidate => candidate.target } + .withStats(statsReceiver.scope(s"param_${name}_controlled_predicate")) + .withName(s"param_${name}_controlled_predicate") + } + + def isDeviceEligibleForNewsOrSports( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "is_device_eligible_for_news_or_sports" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate => + candidate.target.deviceInfo.map(_.exists(_.isNewsEligible)) + } + 
.withStats(scopedStatsReceiver) + .withName(name) + } + + def isDeviceEligibleForCreatorPush( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "is_device_eligible_for_creator_push" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate => + candidate.target.deviceInfo.map(_.exists(settings => + settings.isNewsEligible || settings.isRecommendationsEligible)) + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** + * Like [[TargetUserPredicates.homeTimelineFatigue()]] but for candidate. + */ + def htlFatiguePredicate( + fatigueDuration: Param[Duration] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "htl_fatigue" + Predicate + .fromAsync { candidate: PushCandidate => + val _fatigueDuration = candidate.target.params(fatigueDuration) + TargetUserPredicates + .homeTimelineFatigue( + fatigueDuration = _fatigueDuration + ).apply(Seq(candidate.target)).map(_.head) + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def mrWebHoldbackPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "mr_web_holdback_for_candidate" + val scopedStats = stats.scope(name) + PredicatesForCandidate.exludeCrtFromPushHoldback + .or( + TargetPredicates + .webNotifsHoldback() + .on { candidate: PushCandidate => candidate.target } + ) + .withStats(scopedStats) + .withName(name) + } + + def candidateEnabledForEmailPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "candidates_enabled_for_email" + Predicate + .from { candidate: PushCandidate => + if (candidate.target.isEmailUser) + candidate.isInstanceOf[TweetCandidate with TweetAuthor with RecommendationType] + else true + } + .withStats(stats.scope(name)) + .withName(name) + } + + def protectedTweetF1ExemptPredicate[ + T <: TargetUser with TargetABDecider, + Cand <: TweetCandidate with 
TweetAuthorDetails with TargetInfo[T] + ]( + implicit stats: StatsReceiver + ): NamedPredicate[ + TweetCandidate with TweetAuthorDetails with TargetInfo[ + TargetUser with TargetABDecider + ] + ] = { + val name = "f1_exempt_tweet_author_protected" + val skipForProtectedAuthorScope = stats.scope(name).scope("skip_protected_author_for_f1") + val authorIsProtectedCounter = skipForProtectedAuthorScope.counter("author_protected_true") + val authorIsNotProtectedCounter = skipForProtectedAuthorScope.counter("author_protected_false") + val authorNotFoundCounter = stats.scope(name).counter("author_not_found") + Predicate + .fromAsync[TweetCandidate with TweetAuthorDetails with TargetInfo[ + TargetUser with TargetABDecider + ]] { + case candidate: F1Candidate + if candidate.target.params(PushFeatureSwitchParams.EnableF1FromProtectedTweetAuthors) => + candidate.tweetAuthor.foreach { + case Some(author) => + if (GizmoduckUserPredicate.isProtected(author)) { + authorIsProtectedCounter.incr() + } else authorIsNotProtectedCounter.incr() + case _ => authorNotFoundCounter.incr() + } + Future.True + case cand => + TweetAuthorPredicates.recTweetAuthorProtected.apply(Seq(cand)).map(_.head) + } + .withStats(stats.scope(name)) + .withName(name) + } + + /** + * filter a notification if user has already received ANY prior notification about the space id + * @param stats + * @return + */ + def duplicateSpacesPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[Space with PushCandidate] = { + val name = "duplicate_spaces_predicate" + Predicate + .fromAsync { c: Space with PushCandidate => + c.target.pushRecItems.map { pushRecItems => + !pushRecItems.spaceIds.contains(c.spaceId) + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + def filterOONCandidatePredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "filter_oon_candidate" + + Predicate + .fromAsync[PushCandidate] { cand => + val crt = cand.commonRecType + val 
isOONCandidate = + RecTypes.isOutOfNetworkTweetRecType(crt) || RecTypes.outOfNetworkTopicTweetTypes + .contains(crt) || RecTypes.isOutOfNetworkSpaceType(crt) || RecTypes.userTypes.contains( + crt) + if (isOONCandidate) { + cand.target.notificationsFromOnlyPeopleIFollow.map { inNetworkOnly => + if (inNetworkOnly) { + stats.scope(name, crt.toString).counter("inNetworkOnlyOn").incr() + } else { + stats.scope(name, crt.toString).counter("inNetworkOnlyOff").incr() + } + !(inNetworkOnly && cand.target.params( + PushFeatureSwitchParams.EnableOONFilteringBasedOnUserSettings)) + } + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } + + def exludeCrtFromPushHoldback( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = Predicate + .from { candidate: PushCandidate => + val crtName = candidate.commonRecType.name + val target = candidate.target + target + .params(PushFeatureSwitchParams.CommonRecommendationTypeDenyListPushHoldbacks) + .exists(crtName.equalsIgnoreCase) + } + .withStats(stats.scope("exclude_crt_from_push_holdbacks")) + + def enableSendHandlerCandidates(implicit stats: StatsReceiver): NamedPredicate[PushCandidate] = { + val name = "sendhandler_enable_push_recommendations" + PredicatesForCandidate.exludeCrtFromPushHoldback + .or(PredicatesForCandidate.paramPredicate( + PushFeatureSwitchParams.EnablePushRecommendationsParam)) + .withStats(stats.scope(name)) + .withName(name) + } + + def openAppExperimentUserCandidateAllowList( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "open_app_experiment_user_candidate_allow_list" + Predicate + .fromAsync { candidate: PushCandidate => + val target = candidate.target + Future.join(target.isOpenAppExperimentUser, target.targetUser).map { + case (isOpenAppUser, targetUser) => + val shouldLimitOpenAppCrts = + isOpenAppUser || targetUser.exists(_.userType == UserType.Soft) + + if (shouldLimitOpenAppCrts) { + val listOfAllowedCrt = target + 
.params(PushFeatureSwitchParams.ListOfCrtsForOpenApp) + .flatMap(CommonRecommendationType.valueOf) + listOfAllowedCrt.contains(candidate.commonRecType) + } else true + } + }.withStats(stats.scope(name)) + .withName(name) + } + + def isTargetBlueVerified( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "is_target_already_blue_verified" + Predicate + .fromAsync { candidate: PushCandidate => + val target = candidate.target + target.isBlueVerified.map(_.getOrElse(false)) + }.withStats(stats.scope(name)) + .withName(name) + } + + def isTargetLegacyVerified( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "is_target_already_legacy_verified" + Predicate + .fromAsync { candidate: PushCandidate => + val target = candidate.target + target.isVerified.map(_.getOrElse(false)) + }.withStats(stats.scope(name)) + .withName(name) + } + + def isTargetSuperFollowCreator(implicit stats: StatsReceiver): NamedPredicate[PushCandidate] = { + val name = "is_target_already_super_follow_creator" + Predicate + .fromAsync { candidate: PushCandidate => + val target = candidate.target + target.isSuperFollowCreator.map( + _.getOrElse(false) + ) + }.withStats(stats.scope(name)) + .withName(name) + } + + def isChannelValidPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "is_channel_valid" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate => + candidate + .getChannels().map(channels => + !(channels.toSet.size == 1 && channels.head == ChannelName.None)) + } + .withStats(scopedStatsReceiver) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SGSPredicatesForCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SGSPredicatesForCandidate.scala new file mode 100644 index 000000000..e335c8d9c --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SGSPredicatesForCandidate.scala @@ -0,0 +1,174 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.SocialGraphServiceRelationshipMap +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.gizmoduck.thriftscala.UserType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.hermit.predicate.socialgraph.Edge +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.socialgraph.thriftscala.RelationshipType +import com.twitter.util.Future + +/** + * Refactor SGS predicates so that predicates can use relationshipMap we generate in hydrate step + */ +object SGSPredicatesForCandidate { + + case class RelationshipMapEdge(edge: Edge, relationshipMap: Map[RelationEdge, Boolean]) + + private def relationshipMapEdgeFromCandidate( + candidate: PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap + ): Option[RelationshipMapEdge] = { + candidate.authorId map { authorId => + RelationshipMapEdge(Edge(candidate.target.targetId, authorId), candidate.relationshipMap) + } + } + + def authorBeingFollowed( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap] = { + val name = "author_not_being_followed" + val stats = statsReceiver.scope(name) + val softUserCounter = stats.counter("soft_user") + + val sgsAuthorBeingFollowedPredicate = Predicate + .from { relationshipMapEdge: RelationshipMapEdge => + anyRelationExist(relationshipMapEdge, Set(RelationshipType.Following)) + } + + Predicate + .fromAsync { + candidate: PushCandidate with 
TweetAuthor with SocialGraphServiceRelationshipMap => + val target = candidate.target + target.targetUser.flatMap { + case Some(gizmoduckUser) if gizmoduckUser.userType == UserType.Soft => + softUserCounter.incr() + target.seedsWithWeight.map { followedUsersWithWeightOpt => + candidate.authorId match { + case Some(authorId) => + val followedUsers = followedUsersWithWeightOpt.getOrElse(Map.empty).keys + followedUsers.toSet.contains(authorId) + + case None => false + } + } + + case _ => + sgsAuthorBeingFollowedPredicate + .optionalOn(relationshipMapEdgeFromCandidate, missingResult = false) + .apply(Seq(candidate)) + .map(_.head) + } + }.withStats(stats) + .withName(name) + } + + def authorNotBeingDeviceFollowed( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap] = { + val name = "author_being_device_followed" + Predicate + .from { relationshipMapEdge: RelationshipMapEdge => + { + anyRelationExist(relationshipMapEdge, Set(RelationshipType.DeviceFollowing)) + } + } + .optionalOn(relationshipMapEdgeFromCandidate, missingResult = false) + .flip + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def recommendedTweetAuthorAcceptableToTargetUser( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap] = { + val name = "recommended_tweet_author_not_acceptable_to_target_user" + Predicate + .from { relationshipMapEdge: RelationshipMapEdge => + { + anyRelationExist( + relationshipMapEdge, + Set( + RelationshipType.Blocking, + RelationshipType.BlockedBy, + RelationshipType.HideRecommendations, + RelationshipType.Muting + )) + } + } + .flip + .optionalOn(relationshipMapEdgeFromCandidate, missingResult = false) + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def authorNotBeingFollowed( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with 
SocialGraphServiceRelationshipMap] = { + Predicate + .from { relationshipMapEdge: RelationshipMapEdge => + { + anyRelationExist(relationshipMapEdge, Set(RelationshipType.Following)) + } + } + .optionalOn(relationshipMapEdgeFromCandidate, missingResult = false) + .flip + .withStats(statsReceiver.scope("predicate_author_not_being_followed_pre_ranking")) + .withName("author_not_being_followed") + } + + def disableInNetworkTweetPredicate( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap] = { + val name = "enable_in_network_tweet" + Predicate + .fromAsync { + candidate: PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap => + if (candidate.target.params(PushParams.DisableInNetworkTweetCandidatesParam)) { + authorNotBeingFollowed + .apply(Seq(candidate)) + .map(_.head) + } else Future.True + }.withStats(statsReceiver.scope(name)) + .withName(name) + } + + def disableOutNetworkTweetPredicate( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap] = { + val name = "enable_out_network_tweet" + Predicate + .fromAsync { + candidate: PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap => + if (candidate.target.params(PushFeatureSwitchParams.DisableOutNetworkTweetCandidatesFS)) { + authorBeingFollowed + .apply(Seq(candidate)) + .map(_.head) + } else Future.True + }.withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * Returns true if the relationship map marks the edge as true for at least one of the given relationship types. + * @param relationshipMapEdge the edge to check, together with the relationship map to look it up in + * @param relationships relationship types to look up; a type missing from the map counts as false + * @return true if any lookup is true, false otherwise (including for an empty set) + */ + private def anyRelationExist( + relationshipMapEdge: RelationshipMapEdge, + relationships: Set[RelationshipType] + ): Boolean = { + val resultSeq = relationships.map { relationship => + relationshipMapEdge.relationshipMap.getOrElse( + RelationEdge(relationshipMapEdge.edge, relationship), + false) +
}.toSeq + resultSeq.contains(true) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ScarecrowPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ScarecrowPredicate.scala new file mode 100644 index 000000000..a4728eba2 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ScarecrowPredicate.scala @@ -0,0 +1,138 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala._ +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.scarecrow.{ScarecrowPredicate => HermitScarecrowPredicate} +import com.twitter.relevance.feature_store.thriftscala.FeatureData +import com.twitter.relevance.feature_store.thriftscala.FeatureValue +import com.twitter.service.gen.scarecrow.thriftscala.Event +import com.twitter.service.gen.scarecrow.thriftscala.TieredActionResult +import com.twitter.storehaus.ReadableStore + +object ScarecrowPredicate { + val name = "" + + def candidateToEvent(candidate: PushCandidate): Event = { + val recommendedUserIdOpt = candidate match { + case tweetCandidate: TweetCandidate with TweetAuthor => + tweetCandidate.authorId + case userCandidate: UserCandidate => + Some(userCandidate.userId) + case _ => None + } + val hashtagsInTweet = candidate match { + case tweetCandidate: TweetCandidate with TweetDetails => + tweetCandidate.tweetyPieResult + .flatMap { tweetPieResult => + tweetPieResult.tweet.hashtags.map(_.map(_.text)) + }.getOrElse(Nil) + case _ => + Nil + } + val urlsInTweet = candidate match { + case tweetCandidate: TweetCandidate with TweetDetails => + tweetCandidate.tweetyPieResult + .flatMap { tweetPieResult => + tweetPieResult.tweet.urls.map(_.flatMap(_.expanded)) + } + case _ => None + } + val tweetIdOpt = candidate 
match { + case tweetCandidate: TweetCandidate => + Some(tweetCandidate.tweetId) + case _ => + None + } + val urlOpt = candidate match { + case candidate: UrlCandidate => + Some(candidate.url) + case _ => + None + } + val scUserIds = candidate match { + case hasSocialContext: SocialContextActions => Some(hasSocialContext.socialContextUserIds) + case _ => None + } + + val eventTitleOpt = candidate match { + case eventCandidate: EventCandidate with EventDetails => + Some(eventCandidate.eventTitle) + case _ => + None + } + + val urlTitleOpt = candidate match { + case candidate: UrlCandidate => + candidate.title + case _ => + None + } + + val urlDescriptionOpt = candidate match { + case candidate: UrlCandidate with UrlCandidateWithDetails => + candidate.description + case _ => + None + } + + Event( + "magicrecs_recommendation_write", + Map( + "targetUserId" -> FeatureData(Some(FeatureValue.LongValue(candidate.target.targetId))), + "type" -> FeatureData( + Some( + FeatureValue.StrValue(candidate.commonRecType.name) + ) + ), + "recommendedUserId" -> FeatureData(recommendedUserIdOpt map { id => + FeatureValue.LongValue(id) + }), + "tweetId" -> FeatureData(tweetIdOpt map { id => + FeatureValue.LongValue(id) + }), + "url" -> FeatureData(urlOpt map { url => + FeatureValue.StrValue(url) + }), + "hashtagsInTweet" -> FeatureData(Some(FeatureValue.StrListValue(hashtagsInTweet))), + "urlsInTweet" -> FeatureData(urlsInTweet.map(FeatureValue.StrListValue)), + "socialContexts" -> FeatureData(scUserIds.map { sc => + FeatureValue.LongListValue(sc) + }), + "eventTitle" -> FeatureData(eventTitleOpt.map { eventTitle => + FeatureValue.StrValue(eventTitle) + }), + "urlTitle" -> FeatureData(urlTitleOpt map { title => + FeatureValue.StrValue(title) + }), + "urlDescription" -> FeatureData(urlDescriptionOpt map { des => + FeatureValue.StrValue(des) + }) + ) + ) + } + + def candidateToPossibleEvent(c: PushCandidate): Option[Event] = { + if (c.frigateNotification.notificationDisplayLocation == 
NotificationDisplayLocation.PushToMobileDevice) { + Some(candidateToEvent(c)) + } else { + None + } + } + + def apply( + scarecrowCheckEventStore: ReadableStore[Event, TieredActionResult] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate] = { + HermitScarecrowPredicate(scarecrowCheckEventStore) + .optionalOn( + candidateToPossibleEvent, + missingResult = true + ) + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SpacePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SpacePredicate.scala new file mode 100644 index 000000000..044c0afdb --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/SpacePredicate.scala @@ -0,0 +1,153 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.SpaceCandidate +import com.twitter.frigate.common.base.SpaceCandidateDetails +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.hermit.predicate.socialgraph.Edge +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.hermit.predicate.socialgraph.SocialGraphPredicate +import com.twitter.socialgraph.thriftscala.RelationshipType +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.response.Err +import com.twitter.ubs.thriftscala.AudioSpace +import com.twitter.ubs.thriftscala.BroadcastState +import com.twitter.ubs.thriftscala.ParticipantUser +import com.twitter.ubs.thriftscala.Participants +import com.twitter.util.Future + 
+object SpacePredicate { + + /** Filters the request if the target is present in the space as a listener, speaker, or admin, or if no participants are found for the space */ + def targetInSpace( + audioSpaceParticipantsStore: ReadableStore[String, Participants] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[SpaceCandidateDetails with RawCandidate] = { + val name = "target_in_space" + Predicate + .fromAsync[SpaceCandidateDetails with RawCandidate] { spaceCandidate => + audioSpaceParticipantsStore.get(spaceCandidate.spaceId).map { + case Some(participants) => + val allParticipants: Seq[ParticipantUser] = + (participants.admins ++ participants.speakers ++ participants.listeners).flatten.toSeq + val isInSpace = allParticipants.exists { participant => + participant.twitterUserId.contains(spaceCandidate.target.targetId) + } + !isInSpace + case None => false + } + }.withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * Passes only scheduled spaces whose state is still BroadcastState.NotStarted. + * @param audioSpaceStore: space metadata store + * @param statsReceiver: record stats + * @return: true if the space is found and has not started yet; false (notification filtered out) otherwise, including on an authorization error from the store + */ + def scheduledSpaceStarted( + audioSpaceStore: ReadableStore[String, AudioSpace] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[SpaceCandidate with RawCandidate] = { + val name = "scheduled_space_started" + Predicate + .fromAsync[SpaceCandidate with RawCandidate] { spaceCandidate => + audioSpaceStore + .get(spaceCandidate.spaceId) + .map(_.exists(_.state.contains(BroadcastState.NotStarted))) + .rescue { + case Err(Err.Authorization, _, _) => + Future.False + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + private def relationshipMapEdgeFromSpaceCandidate( + candidate: RawCandidate with SpaceCandidate + ): Option[(Long, Seq[Long])] = { + candidate.hostId.map { spaceHostId => + (candidate.target.targetId, Seq(spaceHostId)) + } + } + + /** + * Check only host block for scheduled space reminders + * @return: True if no blocking relation between host
and target user, else False + */ + def spaceHostTargetUserBlocking( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[SpaceCandidate with RawCandidate] = { + val name = "space_host_target_user_blocking" + PredicatesForCandidate + .blocking(edgeStore) + .optionalOn(relationshipMapEdgeFromSpaceCandidate, false) + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + private def edgeFromCandidate( + candidate: PushCandidate with TweetAuthorDetails + ): Future[Option[Edge]] = { + candidate.tweetAuthor.map(_.map { author => Edge(candidate.target.targetId, author.id) }) + } + + def recommendedTweetAuthorAcceptableToTargetUser( + edgeStore: ReadableStore[RelationEdge, Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetAuthorDetails] = { + val name = "recommended_tweet_author_acceptable_to_target_user" + SocialGraphPredicate + .anyRelationExists( + edgeStore, + Set( + RelationshipType.Blocking, + RelationshipType.BlockedBy, + RelationshipType.HideRecommendations, + RelationshipType.Muting + ) + ) + .flip + .flatOptionContraMap( + edgeFromCandidate, + missingResult = false + ) + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + def narrowCastSpace( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[SpaceCandidateDetails with RawCandidate] = { + val name = "narrow_cast_space" + val narrowCastSpaceScope = statsReceiver.scope(name) + val employeeSpaceCounter = narrowCastSpaceScope.counter("employees") + val superFollowerSpaceCounter = narrowCastSpaceScope.counter("super_followers") + + Predicate + .fromAsync[SpaceCandidateDetails with RawCandidate] { candidate => + candidate.audioSpaceFut.map { + case Some(audioSpace) if audioSpace.narrowCastSpaceType.contains(1L) => + employeeSpaceCounter.incr() + candidate.target.params(PushFeatureSwitchParams.EnableEmployeeOnlySpaceNotifications) + case Some(audioSpace) if 
audioSpace.narrowCastSpaceType.contains(2L) => + superFollowerSpaceCounter.incr() + false + case _ => true + } + }.withStats(narrowCastSpaceScope) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetEngagementPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetEngagementPredicate.scala new file mode 100644 index 000000000..d02e5b89a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetEngagementPredicate.scala @@ -0,0 +1,27 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.tweetypie.EngagementsPredicate +import com.twitter.hermit.predicate.tweetypie.Perspective +import com.twitter.hermit.predicate.tweetypie.UserTweet +import com.twitter.storehaus.ReadableStore + +object TargetEngagementPredicate { + val name = "target_engagement" + def apply( + perspectiveStore: ReadableStore[UserTweet, Perspective], + defaultForMissing: Boolean + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + EngagementsPredicate(perspectiveStore, defaultForMissing) + .on { candidate: PushCandidate with TweetCandidate => + UserTweet(candidate.target.targetId, candidate.tweetId) + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetNtabCaretClickFatiguePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetNtabCaretClickFatiguePredicate.scala new file mode 100644 index 000000000..c5042bbc8 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetNtabCaretClickFatiguePredicate.scala @@ -0,0 +1,91 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.CaretFeedbackHistory +import com.twitter.frigate.common.candidate.FrigateHistory +import com.twitter.frigate.common.candidate.HTLVisitHistory +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.predicate.FrigateHistoryFatiguePredicate.TimeSeries +import com.twitter.frigate.common.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicateHelper +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.util.FeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.frigate.common.predicate.{FatiguePredicate => CommonFatiguePredicate} + +object TargetNtabCaretClickFatiguePredicate { + import NtabCaretClickFatiguePredicateHelper._ + + private val MagicRecsCategory = "MagicRecs" + + def apply[ + T <: TargetUser with TargetABDecider with CaretFeedbackHistory with FrigateHistory with HTLVisitHistory + ]( + filterHistory: TimeSeries => TimeSeries = + CommonFatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: TargetUser with TargetABDecider with CaretFeedbackHistory => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = + CaretFeedbackHistoryFilter.caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + calculateFatiguePeriod: 
Seq[CaretFeedbackDetails] => Duration = calculateFatiguePeriodMagicRecs, + useMostRecentDislikeTime: Boolean = false, + name: String = "NtabCaretClickFatiguePredicate" + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + + val scopedStats = statsReceiver.scope(name) + val crtStats = scopedStats.scope("crt") + Predicate + .fromAsync { target: T => + Future.join(target.history, target.caretFeedbacks).map { + case (history, Some(feedbackDetails)) => { + val feedbackDetailsDeduped = dedupFeedbackDetails( + filterCaretFeedbackHistory(target)(feedbackDetails), + scopedStats + ) + + val fatiguePeriod = + if (hasUserDislikeInLast30Days(feedbackDetailsDeduped) && target.params( + PushFeatureSwitchParams.EnableReducedFatigueRulesForSeeLessOften)) { + durationToFilterMRForSeeLessOftenExpt( + feedbackDetailsDeduped, + target.params(FeatureSwitchParams.NumberOfDaysToFilterMRForSeeLessOften), + target.params(FeatureSwitchParams.NumberOfDaysToReducePushCapForSeeLessOften), + scopedStats + ) + } else { + calculateFatiguePeriod(feedbackDetailsDeduped) + } + + val crtlist = feedbackDetailsDeduped + .flatMap { fd => + fd.genericNotificationMetadata.map { gm => + gm.genericType.name + } + }.distinct.sorted.mkString("-") + + if (fatiguePeriod > 0.days) { + crtStats.scope(crtlist).counter("fatigued").incr() + } else { + crtStats.scope(crtlist).counter("non_fatigued").incr() + } + + val hasRecentSent = + hasRecentSend(History(filterHistory(history.history.toSeq).toMap), fatiguePeriod) + !hasRecentSent + } + case _ => true + } + } + .withStats(scopedStats) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetPredicates.scala new file mode 100644 index 000000000..45d0b7578 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TargetPredicates.scala @@ -0,0 +1,292 @@ +package 
com.twitter.frigate.pushservice.predicate + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.FrigateHistory +import com.twitter.frigate.common.candidate.HTLVisitHistory +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.candidate.UserDetails +import com.twitter.frigate.common.predicate.TargetUserPredicates +import com.twitter.frigate.common.predicate.{FatiguePredicate => CommonFatiguePredicate} +import com.twitter.frigate.common.store.deviceinfo.MobileClientType +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.target.TargetScoringDetails +import com.twitter.frigate.pushservice.util.PushCapUtil +import com.twitter.frigate.thriftscala.NotificationDisplayLocation +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.timelines.configapi.FSBoundedParam +import com.twitter.timelines.configapi.Param +import com.twitter.util.Duration +import com.twitter.util.Future + +object TargetPredicates { + + def paramPredicate[T <: Target]( + param: Param[Boolean] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = param.getClass.getSimpleName.stripSuffix("$") + Predicate + .from { target: T => target.params(param) } + .withStats(statsReceiver.scope(s"param_${name}_controlled_predicate")) + .withName(s"param_${name}_controlled_predicate") + } + + /** + * Use the predicate except fn is true., Same as the candidate version but for Target + */ + def exceptedPredicate[T <: TargetUser]( + name: String, + fn: T => Future[Boolean], + predicate: Predicate[T] + )( + implicit statsReceiver: StatsReceiver + ): 
NamedPredicate[T] = { + Predicate + .fromAsync { e: T => fn(e) } + .or(predicate) + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * Refresh For push handler target user predicate to fatigue on visiting Home timeline + */ + def targetHTLVisitPredicate[ + T <: TargetUser with UserDetails with TargetABDecider with HTLVisitHistory + ]( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "target_htl_visit_predicate" + Predicate + .fromAsync { target: T => + val hoursToFatigue = target.params(PushFeatureSwitchParams.HTLVisitFatigueTime) + TargetUserPredicates + .homeTimelineFatigue(hoursToFatigue.hours) + .apply(Seq(target)) + .map(_.head) + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def targetPushBitEnabledPredicate[T <: Target]( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "push_bit_enabled" + val scopedStats = statsReceiver.scope(s"targetpredicate_$name") + + Predicate + .fromAsync { target: T => + target.deviceInfo + .map { info => + info.exists { deviceInfo => + deviceInfo.isRecommendationsEligible || + deviceInfo.isNewsEligible || + deviceInfo.isTopicsEligible || + deviceInfo.isSpacesEligible + } + } + }.withStats(scopedStats) + .withName(name) + } + + def targetFatiguePredicate[T <: Target]( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "target_fatigue_predicate" + val predicateStatScope = statsReceiver.scope(name) + Predicate + .fromAsync { target: T => + PushCapUtil + .getPushCapFatigue(target, predicateStatScope) + .flatMap { pushCapInfo => + CommonFatiguePredicate + .magicRecsPushTargetFatiguePredicate( + interval = pushCapInfo.fatigueInterval, + maxInInterval = pushCapInfo.pushcap + ) + .apply(Seq(target)) + .map(_.headOption.getOrElse(false)) + } + } + .withStats(predicateStatScope) + .withName(name) + } + + def teamExceptedPredicate[T <: TargetUser]( + predicate: NamedPredicate[T] + )( + implicit stats: 
StatsReceiver + ): NamedPredicate[T] = { + Predicate + .fromAsync { t: T => t.isTeamMember } + .or(predicate) + .withStats(stats.scope(predicate.name)) + .withName(predicate.name) + } + + def targetValidMobileSDKPredicate[T <: Target]( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "valid_mobile_sdk" + val scopedStats = statsReceiver.scope(s"targetpredicate_$name") + + Predicate + .fromAsync { target: T => + TargetUserPredicates.validMobileSDKPredicate + .apply(Seq(target)).map(_.headOption.getOrElse(false)) + }.withStats(scopedStats) + .withName(name) + } + + def magicRecsMinDurationSinceSent[T <: Target]( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "target_min_duration_since_push" + Predicate + .fromAsync { target: T => + PushCapUtil.getMinDurationSincePush(target, statsReceiver).flatMap { minDurationSincePush => + CommonFatiguePredicate + .magicRecsMinDurationSincePush(interval = minDurationSincePush) + .apply(Seq(target)).map(_.head) + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def optoutProbPredicate[ + T <: TargetUser with TargetABDecider with TargetScoringDetails with FrigateHistory + ]( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[T] = { + val name = "target_has_high_optout_probability" + Predicate + .fromAsync { target: T => + val isNewUser = target.is30DayNewUserFromSnowflakeIdTime + if (isNewUser) { + statsReceiver.scope(name).counter("all_new_users").incr() + } + target.bucketOptoutProbability + .flatMap { + case Some(optoutProb) => + if (optoutProb >= target.params(PushFeatureSwitchParams.BucketOptoutThresholdParam)) { + CommonFatiguePredicate + .magicRecsPushTargetFatiguePredicate( + interval = 24.hours, + maxInInterval = target.params(PushFeatureSwitchParams.OptoutExptPushCapParam) + ) + .apply(Seq(target)) + .map { values => + val isValid = values.headOption.getOrElse(false) + if (!isValid && isNewUser) { + 
statsReceiver.scope(name).counter("filtered_new_users").incr() + } + isValid + } + } else Future.True + case _ => Future.True + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** + * Predicate used to specify CRT fatigue given interval and max number of candidates within interval. + * @param crt The specific CRT that this predicate is being applied to + * @param intervalParam The fatigue interval + * @param maxInIntervalParam The max number of the given CRT's candidates that are acceptable + * in the interval + * @param stats StatsReceiver + * @return Target Predicate + */ + def pushRecTypeFatiguePredicate( + crt: CRT, + intervalParam: Param[Duration], + maxInIntervalParam: FSBoundedParam[Int], + stats: StatsReceiver + ): Predicate[Target] = + Predicate.fromAsync { target: Target => + val interval = target.params(intervalParam) + val maxIninterval = target.params(maxInIntervalParam) + CommonFatiguePredicate + .recTypeTargetFatiguePredicate( + interval = interval, + maxInInterval = maxIninterval, + recommendationType = crt, + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice, + minInterval = 30.minutes + )(stats.scope(s"${crt}_push_candidate_fatigue")).apply(Seq(target)).map(_.head) + } + + def inlineActionFatiguePredicate( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[Target] = { + val name = "inline_action_fatigue" + val predicateRequests = statsReceiver.scope(name).counter("requests") + val targetIsInExpt = statsReceiver.scope(name).counter("target_in_expt") + val predicateEnabled = statsReceiver.scope(name).counter("enabled") + val predicateDisabled = statsReceiver.scope(name).counter("disabled") + val inlineFatigueDisabled = statsReceiver.scope(name).counter("inline_fatigue_disabled") + + Predicate + .fromAsync { target: Target => + predicateRequests.incr() + if (target.params(PushFeatureSwitchParams.TargetInInlineActionAppVisitFatigue)) { + targetIsInExpt.incr() + 
target.inlineActionHistory.map { inlineHistory => + if (inlineHistory.nonEmpty && target.params( + PushFeatureSwitchParams.EnableInlineActionAppVisitFatigue)) { + predicateEnabled.incr() + val inlineFatigue = target.params(PushFeatureSwitchParams.InlineActionAppVisitFatigue) + val lookbackInMs = inlineFatigue.ago.inMilliseconds + val filteredHistory = inlineHistory.filter { + case (time, _) => time > lookbackInMs + } + filteredHistory.isEmpty + } else { + inlineFatigueDisabled.incr() + true + } + } + } else { + predicateDisabled.incr() + Future.True + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def webNotifsHoldback[T <: TargetUser with UserDetails with TargetABDecider]( + )( + implicit stats: StatsReceiver + ): NamedPredicate[T] = { + val name = "mr_web_notifs_holdback" + Predicate + .fromAsync { targetUserContext: T => + targetUserContext.deviceInfo.map { deviceInfoOpt => + val isPrimaryWeb = deviceInfoOpt.exists { + _.guessedPrimaryClient.exists { clientType => + clientType == MobileClientType.Web + } + } + !(isPrimaryWeb && targetUserContext.params(PushFeatureSwitchParams.MRWebHoldbackParam)) + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TopTweetImpressionsPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TopTweetImpressionsPredicates.scala new file mode 100644 index 000000000..be5993b71 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TopTweetImpressionsPredicates.scala @@ -0,0 +1,56 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.TopTweetImpressionsPushCandidate +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.frigate.thriftscala.CommonRecommendationType +import 
com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate + +object TopTweetImpressionsPredicates { + + def topTweetImpressionsFatiguePredicate( + implicit stats: StatsReceiver + ): NamedPredicate[TopTweetImpressionsPushCandidate] = { + val name = "top_tweet_impressions_fatigue" + val scopedStats = stats.scope(name) + val bucketImpressionCounter = scopedStats.counter("bucket_impression_count") + Predicate + .fromAsync { candidate: TopTweetImpressionsPushCandidate => + val interval = candidate.target.params(FS.TopTweetImpressionsNotificationInterval) + val maxInInterval = candidate.target.params(FS.MaxTopTweetImpressionsNotifications) + val minInterval = candidate.target.params(FS.TopTweetImpressionsFatigueMinIntervalDuration) + bucketImpressionCounter.incr() + + val fatiguePredicate = FatiguePredicate.recTypeOnly( + interval = interval, + maxInInterval = maxInInterval, + minInterval = minInterval, + recommendationType = CommonRecommendationType.TweetImpressions + ) + fatiguePredicate.apply(Seq(candidate)).map(_.head) + } + .withStats(stats.scope(s"predicate_${name}")) + .withName(name) + } + + def topTweetImpressionsThreshold( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[TopTweetImpressionsPushCandidate] = { + val name = "top_tweet_impressions_threshold" + val scopedStats = statsReceiver.scope(name) + val meetsImpressionsCounter = scopedStats.counter("meets_impressions_count") + val bucketImpressionCounter = scopedStats.counter("bucket_impression_count") + Predicate + .from[TopTweetImpressionsPushCandidate] { candidate => + val meetsImpressionsThreshold = + candidate.impressionsCount >= candidate.target.params(FS.TopTweetImpressionsThreshold) + if (meetsImpressionsThreshold) meetsImpressionsCounter.incr() + bucketImpressionCounter.incr() + meetsImpressionsThreshold + } + .withStats(statsReceiver.scope(s"predicate_${name}")) + .withName(name) + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetEngagementRatioPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetEngagementRatioPredicate.scala new file mode 100644 index 000000000..d1a3a1c64 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetEngagementRatioPredicate.scala @@ -0,0 +1,112 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object TweetEngagementRatioPredicate { + + def QTtoNtabClickBasedPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with TweetCandidate with RecommendationType + ] = { + val name = "oon_tweet_engagement_filter_qt_to_ntabclick_ratio_based_predicate" + val scopedStatsReceiver = stats.scope(name) + val allOonCandidatesCounter = scopedStatsReceiver.counter("all_oon_candidates") + val filteredCandidatesCounter = + scopedStatsReceiver.counter("filtered_oon_candidates") + + val quoteCountFeature = + "tweet.core.tweet_counts.quote_count" + val ntabClickCountFeature = + "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_ntab_clicked.any_feature.Duration.Top.count" + + Predicate + .fromAsync { candidate: PushCandidate with TweetCandidate with RecommendationType => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + 
RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + lazy val QTtoNtabClickRatioThreshold = + target.params(PushFeatureSwitchParams.TweetQTtoNtabClickRatioThresholdParam) + lazy val quoteCount = candidate.numericFeatures.getOrElse(quoteCountFeature, 0.0) + lazy val ntabClickCount = candidate.numericFeatures.getOrElse(ntabClickCountFeature, 0.0) + lazy val quoteRate = if (ntabClickCount > 0) quoteCount / ntabClickCount else 1.0 + + if (isOonCandidate) allOonCandidatesCounter.incr() + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && isOonCandidate) { + val ntabClickThreshold = 1000 + candidate.cachePredicateInfo( + name + "_count", + ntabClickCount, + ntabClickThreshold, + ntabClickCount >= ntabClickThreshold) + candidate.cachePredicateInfo( + name + "_ratio", + quoteRate, + QTtoNtabClickRatioThreshold, + quoteRate < QTtoNtabClickRatioThreshold) + if (ntabClickCount >= ntabClickThreshold && quoteRate < QTtoNtabClickRatioThreshold) { + filteredCandidatesCounter.incr() + Future.False + } else Future.True + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } + + def TweetReplyLikeRatioPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with TweetCandidate] = { + val name = "tweet_reply_like_ratio" + val scopedStatsReceiver = stats.scope(name) + val allCandidatesCounter = scopedStatsReceiver.counter("all_candidates") + val filteredCandidatesCounter = scopedStatsReceiver.counter("filtered_candidates") + val bucketedCandidatesCounter = scopedStatsReceiver.counter("bucketed_candidates") + + Predicate + .fromAsync { candidate: PushCandidate => + allCandidatesCounter.incr() + val target = candidate.target + val likeCount = candidate.numericFeatures + .getOrElse(PushConstants.TweetLikesFeatureName, 0.0) + val replyCount = candidate.numericFeatures + .getOrElse(PushConstants.TweetRepliesFeatureName, 0.0) + val ratio = replyCount / likeCount.max(1) + val isOonCandidate = 
RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType) || + RecTypes.outOfNetworkTopicTweetTypes.contains(candidate.commonRecType) + + if (isOonCandidate + && CandidateUtil.shouldApplyHealthQualityFilters(candidate) + && replyCount > target.params( + PushFeatureSwitchParams.TweetReplytoLikeRatioReplyCountThreshold)) { + bucketedCandidatesCounter.incr() + if (ratio > target.params( + PushFeatureSwitchParams.TweetReplytoLikeRatioThresholdLowerBound) + && ratio < target.params( + PushFeatureSwitchParams.TweetReplytoLikeRatioThresholdUpperBound)) { + filteredCandidatesCounter.incr() + Future.False + } else { + Future.True + } + } else { + Future.True + } + } + .withStats(stats.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetLanguagePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetLanguagePredicate.scala new file mode 100644 index 000000000..4ff24ae77 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetLanguagePredicate.scala @@ -0,0 +1,109 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.util.CandidateUtil +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.language.normalization.UserDisplayLanguage +import com.twitter.util.Future + +object TweetLanguagePredicate { + + def oonTweeetLanguageMatch( + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate with RecommendationType with TweetDetails + ] = { + val name = "oon_tweet_language_predicate" + val scopedStatsReceiver = 
stats.scope(name) + val oonCandidatesCounter = + scopedStatsReceiver.counter("oon_candidates") + val enableFilterCounter = + scopedStatsReceiver.counter("enabled_filter") + val skipMediaTweetsCounter = + scopedStatsReceiver.counter("skip_media_tweets") + + Predicate + .fromAsync { candidate: PushCandidate with RecommendationType with TweetDetails => + val target = candidate.target + val crt = candidate.commonRecType + val isOonCandidate = RecTypes.isOutOfNetworkTweetRecType(crt) || + RecTypes.outOfNetworkTopicTweetTypes.contains(crt) + + if (CandidateUtil.shouldApplyHealthQualityFilters(candidate) && isOonCandidate) { + oonCandidatesCounter.incr() + + target.featureMap.map { featureMap => + val userPreferredLanguages = featureMap.sparseBinaryFeatures + .getOrElse("user.language.user.preferred_contents", Set.empty[String]) + val userEngagementLanguages = featureMap.sparseContinuousFeatures.getOrElse( + "user.language.user.engagements", + Map.empty[String, Double]) + val userFollowLanguages = featureMap.sparseContinuousFeatures.getOrElse( + "user.language.user.following_accounts", + Map.empty[String, Double]) + val userProducedTweetLanguages = featureMap.sparseContinuousFeatures + .getOrElse("user.language.user.produced_tweets", Map.empty) + val userDeviceLanguages = featureMap.sparseContinuousFeatures.getOrElse( + "user.language.user.recent_devices", + Map.empty[String, Double]) + val tweetLanguageOpt = candidate.categoricalFeatures + .get(target.params(PushFeatureSwitchParams.TweetLanguageFeatureNameParam)) + + if (userPreferredLanguages.isEmpty) + scopedStatsReceiver.counter("userPreferredLanguages_empty").incr() + if (userEngagementLanguages.isEmpty) + scopedStatsReceiver.counter("userEngagementLanguages_empty").incr() + if (userFollowLanguages.isEmpty) + scopedStatsReceiver.counter("userFollowLanguages_empty").incr() + if (userProducedTweetLanguages.isEmpty) + scopedStatsReceiver + .counter("userProducedTweetLanguages_empty") + .incr() + if 
(userDeviceLanguages.isEmpty) + scopedStatsReceiver.counter("userDeviceLanguages_empty").incr() + if (tweetLanguageOpt.isEmpty) scopedStatsReceiver.counter("tweetLanguage_empty").incr() + + val tweetLanguage = tweetLanguageOpt.getOrElse("und") + val undefinedTweetLanguages = Set("") + + if (!undefinedTweetLanguages.contains(tweetLanguage)) { + lazy val userInferredLanguageThreshold = + target.params(PushFeatureSwitchParams.UserInferredLanguageThresholdParam) + lazy val userDeviceLanguageThreshold = + target.params(PushFeatureSwitchParams.UserDeviceLanguageThresholdParam) + lazy val enableTweetLanguageFilter = + target.params(PushFeatureSwitchParams.EnableTweetLanguageFilter) + lazy val skipLanguageFilterForMediaTweets = + target.params(PushFeatureSwitchParams.SkipLanguageFilterForMediaTweets) + + lazy val allLanguages = userPreferredLanguages ++ + userEngagementLanguages.filter(_._2 > userInferredLanguageThreshold).keySet ++ + userFollowLanguages.filter(_._2 > userInferredLanguageThreshold).keySet ++ + userProducedTweetLanguages.filter(_._2 > userInferredLanguageThreshold).keySet ++ + userDeviceLanguages.filter(_._2 > userDeviceLanguageThreshold).keySet + + if (enableTweetLanguageFilter && allLanguages.nonEmpty) { + enableFilterCounter.incr() + val hasMedia = candidate.hasPhoto || candidate.hasVideo + + if (hasMedia && skipLanguageFilterForMediaTweets) { + skipMediaTweetsCounter.incr() + true + } else { + allLanguages.map(UserDisplayLanguage.toTweetLanguage).contains(tweetLanguage) + } + } else true + } else true + } + } else Future.True + } + .withStats(stats.scope(name)) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetWithheldContentPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetWithheldContentPredicate.scala new file mode 100644 index 000000000..c05536909 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/TweetWithheldContentPredicate.scala @@ -0,0 +1,35 @@ +package com.twitter.frigate.pushservice.predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.tweetypie.UserLocationAndTweet +import com.twitter.hermit.predicate.tweetypie.WithheldTweetPredicate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.service.metastore.gen.thriftscala.Location +import com.twitter.util.Future + +object TweetWithheldContentPredicate { + val name = "withheld_content" + val defaultLocation = Location(city = "", region = "", countryCode = "", confidence = 0.0) + + def apply( + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with TweetDetails] = { + Predicate + .fromAsync { candidate: PushCandidate with TweetDetails => + candidate.tweet match { + case Some(tweet) => + WithheldTweetPredicate(checkAllCountries = true) + .apply(Seq(UserLocationAndTweet(defaultLocation, tweet))) + .map(_.head) + case None => + Future.value(false) + } + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/event/EventPredicatesForCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/event/EventPredicatesForCandidate.scala new file mode 100644 index 000000000..86c1f8abd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/event/EventPredicatesForCandidate.scala @@ -0,0 +1,155 @@ +package com.twitter.frigate.pushservice.predicate.event + +import com.twitter.conversions.DurationOps._ +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.EventCandidate +import 
com.twitter.frigate.common.base.TargetInfo +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.FrigateHistory +import com.twitter.frigate.common.history.RecItems +import com.twitter.frigate.magic_events.thriftscala.Locale +import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventPushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutNewsEventPushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutPredicatesUtil._ +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object EventPredicatesForCandidate { + def hasTitle( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[MagicFanoutEventHydratedCandidate] = { + val name = "event_title_available" + val scopedStatsReceiver = statsReceiver.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: MagicFanoutEventHydratedCandidate => + candidate.eventTitleFut.map(_.nonEmpty) + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + def isNotDuplicateWithEventId( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[MagicFanoutEventHydratedCandidate] = { + val name = "duplicate_event_id" + Predicate + .fromAsync { candidate: MagicFanoutEventHydratedCandidate => + val useRelaxedFatigueLengthFut: Future[Boolean] = + candidate match { + case mfNewsEvent: MagicFanoutNewsEventPushCandidate => + mfNewsEvent.isHighPriorityEvent + case _ => Future.value(false) + } + Future.join(candidate.target.history, useRelaxedFatigueLengthFut).map { + case (history, useRelaxedFatigueLength) => + val filteredNotifications = if (useRelaxedFatigueLength) { + val relaxedFatigueInterval = + candidate.target + .params( + 
PushFeatureSwitchParams.MagicFanoutRelaxedEventIdFatigueIntervalInHours).hours + history.notificationMap.filterKeys { time => + time.untilNow <= relaxedFatigueInterval + }.values + } else history.notificationMap.values + !RecItems(filteredNotifications.toSeq).events.exists(_.eventId == candidate.eventId) + } + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } + + def isNotDuplicateWithEventIdForCandidate[ + T <: TargetUser with FrigateHistory, + Cand <: EventCandidate with TargetInfo[T] + ]( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[Cand] = { + val name = "is_not_duplicate_event" + Predicate + .fromAsync { candidate: Cand => + candidate.target.pushRecItems.map { + !_.events.map(_.eventId).contains(candidate.eventId) + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + def accountCountryPredicateWithAllowlist( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "account_country_predicate_with_allowlist" + val scopedStats = stats.scope(name) + + val skipPredicate = Predicate + .from { candidate: MagicFanoutEventPushCandidate => + candidate.target.params(PushFeatureSwitchParams.MagicFanoutSkipAccountCountryPredicate) + } + .withStats(stats.scope("skip_account_country_predicate_mf")) + .withName("skip_account_country_predicate_mf") + + val excludeEventFromAccountCountryPredicateFiltering = Predicate + .from { candidate: MagicFanoutEventPushCandidate => + val eventId = candidate.eventId + val target = candidate.target + target + .params(PushFeatureSwitchParams.MagicFanoutEventAllowlistToSkipAccountCountryPredicate) + .exists(eventId.equals) + } + .withStats(stats.scope("exclude_event_from_account_country_predicate_filtering")) + .withName("exclude_event_from_account_country_predicate_filtering") + + skipPredicate + .or(excludeEventFromAccountCountryPredicateFiltering) + .or(accountCountryPredicate) + .withStats(scopedStats) + .withName(name) + } + + /** + * 
Check if user's country is targeted + * @param stats + */ + def accountCountryPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "account_country_predicate" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + val internationalLocalePassedCounter = + scopedStatsReceiver.counter("international_locale_passed") + val internationalLocaleFilteredCounter = + scopedStatsReceiver.counter("international_locale_filtered") + Predicate + .fromAsync { candidate: MagicFanoutEventPushCandidate => + candidate.target.countryCode.map { + case Some(countryCode) => + val denyListedCountryCodes: Seq[String] = + if (candidate.commonRecType == CommonRecommendationType.MagicFanoutNewsEvent) { + candidate.target + .params(PushFeatureSwitchParams.MagicFanoutDenyListedCountries) + } else if (candidate.commonRecType == CommonRecommendationType.MagicFanoutSportsEvent) { + candidate.target + .params(PushFeatureSwitchParams.MagicFanoutSportsEventDenyListedCountries) + } else Seq() + val eventCountries = + candidate.newsForYouMetadata + .flatMap(_.locales).getOrElse(Seq.empty[Locale]).flatMap(_.country) + if (isInCountryList(countryCode, eventCountries) + && !isInCountryList(countryCode, denyListedCountryCodes)) { + internationalLocalePassedCounter.incr() + true + } else { + internationalLocaleFilteredCounter.incr() + false + } + case _ => false + } + } + .withStats(scopedStatsReceiver) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesForCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesForCandidate.scala new file mode 100644 index 000000000..52371b488 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesForCandidate.scala @@ -0,0 +1,525 @@ +package 
com.twitter.frigate.pushservice.predicate.magic_fanout + +import com.twitter.audience_rewards.thriftscala.HasSuperFollowingRelationshipRequest +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.MagicFanoutCandidate +import com.twitter.frigate.common.base.MagicFanoutCreatorEventCandidate +import com.twitter.frigate.common.base.MagicFanoutProductLaunchCandidate +import com.twitter.frigate.common.history.RecItems +import com.twitter.frigate.common.predicate.FatiguePredicate.build +import com.twitter.frigate.common.predicate.FatiguePredicate.productLaunchTypeRecTypesOnlyFilter +import com.twitter.frigate.common.predicate.FatiguePredicate.recOnlyFilter +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.common.store.interests.SemanticCoreEntityId +import com.twitter.frigate.common.util.IbisAppPushDeviceSettingsUtil +import com.twitter.frigate.magic_events.thriftscala.CreatorFanoutType +import com.twitter.frigate.magic_events.thriftscala.ProductType +import com.twitter.frigate.magic_events.thriftscala.TargetID +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventHydratedCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutEventPushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutNewsEventPushCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.predicate.FatiguePredicate +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.NotificationDisplayLocation +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import 
com.twitter.interests.thriftscala.UserInterests +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.Param +import com.twitter.util.Duration +import com.twitter.util.Future + +object MagicFanoutPredicatesForCandidate { + + /** + * Check if Semantic Core reasons satisfy rank threshold ( for heavy users a non broad entity should satisfy the threshold) + */ + def magicFanoutErgInterestRankThresholdPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventHydratedCandidate] = { + val name = "magicfanout_interest_erg_rank_threshold" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: MagicFanoutEventHydratedCandidate => + candidate.target.isHeavyUserState.map { isHeavyUser => + lazy val rankThreshold = + if (isHeavyUser) { + candidate.target.params(PushFeatureSwitchParams.MagicFanoutRankErgThresholdHeavy) + } else { + candidate.target.params(PushFeatureSwitchParams.MagicFanoutRankErgThresholdNonHeavy) + } + MagicFanoutPredicatesUtil + .checkIfValidErgScEntityReasonExists( + candidate.effectiveMagicEventsReasons, + rankThreshold + ) + } + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + def newsNotificationFatigue( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val name = "news_notification_fatigue" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate => + FatiguePredicate + .recTypeSetOnly( + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice, + recTypes = Set(CommonRecommendationType.MagicFanoutNewsEvent), + maxInInterval = + candidate.target.params(PushFeatureSwitchParams.MFMaxNumberOfPushesInInterval), + interval = candidate.target.params(PushFeatureSwitchParams.MFPushIntervalInHours), + minInterval = 
candidate.target.params(PushFeatureSwitchParams.MFMinIntervalFatigue) + ) + .apply(Seq(candidate)) + .map(_.headOption.getOrElse(false)) + + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** + * Passes when none of the target's opted-out Semantic Core interests appear in the candidate's annotated and inferred Semantic Core entities. + * + * @param stats receiver for the predicate stats; the two opt-out counters are created on it directly, outside the predicate scope + * + * @return the named opt-out predicate + */ + def magicFanoutNoOptoutInterestPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "magicfanout_optout_interest_predicate" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + val withOptOutInterestsCounter = stats.counter("with_optout_interests") + val withoutOptOutInterestsCounter = stats.counter("without_optout_interests") + Predicate + .fromAsync { candidate: MagicFanoutEventPushCandidate => + candidate.target.optOutSemanticCoreInterests.map { + case ( + optOutUserInterests: Seq[SemanticCoreEntityId] + ) => + withOptOutInterestsCounter.incr() + optOutUserInterests + .intersect(candidate.annotatedAndInferredSemanticCoreEntities).isEmpty + case _ => + withoutOptOutInterestsCounter.incr() + true + } + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** + * Passes when at least one of the target's distinct device languages matches a + * lower-cased language of the event's locales; false without device info or newsForYouMetadata + * + * @param statsReceiver receiver for the predicate stats + * + * @return the named device-language predicate + */ + def inferredUserDeviceLanguagePredicate( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "inferred_device_language" + val scopedStats = statsReceiver.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: MagicFanoutEventPushCandidate => + val target = candidate.target + target.deviceInfo.map { + _.flatMap { deviceInfo => + val languages = deviceInfo.deviceLanguages.getOrElse(Seq.empty[String]) + val distinctDeviceLanguages = + IbisAppPushDeviceSettingsUtil.distinctDeviceLanguages(languages) + + candidate.newsForYouMetadata.map { newsForYouMetadata => + val eventLocales =
newsForYouMetadata.locales.getOrElse(Seq.empty) + val eventLanguages = eventLocales.flatMap(_.language).map(_.toLowerCase).distinct + + eventLanguages.intersect(distinctDeviceLanguages).nonEmpty + } + }.getOrElse(false) + } + } + .withStats(scopedStats) + .withName(name) + } + + /** + * Bypass `predicate` for high priority news events, as decided by MagicFanoutPredicatesUtil.checkIfHighPriorityNewsEventForCandidate + */ + def highPriorityNewsEventExceptedPredicate( + predicate: NamedPredicate[MagicFanoutNewsEventPushCandidate] + )( + implicit config: Config + ): NamedPredicate[MagicFanoutNewsEventPushCandidate] = { + PredicatesForCandidate.exceptedPredicate( + name = "high_priority_excepted_" + predicate.name, + fn = MagicFanoutPredicatesUtil.checkIfHighPriorityNewsEventForCandidate, + predicate + )(config.statsReceiver) + } + + /** + * Bypass `predicate` for high priority events, as decided by MagicFanoutPredicatesUtil.checkIfHighPriorityEventForCandidate + */ + def highPriorityEventExceptedPredicate( + predicate: NamedPredicate[MagicFanoutEventPushCandidate] + )( + implicit config: Config + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + PredicatesForCandidate.exceptedPredicate( + name = "high_priority_excepted_" + predicate.name, + fn = MagicFanoutPredicatesUtil.checkIfHighPriorityEventForCandidate, + predicate + )(config.statsReceiver) + } + + /** Passes when the normed dot product of the event and user SimCluster vectors reaches the heavy or non-heavy user threshold. With a user vector but no event vector only MagicFanoutSportsEvent candidates pass; every other combination fails. */ def magicFanoutSimClusterTargetingPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "simcluster_targeting" + val scopedStats = stats.scope(s"predicate_$name") + val userStateCounters = scopedStats.scope("user_state") + Predicate + .fromAsync { candidate: MagicFanoutEventPushCandidate => + candidate.target.isHeavyUserState.map { isHeavyUser => + val simClusterEmbeddings = candidate.newsForYouMetadata.flatMap( + _.eventContextScribe.flatMap(_.simClustersEmbeddings)) + /* same value as MagicFanoutPredicatesUtil.TopKSimClustersCount */ val TopKSimClustersCount = 50 + val eventSimClusterVectorOpt: Option[MagicFanoutPredicatesUtil.SimClusterScores] = + MagicFanoutPredicatesUtil.getEventSimClusterVector( + simClusterEmbeddings.map(_.toMap), + (ModelVersion.Model20m145kUpdated,
EmbeddingType.FollowBasedTweet), + TopKSimClustersCount + ) + val userSimClusterVectorOpt: Option[MagicFanoutPredicatesUtil.SimClusterScores] = + MagicFanoutPredicatesUtil.getUserSimClusterVector(candidate.effectiveMagicEventsReasons) + (eventSimClusterVectorOpt, userSimClusterVectorOpt) match { + case ( + Some(eventSimClusterVector: MagicFanoutPredicatesUtil.SimClusterScores), + Some(userSimClusterVector)) => + /* normalized by the norm of the event vector only */ val score = eventSimClusterVector + .normedDotProduct(userSimClusterVector, eventSimClusterVector) + val threshold = if (isHeavyUser) { + candidate.target.params( + PushFeatureSwitchParams.MagicFanoutSimClusterDotProductHeavyUserThreshold) + } else { + candidate.target.params( + PushFeatureSwitchParams.MagicFanoutSimClusterDotProductNonHeavyUserThreshold) + } + val isPassed = score >= threshold + userStateCounters.scope(isHeavyUser.toString).counter(s"$isPassed").incr() + isPassed + + case (None, Some(userSimClusterVector)) => + candidate.commonRecType == CommonRecommendationType.MagicFanoutSportsEvent + + case _ => false + } + } + } + .withStats(scopedStats) + .withName(name) + } + + /** Candidates whose reasons contain a geo target pass only when EnableMfGeoTargeting is set for the target; all other candidates pass. */ def geoTargetingHoldback( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCandidate] = { + Predicate + .from[PushCandidate with MagicFanoutCandidate] { candidate => + if (MagicFanoutPredicatesUtil.reasonsContainGeoTarget( + candidate.candidateMagicEventsReasons)) { + candidate.target.params(PushFeatureSwitchParams.EnableMfGeoTargeting) + } else true + } + .withStats(stats.scope("geo_targeting_holdback")) + .withName("geo_targeting_holdback") + } + + /** Candidates whose reasons contain a geo target pass only when the target's user record has an account with allowLocationHistoryPersonalization set; all other candidates pass. */ def geoOptOutPredicate( + userStore: ReadableStore[Long, User] + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCandidate] = { + Predicate + .fromAsync[PushCandidate with MagicFanoutCandidate] { candidate => + if (MagicFanoutPredicatesUtil.reasonsContainGeoTarget( + candidate.candidateMagicEventsReasons)) { + userStore.get(candidate.target.targetId).map {
userOpt => + val isGeoAllowed = userOpt + .flatMap(_.account) + .exists(_.allowLocationHistoryPersonalization) + isGeoAllowed + } + } else { + Future.True + } + } + .withStats(stats.scope("geo_opt_out_predicate")) + .withName("geo_opt_out_predicate") + } + + /** + * Passes when candidate.followedTopicLocalizedEntities is non-empty. NOTE(review): the implicit interestsLookupStore is not referenced in this body, and no top-k check happens here. + */ + def magicFanoutTopicFollowsTargetingPredicate( + implicit stats: StatsReceiver, + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests] + ): NamedPredicate[MagicFanoutEventHydratedCandidate] = { + val name = "magicfanout_topic_follows_targeting" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync[PushCandidate with MagicFanoutEventHydratedCandidate] { candidate => + candidate.followedTopicLocalizedEntities.map(_.nonEmpty) + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** Requires the magicfanout candidate to have a UserID reason which ranks at or below the follow + * rank threshold. If no UserID target exists the candidate is dropped.
*/ + def followRankThreshold( + threshold: Param[Int] + )( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCandidate] = { + val name = "follow_rank_threshold" + Predicate + .from[PushCandidate with MagicFanoutCandidate] { c => + c.candidateMagicEventsReasons.exists { fanoutReason => + fanoutReason.reason match { + case TargetID.UserID(_) => + fanoutReason.rank.exists { rank => + rank <= c.target.params(threshold) + } + case _ => false + } + } + } + .withStats(statsReceiver.scope(name)) + .withName(name) + } + + /** Candidates tagged with MagicFanoutPredicatesUtil.UgmMomentTag pass only when MagicFanoutNewsUserGeneratedEventsEnable is set for the target; untagged candidates pass. */ def userGeneratedEventsPredicate( + implicit statsReceiver: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutEventHydratedCandidate] = { + val name = "user_generated_moments" + val stats = statsReceiver.scope(name) + + Predicate + .from { candidate: PushCandidate with MagicFanoutEventHydratedCandidate => + val isUgmMoment = candidate.semanticCoreEntityTags.values.flatten.toSet + .contains(MagicFanoutPredicatesUtil.UgmMomentTag) + if (isUgmMoment) { + candidate.target.params(PushFeatureSwitchParams.MagicFanoutNewsUserGeneratedEventsEnable) + } else true + }.withStats(stats) + .withName(name) + } + /** Escherbird events pass only when the target's ListOfEventSemanticCoreDomainIds is non-empty and intersects candidate.eventSemanticCoreDomainIds; all other events pass. */ def escherbirdMagicfanoutEventParam( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutEventPushCandidate] = { + val name = "magicfanout_escherbird_fs" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + + Predicate + .fromAsync[PushCandidate with MagicFanoutEventPushCandidate] { candidate => + val candidateFrigateNotif = candidate.frigateNotification.magicFanoutEventNotification + val isEscherbirdEvent = candidateFrigateNotif.exists(_.isEscherbirdEvent.contains(true)) + scopedStatsReceiver.counter(s"with_escherbird_flag_$isEscherbirdEvent").incr() + + if (isEscherbirdEvent) { + + val listOfEventsSemanticCoreDomainIds = + candidate.target.params(PushFeatureSwitchParams.ListOfEventSemanticCoreDomainIds) + + val candScDomainEvent = + if
(listOfEventsSemanticCoreDomainIds.nonEmpty) { + candidate.eventSemanticCoreDomainIds + .intersect(listOfEventsSemanticCoreDomainIds).nonEmpty + } else { + false + } + scopedStatsReceiver + .counter( + s"with_escherbird_fs_in_list_of_event_semantic_core_domains_$candScDomainEvent").incr() + Future.value(candScDomainEvent) + } else { + Future.True + } + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** + * Checks if the user has custom targeting enabled. If so, bucket the user in experiment. This custom targeting refers to adding + * tweet authors as targets in the eventfanout service. Passes when some reason is not a custom-targeted UserID reason, or when there is a custom-targeted one and MagicFanoutEnableCustomTargetingNewsEvent is on; fails when the candidate has no reasons. + * @param stats [StatsReceiver] + * @return NamedPredicate[PushCandidate with MagicFanoutEventPushCandidate] + */ + def hasCustomTargetingForNewsEventsParam( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutEventPushCandidate] = { + val name = "magicfanout_hascustomtargeting" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + + Predicate + .from[PushCandidate with MagicFanoutEventPushCandidate] { candidate => + candidate.candidateMagicEventsReasons.exists { fanoutReason => + fanoutReason.reason match { + case userIdReason: TargetID.UserID => + if (userIdReason.userID.hasCustomTargeting.contains(true)) { + candidate.target.params( + PushFeatureSwitchParams.MagicFanoutEnableCustomTargetingNewsEvent) + } else true + case _ => true + } + } + } + .withStats(scopedStatsReceiver) + .withName(name) + + } + + /** Fatigue check for product launch pushes to mobile devices: BlueVerified launches read the ProductLaunch interval params, any other product type uses a one-day interval with maxInInterval 0. */ def magicFanoutProductLaunchFatigue( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutProductLaunchCandidate] = { + val name = "magic_fanout_product_launch_fatigue" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate with MagicFanoutProductLaunchCandidate => + val target = candidate.target + val (interval, maxInInterval, minInterval) = { + candidate.productLaunchType match { + case ProductType.BlueVerified => + ( +
target.params(PushFeatureSwitchParams.ProductLaunchPushIntervalInHours), + target.params(PushFeatureSwitchParams.ProductLaunchMaxNumberOfPushesInInterval), + target.params(PushFeatureSwitchParams.ProductLaunchMinIntervalFatigue)) + case _ => + (Duration.fromDays(1), 0, Duration.Zero) + } + } + build( + interval = interval, + maxInInterval = maxInInterval, + minInterval = minInterval, + filterHistory = productLaunchTypeRecTypesOnlyFilter( + Set(CommonRecommendationType.MagicFanoutProductLaunch), + candidate.productLaunchType.toString), + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice + ).flatContraMap { candidate: PushCandidate => candidate.target.history } + .apply(Seq(candidate)) + .map(_.headOption.getOrElse(false)) + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** Passes when the push target is not the creator of the event. */ def creatorPushTargetIsNotCreator( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCreatorEventCandidate] = { + val name = "magic_fanout_creator_is_self" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .from { candidate: PushCandidate with MagicFanoutCreatorEventCandidate => + candidate.target.targetId != candidate.creatorId + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** Passes when the creator id is not already among the creatorIds of the target's pushRecItems. */ def duplicateCreatorPredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCreatorEventCandidate] = { + val name = "magic_fanout_creator_duplicate_creator_id" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { cand: PushCandidate with MagicFanoutCreatorEventCandidate => + cand.target.pushRecItems.map { recItems: RecItems => + !recItems.creatorIds.contains(cand.creatorId) + } + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** Result of looking up the (target, creator) pair in config.hasSuperFollowingRelationshipStore; a missing entry counts as false. */ def isSuperFollowingCreator( + )( + implicit config: Config, + stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCreatorEventCandidate] = { + val name =
"magic_fanout_is_already_superfollowing_creator" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { cand: PushCandidate with MagicFanoutCreatorEventCandidate => + config.hasSuperFollowingRelationshipStore + .get( + HasSuperFollowingRelationshipRequest( + sourceUserId = cand.target.targetId, + targetUserId = cand.creatorId)).map(_.getOrElse(false)) + } + .withStats(scopedStatsReceiver) + .withName(name) + } + + /** Fatigue check for creator event pushes to mobile devices: UserSubscription and NewCreator fanout types read their own interval params, any other type uses a one-day interval with maxInInterval 0. */ def magicFanoutCreatorPushFatiguePredicate( + )( + implicit stats: StatsReceiver + ): NamedPredicate[PushCandidate with MagicFanoutCreatorEventCandidate] = { + val name = "magic_fanout_creator_fatigue" + val scopedStatsReceiver = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate with MagicFanoutCreatorEventCandidate => + val target = candidate.target + val (interval, maxInInterval, minInterval) = { + candidate.creatorFanoutType match { + case CreatorFanoutType.UserSubscription => + ( + target.params(PushFeatureSwitchParams.CreatorSubscriptionPushIntervalInHours), + target.params( + PushFeatureSwitchParams.CreatorSubscriptionPushMaxNumberOfPushesInInterval), + target.params(PushFeatureSwitchParams.CreatorSubscriptionPushhMinIntervalFatigue)) + case CreatorFanoutType.NewCreator => + ( + target.params(PushFeatureSwitchParams.NewCreatorPushIntervalInHours), + target.params(PushFeatureSwitchParams.NewCreatorPushMaxNumberOfPushesInInterval), + target.params(PushFeatureSwitchParams.NewCreatorPushMinIntervalFatigue)) + case _ => + (Duration.fromDays(1), 0, Duration.Zero) + } + } + build( + interval = interval, + maxInInterval = maxInInterval, + minInterval = minInterval, + filterHistory = recOnlyFilter(candidate.commonRecType), + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice + ).flatContraMap { candidate: PushCandidate => candidate.target.history } + .apply(Seq(candidate)) + .map(_.headOption.getOrElse(false)) + } + .withStats(scopedStatsReceiver) + .withName(name) +
} +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesUtil.scala new file mode 100644 index 000000000..306f2b3b6 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutPredicatesUtil.scala @@ -0,0 +1,218 @@ +package com.twitter.frigate.pushservice.predicate.magic_fanout + +import com.twitter.eventdetection.event_context.util.SimClustersUtil +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.magic_events.thriftscala._ +import com.twitter.frigate.pushservice.model.MagicFanoutEventPushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutNewsEventPushCandidate +import com.twitter.frigate.pushservice.model.MagicFanoutProductLaunchPushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.simclusters_v2.common.SimClustersEmbedding +import com.twitter.simclusters_v2.thriftscala.EmbeddingType +import com.twitter.simclusters_v2.thriftscala.ModelVersion +import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId +import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} +import com.twitter.util.Future + +object MagicFanoutPredicatesUtil { + + val UttDomain: Long = 0L + type DomainId = Long + type EntityId = Long + val BroadCategoryTag = "utt:broad_category" + val UgmMomentTag = "MMTS.isUGMMoment" + val TopKSimClustersCount = 50 + + /** Sparse score vector keyed by SimCluster id. */ case class SimClusterScores(simClusterScoreVector: Map[Int, Double]) { + /** Sum over this vector's clusters of the score times the matching score in `other`; clusters missing from `other` contribute 0.0. */ def dotProduct(other: SimClusterScores): Double = { + simClusterScoreVector + .map { + case (clusterId, score) => other.simClusterScoreVector.getOrElse(clusterId, 0.0) * score + }.foldLeft(0.0) { _ + _ } + } + + /** Square root of the sum of squared scores. */ def norm(): Double = { + val
sumOfSquares: Double = simClusterScoreVector + .map { + case (clusterId, score) => score * score + }.foldLeft(0.0)(_ + _) + scala.math.sqrt(sumOfSquares) + } + + /** Dot product with `other` divided by the norm of `normalizer`; the plain dot product is returned when that norm is 0.0. */ def normedDotProduct(other: SimClusterScores, normalizer: SimClusterScores): Double = { + val denominator = normalizer.norm() + val score = dotProduct(other) + if (denominator != 0.0) { + score / denominator + } else { + score + } + } + } + + /** True when the entity's tag set contains BroadCategoryTag; entities without tags are not broad. */ private def isSemanticCoreEntityBroad( + semanticCoreEntityTags: Map[(DomainId, EntityId), Set[String]], + scEntityId: SemanticCoreID + ): Boolean = { + semanticCoreEntityTags + .getOrElse((scEntityId.domainId, scEntityId.entityId), Set.empty).contains(BroadCategoryTag) + } + + /** Case-insensitive check that accountCountryCode is one of locales. */ def isInCountryList(accountCountryCode: String, locales: Seq[String]): Boolean = { + locales.map(_.toLowerCase).contains(accountCountryCode.toLowerCase) + } + + /** + * True when the news event is high priority and EnableHighPriorityPush is set for the target + */ + def checkIfHighPriorityNewsEventForCandidate( + candidate: MagicFanoutNewsEventPushCandidate + ): Future[Boolean] = { + candidate.isHighPriorityEvent.map { isHighPriority => + isHighPriority && (candidate.target.params(PushFeatureSwitchParams.EnableHighPriorityPush)) + } + } + + /** + * True when a MagicFanoutSportsEvent is high priority and EnableHighPrioritySportsPush is set for the target; false for every other rec type + */ + def checkIfHighPriorityEventForCandidate( + candidate: MagicFanoutEventPushCandidate + ): Future[Boolean] = { + candidate.isHighPriorityEvent.map { isHighPriority => + candidate.commonRecType match { + case CommonRecommendationType.MagicFanoutSportsEvent => + isHighPriority && (candidate.target.params( + PushFeatureSwitchParams.EnableHighPrioritySportsPush)) + case _ => false + } + } + } + + /** + * True when DisableIsTargetBlueVerifiedPredicate is set for the target, i.e. the target blue verified check should be skipped + */ + def shouldSkipBlueVerifiedCheckForCandidate( + candidate: MagicFanoutProductLaunchPushCandidate + ): Future[Boolean] = + Future.value( + candidate.target.params(PushFeatureSwitchParams.DisableIsTargetBlueVerifiedPredicate)) + + /** + * Boolean check if to skip target is
legacy verified + */ + def shouldSkipLegacyVerifiedCheckForCandidate( + candidate: MagicFanoutProductLaunchPushCandidate + ): Future[Boolean] = + Future.value( + candidate.target.params(PushFeatureSwitchParams.DisableIsTargetLegacyVerifiedPredicate)) + + /** True when EnableIsTargetSuperFollowCreatorPredicate is off for the target. */ def shouldSkipSuperFollowCreatorCheckForCandidate( + candidate: MagicFanoutProductLaunchPushCandidate + ): Future[Boolean] = + Future.value( + !candidate.target.params(PushFeatureSwitchParams.EnableIsTargetSuperFollowCreatorPredicate)) + + /** + * True when the reason targets a Semantic Core entity and has a rank strictly below rankThreshold; reasons without a rank fail + */ + def checkIfErgScEntityReasonMeetsThreshold( + rankThreshold: Int, + reason: MagicEventsReason, + ): Boolean = { + reason.reason match { + case TargetID.SemanticCoreID(scEntityId: SemanticCoreID) => + reason.rank match { + case Some(rank) => rank < rankThreshold + case _ => false + } + case _ => false + } + } + + /** + * True when any reason is flagged isNewUser, or when some ErgShortTermInterestSemanticCore reason meets the rank threshold; false when no reasons are given + */ + def checkIfValidErgScEntityReasonExists( + magicEventsReasons: Option[Seq[MagicEventsReason]], + rankThreshold: Int + )( + implicit stats: StatsReceiver + ): Boolean = { + magicEventsReasons match { + case Some(reasons) if reasons.exists(_.isNewUser.contains(true)) => true + case Some(reasons) => + reasons.exists { reason => + reason.source.contains(ReasonSource.ErgShortTermInterestSemanticCore) && + checkIfErgScEntityReasonMeetsThreshold( + rankThreshold, + reason + ) + } + + case _ => false + } + } + + /** + * Get event simcluster vector from event context: the entry for embeddingMapKey from SimClustersUtil.getMaxTopKTweetSimClusters, or an empty vector when that key is absent. None when no embeddings are given + */ + def getEventSimClusterVector( + simClustersEmbeddingOption: Option[Map[SimClustersEmbeddingId, ThriftSimClustersEmbedding]], + embeddingMapKey: (ModelVersion, EmbeddingType), + topKSimClustersCount: Int + ): Option[SimClusterScores] = { + simClustersEmbeddingOption.map { thriftSimClustersEmbeddings => + val simClustersEmbeddings: Map[SimClustersEmbeddingId, SimClustersEmbedding] = + thriftSimClustersEmbeddings.map { +
case (simClustersEmbeddingId, simClustersEmbeddingValue) => + (simClustersEmbeddingId, SimClustersEmbedding(simClustersEmbeddingValue)) + }.toMap + val emptySeq = Seq[(Int, Double)]() + val simClusterScoreTuple: Map[(ModelVersion, EmbeddingType), Seq[(Int, Double)]] = + SimClustersUtil + .getMaxTopKTweetSimClusters(simClustersEmbeddings, topKSimClustersCount) + SimClusterScores(simClusterScoreTuple.getOrElse(embeddingMapKey, emptySeq).toMap) + } + } + + /** + * Get user simcluster vector from magic events reasons: one entry per SimClusterID reason, with a missing score counted as 0.0. None when no reasons are given + */ + def getUserSimClusterVector( + magicEventsReasonsOpt: Option[Seq[MagicEventsReason]] + ): Option[SimClusterScores] = { + magicEventsReasonsOpt.map { magicEventsReasons: Seq[MagicEventsReason] => + val reasons: Seq[(Int, Double)] = magicEventsReasons.flatMap { reason => + reason.reason match { + case TargetID.SimClusterID(simClusterId: SimClusterID) => + Some((simClusterId.clusterId, reason.score.getOrElse(0.0))) + case _ => + None + } + } + SimClusterScores(reasons.toMap) + } + } + + /** True when some reason is a PlaceID target whose source is GeoGraph. */ def reasonsContainGeoTarget(reasons: Seq[MagicEventsReason]): Boolean = { + reasons.exists { reason => + val isGeoGraphSource = reason.source.contains(ReasonSource.GeoGraph) + reason.reason match { + case TargetID.PlaceID(_) if isGeoGraphSource => true + case _ => false + } + } + } + + /** Ids of all PlaceID targets whose source is GeoGraph. */ def geoPlaceIdsFromReasons(reasons: Seq[MagicEventsReason]): Set[Long] = { + reasons.flatMap { reason => + val isGeoGraphSource = reason.source.contains(ReasonSource.GeoGraph) + reason.reason match { + case TargetID.PlaceID(PlaceID(id)) if isGeoGraphSource => Some(id) + case _ => None + } + }.toSet + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutSportsUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutSportsUtil.scala new file mode 100644 index 000000000..224be3ad5 --- /dev/null +++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutSportsUtil.scala @@ -0,0 +1,231 @@ +package com.twitter.frigate.pushservice.predicate.magic_fanout + +import com.twitter.datatools.entityservice.entities.sports.thriftscala.NflFootballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SoccerMatchLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SoccerPeriod +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SportsEventHomeAwayTeamScore +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SportsEventStatus +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SportsEventTeamAlignment.Away +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SportsEventTeamAlignment.Home +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.frigate.pushservice.params.SportGameEnum +import com.twitter.frigate.common.base.GenericGameScore +import com.twitter.frigate.common.base.NflGameScore +import com.twitter.frigate.common.base.SoccerGameScore +import com.twitter.frigate.common.base.TeamInfo +import com.twitter.frigate.common.base.TeamScore +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object MagicFanoutSportsUtil { + + /** Builds a SoccerGameScore from a live update; requires game.status to be defined and returns None when transformToGameScore yields no score. */ def transformSoccerGameScore(game: SoccerMatchLiveUpdate): Option[SoccerGameScore] = { + require(game.status.isDefined) + val gameScore = transformToGameScore(game.score, game.status.get) + val _penaltyKicks = transformToGameScore(game.penaltyScore, game.status.get) + gameScore.map { score => + val _isGameEnd = game.status.get match { + case SportsEventStatus.Completed(_) => true + case _ => false + } + + val _isHalfTime = game.period.exists { period => + period match { + case SoccerPeriod.Halftime(_) => true + case _ => false + } + } + + val
_isOvertime = game.period.exists { period => + period match { + case SoccerPeriod.PreOvertime(_) => true + case _ => false + } + } + + val _isPenaltyKicks = game.period.exists { period => + period match { + case SoccerPeriod.PrePenalty(_) => true + case SoccerPeriod.Penalty(_) => true + case _ => false + } + } + + val _gameMinute = game.gameMinute.map { soccerGameMinute => + game.minutesInInjuryTime match { + case Some(injuryTime) => s"($soccerGameMinute+$injuryTime′)" + case None => s"($soccerGameMinute′)" + } + } + + SoccerGameScore( + score.home, + score.away, + isGameOngoing = score.isGameOngoing, + penaltyKicks = _penaltyKicks, + gameMinute = _gameMinute, + isHalfTime = _isHalfTime, + isOvertime = _isOvertime, + isPenaltyKicks = _isPenaltyKicks, + isGameEnd = _isGameEnd + ) + } + } + + /** Builds an NflGameScore from a live update; requires game.status to be defined. matchTime shows quarter and mm:ss when both the quarter and a non-zero remaining time are present, the quarter alone when no remaining time is given, and is empty otherwise (including a remaining time of 0). */ def transformNFLGameScore(game: NflFootballGameLiveUpdate): Option[NflGameScore] = { + require(game.status.isDefined) + + val gameScore = transformToGameScore(game.score, game.status.get) + gameScore.map { score => + val _isGameEnd = game.status.get match { + case SportsEventStatus.Completed(_) => true + case _ => false + } + + val _matchTime = (game.quarter, game.remainingSecondsInQuarter) match { + case (Some(quarter), Some(remainingSeconds)) if remainingSeconds != 0L => + val m = (remainingSeconds / 60) % 60 + val s = remainingSeconds % 60 + val formattedSeconds = "%02d:%02d".format(m, s) + s"(Q$quarter - $formattedSeconds)" + case (Some(quarter), None) => s"(Q$quarter)" + case _ => "" + } + + NflGameScore( + score.home, + score.away, + isGameOngoing = score.isGameOngoing, + isGameEnd = _isGameEnd, + matchTime = _matchTime + ) + } + } + + /** + Takes a score from Strato columns and turns it into an easier to handle structure (GameScore class) + We do this to easily access the home/away scenario for copy setting. Returns None unless at least two team scores are present, both with a score, and the first has a Home or Away alignment + */ + def transformToGameScore( + scoreOpt: Option[SportsEventHomeAwayTeamScore], + status: SportsEventStatus + ): Option[GenericGameScore] = { + val isGameOngoing =
status match { + case SportsEventStatus.InProgress(_) => true + case SportsEventStatus.Completed(_) => false + case _ => false + } + + val scoresWithTeam = scoreOpt + .map { score => + score.scores.map { score => (score.score, score.participantAlignment, score.participantId) } + }.getOrElse(Seq()) + + val tuple = scoresWithTeam match { + case Seq(teamOne, teamTwo, _*) => Some((teamOne, teamTwo)) + case _ => None + } + tuple.flatMap { + case ((Some(teamOneScore), teamOneAlignment, teamOne), (Some(teamTwoScore), _, teamTwo)) => + teamOneAlignment.flatMap { + case Home(_) => + val home = TeamScore(teamOneScore, teamOne.entityId, teamOne.domainId) + val away = TeamScore(teamTwoScore, teamTwo.entityId, teamTwo.domainId) + Some(GenericGameScore(home, away, isGameOngoing)) + case Away(_) => + val away = TeamScore(teamOneScore, teamOne.entityId, teamOne.domainId) + val home = TeamScore(teamTwoScore, teamTwo.entityId, teamTwo.domainId) + Some(GenericGameScore(home, away, isGameOngoing)) + case _ => None + } + case _ => None + } + } + + /** Looks up the team's entity megadata in semanticCoreMegadataStore and maps its basic metadata to a TeamInfo (name and preferred Twitter user id); None when the entity or its basic metadata is missing. */ def getTeamInfo( + team: TeamScore, + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata] + ): Future[Option[TeamInfo]] = { + semanticCoreMegadataStore + .get(SemanticEntityForQuery(team.teamDomainId, team.teamEntityId)).map { + _.flatMap { + _.basicMetadata.map { metadata => + TeamInfo( + name = metadata.name, + twitterUserId = metadata.twitter.flatMap(_.preferredTwitterUserId)) + } + } + } + } + + /** Returns the first known team name contained in `name`, or `name` itself when none matches. Empty entries in the team-name list are skipped: every string contains the empty string, so an empty entry would otherwise always match first and make this method return an empty name for every input. */ def getNFLReadableName(name: String): String = { + val teamNames = + Seq("") + teamNames.find(teamName => teamName.nonEmpty && name.contains(teamName)).getOrElse(name) + } + + /** Builds the Ibis map for a soccer score: the soccer game-type flag, exactly one update-type flag (game end, half time, overtime, penalty kicks, else score update, checked in that order), match_time when a game minute is present, and the away and home scores with the penalty-kick score in parentheses when present. */ def getSoccerIbisMap(game: SoccerGameScore): Map[String, String] = { + val gameMinuteMap = game.gameMinute + .map { gameMinute => Map("match_time" -> gameMinute) } + .getOrElse(Map.empty) + + val updateTypeMap = { + if (game.isGameEnd) Map("is_game_end" -> "true") + else if (game.isHalfTime) Map("is_half_time" -> "true") + else if (game.isOvertime) Map("is_overtime"
-> "true") + else if (game.isPenaltyKicks) Map("is_penalty_kicks" -> "true") + else Map("is_score_update" -> "true") + } + + val awayScore = game match { + case SoccerGameScore(_, away, _, None, _, _, _, _, _) => + away.score.toString + case SoccerGameScore(_, away, _, Some(penaltyKick), _, _, _, _, _) => + s"${away.score} (${penaltyKick.away.score}) " + case _ => "" + } + + val homeScore = game match { + case SoccerGameScore(home, _, _, None, _, _, _, _, _) => + home.score.toString + case SoccerGameScore(home, _, _, Some(penaltyKick), _, _, _, _, _) => + s"${home.score} (${penaltyKick.home.score}) " + case _ => "" + } + + val scoresMap = Map( + "away_score" -> awayScore, + "home_score" -> homeScore, + ) + + gameType(SportGameEnum.Soccer) ++ updateTypeMap ++ gameMinuteMap ++ scoresMap + } + + /** Builds the Ibis map for an NFL score: the NFL game-type flag, the game-end or score-update flag, match_time (always present, possibly empty) and the away and home scores. */ def getNflIbisMap(game: NflGameScore): Map[String, String] = { + val gameMinuteMap = Map("match_time" -> game.matchTime) + + val updateTypeMap = { + if (game.isGameEnd) Map("is_game_end" -> "true") + else Map("is_score_update" -> "true") + } + + val awayScore = game.away.score + val homeScore = game.home.score + + val scoresMap = Map( + "away_score" -> awayScore.toString, + "home_score" -> homeScore.toString, + ) + + gameType(SportGameEnum.Nfl) ++ updateTypeMap ++ gameMinuteMap ++ scoresMap + } + + /** Single-entry flag map identifying the sport; empty for any game type other than Soccer or Nfl. */ private def gameType(game: SportGameEnum.Value): Map[String, String] = { + game match { + case SportGameEnum.Soccer => Map("is_soccer_game" -> "true") + case SportGameEnum.Nfl => Map("is_nfl_game" -> "true") + case _ => Map.empty + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutTargetingPredicateWrappersForCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutTargetingPredicateWrappersForCandidate.scala new file mode 100644 index 000000000..758c9ef34 --- /dev/null +++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/magic_fanout/MagicFanoutTargetingPredicateWrappersForCandidate.scala @@ -0,0 +1,133 @@ +package com.twitter.frigate.pushservice.predicate.magic_fanout + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.common.util.FeatureSwitchParams +import com.twitter.frigate.common.util.MagicFanoutTargetingPredicatesEnum +import com.twitter.frigate.common.util.MagicFanoutTargetingPredicatesEnum.MagicFanoutTargetingPredicatesEnum +import com.twitter.frigate.pushservice.model.MagicFanoutEventPushCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.FSEnumParam + +object MagicFanoutTargetingPredicateWrappersForCandidate { + + /** + * Combine Prod and Experimental Targeting predicate logic: looks up the predicate mapped to the value of the candidate's targeting feature-switch param and applies it to that candidate. + * Throws if the predicate map has no entry for that value. + * @return: NamedPredicate[MagicFanoutEventPushCandidate] + */ + def magicFanoutTargetingPredicate( + stats: StatsReceiver, + config: Config + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + val name = "magic_fanout_targeting_predicate" + // Built on first use and reused afterwards; the map depends only on config, so it is not rebuilt for every candidate + lazy val targetingPredicateMap = MagicFanoutTargetingPredicateMapForCandidate.apply(config) + Predicate + .fromAsync { candidate: MagicFanoutEventPushCandidate => + val mfTargetingPredicateParam = getTargetingPredicateParams(candidate) + val mfTargetingPredicate = + targetingPredicateMap.get(candidate.target.params(mfTargetingPredicateParam)) + mfTargetingPredicate match { + case Some(predicate) => + predicate.apply(Seq(candidate)).map(_.head) + case None => + throw new Exception( + s"MFTargetingPredicateMap doesnt contain value for
TargetingParam: ${mfTargetingPredicateParam}") + } + } + .withStats(stats.scope(name)) + .withName(name) + } + + /** + * Selects the feature-switch param that holds the targeting predicate choice: MFCricketTargetingPredicate for MagicFanoutSportsEvent candidates, MFTargetingPredicate otherwise + */ + private def getTargetingPredicateParams( + candidate: MagicFanoutEventPushCandidate + ): FSEnumParam[MagicFanoutTargetingPredicatesEnum.type] = { + if (candidate.commonRecType == CommonRecommendationType.MagicFanoutSportsEvent) { + FeatureSwitchParams.MFCricketTargetingPredicate + } else FeatureSwitchParams.MFTargetingPredicate + } + + /** + * SimCluster and ERG and Topic Follows Targeting Predicate (the predicates are combined with `or`) + */ + def simClusterErgTopicFollowsTargetingPredicate( + implicit stats: StatsReceiver, + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests] + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + simClusterErgTargetingPredicate + .or(MagicFanoutPredicatesForCandidate.magicFanoutTopicFollowsTargetingPredicate) + .withName("sim_cluster_erg_topic_follows_targeting") + } + + /** + * SimCluster and ERG and Topic Follows and User Follows Targeting Predicate (the predicates are combined with `or`) + */ + def simClusterErgTopicFollowsUserFollowsTargetingPredicate( + implicit stats: StatsReceiver, + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests] + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + simClusterErgTopicFollowsTargetingPredicate + .or( + MagicFanoutPredicatesForCandidate.followRankThreshold( + PushFeatureSwitchParams.MagicFanoutRealgraphRankThreshold)) + .withName("sim_cluster_erg_topic_follows_user_follows_targeting") + } + + /** + * SimCluster and ERG Targeting Predicate (the predicates are combined with `or`) + */ + def simClusterErgTargetingPredicate( + implicit stats: StatsReceiver + ): NamedPredicate[MagicFanoutEventPushCandidate] = { + MagicFanoutPredicatesForCandidate.magicFanoutSimClusterTargetingPredicate + .or(MagicFanoutPredicatesForCandidate.magicFanoutErgInterestRankThresholdPredicate) + .withName("sim_cluster_erg_targeting") + } +} + +/** + * Object to initialize and get the predicate map + */ +object MagicFanoutTargetingPredicateMapForCandidate {
+ + /** + * Builds the map from each targeting-predicate enum value to the predicate that implements it + * @param config: provides the stats receiver and the interests lookup store handed to the predicates + * @return Map[MagicFanoutTargetingPredicatesEnum, NamedPredicate[MagicFanoutEventPushCandidate]] + */ + def apply( + config: Config + ): Map[MagicFanoutTargetingPredicatesEnum, NamedPredicate[MagicFanoutEventPushCandidate]] = { + Map( + MagicFanoutTargetingPredicatesEnum.SimClusterAndERGAndTopicFollows -> MagicFanoutTargetingPredicateWrappersForCandidate + .simClusterErgTopicFollowsTargetingPredicate( + config.statsReceiver, + config.interestsWithLookupContextStore), + MagicFanoutTargetingPredicatesEnum.SimClusterAndERG -> MagicFanoutTargetingPredicateWrappersForCandidate + .simClusterErgTargetingPredicate(config.statsReceiver), + MagicFanoutTargetingPredicatesEnum.SimCluster -> MagicFanoutPredicatesForCandidate + .magicFanoutSimClusterTargetingPredicate(config.statsReceiver), + MagicFanoutTargetingPredicatesEnum.ERG -> MagicFanoutPredicatesForCandidate + .magicFanoutErgInterestRankThresholdPredicate(config.statsReceiver), + MagicFanoutTargetingPredicatesEnum.TopicFollows -> MagicFanoutPredicatesForCandidate + .magicFanoutTopicFollowsTargetingPredicate( + config.statsReceiver, + config.interestsWithLookupContextStore), + MagicFanoutTargetingPredicatesEnum.UserFollows -> MagicFanoutPredicatesForCandidate + .followRankThreshold( + PushFeatureSwitchParams.MagicFanoutRealgraphRankThreshold + )(config.statsReceiver), + MagicFanoutTargetingPredicatesEnum.SimClusterAndERGAndTopicFollowsAndUserFollows -> + MagicFanoutTargetingPredicateWrappersForCandidate + .simClusterErgTopicFollowsUserFollowsTargetingPredicate( + config.statsReceiver, + config.interestsWithLookupContextStore + ) + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/CRTBasedNtabCaretClickFatiguePredicates.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/CRTBasedNtabCaretClickFatiguePredicates.scala new file mode 100644 index 000000000..704f300a5 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/CRTBasedNtabCaretClickFatiguePredicates.scala @@ -0,0 +1,973 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.notificationservice.thriftscala.GenericType +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue +import com.twitter.hermit.predicate.Predicate +import com.twitter.frigate.common.base.Candidate +import com.twitter.frigate.common.base.RecommendationType +import com.twitter.frigate.common.base.TargetInfo +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.SeeLessOftenType +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.predicate.FrigateHistoryFatiguePredicate.TimeSeries +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.common.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicateHelper +import com.twitter.frigate.pushservice.predicate.CaretFeedbackHistoryFilter +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.frigate.common.predicate.FatiguePredicate +import com.twitter.frigate.pushservice.util.PushCapUtil +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.util.PushDeviceUtil + +object CRTBasedNtabCaretClickFatiguePredicates { + + private val MagicRecsCategory = "MagicRecs" + + private val HighQualityRefreshableTypes: Set[Option[String]] = Set( + 
Some("MagicRecHighQualityTweet"), + ) + + private def getUserStateWeight(target: Target): Future[Double] = { + PushDeviceUtil.isNtabOnlyEligible.map { + case true => + target.params(PushFeatureSwitchParams.SeeLessOftenNtabOnlyNotifUserPushCapWeight) + case _ => 1.0 + } + } + + def crtToSeeLessOftenType( + crt: CommonRecommendationType, + candidate: Candidate + with RecommendationType + with TargetInfo[ + Target + ], + ): SeeLessOftenType = { + val crtToSeeLessOftenTypeMap: Map[CommonRecommendationType, SeeLessOftenType] = { + RecTypes.f1FirstDegreeTypes.map((_, SeeLessOftenType.F1Type)).toMap + } + + crtToSeeLessOftenTypeMap.getOrElse(crt, SeeLessOftenType.OtherTypes) + } + + def genericTypeToSeeLessOftenType( + genericType: GenericType, + candidate: Candidate + with RecommendationType + with TargetInfo[ + Target + ] + ): SeeLessOftenType = { + val genericTypeToSeeLessOftenTypeMap: Map[GenericType, SeeLessOftenType] = { + Map(GenericType.MagicRecFirstDegreeTweetRecent -> SeeLessOftenType.F1Type) + } + + genericTypeToSeeLessOftenTypeMap.getOrElse(genericType, SeeLessOftenType.OtherTypes) + } + + def getWeightForCaretFeedback( + dislikedType: SeeLessOftenType, + candidate: Candidate + with RecommendationType + with TargetInfo[ + Target + ] + ): Double = { + def getWeightFromDislikedAndCurrentType( + dislikedType: SeeLessOftenType, + currentType: SeeLessOftenType + ): Double = { + val weightMap: Map[(SeeLessOftenType, SeeLessOftenType), Double] = { + + Map( + (SeeLessOftenType.F1Type, SeeLessOftenType.F1Type) -> candidate.target.params( + PushFeatureSwitchParams.SeeLessOftenF1TriggerF1PushCapWeight), + (SeeLessOftenType.OtherTypes, SeeLessOftenType.OtherTypes) -> candidate.target.params( + PushFeatureSwitchParams.SeeLessOftenNonF1TriggerNonF1PushCapWeight), + (SeeLessOftenType.F1Type, SeeLessOftenType.OtherTypes) -> candidate.target.params( + PushFeatureSwitchParams.SeeLessOftenF1TriggerNonF1PushCapWeight), + (SeeLessOftenType.OtherTypes, SeeLessOftenType.F1Type) -> 
candidate.target.params( + PushFeatureSwitchParams.SeeLessOftenNonF1TriggerF1PushCapWeight) + ) + } + + weightMap + .getOrElse( + (dislikedType, currentType), + candidate.target.params(PushFeatureSwitchParams.SeeLessOftenDefaultPushCapWeight)) + } + + getWeightFromDislikedAndCurrentType( + dislikedType, + crtToSeeLessOftenType(candidate.commonRecType, candidate)) + } + + private def isOutsideCrtBasedNtabCaretClickFatiguePeriodContFn( + candidate: Candidate + with RecommendationType + with TargetInfo[ + Target + ], + history: History, + feedbackDetails: Seq[CaretFeedbackDetails], + filterHistory: TimeSeries => TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = + CaretFeedbackHistoryFilter.caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + knobs: Seq[Double], + pushCapKnobs: Seq[Double], + powerKnobs: Seq[Double], + f1Weight: Double, + nonF1Weight: Double, + defaultPushCap: Int, + stats: StatsReceiver, + tripHqTweetWeight: Double = 0.0, + ): Boolean = { + val filteredFeedbackDetails = filterCaretFeedbackHistory(candidate.target)(feedbackDetails) + val weight = { + if (RecTypes.HighQualityTweetTypes.contains( + candidate.commonRecType) && (tripHqTweetWeight != 0)) { + tripHqTweetWeight + } else if (RecTypes.isF1Type(candidate.commonRecType)) { + f1Weight + } else { + nonF1Weight + } + } + val filteredHistory = History(filterHistory(history.history.toSeq).toMap) + isOutsideFatiguePeriod( + filteredHistory, + filteredFeedbackDetails, + Seq(), + ContinuousFunctionParam( + knobs, + pushCapKnobs, + powerKnobs, + weight, + defaultPushCap + ), + stats.scope( + if (RecTypes.isF1Type(candidate.commonRecType)) "mr_ntab_dislike_f1_candidate_fn" + else if (RecTypes.HighQualityTweetTypes.contains(candidate.commonRecType)) + "mr_ntab_dislike_high_quality_candidate_fn" + else "mr_ntab_dislike_nonf1_candidate_fn") + ) + } + + private def 
isOutsideFatiguePeriod( + history: History, + feedbackDetails: Seq[CaretFeedbackDetails], + feedbacks: Seq[FeedbackModel], + param: ContinuousFunctionParam, + stats: StatsReceiver + ): Boolean = { + // True when hasRecentSend reports no send in `history` within the fatigue period computed from the feedback + val fatiguePeriod: Duration = + NtabCaretClickFatigueUtils.durationToFilterForFeedback( + feedbackDetails, + feedbacks, + param, + param.defaultValue, + stats + ) + + val hasRecentSent = + NtabCaretClickFatiguePredicateHelper.hasRecentSend(history, fatiguePeriod) + !hasRecentSent + + } + + /** + * See-less-often fatigue predicate driven by caret feedback, using the continuous fatigue function. + * Passes immediately when EnableGenericCRTBasedFatiguePredicate is off, and also passes when the target has no caret feedback. + * Otherwise, if the user disliked something in the last 90 days and either cont-fn see-less-often switch is on, + * the caret feedback is grouped by SeeLessOftenType and the candidate must be outside the fatigue period of every group. + * The push cap used is the opt-out adjusted one if present, else the dynamic one, else the default. + * @param filterHistory filter applied to the send history before the recent-send check + * @param filterCaretFeedbackHistory filter applied to caret feedback before de-duplication + * @param filterInlineFeedbackHistory not referenced by this predicate + */ + def genericCRTBasedNtabCaretClickFnFatiguePredicate[ + Cand <: Candidate with RecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory: TimeSeries => TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = CaretFeedbackHistoryFilter + .caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + filterInlineFeedbackHistory: Seq[FeedbackModel] => Seq[FeedbackModel] = + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(RecTypes.sharedNTabCaretFatigueTypes) + )( + implicit stats: StatsReceiver + ): NamedPredicate[Cand] = { + val predicateName = "generic_crt_based_ntab_dislike_fatigue_fn" + Predicate + .fromAsync[Cand] { cand: Cand => + { + if (!cand.target.params(PushFeatureSwitchParams.EnableGenericCRTBasedFatiguePredicate)) { + Future.True + } else { + val scopedStats = stats.scope(predicateName) + val totalRequests = scopedStats.counter("mr_ntab_dislike_total") + val total90Day = + scopedStats.counter("mr_ntab_dislike_90day_dislike") + val totalDisabled = + scopedStats.counter("mr_ntab_dislike_not_90day_dislike") + val totalSuccess = scopedStats.counter("mr_ntab_dislike_success") + val totalFiltered = scopedStats.counter("mr_ntab_dislike_filtered") + val totalWithHistory = + scopedStats.counter("mr_ntab_dislike_with_history") + val totalWithoutHistory = + scopedStats.counter("mr_ntab_dislike_without_history") + totalRequests.incr() + + Future
+ .join( + cand.target.history, + cand.target.caretFeedbacks, + cand.target.dynamicPushcap, + cand.target.optoutAdjustedPushcap, + PushCapUtil.getDefaultPushCap(cand.target), + getUserStateWeight(cand.target) + ).map { + case ( + history, + Some(feedbackDetails), + dynamicPushcapOpt, + optoutAdjustedPushcapOpt, + defaultPushCap, + userStateWeight) => { + totalWithHistory.incr() + + val feedbackDetailsDeduped = + NtabCaretClickFatiguePredicateHelper.dedupFeedbackDetails( + filterCaretFeedbackHistory(cand.target)(feedbackDetails), + stats + ) + + val pushCap: Int = (dynamicPushcapOpt, optoutAdjustedPushcapOpt) match { + case (_, Some(optoutAdjustedPushcap)) => optoutAdjustedPushcap + case (Some(pushcapInfo), _) => pushcapInfo.pushcap + case _ => defaultPushCap + } + val filteredHistory = History(filterHistory(history.history.toSeq).toMap) + + val hasUserDislikeInLast90Days = + NtabCaretClickFatigueUtils.hasUserDislikeInLast90Days(feedbackDetailsDeduped) + val isF1TriggerFatigueEnabled = cand.target + .params(PushFeatureSwitchParams.EnableContFnF1TriggerSeeLessOftenFatigue) + val isNonF1TriggerFatigueEnabled = cand.target.params( + PushFeatureSwitchParams.EnableContFnNonF1TriggerSeeLessOftenFatigue) + + val isOutsideSeeLessOftenFatigue = + if (hasUserDislikeInLast90Days && (isF1TriggerFatigueEnabled || isNonF1TriggerFatigueEnabled)) { + total90Day.incr() + + // NOTE(review): this groups the raw feedbackDetails, not feedbackDetailsDeduped computed above - confirm that is intended + val feedbackDetailsGroupedBySeeLessOftenType: Map[Option[ + SeeLessOftenType + ], Seq[ + CaretFeedbackDetails + ]] = feedbackDetails.groupBy(feedbackDetail => + feedbackDetail.genericNotificationMetadata.map(x => + genericTypeToSeeLessOftenType(x.genericType, cand))) + + // Feedback without notification metadata (key None) is skipped; every remaining group is evaluated + val isOutsideFatiguePeriodSeq = + feedbackDetailsGroupedBySeeLessOftenType.collect { + case (Some(dislikedSeeLessOftenType), seqCaretFeedbackDetails) => + val weight = getWeightForCaretFeedback( + dislikedSeeLessOftenType, + cand) * userStateWeight
+ + isOutsideFatiguePeriod( + history = filteredHistory, + feedbackDetails = seqCaretFeedbackDetails, + feedbacks = Seq(), + param = ContinuousFunctionParam( + knobs = cand.target + .params(PushFeatureSwitchParams.SeeLessOftenListOfDayKnobs), + knobValues = cand.target + .params( + PushFeatureSwitchParams.SeeLessOftenListOfPushCapWeightKnobs).map( + _ * pushCap), + powers = cand.target + .params(PushFeatureSwitchParams.SeeLessOftenListOfPowerKnobs), + weight = weight, + defaultValue = pushCap + ), + scopedStats + ) + } + + isOutsideFatiguePeriodSeq.forall(identity) + } else { + totalDisabled.incr() + true + } + + if (isOutsideSeeLessOftenFatigue) { + totalSuccess.incr() + } else totalFiltered.incr() + + isOutsideSeeLessOftenFatigue + } + + case _ => + totalSuccess.incr() + totalWithoutHistory.incr() + true + } + } + } + }.withStats(stats.scope(predicateName)) + .withName(predicateName) + } + + def f1TriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Cand <: Candidate with RecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory: TimeSeries => TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = CaretFeedbackHistoryFilter + .caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + filterInlineFeedbackHistory: Seq[FeedbackModel] => Seq[FeedbackModel] = + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(RecTypes.sharedNTabCaretFatigueTypes) + )( + implicit stats: StatsReceiver + ): NamedPredicate[Cand] = { + val predicateName = "f1_triggered_crt_based_ntab_dislike_fatigue_fn" + Predicate + .fromAsync[Cand] { cand: Cand => + { + val scopedStats = stats.scope(predicateName) + val totalRequests = scopedStats.counter("mr_ntab_dislike_total") + val total90Day = + scopedStats.counter("mr_ntab_dislike_90day_dislike") + val totalDisabled =
scopedStats.counter("mr_ntab_dislike_not_90day_dislike") + val totalSuccess = scopedStats.counter("mr_ntab_dislike_success") + val totalFiltered = scopedStats.counter("mr_ntab_dislike_filtered") + val totalWithHistory = + scopedStats.counter("mr_ntab_dislike_with_history") + val totalWithoutHistory = + scopedStats.counter("mr_ntab_dislike_without_history") + totalRequests.incr() + + Future + .join( + cand.target.history, + cand.target.caretFeedbacks, + cand.target.dynamicPushcap, + cand.target.optoutAdjustedPushcap, + cand.target.notificationFeedbacks, + PushCapUtil.getDefaultPushCap(cand.target), + getUserStateWeight(cand.target) + ).map { + case ( + history, + Some(feedbackDetails), + dynamicPushcapOpt, + optoutAdjustedPushcapOpt, + Some(feedbacks), + defaultPushCap, + userStateWeight) => + totalWithHistory.incr() + + val feedbackDetailsDeduped = + NtabCaretClickFatiguePredicateHelper.dedupFeedbackDetails( + filterCaretFeedbackHistory(cand.target)(feedbackDetails), + stats + ) + + val pushCap: Int = (dynamicPushcapOpt, optoutAdjustedPushcapOpt) match { + case (_, Some(optoutAdjustedPushcap)) => optoutAdjustedPushcap + case (Some(pushcapInfo), _) => pushcapInfo.pushcap + case _ => defaultPushCap + } + val filteredHistory = History(filterHistory(history.history.toSeq).toMap) + + val isOutsideInlineDislikeFatigue = + if (cand.target + .params(PushFeatureSwitchParams.EnableContFnF1TriggerInlineFeedbackFatigue)) { + val weight = + if (RecTypes.isF1Type(cand.commonRecType)) { + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackF1TriggerF1PushCapWeight) + } else { + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackF1TriggerNonF1PushCapWeight) + } + + val inlineFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + 
.params(PushFeatureSwitchParams.InlineFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + isInlineDislikeOutsideFatiguePeriod( + cand, + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + InlineFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT( + RecTypes.f1FirstDegreeTypes)), + inlineFeedbackFatigueParam, + scopedStats + ) + } else true + + lazy val isOutsidePromptDislikeFatigue = + if (cand.target + .params(PushFeatureSwitchParams.EnableContFnF1TriggerPromptFeedbackFatigue)) { + val weight = + if (RecTypes.isF1Type(cand.commonRecType)) { + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackF1TriggerF1PushCapWeight) + } else { + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackF1TriggerNonF1PushCapWeight) + } + + val promptFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + isPromptDislikeOutsideFatiguePeriod( + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + PromptFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT( + RecTypes.f1FirstDegreeTypes)), + promptFeedbackFatigueParam, + scopedStats + ) + } else true + + isOutsideInlineDislikeFatigue && isOutsidePromptDislikeFatigue + + case _ => + totalSuccess.incr() + totalWithoutHistory.incr() + true + } + } + }.withStats(stats.scope(predicateName)) + .withName(predicateName) + } + + def nonF1TriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Cand <: Candidate with RecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory: TimeSeries 
=> TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = CaretFeedbackHistoryFilter + .caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + filterInlineFeedbackHistory: Seq[FeedbackModel] => Seq[FeedbackModel] = + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(RecTypes.sharedNTabCaretFatigueTypes) + )( + implicit stats: StatsReceiver + ): NamedPredicate[Cand] = { + val predicateName = "non_f1_triggered_crt_based_ntab_dislike_fatigue_fn" + Predicate + .fromAsync[Cand] { cand: Cand => + { + val scopedStats = stats.scope(predicateName) + val totalRequests = scopedStats.counter("mr_ntab_dislike_total") + val total90Day = + scopedStats.counter("mr_ntab_dislike_90day_dislike") + val totalDisabled = + scopedStats.counter("mr_ntab_dislike_not_90day_dislike") + val totalSuccess = scopedStats.counter("mr_ntab_dislike_success") + val totalFiltered = scopedStats.counter("mr_ntab_dislike_filtered") + val totalWithHistory = + scopedStats.counter("mr_ntab_dislike_with_history") + val totalWithoutHistory = + scopedStats.counter("mr_ntab_dislike_without_history") + val totalFeedbackSuccess = scopedStats.counter("mr_total_feedback_success") + totalRequests.incr() + + Future + .join( + cand.target.history, + cand.target.caretFeedbacks, + cand.target.dynamicPushcap, + cand.target.optoutAdjustedPushcap, + cand.target.notificationFeedbacks, + PushCapUtil.getDefaultPushCap(cand.target), + getUserStateWeight(cand.target), + ).map { + case ( + history, + Some(feedbackDetails), + dynamicPushcapOpt, + optoutAdjustedPushcapOpt, + Some(feedbacks), + defaultPushCap, + userStateWeight) => + totalWithHistory.incr() + + val filteredfeedbackDetails = + if (cand.target.params( + PushFeatureSwitchParams.AdjustTripHqTweetTriggeredNtabCaretClickFatigue)) { + val refreshableTypeFilter = CaretFeedbackHistoryFilter + 
.caretFeedbackHistoryFilterByRefreshableTypeDenyList( + HighQualityRefreshableTypes) + refreshableTypeFilter(cand.target)(feedbackDetails) + } else { + feedbackDetails + } + + val feedbackDetailsDeduped = + NtabCaretClickFatiguePredicateHelper.dedupFeedbackDetails( + filterCaretFeedbackHistory(cand.target)(filteredfeedbackDetails), + stats + ) + + val pushCap: Int = (dynamicPushcapOpt, optoutAdjustedPushcapOpt) match { + case (_, Some(optoutAdjustedPushcap)) => optoutAdjustedPushcap + case (Some(pushcapInfo), _) => pushcapInfo.pushcap + case _ => defaultPushCap + } + val filteredHistory = History(filterHistory(history.history.toSeq).toMap) + + val isOutsideInlineDislikeFatigue = + if (cand.target + .params( + PushFeatureSwitchParams.EnableContFnNonF1TriggerInlineFeedbackFatigue)) { + val weight = + if (RecTypes.isF1Type(cand.commonRecType)) + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackNonF1TriggerF1PushCapWeight) + else + cand.target + .params( + PushFeatureSwitchParams.InlineFeedbackNonF1TriggerNonF1PushCapWeight) + + val inlineFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + val excludedCRTs: Set[CommonRecommendationType] = + if (cand.target.params( + PushFeatureSwitchParams.AdjustTripHqTweetTriggeredNtabCaretClickFatigue)) { + RecTypes.f1FirstDegreeTypes ++ RecTypes.HighQualityTweetTypes + } else { + RecTypes.f1FirstDegreeTypes + } + + isInlineDislikeOutsideFatiguePeriod( + cand, + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + InlineFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelExcludeCRT(excludedCRTs)), + 
inlineFeedbackFatigueParam, + scopedStats + ) + } else true + + lazy val isOutsidePromptDislikeFatigue = + if (cand.target + .params( + PushFeatureSwitchParams.EnableContFnNonF1TriggerPromptFeedbackFatigue)) { + val weight = + if (RecTypes.isF1Type(cand.commonRecType)) + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackNonF1TriggerF1PushCapWeight) + else + cand.target + .params( + PushFeatureSwitchParams.PromptFeedbackNonF1TriggerNonF1PushCapWeight) + + val promptFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + isPromptDislikeOutsideFatiguePeriod( + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + PromptFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelExcludeCRT( + RecTypes.f1FirstDegreeTypes)), + promptFeedbackFatigueParam, + scopedStats + ) + } else true + + isOutsideInlineDislikeFatigue && isOutsidePromptDislikeFatigue + case _ => + totalFeedbackSuccess.incr() + totalWithoutHistory.incr() + true + } + } + }.withStats(stats.scope(predicateName)) + .withName(predicateName) + } + + def tripHqTweetTriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Cand <: Candidate with RecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory: TimeSeries => TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = CaretFeedbackHistoryFilter + .caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + filterInlineFeedbackHistory: Seq[FeedbackModel] => Seq[FeedbackModel] = + 
NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(RecTypes.sharedNTabCaretFatigueTypes) + )( + implicit stats: StatsReceiver + ): NamedPredicate[Cand] = { + val predicateName = "trip_hq_tweet_triggered_crt_based_ntab_dislike_fatigue_fn" + Predicate + .fromAsync[Cand] { cand: Cand => + { + val scopedStats = stats.scope(predicateName) + val totalRequests = scopedStats.counter("mr_ntab_dislike_total") + val total90Day = + scopedStats.counter("mr_ntab_dislike_90day_dislike") + val totalDisabled = + scopedStats.counter("mr_ntab_dislike_not_90day_dislike") + val totalSuccess = scopedStats.counter("mr_ntab_dislike_success") + val totalFiltered = scopedStats.counter("mr_ntab_dislike_filtered") + val totalWithHistory = + scopedStats.counter("mr_ntab_dislike_with_history") + val totalWithoutHistory = + scopedStats.counter("mr_ntab_dislike_without_history") + val totalFeedbackSuccess = scopedStats.counter("mr_total_feedback_success") + totalRequests.incr() + + Future + .join( + cand.target.history, + cand.target.caretFeedbacks, + cand.target.dynamicPushcap, + cand.target.optoutAdjustedPushcap, + cand.target.notificationFeedbacks, + PushCapUtil.getDefaultPushCap(cand.target), + getUserStateWeight(cand.target), + ).map { + case ( + history, + Some(feedbackDetails), + dynamicPushcapOpt, + optoutAdjustedPushcapOpt, + Some(feedbacks), + defaultPushCap, + userStateWeight) => + totalWithHistory.incr() + if (cand.target.params( + PushFeatureSwitchParams.AdjustTripHqTweetTriggeredNtabCaretClickFatigue)) { + + val refreshableTypeFilter = CaretFeedbackHistoryFilter + .caretFeedbackHistoryFilterByRefreshableType(HighQualityRefreshableTypes) + val filteredfeedbackDetails = refreshableTypeFilter(cand.target)(feedbackDetails) + + val feedbackDetailsDeduped = + NtabCaretClickFatiguePredicateHelper.dedupFeedbackDetails( + filterCaretFeedbackHistory(cand.target)(filteredfeedbackDetails), + stats + ) + + val pushCap: Int = (dynamicPushcapOpt, optoutAdjustedPushcapOpt) match { + case (_, 
Some(optoutAdjustedPushcap)) => optoutAdjustedPushcap + case (Some(pushcapInfo), _) => pushcapInfo.pushcap + case _ => defaultPushCap + } + val filteredHistory = History(filterHistory(history.history.toSeq).toMap) + + val isOutsideInlineDislikeFatigue = + if (cand.target + .params( + PushFeatureSwitchParams.EnableContFnNonF1TriggerInlineFeedbackFatigue)) { + val weight = { + if (RecTypes.HighQualityTweetTypes.contains(cand.commonRecType)) { + cand.target + .params( + PushFeatureSwitchParams.InlineFeedbackNonF1TriggerNonF1PushCapWeight) + } else { + cand.target + .params( + PushFeatureSwitchParams.InlineFeedbackNonF1TriggerF1PushCapWeight) + } + } + + val inlineFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + .params(PushFeatureSwitchParams.InlineFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + val includedCRTs: Set[CommonRecommendationType] = + RecTypes.HighQualityTweetTypes + + isInlineDislikeOutsideFatiguePeriod( + cand, + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + InlineFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(includedCRTs)), + inlineFeedbackFatigueParam, + scopedStats + ) + } else true + + lazy val isOutsidePromptDislikeFatigue = + if (cand.target + .params( + PushFeatureSwitchParams.EnableContFnNonF1TriggerPromptFeedbackFatigue)) { + val weight = + if (RecTypes.isF1Type(cand.commonRecType)) + cand.target + .params( + PushFeatureSwitchParams.PromptFeedbackNonF1TriggerF1PushCapWeight) + else + cand.target + .params( + PushFeatureSwitchParams.PromptFeedbackNonF1TriggerNonF1PushCapWeight) + + val promptFeedbackFatigueParam = ContinuousFunctionParam( + cand.target + 
.params(PushFeatureSwitchParams.PromptFeedbackListOfDayKnobs), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPushCapWeightKnobs) + .map(_ * pushCap), + cand.target + .params(PushFeatureSwitchParams.PromptFeedbackListOfPowerKnobs), + weight, + pushCap + ) + + isPromptDislikeOutsideFatiguePeriod( + feedbacks + .collect { + case feedbackPromptValue: FeedbackPromptValue => + PromptFeedbackModel(feedbackPromptValue, None) + }, + filteredHistory, + Seq( + filterInlineFeedbackHistory, + NtabCaretClickFatigueUtils.feedbackModelExcludeCRT( + RecTypes.f1FirstDegreeTypes)), + promptFeedbackFatigueParam, + scopedStats + ) + } else true + + isOutsideInlineDislikeFatigue && isOutsidePromptDislikeFatigue + } else { + true + } + case _ => + totalFeedbackSuccess.incr() + totalWithoutHistory.incr() + true + } + } + }.withStats(stats.scope(predicateName)) + .withName(predicateName) + } + + private def getDedupedInlineFeedbackByType( + inlineFeedbacks: Seq[FeedbackModel], + feedbackType: FeedbackTypeEnum.Value, + revertedFeedbackType: FeedbackTypeEnum.Value + ): Seq[FeedbackModel] = { + inlineFeedbacks + .filter(feedback => + feedback.feedbackTypeEnum == feedbackType || + feedback.feedbackTypeEnum == revertedFeedbackType) + .groupBy(feedback => feedback.notificationImpressionId.getOrElse("")) + .toSeq + .collect { + case (impressionId, feedbacks: Seq[FeedbackModel]) if (feedbacks.nonEmpty) => + val latestFeedback = feedbacks.maxBy(feedback => feedback.timestampMs) + if (latestFeedback.feedbackTypeEnum == feedbackType) + Some(latestFeedback) + else None + case _ => None + } + .flatten + } + + private def getDedupedInlineFeedback( + inlineFeedbacks: Seq[FeedbackModel], + target: Target + ): Seq[FeedbackModel] = { + val inlineDislikeFeedback = + if (target.params(PushFeatureSwitchParams.UseInlineDislikeForFatigue)) { + getDedupedInlineFeedbackByType( + inlineFeedbacks, + FeedbackTypeEnum.InlineDislike, + FeedbackTypeEnum.InlineRevertedDislike) + } else Seq() + val 
inlineDismissFeedback = + if (target.params(PushFeatureSwitchParams.UseInlineDismissForFatigue)) { + getDedupedInlineFeedbackByType( + inlineFeedbacks, + FeedbackTypeEnum.InlineDismiss, + FeedbackTypeEnum.InlineRevertedDismiss) + } else Seq() + val inlineSeeLessFeedback = + if (target.params(PushFeatureSwitchParams.UseInlineSeeLessForFatigue)) { + getDedupedInlineFeedbackByType( + inlineFeedbacks, + FeedbackTypeEnum.InlineSeeLess, + FeedbackTypeEnum.InlineRevertedSeeLess) + } else Seq() + val inlineNotRelevantFeedback = + if (target.params(PushFeatureSwitchParams.UseInlineNotRelevantForFatigue)) { + getDedupedInlineFeedbackByType( + inlineFeedbacks, + FeedbackTypeEnum.InlineNotRelevant, + FeedbackTypeEnum.InlineRevertedNotRelevant) + } else Seq() + + inlineDislikeFeedback ++ inlineDismissFeedback ++ inlineSeeLessFeedback ++ inlineNotRelevantFeedback + } + + private def isInlineDislikeOutsideFatiguePeriod( + candidate: Candidate + with RecommendationType + with TargetInfo[ + Target + ], + inlineFeedbacks: Seq[FeedbackModel], + filteredHistory: History, + feedbackFilters: Seq[Seq[FeedbackModel] => Seq[FeedbackModel]], + inlineFeedbackFatigueParam: ContinuousFunctionParam, + stats: StatsReceiver + ): Boolean = { + val scopedStats = stats.scope("inline_dislike_fatigue") + + val inlineNegativeFeedback = + getDedupedInlineFeedback(inlineFeedbacks, candidate.target) + + val hydratedInlineNegativeFeedback = FeedbackModelHydrator.HydrateNotification( + inlineNegativeFeedback, + filteredHistory.history.toSeq.map(_._2)) + + if (isOutsideFatiguePeriod( + filteredHistory, + Seq(), + feedbackFilters.foldLeft(hydratedInlineNegativeFeedback)((feedbacks, feedbackFilter) => + feedbackFilter(feedbacks)), + inlineFeedbackFatigueParam, + scopedStats + )) { + scopedStats.counter("feedback_inline_dislike_success").incr() + true + } else { + scopedStats.counter("feedback_inline_dislike_filtered").incr() + false + } + } + + private def isPromptDislikeOutsideFatiguePeriod( + feedbacks: 
Seq[FeedbackModel], + filteredHistory: History, + feedbackFilters: Seq[Seq[FeedbackModel] => Seq[FeedbackModel]], + inlineFeedbackFatigueParam: ContinuousFunctionParam, + stats: StatsReceiver + ): Boolean = { + val scopedStats = stats.scope("prompt_dislike_fatigue") + + val promptDislikeFeedback = feedbacks + .filter(feedback => feedback.feedbackTypeEnum == FeedbackTypeEnum.PromptIrrelevant) + val hydratedPromptDislikeFeedback = FeedbackModelHydrator.HydrateNotification( + promptDislikeFeedback, + filteredHistory.history.toSeq.map(_._2)) + + if (isOutsideFatiguePeriod( + filteredHistory, + Seq(), + feedbackFilters.foldLeft(hydratedPromptDislikeFeedback)((feedbacks, feedbackFilter) => + feedbackFilter(feedbacks)), + inlineFeedbackFatigueParam, + scopedStats + )) { + scopedStats.counter("feedback_prompt_dislike_success").incr() + true + } else { + scopedStats.counter("feedback_prompt_dislike_filtered").incr() + false + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/ContinuousFunction.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/ContinuousFunction.scala new file mode 100644 index 000000000..862541b63 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/ContinuousFunction.scala @@ -0,0 +1,148 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver + +case class ContinuousFunctionParam( + knobs: Seq[Double], + knobValues: Seq[Double], + powers: Seq[Double], + weight: Double, + defaultValue: Double) { + + def validateParams(): Boolean = { + knobs.size > 0 && knobs.size - 1 == powers.size && knobs.size == knobValues.size + } +} + +object ContinuousFunction { + + /** + * Evalutate the value for function f(x) = w(x - b)^power + * where w and b are decided by the start, startVal, end, endVal + * such that + * w(start - b) ^ power = startVal + 
* w(end - b) ^ power = endVal
+   *
+   * Segments the closed form cannot fit are evaluated explicitly (they used to
+   * produce NaN through 0/0 or inf/inf):
+   *  - startVal == endVal: the segment is flat, f(x) = startVal
+   *  - startVal == 0: b = start, f(x) = endVal * ((x - start) / (end - start)) ^ power
+   *  - endVal == 0: b = end, f(x) = startVal * ((x - end) / (start - end)) ^ power
+   * The two zero-endpoint forms assume power > 0 -- TODO confirm knob powers are positive.
+   *
+   * @param value the value at which we will evaluate the param
+   * @param start x coordinate of the segment start
+   * @param startVal f(start)
+   * @param end x coordinate of the segment end
+   * @param endVal f(end)
+   * @param power exponent of the segment
+   * @param weight multiplier applied to f(value)
+   * @return weight * f(value)
+   */
+  def evaluateFn(
+    value: Double,
+    start: Double,
+    startVal: Double,
+    end: Double,
+    endVal: Double,
+    power: Double,
+    weight: Double
+  ): Double = {
+    if (startVal == endVal) {
+      // Flat segment: the ratio below would be 1 and b would divide by zero.
+      weight * startVal
+    } else if (startVal == 0.0) {
+      // The curve is anchored at b = start; take w from the end point instead of 0/0.
+      weight * endVal * math.pow((value - start) / (end - start), power)
+    } else if (endVal == 0.0) {
+      // The curve is anchored at b = end; startVal / endVal would be infinite.
+      weight * startVal * math.pow((value - end) / (start - end), power)
+    } else {
+      // (start - b) / (end - b) = (startVal / endVal) ^ (1 / power), solved for b.
+      val ratio = math.pow(startVal / endVal, 1 / power)
+      val b = (ratio * end - start) / (ratio - 1)
+      val w = startVal / math.pow(start - b, power)
+      weight * w * math.pow(value - b, power)
+    }
+  }
+
+  /**
+   * Evaluate value for function f(x), and return weight * f(x)
+   *
+   * f(x) is a piecewise function
+   * f(x) = w_i * (x - b_i)^powers[i] for knobs[i] <= x < knobs[i+1]
+   * such that
+   *   w(knobs[i] - b) ^ power = knobVals[i]
+   *   w(knobs[i+1] - b) ^ power = knobVals[i+1]
+   *
+   * Values below the first knob evaluate to weight * knobVals.head; values at or
+   * above the last knob evaluate to weight * knobVals.last.
+   *
+   * Increments the counters safe_evalfn_total, safe_evalfn_valid,
+   * safe_evalfn_valid_endcase and safe_evalfn_invalid on statsReceiver.
+   *
+   * @param value x at which to evaluate
+   * @param knobs segment boundaries; the first knob greater than value selects the segment
+   * @param knobVals f(x) at each knob; must have the same size as knobs
+   * @param powers exponent per segment; must have knobs.size - 1 entries
+   * @param weight multiplier applied to f(value)
+   * @param defaultVal returned as-is when the sizes above are inconsistent or knobs is empty
+   * @param statsReceiver receiver for the counters listed above
+   * @return Evaluate value for weight * f(x), for the function described above. If any of the inputs is invalid, returns defaultVal
+   */
+  def safeEvaluateFn(
+    value: Double,
+    knobs: Seq[Double],
+    knobVals: Seq[Double],
+    powers: Seq[Double],
+    weight: Double,
+    defaultVal: Double,
+    statsReceiver: StatsReceiver
+  ): Double = {
+    val totalStats = statsReceiver.counter("safe_evalfn_total")
+    val validStats =
+      statsReceiver.counter("safe_evalfn_valid")
+    val validEndCaseStats =
+      statsReceiver.counter("safe_evalfn_valid_endcase")
+    val invalidStats = statsReceiver.counter("safe_evalfn_invalid")
+
+    totalStats.incr()
+    if (knobs.size <= 0 || knobs.size - 1 != powers.size || knobs.size != knobVals.size) {
+      invalidStats.incr()
+      defaultVal
+    } else {
+      // Index of the first knob strictly greater than value, i.e. the segment's end.
+      val endIndex = knobs.indexWhere(knob => knob > value)
+      validStats.incr()
+      endIndex match {
+        case -1 => {
+          // value is at or beyond the last knob: clamp to the last knob value.
+          validEndCaseStats.incr()
+          knobVals(knobVals.size - 1) * weight
+        }
+        case 0 => {
+          // value is before the first knob: clamp to the first knob value.
+          validEndCaseStats.incr()
+          knobVals(0) * weight
+        }
+        case _ => {
+          val startIndex = endIndex - 1
+          evaluateFn(
+            value,
+            knobs(startIndex),
+            knobVals(startIndex),
+            knobs(endIndex),
+            knobVals(endIndex),
+            powers(startIndex),
+            weight)
+        }
+      }
+    }
+  }
+
+  /**
+   * Same as the seven-argument `safeEvaluateFn`, with the curve supplied as a
+   * [[ContinuousFunctionParam]].
+   *
+   * Delegates to that overload so validation, counters and segment selection
+   * live in one place; its size checks are the negation of
+   * `fnParams.validateParams()`, so invalid params still yield `fnParams.defaultValue`.
+   *
+   * @param value x at which to evaluate
+   * @param fnParams knobs, knob values, powers, weight and default value of the curve
+   * @param statsReceiver receiver for the safe_evalfn_* counters
+   * @return weight * f(value), or `fnParams.defaultValue` when fnParams is invalid
+   */
+  def safeEvaluateFn(
+    value: Double,
+    fnParams: ContinuousFunctionParam,
+    statsReceiver: StatsReceiver
+  ): Double =
+    safeEvaluateFn(
+      value,
+      fnParams.knobs,
+      fnParams.knobValues,
+      fnParams.powers,
+      fnParams.weight,
+      fnParams.defaultValue,
+      statsReceiver
+    )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/FeedbackModel.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/FeedbackModel.scala
new file mode 100644
index 000000000..654889901
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/FeedbackModel.scala
@@ -0,0 +1,136 @@
+package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue
+
+import com.twitter.notificationservice.thriftscala.GenericType
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue
+import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails
+import com.twitter.notificationservice.feedback.thriftscala.FeedbackMetadata
+import
com.twitter.notificationservice.feedback.thriftscala.InlineFeedback +import com.twitter.notificationservice.feedback.thriftscala.FeedbackValue +import com.twitter.notificationservice.feedback.thriftscala.YesOrNoAnswer + +object FeedbackTypeEnum extends Enumeration { + val Unknown = Value + val CaretDislike = Value + val InlineDislike = Value + val InlineLike = Value + val InlineRevertedLike = Value + val InlineRevertedDislike = Value + val PromptRelevant = Value + val PromptIrrelevant = Value + val InlineDismiss = Value + val InlineRevertedDismiss = Value + val InlineSeeLess = Value + val InlineRevertedSeeLess = Value + val InlineNotRelevant = Value + val InlineRevertedNotRelevant = Value + + def safeFindByName(name: String): Value = + values.find(_.toString.toLowerCase() == name.toLowerCase()).getOrElse(Unknown) +} + +trait FeedbackModel { + + def timestampMs: Long + + def feedbackTypeEnum: FeedbackTypeEnum.Value + + def notificationImpressionId: Option[String] + + def notification: Option[FrigateNotification] = None +} + +case class CaretFeedbackModel( + caretFeedbackDetails: CaretFeedbackDetails, + notificationOpt: Option[FrigateNotification] = None) + extends FeedbackModel { + + override def timestampMs: Long = caretFeedbackDetails.eventTimestamp + + override def feedbackTypeEnum: FeedbackTypeEnum.Value = FeedbackTypeEnum.CaretDislike + + override def notificationImpressionId: Option[String] = caretFeedbackDetails.impressionId + + override def notification: Option[FrigateNotification] = notificationOpt + + def notificationGenericType: Option[GenericType] = { + caretFeedbackDetails.genericNotificationMetadata match { + case Some(genericNotificationMetadata) => + Some(genericNotificationMetadata.genericType) + case None => None + } + } +} + +case class InlineFeedbackModel( + feedback: FeedbackPromptValue, + notificationOpt: Option[FrigateNotification] = None) + extends FeedbackModel { + + override def timestampMs: Long = feedback.createdAt.inMilliseconds + + 
override def feedbackTypeEnum: FeedbackTypeEnum.Value = { + feedback.feedbackValue match { + case FeedbackValue( + _, + _, + _, + Some(FeedbackMetadata.InlineFeedback(InlineFeedback(Some(answer))))) => + FeedbackTypeEnum.safeFindByName("inline" + answer) + case _ => FeedbackTypeEnum.Unknown + } + } + + override def notificationImpressionId: Option[String] = Some(feedback.feedbackValue.impressionId) + + override def notification: Option[FrigateNotification] = notificationOpt +} + +case class PromptFeedbackModel( + feedback: FeedbackPromptValue, + notificationOpt: Option[FrigateNotification] = None) + extends FeedbackModel { + + override def timestampMs: Long = feedback.createdAt.inMilliseconds + + override def feedbackTypeEnum: FeedbackTypeEnum.Value = { + feedback.feedbackValue match { + case FeedbackValue(_, _, _, Some(FeedbackMetadata.YesOrNoAnswer(answer))) => + answer match { + case YesOrNoAnswer.Yes => FeedbackTypeEnum.PromptRelevant + case YesOrNoAnswer.No => FeedbackTypeEnum.PromptIrrelevant + case _ => FeedbackTypeEnum.Unknown + } + case _ => FeedbackTypeEnum.Unknown + } + } + + override def notificationImpressionId: Option[String] = Some(feedback.feedbackValue.impressionId) + + override def notification: Option[FrigateNotification] = notificationOpt +} + +object FeedbackModelHydrator { + + def HydrateNotification( + feedbacks: Seq[FeedbackModel], + history: Seq[FrigateNotification] + ): Seq[FeedbackModel] = { + feedbacks.map { + case feedback @ (inlineFeedback: InlineFeedbackModel) => + inlineFeedback.copy(notificationOpt = history.find( + _.impressionId + .equals(feedback.notificationImpressionId))) + case feedback @ (caretFeedback: CaretFeedbackModel) => + caretFeedback.copy(notificationOpt = history.find( + _.impressionId + .equals(feedback.notificationImpressionId))) + case feedback @ (promptFeedback: PromptFeedbackModel) => + promptFeedback.copy(notificationOpt = history.find( + _.impressionId + .equals(feedback.notificationImpressionId))) + case 
feedback => feedback + } + + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/MagicFanoutNtabCaretFatiguePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/MagicFanoutNtabCaretFatiguePredicate.scala new file mode 100644 index 000000000..040543660 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/MagicFanoutNtabCaretFatiguePredicate.scala @@ -0,0 +1,28 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicateHelper +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate + +object MagicFanoutNtabCaretFatiguePredicate { + val name = "MagicFanoutNtabCaretFatiguePredicateForCandidate" + + private val MomentsCategory = "Moments" + private val MomentsViaMagicRecsCategory = "MomentsViaMagicRecs" + + def apply()(implicit globalStats: StatsReceiver): NamedPredicate[PushCandidate] = { + val scopedStats = globalStats.scope(name) + val genericTypeCategories = Seq(MomentsCategory, MomentsViaMagicRecsCategory) + val crts = RecTypes.magicFanoutEventTypes + RecTypeNtabCaretClickFatiguePredicate + .apply( + genericTypeCategories, + crts, + NtabCaretClickFatiguePredicateHelper.calculateFatiguePeriodMagicRecs, + useMostRecentDislikeTime = true, + name = name + ).withStats(scopedStats).withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickCandidateFatiguePredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickCandidateFatiguePredicate.scala new file mode 100644 index 000000000..376d9b11f --- 
/dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickCandidateFatiguePredicate.scala @@ -0,0 +1,87 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.FatiguePredicate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.frigate.common.base.Candidate +import com.twitter.frigate.common.base.TargetInfo +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.base.{RecommendationType => BaseRecommendationType} +import com.twitter.frigate.common.predicate.CandidateWithRecommendationTypeAndTargetInfoWithCaretFeedbackHistory +import com.twitter.frigate.common.predicate.FrigateHistoryFatiguePredicate.TimeSeries +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.predicate.CaretFeedbackHistoryFilter + +object NtabCaretClickContFnFatiguePredicate { + + private val MagicRecsCategory = "MagicRecs" + + def ntabCaretClickContFnFatiguePredicates( + filterHistory: TimeSeries => TimeSeries = + FatiguePredicate.recTypesOnlyFilter(RecTypes.sharedNTabCaretFatigueTypes), + filterCaretFeedbackHistory: Target => Seq[ + CaretFeedbackDetails + ] => Seq[CaretFeedbackDetails] = + CaretFeedbackHistoryFilter.caretFeedbackHistoryFilter(Seq(MagicRecsCategory)), + filterInlineFeedbackHistory: Seq[FeedbackModel] => Seq[FeedbackModel] = + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(RecTypes.sharedNTabCaretFatigueTypes), + name: String = "NTabCaretClickFnCandidatePredicates" + )( + implicit globalStats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val scopedStats = globalStats.scope(name) + CRTBasedNtabCaretClickFatiguePredicates + 
.f1TriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Candidate with BaseRecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory = filterHistory, + filterCaretFeedbackHistory = filterCaretFeedbackHistory, + filterInlineFeedbackHistory = filterInlineFeedbackHistory + ) + .applyOnlyToCandidateWithRecommendationTypeAndTargetWithCaretFeedbackHistory + .withName("f1_triggered_fn_seelessoften_fatigue") + .andThen( + CRTBasedNtabCaretClickFatiguePredicates + .nonF1TriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Candidate with BaseRecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory = filterHistory, + filterCaretFeedbackHistory = filterCaretFeedbackHistory, + filterInlineFeedbackHistory = filterInlineFeedbackHistory + ) + .applyOnlyToCandidateWithRecommendationTypeAndTargetWithCaretFeedbackHistory) + .withName("nonf1_triggered_fn_seelessoften_fatigue") + .andThen( + CRTBasedNtabCaretClickFatiguePredicates + .tripHqTweetTriggeredCRTBasedNtabCaretClickFnFatiguePredicate[ + Candidate with BaseRecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory = filterHistory, + filterCaretFeedbackHistory = filterCaretFeedbackHistory, + filterInlineFeedbackHistory = filterInlineFeedbackHistory + ) + .applyOnlyToCandidateWithRecommendationTypeAndTargetWithCaretFeedbackHistory) + .withName("trip_hq_tweet_triggered_fn_seelessoften_fatigue") + .andThen( + CRTBasedNtabCaretClickFatiguePredicates + .genericCRTBasedNtabCaretClickFnFatiguePredicate[ + Candidate with BaseRecommendationType with TargetInfo[ + Target + ] + ]( + filterHistory = filterHistory, + filterCaretFeedbackHistory = filterCaretFeedbackHistory, + filterInlineFeedbackHistory = filterInlineFeedbackHistory) + .applyOnlyToCandidateWithRecommendationTypeAndTargetWithCaretFeedbackHistory + .withName("generic_fn_seelessoften_fatigue") + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatiguePredicate.scala 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatiguePredicate.scala new file mode 100644 index 000000000..579f4b25f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatiguePredicate.scala @@ -0,0 +1,47 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicateHelper +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object NtabCaretClickFatiguePredicate { + val name = "NtabCaretClickFatiguePredicate" + + def isSpacesTypeAndTeamMember(candidate: PushCandidate): Future[Boolean] = { + candidate.target.isTeamMember.map { isTeamMember => + val isSpacesType = RecTypes.isRecommendedSpacesType(candidate.commonRecType) + isTeamMember && isSpacesType + } + } + + def apply()(implicit globalStats: StatsReceiver): NamedPredicate[PushCandidate] = { + val scopedStats = globalStats.scope(name) + val genericTypeCategories = Seq("MagicRecs") + val crts = RecTypes.sharedNTabCaretFatigueTypes + val recTypeNtabCaretClickFatiguePredicate = + RecTypeNtabCaretClickFatiguePredicate.apply( + genericTypeCategories, + crts, + NtabCaretClickFatiguePredicateHelper.calculateFatiguePeriodMagicRecs, + useMostRecentDislikeTime = false + ) + Predicate + .fromAsync { candidate: PushCandidate => + isSpacesTypeAndTeamMember(candidate).flatMap { isSpacesTypeAndTeamMember => + if (RecTypes.sharedNTabCaretFatigueTypes( + candidate.commonRecType) && !isSpacesTypeAndTeamMember) { + recTypeNtabCaretClickFatiguePredicate + .apply(Seq(candidate)).map(_.headOption.getOrElse(false)) + } else { + Future.True + } + } + } + 
.withStats(scopedStats) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatigueUtils.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatigueUtils.scala new file mode 100644 index 000000000..cc2c0c072 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/NtabCaretClickFatigueUtils.scala @@ -0,0 +1,108 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicateHelper +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.util.Duration +import com.twitter.conversions.DurationOps._ +import scala.math.min +import com.twitter.util.Time +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} + +object NtabCaretClickFatigueUtils { + + private def pushCapForFeedback( + feedbackDetails: Seq[CaretFeedbackDetails], + feedbacks: Seq[FeedbackModel], + param: ContinuousFunctionParam, + statsReceiver: StatsReceiver + ): Double = { + val stats = statsReceiver.scope("mr_seelessoften_contfn_pushcap") + val pushCapTotal = stats.counter("pushcap_total") + val pushCapInvalid = + stats.counter("pushcap_invalid") + + pushCapTotal.incr() + val timeSinceMostRecentDislikeMs = + NtabCaretClickFatiguePredicateHelper.getDurationSinceMostRecentDislike(feedbackDetails) + val mostRecentFeedbackTimestamp: Option[Long] = + feedbacks + .map { feedback => + feedback.timestampMs + }.reduceOption(_ max _) + val timeSinceMostRecentFeedback: Option[Duration] = + mostRecentFeedbackTimestamp.map(Time.now - Time.fromMilliseconds(_)) + + val nTabDislikePushCap = timeSinceMostRecentDislikeMs match { + case Some(lastDislikeTimeMs) => { + 
ContinuousFunction.safeEvaluateFn(lastDislikeTimeMs.inDays.toDouble, param, stats)
+      }
+      case _ => {
+        pushCapInvalid.incr()
+        param.defaultValue
+      }
+    }
+    val feedbackPushCap = timeSinceMostRecentFeedback match {
+      case Some(lastDislikeTimeVal) => {
+        ContinuousFunction.safeEvaluateFn(lastDislikeTimeVal.inDays.toDouble, param, stats)
+      }
+      case _ => {
+        pushCapInvalid.incr()
+        param.defaultValue
+      }
+    }
+
+    min(nTabDislikePushCap, feedbackPushCap)
+  }
+
+  /**
+   * Converts the feedback-derived push cap into a spacing between pushes.
+   *
+   * The cap is the smaller of `pushCapForFeedback(...)` and `defaultPushCap`.
+   *
+   * @return `Duration.Top` when the cap is <= 0, otherwise 24 hours divided by the cap
+   */
+  def durationToFilterForFeedback(
+    feedbackDetails: Seq[CaretFeedbackDetails],
+    feedbacks: Seq[FeedbackModel],
+    param: ContinuousFunctionParam,
+    defaultPushCap: Double,
+    statsReceiver: StatsReceiver
+  ): Duration = {
+    val computedCap = pushCapForFeedback(feedbackDetails, feedbacks, param, statsReceiver)
+    val effectiveCap = min(computedCap, defaultPushCap)
+
+    if (effectiveCap <= 0) Duration.Top
+    else 24.hours / effectiveCap
+  }
+
+  /** True when the most recent dislike in `feedbackDetails` is less than 90 days old. */
+  def hasUserDislikeInLast90Days(feedbackDetails: Seq[CaretFeedbackDetails]): Boolean =
+    NtabCaretClickFatiguePredicateHelper
+      .getDurationSinceMostRecentDislike(feedbackDetails)
+      .exists(_ < 90.days)
+
+  /**
+   * Builds a filter keeping only feedback whose notification has one of `crts`.
+   * Feedback without a notification is dropped.
+   */
+  def feedbackModelFilterByCRT(
+    crts: Set[CRT]
+  ): Seq[FeedbackModel] => Seq[FeedbackModel] = { models =>
+    models.filter(_.notification.exists(n => crts.contains(n.commonRecommendationType)))
+  }
+
+  /**
+   * Builds a filter dropping feedback whose notification has one of `crts`.
+   * Feedback without a notification is kept.
+   */
+  def feedbackModelExcludeCRT(
+    crts: Set[CRT]
+  ): Seq[FeedbackModel] => Seq[FeedbackModel] = { models =>
+    models.filter(_.notification.forall(n => !crts.contains(n.commonRecommendationType)))
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/RecTypeNtabCaretFatiguePredicate.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/RecTypeNtabCaretFatiguePredicate.scala new file mode 100644 index 000000000..d83650de0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/ntab_caret_fatigue/RecTypeNtabCaretFatiguePredicate.scala @@ -0,0 +1,87 @@ +package com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.predicate.FatiguePredicate +import com.twitter.frigate.pushservice.predicate.CaretFeedbackHistoryFilter +import com.twitter.frigate.pushservice.predicate.{ + TargetNtabCaretClickFatiguePredicate => CommonNtabCaretClickFatiguePredicate +} +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.thriftscala.NotificationDisplayLocation +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails +import com.twitter.util.Duration +import com.twitter.util.Future + +object RecTypeNtabCaretClickFatiguePredicate { + val defaultName = "RecTypeNtabCaretClickFatiguePredicateForCandidate" + + private def candidateFatiguePredicate( + genericTypeCategories: Seq[String], + crts: Set[CRT] + )( + implicit stats: StatsReceiver + ): NamedPredicate[ + PushCandidate + ] = { + val name = "f1TriggeredCRTBasedFatiguePredciate" + val scopedStats = stats.scope(s"predicate_$name") + Predicate + .fromAsync { candidate: PushCandidate => + if (candidate.frigateNotification.notificationDisplayLocation == NotificationDisplayLocation.PushToMobileDevice) { + if (candidate.target.params(PushParams.EnableFatigueNtabCaretClickingParam)) { + NtabCaretClickContFnFatiguePredicate + .ntabCaretClickContFnFatiguePredicates( + filterHistory = 
FatiguePredicate.recTypesOnlyFilter(crts), + filterCaretFeedbackHistory = + CaretFeedbackHistoryFilter.caretFeedbackHistoryFilter(genericTypeCategories), + filterInlineFeedbackHistory = + NtabCaretClickFatigueUtils.feedbackModelFilterByCRT(crts) + ).apply(Seq(candidate)) + .map(_.headOption.getOrElse(false)) + } else Future.True + } else { + Future.True + } + }.withStats(scopedStats) + .withName(name) + } + + def apply( + genericTypeCategories: Seq[String], + crts: Set[CRT], + calculateFatiguePeriod: Seq[CaretFeedbackDetails] => Duration, + useMostRecentDislikeTime: Boolean, + name: String = defaultName + )( + implicit globalStats: StatsReceiver + ): NamedPredicate[PushCandidate] = { + val scopedStats = globalStats.scope(name) + val commonNtabCaretClickFatiguePredicate = CommonNtabCaretClickFatiguePredicate( + filterCaretFeedbackHistory = + CaretFeedbackHistoryFilter.caretFeedbackHistoryFilter(genericTypeCategories), + filterHistory = FatiguePredicate.recTypesOnlyFilter(crts), + calculateFatiguePeriod = calculateFatiguePeriod, + useMostRecentDislikeTime = useMostRecentDislikeTime, + name = name + )(globalStats) + + Predicate + .fromAsync { candidate: PushCandidate => + if (candidate.frigateNotification.notificationDisplayLocation == NotificationDisplayLocation.PushToMobileDevice) { + if (candidate.target.params(PushParams.EnableFatigueNtabCaretClickingParam)) { + commonNtabCaretClickFatiguePredicate + .apply(Seq(candidate.target)) + .map(_.headOption.getOrElse(false)) + } else Future.True + } else { + Future.True + } + }.andThen(candidateFatiguePredicate(genericTypeCategories, crts)) + .withStats(scopedStats) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/package.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/package.scala new file mode 100644 index 000000000..61c1f78cc --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/package.scala @@ 
-0,0 +1,49 @@
+package com.twitter.frigate.pushservice
+
+import com.twitter.frigate.common.base.Candidate
+import com.twitter.frigate.common.base.SocialGraphServiceRelationshipMap
+import com.twitter.frigate.common.base.TweetAuthor
+import com.twitter.frigate.common.rec_types.RecTypes.isInNetworkTweetType
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.hermit.predicate.Predicate
+
+package object predicate {
+
+  /**
+   * Adapts a predicate over authored tweet candidates so it accepts any Candidate.
+   * The extractor yields the candidate only when it has the authored-tweet shape and
+   * its `commonRecType` satisfies `isInNetworkTweetType`; otherwise it yields None
+   * and `optionalOn` is given `missingResult = true`.
+   */
+  private def onlyForInNetworkTweets(
+    underlying: Predicate[PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap]
+  ): Predicate[Candidate] =
+    underlying.optionalOn[Candidate](
+      {
+        case authored: PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap
+            if isInNetworkTweetType(authored.commonRecType) =>
+          Some(authored)
+        case _ =>
+          None
+      },
+      missingResult = true
+    )
+
+  /** Adds `applyOnlyToAuthorBeingFollowPredicates` to authored-tweet predicates. */
+  implicit class CandidatesWithAuthorFollowPredicates(
+    predicate: Predicate[
+      PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap
+    ]) {
+    def applyOnlyToAuthorBeingFollowPredicates: Predicate[Candidate] =
+      onlyForInNetworkTweets(predicate)
+  }
+
+  /** Adds `applyOnlyToBasicTweetPredicates` to authored-tweet predicates. */
+  implicit class TweetCandidateWithTweetAuthor(
+    predicate: Predicate[
+      PushCandidate with TweetAuthor with SocialGraphServiceRelationshipMap
+    ]) {
+    def applyOnlyToBasicTweetPredicates: Predicate[Candidate] =
+      onlyForInNetworkTweets(predicate)
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/OpenOrNtabClickQualityPredicate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/OpenOrNtabClickQualityPredicate.scala
new file mode 100644
index 000000000..d7f38349b
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/OpenOrNtabClickQualityPredicate.scala
@@ -0,0 +1,27 @@
+package com.twitter.frigate.pushservice.predicate.quality_model_predicate
+
+import
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.util.Future + +object ExplicitOONCFilterPredicate extends QualityPredicateBase { + override lazy val name = "open_or_ntab_click_explicit_threshold" + + override lazy val thresholdExtractor = (t: Target) => + Future.value(t.params(PushFeatureSwitchParams.QualityPredicateExplicitThresholdParam)) + + override def scoreExtractor = (candidate: PushCandidate) => + candidate.mrWeightedOpenOrNtabClickRankingProbability +} + +object WeightedOpenOrNtabClickQualityPredicate extends QualityPredicateBase { + override lazy val name = "weighted_open_or_ntab_click_model" + + override lazy val thresholdExtractor = (t: Target) => { + Future.value(0.0) + } + + override def scoreExtractor = + (candidate: PushCandidate) => candidate.mrWeightedOpenOrNtabClickFilteringProbability +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateCommon.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateCommon.scala new file mode 100644 index 000000000..d22f8c68f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateCommon.scala @@ -0,0 +1,165 @@ +package com.twitter.frigate.pushservice.predicate.quality_model_predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.target.TargetScoringDetails +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +object PDauCohort extends 
Enumeration { + type PDauCohort = Value + + val cohort1 = Value + val cohort2 = Value + val cohort3 = Value + val cohort4 = Value + val cohort5 = Value + val cohort6 = Value +} + +object PDauCohortUtil { + + case class DauThreshold( + threshold1: Double, + threshold2: Double, + threshold3: Double, + threshold4: Double, + threshold5: Double) + + val defaultDAUProb = 0.0 + + val dauProbThresholds = DauThreshold( + threshold1 = 0.05, + threshold2 = 0.14, + threshold3 = 0.33, + threshold4 = 0.7, + threshold5 = 0.959 + ) + + val finerThresholdMap = + Map( + PDauCohort.cohort2 -> List(0.05, 0.0539, 0.0563, 0.0600, 0.0681, 0.0733, 0.0800, 0.0849, + 0.0912, 0.0975, 0.1032, 0.1092, 0.1134, 0.1191, 0.1252, 0.1324, 0.14), + PDauCohort.cohort3 -> List(0.14, 0.1489, 0.1544, 0.1625, 0.1704, 0.1797, 0.1905, 0.2001, + 0.2120, 0.2248, 0.2363, 0.2500, 0.2650, 0.2801, 0.2958, 0.3119, 0.33), + PDauCohort.cohort4 -> List(0.33, 0.3484, 0.3686, 0.3893, 0.4126, 0.4350, 0.4603, 0.4856, + 0.5092, 0.5348, 0.5602, 0.5850, 0.6087, 0.6319, 0.6548, 0.6779, 0.7), + PDauCohort.cohort5 -> List(0.7, 0.7295, 0.7581, 0.7831, 0.8049, 0.8251, 0.8444, 0.8612, + 0.8786, 0.8936, 0.9043, 0.9175, 0.9290, 0.9383, 0.9498, 0.9587, 0.959) + ) + + def getBucket(targetUser: PushTypes.Target, doImpression: Boolean) = { + implicit val stats = targetUser.stats.scope("PDauCohortUtil") + if (doImpression) targetUser.getBucket _ else targetUser.getBucketWithoutImpression _ + } + + def threshold1(targetUser: PushTypes.Target): Double = dauProbThresholds.threshold1 + + def threshold2(targetUser: PushTypes.Target): Double = dauProbThresholds.threshold2 + + def threshold3(targetUser: PushTypes.Target): Double = dauProbThresholds.threshold3 + + def threshold4(targetUser: PushTypes.Target): Double = dauProbThresholds.threshold4 + + def threshold5(targetUser: PushTypes.Target): Double = dauProbThresholds.threshold5 + + def thresholdForCohort(targetUser: PushTypes.Target, dauCohort: Int): Double = { + if (dauCohort == 0) 0.0 + 
else if (dauCohort == 1) threshold1(targetUser)
+    else if (dauCohort == 2) threshold2(targetUser)
+    else if (dauCohort == 3) threshold3(targetUser)
+    else if (dauCohort == 4) threshold4(targetUser)
+    else if (dauCohort == 5) threshold5(targetUser)
+    else 1.0
+  }
+
+  /**
+   * Buckets a DAU probability into one of the six cohorts delimited by `thresholds`.
+   *
+   * A probability p falls in cohort k when threshold(k-1) <= p < threshold(k);
+   * anything at or above threshold5 is cohort6. The function is total:
+   * probabilities below 0 and NaN map to cohort1 (the cohort of `defaultDAUProb`),
+   * and probabilities above 1 map to cohort6. These inputs previously threw a
+   * scala.MatchError because no case of the guarded match applied.
+   *
+   * @param dauProbability predicted DAU probability, expected in [0, 1]
+   * @param thresholds cohort boundaries
+   * @return the cohort containing dauProbability
+   */
+  def getPDauCohort(dauProbability: Double, thresholds: DauThreshold): PDauCohort.Value = {
+    // NaN fails every comparison, so it must be routed explicitly.
+    if (dauProbability.isNaN || dauProbability < thresholds.threshold1) PDauCohort.cohort1
+    else if (dauProbability < thresholds.threshold2) PDauCohort.cohort2
+    else if (dauProbability < thresholds.threshold3) PDauCohort.cohort3
+    else if (dauProbability < thresholds.threshold4) PDauCohort.cohort4
+    else if (dauProbability < thresholds.threshold5) PDauCohort.cohort5
+    else PDauCohort.cohort6
+  }
+
+  def getDauProb(target: TargetScoringDetails): Future[Double] = {
+    target.dauProbability.map { dauProb =>
+      dauProb.map(_.probability).getOrElse(defaultDAUProb)
+    }
+  }
+
+  def getPDauCohort(target: TargetScoringDetails): Future[PDauCohort.Value] = {
+    getDauProb(target).map { getPDauCohort(_, dauProbThresholds) }
+  }
+
+  def getPDauCohortWithPDau(target: TargetScoringDetails): Future[(PDauCohort.Value, Double)] = {
+    getDauProb(target).map { prob =>
+      (getPDauCohort(prob, dauProbThresholds), prob)
+    }
+  }
+
+  def updateStats(
+    target: PushTypes.Target,
+    modelName: String,
+    predicateResult: Boolean
+  )(
+    implicit statsReceiver: StatsReceiver
+  ): Unit = {
+    val dauCohortOp = getPDauCohort(target)
+    dauCohortOp.map { dauCohort =>
+      val cohortStats = statsReceiver.scope(modelName).scope(dauCohort.toString)
+      cohortStats.counter(s"filter_$predicateResult").incr()
+    }
+    if (target.isNewSignup) {
+      val newUserModelStats = statsReceiver.scope(modelName)
+
newUserModelStats.counter(s"new_user_filter_$predicateResult").incr() + } + } +} + +trait QualityPredicateBase { + def name: String + def thresholdExtractor: Target => Future[Double] + def scoreExtractor: PushCandidate => Future[Option[Double]] + def isPredicateEnabled: PushCandidate => Future[Boolean] = _ => Future.True + def comparator: (Double, Double) => Boolean = + (score: Double, threshold: Double) => score >= threshold + def updateCustomStats( + candidate: PushCandidate, + score: Double, + threshold: Double, + result: Boolean + )( + implicit statsReceiver: StatsReceiver + ): Unit = {} + + def apply()(implicit statsReceiver: StatsReceiver): NamedPredicate[PushCandidate] = { + Predicate + .fromAsync { candidate: PushCandidate => + isPredicateEnabled(candidate).flatMap { + case true => + scoreExtractor(candidate).flatMap { scoreOpt => + thresholdExtractor(candidate.target).map { threshold => + val score = scoreOpt.getOrElse(0.0) + val result = comparator(score, threshold) + PDauCohortUtil.updateStats(candidate.target, name, result) + updateCustomStats(candidate, score, threshold, result) + result + } + } + case _ => Future.True + } + } + .withStats(statsReceiver.scope(s"predicate_$name")) + .withName(name) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateMap.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateMap.scala new file mode 100644 index 000000000..9ac360df0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/predicate/quality_model_predicate/QualityPredicateMap.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.predicate.quality_model_predicate + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.QualityPredicateEnum +import 
com.twitter.frigate.pushservice.predicate.PredicatesForCandidate
+import com.twitter.hermit.predicate.NamedPredicate
+
+object QualityPredicateMap {
+  // Builds, on every call, the lookup from quality-predicate enum values to NamedPredicates.
+  def apply(
+  )(
+    implicit statsReceiver: StatsReceiver
+  ): Map[QualityPredicateEnum.Value, NamedPredicate[PushCandidate]] = {
+    Map(
+      QualityPredicateEnum.WeightedOpenOrNtabClick -> WeightedOpenOrNtabClickQualityPredicate(),
+      QualityPredicateEnum.ExplicitOpenOrNtabClickFilter -> ExplicitOONCFilterPredicate(),
+      QualityPredicateEnum.AlwaysTrue -> PredicatesForCandidate.alwaysTruePushCandidatePredicate,
+    )
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTBoostRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTBoostRanker.scala
new file mode 100644
index 000000000..de95c0695
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTBoostRanker.scala
@@ -0,0 +1,54 @@
+package com.twitter.frigate.pushservice.rank
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+
+/**
+ * Re-ranks MR candidates by moving candidates of the requested CRTs to the front.
+ * The relative order inside the boosted group and inside the remaining candidates
+ * does not change.
+ *
+ * Ex: T: Tweet candidate, F: input CRT candidates
+ *
+ * T3, F2, T1, T2, F1 => F2, F1, T3, T1, T2
+ */
+case class CRTBoostRanker(statsReceiver: StatsReceiver) {
+
+  private val recsToBoostStat = statsReceiver.stat("recs_to_boost")
+  private val otherRecsStat = statsReceiver.stat("other_recs")
+
+  // Stable partition with the `crtToBoost` candidates first; records both group sizes.
+  private def boostCrtToTop(
+    inputCandidates: Seq[CandidateDetails[PushCandidate]],
+    crtToBoost: CommonRecommendationType
+  ): Seq[CandidateDetails[PushCandidate]] = {
+    val (boosted, rest) = inputCandidates.partition { details =>
+      details.candidate.commonRecType == crtToBoost
+    }
+    recsToBoostStat.add(boosted.size)
+    otherRecsStat.add(rest.size)
+    boosted ++ rest
+  }
+
+  // Boosts one CRT at a time, in list order, so the CRT listed last ends up at the
+  // very front. An empty `crtsToBoost` returns the input unchanged.
+  final def boostCrtsToTop(
+    inputCandidates: Seq[CandidateDetails[PushCandidate]],
+    crtsToBoost: Seq[CommonRecommendationType]
+  ): Seq[CandidateDetails[PushCandidate]] =
+    crtsToBoost.foldLeft(inputCandidates) { (ranked, crt) => boostCrtToTop(ranked, crt) }
+
+  // Single stable pass: every candidate whose CRT is in `crtsToBoost` moves to the
+  // front; unlike boostCrtsToTop this records no stats.
+  final def boostCrtsToTopStableOrder(
+    inputCandidates: Seq[CandidateDetails[PushCandidate]],
+    crtsToBoost: Seq[CommonRecommendationType]
+  ): Seq[CandidateDetails[PushCandidate]] = {
+    val boostSet = crtsToBoost.toSet
+    val (boosted, rest) =
+      inputCandidates.partition(details => boostSet.contains(details.candidate.commonRecType))
+    boosted ++ rest
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTDownRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTDownRanker.scala
new file mode 100644
index 000000000..4a8d74504
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/CRTDownRanker.scala
@@ -0,0 +1,45 @@
+package
com.twitter.frigate.pushservice.rank
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+
+/**
+ * Re-ranks MR candidates by pushing candidates of the given CRTs to the back.
+ * The order inside the down-ranked group and inside the remaining group is kept.
+ *
+ * Ex: T: Tweet candidate, F: input CRT candidates
+ *
+ * T3, F2, T1, T2, F1 => T3, T1, T2, F2, F1
+ */
+case class CRTDownRanker(statsReceiver: StatsReceiver) {
+
+  private val recsToDownRankStat = statsReceiver.stat("recs_to_down_rank")
+  private val otherRecsStat = statsReceiver.stat("other_recs")
+  private val downRankerRequests = statsReceiver.counter("down_ranker_requests")
+  // Moves candidates of one CRT to the back; counts the call and records both group sizes.
+  private def downRank(
+    inputCandidates: Seq[CandidateDetails[PushCandidate]],
+    crtToDownRank: CommonRecommendationType
+  ): Seq[CandidateDetails[PushCandidate]] = {
+    downRankerRequests.incr()
+    val (demoted, kept) = inputCandidates.partition { details =>
+      details.candidate.commonRecType == crtToDownRank
+    }
+    recsToDownRankStat.add(demoted.size)
+    otherRecsStat.add(kept.size)
+    kept ++ demoted
+  }
+
+  // Applies the single-CRT down-rank once per entry of `crtsToDownRank`, in list
+  // order; an empty list returns the input unchanged.
+  final def downRank(
+    inputCandidates: Seq[CandidateDetails[PushCandidate]],
+    crtsToDownRank: Seq[CommonRecommendationType]
+  ): Seq[CandidateDetails[PushCandidate]] = {
+    crtsToDownRank.foldLeft(inputCandidates) { (ranked, crt) =>
+      downRank(ranked, crt)
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/LoggedOutRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/LoggedOutRanker.scala
new file mode 100644
index 000000000..5ab0c240a
--- /dev/null
+++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/LoggedOutRanker.scala
@@ -0,0 +1,45 @@
+package com.twitter.frigate.pushservice.rank
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.common.base.TweetCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * Orders logged-out push candidates by the favorite count of their tweet, highest first.
+ *
+ * Candidates that are not tweet candidates, whose tweet the store does not return, or
+ * whose tweet has no counts are left out of the result.
+ */
+class LoggedOutRanker(tweetyPieStore: ReadableStore[Long, TweetyPieResult], stats: StatsReceiver) {
+  private val statsReceiver = stats.scope(this.getClass.getSimpleName)
+  private val rankedCandidates = statsReceiver.counter("ranked_candidates_count")
+
+  def rank(
+    candidates: Seq[CandidateDetails[PushCandidate]]
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+    // Type-test instead of asInstanceOf: a non-tweet candidate is skipped rather than
+    // throwing ClassCastException outside of the returned Future.
+    val tweetCandidates: Seq[(Long, CandidateDetails[PushCandidate])] = candidates.collect {
+      case details @ CandidateDetails(tweet: TweetCandidate, _) => tweet.tweetId -> details
+    }
+    // toMap keeps the last binding per key, so reverse to let the first candidate win.
+    val candidateByTweetId = tweetCandidates.reverse.toMap
+
+    val lookups = tweetyPieStore.multiGet(tweetCandidates.map(_._1).toSet).values.toSeq
+    val ranked = Future.collect(lookups).map { results =>
+      results.flatten
+        .flatMap { result =>
+          // A tweet without counts cannot be ranked; one without favoriteCount ranks as 0.
+          result.tweet.counts.map(counts => result.tweet.id -> counts.favoriteCount.getOrElse(0L))
+        }
+        .sortBy { case (_, favoriteCount) => favoriteCount }(Ordering[Long].reverse)
+        .flatMap { case (tweetId, _) => candidateByTweetId.get(tweetId) }
+    }
+
+    ranked.onSuccess { finalCandidates => rankedCandidates.incr(finalCandidates.size) }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/ModelBasedRanker.scala
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/ModelBasedRanker.scala
new file mode 100644
index 000000000..372888a66
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/ModelBasedRanker.scala
@@ -0,0 +1,204 @@
+package com.twitter.frigate.pushservice.rank
+
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.util.Future
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.params.MrQualityUprankingPartialTypeEnum
+import com.twitter.frigate.common.base.TweetCandidate
+import com.twitter.frigate.common.rec_types.RecTypes
+import com.twitter.frigate.pushservice.params.PushConstants.OoncQualityCombinedScore
+// Helpers that order push candidates by scores read from the candidates themselves.
+object ModelBasedRanker {
+  // Sorts candidates by the extracted score, highest first; a missing score counts as 0.0.
+  def rankBySpecifiedScore(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
+    scoreExtractor: PushCandidate => Future[Option[Double]]
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+
+    val scoredCandidatesFutures = candidatesDetails.map { cand =>
+      scoreExtractor(cand.candidate).map { scoreOp => (cand, scoreOp.getOrElse(0.0)) }
+    }
+
+    Future.collect(scoredCandidatesFutures).map { scores =>
+      val sorted = scores.sortBy { candidateDetails => -1 * candidateDetails._2 }
+      sorted.map(_._1)
+    }
+  }
+  // Adds each score (x10000, missing = 0.0) to "all_candidates" and to the candidate's CRT scope.
+  def populatePredictionScoreStats(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
+    scoreExtractor: PushCandidate => Future[Option[Double]],
+    predictionScoreStats: StatsReceiver
+  ): Unit = {
+    val scoreScaleFactorForStat = 10000
+    val statName = "prediction_scores"
+    candidatesDetails.map {
+      case CandidateDetails(candidate, source) =>
+        val crt = candidate.commonRecType
+        scoreExtractor(candidate).map { scoreOp =>
+          val scaledScore = (scoreOp.getOrElse(0.0) * scoreScaleFactorForStat).toFloat
+          predictionScoreStats.scope("all_candidates").stat(statName).add(scaledScore)
+          predictionScoreStats.scope(crt.toString()).stat(statName).add(scaledScore)
+        }
+    }
+  }
+  // Score stats for mrWeightedOpenOrNtabClickRankingProbability.
+  def populateMrWeightedOpenOrNtabClickScoreStats(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
+    predictionScoreStats: StatsReceiver
+  ): Unit = {
+    populatePredictionScoreStats(
+      candidatesDetails,
+      candidate => candidate.mrWeightedOpenOrNtabClickRankingProbability,
+      predictionScoreStats
+    )
+  }
+  // Score stats for mrQualityUprankingProbability.
+  def populateMrQualityUprankingScoreStats(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
+    predictionScoreStats: StatsReceiver
+  ): Unit = {
+    populatePredictionScoreStats(
+      candidatesDetails,
+      candidate => candidate.mrQualityUprankingProbability,
+      predictionScoreStats
+    )
+  }
+  // Ranks by mrWeightedOpenOrNtabClickRankingProbability, highest first.
+  def rankByMrWeightedOpenOrNtabClickScore(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]]
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+
+    rankBySpecifiedScore(
+      candidatesDetails,
+      candidate => candidate.mrWeightedOpenOrNtabClickRankingProbability
+    )
+  }
+  // 1 / (1 + exp(-(weight * score + bias))), with the exponent clamped to [-100, 100].
+  def transformSigmoid(
+    score: Double,
+    weight: Double = 1.0,
+    bias: Double = 0.0
+  ): Double = {
+    val base = -1.0 * (weight * score + bias)
+    val cappedBase = math.max(math.min(base, 100.0), -100.0)
+    1.0 / (1.0 + math.exp(cappedBase))
+  }
+  // Clamps score to [-|bar|, |bar|] and divides by |bar|. NOTE(review): bar == 0 yields NaN.
+  def transformLinear(
+    score: Double,
+    bar: Double = 1.0
+  ): Double = {
+    val positiveBar = math.abs(bar)
+    val cappedScore = math.max(math.min(score, positiveBar), -1.0 * positiveBar)
+    cappedScore / positiveBar
+  }
+  // Returns score unchanged.
+  def transformIdentity(
+    score: Double
+  ): Double = score
+  // Ranks by oonc * (1 + boost * transform(quality)) and caches it; None if either score is missing.
+  def rankByQualityOoncCombinedScore(
+    candidatesDetails: Seq[CandidateDetails[PushCandidate]],
+    qualityScoreTransform: Double => Double,
+    qualityScoreBoost: Double = 1.0
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+
+    rankBySpecifiedScore(
+      candidatesDetails,
+      candidate => {
+        val ooncScoreFutOpt: Future[Option[Double]] =
+          candidate.mrWeightedOpenOrNtabClickRankingProbability
+        val qualityScoreFutOpt: Future[Option[Double]] =
+          candidate.mrQualityUprankingProbability
+        Future
+          .join(
+            ooncScoreFutOpt,
+            qualityScoreFutOpt
+          ).map {
+            case (Some(ooncScore), Some(qualityScore)) =>
+              val transformedQualityScore = qualityScoreTransform(qualityScore)
+              val combinedScore = ooncScore * (1.0 + qualityScoreBoost * transformedQualityScore)
+              candidate
+                .cacheExternalScore(OoncQualityCombinedScore, Future.value(Some(combinedScore)))
+              Some(combinedScore)
+            case _ => None
+          }
+      }
+    )
+  }
+  // Re-sorts only the OON tweet candidates by base score * producer boost; other slots stay put.
+  def rerankByProducerQualityOoncCombinedScore(
+    candidateDetails: Seq[CandidateDetails[PushCandidate]]
+  )(
+    implicit stat: StatsReceiver
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+    val scopedStat = stat.scope("producer_quality_reranking")
+    val oonCandidates = candidateDetails.filter {
+      case CandidateDetails(pushCandidate: PushCandidate, _) =>
+        tweetCandidateSelector(pushCandidate, MrQualityUprankingPartialTypeEnum.Oon)
+    }
+
+    val rankedOonCandidatesFut = rankBySpecifiedScore(
+      oonCandidates,
+      candidate => {
+        val baseScoreFutureOpt: Future[Option[Double]] = {
+          val qualityCombinedScoreFutureOpt =
+            candidate.getExternalCachedScoreByName(OoncQualityCombinedScore)
+          val ooncScoreFutureOpt = candidate.mrWeightedOpenOrNtabClickRankingProbability
+          Future.join(qualityCombinedScoreFutureOpt, ooncScoreFutureOpt).map {
+            case (Some(qualityCombinedScore), _) =>
+              scopedStat.counter("quality_combined_score").incr()
+              Some(qualityCombinedScore)
+            case (_, ooncScoreOpt) =>
+              scopedStat.counter("oonc_score").incr()
+              ooncScoreOpt
+          }
+        }
+        baseScoreFutureOpt.map {
+          case Some(baseScore) =>
+            val boostRatio = candidate.mrProducerQualityUprankingBoost.getOrElse(1.0)
+            if (boostRatio > 1.0) scopedStat.counter("author_uprank").incr()
+            else if (boostRatio < 1.0) scopedStat.counter("author_downrank").incr()
+            else scopedStat.counter("author_noboost").incr()
+            Some(baseScore * boostRatio)
+          case _ =>
+            scopedStat.counter("empty_score").incr()
+            None
+        }
+      }
+    )
+
+    rankedOonCandidatesFut.map { rankedOonCandidates =>
+      val sortedOonCandidateIterator = rankedOonCandidates.toIterator
+      candidateDetails.map { ooncRankedCandidate =>
+        val isOon = tweetCandidateSelector(
+          ooncRankedCandidate.candidate,
+          MrQualityUprankingPartialTypeEnum.Oon)
+
+        if (sortedOonCandidateIterator.hasNext && isOon)
+          sortedOonCandidateIterator.next()
+        else ooncRankedCandidate
+      }
+    }
+  }
+  // False for non-tweet candidates; Oon needs an OON tweet or topic-tweet CRT; other types pass.
+  def tweetCandidateSelector(
+    pushCandidate: PushCandidate,
+    selectedCandidateType: MrQualityUprankingPartialTypeEnum.Value
+  ): Boolean = {
+    pushCandidate match {
+      case candidate: PushCandidate with TweetCandidate =>
+        selectedCandidateType match {
+          case MrQualityUprankingPartialTypeEnum.Oon =>
+            val crt = candidate.commonRecType
+            RecTypes.isOutOfNetworkTweetRecType(crt) || RecTypes.outOfNetworkTopicTweetTypes
+              .contains(crt)
+          case _ => true
+        }
+      case _ => false
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/PushserviceRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/PushserviceRanker.scala
new file mode 100644
index 000000000..26a3a4239
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/PushserviceRanker.scala
@@ -0,0 +1,31 @@
+package com.twitter.frigate.pushservice.rank
+
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.common.base.Ranker
+import com.twitter.util.Future
+// Ranker whose rank() is fixed as initialRank followed by reRank on its result.
+trait PushserviceRanker[T, C] extends Ranker[T, C] {
+
+  /**
+   * Initial Ranking of input candidates
+   */
+  def initialRank(target: T, candidates: Seq[CandidateDetails[C]]): Future[Seq[CandidateDetails[C]]]
+
+  /**
+   * Re-ranks input ranked candidates. Useful when a subset of candidates are ranked
+   * by a different logic, while preserving the initial ranking for the rest
+   */
+  def reRank(
+    target: T,
+    rankedCandidates: Seq[CandidateDetails[C]]
+  ): Future[Seq[CandidateDetails[C]]]
+
+  /**
+   * Final ranking that does Initial + Rerank
+   */
+  override final def rank(target: T, candidates: Seq[CandidateDetails[C]]): (
+    Future[Seq[CandidateDetails[C]]]
+  ) = {
+    initialRank(target, candidates).flatMap { rankedCandidates => reRank(target, rankedCandidates) }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHLightRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHLightRanker.scala
new file mode 100644
index 000000000..3fdae08c3
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHLightRanker.scala
@@ -0,0 +1,139 @@
+package com.twitter.frigate.pushservice.rank
+import com.twitter.contentrecommender.thriftscala.LightRankingCandidate
+import com.twitter.contentrecommender.thriftscala.LightRankingFeatureHydrationContext
+import com.twitter.contentrecommender.thriftscala.MagicRecsFeatureHydrationContext
+import com.twitter.finagle.stats.Counter
+import com.twitter.finagle.stats.Stat
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.common.base.RandomRanker
+import com.twitter.frigate.common.base.Ranker
+import com.twitter.frigate.common.base.TweetAuthor
+import com.twitter.frigate.common.base.TweetCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.params.PushConstants
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.ml.featurestore.lib.UserId
+import
com.twitter.nrel.lightranker.MagicRecsServeDataRecordLightRanker
+import com.twitter.util.Future
+// Ranker that, depending on target params, trims candidates via the light ranker or a random ranker.
+class RFPHLightRanker(
+  lightRanker: MagicRecsServeDataRecordLightRanker,
+  stats: StatsReceiver)
+    extends Ranker[Target, PushCandidate] {
+
+  private val statsReceiver = stats.scope(this.getClass.getSimpleName)
+
+  private val lightRankerCandidateCounter = statsReceiver.counter("light_ranker_candidate_count")
+  private val lightRankerRequestCounter = statsReceiver.counter("light_ranker_request_count")
+  private val lightRankingStats: StatsReceiver = statsReceiver.scope("light_ranking")
+  private val restrictLightRankingCounter: Counter =
+    lightRankingStats.counter("restrict_light_ranking")
+  private val selectedLightRankerScribedTargetCandidateCountStats: Stat =
+    lightRankingStats.stat("selected_light_ranker_scribed_target_candidate_count")
+  private val selectedLightRankerScribedCandidatesStats: Stat =
+    lightRankingStats.stat("selected_light_ranker_scribed_candidates")
+  private val lightRankingRandomBaselineStats: StatsReceiver =
+    statsReceiver.scope("light_ranking_random_baseline")
+  // Three paths: trim tweet candidates, randomly down-sample when scribing, else pass through.
+  override def rank(
+    target: Target,
+    candidates: Seq[CandidateDetails[PushCandidate]]
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+    val enableLightRanker = target.params(PushFeatureSwitchParams.EnableLightRankingParam)
+    val restrictLightRanker = target.params(PushParams.RestrictLightRankingParam)
+    val lightRankerSelectionThreshold =
+      target.params(PushFeatureSwitchParams.LightRankingNumberOfCandidatesParam)
+    val randomRanker = RandomRanker[Target, PushCandidate]()(lightRankingRandomBaselineStats)
+
+    if (enableLightRanker && candidates.length > lightRankerSelectionThreshold && !target.scribeFeatureForRequestScribe) {
+      val (tweetCandidates, nonTweetCandidates) =
+        candidates.partition {
+          case CandidateDetails(pushCandidate: PushCandidate with TweetCandidate, source) => true
+          case _ => false
+        }
+      val lightRankerSelectedTweetCandidatesFut = {
+        if (restrictLightRanker) {
+          restrictLightRankingCounter.incr()
+          lightRankThenTake(
+            target,
+            tweetCandidates
+              .asInstanceOf[Seq[CandidateDetails[PushCandidate with TweetCandidate]]],
+            PushConstants.RestrictLightRankingCandidatesThreshold
+          )
+        } else if (target.params(PushFeatureSwitchParams.EnableRandomBaselineLightRankingParam)) {
+          randomRanker.rank(target, tweetCandidates).map { randomLightRankerCands =>
+            randomLightRankerCands.take(lightRankerSelectionThreshold)
+          }
+        } else {
+          lightRankThenTake(
+            target,
+            tweetCandidates
+              .asInstanceOf[Seq[CandidateDetails[PushCandidate with TweetCandidate]]],
+            lightRankerSelectionThreshold
+          )
+        }
+      }
+      lightRankerSelectedTweetCandidatesFut.map { returnedTweetCandidates =>
+        nonTweetCandidates ++ returnedTweetCandidates
+      }
+    } else if (target.scribeFeatureForRequestScribe) {
+      val downSampleRate: Double =
+        if (target.params(PushParams.DownSampleLightRankingScribeCandidatesParam))
+          PushConstants.DownSampleLightRankingScribeCandidatesRate
+        else target.params(PushFeatureSwitchParams.LightRankingScribeCandidatesDownSamplingParam)
+      val selectedCandidateCounter: Int = math.ceil(candidates.size * downSampleRate).toInt
+      selectedLightRankerScribedTargetCandidateCountStats.add(selectedCandidateCounter.toFloat)
+
+      randomRanker.rank(target, candidates).map { randomLightRankerCands =>
+        val selectedCandidates = randomLightRankerCands.take(selectedCandidateCounter)
+        selectedLightRankerScribedCandidatesStats.add(selectedCandidates.size.toFloat)
+        selectedCandidates
+      }
+    } else Future.value(candidates)
+  }
+  // Light-ranks the tweet candidates and keeps the first numOfCandidates after sorting by score.
+  private def lightRankThenTake(
+    target: Target,
+    candidates: Seq[CandidateDetails[PushCandidate with TweetCandidate]],
+    numOfCandidates: Int
+  ): Future[Seq[CandidateDetails[PushCandidate]]] = {
+    lightRankerCandidateCounter.incr(candidates.length)
+    lightRankerRequestCounter.incr()
+    val lightRankerCandidates: Seq[LightRankingCandidate] = candidates.map {
+      case CandidateDetails(tweetCandidate, _) =>
+        val tweetAuthor = tweetCandidate match {
+          case t: TweetCandidate with TweetAuthor => t.authorId
+          case _ => None
+        }
+        val hydrationContext: LightRankingFeatureHydrationContext =
+          LightRankingFeatureHydrationContext.MagicRecsHydrationContext(
+            MagicRecsFeatureHydrationContext(
+              tweetAuthor = tweetAuthor,
+              pushString = tweetCandidate.getPushCopy.flatMap(_.pushStringGroup).map(_.toString))
+          )
+        LightRankingCandidate(
+          tweetId = tweetCandidate.tweetId,
+          hydrationContext = Some(hydrationContext)
+        )
+    }
+    val modelName = target.params(PushFeatureSwitchParams.LightRankingModelTypeParam)
+    val lightRankedCandidatesFut = {
+      lightRanker
+        .rank(UserId(target.targetId), lightRankerCandidates, modelName)
+    }
+
+    lightRankedCandidatesFut.map { lightRankedCandidates =>
+      val lrScoreMap = lightRankedCandidates.map { lrCand =>
+        lrCand.tweetId -> lrCand.score
+      }.toMap
+      val candScoreMap: Seq[Option[Double]] = candidates.map { candidateDetails =>
+        lrScoreMap.get(candidateDetails.candidate.tweetId)
+      }
+      sortCandidatesByScore(candidates, candScoreMap) // not defined here; presumably from Ranker
+        .take(numOfCandidates)
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHRanker.scala
new file mode 100644
index 000000000..83bdf3932
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/RFPHRanker.scala
@@ -0,0 +1,297 @@
+package com.twitter.frigate.pushservice.rank
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateDetails
+import com.twitter.frigate.common.base.Ranker
+import com.twitter.frigate.common.rec_types.RecTypes
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.ml.HealthFeatureGetter
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import
com.twitter.frigate.pushservice.params.MrQualityUprankingPartialTypeEnum +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushMLModel +import com.twitter.frigate.pushservice.params.PushModelName +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.MediaAnnotationsUtil.updateMediaCategoryStats +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.util.Future +import com.twitter.frigate.pushservice.params.MrQualityUprankingTransformTypeEnum +import com.twitter.storehaus.ReadableStore +import com.twitter.frigate.thriftscala.UserMediaRepresentation +import com.twitter.hss.api.thriftscala.UserHealthSignalResponse + +class RFPHRanker( + randomRanker: Ranker[Target, PushCandidate], + weightedOpenOrNtabClickModelScorer: PushMLModelScorer, + subscriptionCreatorRanker: SubscriptionCreatorRanker, + userHealthSignalStore: ReadableStore[Long, UserHealthSignalResponse], + producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation], + stats: StatsReceiver) + extends PushserviceRanker[Target, PushCandidate] { + + private val statsReceiver = stats.scope(this.getClass.getSimpleName) + + private val boostCRTsRanker = CRTBoostRanker(statsReceiver.scope("boost_desired_crts")) + private val crtDownRanker = CRTDownRanker(statsReceiver.scope("down_rank_desired_crts")) + + private val crtsToDownRank = statsReceiver.stat("crts_to_downrank") + private val crtsToUprank = statsReceiver.stat("crts_to_uprank") + + private val randomRankingCounter = stats.counter("randomRanking") + private val mlRankingCounter = stats.counter("mlRanking") + private val disableAllRelevanceCounter = stats.counter("disableAllRelevance") + private val disableHeavyRankingCounter = stats.counter("disableHeavyRanking") + + private val heavyRankerCandidateCounter = stats.counter("heavy_ranker_candidate_count") + private val heavyRankerScoreStats = 
statsReceiver.scope("heavy_ranker_prediction_scores") + + private val producerUprankingCounter = statsReceiver.counter("producer_quality_upranking") + private val producerBoostedCounter = statsReceiver.counter("producer_boosted_candidates") + private val producerDownboostedCounter = statsReceiver.counter("producer_downboosted_candidates") + + override def initialRank( + target: Target, + candidates: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + heavyRankerCandidateCounter.incr(candidates.size) + + updateMediaCategoryStats(candidates)(stats) + target.targetUserState + .flatMap { targetUserState => + val useRandomRanking = target.skipMlRanker || target.params( + PushParams.UseRandomRankingParam + ) + + if (useRandomRanking) { + randomRankingCounter.incr() + randomRanker.rank(target, candidates) + } else if (target.params(PushParams.DisableAllRelevanceParam)) { + disableAllRelevanceCounter.incr() + Future.value(candidates) + } else if (target.params(PushParams.DisableHeavyRankingParam) || target.params( + PushFeatureSwitchParams.DisableHeavyRankingModelFSParam)) { + disableHeavyRankingCounter.incr() + Future.value(candidates) + } else { + mlRankingCounter.incr() + + val scoredCandidatesFut = scoring(target, candidates) + + target.rankingModelParam.map { rankingModelParam => + val modelName = PushModelName( + PushMLModel.WeightedOpenOrNtabClickProbability, + target.params(rankingModelParam)).toString + ModelBasedRanker.populateMrWeightedOpenOrNtabClickScoreStats( + candidates, + heavyRankerScoreStats.scope(modelName) + ) + } + + if (target.params( + PushFeatureSwitchParams.EnableQualityUprankingCrtScoreStatsForHeavyRankingParam)) { + val modelName = PushModelName( + PushMLModel.FilteringProbability, + target.params(PushFeatureSwitchParams.QualityUprankingModelTypeParam) + ).toString + ModelBasedRanker.populateMrQualityUprankingScoreStats( + candidates, + heavyRankerScoreStats.scope(modelName) + ) + } + + val 
ooncRankedCandidatesFut = + scoredCandidatesFut.flatMap(ModelBasedRanker.rankByMrWeightedOpenOrNtabClickScore) + + val qualityUprankedCandidatesFut = + if (target.params(PushFeatureSwitchParams.EnableQualityUprankingForHeavyRankingParam)) { + ooncRankedCandidatesFut.flatMap { ooncRankedCandidates => + val transformFunc: Double => Double = + target.params(PushFeatureSwitchParams.QualityUprankingTransformTypeParam) match { + case MrQualityUprankingTransformTypeEnum.Linear => + ModelBasedRanker.transformLinear( + _, + bar = target.params( + PushFeatureSwitchParams.QualityUprankingLinearBarForHeavyRankingParam)) + case MrQualityUprankingTransformTypeEnum.Sigmoid => + ModelBasedRanker.transformSigmoid( + _, + weight = target.params( + PushFeatureSwitchParams.QualityUprankingSigmoidWeightForHeavyRankingParam), + bias = target.params( + PushFeatureSwitchParams.QualityUprankingSigmoidBiasForHeavyRankingParam) + ) + case _ => ModelBasedRanker.transformIdentity + } + + ModelBasedRanker.rankByQualityOoncCombinedScore( + ooncRankedCandidates, + transformFunc, + target.params(PushFeatureSwitchParams.QualityUprankingBoostForHeavyRankingParam) + ) + } + } else ooncRankedCandidatesFut + + if (target.params( + PushFeatureSwitchParams.EnableProducersQualityBoostingForHeavyRankingParam)) { + producerUprankingCounter.incr() + qualityUprankedCandidatesFut.flatMap(cands => + ModelBasedRanker.rerankByProducerQualityOoncCombinedScore(cands)(statsReceiver)) + } else qualityUprankedCandidatesFut + } + } + } + + private def scoring( + target: Target, + candidates: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + val ooncScoredCandidatesFut = target.rankingModelParam.map { rankingModelParam => + weightedOpenOrNtabClickModelScorer.scoreByBatchPredictionForModelVersion( + target, + candidates, + rankingModelParam + ) + } + + val scoredCandidatesFut = { + if (target.params(PushFeatureSwitchParams.EnableQualityUprankingForHeavyRankingParam)) { + 
ooncScoredCandidatesFut.map { candidates => + weightedOpenOrNtabClickModelScorer.scoreByBatchPredictionForModelVersion( + target = target, + candidatesDetails = candidates, + modelVersionParam = PushFeatureSwitchParams.QualityUprankingModelTypeParam, + overridePushMLModelOpt = Some(PushMLModel.FilteringProbability) + ) + } + } else ooncScoredCandidatesFut + } + + scoredCandidatesFut.foreach { candidates => + val oonCandidates = candidates.filter { + case CandidateDetails(pushCandidate: PushCandidate, _) => + ModelBasedRanker.tweetCandidateSelector( + pushCandidate, + MrQualityUprankingPartialTypeEnum.Oon) + } + setProducerQuality( + target, + oonCandidates, + userHealthSignalStore, + producerMediaRepresentationStore) + } + } + + private def setProducerQuality( + target: Target, + candidates: Seq[CandidateDetails[PushCandidate]], + userHealthSignalStore: ReadableStore[Long, UserHealthSignalResponse], + producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation] + ): Unit = { + lazy val boostRatio = + target.params(PushFeatureSwitchParams.QualityUprankingBoostForHighQualityProducersParam) + lazy val downboostRatio = + target.params(PushFeatureSwitchParams.QualityUprankingDownboostForLowQualityProducersParam) + candidates.foreach { + case CandidateDetails(pushCandidate, _) => + HealthFeatureGetter + .getFeatures(pushCandidate, producerMediaRepresentationStore, userHealthSignalStore).map { + featureMap => + val agathaNsfwScore = featureMap.numericFeatures.getOrElse("agathaNsfwScore", 0.5) + val textNsfwScore = featureMap.numericFeatures.getOrElse("textNsfwScore", 0.15) + val nudityRate = featureMap.numericFeatures.getOrElse("nudityRate", 0.0) + val activeFollowers = featureMap.numericFeatures.getOrElse("activeFollowers", 0.0) + val favorsRcvd28Days = featureMap.numericFeatures.getOrElse("favorsRcvd28Days", 0.0) + val tweets28Days = featureMap.numericFeatures.getOrElse("tweets28Days", 0.0) + val authorDislikeCount = featureMap.numericFeatures + 
.getOrElse("authorDislikeCount", 0.0) + val authorDislikeRate = featureMap.numericFeatures.getOrElse("authorDislikeRate", 0.0) + val authorReportRate = featureMap.numericFeatures.getOrElse("authorReportRate", 0.0) + val abuseStrikeTop2Percent = + featureMap.booleanFeatures.getOrElse("abuseStrikeTop2Percent", false) + val abuseStrikeTop1Percent = + featureMap.booleanFeatures.getOrElse("abuseStrikeTop1Percent", false) + val hasNsfwToken = featureMap.booleanFeatures.getOrElse("hasNsfwToken", false) + + if ((activeFollowers > 3000000) || + (activeFollowers > 1000000 && agathaNsfwScore < 0.7 && nudityRate < 0.01 && !hasNsfwToken && !abuseStrikeTop2Percent) || + (activeFollowers > 100000 && agathaNsfwScore < 0.7 && nudityRate < 0.01 && !hasNsfwToken && !abuseStrikeTop2Percent && + tweets28Days > 0 && favorsRcvd28Days / tweets28Days > 3000 && authorReportRate < 0.000001 && authorDislikeRate < 0.0005)) { + producerBoostedCounter.incr() + pushCandidate.setProducerQualityUprankingBoost(boostRatio) + } else if (activeFollowers < 5 || agathaNsfwScore > 0.9 || nudityRate > 0.03 || hasNsfwToken || abuseStrikeTop1Percent || + textNsfwScore > 0.4 || (authorDislikeRate > 0.005 && authorDislikeCount > 5) || + (tweets28Days > 56 && favorsRcvd28Days / tweets28Days < 100)) { + producerDownboostedCounter.incr() + pushCandidate.setProducerQualityUprankingBoost(downboostRatio) + } else pushCandidate.setProducerQualityUprankingBoost(1.0) + } + } + } + + private def rerankBySubscriptionCreatorRanker( + target: Target, + rankedCandidates: Future[Seq[CandidateDetails[PushCandidate]]], + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + if (target.params(PushFeatureSwitchParams.SoftRankCandidatesFromSubscriptionCreators)) { + val factor = target.params(PushFeatureSwitchParams.SoftRankFactorForSubscriptionCreators) + subscriptionCreatorRanker.boostByScoreFactor(rankedCandidates, factor) + } else + subscriptionCreatorRanker.boostSubscriptionCreator(rankedCandidates) + } + + override def 
reRank( + target: Target, + rankedCandidates: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + val numberOfF1Candidates = + rankedCandidates.count(candidateDetails => + RecTypes.isF1Type(candidateDetails.candidate.commonRecType)) + lazy val threshold = + target.params(PushFeatureSwitchParams.NumberOfF1CandidatesThresholdForOONBackfill) + lazy val enableOONBackfillBasedOnF1 = + target.params(PushFeatureSwitchParams.EnableOONBackfillBasedOnF1Candidates) + /* Step 1: when the switch is on and the F1 candidate count exceeds the threshold, boost the F1 first-degree types to the top via boostCrtsToTopStableOrder. */ + val f1BoostedCandidates = + if (enableOONBackfillBasedOnF1 && numberOfF1Candidates > threshold) { + boostCRTsRanker.boostCrtsToTopStableOrder( + rankedCandidates, + RecTypes.f1FirstDegreeTypes.toSeq) + } else rankedCandidates + /* Step 2: optionally down-rank GeoPopTweet candidates. */ + val topTweetsByGeoDownRankedCandidates = + if (target.params(PushFeatureSwitchParams.BackfillRankTopTweetsByGeoCandidates)) { + crtDownRanker.downRank( + f1BoostedCandidates, + Seq(CommonRecommendationType.GeoPopTweet) + ) + } else f1BoostedCandidates + /* Step 3: boost the CRTs named by ListOfCrtsToUpRank; entries for which valueOf yields nothing are dropped by the flatMap. The resolved count is recorded in crtsToUprank. */ + val reRankedCandidatesWithBoostedCrts = { + val listOfCrtsToUpRank = target + .params(PushFeatureSwitchParams.ListOfCrtsToUpRank) + .flatMap(CommonRecommendationType.valueOf) + crtsToUprank.add(listOfCrtsToUpRank.size) + boostCRTsRanker.boostCrtsToTop(topTweetsByGeoDownRankedCandidates, listOfCrtsToUpRank) + } + /* Step 4: down-rank the CRTs named by ListOfCrtsToDownRank, resolved the same way; the resolved count is recorded in crtsToDownRank. */ + val reRankedCandidatesWithDownRankedCrts = { + val listOfCrtsToDownRank = target + .params(PushFeatureSwitchParams.ListOfCrtsToDownRank) + .flatMap(CommonRecommendationType.valueOf) + crtsToDownRank.add(listOfCrtsToDownRank.size) + crtDownRanker.downRank(reRankedCandidatesWithBoostedCrts, listOfCrtsToDownRank) + } + /* Step 5: optionally re-rank by subscription creators; otherwise return the step-4 result as an already-completed Future. */ + val rerankBySubscriptionCreatorFut = { + if (target.params(PushFeatureSwitchParams.BoostCandidatesFromSubscriptionCreators)) { + rerankBySubscriptionCreatorRanker( + target, + Future.value(reRankedCandidatesWithDownRankedCrts)) + } else Future.value(reRankedCandidatesWithDownRankedCrts) + } + + rerankBySubscriptionCreatorFut + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/SubscriptionCreatorRanker.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/SubscriptionCreatorRanker.scala new file mode 100644 index 000000000..3a2bff9a5 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/rank/SubscriptionCreatorRanker.scala @@ -0,0 +1,110 @@ +package com.twitter.frigate.pushservice.rank + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +class SubscriptionCreatorRanker( + superFollowEligibilityUserStore: ReadableStore[Long, Boolean], + statsReceiver: StatsReceiver) { + + private val scopedStats = statsReceiver.scope("SubscriptionCreatorRanker") + private val boostStats = scopedStats.scope("boostSubscriptionCreator") + private val softUprankStats = scopedStats.scope("boostByScoreFactor") + private val boostTotalCandidates = boostStats.stat("total_input_candidates") + private val softRankTotalCandidates = softUprankStats.stat("total_input_candidates") + private val softRankNumCandidatesCreators = softUprankStats.counter("candidates_from_creators") + private val softRankNumCandidatesNonCreators = + softUprankStats.counter("candidates_not_from_creators") + private val boostNumCandidatesCreators = boostStats.counter("candidates_from_creators") + private val boostNumCandidatesNonCreators = + boostStats.counter("candidates_not_from_creators") + + def boostSubscriptionCreator( + inputCandidatesFut: Future[Seq[CandidateDetails[PushCandidate]]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + inputCandidatesFut.flatMap { inputCandidates => + boostTotalCandidates.add(inputCandidates.size) 
+ val tweetAuthorIds = inputCandidates.flatMap { + case CandidateDetails(candidate: TweetCandidate with TweetAuthor, s) => + candidate.authorId + case _ => None + }.toSet + + FutureOps + .mapCollect(superFollowEligibilityUserStore.multiGet(tweetAuthorIds)) + .map { creatorAuthorMap => + val (upRankedCandidates, otherCandidates) = inputCandidates.partition { + case CandidateDetails(candidate: TweetCandidate with TweetAuthor, s) => + candidate.authorId match { + case Some(authorId) => + creatorAuthorMap(authorId).getOrElse(false) + case _ => false + } + case _ => false + } + boostNumCandidatesCreators.incr(upRankedCandidates.size) + boostNumCandidatesNonCreators.incr(otherCandidates.size) + upRankedCandidates ++ otherCandidates + } + } + } + + def boostByScoreFactor( + inputCandidatesFut: Future[Seq[CandidateDetails[PushCandidate]]], + factor: Double = 1.0, + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + inputCandidatesFut.flatMap { inputCandidates => + softRankTotalCandidates.add(inputCandidates.size) + val tweetAuthorIds = inputCandidates.flatMap { + case CandidateDetails(candidate: TweetCandidate with TweetAuthor, s) => + candidate.authorId + case _ => None + }.toSet + + FutureOps + .mapCollect(superFollowEligibilityUserStore.multiGet(tweetAuthorIds)) + .flatMap { creatorAuthorMap => + val (upRankedCandidates, otherCandidates) = inputCandidates.partition { + case CandidateDetails(candidate: TweetCandidate with TweetAuthor, s) => + candidate.authorId match { + case Some(authorId) => + creatorAuthorMap(authorId).getOrElse(false) + case _ => false + } + case _ => false + } + softRankNumCandidatesCreators.incr(upRankedCandidates.size) + softRankNumCandidatesNonCreators.incr(otherCandidates.size) + + ModelBasedRanker.rankBySpecifiedScore( + inputCandidates, + candidate => { + val isFromCreator = candidate match { + case candidate: TweetCandidate with TweetAuthor => + candidate.authorId match { + case Some(authorId) => + 
creatorAuthorMap(authorId).getOrElse(false) + case _ => false + } + case _ => false + } + candidate.mrWeightedOpenOrNtabClickRankingProbability.map { + case Some(score) => + if (isFromCreator) Some(score * factor) + else Some(score) + case _ => None + } + } + ) + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/LoggedOutRefreshForPushHandler.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/LoggedOutRefreshForPushHandler.scala new file mode 100644 index 000000000..f626d5b08 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/LoggedOutRefreshForPushHandler.scala @@ -0,0 +1,259 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.CandidateResult +import com.twitter.frigate.common.base.CandidateSource +import com.twitter.frigate.common.base.FetchRankFlowWithHydratedCandidates +import com.twitter.frigate.common.base.Invalid +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Response +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.common.base.Stats.track +import com.twitter.frigate.common.base.Stats.trackSeq +import com.twitter.frigate.common.logger.MRLogger +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.adaptor.LoggedOutPushCandidateSourceGenerator +import com.twitter.frigate.pushservice.predicate.LoggedOutPreRankingPredicates +import com.twitter.frigate.pushservice.predicate.LoggedOutTargetPredicates +import com.twitter.frigate.pushservice.rank.LoggedOutRanker +import 
com.twitter.frigate.pushservice.take.LoggedOutRefreshForPushNotifier +import com.twitter.frigate.pushservice.scriber.MrRequestScribeHandler +import com.twitter.frigate.pushservice.target.LoggedOutPushTargetUserBuilder +import com.twitter.frigate.pushservice.thriftscala.LoggedOutRequest +import com.twitter.frigate.pushservice.thriftscala.LoggedOutResponse +import com.twitter.frigate.pushservice.thriftscala.PushContext +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import com.twitter.hermit.predicate.SequentialPredicate +import com.twitter.util.Future + +class LoggedOutRefreshForPushHandler( + val loPushTargetUserBuilder: LoggedOutPushTargetUserBuilder, + val loPushCandidateSourceGenerator: LoggedOutPushCandidateSourceGenerator, + candidateHydrator: PushCandidateHydrator, + val loRanker: LoggedOutRanker, + val loRfphNotifier: LoggedOutRefreshForPushNotifier, + loMrRequestScriberNode: String +)( + globalStats: StatsReceiver) + extends FetchRankFlowWithHydratedCandidates[Target, RawCandidate, PushCandidate] { + + val log = MRLogger("LORefreshForPushHandler") + implicit val statsReceiver: StatsReceiver = + globalStats.scope("LORefreshForPushHandler") + private val loggedOutBuildStats = statsReceiver.scope("logged_out_build_target") + private val loggedOutProcessStats = statsReceiver.scope("logged_out_process") + private val loggedOutNotifyStats = statsReceiver.scope("logged_out_notify") + private val loCandidateHydrationStats: StatsReceiver = + statsReceiver.scope("logged_out_candidate_hydration") + val mrLORequestCandidateScribeStats = + statsReceiver.scope("mr_logged_out_request_scribe_candidates") + + val mrRequestScribeHandler = + new MrRequestScribeHandler(loMrRequestScriberNode, statsReceiver.scope("lo_mr_request_scribe")) + val loMrRequestTargetScribeStats = statsReceiver.scope("lo_mr_request_scribe_target") + + lazy val loCandSourceEligibleCounter: Counter = + 
loCandidateStats.counter("logged_out_cand_source_eligible") + lazy val loCandSourceNotEligibleCounter: Counter = + loCandidateStats.counter("logged_out_cand_source_not_eligible") + lazy val allCandidatesCounter: Counter = statsReceiver.counter("all_logged_out_candidates") + val allCandidatesFilteredPreRank = filterStats.counter("all_logged_out_candidates_filtered") + /** Target-level predicates applied to a logged-out target: fatigue and recs holdback. */ + override def targetPredicates(target: Target): List[Predicate[Target]] = List( + LoggedOutTargetPredicates.targetFatiguePredicate(), + LoggedOutTargetPredicates.loggedOutRecsHoldbackPredicate() + ) + /** Returns OK without running any predicate when target.skipFilters is set; otherwise runs predicateSeq on the single target and passes the first result through trackTargetPredStats. The resulting future is tracked under targetStats. */ + override def isTargetValid(target: Target): Future[Result] = { + val resultFut = + if (target.skipFilters) { + Future.value(OK) + } else { + predicateSeq(target).track(Seq(target)).map { resultArr => + trackTargetPredStats(resultArr(0)) + } + } + track(targetStats)(resultFut) + } + /** Delegates to loRanker.rank; the target argument is not used. */ + override def rank( + target: Target, + candidateDetails: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + loRanker.rank(candidateDetails) + } + /** Marks every candidate OK; no per-candidate validation is performed here. */ + override def validCandidates( + target: Target, + candidates: Seq[PushCandidate] + ): Future[Seq[Result]] = { + Future.value(candidates.map { c => OK }) + } + /** Always 1. */ + override def desiredCandidateCount(target: Target): Int = 1 + + private val loggedOutPreRankingPredicates = + LoggedOutPreRankingPredicates(filterStats.scope("logged_out_predicates")) + + private val loggedOutPreRankingPredicateChain = + new SequentialPredicate[PushCandidate](loggedOutPreRankingPredicates) + + override def filter( + target: Target, + candidates: Seq[CandidateDetails[PushCandidate]] + ): Future[ + (Seq[CandidateDetails[PushCandidate]], Seq[CandidateResult[PushCandidate, Result]]) + ] = { + val predicateChain = loggedOutPreRankingPredicateChain + predicateChain + .track(candidates.map(_.candidate)) + .map { results => + val resultForPreRankingFiltering = + results + .zip(candidates) + .foldLeft( + ( + Seq.empty[CandidateDetails[PushCandidate]], + 
Seq.empty[CandidateResult[PushCandidate, Result]] + ) + ) { + case ((goodCandidates, filteredCandidates), (result, candidateDetails)) => + result match { + case None => + (goodCandidates :+ candidateDetails, filteredCandidates) + + case Some(pred: NamedPredicate[_]) => + val r = Invalid(Some(pred.name)) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + case Some(_) => + val r = Invalid(Some("Filtered by un-named predicate")) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + } + } + resultForPreRankingFiltering match { + case (validCandidates, _) if validCandidates.isEmpty && candidates.nonEmpty => + allCandidatesFilteredPreRank.incr() + case _ => () + + } + resultForPreRankingFiltering + + } + + } + + override def candidateSources( + target: Target + ): Future[Seq[CandidateSource[Target, RawCandidate]]] = { + Future + .collect(loPushCandidateSourceGenerator.sources.map { cs => + cs.isCandidateSourceAvailable(target).map { isEligible => + if (isEligible) { + loCandSourceEligibleCounter.incr() + Some(cs) + } else { + loCandSourceNotEligibleCounter.incr() + None + } + } + }).map(_.flatten) + } + + override def process( + target: Target, + externalCandidates: Seq[RawCandidate] = Nil + ): Future[Response[PushCandidate, Result]] = { + isTargetValid(target).flatMap { + case OK => + for { + candidatesFromSources <- trackSeq(fetchStats)(fetchCandidates(target)) + externalCandidateDetails = externalCandidates.map( + CandidateDetails(_, "logged_out_refresh_for_push_handler_external_candidates")) + allCandidates = candidatesFromSources ++ externalCandidateDetails + hydratedCandidatesWithCopy <- + trackSeq(loCandidateHydrationStats)(hydrateCandidates(allCandidates)) + (candidates, preRankingFilteredCandidates) <- + track(filterStats)(filter(target, 
hydratedCandidatesWithCopy)) + rankedCandidates <- trackSeq(rankingStats)(rank(target, candidates)) + allTakeCandidateResults <- track(takeStats)( + take(target, rankedCandidates, desiredCandidateCount(target)) + ) + _ <- track(mrLORequestCandidateScribeStats)( + mrRequestScribeHandler.scribeForCandidateFiltering( + target, + hydratedCandidatesWithCopy, + preRankingFilteredCandidates, + rankedCandidates, + rankedCandidates, + rankedCandidates, + allTakeCandidateResults + )) + + } yield { + val takeCandidateResults = allTakeCandidateResults.filterNot { candResult => + candResult.result == MoreThanDesiredCandidates + } + val allCandidateResults = takeCandidateResults ++ preRankingFilteredCandidates + allCandidatesCounter.incr(allCandidateResults.size) + Response(OK, allCandidateResults) + } + + case result: Result => + for (_ <- track(loMrRequestTargetScribeStats)( + mrRequestScribeHandler.scribeForTargetFiltering(target, result))) yield { + Response(result, Nil) + } + } + } + + def buildTarget( + guestId: Long, + inputPushContext: Option[PushContext] + ): Future[Target] = + loPushTargetUserBuilder.buildTarget(guestId, inputPushContext) + + /** + * Hydrate candidate by querying downstream services + * + * @param candidates - candidates + * + * @return - hydrated candidates + */ + override def hydrateCandidates( + candidates: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = candidateHydrator(candidates) + + override def batchForCandidatesCheck(target: Target): Int = 1 + + def refreshAndSend(request: LoggedOutRequest): Future[LoggedOutResponse] = { + for { + target <- track(loggedOutBuildStats)( + loPushTargetUserBuilder.buildTarget(request.guestId, request.context)) + response <- track(loggedOutProcessStats)(process(target, externalCandidates = Seq.empty)) + loggedOutRefreshResponse <- + track(loggedOutNotifyStats)(loRfphNotifier.checkResponseAndNotify(response)) + } yield { + loggedOutRefreshResponse + } + } + +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/PushCandidateHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/PushCandidateHydrator.scala new file mode 100644 index 000000000..b8bf675bd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/PushCandidateHydrator.scala @@ -0,0 +1,239 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.channels.common.thriftscala.ApiList +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.rec_types.RecTypes.isInNetworkTweetType +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.TrendTweetPushCandidate +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.frigate.pushservice.refresh_handler.cross.CandidateCopyExpansion +import com.twitter.frigate.pushservice.util.CandidateHydrationUtil._ +import com.twitter.frigate.pushservice.util.MrUserStateUtil +import com.twitter.frigate.pushservice.util.RelationshipUtil +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +case class PushCandidateHydrator( + socialGraphServiceProcessStore: ReadableStore[RelationEdge, Boolean], + safeUserStore: ReadableStore[Long, User], + apiListStore: ReadableStore[Long, ApiList], + candidateCopyCross: CandidateCopyExpansion +)( + implicit statsReceiver: StatsReceiver, + implicit val weightedOpenOrNtabClickModelScorer: PushMLModelScorer) { + + lazy val candidateWithCopyNumStat = statsReceiver.stat("candidate_with_copy_num") + lazy val hydratedCandidateStat = 
statsReceiver.scope("hydrated_candidates") + lazy val mrUserStateStat = statsReceiver.scope("mr_user_state") + + lazy val queryStep = statsReceiver.scope("query_step") + lazy val relationEdgeWithoutDuplicateInQueryStep = + queryStep.counter("number_of_relationEdge_without_duplicate_in_query_step") + lazy val relationEdgeWithoutDuplicateInQueryStepDistribution = + queryStep.stat("number_of_relationEdge_without_duplicate_in_query_step_distribution") + + case class Entities( + users: Set[Long] = Set.empty[Long], + relationshipEdges: Set[RelationEdge] = Set.empty[RelationEdge]) { + def merge(otherEntities: Entities): Entities = { + this.copy( + users = this.users ++ otherEntities.users, + relationshipEdges = + this.relationshipEdges ++ otherEntities.relationshipEdges + ) + } + } + + case class EntitiesMap( + userMap: Map[Long, User] = Map.empty[Long, User], + relationshipMap: Map[RelationEdge, Boolean] = Map.empty[RelationEdge, Boolean]) + + private def updateCandidateAndCrtStats( + candidate: RawCandidate, + candidateType: String, + numEntities: Int = 1 + ): Unit = { + statsReceiver + .scope(candidateType).scope(candidate.commonRecType.name).stat( + "totalEntitiesPerCandidateTypePerCrt").add(numEntities) + statsReceiver.scope(candidateType).stat("totalEntitiesPerCandidateType").add(numEntities) + } + + private def collectEntities( + candidateDetailsSeq: Seq[CandidateDetails[RawCandidate]] + ): Entities = { + candidateDetailsSeq + .map { candidateDetails => + val pushCandidate = candidateDetails.candidate + + val userEntities = pushCandidate match { + case tweetWithSocialContext: RawCandidate with TweetWithSocialContextTraits => + val authorIdOpt = getAuthorIdFromTweetCandidate(tweetWithSocialContext) + val scUserIds = tweetWithSocialContext.socialContextUserIds.toSet + updateCandidateAndCrtStats(pushCandidate, "tweetWithSocialContext", scUserIds.size + 1) + Entities(users = scUserIds ++ authorIdOpt.toSet) + + case _ => Entities() + } + + val relationEntities = { + if 
(isInNetworkTweetType(pushCandidate.commonRecType)) { + Entities( + relationshipEdges = + RelationshipUtil.getPreCandidateRelationshipsForInNetworkTweets(pushCandidate).toSet + ) + } else Entities() + } + + userEntities.merge(relationEntities) + } + .foldLeft(Entities()) { (e1, e2) => e1.merge(e2) } + + } + + /** + * This method calls Gizmoduck and Social Graph Service, keep the results in EntitiesMap + * and passed onto the update candidate phase in the hydration step + * + * @param entities contains all userIds and relationEdges for all candidates + * @return EntitiesMap contains userMap and relationshipMap + */ + private def queryEntities(entities: Entities): Future[EntitiesMap] = { + + relationEdgeWithoutDuplicateInQueryStep.incr(entities.relationshipEdges.size) + relationEdgeWithoutDuplicateInQueryStepDistribution.add(entities.relationshipEdges.size) + + val relationshipMapFuture = Future + .collect(socialGraphServiceProcessStore.multiGet(entities.relationshipEdges)) + .map { resultMap => + resultMap.collect { + case (relationshipEdge, Some(res)) => relationshipEdge -> res + case (relationshipEdge, None) => relationshipEdge -> false + } + } + + val userMapFuture = Future + .collect(safeUserStore.multiGet(entities.users)) + .map { userMap => + userMap.collect { + case (userId, Some(user)) => + userId -> user + } + } + + Future.join(userMapFuture, relationshipMapFuture).map { + case (uMap, rMap) => EntitiesMap(userMap = uMap, relationshipMap = rMap) + } + } + + /** + * @param candidateDetails: recommendation candidates for a user + * @return sequence of candidates tagged with push and ntab copy id + */ + private def expandCandidatesWithCopy( + candidateDetails: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[(CandidateDetails[RawCandidate], CopyIds)]] = { + candidateCopyCross.expandCandidatesWithCopyId(candidateDetails) + } + + def updateCandidates( + candidateDetailsWithCopies: Seq[(CandidateDetails[RawCandidate], CopyIds)], + entitiesMaps: EntitiesMap + ): 
Seq[CandidateDetails[PushCandidate]] = { + candidateDetailsWithCopies.map { + case (candidateDetail, copyIds) => + val pushCandidate = candidateDetail.candidate + val userMap = entitiesMaps.userMap + val relationshipMap = entitiesMaps.relationshipMap + + val hydratedCandidate = pushCandidate match { + + case f1TweetCandidate: F1FirstDegree => + getHydratedCandidateForF1FirstDegreeTweet( + f1TweetCandidate, + userMap, + relationshipMap, + copyIds) + + case tweetRetweet: TweetRetweetCandidate => + getHydratedCandidateForTweetRetweet(tweetRetweet, userMap, copyIds) + + case tweetFavorite: TweetFavoriteCandidate => + getHydratedCandidateForTweetFavorite(tweetFavorite, userMap, copyIds) + + case tripTweetCandidate: OutOfNetworkTweetCandidate with TripCandidate => + getHydratedCandidateForTripTweetCandidate(tripTweetCandidate, userMap, copyIds) + + case outOfNetworkTweetCandidate: OutOfNetworkTweetCandidate with TopicCandidate => + getHydratedCandidateForOutOfNetworkTweetCandidate( + outOfNetworkTweetCandidate, + userMap, + copyIds) + + case topicProofTweetCandidate: TopicProofTweetCandidate => + getHydratedTopicProofTweetCandidate(topicProofTweetCandidate, userMap, copyIds) + + case subscribedSearchTweetCandidate: SubscribedSearchTweetCandidate => + getHydratedSubscribedSearchTweetCandidate( + subscribedSearchTweetCandidate, + userMap, + copyIds) + + case listRecommendation: ListPushCandidate => + getHydratedListCandidate(apiListStore, listRecommendation, copyIds) + + case discoverTwitterCandidate: DiscoverTwitterCandidate => + getHydratedCandidateForDiscoverTwitterCandidate(discoverTwitterCandidate, copyIds) + + case topTweetImpressionsCandidate: TopTweetImpressionsCandidate => + getHydratedCandidateForTopTweetImpressionsCandidate( + topTweetImpressionsCandidate, + copyIds) + + case trendTweetCandidate: TrendTweetCandidate => + new TrendTweetPushCandidate( + trendTweetCandidate, + trendTweetCandidate.authorId.flatMap(userMap.get), + copyIds) + + case unknownCandidate 
=> + throw new IllegalArgumentException( + s"Incorrect candidate for hydration: ${unknownCandidate.commonRecType}") + } + + CandidateDetails( + hydratedCandidate, + source = candidateDetail.source + ) + } + } + /** Hydrates raw candidates into push candidates: expands each candidate with its copy ids, collects and queries the users and relationship edges they reference, then builds the typed candidates and counts them per CRT. MrUserState stats are updated (from the first candidate's target) only when the request is not a logged-out one. */ + def apply( + candidateDetails: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + val isLoggedOutRequest = + candidateDetails.headOption.exists(_.candidate.target.isLoggedOutUser) + if (!isLoggedOutRequest) { + candidateDetails.headOption.foreach { cd => + MrUserStateUtil.updateMrUserStateStats(cd.candidate.target)(mrUserStateStat) + } + } + + expandCandidatesWithCopy(candidateDetails).flatMap { candidateDetailsWithCopy => + candidateWithCopyNumStat.add(candidateDetailsWithCopy.size) + val entities = collectEntities(candidateDetailsWithCopy.map(_._1)) + queryEntities(entities).map { entitiesMap => + val updatedCandidates = updateCandidates(candidateDetailsWithCopy, entitiesMap) + updatedCandidates.foreach { cand => + hydratedCandidateStat.counter(cand.candidate.commonRecType.name).incr() + } + updatedCandidates + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHFeatureHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHFeatureHydrator.scala new file mode 100644 index 000000000..6d1172cb9 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHFeatureHydrator.scala @@ -0,0 +1,69 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.FeatureMap +import com.twitter.frigate.data_pipeline.features_common.MrRequestContextForFeatureStore +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.ml.HydrationContextBuilder +import 
com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.util.MrUserStateUtil +import com.twitter.nrel.heavyranker.FeatureHydrator +import com.twitter.util.Future + +class RFPHFeatureHydrator( + featureHydrator: FeatureHydrator +)( + implicit globalStats: StatsReceiver) { + + implicit val statsReceiver: StatsReceiver = + globalStats.scope("RefreshForPushHandler") + + //stat for feature hydration + private val featureHydrationEnabledCounter = statsReceiver.counter("featureHydrationEnabled") + private val mrUserStateStat = statsReceiver.scope("mr_user_state") + /** Builds a hydration context for each candidate, hydrates all contexts with a single featureHydrator.hydrateCandidate call, and merges each candidate's returned features into it in place. A candidate whose context has no entry in the result gets an empty FeatureMap merged. */ + private def hydrateFromRelevanceHydrator( + candidateDetails: Seq[CandidateDetails[PushCandidate]], + mrRequestContextForFeatureStore: MrRequestContextForFeatureStore + ): Future[Unit] = { + val pushCandidates = candidateDetails.map(_.candidate) + val candidatesAndContextsFut = Future.collect(pushCandidates.map { pc => + val contextFut = HydrationContextBuilder.build(pc) + contextFut.map { ctx => (pc, ctx) } + }) + candidatesAndContextsFut.flatMap { candidatesAndContexts => + val contexts = candidatesAndContexts.map(_._2) + val resultsFut = featureHydrator.hydrateCandidate(contexts, mrRequestContextForFeatureStore) + resultsFut.map { hydrationResult => + candidatesAndContexts.foreach { + case (pushCandidate, context) => + val resultFeatures = hydrationResult.getOrElse(context, FeatureMap()) + pushCandidate.mergeFeatures(resultFeatures) + } + } + } + } + /** Entry point for feature hydration. Uses the first candidate's target to update MrUserState stats and to read DisableAllRelevanceParam: when that param is set the input is returned untouched, otherwise the candidates are hydrated and the same sequence is returned. An empty input yields Future.Nil. */ + def candidateFeatureHydration( + candidateDetails: Seq[CandidateDetails[PushCandidate]], + mrRequestContextForFeatureStore: MrRequestContextForFeatureStore + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + candidateDetails.headOption match { + case Some(cand) => + val target = cand.candidate.target + MrUserStateUtil.updateMrUserStateStats(target)(mrUserStateStat) + if (target.params(PushParams.DisableAllRelevanceParam)) { + Future.value(candidateDetails) + } else { + featureHydrationEnabledCounter.incr() + for { + _ <- 
hydrateFromRelevanceHydrator(candidateDetails, mrRequestContextForFeatureStore) + } yield { + candidateDetails + } + } + case _ => Future.Nil + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHPrerankFilter.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHPrerankFilter.scala new file mode 100644 index 000000000..fe52428b3 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHPrerankFilter.scala @@ -0,0 +1,104 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.predicate.PreRankingPredicates +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.SequentialPredicate +import com.twitter.util._ + +class RFPHPrerankFilter( +)( + globalStats: StatsReceiver) { + def filter( + target: Target, + hydratedCandidates: Seq[CandidateDetails[PushCandidate]] + ): Future[ + (Seq[CandidateDetails[PushCandidate]], Seq[CandidateResult[PushCandidate, Result]]) + ] = { + lazy val filterStats: StatsReceiver = globalStats.scope("RefreshForPushHandler/filter") + lazy val okFilterCounter: Counter = filterStats.counter("ok") + lazy val invalidFilterCounter: Counter = filterStats.counter("invalid") + lazy val invalidFilterStat: StatsReceiver = filterStats.scope("invalid") + lazy val invalidFilterReasonStat: StatsReceiver = invalidFilterStat.scope("reason") + val allCandidatesFilteredPreRank = filterStats.counter("all_candidates_filtered") + + lazy val preRankingPredicates = PreRankingPredicates( + filterStats.scope("predicates") + ) + + lazy val preRankingPredicateChain = + new 
SequentialPredicate[PushCandidate](preRankingPredicates) + + val predicateChain = if (target.pushContext.exists(_.predicatesToEnable.exists(_.nonEmpty))) { + val predicatesToEnable = target.pushContext.flatMap(_.predicatesToEnable).getOrElse(Nil) + new SequentialPredicate[PushCandidate](preRankingPredicates.filter { pred => + predicatesToEnable.contains(pred.name) + }) + } else preRankingPredicateChain + + predicateChain + .track(hydratedCandidates.map(_.candidate)) + .map { results => + val resultForPreRankFiltering = results + .zip(hydratedCandidates) + .foldLeft( + ( + Seq.empty[CandidateDetails[PushCandidate]], + Seq.empty[CandidateResult[PushCandidate, Result]] + ) + ) { + case ((goodCandidates, filteredCandidates), (result, candidateDetails)) => + result match { + case None => + okFilterCounter.incr() + (goodCandidates :+ candidateDetails, filteredCandidates) + + case Some(pred: NamedPredicate[_]) => + invalidFilterCounter.incr() + invalidFilterReasonStat.counter(pred.name).incr() + invalidFilterReasonStat + .scope(candidateDetails.candidate.commonRecType.toString).counter( + pred.name).incr() + + val r = Invalid(Some(pred.name)) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + case Some(_) => + invalidFilterCounter.incr() + invalidFilterReasonStat.counter("unknown").incr() + invalidFilterReasonStat + .scope(candidateDetails.candidate.commonRecType.toString).counter( + "unknown").incr() + + val r = Invalid(Some("Filtered by un-named predicate")) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + } + } + + resultForPreRankFiltering match { + case (validCandidates, _) if validCandidates.isEmpty && hydratedCandidates.nonEmpty => + allCandidatesFilteredPreRank.incr() + case _ => () + } + + resultForPreRankFiltering + } + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHRestrictStep.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHRestrictStep.scala new file mode 100644 index 000000000..037479111 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHRestrictStep.scala @@ -0,0 +1,34 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.target.TargetScoringDetails + +class RFPHRestrictStep()(implicit stats: StatsReceiver) { + + private val statsReceiver: StatsReceiver = stats.scope("RefreshForPushHandler") + private val restrictStepStats: StatsReceiver = statsReceiver.scope("restrict") + private val restrictStepNumCandidatesDroppedStat: Stat = + restrictStepStats.stat("candidates_dropped") + + /** + * Limit the number of candidates that enter the Take step + */ + def restrict( + target: TargetUser with TargetABDecider with TargetScoringDetails, + candidates: Seq[CandidateDetails[PushCandidate]] + ): (Seq[CandidateDetails[PushCandidate]], Seq[CandidateDetails[PushCandidate]]) = { + if (target.params(PushFeatureSwitchParams.EnableRestrictStep)) { + val restrictSizeParam = PushFeatureSwitchParams.RestrictStepSize + val (newCandidates, filteredCandidates) = candidates.splitAt(target.params(restrictSizeParam)) + val numDropped = candidates.length - newCandidates.length + restrictStepNumCandidatesDroppedStat.add(numDropped) + (newCandidates, filteredCandidates) + } else (candidates, Seq.empty) + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHStatsRecorder.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHStatsRecorder.scala new file mode 100644 index 000000000..c09b4348a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RFPHStatsRecorder.scala @@ -0,0 +1,77 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.CommonRecommendationType + +class RFPHStatsRecorder(implicit statsReceiver: StatsReceiver) { + + private val selectedCandidateScoreStats: StatsReceiver = + statsReceiver.scope("score_of_sent_candidate_times_10000") + + private val emptyScoreStats: StatsReceiver = + statsReceiver.scope("score_of_sent_candidate_empty") + + def trackPredictionScoreStats(candidate: PushCandidate): Unit = { + candidate.mrWeightedOpenOrNtabClickRankingProbability.foreach { + case Some(s) => + selectedCandidateScoreStats + .stat("weighted_open_or_ntab_click_ranking") + .add((s * 10000).toFloat) + case None => + emptyScoreStats.counter("weighted_open_or_ntab_click_ranking").incr() + } + candidate.mrWeightedOpenOrNtabClickFilteringProbability.foreach { + case Some(s) => + selectedCandidateScoreStats + .stat("weighted_open_or_ntab_click_filtering") + .add((s * 10000).toFloat) + case None => + emptyScoreStats.counter("weighted_open_or_ntab_click_filtering").incr() + } + candidate.mrWeightedOpenOrNtabClickRankingProbability.foreach { + case Some(s) => + selectedCandidateScoreStats + .scope(candidate.commonRecType.toString) + .stat("weighted_open_or_ntab_click_ranking") + .add((s * 10000).toFloat) + case None => + emptyScoreStats + .scope(candidate.commonRecType.toString) + 
.counter("weighted_open_or_ntab_click_ranking") + .incr() + } + } + + def refreshRequestExceptionStats( + exception: Throwable, + bStats: StatsReceiver + ): Unit = { + bStats.counter("failures").incr() + bStats.scope("failures").counter(exception.getClass.getCanonicalName).incr() + } + + def loggedOutRequestExceptionStats( + exception: Throwable, + bStats: StatsReceiver + ): Unit = { + bStats.counter("logged_out_failures").incr() + bStats.scope("failures").counter(exception.getClass.getCanonicalName).incr() + } + + def rankDistributionStats( + candidatesDetails: Seq[CandidateDetails[PushCandidate]], + numRecsPerTypeStat: (CommonRecommendationType => Stat) + ): Unit = { + candidatesDetails + .groupBy { c => + c.candidate.commonRecType + } + .mapValues { s => + s.size + } + .foreach { case (crt, numRecs) => numRecsPerTypeStat(crt).add(numRecs) } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushHandler.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushHandler.scala new file mode 100644 index 000000000..17fb846cf --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushHandler.scala @@ -0,0 +1,292 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.Stats.track +import com.twitter.frigate.common.base.Stats.trackSeq +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.logger.MRLogger +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.adaptor._ +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import 
com.twitter.frigate.pushservice.rank.RFPHLightRanker +import com.twitter.frigate.pushservice.rank.RFPHRanker +import com.twitter.frigate.pushservice.scriber.MrRequestScribeHandler +import com.twitter.frigate.pushservice.take.candidate_validator.RFPHCandidateValidator +import com.twitter.frigate.pushservice.target.PushTargetUserBuilder +import com.twitter.frigate.pushservice.target.RFPHTargetPredicates +import com.twitter.frigate.pushservice.util.RFPHTakeStepUtil +import com.twitter.frigate.pushservice.util.AdhocStatsUtil +import com.twitter.frigate.pushservice.thriftscala.PushContext +import com.twitter.frigate.pushservice.thriftscala.RefreshRequest +import com.twitter.frigate.pushservice.thriftscala.RefreshResponse +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.Predicate +import com.twitter.timelines.configapi.FeatureValue +import com.twitter.util._ + +case class ResultWithDebugInfo(result: Result, predicateResults: Seq[PredicateWithResult]) + +class RefreshForPushHandler( + val pushTargetUserBuilder: PushTargetUserBuilder, + val candSourceGenerator: PushCandidateSourceGenerator, + rfphRanker: RFPHRanker, + candidateHydrator: PushCandidateHydrator, + candidateValidator: RFPHCandidateValidator, + rfphTakeStepUtil: RFPHTakeStepUtil, + rfphRestrictStep: RFPHRestrictStep, + val rfphNotifier: RefreshForPushNotifier, + rfphStatsRecorder: RFPHStatsRecorder, + mrRequestScriberNode: String, + rfphFeatureHydrator: RFPHFeatureHydrator, + rfphPrerankFilter: RFPHPrerankFilter, + rfphLightRanker: RFPHLightRanker +)( + globalStats: StatsReceiver) + extends FetchRankFlowWithHydratedCandidates[Target, RawCandidate, PushCandidate] { + + val log = MRLogger("RefreshForPushHandler") + + implicit val statsReceiver: StatsReceiver = + globalStats.scope("RefreshForPushHandler") + private val maxCandidatesToBatchInTakeStat: Stat = + statsReceiver.stat("max_cands_to_batch_in_take") + + private val rfphRequestCounter = 
statsReceiver.counter("requests") + + private val buildTargetStats = statsReceiver.scope("build_target") + private val processStats = statsReceiver.scope("process") + private val notifyStats = statsReceiver.scope("notify") + + private val lightRankingStats: StatsReceiver = statsReceiver.scope("light_ranking") + private val reRankingStats: StatsReceiver = statsReceiver.scope("rerank") + private val featureHydrationLatency: StatsReceiver = + statsReceiver.scope("featureHydrationLatency") + private val candidateHydrationStats: StatsReceiver = statsReceiver.scope("candidate_hydration") + + lazy val candSourceEligibleCounter: Counter = + candidateStats.counter("cand_source_eligible") + lazy val candSourceNotEligibleCounter: Counter = + candidateStats.counter("cand_source_not_eligible") + + //pre-ranking stats + val allCandidatesFilteredPreRank = filterStats.counter("all_candidates_filtered") + + // total invalid candidates + val totalStats: StatsReceiver = statsReceiver.scope("total") + val totalInvalidCandidatesStat: Stat = totalStats.stat("candidates_invalid") + + val mrRequestScribeBuiltStats: Counter = statsReceiver.counter("mr_request_scribe_built") + + val mrRequestCandidateScribeStats = statsReceiver.scope("mr_request_scribe_candidates") + val mrRequestTargetScribeStats = statsReceiver.scope("mr_request_scribe_target") + + val mrRequestScribeHandler = + new MrRequestScribeHandler(mrRequestScriberNode, statsReceiver.scope("mr_request_scribe")) + + val adhocStatsUtil = new AdhocStatsUtil(statsReceiver.scope("adhoc_stats")) + + private def numRecsPerTypeStat(crt: CommonRecommendationType) = + fetchStats.scope(crt.toString).stat("dist") + + // static list of target predicates + private val targetPredicates = RFPHTargetPredicates(targetStats.scope("predicates")) + + def buildTarget( + userId: Long, + inputPushContext: Option[PushContext], + forcedFeatureValues: Option[Map[String, FeatureValue]] = None + ): Future[Target] = + pushTargetUserBuilder.buildTarget(userId, 
inputPushContext, forcedFeatureValues) + + override def targetPredicates(target: Target): List[Predicate[Target]] = targetPredicates + + override def isTargetValid(target: Target): Future[Result] = { + val resultFut = if (target.skipFilters) { + Future.value(trackTargetPredStats(None)) + } else { + predicateSeq(target).track(Seq(target)).map { resultArr => + trackTargetPredStats(resultArr(0)) + } + } + track(targetStats)(resultFut) + } + + override def candidateSources( + target: Target + ): Future[Seq[CandidateSource[Target, RawCandidate]]] = { + Future + .collect(candSourceGenerator.sources.map { cs => + cs.isCandidateSourceAvailable(target).map { isEligible => + if (isEligible) { + candSourceEligibleCounter.incr() + Some(cs) + } else { + candSourceNotEligibleCounter.incr() + None + } + } + }).map(_.flatten) + } + + override def updateCandidateCounter( + candidateResults: Seq[CandidateResult[PushCandidate, Result]] + ): Unit = { + candidateResults.foreach { + case candidateResult if candidateResult.result == OK => + okCandidateCounter.incr() + case candidateResult if candidateResult.result.isInstanceOf[Invalid] => + invalidCandidateCounter.incr() + case _ => + } + } + + override def hydrateCandidates( + candidates: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = candidateHydrator(candidates) + + override def filter( + target: Target, + hydratedCandidates: Seq[CandidateDetails[PushCandidate]] + ): Future[ + (Seq[CandidateDetails[PushCandidate]], Seq[CandidateResult[PushCandidate, Result]]) + ] = rfphPrerankFilter.filter(target, hydratedCandidates) + + def lightRankAndTake( + target: Target, + candidates: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + rfphLightRanker.rank(target, candidates) + } + + override def rank( + target: Target, + candidatesDetails: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + val featureHydratedCandidatesFut 
= trackSeq(featureHydrationLatency)( + rfphFeatureHydrator + .candidateFeatureHydration(candidatesDetails, target.mrRequestContextForFeatureStore) + ) + featureHydratedCandidatesFut.flatMap { featureHydratedCandidates => + rfphStatsRecorder.rankDistributionStats(featureHydratedCandidates, numRecsPerTypeStat) + rfphRanker.initialRank(target, candidatesDetails) + } + } + + def reRank( + target: Target, + rankedCandidates: Seq[CandidateDetails[PushCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + rfphRanker.reRank(target, rankedCandidates) + } + + override def validCandidates( + target: Target, + candidates: Seq[PushCandidate] + ): Future[Seq[Result]] = { + Future.collect(candidates.map { candidate => + rfphTakeStepUtil.isCandidateValid(candidate, candidateValidator).map(res => res.result) + }) + } + + override def desiredCandidateCount(target: Target): Int = target.desiredCandidateCount + + override def batchForCandidatesCheck(target: Target): Int = { + val fsParam = PushFeatureSwitchParams.NumberOfMaxCandidatesToBatchInRFPHTakeStep + val maxToBatch = target.params(fsParam) + maxCandidatesToBatchInTakeStat.add(maxToBatch) + maxToBatch + } + + override def process( + target: Target, + externalCandidates: Seq[RawCandidate] = Nil + ): Future[Response[PushCandidate, Result]] = { + isTargetValid(target).flatMap { + case OK => + for { + candidatesFromSources <- trackSeq(fetchStats)(fetchCandidates(target)) + externalCandidateDetails = externalCandidates.map( + CandidateDetails(_, "refresh_for_push_handler_external_candidate")) + allCandidates = candidatesFromSources ++ externalCandidateDetails + hydratedCandidatesWithCopy <- + trackSeq(candidateHydrationStats)(hydrateCandidates(allCandidates)) + _ = adhocStatsUtil.getCandidateSourceStats(hydratedCandidatesWithCopy) + (candidates, preRankingFilteredCandidates) <- + track(filterStats)(filter(target, hydratedCandidatesWithCopy)) + _ = adhocStatsUtil.getPreRankingFilterStats(preRankingFilteredCandidates) + 
lightRankerFilteredCandidates <- + trackSeq(lightRankingStats)(lightRankAndTake(target, candidates)) + _ = adhocStatsUtil.getLightRankingStats(lightRankerFilteredCandidates) + rankedCandidates <- trackSeq(rankingStats)(rank(target, lightRankerFilteredCandidates)) + _ = adhocStatsUtil.getRankingStats(rankedCandidates) + rerankedCandidates <- trackSeq(reRankingStats)(reRank(target, rankedCandidates)) + _ = adhocStatsUtil.getReRankingStats(rerankedCandidates) + (restrictedCandidates, restrictFilteredCandidates) = + rfphRestrictStep.restrict(target, rerankedCandidates) + allTakeCandidateResults <- track(takeStats)( + take(target, restrictedCandidates, desiredCandidateCount(target)) + ) + _ = adhocStatsUtil.getTakeCandidateResultStats(allTakeCandidateResults) + _ <- track(mrRequestCandidateScribeStats)( + mrRequestScribeHandler.scribeForCandidateFiltering( + target, + hydratedCandidatesWithCopy, + preRankingFilteredCandidates, + rankedCandidates, + rerankedCandidates, + restrictFilteredCandidates, + allTakeCandidateResults + )) + } yield { + + /** + * Take processes post restrict step candidates and returns both: + * 1. valid + invalid candidates + * 2. 
Candidates that are not processed (more than desired) + restricted candidates + * We need #2 only for importance sampling + */ + val takeCandidateResults = + allTakeCandidateResults.filterNot { candResult => + candResult.result == MoreThanDesiredCandidates + } + + val totalInvalidCandidates = { + preRankingFilteredCandidates.size + //pre-ranking filtered candidates + (rerankedCandidates.length - restrictedCandidates.length) + //candidates reject in restrict step + takeCandidateResults.count(_.result != OK) //candidates reject in take step + } + takeInvalidCandidateDist.add( + takeCandidateResults + .count(_.result != OK) + ) // take step invalid candidates + totalInvalidCandidatesStat.add(totalInvalidCandidates) + val allCandidateResults = takeCandidateResults ++ preRankingFilteredCandidates + Response(OK, allCandidateResults) + } + + case result: Result => + for (_ <- track(mrRequestTargetScribeStats)( + mrRequestScribeHandler.scribeForTargetFiltering(target, result))) yield { + mrRequestScribeBuiltStats.incr() + Response(result, Nil) + } + } + } + + def refreshAndSend(request: RefreshRequest): Future[RefreshResponse] = { + rfphRequestCounter.incr() + for { + target <- track(buildTargetStats)( + pushTargetUserBuilder + .buildTarget(request.userId, request.context)) + response <- track(processStats)(process(target, externalCandidates = Seq.empty)) + refreshResponse <- track(notifyStats)(rfphNotifier.checkResponseAndNotify(response, target)) + } yield { + refreshResponse + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushNotifier.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushNotifier.scala new file mode 100644 index 000000000..ae68d46ea --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/RefreshForPushNotifier.scala @@ -0,0 +1,128 @@ +package com.twitter.frigate.pushservice.refresh_handler + +import 
com.twitter.finagle.stats.BroadcastStatsReceiver +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.Stats.track +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.config.CommonConstants +import com.twitter.frigate.common.util.PushServiceUtil.FilteredRefreshResponseFut +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.take.CandidateNotifier +import com.twitter.frigate.pushservice.util.ResponseStatsTrackUtils.trackStatsForResponseToRequest +import com.twitter.frigate.pushservice.thriftscala.PushStatus +import com.twitter.frigate.pushservice.thriftscala.RefreshResponse +import com.twitter.util.Future +import com.twitter.util.JavaTimer +import com.twitter.util.Timer + +class RefreshForPushNotifier( + rfphStatsRecorder: RFPHStatsRecorder, + candidateNotifier: CandidateNotifier +)( + globalStats: StatsReceiver) { + + private implicit val statsReceiver: StatsReceiver = + globalStats.scope("RefreshForPushHandler") + + private val pushStats: StatsReceiver = statsReceiver.scope("push") + private val sendLatency: StatsReceiver = statsReceiver.scope("send_handler") + implicit private val timer: Timer = new JavaTimer(true) + + private def notify( + candidatesResult: CandidateResult[PushCandidate, Result], + target: Target, + receivers: Seq[StatsReceiver] + ): Future[RefreshResponse] = { + + val candidate = candidatesResult.candidate + + val predsResult = candidatesResult.result + + if (predsResult != OK) { + val invalidResult = predsResult + invalidResult match { + case Invalid(Some(reason)) => + Future.value(RefreshResponse(PushStatus.Filtered, Some(reason))) + case _ => + Future.value(RefreshResponse(PushStatus.Filtered, None)) + } + } else { + rfphStatsRecorder.trackPredictionScoreStats(candidate) + + val isQualityUprankingCandidate = 
candidate.mrQualityUprankingBoost.isDefined + val commonRecTypeStats = Seq( + statsReceiver.scope(candidate.commonRecType.toString), + globalStats.scope(candidate.commonRecType.toString) + ) + val qualityUprankingStats = Seq( + statsReceiver.scope("QualityUprankingCandidates").scope(candidate.commonRecType.toString), + globalStats.scope("QualityUprankingCandidates").scope(candidate.commonRecType.toString) + ) + + val receiversWithRecTypeStats = { + if (isQualityUprankingCandidate) { + receivers ++ commonRecTypeStats ++ qualityUprankingStats + } else { + receivers ++ commonRecTypeStats + } + } + track(sendLatency)(candidateNotifier.notify(candidate).map { res => + trackStatsForResponseToRequest( + candidate.commonRecType, + candidate.target, + res, + receiversWithRecTypeStats + )(globalStats) + RefreshResponse(res.status) + }) + } + } + + def checkResponseAndNotify( + response: Response[PushCandidate, Result], + targetUserContext: Target + ): Future[RefreshResponse] = { + val receivers = Seq(statsReceiver) + val refreshResponse = response match { + case Response(OK, processedCandidates) => + // valid rec candidates + val validCandidates = processedCandidates.filter(_.result == OK) + + // top rec candidate + validCandidates.headOption match { + case Some(candidatesResult) => + candidatesResult.result match { + case OK => + notify(candidatesResult, targetUserContext, receivers) + .onSuccess { nr => + pushStats.scope("result").counter(nr.status.name).incr() + } + case _ => + targetUserContext.isTeamMember.flatMap { isTeamMember => + FilteredRefreshResponseFut + } + } + case _ => + FilteredRefreshResponseFut + } + case Response(Invalid(reason), _) => + // invalid target with known reason + FilteredRefreshResponseFut.map(_.copy(targetFilteredBy = reason)) + case _ => + // invalid target + FilteredRefreshResponseFut + } + + val bStats = BroadcastStatsReceiver(receivers) + Stat + .timeFuture(bStats.stat("latency"))( + refreshResponse + 
.raiseWithin(CommonConstants.maxPushRequestDuration) + ) + .onFailure { exception => + rfphStatsRecorder.refreshRequestExceptionStats(exception, bStats) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/BaseCopyFramework.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/BaseCopyFramework.scala new file mode 100644 index 000000000..47426a386 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/BaseCopyFramework.scala @@ -0,0 +1,79 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.MRNtabCopy +import com.twitter.frigate.common.util.MRPushCopy +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.util.Future + +abstract class BaseCopyFramework(statsReceiver: StatsReceiver) { + + private val NoAvailableCopyStat = statsReceiver.scope("no_copy_for_crt") + private val NoAvailableNtabCopyStat = statsReceiver.scope("no_ntab_copy") + + /** + * Instantiate push copy filters + */ + protected final val copyFilters = new CopyFilters(statsReceiver.scope("filters")) + + /** + * + * The following method fetches all the push copies for a [[com.twitter.frigate.thriftscala.CommonRecommendationType]] + * associated with a candidate and then filters the eligible copies based on + * [[PushTypes.PushCandidate]] features. 
These filters are defined in + * [[CopyFilters]] + * + * @param rawCandidate - [[RawCandidate]] object representing a recommendation candidate + * + * @return - set of eligible push copies for a given candidate + */ + protected[cross] final def getEligiblePushCopiesFromCandidate( + rawCandidate: RawCandidate + ): Future[Seq[MRPushCopy]] = { + val pushCopiesFromRectype = CandidateToCopy.getPushCopiesFromRectype(rawCandidate.commonRecType) + + if (pushCopiesFromRectype.isEmpty) { + NoAvailableCopyStat.counter(rawCandidate.commonRecType.name).incr() + throw new IllegalStateException(s"No Copy defined for CRT: " + rawCandidate.commonRecType) + } + pushCopiesFromRectype + .map(pushCopySet => copyFilters.execute(rawCandidate, pushCopySet.toSeq)) + .getOrElse(Future.value(Seq.empty)) + } + + /** + * + * This method essentially forms the base for cross-step for the MagicRecs Copy Framework. Given + * a recommendation type this returns a set of tuples wherein each tuple is a pair of push and + * ntab copy eligible for the said recommendation type + * + * @param rawCandidate - [[RawCandidate]] object representing a recommendation candidate + * @return - Set of eligible [[MRPushCopy]], Option[[MRNtabCopy]] for a given recommendation type + */ + protected[cross] final def getEligiblePushAndNtabCopiesFromCandidate( + rawCandidate: RawCandidate + ): Future[Seq[(MRPushCopy, Option[MRNtabCopy])]] = { + + val eligiblePushCopies = getEligiblePushCopiesFromCandidate(rawCandidate) + + eligiblePushCopies.map { pushCopies => + val setBuilder = Set.newBuilder[(MRPushCopy, Option[MRNtabCopy])] + pushCopies.foreach { pushCopy => + val ntabCopies = CandidateToCopy.getNtabcopiesFromPushcopy(pushCopy) + val pushNtabCopyPairs = ntabCopies match { + case Some(ntabCopySet) => + if (ntabCopySet.isEmpty) { + NoAvailableNtabCopyStat.counter(s"copy_id: ${pushCopy.copyId}").incr() + Set(pushCopy -> None) + } // push copy only + else ntabCopySet.map(pushCopy -> Some(_)) + + case None => + 
Set.empty[(MRPushCopy, Option[MRNtabCopy])] // no push or ntab copy + } + setBuilder ++= pushNtabCopyPairs + } + setBuilder.result().toSeq + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyExpansion.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyExpansion.scala new file mode 100644 index 000000000..9748c90ff --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyExpansion.scala @@ -0,0 +1,56 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.util.MRNtabCopy +import com.twitter.frigate.common.util.MRPushCopy +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.util.Future + +/** + * @param statsReceiver - stats receiver object + */ +class CandidateCopyExpansion(statsReceiver: StatsReceiver) + extends BaseCopyFramework(statsReceiver) { + + /** + * + * Given a [[CandidateDetails]] object representing a push recommendation candidate this method + * expands it to multiple candidates, each tagged with a push copy id and ntab copy id to + * represent the eligible copies for the given recommendation candidate + * + * @param candidateDetails - [[CandidateDetails]] objects containing a recommendation candidate + * + * @return - list of tuples of [[PushTypes.RawCandidate]] and [[CopyIds]] + */ + private final def crossCandidateDetailsWithCopyId( + candidateDetails: CandidateDetails[RawCandidate] + ): Future[Seq[(CandidateDetails[RawCandidate], CopyIds)]] = { + val eligibleCopyPairs = getEligiblePushAndNtabCopiesFromCandidate(candidateDetails.candidate) + val copyPairs = eligibleCopyPairs.map(_.map { + case (pushCopy: MRPushCopy, ntabCopy: 
Option[MRNtabCopy]) => + CopyIds( + pushCopyId = Some(pushCopy.copyId), + ntabCopyId = ntabCopy.map(_.copyId) + ) + }) + + copyPairs.map(_.map((candidateDetails, _))) + } + + /** + * + * This method takes as input a list of [[CandidateDetails]] objects which contain the push + * recommendation candidates for a given target user. It expands each input candidate into + * multiple candidates, each tagged with a push copy id and ntab copy id to represent the eligible + * copies for the given recommendation candidate + * + * @param candidateDetailsSeq - list of fetched candidates for push recommendation + * @return - list of tuples of [[RawCandidate]] and [[CopyIds]] + */ + final def expandCandidatesWithCopyId( + candidateDetailsSeq: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[(CandidateDetails[RawCandidate], CopyIds)]] = + Future.collect(candidateDetailsSeq.map(crossCandidateDetailsWithCopyId)).map(_.flatten) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyPair.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyPair.scala new file mode 100644 index 000000000..4eca41730 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateCopyPair.scala @@ -0,0 +1,11 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.frigate.common.util.MRPushCopy +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate + +/** + * + * @param candidate: [[RawCandidate]] is a recommendation candidate + * @param pushCopy: [[MRPushCopy]] eligible for candidate + */ +case class CandidateCopyPair(candidate: RawCandidate, pushCopy: MRPushCopy) diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateToCopy.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateToCopy.scala new file mode 100644 index 
000000000..e7fbefe16 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CandidateToCopy.scala @@ -0,0 +1,263 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.frigate.common.util.MrNtabCopyObjects +import com.twitter.frigate.common.util.MrPushCopyObjects +import com.twitter.frigate.common.util._ +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.CommonRecommendationType._ + +object CandidateToCopy { + + // Static map from a CommonRecommendationType to set of eligible push notification copies + private[cross] val rectypeToPushCopy: Map[CommonRecommendationType, Set[ + MRPushCopy + ]] = + Map[CommonRecommendationType, Set[MRPushCopy]]( + F1FirstdegreeTweet -> Set( + MrPushCopyObjects.FirstDegreeJustTweetedBoldTitle + ), + F1FirstdegreePhoto -> Set( + MrPushCopyObjects.FirstDegreePhotoJustTweetedBoldTitle + ), + F1FirstdegreeVideo -> Set( + MrPushCopyObjects.FirstDegreeVideoJustTweetedBoldTitle + ), + TweetRetweet -> Set( + MrPushCopyObjects.TweetRetweetWithOneDisplaySocialContextsWithText, + MrPushCopyObjects.TweetRetweetWithTwoDisplaySocialContextsWithText, + MrPushCopyObjects.TweetRetweetWithOneDisplayAndKOtherSocialContextsWithText + ), + TweetRetweetPhoto -> Set( + MrPushCopyObjects.TweetRetweetPhotoWithOneDisplaySocialContextWithText, + MrPushCopyObjects.TweetRetweetPhotoWithTwoDisplaySocialContextsWithText, + MrPushCopyObjects.TweetRetweetPhotoWithOneDisplayAndKOtherSocialContextsWithText + ), + TweetRetweetVideo -> Set( + MrPushCopyObjects.TweetRetweetVideoWithOneDisplaySocialContextWithText, + MrPushCopyObjects.TweetRetweetVideoWithTwoDisplaySocialContextsWithText, + MrPushCopyObjects.TweetRetweetVideoWithOneDisplayAndKOtherSocialContextsWithText + ), + TweetFavorite -> Set( + MrPushCopyObjects.TweetLikeOneSocialContextWithText, + MrPushCopyObjects.TweetLikeTwoSocialContextWithText, + 
MrPushCopyObjects.TweetLikeMultipleSocialContextWithText + ), + TweetFavoritePhoto -> Set( + MrPushCopyObjects.TweetLikePhotoOneSocialContextWithText, + MrPushCopyObjects.TweetLikePhotoTwoSocialContextWithText, + MrPushCopyObjects.TweetLikePhotoMultipleSocialContextWithText + ), + TweetFavoriteVideo -> Set( + MrPushCopyObjects.TweetLikeVideoOneSocialContextWithText, + MrPushCopyObjects.TweetLikeVideoTwoSocialContextWithText, + MrPushCopyObjects.TweetLikeVideoMultipleSocialContextWithText + ), + UnreadBadgeCount -> Set(MrPushCopyObjects.UnreadBadgeCount), + InterestBasedTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + InterestBasedPhoto -> Set(MrPushCopyObjects.RecommendedForYouPhoto), + InterestBasedVideo -> Set(MrPushCopyObjects.RecommendedForYouVideo), + UserFollow -> Set( + MrPushCopyObjects.UserFollowWithOneSocialContext, + MrPushCopyObjects.UserFollowWithTwoSocialContext, + MrPushCopyObjects.UserFollowOneDisplayAndKOtherSocialContext + ), + HermitUser -> Set( + MrPushCopyObjects.HermitUserWithOneSocialContext, + MrPushCopyObjects.HermitUserWithTwoSocialContext, + MrPushCopyObjects.HermitUserWithOneDisplayAndKOtherSocialContexts + ), + TriangularLoopUser -> Set( + MrPushCopyObjects.TriangularLoopUserWithOneSocialContext, + MrPushCopyObjects.TriangularLoopUserWithTwoSocialContexts, + MrPushCopyObjects.TriangularLoopUserOneDisplayAndKotherSocialContext + ), + ForwardAddressbookUserFollow -> Set(MrPushCopyObjects.ForwardAddressBookUserFollow), + NewsArticleNewsLanding -> Set(MrPushCopyObjects.NewsArticleNewsLandingCopy), + TopicProofTweet -> Set(MrPushCopyObjects.TopicProofTweet), + UserInterestinTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + UserInterestinPhoto -> Set(MrPushCopyObjects.RecommendedForYouPhoto), + UserInterestinVideo -> Set(MrPushCopyObjects.RecommendedForYouVideo), + TwistlyTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + TwistlyPhoto -> Set(MrPushCopyObjects.RecommendedForYouPhoto), + TwistlyVideo -> 
Set(MrPushCopyObjects.RecommendedForYouVideo), + ElasticTimelineTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + ElasticTimelinePhoto -> Set(MrPushCopyObjects.RecommendedForYouPhoto), + ElasticTimelineVideo -> Set(MrPushCopyObjects.RecommendedForYouVideo), + ExploreVideoTweet -> Set(MrPushCopyObjects.ExploreVideoTweet), + List -> Set(MrPushCopyObjects.ListRecommendation), + InterestBasedUserFollow -> Set(MrPushCopyObjects.UserFollowInterestBasedCopy), + PastEmailEngagementTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + PastEmailEngagementPhoto -> Set(MrPushCopyObjects.RecommendedForYouPhoto), + PastEmailEngagementVideo -> Set(MrPushCopyObjects.RecommendedForYouVideo), + ExplorePush -> Set(MrPushCopyObjects.ExplorePush), + ConnectTabPush -> Set(MrPushCopyObjects.ConnectTabPush), + ConnectTabWithUserPush -> Set(MrPushCopyObjects.ConnectTabWithUserPush), + AddressBookUploadPush -> Set(MrPushCopyObjects.AddressBookPush), + InterestPickerPush -> Set(MrPushCopyObjects.InterestPickerPush), + CompleteOnboardingPush -> Set(MrPushCopyObjects.CompleteOnboardingPush), + GeoPopTweet -> Set(MrPushCopyObjects.GeoPopPushCopy), + TagSpaceTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + FrsTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + TwhinTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + MrModelingBasedTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + DetopicTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + TweetImpressions -> Set(MrPushCopyObjects.TopTweetImpressions), + TrendTweet -> Set(MrPushCopyObjects.TrendTweet), + ReverseAddressbookTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + ForwardAddressbookTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + SpaceInNetwork -> Set(MrPushCopyObjects.SpaceHost), + SpaceOutOfNetwork -> Set(MrPushCopyObjects.SpaceHost), + SubscribedSearch -> Set(MrPushCopyObjects.SubscribedSearchTweet), + TripGeoTweet -> Set(MrPushCopyObjects.TripGeoTweetPushCopy), + 
CrowdSearchTweet -> Set(MrPushCopyObjects.RecommendedForYouTweet), + Digest -> Set(MrPushCopyObjects.Digest), + TripHqTweet -> Set(MrPushCopyObjects.TripHqTweetPushCopy) + ) + + // Static map from a push copy to set of eligible ntab copies + private[cross] val pushcopyToNtabcopy: Map[MRPushCopy, Set[MRNtabCopy]] = + Map[MRPushCopy, Set[MRNtabCopy]]( + MrPushCopyObjects.FirstDegreeJustTweetedBoldTitle -> Set( + MrNtabCopyObjects.FirstDegreeTweetRecent), + MrPushCopyObjects.FirstDegreePhotoJustTweetedBoldTitle -> Set( + MrNtabCopyObjects.FirstDegreeTweetRecent + ), + MrPushCopyObjects.FirstDegreeVideoJustTweetedBoldTitle -> Set( + MrNtabCopyObjects.FirstDegreeTweetRecent + ), + MrPushCopyObjects.TweetRetweetWithOneDisplaySocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetRetweetWithTwoDisplaySocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetRetweetWithOneDisplayAndKOtherSocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TweetRetweetPhotoWithOneDisplaySocialContextWithText -> Set( + MrNtabCopyObjects.TweetRetweetPhotoWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetRetweetPhotoWithTwoDisplaySocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetPhotoWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetRetweetPhotoWithOneDisplayAndKOtherSocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetPhotoWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TweetRetweetVideoWithOneDisplaySocialContextWithText -> Set( + MrNtabCopyObjects.TweetRetweetVideoWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetRetweetVideoWithTwoDisplaySocialContextsWithText -> Set( + MrNtabCopyObjects.TweetRetweetVideoWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetRetweetVideoWithOneDisplayAndKOtherSocialContextsWithText -> Set( 
+ MrNtabCopyObjects.TweetRetweetVideoWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TweetLikeOneSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetLikeTwoSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetLikeMultipleSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TweetLikePhotoOneSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikePhotoWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetLikePhotoTwoSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikePhotoWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetLikePhotoMultipleSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikePhotoWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TweetLikeVideoOneSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeVideoWithOneDisplaySocialContext + ), + MrPushCopyObjects.TweetLikeVideoTwoSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeVideoWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.TweetLikeVideoMultipleSocialContextWithText -> Set( + MrNtabCopyObjects.TweetLikeVideoWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.UnreadBadgeCount -> Set.empty[MRNtabCopy], + MrPushCopyObjects.RecommendedForYouTweet -> Set(MrNtabCopyObjects.RecommendedForYouCopy), + MrPushCopyObjects.RecommendedForYouPhoto -> Set(MrNtabCopyObjects.RecommendedForYouCopy), + MrPushCopyObjects.RecommendedForYouVideo -> Set(MrNtabCopyObjects.RecommendedForYouCopy), + MrPushCopyObjects.GeoPopPushCopy -> Set(MrNtabCopyObjects.RecommendedForYouCopy), + MrPushCopyObjects.UserFollowWithOneSocialContext -> Set( + MrNtabCopyObjects.UserFollowWithOneDisplaySocialContext + ), + MrPushCopyObjects.UserFollowWithTwoSocialContext -> Set( + MrNtabCopyObjects.UserFollowWithTwoDisplaySocialContexts + ), + 
MrPushCopyObjects.UserFollowOneDisplayAndKOtherSocialContext -> Set( + MrNtabCopyObjects.UserFollowWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.HermitUserWithOneSocialContext -> Set( + MrNtabCopyObjects.UserFollowWithOneDisplaySocialContext + ), + MrPushCopyObjects.HermitUserWithTwoSocialContext -> Set( + MrNtabCopyObjects.UserFollowWithTwoDisplaySocialContexts + ), + MrPushCopyObjects.HermitUserWithOneDisplayAndKOtherSocialContexts -> Set( + MrNtabCopyObjects.UserFollowWithOneDisplayAndKOtherSocialContexts + ), + MrPushCopyObjects.TriangularLoopUserWithOneSocialContext -> Set( + MrNtabCopyObjects.TriangularLoopUserWithOneSocialContext + ), + MrPushCopyObjects.TriangularLoopUserWithTwoSocialContexts -> Set( + MrNtabCopyObjects.TriangularLoopUserWithTwoSocialContexts + ), + MrPushCopyObjects.TriangularLoopUserOneDisplayAndKotherSocialContext -> Set( + MrNtabCopyObjects.TriangularLoopUserOneDisplayAndKOtherSocialContext + ), + MrPushCopyObjects.NewsArticleNewsLandingCopy -> Set( + MrNtabCopyObjects.NewsArticleNewsLandingCopy + ), + MrPushCopyObjects.UserFollowInterestBasedCopy -> Set( + MrNtabCopyObjects.UserFollowInterestBasedCopy + ), + MrPushCopyObjects.ForwardAddressBookUserFollow -> Set( + MrNtabCopyObjects.ForwardAddressBookUserFollow), + MrPushCopyObjects.ConnectTabPush -> Set( + MrNtabCopyObjects.ConnectTabPush + ), + MrPushCopyObjects.ExplorePush -> Set.empty[MRNtabCopy], + MrPushCopyObjects.ConnectTabWithUserPush -> Set( + MrNtabCopyObjects.UserFollowInterestBasedCopy), + MrPushCopyObjects.AddressBookPush -> Set(MrNtabCopyObjects.AddressBook), + MrPushCopyObjects.InterestPickerPush -> Set(MrNtabCopyObjects.InterestPicker), + MrPushCopyObjects.CompleteOnboardingPush -> Set(MrNtabCopyObjects.CompleteOnboarding), + MrPushCopyObjects.TopicProofTweet -> Set(MrNtabCopyObjects.TopicProofTweet), + MrPushCopyObjects.TopTweetImpressions -> Set(MrNtabCopyObjects.TopTweetImpressions), + MrPushCopyObjects.TrendTweet -> 
Set(MrNtabCopyObjects.TrendTweet), + MrPushCopyObjects.SpaceHost -> Set(MrNtabCopyObjects.SpaceHost), + MrPushCopyObjects.SubscribedSearchTweet -> Set(MrNtabCopyObjects.SubscribedSearchTweet), + MrPushCopyObjects.TripGeoTweetPushCopy -> Set(MrNtabCopyObjects.RecommendedForYouCopy), + MrPushCopyObjects.Digest -> Set(MrNtabCopyObjects.Digest), + MrPushCopyObjects.TripHqTweetPushCopy -> Set(MrNtabCopyObjects.HighQualityTweet), + MrPushCopyObjects.ExploreVideoTweet -> Set(MrNtabCopyObjects.ExploreVideoTweet), + MrPushCopyObjects.ListRecommendation -> Set(MrNtabCopyObjects.ListRecommendation), + MrPushCopyObjects.MagicFanoutCreatorSubscription -> Set( + MrNtabCopyObjects.MagicFanoutCreatorSubscription), + MrPushCopyObjects.MagicFanoutNewCreator -> Set(MrNtabCopyObjects.MagicFanoutNewCreator) + ) + + /** + * + * @param crt - [[CommonRecommendationType]] used for a frigate push notification + * + * @return - Set of [[MRPushCopy]] objects representing push copies eligible for a + * [[CommonRecommendationType]] + */ + def getPushCopiesFromRectype(crt: CommonRecommendationType): Option[Set[MRPushCopy]] = + rectypeToPushCopy.get(crt) + + /** + * + * @param pushcopy - [[MRPushCopy]] object representing a push notification copy + * @return - Set of [[MRNtabCopy]] objects that can be paired with a given [[MRPushCopy]] + */ + def getNtabcopiesFromPushcopy(pushcopy: MRPushCopy): Option[Set[MRNtabCopy]] = + pushcopyToNtabcopy.get(pushcopy) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyFilters.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyFilters.scala new file mode 100644 index 000000000..0fe5f5cdd --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyFilters.scala @@ -0,0 +1,41 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.finagle.stats.StatsReceiver +import
com.twitter.frigate.common.base._ +import com.twitter.frigate.common.util.MRPushCopy +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +private[cross] class CopyFilters(statsReceiver: StatsReceiver) { + + private val copyPredicates = new CopyPredicates(statsReceiver.scope("copy_predicate")) + + def execute(rawCandidate: RawCandidate, pushCopies: Seq[MRPushCopy]): Future[Seq[MRPushCopy]] = { + val candidateCopyPairs: Seq[CandidateCopyPair] = + pushCopies.map(CandidateCopyPair(rawCandidate, _)) + + val compositePredicate: Predicate[CandidateCopyPair] = rawCandidate match { + case _: F1FirstDegree | _: OutOfNetworkTweetCandidate | _: EventCandidate | + _: TopicProofTweetCandidate | _: ListPushCandidate | _: HermitInterestBasedUserFollow | + _: UserFollowWithoutSocialContextCandidate | _: DiscoverTwitterCandidate | + _: TopTweetImpressionsCandidate | _: TrendTweetCandidate | + _: SubscribedSearchTweetCandidate | _: DigestCandidate => + copyPredicates.alwaysTruePredicate + + case _: SocialContextActions => copyPredicates.displaySocialContextPredicate + + case _ => copyPredicates.unrecognizedCandidatePredicate // block unrecognised candidates + } + + // apply predicate to all [[MRPushCopy]] objects + val filterResults: Future[Seq[Boolean]] = compositePredicate(candidateCopyPairs) + filterResults.map { results: Seq[Boolean] => + val seqBuilder = Seq.newBuilder[MRPushCopy] + results.zip(pushCopies).foreach { + case (result, pushCopy) => if (result) seqBuilder += pushCopy + } + seqBuilder.result() + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyPredicates.scala new file mode 100644 index 000000000..980af1554 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/refresh_handler/cross/CopyPredicates.scala @@ -0,0 +1,36 @@ +package com.twitter.frigate.pushservice.refresh_handler.cross + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.hermit.predicate.Predicate + +class CopyPredicates(statsReceiver: StatsReceiver) { + val alwaysTruePredicate = Predicate + .from { _: CandidateCopyPair => + true + }.withStats(statsReceiver.scope("always_true_copy_predicate")) + + val unrecognizedCandidatePredicate = alwaysTruePredicate.flip + .withStats(statsReceiver.scope("unrecognized_candidate")) + + val displaySocialContextPredicate = Predicate + .from { candidateCopyPair: CandidateCopyPair => + candidateCopyPair.candidate match { + case candidateWithScActions: RawCandidate with SocialContextActions => + val socialContextUserIds = candidateWithScActions.socialContextActions.map(_.userId) + val countSocialContext = socialContextUserIds.size + val pushCopy = candidateCopyPair.pushCopy + + countSocialContext match { + case 1 => pushCopy.hasOneDisplaySocialContext && !pushCopy.hasOtherSocialContext + case 2 => pushCopy.hasTwoDisplayContext && !pushCopy.hasOtherSocialContext + case c if c > 2 => + pushCopy.hasOneDisplaySocialContext && pushCopy.hasOtherSocialContext + case _ => false + } + + case _ => false + } + }.withStats(statsReceiver.scope("display_social_context_predicate")) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/scriber/MrRequestScribeHandler.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/scriber/MrRequestScribeHandler.scala new file mode 100644 index 000000000..90095056a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/scriber/MrRequestScribeHandler.scala @@ -0,0 +1,388 @@ +package com.twitter.frigate.pushservice.scriber + +import 
com.twitter.bijection.Base64String +import com.twitter.bijection.Injection +import com.twitter.bijection.scrooge.BinaryScalaCodec +import com.twitter.core_workflows.user_model.thriftscala.{UserState => ThriftUserState} +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.finagle.tracing.Trace +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.CandidateResult +import com.twitter.frigate.common.base.Invalid +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.data_pipeline.features_common.PushQualityModelFeatureContext +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.scribe.thriftscala.CandidateFilteredOutStep +import com.twitter.frigate.scribe.thriftscala.CandidateRequestInfo +import com.twitter.frigate.scribe.thriftscala.MrRequestScribe +import com.twitter.frigate.scribe.thriftscala.TargetUserInfo +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.TweetNotification +import com.twitter.frigate.thriftscala.{SocialContextAction => TSocialContextAction} +import com.twitter.logging.Logger +import com.twitter.ml.api.DataRecord +import com.twitter.ml.api.Feature +import com.twitter.ml.api.FeatureType +import com.twitter.ml.api.util.SRichDataRecord +import com.twitter.ml.api.util.ScalaToJavaDataRecordConversions +import com.twitter.nrel.heavyranker.PushPredictionHelper +import com.twitter.util.Future +import com.twitter.util.Time +import java.util.UUID +import scala.collection.mutable + +class MrRequestScribeHandler(mrRequestScriberNode: String, stats: StatsReceiver) { + + private val mrRequestScribeLogger = 
Logger(mrRequestScriberNode) + + private val mrRequestScribeTargetFilteringStats = + stats.counter("MrRequestScribeHandler_target_filtering") + private val mrRequestScribeCandidateFilteringStats = + stats.counter("MrRequestScribeHandler_candidate_filtering") + private val mrRequestScribeInvalidStats = + stats.counter("MrRequestScribeHandler_invalid_filtering") + private val mrRequestScribeUnsupportedFeatureTypeStats = + stats.counter("MrRequestScribeHandler_unsupported_feature_type") + private val mrRequestScribeNotIncludedFeatureStats = + stats.counter("MrRequestScribeHandler_not_included_features") + + private final val MrRequestScribeInjection: Injection[MrRequestScribe, String] = BinaryScalaCodec( + MrRequestScribe + ) andThen Injection.connect[Array[Byte], Base64String, String] + + /** + * + * @param target : Target user id + * @param result : Result for target filtering + * + * @return + */ + def scribeForTargetFiltering(target: Target, result: Result): Future[Option[MrRequestScribe]] = { + if (target.isLoggedOutUser || !enableTargetFilteringScribing(target)) { + Future.None + } else { + val predicate = result match { + case Invalid(reason) => reason + case _ => + mrRequestScribeInvalidStats.incr() + throw new IllegalStateException("Invalid reason for Target Filtering " + result) + } + buildScribeThrift(target, predicate, None).map { targetFilteredScribe => + writeAtTargetFilteringStep(target, targetFilteredScribe) + Some(targetFilteredScribe) + } + } + } + + /** + * + * @param target : Target user id + * @param hydratedCandidates : Candidates hydrated with details: impressionId, frigateNotification and source + * @param preRankingFilteredCandidates : Candidates result filtered out at preRanking filtering step + * @param rankedCandidates : Sorted candidates details ranked by ranking step + * @param rerankedCandidates : Sorted candidates details ranked by reranking step + * @param restrictFilteredCandidates : Candidates details filtered out at restrict step + 
* @param allTakeCandidateResults : Candidates results at take step, include the candidates we take and the candidates filtered out at take step [with different result] + * + * @return + */ + def scribeForCandidateFiltering( + target: Target, + hydratedCandidates: Seq[CandidateDetails[PushCandidate]], + preRankingFilteredCandidates: Seq[CandidateResult[PushCandidate, Result]], + rankedCandidates: Seq[CandidateDetails[PushCandidate]], + rerankedCandidates: Seq[CandidateDetails[PushCandidate]], + restrictFilteredCandidates: Seq[CandidateDetails[PushCandidate]], + allTakeCandidateResults: Seq[CandidateResult[PushCandidate, Result]] + ): Future[Seq[MrRequestScribe]] = { + if (target.isLoggedOutUser || target.isEmailUser) { + Future.Nil + } else if (enableCandidateFilteringScribing(target)) { + val hydrateFeature = + target.params(PushFeatureSwitchParams.EnableMrRequestScribingWithFeatureHydrating) || + target.scribeFeatureForRequestScribe + + val candidateRequestInfoSeq = generateCandidatesScribeInfo( + hydratedCandidates, + preRankingFilteredCandidates, + rankedCandidates, + rerankedCandidates, + restrictFilteredCandidates, + allTakeCandidateResults, + isFeatureHydratingEnabled = hydrateFeature + ) + val flattenStructure = + target.params(PushFeatureSwitchParams.EnableFlattenMrRequestScribing) || hydrateFeature + candidateRequestInfoSeq.flatMap { candidateRequestInfos => + if (flattenStructure) { + Future.collect { + candidateRequestInfos.map { candidateRequestInfo => + buildScribeThrift(target, None, Some(Seq(candidateRequestInfo))) + .map { mrRequestScribe => + writeAtCandidateFilteringStep(target, mrRequestScribe) + mrRequestScribe + } + } + } + } else { + buildScribeThrift(target, None, Some(candidateRequestInfos)) + .map { mrRequestScribe => + writeAtCandidateFilteringStep(target, mrRequestScribe) + Seq(mrRequestScribe) + } + } + } + } else Future.Nil + + } + + private def buildScribeThrift( + target: Target, + targetFilteredOutPredicate: Option[String], + 
candidatesRequestInfo: Option[Seq[CandidateRequestInfo]] + ): Future[MrRequestScribe] = { + Future + .join( + target.targetUserState, + generateTargetFeatureScribeInfo(target), + target.targetUser).map { + case (userStateOption, targetFeatureOption, gizmoduckUserOpt) => + val userState = userStateOption.map(userState => ThriftUserState(userState.id)) + val targetFeatures = + targetFeatureOption.map(ScalaToJavaDataRecordConversions.javaDataRecord2ScalaDataRecord) + val traceId = Trace.id.traceId.toLong + + MrRequestScribe( + requestId = UUID.randomUUID.toString.replaceAll("-", ""), + scribedTimeMs = Time.now.inMilliseconds, + targetUserId = target.targetId, + targetUserInfo = Some( + TargetUserInfo( + userState, + features = targetFeatures, + userType = gizmoduckUserOpt.map(_.userType)) + ), + targetFilteredOutPredicate = targetFilteredOutPredicate, + candidates = candidatesRequestInfo, + traceId = Some(traceId) + ) + } + } + + private def generateTargetFeatureScribeInfo( + target: Target + ): Future[Option[DataRecord]] = { + val featureList = + target.params(PushFeatureSwitchParams.TargetLevelFeatureListForMrRequestScribing) + if (featureList.nonEmpty) { + PushPredictionHelper + .getDataRecordFromTargetFeatureMap( + target.targetId, + target.featureMap, + stats + ).map { dataRecord => + val richRecord = + new SRichDataRecord(dataRecord, PushQualityModelFeatureContext.featureContext) + + val selectedRecord = + SRichDataRecord(new DataRecord(), PushQualityModelFeatureContext.featureContext) + featureList.map { featureName => + val feature: Feature[_] = { + try { + PushQualityModelFeatureContext.featureContext.getFeature(featureName) + } catch { + case _: Exception => + mrRequestScribeNotIncludedFeatureStats.incr() + throw new IllegalStateException( + "Scribing features not included in FeatureContext: " + featureName) + } + } + + richRecord.getFeatureValueOpt(feature).foreach { featureVal => + feature.getFeatureType() match { + case FeatureType.BINARY => + 
selectedRecord.setFeatureValue( + feature.asInstanceOf[Feature[Boolean]], + featureVal.asInstanceOf[Boolean]) + case FeatureType.CONTINUOUS => + selectedRecord.setFeatureValue( + feature.asInstanceOf[Feature[Double]], + featureVal.asInstanceOf[Double]) + case FeatureType.STRING => + selectedRecord.setFeatureValue( + feature.asInstanceOf[Feature[String]], + featureVal.asInstanceOf[String]) + case FeatureType.DISCRETE => + selectedRecord.setFeatureValue( + feature.asInstanceOf[Feature[Long]], + featureVal.asInstanceOf[Long]) + case _ => + mrRequestScribeUnsupportedFeatureTypeStats.incr() + } + } + } + Some(selectedRecord.getRecord) + } + } else Future.None + } + + private def generateCandidatesScribeInfo( + hydratedCandidates: Seq[CandidateDetails[PushCandidate]], + preRankingFilteredCandidates: Seq[CandidateResult[PushCandidate, Result]], + rankedCandidates: Seq[CandidateDetails[PushCandidate]], + rerankedCandidates: Seq[CandidateDetails[PushCandidate]], + restrictFilteredCandidates: Seq[CandidateDetails[PushCandidate]], + allTakeCandidateResults: Seq[CandidateResult[PushCandidate, Result]], + isFeatureHydratingEnabled: Boolean + ): Future[Seq[CandidateRequestInfo]] = { + val candidatesMap = new mutable.HashMap[String, CandidateRequestInfo] + + hydratedCandidates.foreach { hydratedCandidate => + val frgNotif = hydratedCandidate.candidate.frigateNotification + val simplifiedTweetNotificationOpt = frgNotif.tweetNotification.map { tweetNotification => + TweetNotification( + tweetNotification.tweetId, + Seq.empty[TSocialContextAction], + tweetNotification.tweetAuthorId) + } + val simplifiedFrigateNotification = FrigateNotification( + frgNotif.commonRecommendationType, + frgNotif.notificationDisplayLocation, + tweetNotification = simplifiedTweetNotificationOpt + ) + candidatesMap(hydratedCandidate.candidate.impressionId) = CandidateRequestInfo( + candidateId = "", + candidateSource = hydratedCandidate.source.substring( + 0, + Math.min(6, hydratedCandidate.source.length) 
+ ), + frigateNotification = Some(simplifiedFrigateNotification), + modelScore = None, + rankPosition = None, + rerankPosition = None, + features = None, + isSent = Some(false) + ) + } + + preRankingFilteredCandidates.foreach { preRankingFilteredCandidateResult => + candidatesMap(preRankingFilteredCandidateResult.candidate.impressionId) = + candidatesMap(preRankingFilteredCandidateResult.candidate.impressionId) + .copy( + candidateFilteredOutPredicate = preRankingFilteredCandidateResult.result match { + case Invalid(reason) => reason + case _ => { + mrRequestScribeInvalidStats.incr() + throw new IllegalStateException( + "Invalid reason for Candidate Filtering " + preRankingFilteredCandidateResult.result) + } + }, + candidateFilteredOutStep = Some(CandidateFilteredOutStep.PreRankFiltering) + ) + } + + for { + _ <- Future.collectToTry { + rankedCandidates.zipWithIndex.map { + case (rankedCandidateDetail, index) => + val modelScoresFut = { + val crt = rankedCandidateDetail.candidate.commonRecType + if (RecTypes.notEligibleForModelScoreTracking.contains(crt)) Future.None + else rankedCandidateDetail.candidate.modelScores.map(Some(_)) + } + + modelScoresFut.map { modelScores => + candidatesMap(rankedCandidateDetail.candidate.impressionId) = + candidatesMap(rankedCandidateDetail.candidate.impressionId).copy( + rankPosition = Some(index), + modelScore = modelScores + ) + } + } + } + + _ = rerankedCandidates.zipWithIndex.foreach { + case (rerankedCandidateDetail, index) => { + candidatesMap(rerankedCandidateDetail.candidate.impressionId) = + candidatesMap(rerankedCandidateDetail.candidate.impressionId).copy( + rerankPosition = Some(index) + ) + } + } + + _ <- Future.collectToTry { + rerankedCandidates.map { rerankedCandidateDetail => + if (isFeatureHydratingEnabled) { + PushPredictionHelper + .getDataRecord( + rerankedCandidateDetail.candidate.target.targetHydrationContext, + rerankedCandidateDetail.candidate.target.featureMap, + 
rerankedCandidateDetail.candidate.candidateHydrationContext, + rerankedCandidateDetail.candidate.candidateFeatureMap(), + stats + ).map { features => + candidatesMap(rerankedCandidateDetail.candidate.impressionId) = + candidatesMap(rerankedCandidateDetail.candidate.impressionId).copy( + features = Some( + ScalaToJavaDataRecordConversions.javaDataRecord2ScalaDataRecord(features)) + ) + } + } else Future.Unit + } + } + + _ = restrictFilteredCandidates.foreach { restrictFilteredCandidateDetatil => + candidatesMap(restrictFilteredCandidateDetatil.candidate.impressionId) = + candidatesMap(restrictFilteredCandidateDetatil.candidate.impressionId) + .copy(candidateFilteredOutStep = Some(CandidateFilteredOutStep.Restrict)) + } + + _ = allTakeCandidateResults.foreach { allTakeCandidateResult => + allTakeCandidateResult.result match { + case OK => + candidatesMap(allTakeCandidateResult.candidate.impressionId) = + candidatesMap(allTakeCandidateResult.candidate.impressionId).copy(isSent = Some(true)) + case Invalid(reason) => + candidatesMap(allTakeCandidateResult.candidate.impressionId) = + candidatesMap(allTakeCandidateResult.candidate.impressionId).copy( + candidateFilteredOutPredicate = reason, + candidateFilteredOutStep = Some(CandidateFilteredOutStep.PostRankFiltering)) + case _ => + mrRequestScribeInvalidStats.incr() + throw new IllegalStateException( + "Invalid reason for Candidate Filtering " + allTakeCandidateResult.result) + } + } + } yield candidatesMap.values.toSeq + } + + private def enableTargetFilteringScribing(target: Target): Boolean = { + target.params(PushParams.EnableMrRequestScribing) && target.params( + PushFeatureSwitchParams.EnableMrRequestScribingForTargetFiltering) + } + + private def enableCandidateFilteringScribing(target: Target): Boolean = { + target.params(PushParams.EnableMrRequestScribing) && target.params( + PushFeatureSwitchParams.EnableMrRequestScribingForCandidateFiltering) + } + + private def writeAtTargetFilteringStep(target: Target, 
mrRequestScribe: MrRequestScribe) = { + logToScribe(mrRequestScribe) + mrRequestScribeTargetFilteringStats.incr() + } + + private def writeAtCandidateFilteringStep(target: Target, mrRequestScribe: MrRequestScribe) = { + logToScribe(mrRequestScribe) + mrRequestScribeCandidateFilteringStats.incr() + } + + private def logToScribe(mrRequestScribe: MrRequestScribe): Unit = { + val logEntry: String = MrRequestScribeInjection(mrRequestScribe) + mrRequestScribeLogger.info(logEntry) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandler.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandler.scala new file mode 100644 index 000000000..e235f76cb --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandler.scala @@ -0,0 +1,250 @@ +package com.twitter.frigate.pushservice.send_handler + +import com.twitter.finagle.stats.BroadcastStatsReceiver +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.CandidateFilteringOnlyFlow +import com.twitter.frigate.common.base.CandidateResult +import com.twitter.frigate.common.base.FeatureMap +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Response +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.common.base.Stats.track +import com.twitter.frigate.common.config.CommonConstants +import com.twitter.frigate.common.logger.MRLogger +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.util.InvalidRequestException +import com.twitter.frigate.common.util.MrNtabCopyObjects +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import 
com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.ml.HydrationContextBuilder +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams.EnableMagicFanoutNewsForYouNtabCopy +import com.twitter.frigate.pushservice.scriber.MrRequestScribeHandler +import com.twitter.frigate.pushservice.send_handler.generator.PushRequestToCandidate +import com.twitter.frigate.pushservice.take.SendHandlerNotifier +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPostCandidateValidator +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPreCandidateValidator +import com.twitter.frigate.pushservice.target.PushTargetUserBuilder +import com.twitter.frigate.pushservice.util.ResponseStatsTrackUtils.trackStatsForResponseToRequest +import com.twitter.frigate.pushservice.util.SendHandlerPredicateUtil +import com.twitter.frigate.pushservice.thriftscala.PushRequest +import com.twitter.frigate.pushservice.thriftscala.PushRequestScribe +import com.twitter.frigate.pushservice.thriftscala.PushResponse +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.nrel.heavyranker.FeatureHydrator +import com.twitter.util._ + +/** + * A handler for sending PushRequests + */ +class SendHandler( + pushTargetUserBuilder: PushTargetUserBuilder, + preCandidateValidator: SendHandlerPreCandidateValidator, + postCandidateValidator: SendHandlerPostCandidateValidator, + sendHandlerNotifier: SendHandlerNotifier, + candidateHydrator: SendHandlerPushCandidateHydrator, + featureHydrator: FeatureHydrator, + sendHandlerPredicateUtil: SendHandlerPredicateUtil, + mrRequestScriberNode: String +)( + implicit val statsReceiver: StatsReceiver, + implicit val config: Config) + extends CandidateFilteringOnlyFlow[Target, RawCandidate, PushCandidate] { + + implicit private val timer: Timer = new JavaTimer(true) + val stats = statsReceiver.scope("SendHandler") + val log = MRLogger("SendHandler") + + private val 
buildTargetStats = stats.scope("build_target") + + private val candidateHydrationLatency: Stat = + stats.stat("candidateHydrationLatency") + + private val candidatePreValidatorLatency: Stat = + stats.stat("candidatePreValidatorLatency") + + private val candidatePostValidatorLatency: Stat = + stats.stat("candidatePostValidatorLatency") + + private val featureHydrationLatency: StatsReceiver = + stats.scope("featureHydrationLatency") + + private val mrRequestScribeHandler = + new MrRequestScribeHandler(mrRequestScriberNode, stats.scope("mr_request_scribe")) + + def apply(request: PushRequest): Future[PushResponse] = { + val receivers = Seq( + stats, + stats.scope(request.notification.commonRecommendationType.toString) + ) + val bStats = BroadcastStatsReceiver(receivers) + bStats.counter("requests").incr() + Stat + .timeFuture(bStats.stat("latency"))( + process(request).raiseWithin(CommonConstants.maxPushRequestDuration)) + .onSuccess { + case (pushResp, rawCandidate) => + trackStatsForResponseToRequest( + rawCandidate.commonRecType, + rawCandidate.target, + pushResp, + receivers)(statsReceiver) + if (!request.context.exists(_.darkWrite.contains(true))) { + config.requestScribe(PushRequestScribe(request, pushResp)) + } + } + .onFailure { ex => + bStats.counter("failures").incr() + bStats.scope("failures").counter(ex.getClass.getCanonicalName).incr() + } + .map { + case (pushResp, _) => pushResp + } + } + + private def process(request: PushRequest): Future[(PushResponse, RawCandidate)] = { + val recType = request.notification.commonRecommendationType + + track(buildTargetStats)( + pushTargetUserBuilder + .buildTarget( + request.userId, + request.context + ) + ).flatMap { targetUser => + val responseWithScribedInfo = request.context.exists { context => + context.responseWithScribedInfo.contains(true) + } + val newRequest = + if (request.notification.commonRecommendationType == CommonRecommendationType.MagicFanoutNewsEvent && + 
targetUser.params(EnableMagicFanoutNewsForYouNtabCopy)) { + val newNotification = request.notification.copy(ntabCopyId = + Some(MrNtabCopyObjects.MagicFanoutNewsForYouCopy.copyId)) + request.copy(notification = newNotification) + } else request + + if (RecTypes.isSendHandlerType(recType) || newRequest.context.exists( + _.allowCRT.contains(true))) { + + val rawCandidateFut = PushRequestToCandidate.generatePushCandidate( + newRequest.notification, + targetUser + ) + + rawCandidateFut.flatMap { rawCandidate => + val pushResponse = process(targetUser, Seq(rawCandidate)).flatMap { + sendHandlerNotifier.checkResponseAndNotify(_, responseWithScribedInfo) + } + + pushResponse.map { pushResponse => + (pushResponse, rawCandidate) + } + } + } else { + Future.exception(InvalidRequestException(s"${recType.name} not supported in SendHandler")) + } + } + } + + private def hydrateFeatures( + candidateDetails: Seq[CandidateDetails[PushCandidate]], + target: Target, + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + candidateDetails.headOption match { + case Some(candidateDetail) + if RecTypes.notEligibleForModelScoreTracking(candidateDetail.candidate.commonRecType) => + Future.value(candidateDetails) + + case Some(candidateDetail) => + val hydrationContextFut = HydrationContextBuilder.build(candidateDetail.candidate) + hydrationContextFut.flatMap { hc => + featureHydrator + .hydrateCandidate(Seq(hc), target.mrRequestContextForFeatureStore) + .map { hydrationResult => + val features = hydrationResult.getOrElse(hc, FeatureMap()) + candidateDetail.candidate.mergeFeatures(features) + candidateDetails + } + } + case _ => Future.Nil + } + } + + override def process( + target: Target, + externalCandidates: Seq[RawCandidate] + ): Future[Response[PushCandidate, Result]] = { + val candidate = externalCandidates.map(CandidateDetails(_, "realtime")) + + for { + hydratedCandidatesWithCopy <- hydrateCandidates(candidate) + + (candidates, preHydrationFilteredCandidates) <- 
track(filterStats)( + filter(target, hydratedCandidatesWithCopy) + ) + + featureHydratedCandidates <- + track(featureHydrationLatency)(hydrateFeatures(candidates, target)) + + allTakeCandidateResults <- track(takeStats)( + take(target, featureHydratedCandidates, desiredCandidateCount(target)) + ) + + _ <- mrRequestScribeHandler.scribeForCandidateFiltering( + target = target, + hydratedCandidates = hydratedCandidatesWithCopy, + preRankingFilteredCandidates = preHydrationFilteredCandidates, + rankedCandidates = featureHydratedCandidates, + rerankedCandidates = Seq.empty, + restrictFilteredCandidates = Seq.empty, // no restrict step + allTakeCandidateResults = allTakeCandidateResults + ) + } yield { + + /** + * We combine the results for all filtering steps and pass on in sequence to next step + * + * This is done to ensure the filtering reason for the candidate from multiple levels of + * filtering is carried all the way until [[PushResponse]] is built and returned from + * frigate-pushservice-send + */ + Response(OK, allTakeCandidateResults ++ preHydrationFilteredCandidates) + } + } + + override def hydrateCandidates( + candidates: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + Stat.timeFuture(candidateHydrationLatency)(candidateHydrator(candidates)) + } + + // Filter Step - pre-predicates and app specific predicates + override def filter( + target: Target, + hydratedCandidatesDetails: Seq[CandidateDetails[PushCandidate]] + ): Future[ + (Seq[CandidateDetails[PushCandidate]], Seq[CandidateResult[PushCandidate, Result]]) + ] = { + Stat.timeFuture(candidatePreValidatorLatency)( + sendHandlerPredicateUtil.preValidationForCandidate( + hydratedCandidatesDetails, + preCandidateValidator + )) + } + + // Post Validation - Take step + override def validCandidates( + target: Target, + candidates: Seq[PushCandidate] + ): Future[Seq[Result]] = { + Stat.timeFuture(candidatePostValidatorLatency)(Future.collect(candidates.map { candidate 
=> + sendHandlerPredicateUtil + .postValidationForCandidate(candidate, postCandidateValidator) + .map(res => res.result) + })) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandlerPushCandidateHydrator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandlerPushCandidateHydrator.scala new file mode 100644 index 000000000..f8f102790 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/SendHandlerPushCandidateHydrator.scala @@ -0,0 +1,184 @@ +package com.twitter.frigate.pushservice.send_handler + +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.common.util.MrNtabCopyObjects +import com.twitter.frigate.common.util.MrPushCopyObjects +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import com.twitter.frigate.pushservice.store.EventRequest +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.frigate.pushservice.util.CandidateHydrationUtil._ +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.livevideo.timeline.domain.v2.{Event => LiveEvent} +import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.client.UserId +import 
com.twitter.ubs.thriftscala.AudioSpace +import com.twitter.util.Future + +case class SendHandlerPushCandidateHydrator( + lexServiceStore: ReadableStore[EventRequest, LiveEvent], + fanoutMetadataStore: ReadableStore[(Long, Long), FanoutEvent], + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata], + safeUserStore: ReadableStore[Long, User], + simClusterToEntityStore: ReadableStore[Int, SimClustersInferredEntities], + audioSpaceStore: ReadableStore[String, AudioSpace], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore, + superFollowCreatorTweetCountStore: ReadableStore[UserId, Int] +)( + implicit statsReceiver: StatsReceiver, + implicit val weightedOpenOrNtabClickModelScorer: PushMLModelScorer) { + + lazy val candidateWithCopyNumStat = statsReceiver.stat("candidate_with_copy_num") + lazy val hydratedCandidateStat = statsReceiver.scope("hydrated_candidates") + + def updateCandidates( + candidateDetails: Seq[CandidateDetails[RawCandidate]], + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + + Future.collect { + candidateDetails.map { candidateDetail => + val pushCandidate = candidateDetail.candidate + + val copyIds = getCopyIdsByCRT(pushCandidate.commonRecType) + + val hydratedCandidateFut = pushCandidate match { + case magicFanoutNewsEventCandidate: MagicFanoutNewsEventCandidate => + getHydratedCandidateForMagicFanoutNewsEvent( + magicFanoutNewsEventCandidate, + copyIds, + lexServiceStore, + fanoutMetadataStore, + semanticCoreMegadataStore, + simClusterToEntityStore, + interestsLookupStore, + uttEntityHydrationStore + ) + + case scheduledSpaceSubscriberCandidate: ScheduledSpaceSubscriberCandidate => + getHydratedCandidateForScheduledSpaceSubscriber( + scheduledSpaceSubscriberCandidate, + safeUserStore, + copyIds, + audioSpaceStore + ) + case scheduledSpaceSpeakerCandidate: ScheduledSpaceSpeakerCandidate => + 
getHydratedCandidateForScheduledSpaceSpeaker( + scheduledSpaceSpeakerCandidate, + safeUserStore, + copyIds, + audioSpaceStore + ) + case magicFanoutSportsEventCandidate: MagicFanoutSportsEventCandidate with MagicFanoutSportsScoreInformation => + getHydratedCandidateForMagicFanoutSportsEvent( + magicFanoutSportsEventCandidate, + copyIds, + lexServiceStore, + fanoutMetadataStore, + semanticCoreMegadataStore, + interestsLookupStore, + uttEntityHydrationStore + ) + case magicFanoutProductLaunchCandidate: MagicFanoutProductLaunchCandidate => + getHydratedCandidateForMagicFanoutProductLaunch( + magicFanoutProductLaunchCandidate, + copyIds) + case creatorEventCandidate: MagicFanoutCreatorEventCandidate => + getHydratedCandidateForMagicFanoutCreatorEvent( + creatorEventCandidate, + safeUserStore, + copyIds, + superFollowCreatorTweetCountStore) + case _ => + throw new IllegalArgumentException("Incorrect candidate type when update candidates") + } + + hydratedCandidateFut.map { hydratedCandidate => + hydratedCandidateStat.counter(hydratedCandidate.commonRecType.name).incr() + CandidateDetails( + hydratedCandidate, + source = candidateDetail.source + ) + } + } + } + } + + private def getCopyIdsByCRT(crt: CommonRecommendationType): CopyIds = { + crt match { + case CommonRecommendationType.MagicFanoutNewsEvent => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.MagicFanoutNewsPushCopy.copyId), + ntabCopyId = Some(MrNtabCopyObjects.MagicFanoutNewsForYouCopy.copyId), + aggregationId = None + ) + + case CommonRecommendationType.ScheduledSpaceSubscriber => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.ScheduledSpaceSubscriber.copyId), + ntabCopyId = Some(MrNtabCopyObjects.ScheduledSpaceSubscriber.copyId), + aggregationId = None + ) + case CommonRecommendationType.ScheduledSpaceSpeaker => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.ScheduledSpaceSpeaker.copyId), + ntabCopyId = Some(MrNtabCopyObjects.ScheduledSpaceSpeakerNow.copyId), + aggregationId = None + ) + case 
CommonRecommendationType.SpaceSpeaker => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.SpaceSpeaker.copyId), + ntabCopyId = Some(MrNtabCopyObjects.SpaceSpeaker.copyId), + aggregationId = None + ) + case CommonRecommendationType.SpaceHost => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.SpaceHost.copyId), + ntabCopyId = Some(MrNtabCopyObjects.SpaceHost.copyId), + aggregationId = None + ) + case CommonRecommendationType.MagicFanoutSportsEvent => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.MagicFanoutSportsPushCopy.copyId), + ntabCopyId = Some(MrNtabCopyObjects.MagicFanoutSportsCopy.copyId), + aggregationId = None + ) + case CommonRecommendationType.MagicFanoutProductLaunch => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.MagicFanoutProductLaunch.copyId), + ntabCopyId = Some(MrNtabCopyObjects.ProductLaunch.copyId), + aggregationId = None + ) + case CommonRecommendationType.CreatorSubscriber => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.MagicFanoutCreatorSubscription.copyId), + ntabCopyId = Some(MrNtabCopyObjects.MagicFanoutCreatorSubscription.copyId), + aggregationId = None + ) + case CommonRecommendationType.NewCreator => + CopyIds( + pushCopyId = Some(MrPushCopyObjects.MagicFanoutNewCreator.copyId), + ntabCopyId = Some(MrNtabCopyObjects.MagicFanoutNewCreator.copyId), + aggregationId = None + ) + case _ => + throw new IllegalArgumentException("Incorrect candidate type when fetch copy ids") + } + } + + def apply( + candidateDetails: Seq[CandidateDetails[RawCandidate]] + ): Future[Seq[CandidateDetails[PushCandidate]]] = { + updateCandidates(candidateDetails) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/CandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/CandidateGenerator.scala new file mode 100644 index 000000000..45907fa8e --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/CandidateGenerator.scala @@ -0,0 +1,17 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.util.Future + +trait CandidateGenerator { + + /** + * Build RawCandidate from FrigateNotification + * @param target + * @param frigateNotification + * @return RawCandidate + */ + def getCandidate(target: Target, frigateNotification: FrigateNotification): Future[RawCandidate] +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutCreatorEventCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutCreatorEventCandidateGenerator.scala new file mode 100644 index 000000000..10a7acb89 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutCreatorEventCandidateGenerator.scala @@ -0,0 +1,70 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.common.base.MagicFanoutCreatorEventCandidate +import com.twitter.frigate.magic_events.thriftscala.CreatorFanoutType +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.util.Future + +object MagicFanoutCreatorEventCandidateGenerator extends CandidateGenerator { + override def getCandidate( + targetUser: PushTypes.Target, + notification: FrigateNotification + ): 
Future[PushTypes.RawCandidate] = { + + require( + notification.commonRecommendationType == CommonRecommendationType.CreatorSubscriber || notification.commonRecommendationType == CommonRecommendationType.NewCreator, + "MagicFanoutCreatorEvent: unexpected CRT " + notification.commonRecommendationType + ) + require( + notification.creatorSubscriptionNotification.isDefined, + "MagicFanoutCreatorEvent: creatorSubscriptionNotification is not defined") + require( + notification.creatorSubscriptionNotification.exists(_.magicFanoutPushId.isDefined), + "MagicFanoutCreatorEvent: magicFanoutPushId is not defined") + require( + notification.creatorSubscriptionNotification.exists(_.fanoutReasons.isDefined), + "MagicFanoutCreatorEvent: fanoutReasons is not defined") + require( + notification.creatorSubscriptionNotification.exists(_.creatorId.isDefined), + "MagicFanoutCreatorEvent: creatorId is not defined") + if (notification.commonRecommendationType == CommonRecommendationType.CreatorSubscriber) { + require( + notification.creatorSubscriptionNotification + .exists(_.subscriberId.isDefined), + "MagicFanoutCreatorEvent: subscriber id is not defined" + ) + } + + val creatorSubscriptionNotification = notification.creatorSubscriptionNotification.get + + val candidate = new RawCandidate with MagicFanoutCreatorEventCandidate { + + override val target: Target = targetUser + + override val pushId: Long = + creatorSubscriptionNotification.magicFanoutPushId.get + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + creatorSubscriptionNotification.fanoutReasons.get + + override val creatorFanoutType: CreatorFanoutType = + creatorSubscriptionNotification.creatorFanoutType + + override val commonRecType: CommonRecommendationType = + notification.commonRecommendationType + + override val frigateNotification: FrigateNotification = notification + + override val subscriberId: Option[Long] = creatorSubscriptionNotification.subscriberId + + override val creatorId: Long = 
creatorSubscriptionNotification.creatorId.get + } + + Future.value(candidate) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutNewsEventCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutNewsEventCandidateGenerator.scala new file mode 100644 index 000000000..7b351c91a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutNewsEventCandidateGenerator.scala @@ -0,0 +1,57 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.common.base.MagicFanoutNewsEventCandidate +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.MagicFanoutEventNotificationDetails +import com.twitter.util.Future + +object MagicFanoutNewsEventCandidateGenerator extends CandidateGenerator { + + override def getCandidate( + targetUser: Target, + notification: FrigateNotification + ): Future[RawCandidate] = { + + /** + * frigateNotification recommendation type should be [[CommonRecommendationType.MagicFanoutNewsEvent]] + * AND pushId field should be set + **/ + require( + notification.commonRecommendationType == CommonRecommendationType.MagicFanoutNewsEvent, + "MagicFanoutNewsEvent: unexpected CRT " + notification.commonRecommendationType + ) + + require( + notification.magicFanoutEventNotification.exists(_.pushId.isDefined), + "MagicFanoutNewsEvent: pushId is not defined") + + val magicFanoutEventNotification = notification.magicFanoutEventNotification.get + + val candidate = new RawCandidate with MagicFanoutNewsEventCandidate { + + override val target: Target 
= targetUser + + override val eventId: Long = magicFanoutEventNotification.eventId + + override val pushId: Long = magicFanoutEventNotification.pushId.get + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + magicFanoutEventNotification.eventReasons.getOrElse(Seq.empty) + + override val momentId: Option[Long] = magicFanoutEventNotification.momentId + + override val eventLanguage: Option[String] = magicFanoutEventNotification.eventLanguage + + override val details: Option[MagicFanoutEventNotificationDetails] = + magicFanoutEventNotification.details + + override val frigateNotification: FrigateNotification = notification + } + + Future.value(candidate) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutProductLaunchCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutProductLaunchCandidateGenerator.scala new file mode 100644 index 000000000..6844b1b06 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutProductLaunchCandidateGenerator.scala @@ -0,0 +1,54 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.common.base.MagicFanoutProductLaunchCandidate +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.magic_events.thriftscala.ProductType +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.util.Future + +object MagicFanoutProductLaunchCandidateGenerator extends CandidateGenerator { + + override def getCandidate( + targetUser: PushTypes.Target, + notification: FrigateNotification + ): 
Future[PushTypes.RawCandidate] = { + + require( + notification.commonRecommendationType == CommonRecommendationType.MagicFanoutProductLaunch, + "MagicFanoutProductLaunch: unexpected CRT " + notification.commonRecommendationType + ) + require( + notification.magicFanoutProductLaunchNotification.isDefined, + "MagicFanoutProductLaunch: magicFanoutProductLaunchNotification is not defined") + require( + notification.magicFanoutProductLaunchNotification.exists(_.magicFanoutPushId.isDefined), + "MagicFanoutProductLaunch: magicFanoutPushId is not defined") + require( + notification.magicFanoutProductLaunchNotification.exists(_.fanoutReasons.isDefined), + "MagicFanoutProductLaunch: fanoutReasons is not defined") + + val magicFanoutProductLaunchNotification = notification.magicFanoutProductLaunchNotification.get + + val candidate = new RawCandidate with MagicFanoutProductLaunchCandidate { + + override val target: Target = targetUser + + override val pushId: Long = + magicFanoutProductLaunchNotification.magicFanoutPushId.get + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + magicFanoutProductLaunchNotification.fanoutReasons.get + + override val productLaunchType: ProductType = + magicFanoutProductLaunchNotification.productLaunchType + + override val frigateNotification: FrigateNotification = notification + } + + Future.value(candidate) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutSportsEventCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutSportsEventCandidateGenerator.scala new file mode 100644 index 000000000..cdd37833e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/MagicFanoutSportsEventCandidateGenerator.scala @@ -0,0 +1,153 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import 
com.twitter.datatools.entityservice.entities.sports.thriftscala.BaseballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.BasketballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.CricketMatchLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.NflFootballGameLiveUpdate +import com.twitter.datatools.entityservice.entities.sports.thriftscala.SoccerMatchLiveUpdate +import com.twitter.escherbird.common.thriftscala.Domains +import com.twitter.escherbird.common.thriftscala.QualifiedId +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.frigate.common.base.BaseGameScore +import com.twitter.frigate.common.base.MagicFanoutSportsEventCandidate +import com.twitter.frigate.common.base.MagicFanoutSportsScoreInformation +import com.twitter.frigate.common.base.TeamInfo +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.pushservice.exception.InvalidSportDomainException +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.predicate.magic_fanout.MagicFanoutSportsUtil +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.MagicFanoutEventNotificationDetails +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object MagicFanoutSportsEventCandidateGenerator { + + final def getCandidate( + targetUser: Target, + notification: FrigateNotification, + basketballGameScoreStore: ReadableStore[QualifiedId, BasketballGameLiveUpdate], + baseballGameScoreStore: ReadableStore[QualifiedId, BaseballGameLiveUpdate], + 
cricketMatchScoreStore: ReadableStore[QualifiedId, CricketMatchLiveUpdate], + soccerMatchScoreStore: ReadableStore[QualifiedId, SoccerMatchLiveUpdate], + nflGameScoreStore: ReadableStore[QualifiedId, NflFootballGameLiveUpdate], + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata], + ): Future[RawCandidate] = { + + /** + * frigateNotification recommendation type should be [[CommonRecommendationType.MagicFanoutSportsEvent]] + * AND pushId field should be set + * + * */ + require( + notification.commonRecommendationType == CommonRecommendationType.MagicFanoutSportsEvent, + "MagicFanoutSports: unexpected CRT " + notification.commonRecommendationType + ) + + require( + notification.magicFanoutEventNotification.exists(_.pushId.isDefined), + "MagicFanoutSportsEvent: pushId is not defined") + + val magicFanoutEventNotification = notification.magicFanoutEventNotification.get + val eventId = magicFanoutEventNotification.eventId + val _isScoreUpdate = magicFanoutEventNotification.isScoreUpdate.getOrElse(false) + + val gameScoresFut: Future[Option[BaseGameScore]] = { + if (_isScoreUpdate) { + semanticCoreMegadataStore + .get(SemanticEntityForQuery(PushConstants.SportsEventDomainId, eventId)) + .flatMap { + case Some(megadata) => + if (megadata.domains.contains(Domains.BasketballGame)) { + basketballGameScoreStore + .get(QualifiedId(Domains.BasketballGame.value, eventId)).map { + case Some(game) if game.status.isDefined => + val status = game.status.get + MagicFanoutSportsUtil.transformToGameScore(game.score, status) + case _ => None + } + } else if (megadata.domains.contains(Domains.BaseballGame)) { + baseballGameScoreStore + .get(QualifiedId(Domains.BaseballGame.value, eventId)).map { + case Some(game) if game.status.isDefined => + val status = game.status.get + MagicFanoutSportsUtil.transformToGameScore(game.runs, status) + case _ => None + } + } else if (megadata.domains.contains(Domains.NflFootballGame)) { + nflGameScoreStore + 
.get(QualifiedId(Domains.NflFootballGame.value, eventId)).map { + case Some(game) if game.status.isDefined => + val nflScore = MagicFanoutSportsUtil.transformNFLGameScore(game) + nflScore + case _ => None + } + } else if (megadata.domains.contains(Domains.SoccerMatch)) { + soccerMatchScoreStore + .get(QualifiedId(Domains.SoccerMatch.value, eventId)).map { + case Some(game) if game.status.isDefined => + val soccerScore = MagicFanoutSportsUtil.transformSoccerGameScore(game) + soccerScore + case _ => None + } + } else { + // The domains are not in our list of supported sports + throw new InvalidSportDomainException( + s"Domain for entity ${eventId} is not supported") + } + case _ => Future.None + } + } else Future.None + } + + val homeTeamInfoFut: Future[Option[TeamInfo]] = gameScoresFut.flatMap { + case Some(gameScore) => + MagicFanoutSportsUtil.getTeamInfo(gameScore.home, semanticCoreMegadataStore) + case _ => Future.None + } + + val awayTeamInfoFut: Future[Option[TeamInfo]] = gameScoresFut.flatMap { + case Some(gameScore) => + MagicFanoutSportsUtil.getTeamInfo(gameScore.away, semanticCoreMegadataStore) + case _ => Future.None + } + + val candidate = new RawCandidate + with MagicFanoutSportsEventCandidate + with MagicFanoutSportsScoreInformation { + + override val target: Target = targetUser + + override val eventId: Long = magicFanoutEventNotification.eventId + + override val pushId: Long = magicFanoutEventNotification.pushId.get + + override val candidateMagicEventsReasons: Seq[MagicEventsReason] = + magicFanoutEventNotification.eventReasons.getOrElse(Seq.empty) + + override val momentId: Option[Long] = magicFanoutEventNotification.momentId + + override val eventLanguage: Option[String] = magicFanoutEventNotification.eventLanguage + + override val details: Option[MagicFanoutEventNotificationDetails] = + magicFanoutEventNotification.details + + override val frigateNotification: FrigateNotification = notification + + override val homeTeamInfo: 
Future[Option[TeamInfo]] = homeTeamInfoFut + + override val awayTeamInfo: Future[Option[TeamInfo]] = awayTeamInfoFut + + override val gameScores: Future[Option[BaseGameScore]] = gameScoresFut + + override val isScoreUpdate: Boolean = _isScoreUpdate + } + + Future.value(candidate) + + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/PushRequestToCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/PushRequestToCandidate.scala new file mode 100644 index 000000000..8d7e81d3f --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/PushRequestToCandidate.scala @@ -0,0 +1,49 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.exception.UnsupportedCrtException +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.util.Future + +object PushRequestToCandidate { + final def generatePushCandidate( + frigateNotification: FrigateNotification, + target: Target + )( + implicit config: Config + ): Future[RawCandidate] = { + + val candidateGenerator: (Target, FrigateNotification) => Future[RawCandidate] = { + frigateNotification.commonRecommendationType match { + case CRT.MagicFanoutNewsEvent => MagicFanoutNewsEventCandidateGenerator.getCandidate + case CRT.ScheduledSpaceSubscriber => ScheduledSpaceSubscriberCandidateGenerator.getCandidate + case CRT.ScheduledSpaceSpeaker => ScheduledSpaceSpeakerCandidateGenerator.getCandidate + case CRT.MagicFanoutSportsEvent => + MagicFanoutSportsEventCandidateGenerator.getCandidate( + _, + _, + config.basketballGameScoreStore, + config.baseballGameScoreStore, + 
config.cricketMatchScoreStore, + config.soccerMatchScoreStore, + config.nflGameScoreStore, + config.semanticCoreMegadataStore + ) + case CRT.MagicFanoutProductLaunch => + MagicFanoutProductLaunchCandidateGenerator.getCandidate + case CRT.NewCreator => + MagicFanoutCreatorEventCandidateGenerator.getCandidate + case CRT.CreatorSubscriber => + MagicFanoutCreatorEventCandidateGenerator.getCandidate + case _ => + throw new UnsupportedCrtException( + "UnsupportedCrtException for SendHandler: " + frigateNotification.commonRecommendationType) + } + } + + candidateGenerator(target, frigateNotification) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSpeakerCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSpeakerCandidateGenerator.scala new file mode 100644 index 000000000..e7821db76 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSpeakerCandidateGenerator.scala @@ -0,0 +1,55 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.common.base.ScheduledSpaceSpeakerCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.util.Future + +object ScheduledSpaceSpeakerCandidateGenerator extends CandidateGenerator { + + override def getCandidate( + targetUser: Target, + notification: FrigateNotification + ): Future[RawCandidate] = { + + /** + * frigateNotification recommendation type should be [[CommonRecommendationType.ScheduledSpaceSpeaker]] + * + **/ + require( + notification.commonRecommendationType == CommonRecommendationType.ScheduledSpaceSpeaker, + "ScheduledSpaceSpeaker: unexpected CRT " + 
notification.commonRecommendationType + ) + + val spaceNotification = notification.spaceNotification.getOrElse( + throw new IllegalStateException("ScheduledSpaceSpeaker notification object not defined")) + + require( + spaceNotification.hostUserId.isDefined, + "ScheduledSpaceSpeaker notification - hostUserId not defined" + ) + + val spaceHostId = spaceNotification.hostUserId + + require( + spaceNotification.scheduledStartTime.isDefined, + "ScheduledSpaceSpeaker notification - scheduledStartTime not defined" + ) + + val scheduledStartTime = spaceNotification.scheduledStartTime.get + + val candidate = new RawCandidate with ScheduledSpaceSpeakerCandidate { + override val target: Target = targetUser + override val frigateNotification: FrigateNotification = notification + override val spaceId: String = spaceNotification.broadcastId + override val hostId: Option[Long] = spaceHostId + override val startTime: Long = scheduledStartTime + override val speakerIds: Option[Seq[Long]] = spaceNotification.speakers + override val listenerIds: Option[Seq[Long]] = spaceNotification.listeners + } + + Future.value(candidate) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSubscriberCandidateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSubscriberCandidateGenerator.scala new file mode 100644 index 000000000..484f17b2a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/send_handler/generator/ScheduledSpaceSubscriberCandidateGenerator.scala @@ -0,0 +1,55 @@ +package com.twitter.frigate.pushservice.send_handler.generator + +import com.twitter.frigate.common.base.ScheduledSpaceSubscriberCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.thriftscala.CommonRecommendationType +import 
com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.util.Future
+
+object ScheduledSpaceSubscriberCandidateGenerator extends CandidateGenerator {
+
+  /**
+   * Builds the raw candidate for a ScheduledSpaceSubscriber notification.
+   *
+   * Validation failures are thrown from this method directly instead of being returned
+   * as a failed Future: `require` rejects a notification whose recommendation type is not
+   * [[CommonRecommendationType.ScheduledSpaceSubscriber]] or whose hostUserId /
+   * scheduledStartTime is missing, and an IllegalStateException is thrown when the
+   * notification carries no spaceNotification.
+   */
+  override def getCandidate(
+    targetUser: Target,
+    notification: FrigateNotification
+  ): Future[RawCandidate] = {
+    val crt = notification.commonRecommendationType
+    require(
+      crt == CommonRecommendationType.ScheduledSpaceSubscriber,
+      "ScheduledSpaceSubscriber: unexpected CRT " + crt
+    )
+
+    val spaceNotification = notification.spaceNotification match {
+      case Some(space) => space
+      case None =>
+        throw new IllegalStateException(
+          "ScheduledSpaceSubscriber notification object not defined")
+    }
+
+    // Host is validated before the start time, so a notification missing both reports the host.
+    val spaceHostId = spaceNotification.hostUserId
+    require(
+      spaceHostId.isDefined,
+      "ScheduledSpaceSubscriber notification - hostUserId not defined"
+    )
+
+    val startTimeOpt = spaceNotification.scheduledStartTime
+    require(
+      startTimeOpt.isDefined,
+      "ScheduledSpaceSubscriber notification - scheduledStartTime not defined"
+    )
+    val scheduledStartTime = startTimeOpt.get
+
+    // Locals read below are named differently from the members they initialise
+    // (target, hostId, startTime) so they are not shadowed inside the anonymous class.
+    Future.value(new RawCandidate with ScheduledSpaceSubscriberCandidate {
+      override val target: Target = targetUser
+      override val frigateNotification: FrigateNotification = notification
+      override val spaceId: String = spaceNotification.broadcastId
+      override val hostId: Option[Long] = spaceHostId
+      override val startTime: Long = scheduledStartTime
+      override val speakerIds: Option[Seq[Long]] = spaceNotification.speakers
+      override val listenerIds: Option[Seq[Long]] = spaceNotification.listeners
+    })
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ContentMixerStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ContentMixerStore.scala
new file mode 100644
index 000000000..1f4171030
--- /dev/null
+++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ContentMixerStore.scala
@@ -0,0 +1,17 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.content_mixer.thriftscala.ContentMixer
+import com.twitter.content_mixer.thriftscala.ContentMixerRequest
+import com.twitter.content_mixer.thriftscala.ContentMixerResponse
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * ReadableStore view over the ContentMixer client: `get` forwards the request to
+ * `getCandidates` and wraps every successful response in `Some`.
+ */
+case class ContentMixerStore(contentMixer: ContentMixer.MethodPerEndpoint)
+    extends ReadableStore[ContentMixerRequest, ContentMixerResponse] {
+
+  override def get(request: ContentMixerRequest): Future[Option[ContentMixerResponse]] =
+    contentMixer.getCandidates(request).map(Some(_))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CopySelectionServiceStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CopySelectionServiceStore.scala
new file mode 100644
index 000000000..b793ade7e
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CopySelectionServiceStore.scala
@@ -0,0 +1,15 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.copyselectionservice.thriftscala._
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * ReadableStore view over the copy selection service, speaking the V1 request/response
+ * variants only.
+ */
+class CopySelectionServiceStore(copySelectionServiceClient: CopySelectionService.FinagledClient)
+    extends ReadableStore[CopySelectionRequestV1, Copy] {
+
+  /**
+   * Wraps `k` in a V1 request and returns the selected copy of a V1 response.
+   * Any other response variant is reported as CopyServiceException(VersionNotFound),
+   * thrown from inside the `map` callback.
+   */
+  override def get(k: CopySelectionRequestV1): Future[Option[Copy]] = {
+    val selection = copySelectionServiceClient.getSelectedCopy(CopySelectionRequest.V1(k))
+    selection.map {
+      case CopySelectionResponse.V1(response) => Some(response.selectedCopy)
+      case _ => throw CopyServiceException(CopyServiceErrorCode.VersionNotFound)
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CrMixerTweetStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CrMixerTweetStore.scala
new file mode 100644 
index 000000000..dba016e6c
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/CrMixerTweetStore.scala
@@ -0,0 +1,58 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.cr_mixer.thriftscala.CrMixer
+import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
+import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
+import com.twitter.cr_mixer.thriftscala.FrsTweetRequest
+import com.twitter.cr_mixer.thriftscala.FrsTweetResponse
+import com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.finagle.stats.Stat
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.util.Future
+
+/**
+ * Store to get content recs from content recommender.
+ *
+ * Both lookups go through `updateStats`, which records request, success/failure,
+ * empty/non-empty and latency metrics on the implicit stats receiver.
+ */
+case class CrMixerTweetStore(
+  crMixer: CrMixer.MethodPerEndpoint
+)(
+  implicit statsReceiver: StatsReceiver = NullStatsReceiver) {
+
+  private val requests = statsReceiver.counter("requests")
+  private val successes = statsReceiver.counter("success")
+  private val failures = statsReceiver.counter("failures")
+  private val nonEmptyResults = statsReceiver.counter("non_empty")
+  private val emptyResults = statsReceiver.counter("empty")
+  private val failuresByException = statsReceiver.scope("failures")
+  private val latency = statsReceiver.stat("latency")
+
+  /**
+   * Runs `f` under the latency stat and counts its outcome. `f` is by-name, so the
+   * underlying call is only started inside the timed section.
+   */
+  private def updateStats[T](f: => Future[Option[T]]): Future[Option[T]] = {
+    requests.incr()
+    val timed = Stat.timeFuture(latency)(f)
+    timed
+      .onSuccess { result =>
+        result match {
+          case Some(_) => nonEmptyResults.incr()
+          case None => emptyResults.incr()
+        }
+        successes.incr()
+      }
+      .onFailure { error =>
+        failures.incr()
+        // Second counter is keyed by the exception class name, under the "failures" scope.
+        failuresByException.counter(error.getClass.getName).incr()
+      }
+  }
+
+  /** Tweet recommendations for `request`; every successful response is wrapped in `Some`. */
+  def getTweetRecommendations(
+    request: CrMixerTweetRequest
+  ): Future[Option[CrMixerTweetResponse]] =
+    updateStats(crMixer.getTweetRecommendations(request).map(Some(_)))
+
+  /** FRS-based tweet recommendations; every successful response is wrapped in `Some`. */
+  def getFRSTweetCandidates(request: FrsTweetRequest): Future[Option[FrsTweetResponse]] =
+    updateStats(crMixer.getFrsBasedTweetRecommendations(request).map(Some(_)))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ExploreRankerStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ExploreRankerStore.scala
new file mode 100644
index 000000000..eeeb62d27
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/ExploreRankerStore.scala
@@ -0,0 +1,28 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.explore_ranker.thriftscala.ExploreRanker
+import com.twitter.explore_ranker.thriftscala.ExploreRankerResponse
+import com.twitter.explore_ranker.thriftscala.ExploreRankerRequest
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/** A Store for Video Tweet Recommendations from Explore
+ *
+ * @param exploreRankerService client whose `getRankedResults` endpoint backs this store
+ */
+case class ExploreRankerStore(exploreRankerService: ExploreRanker.MethodPerEndpoint)
+    extends ReadableStore[ExploreRankerRequest, ExploreRankerResponse] {
+
+  /** Method to get video recommendations
+   *
+   * @param request explore ranker request object
+   * @return the ranked results, always wrapped in `Some` when the call succeeds
+   */
+  override def get(
+    request: ExploreRankerRequest
+  ): Future[Option[ExploreRankerResponse]] =
+    exploreRankerService.getRankedResults(request).map(Some(_))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/FollowRecommendationsStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/FollowRecommendationsStore.scala
new file mode 100644
index 000000000..0ab722cd4
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/FollowRecommendationsStore.scala
@@ -0,0 +1,46 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
+import 
com.twitter.follow_recommendations.thriftscala.Recommendation
+import com.twitter.follow_recommendations.thriftscala.RecommendationRequest
+import com.twitter.follow_recommendations.thriftscala.RecommendationResponse
+import com.twitter.follow_recommendations.thriftscala.UserRecommendation
+import com.twitter.inject.Logging
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * ReadableStore over the follow-recommendations service that keeps only user
+ * recommendations and reports how many entries were kept or dropped.
+ */
+case class FollowRecommendationsStore(
+  frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
+  statsReceiver: StatsReceiver)
+    extends ReadableStore[RecommendationRequest, RecommendationResponse]
+    with Logging {
+
+  private val scopedStats = statsReceiver.scope(getClass.getSimpleName)
+  private val requests = scopedStats.counter("requests")
+  private val valid = scopedStats.counter("valid")
+  private val invalid = scopedStats.counter("invalid")
+  private val numTotalResults = scopedStats.stat("total_results")
+  private val numValidResults = scopedStats.stat("valid_results")
+
+  /**
+   * Fetches recommendations and returns a fresh response containing only the
+   * `Recommendation.User` entries; the result is always `Some` when the call succeeds.
+   */
+  override def get(request: RecommendationRequest): Future[Option[RecommendationResponse]] = {
+    requests.incr()
+    frsClient.getRecommendations(request).map { response =>
+      val allRecs = response.recommendations
+      numTotalResults.add(allRecs.size)
+
+      val (validRecs, invalidRecs) = allRecs.partition {
+        case Recommendation.User(_: UserRecommendation) => true
+        case _ => false
+      }
+      // One bulk increment per bucket; totals match counting entry by entry.
+      valid.incr(validRecs.size)
+      invalid.incr(invalidRecs.size)
+      numValidResults.add(validRecs.size)
+
+      Some(RecommendationResponse(recommendations = validRecs))
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/IbisStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/IbisStore.scala
new file mode 100644
index 000000000..6c355c505
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/IbisStore.scala
@@ -0,0 +1,190 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.finagle.stats.BroadcastStatsReceiver
+import 
com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.logger.MRLogger
+import com.twitter.frigate.common.store
+import com.twitter.frigate.common.store.Fail
+import com.twitter.frigate.common.store.IbisRequestInfo
+import com.twitter.frigate.common.store.IbisResponse
+import com.twitter.frigate.common.store.Sent
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.ibis2.service.thriftscala.Flags
+import com.twitter.ibis2.service.thriftscala.FlowControl
+import com.twitter.ibis2.service.thriftscala.Ibis2Request
+import com.twitter.ibis2.service.thriftscala.Ibis2Response
+import com.twitter.ibis2.service.thriftscala.Ibis2ResponseStatus
+import com.twitter.ibis2.service.thriftscala.Ibis2Service
+import com.twitter.ibis2.service.thriftscala.NotificationNotSentCode
+import com.twitter.ibis2.service.thriftscala.TargetFanoutResult.NotSentReason
+import com.twitter.util.Future
+
+/** Ibis2 store that can additionally send on behalf of a [[PushCandidate]]. */
+trait Ibis2Store extends store.Ibis2Store {
+  def send(ibis2Request: Ibis2Request, candidate: PushCandidate): Future[IbisResponse]
+}
+
+/**
+ * Sends notifications through the Ibis2 client and records request, outcome and failure
+ * metrics overall, per recommendation type and per Ibis model.
+ */
+case class PushIbis2Store(
+  ibisClient: Ibis2Service.MethodPerEndpoint
+)(
+  implicit val statsReceiver: StatsReceiver = NullStatsReceiver)
+    extends Ibis2Store {
+  private val log = MRLogger(this.getClass.getSimpleName)
+  private val stats = statsReceiver.scope("ibis_v2_store")
+  private val statsByCrt = stats.scope("byCrt")
+  private val requestsByCrt = statsByCrt.scope("requests")
+  private val failuresByCrt = statsByCrt.scope("failures")
+  private val successByCrt = statsByCrt.scope("success")
+
+  private val statsByIbisModel = stats.scope("byIbisModel")
+  private val requestsByIbisModel = statsByIbisModel.scope("requests")
+  private val failuresByIbisModel = statsByIbisModel.scope("failures")
+  private val successByIbisModel = statsByIbisModel.scope("success")
+
+  /**
+   * Name used for per-exception failure counters.
+   *
+   * `Class.getCanonicalName` returns null for anonymous and local classes; passing that
+   * null on as a counter name is what this guards against. The canonical name is still
+   * preferred so existing metric names are unchanged whenever it exists.
+   */
+  private def exceptionMetricName(ex: Throwable): String =
+    Option(ex.getClass.getCanonicalName).getOrElse(ex.getClass.getName)
+
+  /**
+   * Sends the request (with retries, see `retry`) and maps the Ibis status to
+   * Sent (SuccessWithDeliveries / SuccessNoDeliveries) or Fail (anything else).
+   * A failed call is counted and left as a failed Future.
+   */
+  private[this] def ibisSend(
+    ibis2Request: Ibis2Request,
+    commonRecommendationType: CommonRecommendationType
+  ): Future[IbisResponse] = {
+    val ibisModel = ibis2Request.modelName
+
+    // Dark-write requests are additionally counted under the "dark_write" scope.
+    val bStats = if (ibis2Request.flags.getOrElse(Flags()).darkWrite.contains(true)) {
+      BroadcastStatsReceiver(
+        Seq(
+          stats,
+          stats.scope("dark_write")
+        )
+      )
+    } else BroadcastStatsReceiver(Seq(stats))
+
+    bStats.counter("requests").incr()
+    requestsByCrt.counter(commonRecommendationType.name).incr()
+    requestsByIbisModel.counter(ibisModel).incr()
+
+    retry(ibisClient, ibis2Request, 3, bStats)
+      .map { response =>
+        bStats.counter(response.status.status.name).incr()
+        successByCrt.counter(response.status.status.name, commonRecommendationType.name).incr()
+        successByIbisModel.counter(response.status.status.name, ibisModel).incr()
+        response.status.status match {
+          case Ibis2ResponseStatus.SuccessWithDeliveries |
+              Ibis2ResponseStatus.SuccessNoDeliveries =>
+            IbisResponse(Sent, Some(response))
+          case _ =>
+            IbisResponse(Fail, Some(response))
+        }
+      }
+      .onFailure { ex =>
+        bStats.counter("failures").incr()
+        val exceptionName = exceptionMetricName(ex)
+        bStats.scope("failures").counter(exceptionName).incr()
+        failuresByCrt.counter(exceptionName, commonRecommendationType.name).incr()
+        failuresByIbisModel.counter(exceptionName, ibisModel).incr()
+      }
+  }
+
+  /**
+   * Not-sent code of the first push result, when the response has fanout results whose
+   * first push result is a NotSentReason; None otherwise.
+   */
+  private def getNotifNotSentReason(
+    ibis2Response: Ibis2Response
+  ): Option[NotificationNotSentCode] = {
+    ibis2Response.status.fanoutResults match {
+      case Some(fanoutResult) =>
+        fanoutResult.pushResult.flatMap { pushResult =>
+          pushResult.results.headOption match {
+            case Some(NotSentReason(notSentInfo)) => Some(notSentInfo.notSentCode)
+            case _ => None
+          }
+        }
+      case _ => None
+    }
+  }
+
+  /**
+   * Sends on behalf of `candidate`. When the request carries no external impression id,
+   * the candidate's impression id is filled in first. Not-sent codes are counted, and any
+   * status other than SuccessWithDeliveries is logged as a dropped request.
+   */
+  def send(ibis2Request: Ibis2Request, candidate: PushCandidate): Future[IbisResponse] = {
+    val requestWithIID = if (ibis2Request.flowControl.exists(_.externalIid.isDefined)) {
+      ibis2Request
+    } else {
+      ibis2Request.copy(
+        flowControl = Some(
+          ibis2Request.flowControl
+            .getOrElse(FlowControl())
+            .copy(externalIid = Some(candidate.impressionId))
+        )
+      )
+    }
+
+    val commonRecommendationType = candidate.frigateNotification.commonRecommendationType
+
+    ibisSend(requestWithIID, commonRecommendationType)
+      .onSuccess { response =>
+        response.ibis2Response.foreach { ibis2Response =>
+          getNotifNotSentReason(ibis2Response).foreach { notifNotSentCode =>
+            stats.scope(ibis2Response.status.status.name).counter(s"$notifNotSentCode").incr()
+          }
+          if (ibis2Response.status.status != Ibis2ResponseStatus.SuccessWithDeliveries) {
+            log.warning(
+              s"Request dropped on ibis for ${ibis2Request.recipientSelector.recipientId}: $ibis2Response")
+          }
+        }
+      }
+      .onFailure { ex =>
+        log.warning(
+          s"Ibis Request failure: ${ex.getClass.getCanonicalName} \n For IbisRequest: $ibis2Request")
+        log.error(ex, ex.getMessage)
+      }
+  }
+
+  // retry request when Ibis2ResponseStatus is PreFanoutError
+  // `retryCount` is the number of re-sends still allowed, so the initial value of 3 used by
+  // ibisSend permits up to four attempts in total. Failed Futures are not retried here.
+  def retry(
+    ibisClient: Ibis2Service.MethodPerEndpoint,
+    request: Ibis2Request,
+    retryCount: Int,
+    bStats: StatsReceiver
+  ): Future[Ibis2Response] = {
+    ibisClient.sendNotification(request).flatMap { response =>
+      response.status.status match {
+        case Ibis2ResponseStatus.PreFanoutError if retryCount > 0 =>
+          bStats.scope("requests").counter("retry").incr()
+          bStats.counter(response.status.status.name).incr()
+          retry(ibisClient, request, retryCount - 1, bStats)
+        case _ =>
+          Future.value(response)
+      }
+    }
+  }
+
+  /** Sends without a candidate: no impression-id fill-in and no extra logging. */
+  override def send(
+    ibis2Request: Ibis2Request,
+    requestInfo: IbisRequestInfo
+  ): Future[IbisResponse] = {
+    ibisSend(ibis2Request, requestInfo.commonRecommendationType)
+  }
+}
+
+/**
+ * Wrapper that sets the dark-write flag on each request before delegating to
+ * `remoteIbis2Store`.
+ */
+case class StagingIbis2Store(remoteIbis2Store: PushIbis2Store) extends Ibis2Store {
+
+  /**
+   * Returns the request with darkWrite set: true for anyone who is not a team member,
+   * otherwise whatever the request already asked for (false when unset).
+   */
+  final def addDarkWriteFlagIbis2Request(
+    isTeamMember: Boolean,
+    ibis2Request: Ibis2Request
+  ): Ibis2Request = {
+    val flags =
+      ibis2Request.flags.getOrElse(Flags())
+    val darkWrite: Boolean = !isTeamMember || flags.darkWrite.getOrElse(false)
+    ibis2Request.copy(flags = Some(flags.copy(darkWrite = Some(darkWrite))))
+  }
+
+  override def send(ibis2Request: Ibis2Request, candidate: PushCandidate): Future[IbisResponse] = {
+    candidate.target.isTeamMember.flatMap { isTeamMember =>
+      val ibis2Req = addDarkWriteFlagIbis2Request(isTeamMember, ibis2Request)
+      remoteIbis2Store.send(ibis2Req, candidate)
+    }
+  }
+
+  override def send(
+    ibis2Request: Ibis2Request,
+    requestInfo: IbisRequestInfo
+  ): Future[IbisResponse] = {
+    requestInfo.isTeamMember.flatMap { isTeamMember =>
+      val ibis2Req = addDarkWriteFlagIbis2Request(isTeamMember, ibis2Request)
+      remoteIbis2Store.send(ibis2Req, requestInfo)
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/InterestDiscoveryStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/InterestDiscoveryStore.scala
new file mode 100644
index 000000000..80fc0ea7e
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/InterestDiscoveryStore.scala
@@ -0,0 +1,16 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.interests_discovery.thriftscala.InterestsDiscoveryService
+import com.twitter.interests_discovery.thriftscala.RecommendedListsRequest
+import com.twitter.interests_discovery.thriftscala.RecommendedListsResponse
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * ReadableStore view over the interests-discovery client: `get` forwards to
+ * `getListRecos` and wraps every successful response in `Some`.
+ */
+case class InterestDiscoveryStore(
+  client: InterestsDiscoveryService.MethodPerEndpoint)
+    extends ReadableStore[RecommendedListsRequest, RecommendedListsResponse] {
+
+  override def get(request: RecommendedListsRequest): Future[Option[RecommendedListsResponse]] = {
+    client.getListRecos(request).map(Some(_))
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LabeledPushRecsDecideredStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LabeledPushRecsDecideredStore.scala
new file mode 100644
index 000000000..73fc28837
--- 
/dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LabeledPushRecsDecideredStore.scala
@@ -0,0 +1,156 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.candidate.TargetDecider
+import com.twitter.frigate.common.history.History
+import com.twitter.frigate.common.history.HistoryStoreKeyContext
+import com.twitter.frigate.common.history.PushServiceHistoryStore
+import com.twitter.frigate.data_pipeline.thriftscala._
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.hermit.store.labeled_push_recs.LabeledPushRecsJoinedWithNotificationHistoryStore
+import com.twitter.logging.Logger
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+import com.twitter.util.Time
+
+/** Lookup key plus the two switches that pick the data source and enable verification. */
+case class LabeledPushRecsVerifyingStoreKey(
+  historyStoreKey: HistoryStoreKeyContext,
+  useHydratedDataset: Boolean,
+  verifyHydratedDatasetResults: Boolean) {
+  def userId: Long = historyStoreKey.targetUserId
+}
+
+/**
+ * User history together with the optional verification outcome; both maps are None
+ * whenever no verification was performed.
+ */
+case class LabeledPushRecsVerifyingStoreResponse(
+  userHistory: UserHistoryValue,
+  unequalNotificationsUnhydratedToHydrated: Option[
+    Map[(Time, FrigateNotification), FrigateNotification]
+  ],
+  missingFromHydrated: Option[Map[Time, FrigateNotification]])
+
+/**
+ * Serves labeled push-rec history either by joining the labeled sends with the real
+ * notification history, or straight from the hydrated dataset, optionally comparing the
+ * hydrated result against the joined one.
+ */
+case class LabeledPushRecsVerifyingStore(
+  labeledPushRecsStore: ReadableStore[UserHistoryKey, UserHistoryValue],
+  historyStore: PushServiceHistoryStore
+)(
+  implicit stats: StatsReceiver)
+    extends ReadableStore[LabeledPushRecsVerifyingStoreKey, LabeledPushRecsVerifyingStoreResponse] {
+
+  /**
+   * Reads 365 days of notification history and the labeled sends concurrently and joins
+   * them; None when there are no labeled sends for the user.
+   */
+  private def getByJoiningWithRealHistory(
+    key: HistoryStoreKeyContext
+  ): Future[Option[UserHistoryValue]] = {
+    val realHistory = historyStore.get(key, Some(365.days))
+    val labeledSends = labeledPushRecsStore.get(UserHistoryKey.UserId(key.targetUserId))
+    Future.join(realHistory, labeledSends).map {
+      case (History(realtimeHistoryMap), uhValueOpt) =>
+        uhValueOpt.map { uhValue =>
+          LabeledPushRecsJoinedWithNotificationHistoryStore
+            .joinLabeledPushRecsSentWithNotificationHistory(uhValue, realtimeHistoryMap, stats)
+        }
+    }
+  }
+
+  /**
+   * Indexes the LabeledPushRecSend events that carry both a timestamp and a frigate
+   * notification by their send time.
+   */
+  private def processUserHistoryValue(uhValue: UserHistoryValue): Map[Time, FrigateNotification] = {
+    val sendsByTime = uhValue.events
+      .getOrElse(Nil)
+      .collect {
+        case Event(
+              EventType.LabeledPushRecSend,
+              Some(tsMillis),
+              Some(EventUnion.LabeledPushRecSendEvent(lprs: LabeledPushRecSendEvent))
+            ) if lprs.pushRecSendEvent.frigateNotification.isDefined =>
+          Time.fromMilliseconds(tsMillis) -> lprs.pushRecSendEvent.frigateNotification.get
+      }
+    sendsByTime.toMap
+  }
+
+  /** Response carrying a history for which no verification was run. */
+  private def unverified(uhValue: UserHistoryValue): LabeledPushRecsVerifyingStoreResponse =
+    LabeledPushRecsVerifyingStoreResponse(uhValue, None, None)
+
+  /**
+   * Compares the hydrated value with the joined history: `unequal` holds notifications
+   * present at the same time in both but different, `missing` holds joined entries whose
+   * time is absent from the hydrated data. The result is None when there is no hydrated value.
+   */
+  private def verifyAgainstRealHistory(
+    historyStoreKey: HistoryStoreKeyContext,
+    hydratedValueOpt: Option[UserHistoryValue]
+  ): Future[Option[LabeledPushRecsVerifyingStoreResponse]] =
+    getByJoiningWithRealHistory(historyStoreKey).map {
+      joinedWithRealHistoryOpt: Option[UserHistoryValue] =>
+        val joinedWithRealHistoryMap =
+          joinedWithRealHistoryOpt.map(processUserHistoryValue).getOrElse(Map.empty)
+        val hydratedMap = hydratedValueOpt.map(processUserHistoryValue).getOrElse(Map.empty)
+        val unequal = joinedWithRealHistoryMap.flatMap {
+          case (time, frigateNotif) =>
+            hydratedMap.get(time).collect {
+              case n if n != frigateNotif => ((time, frigateNotif), n)
+            }
+        }
+        val missing = joinedWithRealHistoryMap.filterNot {
+          case (time, _) => hydratedMap.contains(time)
+        }
+        hydratedValueOpt.map { hydratedValue =>
+          LabeledPushRecsVerifyingStoreResponse(hydratedValue, Some(unequal), Some(missing))
+        }
+    }
+
+  override def get(
+    key: LabeledPushRecsVerifyingStoreKey
+  ): Future[Option[LabeledPushRecsVerifyingStoreResponse]] = {
+    if (key.useHydratedDataset) {
+      val uhKey = UserHistoryKey.UserId(key.userId)
+      labeledPushRecsStore.get(uhKey).flatMap { hydratedValueOpt: Option[UserHistoryValue] =>
+        if (key.verifyHydratedDatasetResults) {
+          verifyAgainstRealHistory(key.historyStoreKey, hydratedValueOpt)
+        } else {
+          Future.value(hydratedValueOpt.map(unverified))
+        }
+      }
+    } else {
+      getByJoiningWithRealHistory(key.historyStoreKey).map { uhValueOpt =>
+        uhValueOpt.map(unverified)
+      }
+    }
+  }
+}
+
+case class LabeledPushRecsStoreKey(target: TargetDecider, historyStoreKey: HistoryStoreKeyContext) {
+  def userId: Long = historyStoreKey.targetUserId
+}
+
+/**
+ * Chooses, per request and via two deciders, whether to use the hydrated dataset and
+ * whether to verify it, then returns only the user history from the verifying store.
+ */
+case class LabeledPushRecsDecideredStore(
+  verifyingStore: ReadableStore[
+    LabeledPushRecsVerifyingStoreKey,
+    LabeledPushRecsVerifyingStoreResponse
+  ],
+  useHydratedLabeledSendsDatasetDeciderKey: String,
+  verifyHydratedLabeledSendsForHistoryDeciderKey: String
+)(
+  implicit globalStats: StatsReceiver)
+    extends ReadableStore[LabeledPushRecsStoreKey, UserHistoryValue] {
+  private val log = Logger()
+  private val stats = globalStats.scope("LabeledPushRecsDecideredStore")
+  private val numComparisons = stats.counter("num_comparisons")
+  private val numMissingStat = stats.stat("num_missing")
+  private val numUnequalStat = stats.stat("num_unequal")
+
+  override def get(key: LabeledPushRecsStoreKey): Future[Option[UserHistoryValue]] = {
+    val useHydrated = key.target.isDeciderEnabled(
+      useHydratedLabeledSendsDatasetDeciderKey,
+      stats,
+      useRandomRecipient = true
+    )
+
+    // The verification decider is only consulted when the hydrated dataset is in use.
+    val verifyHydrated = useHydrated && key.target.isDeciderEnabled(
+      verifyHydratedLabeledSendsForHistoryDeciderKey,
+      stats,
+      useRandomRecipient = true
+    )
+
+    val newKey = LabeledPushRecsVerifyingStoreKey(key.historyStoreKey, useHydrated, verifyHydrated)
+    verifyingStore.get(newKey).map { responseOpt =>
+      responseOpt.map {
+        case LabeledPushRecsVerifyingStoreResponse(uhValue, unequalOpt, missingOpt) =>
+          // Comparison stats are recorded only when both verification maps are present.
+          for {
+            unequal <- unequalOpt
+            missing <- missingOpt
+          } {
+            numComparisons.incr()
+            numMissingStat.add(missing.size)
+            numUnequalStat.add(unequal.size)
+          }
+          uhValue
+      }
+    }
+  }
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LexServiceStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LexServiceStore.scala
new file mode 
100644
index 000000000..b11cdc0dd
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/LexServiceStore.scala
@@ -0,0 +1,26 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.livevideo.common.ids.EventId
+import com.twitter.livevideo.timeline.client.v2.LiveVideoTimelineClient
+import com.twitter.livevideo.timeline.domain.v2.Event
+import com.twitter.livevideo.timeline.domain.v2.LookupContext
+import com.twitter.stitch.storehaus.ReadableStoreOfStitch
+import com.twitter.stitch.NotFound
+import com.twitter.stitch.Stitch
+import com.twitter.storehaus.ReadableStore
+
+/** Event lookup key; the lookup context defaults to `LookupContext.default`. */
+case class EventRequest(eventId: Long, lookupContext: LookupContext = LookupContext.default)
+
+object LexServiceStore {
+
+  /**
+   * Builds a ReadableStore that resolves an [[EventRequest]] through the live video
+   * timeline client. A NotFound failure from the client is mapped to `Stitch.NotFound`.
+   */
+  def apply(
+    liveVideoTimelineClient: LiveVideoTimelineClient
+  ): ReadableStore[EventRequest, Event] =
+    ReadableStoreOfStitch { eventRequest =>
+      val lookup = liveVideoTimelineClient.getEvent(
+        EventId(eventRequest.eventId),
+        eventRequest.lookupContext)
+      lookup.rescue {
+        case NotFound => Stitch.NotFound
+      }
+    }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/NTabHistoryStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/NTabHistoryStore.scala
new file mode 100644
index 000000000..9e9bc37b7
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/NTabHistoryStore.scala
@@ -0,0 +1,45 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.hermit.store.common.ReadableWritableStore
+import com.twitter.notificationservice.thriftscala.GenericNotificationOverrideKey
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.bijections.Bijections.BinaryCompactScalaInjection
+import com.twitter.storage.client.manhattan.bijections.Bijections.LongInjection
+import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection
+import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpoint
+import 
com.twitter.storage.client.manhattan.kv.impl.Component
+import com.twitter.storage.client.manhattan.kv.impl.DescriptorP1L1
+import com.twitter.storage.client.manhattan.kv.impl.KeyDescriptor
+import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
+import com.twitter.util.Future
+
+/**
+ * Manhattan-backed store of GenericNotificationOverrideKey values, keyed by
+ * (userId, impressionId): the user id is the partition key and the impression id the
+ * local key within `dataset`.
+ */
+case class NTabHistoryStore(mhEndpoint: ManhattanKVEndpoint, dataset: String)
+    extends ReadableWritableStore[(Long, String), GenericNotificationOverrideKey] {
+
+  private val keyDesc: DescriptorP1L1.EmptyKey[Long, String] =
+    KeyDescriptor(Component(LongInjection), Component(StringInjection))
+
+  private val genericNotifKeyValDesc: ValueDescriptor.EmptyValue[GenericNotificationOverrideKey] =
+    ValueDescriptor[GenericNotificationOverrideKey](
+      BinaryCompactScalaInjection(GenericNotificationOverrideKey)
+    )
+
+  /** Full Manhattan key for one (user, impression) pair in this store's dataset. */
+  private def manhattanKey(userId: Long, impressionId: String) =
+    keyDesc.withDataset(dataset).withPkey(userId).withLkey(impressionId)
+
+  /** Reads the stored override key, or None when Manhattan has no value for the pair. */
+  override def get(key: (Long, String)): Future[Option[GenericNotificationOverrideKey]] = {
+    val (userId, impressionId) = key
+    val lookup = mhEndpoint.get(manhattanKey(userId, impressionId), genericNotifKeyValDesc)
+    Stitch.run(lookup).map(_.map(_.contents))
+  }
+
+  /** Inserts the override key for the given (user, impression) pair. */
+  override def put(keyValue: ((Long, String), GenericNotificationOverrideKey)): Future[Unit] = {
+    val ((userId, impressionId), genericNotifOverrideKey) = keyValue
+    val mhVal = genericNotifKeyValDesc.withValue(genericNotifOverrideKey)
+    Stitch.run(mhEndpoint.insert(manhattanKey(userId, impressionId), mhVal))
+  }
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OCFPromptHistoryStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OCFPromptHistoryStore.scala
new file mode 100644
index 000000000..33b119c79
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OCFPromptHistoryStore.scala
@@ -0,0 +1,73 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.onboarding.task.service.thriftscala.FatigueFlowEnrollment
+import com.twitter.stitch.Stitch
+import com.twitter.storage.client.manhattan.bijections.Bijections.BinaryScalaInjection
+import com.twitter.storage.client.manhattan.bijections.Bijections.LongInjection
+import com.twitter.storage.client.manhattan.bijections.Bijections.StringInjection
+import com.twitter.storage.client.manhattan.kv.impl.Component
+import com.twitter.storage.client.manhattan.kv.impl.KeyDescriptor
+import com.twitter.storage.client.manhattan.kv.impl.ValueDescriptor
+import com.twitter.storage.client.manhattan.kv.ManhattanKVClient
+import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
+import com.twitter.storage.client.manhattan.kv.ManhattanKVEndpointBuilder
+import com.twitter.storage.client.manhattan.kv.NoMtlsParams
+import com.twitter.storehaus.ReadableStore
+import com.twitter.storehaus_internal.manhattan.Omega
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.util.Time
+
+case class OCFHistoryStoreKey(userId: Long, fatigueDuration: Duration, fatigueGroup: String)
+
+/**
+ * Read-only view of fatigue-flow enrollments in Manhattan: for a user and fatigue group it
+ * returns the first enrollment found in a key range derived from the fatigue duration.
+ */
+class OCFPromptHistoryStore(
+  manhattanAppId: String,
+  dataset: String,
+  mtlsParams: ManhattanKVClientMtlsParams = NoMtlsParams
+)(
+  implicit stats: StatsReceiver)
+    extends ReadableStore[OCFHistoryStoreKey, FatigueFlowEnrollment] {
+
+  import ManhattanInjections._
+
+  private val client = ManhattanKVClient(
+    appId = manhattanAppId,
+    dest = Omega.wilyName,
+    mtlsParams = mtlsParams,
+    label = "ocf_history_store"
+  )
+  private val endpoint = ManhattanKVEndpointBuilder(client, defaultMaxTimeout = 5.seconds)
+    .statsReceiver(stats.scope("ocf_history_store"))
+    .build()
+
+  private val limitResultsTo = 1
+
+  private val datasetKey = keyDesc.withDataset(dataset)
+
+  override def get(storeKey: OCFHistoryStoreKey): Future[Option[FatigueFlowEnrollment]] = {
+    val userId = storeKey.userId
+    val fatigueGroup = storeKey.fatigueGroup
+    val fatigueLength = storeKey.fatigueDuration.inMilliseconds
+    val currentTime = Time.now.inMilliseconds
+    // NOTE(review): the upper bound is fatigueLength - currentTime, which is negative for
+    // realistic inputs; presumably the Long key component holds negated timestamps so that
+    // this selects enrollments inside the fatigue window - confirm against the writer.
+    val fullKey = datasetKey
+      .withPkey(userId)
+      .from(fatigueGroup)
+      .to(fatigueGroup, fatigueLength - currentTime)
+
+    val slice = endpoint.slice(fullKey, valDesc, limit = Some(limitResultsTo))
+    Stitch.run(slice).map { results =>
+      results.headOption.map {
+        case (_, mhValue) => mhValue.contents
+      }
+    }
+  }
+}
+
+/** Key and value descriptors shared by the fatigue-flow enrollment dataset. */
+object ManhattanInjections {
+  val keyDesc = KeyDescriptor(Component(LongInjection), Component(StringInjection, LongInjection))
+  val valDesc = ValueDescriptor(BinaryScalaInjection(FatigueFlowEnrollment))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OnlineUserHistoryStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OnlineUserHistoryStore.scala
new file mode 100644
index 000000000..d7ecfa7e4
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OnlineUserHistoryStore.scala
@@ -0,0 +1,81 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.frigate.common.history.History
+import com.twitter.frigate.common.store.RealTimeClientEventStore
+import com.twitter.frigate.data_pipeline.common.HistoryJoin
+import com.twitter.frigate.data_pipeline.thriftscala.Event
+import com.twitter.frigate.data_pipeline.thriftscala.EventUnion
+import com.twitter.frigate.data_pipeline.thriftscala.PushRecSendEvent
+import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.util.Time
+
+case class OnlineUserHistoryKey(
+  userId: Long,
+  offlineUserHistory: Option[UserHistoryValue],
+  history: Option[History])
+
+/**
+ * Extends an offline user history with labeled sends derived from recent real-time client
+ * events. The result is None when the key carries no offline history.
+ */
+case class OnlineUserHistoryStore(
+  realTimeClientEventStore: RealTimeClientEventStore,
+  duration: Duration = 3.days)
+    extends ReadableStore[OnlineUserHistoryKey, UserHistoryValue] {
+
+  override def get(key: OnlineUserHistoryKey): Future[Option[UserHistoryValue]] = {
+    val now = Time.now
+
+    // Sends are taken from one extra day before the client-event window.
+    val sendCutoff = now - (duration + 1.day)
+    val recentSends = key.history
+      .getOrElse(History(Map.empty))
+      .sortedPushDmHistory
+      .filter { case (time, _) => time > sendCutoff }
+    val pushRecSends = recentSends.map {
+      case (time, frigateNotification) =>
+        PushRecSendEvent(
+          frigateNotification = Some(frigateNotification),
+          impressionId = frigateNotification.impressionId
+        ) -> time
+    }
+
+    realTimeClientEventStore
+      .get(key.userId, now - duration, now)
+      .map { attributedEventHistory =>
+        // Only events whose union is an AttributedPushRecClientEvent are kept.
+        val attributedClientEvents = attributedEventHistory.sortedHistory.flatMap {
+          case (time, event) =>
+            event.eventUnion match {
+              case Some(eventUnion: EventUnion.AttributedPushRecClientEvent) =>
+                Some((eventUnion.attributedPushRecClientEvent, event.eventType, time))
+              case _ => None
+            }
+        }
+
+        val realtimeLabeledSends: Seq[Event] = HistoryJoin.getLabeledPushRecSends(
+          pushRecSends,
+          attributedClientEvents,
+          Seq(),
+          Seq(),
+          Seq(),
+          now
+        )
+
+        key.offlineUserHistory.map { offlineUserHistory =>
+          val combinedEvents = offlineUserHistory.events.map { offlineEvents =>
+            // Keying by timestamp keeps one event per timestamp; on a clash the later
+            // entry of (offline ++ realtime) replaces the earlier one.
+            val eventsByTimestamp = (offlineEvents ++ realtimeLabeledSends).map { event =>
+              event.timestampMillis -> event
+            }.toMap
+            // Newest first; events without a timestamp sort as timestamp 0.
+            eventsByTimestamp.values.toSeq.sortBy { event =>
+              -1 * event.timestampMillis.getOrElse(0L)
+            }
+          }
+
+          offlineUserHistory.copy(events = combinedEvents)
+        }
+      }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OpenAppUserStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OpenAppUserStore.scala
new file mode 100644
index 000000000..85d3f5afa
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/OpenAppUserStore.scala
@@ -0,0 +1,13 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.frigate.common.store.strato.StratoFetchableStore
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.client.Client
+import com.twitter.strato.generated.client.rux.open_app.UsersInOpenAppDdgOnUserClientColumn
+
+// Every value returned by the UsersInOpenAppDdgOnUser column is exposed as `true`.
+object OpenAppUserStore {
+  def apply(stratoClient: Client): ReadableStore[Long, Boolean] = StratoFetchableStore
+    .withUnitView(new UsersInOpenAppDdgOnUserClientColumn(stratoClient).fetcher)
+    .mapValues(_ => true)
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SocialGraphServiceProcessStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SocialGraphServiceProcessStore.scala
new file mode 100644
index 000000000..4af473656
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SocialGraphServiceProcessStore.scala
@@ -0,0 +1,21 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.frigate.pushservice.params.PushQPSLimitConstants.SocialGraphServiceBatchSize
+import com.twitter.hermit.predicate.socialgraph.RelationEdge
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * Splits a `multiGet` into `edgeStore.multiGet` calls of at most `SocialGraphServiceBatchSize`
+ * keys each and merges the per-batch result maps into one.
+ */
+case class SocialGraphServiceProcessStore(edgeStore: ReadableStore[RelationEdge, Boolean])
+    extends ReadableStore[RelationEdge, Boolean] {
+  override def multiGet[T <: RelationEdge](
+    relationEdges: Set[T]
+  ): Map[T, Future[Option[Boolean]]] = {
+    val batches = relationEdges.grouped(SocialGraphServiceBatchSize).toSet
+    val perBatchResults = batches.map(batch => edgeStore.multiGet(batch))
+    perBatchResults.foldLeft(Map.empty[T, Future[Option[Boolean]]])(_ ++ _)
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SoftUserFollowingStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SoftUserFollowingStore.scala
new file mode 100644
index 000000000..b2de4cf26
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/SoftUserFollowingStore.scala
@@ -0,0 +1,61 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.gizmoduck.thriftscala.UserType
+import com.twitter.stitch.Stitch
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.client.Client
+import com.twitter.strato.client.UserId
+import com.twitter.strato.config.FlockCursors.BySource.Begin
+import com.twitter.strato.config.FlockCursors.Continue
+import com.twitter.strato.config.FlockCursors.End
+import com.twitter.strato.config.FlockPage
+import com.twitter.strato.generated.client.socialgraph.service.soft_users.softUserFollows.EdgeBySourceClientColumn
+import com.twitter.util.Future
+
+// Cursor alias plus the paging limits used by the store below.
+object SoftUserFollowingStore {
+  type ViewerFollowingCursor = EdgeBySourceClientColumn.Cursor
+  val MaxPagesToFetch = 2
+  val PageLimit = 50
+}
+
+/**
+ * Store keyed by [[User]]: for `UserType.Soft` users it yields the ids read from the soft-user
+ * follow edges Strato column (at most `MaxPagesToFetch` pages); for all other users, `None`.
+ */
+class SoftUserFollowingStore(stratoClient: Client) extends ReadableStore[User, Seq[Long]] {
+  import SoftUserFollowingStore._
+  private val softUserFollowingEdgesPaginator = new EdgeBySourceClientColumn(stratoClient).paginator
+
+  // Gathers the `to` ids of the page at `cursor`, recursing while pages and budget remain.
+  private def accumulateIds(cursor: ViewerFollowingCursor, pagesToFetch: Int): Stitch[Seq[Long]] =
+    softUserFollowingEdgesPaginator.paginate(cursor).flatMap {
+      case FlockPage(edges, nextCursor, _) =>
+        val pageIds = edges.map(_.to).map(_.value)
+        nextCursor match {
+          case more: Continue if pagesToFetch > 1 =>
+            Stitch.join(Stitch.value(pageIds), accumulateIds(more, pagesToFetch - 1)).map {
+              case (current, rest) => current ++ rest
+            }
+          // end pagination if last page has been fetched or [[MaxPagesToFetch]] have been fetched
+          case _: End | _: Continue => Stitch.value(pageIds)
+        }
+    }
+
+  private def softFollowingFromStrato(
+    sourceId: Long,
+    pageLimit: Int,
+    pagesToFetch: Int
+  ): Stitch[Seq[Long]] =
+    accumulateIds(Begin[UserId, UserId](UserId(sourceId), pageLimit), pagesToFetch)
+
+  override def get(user: User): Future[Option[Seq[Long]]] = {
+    user.userType match {
+      case UserType.Soft =>
+        val ids = softFollowingFromStrato(user.id, PageLimit, MaxPagesToFetch)
+        Stitch.run(ids).map(Option(_))
+      case _ => Future.None
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetImpressionsStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetImpressionsStore.scala
new file mode 100644
index 000000000..6acf1d136
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetImpressionsStore.scala
@@ -0,0 +1,19 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.frigate.common.store.strato.StratoFetchableStore
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.client.{Client => StratoClient}
+import com.twitter.util.Future
+
+/**
+ * Reads the inbound impression count of a Tweet from the `rux/impression.Tweet` Strato column.
+ * The column value is a String; `getCounts` converts it with `toLong`.
+ */
+class TweetImpressionsStore(stratoClient: StratoClient) extends ReadableStore[Long, String] {
+  private val impressionColumn = "rux/impression.Tweet"
+  private val impressionStore =
+    StratoFetchableStore.withUnitView[Long, String](stratoClient, impressionColumn)
+
+  def getCounts(tweetId: Long): Future[Option[Long]] =
+    impressionStore.get(tweetId).map(countOpt => countOpt.map(_.toLong))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetTranslationStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetTranslationStore.scala
new file mode 100644
index 000000000..618d8da32
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/TweetTranslationStore.scala
@@ -0,0 +1,211 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.context.TwitterContext
+import com.twitter.context.thriftscala.Viewer
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.TwitterContextPermit
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import
com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.kujaku.domain.thriftscala.CacheUsageType
+import com.twitter.kujaku.domain.thriftscala.MachineTranslation
+import com.twitter.kujaku.domain.thriftscala.MachineTranslationResponse
+import com.twitter.kujaku.domain.thriftscala.TranslationSource
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.generated.client.translation.service.IsTweetTranslatableClientColumn
+import com.twitter.strato.generated.client.translation.service.platform.MachineTranslateTweetClientColumn
+import com.twitter.tweetypie.thriftscala.Tweet
+import com.twitter.util.Future
+import com.twitter.util.logging.Logging
+
+object TweetTranslationStore { // key/value types and the recommendation-type allow-list for the store below
+  case class Key(
+    target: Target,
+    tweetId: Long,
+    tweet: Option[Tweet],
+    crt: CommonRecommendationType)
+
+  case class Value(
+    translatedTweetText: String,
+    localizedSourceLanguage: String)
+
+  val allowedCRTs = Set[CommonRecommendationType]( // only these recommendation types pass the CRT check
+    CommonRecommendationType.TwistlyTweet
+  )
+}
+
+case class TweetTranslationStore(
+  translateTweetStore: ReadableStore[
+    MachineTranslateTweetClientColumn.Key,
+    MachineTranslationResponse
+  ],
+  isTweetTranslatableStore: ReadableStore[IsTweetTranslatableClientColumn.Key, Boolean],
+  statsReceiver: StatsReceiver)
+    extends ReadableStore[TweetTranslationStore.Key, TweetTranslationStore.Value]
+    with Logging {
+
+  private val stats = statsReceiver.scope("tweetTranslationStore")
+  private val isTranslatableCounter = stats.counter("tweetIsTranslatable")
+  private val notTranslatableCounter = stats.counter("tweetIsNotTranslatable")
+  private val protectedUserCounter = stats.counter("protectedUser")
+  private val notProtectedUserCounter = stats.counter("notProtectedUser")
+  private val validLanguageCounter = stats.counter("validTweetLanguage")
+  private val invalidLanguageCounter = stats.counter("invalidTweetLanguage")
+  private val validCrtCounter = stats.counter("validCrt")
+  private val invalidCrtCounter = stats.counter("invalidCrt")
+  private val paramEnabledCounter = stats.counter("paramEnabled")
+  private val paramDisabledCounter = stats.counter("paramDisabled")
+
+  private val twitterContext = TwitterContext(TwitterContextPermit)
+
+  override def get(k: TweetTranslationStore.Key): Future[Option[TweetTranslationStore.Value]] = { // translated text + localized source language, or None
+    k.target.inferredUserDeviceLanguage.flatMap {
+      case Some(deviceLanguage) =>
+        setTwitterContext(k.target, deviceLanguage) {
+          translateTweet(
+            target = k.target,
+            tweetId = k.tweetId,
+            tweet = k.tweet,
+            crt = k.crt,
+            deviceLanguage = deviceLanguage).map { responseOpt =>
+            responseOpt.flatMap { response =>
+              response.translatorLocalizedSourceLanguage // a response without this field yields None
+                .map { localizedSourceLanguage =>
+                  TweetTranslationStore.Value(
+                    translatedTweetText = response.translation,
+                    localizedSourceLanguage = localizedSourceLanguage
+                  )
+                }.filter { _ => // keep only translations whose source is Google
+                  response.translationSource == TranslationSource.Google
+                }
+            }
+          }
+        }
+      case None => Future.None // no inferred device language, so nothing to translate into
+    }
+
+  }
+
+  // True when the target user is protected, or has no user/safety info (`forall` on an empty Option is true); such requests are not sent to the external translation API
+  private def checkProtectedUser(target: Target): Future[Boolean] = {
+    target.targetUser.map(_.flatMap(_.safety).forall(_.isProtected)).onSuccess {
+      case true => protectedUserCounter.incr()
+      case false => notProtectedUserCounter.incr()
+    }
+  }
+
+  private def isTweetTranslatable( // language check, CRT check, param gate, protected-user check, then the translatable store
+    target: Target,
+    tweetId: Long,
+    tweet: Option[Tweet],
+    crt: CommonRecommendationType,
+    deviceLanguage: String
+  ): Future[Boolean] = {
+    val tweetLangOpt = tweet.flatMap(_.language)
+    val isValidLanguage = tweetLangOpt.exists { tweetLang => // false when the tweet or its language is missing
+      tweetLang.confidence > 0.5 &&
+      tweetLang.language != deviceLanguage
+    }
+
+    if (isValidLanguage) {
+      validLanguageCounter.incr()
+    } else {
+      invalidLanguageCounter.incr()
+    }
+
+    val isValidCrt = TweetTranslationStore.allowedCRTs.contains(crt)
+    if (isValidCrt) {
+      validCrtCounter.incr()
+    } else {
+      invalidCrtCounter.incr()
+    }
+
+    if (isValidCrt && isValidLanguage && target.params(PushParams.EnableIsTweetTranslatableCheck)) { // param off => result is false
+      checkProtectedUser(target).flatMap {
+        case false =>
+          val isTweetTranslatableKey = IsTweetTranslatableClientColumn.Key(
+            tweetId = tweetId,
+            destinationLanguage = Some(deviceLanguage),
+            translationSource = Some(TranslationSource.Google.name),
+            excludePreferredLanguages = Some(true)
+          )
+          isTweetTranslatableStore
+            .get(isTweetTranslatableKey).map { resultOpt =>
+              resultOpt.getOrElse(false) // a missing store entry counts as not translatable
+            }.onSuccess {
+              case true => isTranslatableCounter.incr()
+              case false => notTranslatableCounter.incr()
+            }
+        case true =>
+          Future.False
+      }
+    } else {
+      Future.False
+    }
+  }
+
+  private def translateTweet( // ungated lookup in translateTweetStore with Google as the translation source
+    tweetId: Long,
+    deviceLanguage: String
+  ): Future[Option[MachineTranslation]] = {
+    val translateKey = MachineTranslateTweetClientColumn.Key(
+      tweetId = tweetId,
+      destinationLanguage = deviceLanguage,
+      translationSource = TranslationSource.Google,
+      translatableEntityTypes = Seq(),
+      onlyCached = false,
+      cacheUsageType = CacheUsageType.Default
+    )
+    translateTweetStore.get(translateKey).map {
+      _.collect { // responses that are not a `Result` become None
+        case MachineTranslationResponse.Result(result) => result
+      }
+    }
+  }
+
+  private def translateTweet( // gated variant: requires isTweetTranslatable and the EnableTweetTranslation param
+    target: Target,
+    tweetId: Long,
+    tweet: Option[Tweet],
+    crt: CommonRecommendationType,
+    deviceLanguage: String
+  ): Future[Option[MachineTranslation]] = {
+    isTweetTranslatable(target, tweetId, tweet, crt, deviceLanguage).flatMap {
+      case true =>
+        val isEnabledByParam = target.params(PushFeatureSwitchParams.EnableTweetTranslation)
+        if (isEnabledByParam) {
+          paramEnabledCounter.incr()
+          translateTweet(tweetId, deviceLanguage)
+        } else {
+          paramDisabledCounter.incr()
+          Future.None
+        }
+      case false =>
+        Future.None
+    }
+  }
+
+  private def setTwitterContext[Rep]( // runs `f` with a TwitterContext viewer in scope
+    target: Target,
+    deviceLanguage: String
+  )(
+    f: => Future[Rep]
+  ): Future[Rep] = {
+    twitterContext() match {
+      case Some(viewer) if viewer.userId.nonEmpty && viewer.authenticatedUserId.nonEmpty =>
+        // If the context is already set up with a user ID and an authenticated user ID, just use it
+        f
+      case _ =>
+        // If not, create a new context whose viewer is the target user, with the device language as request language
+        twitterContext.let(
+          Viewer(
+            userId = Some(target.targetId),
+            requestLanguageCode = Some(deviceLanguage),
+            authenticatedUserId = Some(target.targetId)
+          )) {
+          f
+        }
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/UttEntityHydrationStore.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/UttEntityHydrationStore.scala
new file mode 100644
index 000000000..bd96bf690
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/store/UttEntityHydrationStore.scala
@@ -0,0 +1,79 @@
+package com.twitter.frigate.pushservice.store
+
+import com.twitter.escherbird.util.uttclient.CachedUttClientV2
+import com.twitter.escherbird.util.uttclient.InvalidUttEntityException
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.logging.Logger
+import com.twitter.stitch.Stitch
+import com.twitter.topiclisting.TopicListingViewerContext
+import com.twitter.topiclisting.utt.LocalizedEntity
+import com.twitter.topiclisting.utt.LocalizedEntityFactory
+import com.twitter.util.Future
+
+/**
+ * Input to [[UttEntityHydrationStore.getLocalizedTopicEntities]].
+ * @param viewerContext: [[TopicListingViewerContext]] for filtering topic
+ * @param semanticCoreEntityIds: list of semantic core entities to hydrate
+ */
+case class UttEntityHydrationQuery(
+  viewerContext: TopicListingViewerContext,
+  semanticCoreEntityIds: Seq[Long])
+
+/**
+ * Hydrates semantic core entity ids into localized topic entities; `log` records invalid entities.
+ * @param cachedUttClientV2 client used to fetch the UTT entity metadata for each id
+ * @param statsReceiver receiver under which this store's counters are scoped (by simple class name)
+ */
+class UttEntityHydrationStore(
+  cachedUttClientV2: CachedUttClientV2,
+  statsReceiver: StatsReceiver,
+  log: Logger) {
+
+  private val stats = statsReceiver.scope(this.getClass.getSimpleName)
+  private val uttEntityNotFound = stats.counter("invalid_utt_entity")
+  private val deviceLanguageMismatch = stats.counter("language_mismatch")
+
+  /**
+   * SemanticCore recommends setting language and country code to None to fetch all localized topic
+   * names and to apply locale filtering on our end.
+   *
+   * We use [[LocalizedEntityFactory]] from the [[Topiclisting]] library to filter topic names
+   * based on the user locale. One result is produced per requested id:
+   *
+   * Some(LocalizedEntity) - LocalizedUttEntity found
+   * None - LocalizedUttEntity not found, or the id raised [[InvalidUttEntityException]]
+   */
+  def getLocalizedTopicEntities(
+    query: UttEntityHydrationQuery
+  ): Future[Seq[Option[LocalizedEntity]]] = Stitch.run {
+    Stitch.collect {
+      query.semanticCoreEntityIds.map { semanticCoreEntityId =>
+        val uttEntity = cachedUttClientV2.cachedGetUttEntity(
+          language = None,
+          country = None,
+          version = None,
+          entityId = semanticCoreEntityId)
+
+        uttEntity
+          .map { uttEntityMetadata =>
+            val localizedEntity = LocalizedEntityFactory.getLocalizedEntity(
+              uttEntityMetadata,
+              query.viewerContext,
+              enableInternationalTopics = true,
+              enableTopicDescription = true)
+            // count entities whose name does not match the device language
+            localizedEntity.foreach { entity =>
+              if (!entity.nameMatchesDeviceLanguage) deviceLanguageMismatch.incr()
+            }
+
+            localizedEntity
+          }.handle { // only InvalidUttEntityException is handled here: logged, counted, mapped to None
+            case e: InvalidUttEntityException =>
+              log.error(e.getMessage)
+              uttEntityNotFound.incr()
+              None
+          }
+      }
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/CandidateNotifier.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/CandidateNotifier.scala
new file mode 100644
index 000000000..1eb8cbc04
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/CandidateNotifier.scala
@@ -0,0 +1,160 @@
+package com.twitter.frigate.pushservice.take
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.Stats.track
+import com.twitter.frigate.common.logger.MRLogger
+import com.twitter.frigate.common.store.Fail
+import com.twitter.frigate.common.store.IbisResponse
+import com.twitter.frigate.common.store.InvalidConfiguration
+import com.twitter.frigate.common.store.NoRequest
+import com.twitter.frigate.common.store.Sent
+import
com.twitter.frigate.common.util.CasLock
+import com.twitter.frigate.common.util.PushServiceUtil.InvalidConfigResponse
+import com.twitter.frigate.common.util.PushServiceUtil.NtabWriteOnlyResponse
+import com.twitter.frigate.common.util.PushServiceUtil.SendFailedResponse
+import com.twitter.frigate.common.util.PushServiceUtil.SentResponse
+import com.twitter.frigate.pushservice.predicate.CasLockPredicate
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.take.history._
+import com.twitter.frigate.pushservice.util.CopyUtil
+import com.twitter.frigate.pushservice.thriftscala.PushResponse
+import com.twitter.frigate.pushservice.thriftscala.PushStatus
+import com.twitter.frigate.pushservice.util.OverrideNotificationUtil
+import com.twitter.frigate.thriftscala.ChannelName
+import com.twitter.util.Future
+
+class CandidateNotifier(
+  notificationSender: NotificationSender,
+  casLock: CasLock,
+  historyWriter: HistoryWriter,
+  eventBusWriter: EventBusWriter,
+  ntabOnlyChannelSelector: NtabOnlyChannelSelector
+)(
+  implicit statsReceiver: StatsReceiver) {
+
+  private lazy val casLockPredicate =
+    CasLockPredicate(casLock, expiryDuration = 10.minutes)(statsReceiver)
+  private val candidateNotifierStats = statsReceiver.scope(this.getClass.getSimpleName)
+  private val historyWriteCounter =
+    candidateNotifierStats.counter("simply_notifier_history_write_num")
+  private val loggedOutHistoryWriteCounter =
+    candidateNotifierStats.counter("logged_out_simply_notifier_history_write_num")
+  private val notificationSenderLatency =
+    candidateNotifierStats.scope("notification_sender_send")
+  private val log = MRLogger("CandidateNotifier") // not referenced elsewhere in this class
+
+  private def mapIbisResponse(ibisResponse: IbisResponse): PushResponse = { // sender status -> PushResponse handed back to callers
+    ibisResponse match { // NOTE(review): no default case; any status other than the four below would raise a MatchError
+      case IbisResponse(Sent, _) => SentResponse
+      case IbisResponse(Fail, _) => SendFailedResponse
+      case IbisResponse(InvalidConfiguration, _) => InvalidConfigResponse
+      case IbisResponse(NoRequest, _) => NtabWriteOnlyResponse
+    }
+  }
+
+  /**
+   * Sends `candidate`: dark-write targets go straight to `sendIbisDarkWrite`; otherwise the CAS
+   * lock predicate must pass (or the push context must set skipFilters) before the send is
+   * written to history, handed to the notification sender, and written to the event bus.
+   *
+   * The `final` modifier signals that this function cannot be overridden. There's some critical
+   * logic in this function, and it's helpful to know that no sub-class overrides it.
+   */
+  final def notify(
+    candidate: PushCandidate,
+  ): Future[PushResponse] = {
+    if (candidate.target.isDarkWrite) {
+      notificationSender.sendIbisDarkWrite(candidate).map(mapIbisResponse)
+    } else {
+      casLockPredicate(Seq(candidate)).flatMap { casLockResults =>
+        if (casLockResults.head || candidate.target.pushContext // lock result for the single candidate, or filters explicitly skipped
+            .exists(_.skipFilters.contains(true))) {
+          Future
+            .join(
+              candidate.target.isSilentPush,
+              OverrideNotificationUtil
+                .getOverrideInfo(candidate, candidateNotifierStats),
+              CopyUtil.getCopyFeatures(candidate, candidateNotifierStats)
+            ).flatMap {
+              case (isSilentPush, overrideInfoOpt, copyFeaturesMap) =>
+                val channels = ntabOnlyChannelSelector.selectChannel(candidate)
+                channels.flatMap { channels => // inner `channels` (the resolved value) shadows the outer future
+                  candidate
+                    .frigateNotificationForPersistence(
+                      channels,
+                      isSilentPush,
+                      overrideInfoOpt,
+                      copyFeaturesMap.keySet).flatMap { frigateNotificationForPersistence =>
+                      val result = if (candidate.target.isDarkWrite) { // NOTE(review): the outer branch already handled isDarkWrite; confirm this can be reached
+                        candidateNotifierStats.counter("dark_write").incr()
+                        Future.Unit
+                      } else {
+                        historyWriteCounter.incr()
+                        historyWriter
+                          .writeSendToHistory(candidate, frigateNotificationForPersistence)
+                      }
+                      result.flatMap { _ => // the send starts only after the history write future succeeds
+                        track(notificationSenderLatency)(
+                          notificationSender
+                            .notify(channels, candidate)
+                            .map { ibisResponse =>
+                              eventBusWriter // the value returned by writeToEventBus is discarded here
+                                .writeToEventBus(candidate, frigateNotificationForPersistence)
+                              mapIbisResponse(ibisResponse)
+                            })
+                      }
+                    }
+                }
+            }
+        } else {
+          candidateNotifierStats.counter("filtered_by_cas_lock").incr()
+          Future.value(PushResponse(PushStatus.Filtered, Some(casLockPredicate.name)))
+        }
+      }
+    }
+  }
+
+  final def loggedOutNotify( // logged-out variant of notify: fixed PushNtab channel, no override info or copy features, no event-bus write
+    candidate: PushCandidate,
+  ): Future[PushResponse] = {
+    if (candidate.target.isDarkWrite) {
+      notificationSender.sendIbisDarkWrite(candidate).map(mapIbisResponse)
+    } else {
+      casLockPredicate(Seq(candidate)).flatMap { casLockResults =>
+        if (casLockResults.head || candidate.target.pushContext // same gate as in notify
+            .exists(_.skipFilters.contains(true))) {
+          val response = candidate.target.isSilentPush.flatMap { isSilentPush =>
+            candidate
+              .frigateNotificationForPersistence(
+                Seq(ChannelName.PushNtab),
+                isSilentPush,
+                None,
+                Set.empty).flatMap { frigateNotificationForPersistence =>
+                val result = if (candidate.target.isDarkWrite) { // NOTE(review): same re-check as in notify; confirm reachability
+                  candidateNotifierStats.counter("logged_out_dark_write").incr()
+                  Future.Unit
+                } else {
+                  loggedOutHistoryWriteCounter.incr()
+                  historyWriter.writeSendToHistory(candidate, frigateNotificationForPersistence)
+                }
+
+                result.flatMap { _ =>
+                  track(notificationSenderLatency)(
+                    notificationSender
+                      .loggedOutNotify(candidate)
+                      .map { ibisResponse =>
+                        mapIbisResponse(ibisResponse)
+                      })
+                }
+              }
+          }
+          response
+        } else {
+          candidateNotifierStats.counter("filtered_by_cas_lock").incr()
+          Future.value(PushResponse(PushStatus.Filtered, Some(casLockPredicate.name)))
+        }
+      }
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/LoggedOutRefreshForPushNotifier.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/LoggedOutRefreshForPushNotifier.scala
new file mode 100644
index 000000000..07574f46f
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/LoggedOutRefreshForPushNotifier.scala
@@ -0,0 +1,118 @@
+package com.twitter.frigate.pushservice.take
+
+import com.twitter.finagle.stats.BroadcastStatsReceiver
+import com.twitter.finagle.stats.Counter
+import com.twitter.finagle.stats.Stat
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.CandidateResult
+import com.twitter.frigate.common.base.Invalid
+import com.twitter.frigate.common.base.OK
+import
com.twitter.frigate.common.base.Response
+import com.twitter.frigate.common.base.Result
+import com.twitter.frigate.common.base.Stats.track
+import com.twitter.frigate.common.config.CommonConstants
+import com.twitter.frigate.common.logger.MRLogger
+import com.twitter.frigate.common.util.PushServiceUtil.FilteredLoggedOutResponseFut
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.refresh_handler.RFPHStatsRecorder
+import com.twitter.frigate.pushservice.thriftscala.LoggedOutResponse
+import com.twitter.frigate.pushservice.thriftscala.PushStatus
+import com.twitter.util.Future
+import com.twitter.util.JavaTimer
+import com.twitter.util.Timer
+
+class LoggedOutRefreshForPushNotifier( // notifies the first OK candidate of a logged-out response and records stats
+  rfphStatsRecorder: RFPHStatsRecorder,
+  loCandidateNotifier: CandidateNotifier
+)(
+  globalStats: StatsReceiver) {
+  private implicit val statsReceiver: StatsReceiver =
+    globalStats.scope("LoggedOutRefreshForPushHandler")
+  private val loPushStats: StatsReceiver = statsReceiver.scope("logged_out_push")
+  private val loSendLatency: StatsReceiver = statsReceiver.scope("logged_out_send")
+  private val processedCandidatesCounter: Counter =
+    statsReceiver.counter("processed_candidates_count")
+  private val validCandidatesCounter: Counter = statsReceiver.counter("valid_candidates_count")
+  private val okayCandidateCounter: Counter = statsReceiver.counter("ok_candidate_count")
+  private val nonOkayCandidateCounter: Counter = statsReceiver.counter("non_ok_candidate_count")
+  private val successNotifyCounter: Counter = statsReceiver.counter("success_notify_count")
+  private val notifyCandidate: Counter = statsReceiver.counter("notify_candidate")
+  private val noneCandidateResultCounter: Counter = statsReceiver.counter("none_candidate_count")
+  private val nonOkayPredsResult: Counter = statsReceiver.counter("non_okay_preds_result")
+  private val invalidResultCounter: Counter = statsReceiver.counter("invalid_result_count")
+  private val filteredLoggedOutResponse: Counter = statsReceiver.counter("filtered_response_count")
+
+  implicit private val timer: Timer = new JavaTimer(true) // implicit Timer in scope for the raiseWithin call below
+  val log = MRLogger("LoggedOutRefreshForNotifier")
+
+  private def notify( // OK result: send via loggedOutNotify; any other result: Filtered response
+    candidatesResult: CandidateResult[PushCandidate, Result]
+  ): Future[LoggedOutResponse] = {
+    val candidate = candidatesResult.candidate
+    if (candidate != null) // affects the counter only; a null candidate still flows into the code below
+      notifyCandidate.incr()
+    val predsResult = candidatesResult.result
+    if (predsResult != OK) {
+      nonOkayPredsResult.incr()
+      val invalidResult = predsResult
+      invalidResult match {
+        case Invalid(Some(reason)) =>
+          invalidResultCounter.incr()
+          Future.value(LoggedOutResponse(PushStatus.Filtered, Some(reason)))
+        case _ =>
+          filteredLoggedOutResponse.incr()
+          Future.value(LoggedOutResponse(PushStatus.Filtered, None))
+      }
+    } else {
+      track(loSendLatency)(loCandidateNotifier.loggedOutNotify(candidate).map { res =>
+        LoggedOutResponse(res.status) // only the status of the PushResponse is carried over
+      })
+    }
+  }
+
+  def checkResponseAndNotify( // picks the first candidate whose result is OK and notifies it; otherwise a filtered response
+    response: Response[PushCandidate, Result]
+  ): Future[LoggedOutResponse] = {
+    val receivers = Seq(statsReceiver)
+    val loggedOutResponse = response match {
+      case Response(OK, processedCandidates) =>
+        processedCandidatesCounter.incr(processedCandidates.size)
+        val validCandidates = processedCandidates.filter(_.result == OK)
+        validCandidatesCounter.incr(validCandidates.size)
+
+        validCandidates.headOption match {
+          case Some(candidatesResult) =>
+            candidatesResult.result match { // presumably always OK here, since validCandidates was filtered on result == OK
+              case OK =>
+                okayCandidateCounter.incr()
+                notify(candidatesResult)
+                  .onSuccess { nr =>
+                    successNotifyCounter.incr()
+                    loPushStats.scope("lo_result").counter(nr.status.name).incr()
+                  }
+              case _ =>
+                nonOkayCandidateCounter.incr()
+                FilteredLoggedOutResponseFut
+            }
+          case _ =>
+            noneCandidateResultCounter.incr()
+            FilteredLoggedOutResponseFut
+        }
+
+      case Response(Invalid(reason), _) =>
+        FilteredLoggedOutResponseFut.map(_.copy(filteredBy = reason))
+
+      case _ =>
+        FilteredLoggedOutResponseFut
+    }
+    val bstats = BroadcastStatsReceiver(receivers)
+    Stat // the timed future built here is used for latency/failure stats only; its value is not returned
+      .timeFuture(bstats.stat("logged_out_latency"))(
+        loggedOutResponse.raiseWithin(CommonConstants.maxPushRequestDuration)
+      )
+      .onFailure { exception =>
+        rfphStatsRecorder.loggedOutRequestExceptionStats(exception, bstats)
+      }
+    loggedOutResponse // NOTE(review): returns the original future, not the raiseWithin-wrapped one above; confirm intended
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationSender.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationSender.scala
new file mode 100644
index 000000000..70a695fb3
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationSender.scala
@@ -0,0 +1,95 @@
+package com.twitter.frigate.pushservice.take
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.Stats.track
+import com.twitter.frigate.common.store.IbisResponse
+import com.twitter.frigate.common.store.Sent
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.take.sender.Ibis2Sender
+import com.twitter.frigate.pushservice.take.sender.NtabSender
+import com.twitter.frigate.scribe.thriftscala.NotificationScribe
+import com.twitter.util.Future
+import com.twitter.frigate.thriftscala.ChannelName
+
+/**
+ * NotificationSender wraps up all the notification infra send logic and serves as an abstraction
+ * layer between CandidateNotifier and the respective senders (ntab, ibis). Per the original note
+ * these are gated by a decider/feature switch; that gating is not visible in this class.
+ */
+class NotificationSender(
+  ibis2Sender: Ibis2Sender,
+  ntabSender: NtabSender,
+  statsReceiver: StatsReceiver,
+  notificationScribe: NotificationScribe => Unit) {
+
+  private val notificationNotifierStats = statsReceiver.scope(this.getClass.getSimpleName)
+  private val ibis2SendLatency = notificationNotifierStats.scope("ibis2_send")
+  private val loggedOutIbis2SendLatency = notificationNotifierStats.scope("logged_out_ibis2_send")
+  private val ntabSendLatency = notificationNotifierStats.scope("ntab_send")
+
+  private val ntabWriteThenSkipPushCounter =
+    notificationNotifierStats.counter("ntab_write_then_skip_push")
+  private val ntabWriteThenIbisSendCounter =
+    notificationNotifierStats.counter("ntab_write_then_ibis_send")
+  notificationNotifierStats.counter("ins_dark_traffic_send") // NOTE(review): the returned counter is not kept or incremented here; confirm intended
+
+  private val ntabOnlyChannelSenderV3Counter =
+    notificationNotifierStats.counter("ntab_only_channel_send_v3")
+
+  def sendIbisDarkWrite(candidate: PushCandidate): Future[IbisResponse] = { // delegates to ibis2Sender.sendAsDarkWrite
+    ibis2Sender.sendAsDarkWrite(candidate)
+  }
+
+  private def isNtabOnlySend( // true when `channels` contains NtabOnly; also bumps the v3 counter in that case
+    channels: Seq[ChannelName]
+  ): Future[Boolean] = {
+    val isNtabOnlyChannel = channels.contains(ChannelName.NtabOnly)
+    if (isNtabOnlyChannel) ntabOnlyChannelSenderV3Counter.incr()
+
+    Future.value(isNtabOnlyChannel)
+  }
+
+  private def isPushOnly(channels: Seq[ChannelName], candidate: PushCandidate): Future[Boolean] = { // `candidate` is not used
+    Future.value(channels.contains(ChannelName.PushOnly))
+  }
+
+  def notify( // push-only: ibis send only; otherwise ntab write first, then ibis unless the channel is ntab-only
+    channels: Seq[ChannelName],
+    candidate: PushCandidate
+  ): Future[IbisResponse] = {
+    Future
+      .join(isPushOnly(channels, candidate), isNtabOnlySend(channels)).map {
+        case (isPushOnly, isNtabOnly) =>
+          if (isPushOnly) {
+            track(ibis2SendLatency)(ibis2Sender.send(channels, candidate, notificationScribe, None))
+          } else {
+            track(ntabSendLatency)(
+              ntabSender
+                .send(candidate, isNtabOnly))
+              .flatMap { ntabResponse =>
+                if (isNtabOnly) {
+                  ntabWriteThenSkipPushCounter.incr()
+                  candidate // ntab-only: scribe the send and report Sent without calling ibis
+                    .scribeData(channels = channels).foreach(notificationScribe).map(_ =>
+                      IbisResponse(Sent))
+                } else {
+                  ntabWriteThenIbisSendCounter.incr()
+                  track(ibis2SendLatency)(
+                    ibis2Sender.send(channels, candidate, notificationScribe, ntabResponse))
+                }
+              }
+
+          }
+      }.flatten // the map above yields a Future per branch; flatten collapses the nesting
+  }
+
+  def loggedOutNotify( // logged-out send: ibis only, fixed PushNtab channel, no ntab response passed
+    candidate: PushCandidate
+  ): Future[IbisResponse] = {
+    val ibisResponse = {
+      track(loggedOutIbis2SendLatency)(
+        ibis2Sender.send(Seq(ChannelName.PushNtab), candidate, notificationScribe, None))
+    }
+
ibisResponse + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationServiceSender.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationServiceSender.scala new file mode 100644 index 000000000..c2f729115 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/NotificationServiceSender.scala @@ -0,0 +1,273 @@ +package com.twitter.frigate.pushservice.take + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.logger.MRLogger +import com.twitter.frigate.common.ntab.InvalidNTABWriteRequestException +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.notificationservice.thriftscala._ +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.Param +import com.twitter.util.Future +import scala.util.control.NoStackTrace + +class NtabCopyIdNotFoundException(private val message: String) + extends Exception(message) + with NoStackTrace + +class InvalidNtabCopyIdException(private val message: String) + extends Exception(message) + with NoStackTrace + +object NotificationServiceSender { + + def generateSocialContextTextEntities( + ntabDisplayNamesAndIdsFut: Future[Seq[(String, Long)]], + otherCountFut: Future[Int] + ): Future[Seq[DisplayTextEntity]] = { + Future.join(ntabDisplayNamesAndIdsFut, otherCountFut).map { + case (namesWithIdInOrder, otherCount) => + val displays = namesWithIdInOrder.zipWithIndex.map { + case ((name, id), index) => + DisplayTextEntity( + name = "user" + s"${index + 1}", + value = TextValue.Text(name), + emphasis = true, + userId = Some(id) + ) + } ++ Seq( + DisplayTextEntity(name = "nameCount", value = 
TextValue.Number(namesWithIdInOrder.size)) + ) + + val otherDisplay = if (otherCount > 0) { + Some( + DisplayTextEntity( + name = "otherCount", + value = TextValue.Number(otherCount) + ) + ) + } else None + displays ++ otherDisplay + } + } + + def getDisplayTextEntityFromUser( + userOpt: Option[User], + fieldName: String, + isBold: Boolean + ): Option[DisplayTextEntity] = { + for { + user <- userOpt + profile <- user.profile + } yield { + DisplayTextEntity( + name = fieldName, + value = TextValue.Text(profile.name), + emphasis = isBold, + userId = Some(user.id) + ) + } + } + + def getDisplayTextEntityFromUser( + user: Future[Option[User]], + fieldName: String, + isBold: Boolean + ): Future[Option[DisplayTextEntity]] = { + user.map { getDisplayTextEntityFromUser(_, fieldName, isBold) } + } +} + +case class NotificationServiceRequest( + candidate: PushCandidate, + impressionId: String, + isBadgeUpdate: Boolean, + overrideId: Option[String] = None) + +class NotificationServiceSender( + send: (Target, CreateGenericNotificationRequest) => Future[CreateGenericNotificationResponse], + enableWritesParam: Param[Boolean], + enableForEmployeesParam: Param[Boolean], + enableForEveryoneParam: Param[Boolean] +)( + implicit globalStats: StatsReceiver) + extends ReadableStore[NotificationServiceRequest, CreateGenericNotificationResponse] { + + val log = MRLogger(this.getClass.getName) + + val stats = globalStats.scope("NotificationServiceSender") + val requestEmpty = stats.scope("request_empty") + val requestNonEmpty = stats.counter("request_non_empty") + + val requestBadgeCount = stats.counter("request_badge_count") + + val successfulWrite = stats.counter("successful_write") + val successfulWriteScope = stats.scope("successful_write") + val failedWriteScope = stats.scope("failed_write") + val gotNonSuccessResponse = stats.counter("got_non_success_response") + val gotEmptyResponse = stats.counter("got_empty_response") + val deciderTurnedOffResponse = 
stats.scope("decider_turned_off_response") + + val disabledByDeciderForCandidate = stats.scope("model/candidate").counter("disabled_by_decider") + val sentToAlphaUserForCandidate = + stats.scope("model/candidate").counter("send_to_employee_or_team") + val sentToNonBucketedUserForCandidate = + stats.scope("model/candidate").counter("send_to_non_bucketed_decidered_user") + val noSendForCandidate = stats.scope("model/candidate").counter("no_send") + + val ineligibleUsersForCandidate = stats.scope("model/candidate").counter("ineligible_users") + + val darkWriteRequestsForCandidate = stats.scope("model/candidate").counter("dark_write_traffic") + + val heavyUserForCandidateCounter = stats.scope("model/candidate").counter("target_heavy") + val nonHeavyUserForCandidateCounter = stats.scope("model/candidate").counter("target_non_heavy") + + val skipWritingToNTAB = stats.counter("skip_writing_to_ntab") + + val ntabWriteDisabledForCandidate = stats.scope("model/candidate").counter("ntab_write_disabled") + + val ntabOverrideEnabledForCandidate = stats.scope("model/candidate").counter("override_enabled") + val ntabTTLForCandidate = stats.scope("model/candidate").counter("ttl_enabled") + + override def get( + notifRequest: NotificationServiceRequest + ): Future[Option[CreateGenericNotificationResponse]] = { + notifRequest.candidate.target.deviceInfo.flatMap { deviceInfoOpt => + val disableWritingToNtab = + notifRequest.candidate.target.params(PushParams.DisableWritingToNTAB) + + if (disableWritingToNtab) { + skipWritingToNTAB.incr() + Future.None + } else { + if (notifRequest.overrideId.nonEmpty) { ntabOverrideEnabledForCandidate.incr() } + Future + .join( + notifRequest.candidate.ntabRequest, + ntabWritesEnabledForCandidate(notifRequest.candidate)).flatMap { + case (Some(ntabRequest), ntabWritesEnabled) if ntabWritesEnabled => + if (ntabRequest.expiryTimeMillis.nonEmpty) { ntabTTLForCandidate.incr() } + sendNTabRequest( + ntabRequest, + notifRequest.candidate.target, + 
notifRequest.isBadgeUpdate, + notifRequest.candidate.commonRecType, + isFromCandidate = true, + overrideId = notifRequest.overrideId + ) + case (Some(_), ntabWritesEnabled) if !ntabWritesEnabled => + ntabWriteDisabledForCandidate.incr() + Future.None + case (None, ntabWritesEnabled) => + if (!ntabWritesEnabled) ntabWriteDisabledForCandidate.incr() + requestEmpty.counter(s"candidate_${notifRequest.candidate.commonRecType}").incr() + Future.None + } + } + } + } + + private def sendNTabRequest( + genericNotificationRequest: CreateGenericNotificationRequest, + target: Target, + isBadgeUpdate: Boolean, + crt: CommonRecommendationType, + isFromCandidate: Boolean, + overrideId: Option[String] + ): Future[Option[CreateGenericNotificationResponse]] = { + requestNonEmpty.incr() + val notifSvcReq = + genericNotificationRequest.copy( + sendBadgeCountUpdate = isBadgeUpdate, + overrideId = overrideId + ) + requestBadgeCount.incr() + send(target, notifSvcReq) + .map { response => + if (response.responseType.equals(CreateGenericNotificationResponseType.DecideredOff)) { + deciderTurnedOffResponse.counter(s"$crt").incr() + deciderTurnedOffResponse.counter(s"${genericNotificationRequest.genericType}").incr() + throw InvalidNTABWriteRequestException("Decider is turned off") + } else { + Some(response) + } + } + .onFailure { ex => + stats.counter(s"error_${ex.getClass.getCanonicalName}").incr() + failedWriteScope.counter(s"${crt}").incr() + log + .error( + ex, + s"NTAB failure $notifSvcReq" + ) + } + .onSuccess { + case Some(response) => + successfulWrite.incr() + val successfulWriteScopeString = if (isFromCandidate) "model/candidate" else "envelope" + successfulWriteScope.scope(successfulWriteScopeString).counter(s"$crt").incr() + if (response.responseType != CreateGenericNotificationResponseType.Success) { + gotNonSuccessResponse.incr() + log.warning(s"NTAB dropped $notifSvcReq with response $response") + } + + case _ => + gotEmptyResponse.incr() + } + } + + private def 
ntabWritesEnabledForCandidate(cand: PushCandidate): Future[Boolean] = { + if (!cand.target.params(enableWritesParam)) { + disabledByDeciderForCandidate.incr() + Future.False + } else { + Future + .join( + cand.target.isAnEmployee, + cand.target.isInNotificationsServiceWhitelist, + cand.target.isTeamMember + ) + .flatMap { + case (isEmployee, isInNotificationsServiceWhitelist, isTeamMember) => + cand.target.deviceInfo.flatMap { deviceInfoOpt => + deviceInfoOpt + .map { deviceInfo => + cand.target.isHeavyUserState.map { isHeavyUser => + val isAlphaTester = (isEmployee && cand.target + .params(enableForEmployeesParam)) || isInNotificationsServiceWhitelist || isTeamMember + if (cand.target.isDarkWrite) { + stats + .scope("model/candidate").counter( + s"dark_write_${cand.commonRecType}").incr() + darkWriteRequestsForCandidate.incr() + false + } else if (isAlphaTester || deviceInfo.isMRinNTabEligible + || cand.target.insertMagicrecsIntoNTabForNonPushableUsers) { + if (isHeavyUser) heavyUserForCandidateCounter.incr() + else nonHeavyUserForCandidateCounter.incr() + + val enabledForDesiredUsers = cand.target.params(enableForEveryoneParam) + if (isAlphaTester) { + sentToAlphaUserForCandidate.incr() + true + } else if (enabledForDesiredUsers) { + sentToNonBucketedUserForCandidate.incr() + true + } else { + noSendForCandidate.incr() + false + } + } else { + ineligibleUsersForCandidate.incr() + false + } + } + }.getOrElse(Future.False) + } + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/SendHandlerNotifier.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/SendHandlerNotifier.scala new file mode 100644 index 000000000..feb65dffe --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/SendHandlerNotifier.scala @@ -0,0 +1,86 @@ +package com.twitter.frigate.pushservice.take + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.Invalid +import 
com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Response +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.common.util.NotificationScribeUtil +import com.twitter.frigate.common.util.PushServiceUtil +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.thriftscala.PushResponse +import com.twitter.frigate.pushservice.thriftscala.PushStatus +import com.twitter.util.Future + +class SendHandlerNotifier( + candidateNotifier: CandidateNotifier, + private val statsReceiver: StatsReceiver) { + + val missingResponseCounter = statsReceiver.counter("missing_response") + val filteredResponseCounter = statsReceiver.counter("filtered") + + /** + * + * @param isScribeInfoRequired: [[Boolean]] to indicate if scribe info is required + * @param candidate: [[PushCandidate]] to build the scribe data from + * @return: scribe response string + */ + private def scribeInfoForResponse( + isScribeInfoRequired: Boolean, + candidate: PushCandidate + ): Future[Option[String]] = { + if (isScribeInfoRequired) { + candidate.scribeData().map { scribedInfo => + Some(NotificationScribeUtil.convertToJsonString(scribedInfo)) + } + } else Future.None + } + + /** + * + * @param response: Candidate validation response + * @param responseWithScribedInfo: boolean indicating if scribe data is expected in push response + * @return: [[PushResponse]] containing final result of send request for [[com.twitter.frigate.pushservice.thriftscala.PushRequest]] + */ + final def checkResponseAndNotify( + response: Response[PushCandidate, Result], + responseWithScribedInfo: Boolean + ): Future[PushResponse] = { + + response match { + case Response(OK, processedCandidates) => + val (validCandidates, invalidCandidates) = processedCandidates.partition(_.result == OK) + validCandidates.headOption match { + case Some(candidateResult) => + val scribeInfo = + scribeInfoForResponse(responseWithScribedInfo, 
candidateResult.candidate) + scribeInfo.flatMap { scribedData => + val response: Future[PushResponse] = + candidateNotifier.notify(candidateResult.candidate) + response.map(_.copy(notifScribe = scribedData)) + } + + case None => + invalidCandidates.headOption match { + case Some(candidateResult) => + filteredResponseCounter.incr() + val response = candidateResult.result match { + case Invalid(reason) => PushResponse(PushStatus.Filtered, filteredBy = reason) + case _ => PushResponse(PushStatus.Filtered, filteredBy = Some("unknown")) + } + + val scribeInfo = + scribeInfoForResponse(responseWithScribedInfo, candidateResult.candidate) + scribeInfo.map(scribeData => response.copy(notifScribe = scribeData)) + + case None => + missingResponseCounter.incr() + PushServiceUtil.FilteredPushResponseFut + } + } + + case Response(Invalid(reason), _) => + throw new IllegalStateException(s"Unexpected target filtering in SendHandler: $reason") + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/CandidateValidator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/CandidateValidator.scala new file mode 100644 index 000000000..ee85ba590 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/CandidateValidator.scala @@ -0,0 +1,83 @@ +package com.twitter.frigate.pushservice.take.candidate_validator + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.logger.MRLogger +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.take.predicates.TakeCommonPredicates +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.hermit.predicate.ConcurrentPredicate +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.hermit.predicate.Predicate +import 
com.twitter.hermit.predicate.SequentialPredicate +import com.twitter.util.Future + +trait CandidateValidator extends TakeCommonPredicates { + + override implicit val statsReceiver: StatsReceiver = config.statsReceiver + + protected val log = MRLogger("CandidateValidator") + + private lazy val skipFiltersCounter = statsReceiver.counter("enable_skip_filters") + private lazy val emailUserSkipFiltersCounter = + statsReceiver.counter("email_user_enable_skip_filters") + private lazy val enablePredicatesCounter = statsReceiver.counter("enable_predicates") + + protected def enabledPredicates[C <: PushCandidate]( + candidate: C, + predicates: List[NamedPredicate[C]] + ): List[NamedPredicate[C]] = { + val target = candidate.target + val skipFilters: Boolean = + target.pushContext.flatMap(_.skipFilters).getOrElse(false) || target.params( + PushFeatureSwitchParams.SkipPostRankingFilters) + + if (skipFilters) { + skipFiltersCounter.incr() + if (target.isEmailUser) emailUserSkipFiltersCounter.incr() + + val predicatesToEnable = target.pushContext.flatMap(_.predicatesToEnable).getOrElse(Nil) + if (predicatesToEnable.nonEmpty) enablePredicatesCounter.incr() + + // if we skip predicates on pushContext, only enable the explicitly specified predicates + predicates.filter(predicatesToEnable.contains) + } else predicates + } + + protected def executeSequentialPredicates[C <: PushCandidate]( + candidate: C, + predicates: List[NamedPredicate[C]] + ): Future[Option[Predicate[C]]] = { + val predicatesEnabled = enabledPredicates(candidate, predicates) + val sequentialPredicate = new SequentialPredicate(predicatesEnabled) + + sequentialPredicate.track(Seq(candidate)).map(_.head) + } + + protected def executeConcurrentPredicates[C <: PushCandidate]( + candidate: C, + predicates: List[NamedPredicate[C]] + ): Future[List[Predicate[C]]] = { + val predicatesEnabled = enabledPredicates(candidate, predicates) + val concurrentPredicate: ConcurrentPredicate[C] = new 
ConcurrentPredicate[C](predicatesEnabled) + concurrentPredicate.track(Seq(candidate)).map(_.head) + } + + protected val candidatePredicatesMap: Map[CommonRecommendationType, List[ + NamedPredicate[_ <: PushCandidate] + ]] + + protected def getCRTPredicates[C <: PushCandidate]( + CRT: CommonRecommendationType + ): List[NamedPredicate[C]] = { + candidatePredicatesMap.get(CRT) match { + case Some(predicates) => + predicates.asInstanceOf[List[NamedPredicate[C]]] + case _ => + throw new IllegalStateException( + s"Unknown CommonRecommendationType for Predicates: ${CRT.name}") + } + } + + def validateCandidate[C <: PushCandidate](candidate: C): Future[Option[Predicate[C]]] +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/RFPHCandidateValidator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/RFPHCandidateValidator.scala new file mode 100644 index 000000000..ecc99cc9e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/RFPHCandidateValidator.scala @@ -0,0 +1,27 @@ +package com.twitter.frigate.pushservice.take.candidate_validator + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.take.predicates.candidate_map.CandidatePredicatesMap +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +class RFPHCandidateValidator(override val config: Config) extends CandidateValidator { + private val rFPHCandidateValidatorStats = statsReceiver.scope(this.getClass.getSimpleName) + private val concurrentPredicateCount = rFPHCandidateValidatorStats.counter("concurrent") + private val sequentialPredicateCount = rFPHCandidateValidatorStats.counter("sequential") + + override protected val candidatePredicatesMap = CandidatePredicatesMap(config) + + override def validateCandidate[C <: PushCandidate](candidate: C): 
Future[Option[Predicate[C]]] = { + val candidatePredicates = getCRTPredicates(candidate.commonRecType) + val predicates = rfphPrePredicates ++ candidatePredicates ++ postPredicates + if (candidate.target.isEmailUser) { + concurrentPredicateCount.incr() + executeConcurrentPredicates(candidate, predicates).map(_.headOption) + } else { + sequentialPredicateCount.incr() + executeSequentialPredicates(candidate, predicates) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPostCandidateValidator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPostCandidateValidator.scala new file mode 100644 index 000000000..096e9f102 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPostCandidateValidator.scala @@ -0,0 +1,26 @@ +package com.twitter.frigate.pushservice.take.candidate_validator + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.take.predicates.candidate_map.SendHandlerCandidatePredicatesMap +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +class SendHandlerPostCandidateValidator(override val config: Config) extends CandidateValidator { + + override protected val candidatePredicatesMap = + SendHandlerCandidatePredicatesMap.postCandidatePredicates(config) + + private val sendHandlerPostCandidateValidatorStats = + statsReceiver.counter("sendHandlerPostCandidateValidator_stats") + + override def validateCandidate[C <: PushCandidate](candidate: C): Future[Option[Predicate[C]]] = { + val candidatePredicates = getCRTPredicates(candidate.commonRecType) + val predicates = candidatePredicates ++ postPredicates + + sendHandlerPostCandidateValidatorStats.incr() + + executeConcurrentPredicates(candidate, predicates) + .map(_.headOption) + } +} diff --git 
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPreCandidateValidator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPreCandidateValidator.scala new file mode 100644 index 000000000..eb0293017 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/candidate_validator/SendHandlerPreCandidateValidator.scala @@ -0,0 +1,24 @@ +package com.twitter.frigate.pushservice.take.candidate_validator + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.take.predicates.candidate_map.SendHandlerCandidatePredicatesMap +import com.twitter.hermit.predicate.Predicate +import com.twitter.util.Future + +class SendHandlerPreCandidateValidator(override val config: Config) extends CandidateValidator { + + override protected val candidatePredicatesMap = + SendHandlerCandidatePredicatesMap.preCandidatePredicates(config) + + private val sendHandlerPreCandidateValidatorStats = + statsReceiver.counter("sendHandlerPreCandidateValidator_stats") + + override def validateCandidate[C <: PushCandidate](candidate: C): Future[Option[Predicate[C]]] = { + val candidatePredicates = getCRTPredicates(candidate.commonRecType) + val predicates = sendHandlerPrePredicates ++ candidatePredicates + + sendHandlerPreCandidateValidatorStats.incr() + executeSequentialPredicates(candidate, predicates) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelCandidate.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelCandidate.scala new file mode 100644 index 000000000..a278ef237 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelCandidate.scala @@ -0,0 +1,24 @@ +package com.twitter.frigate.pushservice.take + +import 
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.ChannelName +import com.twitter.util.Future +import java.util.concurrent.ConcurrentHashMap +import scala.collection.concurrent +import scala.collection.convert.decorateAsScala._ + +/** + * A class to save all the channel related information + */ +trait ChannelForCandidate { + self: PushCandidate => + + // Cache of channel selection result + private[this] val selectedChannels: concurrent.Map[String, Future[Seq[ChannelName]]] = + new ConcurrentHashMap[String, Future[Seq[ChannelName]]]().asScala + + // Returns the channel information from all ChannelSelectors. + def getChannels(): Future[Seq[ChannelName]] = { + Future.collect(selectedChannels.values.toSeq).map { c => c.flatten } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelSelector.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelSelector.scala new file mode 100644 index 000000000..62378a4bc --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/ChannelSelector.scala @@ -0,0 +1,15 @@ +package com.twitter.frigate.pushservice.take + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.ChannelName +import com.twitter.util.Future + +abstract class ChannelSelector { + + // Returns the channel names selected for the given candidate.
+ def selectChannel( + candidate: PushCandidate + ): Future[Seq[ChannelName]] + + def getSelectorName(): String +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/NtabOnlyChannelSelector.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/NtabOnlyChannelSelector.scala new file mode 100644 index 000000000..e999da9be --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/channel_selection/NtabOnlyChannelSelector.scala @@ -0,0 +1,21 @@ +package com.twitter.frigate.pushservice.take + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.ChannelName +import com.twitter.util.Future + +class NtabOnlyChannelSelector extends ChannelSelector { + val SELECTOR_NAME = "NtabOnlyChannelSelector" + + def getSelectorName(): String = SELECTOR_NAME + + // Returns the channel names selected for the given candidate (always PushNtab here) + def selectChannel( + candidate: PushCandidate + ): Future[Seq[ChannelName]] = { + // Check whether the candidate is eligible for the channel (based on settings, push cap, etc.) + // Decide which candidate can be sent on what channel + val channelName: Future[ChannelName] = Future.value(ChannelName.PushNtab) + channelName.map(channel => Seq(channel)) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/EventBusWriter.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/EventBusWriter.scala new file mode 100644 index 000000000..2bdf412ac --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/EventBusWriter.scala @@ -0,0 +1,37 @@ +package com.twitter.frigate.pushservice.take.history + +import com.twitter.eventbus.client.EventBusPublisher +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.NotificationScribeUtil +import
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.scribe.thriftscala.NotificationScribe +import com.twitter.frigate.thriftscala.FrigateNotification + +class EventBusWriter( + eventBusPublisher: EventBusPublisher[NotificationScribe], + stats: StatsReceiver) { + private def writeSendEventToEventBus( + target: PushTypes.Target, + notificationScribe: NotificationScribe + ): Unit = { + if (target.params(PushParams.EnablePushSendEventBus)) { + val result = eventBusPublisher.publish(notificationScribe) + result.onFailure { _ => stats.counter("push_send_eventbus_failure").incr() } + } + } + + def writeToEventBus( + candidate: PushCandidate, + frigateNotificationForPersistence: FrigateNotification + ): Unit = { + val notificationScribe = NotificationScribeUtil.getNotificationScribe( + targetId = candidate.target.targetId, + impressionId = candidate.impressionId, + frigateNotification = frigateNotificationForPersistence, + createdAt = candidate.createdAt + ) + writeSendEventToEventBus(candidate.target, notificationScribe) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/HistoryWriter.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/HistoryWriter.scala new file mode 100644 index 000000000..ca9fe31bc --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/history/HistoryWriter.scala @@ -0,0 +1,49 @@ +package com.twitter.frigate.pushservice.take.history + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.history.HistoryStoreKeyContext +import com.twitter.frigate.common.history.PushServiceHistoryStore +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams 
+import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.util.Duration +import com.twitter.util.Future +import com.twitter.conversions.DurationOps._ + +class HistoryWriter(historyStore: PushServiceHistoryStore, stats: StatsReceiver) { + private lazy val historyWriterStats = stats.scope(this.getClass.getSimpleName) + private lazy val historyWriteCounter = historyWriterStats.counter("history_write_num") + private lazy val loggedOutHistoryWriteCounter = + historyWriterStats.counter("logged_out_history_write_num") + + private def writeTtlForHistory(candidate: PushCandidate): Duration = { + if (candidate.target.isLoggedOutUser) { + 60.days + } else if (RecTypes.isTweetType(candidate.commonRecType)) { + candidate.target.params(PushFeatureSwitchParams.FrigateHistoryTweetNotificationWriteTtl) + } else candidate.target.params(PushFeatureSwitchParams.FrigateHistoryOtherNotificationWriteTtl) + } + + def writeSendToHistory( + candidate: PushCandidate, + frigateNotificationForPersistence: FrigateNotification + ): Future[Unit] = { + val historyStoreKeyContext = HistoryStoreKeyContext( + candidate.target.targetId, + candidate.target.pushContext.flatMap(_.useMemcacheForHistory).getOrElse(false) + ) + if (candidate.target.isLoggedOutUser) { + loggedOutHistoryWriteCounter.incr() + } else { + historyWriteCounter.incr() + } + historyStore + .put( + historyStoreKeyContext, + candidate.createdAt, + frigateNotificationForPersistence, + Some(writeTtlForHistory(candidate)) + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicRFPHPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicRFPHPredicates.scala new file mode 100644 index 000000000..99719af99 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicRFPHPredicates.scala @@ -0,0 +1,7 @@ +package com.twitter.frigate.pushservice.take.predicates +import 
com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate + +trait BasicRFPHPredicates[C <: PushCandidate] { + val predicates: List[NamedPredicate[C]] +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicSendHandlerPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicSendHandlerPredicates.scala new file mode 100644 index 000000000..591a4df75 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicSendHandlerPredicates.scala @@ -0,0 +1,13 @@ +package com.twitter.frigate.pushservice.take.predicates + +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate + +trait BasicSendHandlerPredicates[C <: PushCandidate] { + + // specific predicates per candidate type before basic SendHandler predicates + val preCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty + + // specific predicates per candidate type after basic SendHandler predicates, could be empty + val postCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicates.scala new file mode 100644 index 000000000..0750abd6e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicates.scala @@ -0,0 +1,104 @@ +package com.twitter.frigate.pushservice.take.predicates + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.config.Config +import com.twitter.frigate.pushservice.predicate.BqmlHealthModelPredicates +import com.twitter.frigate.pushservice.predicate.BqmlQualityModelPredicates +import com.twitter.frigate.pushservice.predicate.HealthPredicates 
+import com.twitter.frigate.pushservice.predicate.OONSpreadControlPredicate +import com.twitter.frigate.pushservice.predicate.OONTweetNegativeFeedbackBasedPredicate +import com.twitter.frigate.pushservice.predicate.OutOfNetworkCandidatesQualityPredicates +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.frigate.pushservice.predicate.PNegMultimodalPredicates +import com.twitter.frigate.pushservice.predicate.TargetEngagementPredicate +import com.twitter.frigate.pushservice.predicate.TweetEngagementRatioPredicate +import com.twitter.frigate.pushservice.predicate.TweetLanguagePredicate +import com.twitter.frigate.pushservice.predicate.TweetWithheldContentPredicate + +trait BasicTweetPredicates { + + def config: Config + + implicit def statsReceiver: StatsReceiver + + final lazy val basicTweetPredicates = + List( + HealthPredicates.sensitiveMediaCategoryPredicate(), + HealthPredicates.profanityPredicate(), + PredicatesForCandidate.disableOutNetworkTweetPredicate(config.edgeStore), + TweetEngagementRatioPredicate.QTtoNtabClickBasedPredicate(), + TweetLanguagePredicate.oonTweeetLanguageMatch(), + HealthPredicates.userHealthSignalsPredicate(config.userHealthSignalStore), + HealthPredicates.authorSensitiveMediaPredicate(config.producerMediaRepresentationStore), + HealthPredicates.authorProfileBasedPredicate(), + PNegMultimodalPredicates.healthSignalScorePNegMultimodalPredicate( + config.tweetHealthScoreStore), + BqmlHealthModelPredicates.healthModelOonPredicate( + config.filteringModelScorer, + config.producerMediaRepresentationStore, + config.userHealthSignalStore, + config.tweetHealthScoreStore), + BqmlQualityModelPredicates.BqmlQualityModelOonPredicate(config.filteringModelScorer), + HealthPredicates.tweetHealthSignalScorePredicate(config.tweetHealthScoreStore), + HealthPredicates + .tweetHealthSignalScorePredicate(config.tweetHealthScoreStore, applyToQuoteTweet = true), + 
PredicatesForCandidate.nullCastF1ProtectedExperientPredicate( + config.cachedTweetyPieStoreV2 + ), + OONTweetNegativeFeedbackBasedPredicate.ntabDislikeBasedPredicate(), + OONSpreadControlPredicate.oonTweetSpreadControlPredicate(), + OONSpreadControlPredicate.oonAuthorSpreadControlPredicate(), + HealthPredicates.healthSignalScoreMultilingualPnsfwTweetTextPredicate( + config.tweetHealthScoreStore), + PredicatesForCandidate + .recommendedTweetAuthorAcceptableToTargetUser(config.edgeStore), + HealthPredicates.healthSignalScorePnsfwTweetTextPredicate(config.tweetHealthScoreStore), + HealthPredicates.healthSignalScoreSpammyTweetPredicate(config.tweetHealthScoreStore), + OutOfNetworkCandidatesQualityPredicates.NegativeKeywordsPredicate( + config.postRankingFeatureStoreClient), + PredicatesForCandidate.authorNotBeingDeviceFollowed(config.edgeStore), + TweetWithheldContentPredicate(), + PredicatesForCandidate.noOptoutFreeFormInterestPredicate, + PredicatesForCandidate.disableInNetworkTweetPredicate(config.edgeStore), + TweetEngagementRatioPredicate.TweetReplyLikeRatioPredicate(), + TargetEngagementPredicate( + config.userTweetPerspectiveStore, + defaultForMissing = true + ), + ) +} + +/** + * This trait is a new version of BasicTweetPredicates + * Difference from old version is that basicTweetPredicates are different + * basicTweetPredicates here don't include Social Graph Service related predicates + */ +trait BasicTweetPredicatesWithoutSGSPredicates { + + def config: Config + + implicit def statsReceiver: StatsReceiver + + final lazy val basicTweetPredicates = { + List( + HealthPredicates.healthSignalScoreSpammyTweetPredicate(config.tweetHealthScoreStore), + PredicatesForCandidate.nullCastF1ProtectedExperientPredicate( + config.cachedTweetyPieStoreV2 + ), + TweetWithheldContentPredicate(), + TargetEngagementPredicate( + config.userTweetPerspectiveStore, + defaultForMissing = true + ), + PredicatesForCandidate.noOptoutFreeFormInterestPredicate, + 
HealthPredicates.userHealthSignalsPredicate(config.userHealthSignalStore), + HealthPredicates.tweetHealthSignalScorePredicate(config.tweetHealthScoreStore), + BqmlQualityModelPredicates.BqmlQualityModelOonPredicate(config.filteringModelScorer), + BqmlHealthModelPredicates.healthModelOonPredicate( + config.filteringModelScorer, + config.producerMediaRepresentationStore, + config.userHealthSignalStore, + config.tweetHealthScoreStore), + ) + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicatesForRFPH.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicatesForRFPH.scala new file mode 100644 index 000000000..7d660632a --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/BasicTweetPredicatesForRFPH.scala @@ -0,0 +1,41 @@ +package com.twitter.frigate.pushservice.take.predicates + +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.hermit.predicate.NamedPredicate + +trait BasicTweetPredicatesForRFPH[C <: PushCandidate with TweetCandidate with TweetDetails] + extends BasicTweetPredicates + with BasicRFPHPredicates[C] { + + // specific predicates per candidate type before basic tweet predicates + def preCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty + + // specific predicates per candidate type after basic tweet predicates + def postCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty + + override lazy val predicates: List[NamedPredicate[C]] = + preCandidateSpecificPredicates ++ basicTweetPredicates ++ postCandidateSpecificPredicates +} + +/** + * This trait is a new version of BasicTweetPredicatesForRFPH + * Difference from old version is that basicTweetPredicates are different + * basicTweetPredicates here don't include Social Graph Service related 
predicates + */ +trait BasicTweetPredicatesForRFPHWithoutSGSPredicates[ + C <: PushCandidate with TweetCandidate with TweetDetails] + extends BasicTweetPredicatesWithoutSGSPredicates + with BasicRFPHPredicates[C] { + + // specific predicates per candidate type before basic tweet predicates + def preCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty + + // specific predicates per candidate type after basic tweet predicates + def postCandidateSpecificPredicates: List[NamedPredicate[C]] = List.empty + + override lazy val predicates: List[NamedPredicate[C]] = + preCandidateSpecificPredicates ++ basicTweetPredicates ++ postCandidateSpecificPredicates + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/OutOfNetworkTweetPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/OutOfNetworkTweetPredicates.scala new file mode 100644 index 000000000..e85dc95f0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/OutOfNetworkTweetPredicates.scala @@ -0,0 +1,16 @@ +package com.twitter.frigate.pushservice.take.predicates + +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.common.base.TweetDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate +import com.twitter.hermit.predicate.NamedPredicate + +trait OutOfNetworkTweetPredicates[C <: PushCandidate with TweetCandidate with TweetDetails] + extends BasicTweetPredicatesForRFPH[C] { + + override lazy val preCandidateSpecificPredicates: List[NamedPredicate[C]] = + List( + PredicatesForCandidate.authorNotBeingFollowed(config.edgeStore) + ) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/TakeCommonPredicates.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/TakeCommonPredicates.scala new file mode 
100644
index 000000000..e921f3fcf
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/TakeCommonPredicates.scala
@@ -0,0 +1,36 @@
+package com.twitter.frigate.pushservice.take.predicates
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.pushservice.predicate.CrtDeciderPredicate
+import com.twitter.frigate.pushservice.predicate.PredicatesForCandidate
+import com.twitter.frigate.pushservice.predicate.ScarecrowPredicate
+import com.twitter.frigate.pushservice.predicate.ntab_caret_fatigue.NtabCaretClickFatiguePredicate
+import com.twitter.hermit.predicate.NamedPredicate
+
+/**
+ * Predicate lists that apply to any [[PushCandidate]], independent of its
+ * candidate type. Each member is a `List[NamedPredicate[PushCandidate]]`.
+ *
+ * NOTE(review): the `Pre` / `post` naming suggests these run before / after the
+ * candidate-specific predicates; the call sites are not in this file, confirm there.
+ */
+trait TakeCommonPredicates {
+  // Service configuration; supplies the decider and the scarecrow store read below.
+  def config: Config
+
+  // Not referenced by name in this trait; presumably consumed implicitly by the
+  // predicate factories below -- TODO confirm.
+  implicit def statsReceiver: StatsReceiver
+
+  // Common predicates listed first for the "rfph" path: decider gate, then channel validity.
+  lazy val rfphPrePredicates: List[NamedPredicate[PushCandidate]] = List(
+    CrtDeciderPredicate(config.decider),
+    PredicatesForCandidate.isChannelValidPredicate,
+  )
+
+  // Common predicates for the send-handler path. The list order is the declared
+  // order below; whether evaluation short-circuits is decided by the consumer.
+  lazy val sendHandlerPrePredicates: List[NamedPredicate[PushCandidate]] = List(
+    CrtDeciderPredicate(config.decider),
+    PredicatesForCandidate.enableSendHandlerCandidates,
+    PredicatesForCandidate.mrWebHoldbackPredicate,
+    PredicatesForCandidate.targetUserExists,
+    PredicatesForCandidate.authorInSocialContext,
+    PredicatesForCandidate.recommendedTweetIsAuthoredBySelf,
+    PredicatesForCandidate.selfInSocialContext,
+    NtabCaretClickFatiguePredicate()
+  )
+
+  // Single trailing predicate backed by the scarecrow check-event store.
+  lazy val postPredicates: List[NamedPredicate[PushCandidate]] = List(
+    ScarecrowPredicate(config.scarecrowCheckEventStore)
+  )
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/CandidatePredicatesMap.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/CandidatePredicatesMap.scala
new file mode 100644
index 000000000..aa4642cea
--- /dev/null
+++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/CandidatePredicatesMap.scala
@@ -0,0 +1,75 @@
+package com.twitter.frigate.pushservice.take.predicates.candidate_map
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model._
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.frigate.thriftscala.CommonRecommendationType._
+import com.twitter.hermit.predicate.NamedPredicate
+
+/**
+ * Lookup table from a recommendation type to the predicate list used for
+ * candidates of that type.
+ */
+object CandidatePredicatesMap {
+
+  /**
+   * Builds the table for the given configuration.
+   *
+   * Each predicate holder is instantiated exactly once, in the order written
+   * below, and its `predicates` list is shared by every recommendation type
+   * mapped to it.
+   *
+   * @param config configuration handed to every predicate holder
+   * @return map keyed by recommendation type
+   */
+  def apply(
+    implicit config: Config
+  ): Map[CommonRecommendationType, List[NamedPredicate[_ <: PushCandidate]]] = {
+
+    // One predicate list per candidate family.
+    val trendPreds = TrendTweetPredicates(config).predicates
+    val tripPreds = TripTweetCandidatePredicates(config).predicates
+    val f1Preds = F1TweetCandidatePredicates(config).predicates
+    val oonPreds = OutOfNetworkTweetCandidatePredicates(config).predicates
+    val tweetActionPreds = TweetActionCandidatePredicates(config).predicates
+    val topicProofPreds = TopicProofTweetCandidatePredicates(config).predicates
+    val addressBookPreds = AddressBookPushCandidatePredicates(config).predicates
+    val onboardingPreds = CompleteOnboardingPushCandidatePredicates(config).predicates
+    val popGeoPreds = PopGeoTweetCandidatePredicates(config).predicates
+    val topImpressionsPreds = TopTweetImpressionsPushCandidatePredicates(config).predicates
+    val listPreds = ListRecommendationPredicates(config).predicates
+    val subscribedSearchPreds = SubscribedSearchTweetCandidatePredicates(config).predicates
+
+    // Kept as the final expression so the declared return type drives inference.
+    Map(
+      F1FirstdegreeTweet -> f1Preds,
+      F1FirstdegreePhoto -> f1Preds,
+      F1FirstdegreeVideo -> f1Preds,
+      ElasticTimelineTweet -> oonPreds,
+      ElasticTimelinePhoto -> oonPreds,
+      ElasticTimelineVideo -> oonPreds,
+      TwistlyTweet -> oonPreds,
+      TwistlyPhoto -> oonPreds,
+      TwistlyVideo -> oonPreds,
+      ExploreVideoTweet -> oonPreds,
+      UserInterestinTweet -> oonPreds,
+      UserInterestinPhoto -> oonPreds,
+      UserInterestinVideo -> oonPreds,
+      PastEmailEngagementTweet -> oonPreds,
+      PastEmailEngagementPhoto -> oonPreds,
+      PastEmailEngagementVideo -> oonPreds,
+      TagSpaceTweet -> oonPreds,
+      TwhinTweet -> oonPreds,
+      FrsTweet -> oonPreds,
+      MrModelingBasedTweet -> oonPreds,
+      TrendTweet -> trendPreds,
+      ReverseAddressbookTweet -> oonPreds,
+      ForwardAddressbookTweet -> oonPreds,
+      TripGeoTweet -> oonPreds,
+      TripHqTweet -> tripPreds,
+      DetopicTweet -> oonPreds,
+      CrowdSearchTweet -> oonPreds,
+      TweetFavorite -> tweetActionPreds,
+      TweetFavoritePhoto -> tweetActionPreds,
+      TweetFavoriteVideo -> tweetActionPreds,
+      TweetRetweet -> tweetActionPreds,
+      TweetRetweetPhoto -> tweetActionPreds,
+      TweetRetweetVideo -> tweetActionPreds,
+      TopicProofTweet -> topicProofPreds,
+      SubscribedSearch -> subscribedSearchPreds,
+      AddressBookUploadPush -> addressBookPreds,
+      CompleteOnboardingPush -> onboardingPreds,
+      List -> listPreds,
+      GeoPopTweet -> popGeoPreds,
+      TweetImpressions -> topImpressionsPreds
+    )
+  }
+}
diff --git
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/SendHandlerCandidatePredicatesMap.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/SendHandlerCandidatePredicatesMap.scala
new file mode 100644
index 000000000..e37a91044
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/predicates/candidate_map/SendHandlerCandidatePredicatesMap.scala
@@ -0,0 +1,78 @@
+package com.twitter.frigate.pushservice.take.predicates.candidate_map
+
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model._
+import com.twitter.frigate.pushservice.config.Config
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+import com.twitter.frigate.thriftscala.CommonRecommendationType._
+import com.twitter.hermit.predicate.NamedPredicate
+
+/**
+ * Per-recommendation-type predicate tables for send-handler candidates: one
+ * table of "pre" predicates and one of "post" predicates.
+ */
+object SendHandlerCandidatePredicatesMap {
+
+  /**
+   * Maps each supported recommendation type to the `preCandidateSpecificPredicates`
+   * of its predicate holder. A fresh holder is built per entry on every call.
+   */
+  def preCandidatePredicates(
+    implicit config: Config
+  ): Map[CommonRecommendationType, List[NamedPredicate[_ <: PushCandidate]]] = {
+    val newsEventPreds =
+      MagicFanoutNewsEventCandidatePredicates(config).preCandidateSpecificPredicates
+    val spaceSubscriberPreds =
+      ScheduledSpaceSubscriberCandidatePredicates(config).preCandidateSpecificPredicates
+    val spaceSpeakerPreds =
+      ScheduledSpaceSpeakerCandidatePredicates(config).preCandidateSpecificPredicates
+    val sportsEventPreds =
+      MagicFanoutSportsEventCandidatePredicates(config).preCandidateSpecificPredicates
+    val productLaunchPreds =
+      MagicFanoutProductLaunchPushCandidatePredicates(config).preCandidateSpecificPredicates
+    val creatorSubscriptionPreds =
+      MagicFanouCreatorSubscriptionEventPushPredicates(config).preCandidateSpecificPredicates
+    val newCreatorPreds =
+      MagicFanoutNewCreatorEventPushPredicates(config).preCandidateSpecificPredicates
+
+    Map(
+      MagicFanoutNewsEvent -> newsEventPreds,
+      ScheduledSpaceSubscriber -> spaceSubscriberPreds,
+      ScheduledSpaceSpeaker -> spaceSpeakerPreds,
+      MagicFanoutSportsEvent -> sportsEventPreds,
+      MagicFanoutProductLaunch -> productLaunchPreds,
+      NewCreator -> newCreatorPreds,
+      CreatorSubscriber -> creatorSubscriptionPreds
+    )
+  }
+
+  /**
+   * Same table shape as [[preCandidatePredicates]], but built from each
+   * holder's `postCandidateSpecificPredicates`.
+   */
+  def postCandidatePredicates(
+    implicit config: Config
+  ): Map[CommonRecommendationType, List[NamedPredicate[_ <: PushCandidate]]] = {
+    val newsEventPreds =
+      MagicFanoutNewsEventCandidatePredicates(config).postCandidateSpecificPredicates
+    val spaceSubscriberPreds =
+      ScheduledSpaceSubscriberCandidatePredicates(config).postCandidateSpecificPredicates
+    val spaceSpeakerPreds =
+      ScheduledSpaceSpeakerCandidatePredicates(config).postCandidateSpecificPredicates
+    val sportsEventPreds =
+      MagicFanoutSportsEventCandidatePredicates(config).postCandidateSpecificPredicates
+    val productLaunchPreds =
+      MagicFanoutProductLaunchPushCandidatePredicates(config).postCandidateSpecificPredicates
+    val creatorSubscriptionPreds =
+      MagicFanouCreatorSubscriptionEventPushPredicates(config).postCandidateSpecificPredicates
+    val newCreatorPreds =
+      MagicFanoutNewCreatorEventPushPredicates(config).postCandidateSpecificPredicates
+
+    Map(
+      MagicFanoutNewsEvent -> newsEventPreds,
+      ScheduledSpaceSubscriber -> spaceSubscriberPreds,
+      ScheduledSpaceSpeaker -> spaceSpeakerPreds,
+      MagicFanoutSportsEvent -> sportsEventPreds,
+      MagicFanoutProductLaunch -> productLaunchPreds,
+      NewCreator -> newCreatorPreds,
+      CreatorSubscriber -> creatorSubscriptionPreds
+    )
+  }
+}
diff
--git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/Ibis2Sender.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/Ibis2Sender.scala
new file mode 100644
index 000000000..a17d7fe1e
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/Ibis2Sender.scala
@@ -0,0 +1,185 @@
+package com.twitter.frigate.pushservice.take.sender
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.TweetCandidate
+import com.twitter.frigate.common.base.TweetDetails
+import com.twitter.frigate.common.store.IbisResponse
+import com.twitter.frigate.common.store.InvalidConfiguration
+import com.twitter.frigate.common.store.NoRequest
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS}
+import com.twitter.frigate.pushservice.store.Ibis2Store
+import com.twitter.frigate.pushservice.store.TweetTranslationStore
+import com.twitter.frigate.pushservice.util.CopyUtil
+import com.twitter.frigate.pushservice.util.FunctionalUtil
+import com.twitter.frigate.pushservice.util.InlineActionUtil
+import com.twitter.frigate.pushservice.util.OverrideNotificationUtil
+import com.twitter.frigate.pushservice.util.PushDeviceUtil
+import com.twitter.frigate.scribe.thriftscala.NotificationScribe
+import com.twitter.frigate.thriftscala.ChannelName
+import com.twitter.frigate.thriftscala.NotificationDisplayLocation
+import com.twitter.ibis2.service.thriftscala.Ibis2Request
+import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+/**
+ * Sends a push candidate through the Ibis2 store and scribes the outcome.
+ *
+ * Before sending, the candidate's Ibis2 request may be augmented with
+ * translation model values and with a patched `serialized_inline_actions_v2`
+ * value derived from the ntab response.
+ *
+ * @param pushIbisV2Store       store that performs the actual send
+ * @param tweetTranslationStore lookup for translated tweet text
+ * @param statsReceiver         parent stats receiver; scoped by this class's simple name
+ */
+class Ibis2Sender(
+  pushIbisV2Store: Ibis2Store,
+  tweetTranslationStore: ReadableStore[TweetTranslationStore.Key, TweetTranslationStore.Value],
+  statsReceiver: StatsReceiver) {
+
+  private val stats = statsReceiver.scope(getClass.getSimpleName)
+  private val silentPushCounter = stats.counter("silent_push")
+  private val ibisSendFailureCounter = stats.scope("ibis_send_failure").counter("failures")
+  private val buggyAndroidReleaseCounter = stats.counter("is_buggy_android_release")
+  private val androidPrimaryCounter = stats.counter("android_primary_device")
+  private val addTranslationModelValuesCounter = stats.counter("with_translation_model_values")
+  // A scoped StatsReceiver (not a counter); counters are created from it on use.
+  private val patchNtabResponseEnabled = stats.scope("with_ntab_response")
+  private val noIbisPushStats = stats.counter("no_ibis_push")
+
+  /**
+   * Builds the final Ibis2 request for `candidate` and sends it.
+   *
+   * Returns `IbisResponse(InvalidConfiguration)` without sending when the
+   * notification's display location is not `PushToMobileDevice`, and a
+   * `NoRequest` response when the candidate yields no Ibis2 request.
+   */
+  private def ibisSend(
+    candidate: PushCandidate,
+    translationModelValues: Option[Map[String, String]] = None,
+    ntabResponse: Option[CreateGenericNotificationResponse] = None
+  ): Future[IbisResponse] = {
+    if (candidate.frigateNotification.notificationDisplayLocation != NotificationDisplayLocation.PushToMobileDevice) {
+      Future.value(IbisResponse(InvalidConfiguration))
+    } else {
+      candidate.ibis2Request.flatMap {
+        case Some(request) =>
+          val requestWithTranslationMV =
+            addTranslationModelValues(request, translationModelValues)
+          // The ntab-response patch is skipped for logged-out users.
+          val patchedIbisRequest = {
+            if (candidate.target.isLoggedOutUser) {
+              requestWithTranslationMV
+            } else {
+              patchNtabResponseToIbisRequest(requestWithTranslationMV, candidate, ntabResponse)
+            }
+          }
+          pushIbisV2Store.send(patchedIbisRequest, candidate)
+        case _ =>
+          noIbisPushStats.incr()
+          Future.value(IbisResponse(sendStatus = NoRequest, ibis2Response = None))
+      }
+    }
+  }
+
+  /**
+   * Sends the candidate with no translation values and no ntab response.
+   * Unlike [[send]], nothing is scribed here.
+   */
+  def sendAsDarkWrite(
+    candidate: PushCandidate
+  ): Future[IbisResponse] = {
+    ibisSend(candidate)
+  }
+
+  /**
+   * Sends the candidate and scribes the result through `notificationScribe`.
+   *
+   * @param channels           channels recorded in the scribe data
+   * @param pushCandidate      candidate to send
+   * @param notificationScribe sink invoked with the scribe record
+   * @param ntabResponse       ntab creation response, used to patch inline actions
+   * @return the Ibis response of the send
+   */
+  def send(
+    channels: Seq[ChannelName],
+    pushCandidate: PushCandidate,
+    notificationScribe: NotificationScribe => Unit,
+    ntabResponse: Option[CreateGenericNotificationResponse],
+  ): Future[IbisResponse] = pushCandidate.target.isSilentPush.flatMap { isSilentPush: Boolean =>
+    if (isSilentPush) silentPushCounter.incr()
+    pushCandidate.target.deviceInfo.flatMap { deviceInfo =>
+      if (deviceInfo.exists(_.isSim40AndroidVersion)) buggyAndroidReleaseCounter.incr()
+      if (PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfo)) androidPrimaryCounter.incr()
+      // Override info, copy features and translation values are fetched together
+      // before the send is issued.
+      Future
+        .join(
+          OverrideNotificationUtil
+            .getOverrideInfo(pushCandidate, stats),
+          CopyUtil.getCopyFeatures(pushCandidate, stats),
+          getTranslationModelValues(pushCandidate)
+        ).flatMap {
+          case (overrideInfoOpt, copyFeaturesMap, translationModelValues) =>
+            ibisSend(pushCandidate, translationModelValues, ntabResponse)
+              .onSuccess { ibisResponse =>
+                pushCandidate
+                  .scribeData(
+                    ibis2Response = ibisResponse.ibis2Response,
+                    isSilentPush = isSilentPush,
+                    overrideInfoOpt = overrideInfoOpt,
+                    copyFeaturesList = copyFeaturesMap.keySet,
+                    channels = channels
+                  ).foreach(notificationScribe)
+              }.onFailure { _ =>
+                // On failure only the channels are scribed; the failure counter is
+                // bumped inside the foreach, i.e. only once scribe data is produced.
+                pushCandidate
+                  .scribeData(channels = channels).foreach { data =>
+                    ibisSendFailureCounter.incr()
+                    notificationScribe(data)
+                  }
+              }
+        }
+    }
+  }
+
+  /**
+   * Looks up translation model values for tweet candidates.
+   *
+   * Yields `None` for candidates that are not `TweetCandidate with TweetDetails`
+   * and when the translation store has no value for the key.
+   */
+  private def getTranslationModelValues(
+    candidate: PushCandidate
+  ): Future[Option[Map[String, String]]] = {
+    candidate match {
+      case tweetCandidate: TweetCandidate with TweetDetails =>
+        val key = TweetTranslationStore.Key(
+          target = candidate.target,
+          tweetId = tweetCandidate.tweetId,
+          tweet = tweetCandidate.tweet,
+          crt = candidate.commonRecType
+        )
+
+        tweetTranslationStore
+          .get(key)
+          .map {
+            case Some(value) =>
+              Some(
+                Map(
+                  "translated_tweet_text" -> value.translatedTweetText,
+                  "localized_source_language" -> value.localizedSourceLanguage
+                ))
+            case None => None
+          }
+      case _ => Future.None
+    }
+  }
+
+  /**
+   * Merges translation model values into the request's model values.
+   * Returns the request unchanged when there are no translation values.
+   */
+  private def addTranslationModelValues(
+    ibisRequest: Ibis2Request,
+    translationModelValues: Option[Map[String, String]]
+  ): Ibis2Request = {
+    (translationModelValues, ibisRequest.modelValues) match {
+      case (Some(translationModelVal), Some(existingModelValues)) =>
+        addTranslationModelValuesCounter.incr()
+        // Right operand wins on key collisions, so existing model values take
+        // precedence over translation values.
+        ibisRequest.copy(modelValues = Some(translationModelVal ++ existingModelValues))
+      case (Some(translationModelVal), None) =>
+        addTranslationModelValuesCounter.incr()
+        ibisRequest.copy(modelValues = Some(translationModelVal))
+      case (None, _) => ibisRequest
+    }
+  }
+
+  /**
+   * When `FS.EnableInlineFeedbackOnPush` is on, rewrites the request's
+   * `serialized_inline_actions_v2` model value using a dislike action derived
+   * from the ntab response; otherwise returns the request unchanged.
+   *
+   * NOTE(review): `FunctionalUtil.incr(counter)` is used inside `map`, so it
+   * presumably returns its argument after bumping the counter -- confirm.
+   */
+  private def patchNtabResponseToIbisRequest(
+    ibis2Req: Ibis2Request,
+    candidate: PushCandidate,
+    ntabResponse: Option[CreateGenericNotificationResponse]
+  ): Ibis2Request = {
+    if (candidate.target.params(FS.EnableInlineFeedbackOnPush)) {
+      patchNtabResponseEnabled.counter().incr()
+      val dislikePosition = candidate.target.params(FS.InlineFeedbackSubstitutePosition)
+      val dislikeActionOption = ntabResponse
+        .map(FunctionalUtil.incr(patchNtabResponseEnabled.counter("ntab_response_exist")))
+        .flatMap(response => InlineActionUtil.getDislikeInlineAction(candidate, response))
+        .map(FunctionalUtil.incr(patchNtabResponseEnabled.counter("dislike_action_generated")))
+
+      // Only generate patch serialized inline action when original request has existing serialized_inline_actions_v2
+      val patchedSerializedActionOption = ibis2Req.modelValues
+        .flatMap(model => model.get("serialized_inline_actions_v2"))
+        .map(FunctionalUtil.incr(patchNtabResponseEnabled.counter("inline_action_v2_exists")))
+        .map(serialized =>
+          InlineActionUtil
+            .patchInlineActionAtPosition(serialized, dislikeActionOption, dislikePosition))
+        .map(FunctionalUtil.incr(patchNtabResponseEnabled.counter("patch_inline_action_generated")))
+
+      (ibis2Req.modelValues, patchedSerializedActionOption) match {
+        case (Some(existingModelValue), Some(patchedActionV2)) =>
+          patchNtabResponseEnabled.scope("patch_applied").counter().incr()
+          // The patched value replaces the existing entry for the same key.
+          ibis2Req.copy(modelValues =
+            Some(existingModelValue ++ Map("serialized_inline_actions_v2" -> patchedActionV2)))
+        case _ => ibis2Req
+      }
+    } else ibis2Req
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/NtabSender.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/NtabSender.scala
new file mode 100644
index 000000000..5019aa040
--- /dev/null
+++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/take/sender/NtabSender.scala
@@ -0,0 +1,237 @@
+package com.twitter.frigate.pushservice.take.sender
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.history.History
+import com.twitter.frigate.common.rec_types.RecTypes
+import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
+import com.twitter.frigate.pushservice.model.ibis.PushOverrideInfo
+import com.twitter.frigate.pushservice.params.PushConstants
+import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FSParams}
+import com.twitter.frigate.pushservice.take.NotificationServiceRequest
+import com.twitter.frigate.thriftscala.FrigateNotification
+import com.twitter.hermit.store.common.ReadableWritableStore
+import com.twitter.notificationservice.api.thriftscala.DeleteCurrentTimelineForUserRequest
+import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse
+import com.twitter.notificationservice.thriftscala.DeleteGenericNotificationRequest
+import com.twitter.notificationservice.thriftscala.GenericNotificationKey
+import com.twitter.notificationservice.thriftscala.GenericNotificationOverrideKey
+import com.twitter.storehaus.ReadableStore
+import com.twitter.util.Future
+
+object OverrideCandidate extends Enumeration {
+  val One: String = "overrideEntry1"
+}
+
+/**
+ * Creates notification-tab entries for push candidates and removes the entries
+ * they supersede.
+ *
+ * The key of every created entry can be remembered in `nTabHistoryStore` under
+ * `(userId, impressionId)` so that a later send can find and delete it.
+ *
+ * @param notificationServiceSender creates the ntab entry for a request
+ * @param nTabHistoryStore          `(userId, impressionId)` -> key of the created entry
+ * @param nTabDelete                deletes a single ntab entry
+ * @param nTabDeleteTimeline        deletes the user's current cached timeline
+ */
+class NtabSender(
+  notificationServiceSender: ReadableStore[
+    NotificationServiceRequest,
+    CreateGenericNotificationResponse
+  ],
+  nTabHistoryStore: ReadableWritableStore[(Long, String), GenericNotificationOverrideKey],
+  nTabDelete: DeleteGenericNotificationRequest => Future[Unit],
+  nTabDeleteTimeline: DeleteCurrentTimelineForUserRequest => Future[Unit]
+)(
+  implicit statsReceiver: StatsReceiver) {
+
+  private[this] val nTabDeleteRequests = statsReceiver.counter("ntab_delete_request")
+  private[this] val nTabDeleteTimelineRequests =
+    statsReceiver.counter("ntab_delete_timeline_request")
+  private[this] val ntabOverrideImpressionNotFound =
+    statsReceiver.counter("ntab_impression_not_found")
+  private[this] val nTabOverrideOverriddenStat =
+    statsReceiver.counter("ntab_override_overridden")
+  private[this] val storeGenericNotifOverrideKey =
+    statsReceiver.counter("ntab_store_generic_notif_key")
+  private[this] val prevGenericNotifKeyNotFound =
+    statsReceiver.counter("ntab_prev_generic_notif_key_not_found")
+
+  private[this] val ntabOverride =
+    statsReceiver.scope("ntab_override")
+  private[this] val ntabRequestWithOverrideId =
+    ntabOverride.counter("request")
+  private[this] val storeGenericNotifOverrideKeyWithOverrideId =
+    ntabOverride.counter("store_override_key")
+
+  /**
+   * Writes an ntab entry for `candidate`.
+   *
+   * With `EnableOverrideIdNTabRequest` on, the previous eligible entry is
+   * deleted first and the new entry carries an override id, unless override is
+   * disabled for the candidate's type. Otherwise the N-slot path runs: the
+   * oldest overridable entry may be deleted, then the cached timeline, and the
+   * new entry is written without an override id.
+   *
+   * @return the creation response, or `None` when the sender returned nothing
+   */
+  def send(
+    candidate: PushCandidate,
+    isNtabOnlyNotification: Boolean
+  ): Future[Option[CreateGenericNotificationResponse]] = {
+    if (candidate.target.params(FSParams.EnableOverrideIdNTabRequest)) {
+      ntabRequestWithOverrideId.incr()
+      overridePreviousEntry(candidate).flatMap { _ =>
+        if (shouldDisableNtabOverride(candidate)) {
+          sendNewEntry(candidate, isNtabOnlyNotification, None)
+        } else {
+          sendNewEntry(candidate, isNtabOnlyNotification, Some(OverrideCandidate.One))
+        }
+      }
+    } else {
+      for {
+        notificationOverwritten <- overrideNSlot(candidate)
+        _ <- deleteCachedApiTimeline(candidate, notificationOverwritten)
+        gnResponse <- sendNewEntry(candidate, isNtabOnlyNotification)
+      } yield gnResponse
+    }
+  }
+
+  /** Creates the ntab entry and, on success, records its key for later overrides. */
+  private def sendNewEntry(
+    candidate: PushCandidate,
+    isNtabOnlyNotif: Boolean,
+    overrideId: Option[String] = None
+  ): Future[Option[CreateGenericNotificationResponse]] = {
+    notificationServiceSender
+      .get(
+        NotificationServiceRequest(
+          candidate,
+          impressionId = candidate.impressionId,
+          isBadgeUpdate = isNtabOnlyNotif,
+          overrideId = overrideId
+        )).flatMap {
+        case Some(response) =>
+          storeGenericNotifKey(candidate, response, overrideId).map { _ => Some(response) }
+        case _ => Future.None
+      }
+  }
+
+  /**
+   * Stores the created entry's key under `(userId, impressionId)` when
+   * `EnableStoringNtabGenericNotifKey` is on and the response has a success key;
+   * otherwise does nothing.
+   */
+  private def storeGenericNotifKey(
+    candidate: PushCandidate,
+    createGenericNotificationResponse: CreateGenericNotificationResponse,
+    overrideId: Option[String]
+  ): Future[Unit] = {
+    if (candidate.target.params(FSParams.EnableStoringNtabGenericNotifKey)) {
+      createGenericNotificationResponse.successKey match {
+        case Some(genericNotificationKey) =>
+          val userId = genericNotificationKey.userId
+          if (overrideId.nonEmpty) {
+            storeGenericNotifOverrideKeyWithOverrideId.incr()
+          }
+          val gnOverrideKey = GenericNotificationOverrideKey(
+            userId = userId,
+            hashKey = genericNotificationKey.hashKey,
+            timestampMillis = genericNotificationKey.timestampMillis,
+            overrideId = overrideId
+          )
+          val mhKeyVal =
+            ((userId, candidate.impressionId), gnOverrideKey)
+          storeGenericNotifOverrideKey.incr()
+          nTabHistoryStore.put(mhKeyVal)
+        case _ => Future.Unit
+      }
+    } else Future.Unit
+  }
+
+  /**
+   * Picks the oldest notification among `targetEntries`.
+   *
+   * Entries are keyed by timestamp, so entries sharing a timestamp collapse to
+   * one. Entries without a known timestamp fall back to the default lookback.
+   */
+  private def candidateEligibleForOverride(
+    targetHistory: History,
+    targetEntries: Seq[FrigateNotification],
+  ): FrigateNotification = {
+    val timestampToEntriesMap =
+      targetEntries.map { entry =>
+        PushOverrideInfo
+          .getTimestampInMillisForFrigateNotification(entry, targetHistory, statsReceiver)
+          .getOrElse(PushConstants.DefaultLookBackForHistory.ago.inMilliseconds) -> entry
+      }.toMap
+
+    PushOverrideInfo.getOldestFrigateNotification(timestampToEntriesMap)
+  }
+
+  /**
+   * N-slot override: once the number of overridable notifications reaches the
+   * configured maximum, deletes the ntab entry of the oldest one.
+   *
+   * @return true when a delete was issued for a previously stored entry
+   */
+  private def overrideNSlot(candidate: PushCandidate): Future[Boolean] = {
+    if (candidate.target.params(FSParams.EnableNslotsForOverrideOnNtab)) {
+      val targetHistoryFut = candidate.target.history
+      targetHistoryFut.flatMap { targetHistory =>
+        // Must be a flat collection of rec types. It used to be wrapped in Seq(...),
+        // which produced a one-element Seq holding the whole collection, so the
+        // contains() check below never matched and nothing was ever excluded.
+        // NOTE(review): assumes both RecTypes members are collections of
+        // CommonRecommendationType, as the ++ implies -- confirm in RecTypes.
+        val nonEligibleOverrideTypes =
+          RecTypes.RecommendedSpaceFanoutTypes ++ RecTypes.ScheduledSpaceReminderTypes
+
+        val overrideNotifs = PushOverrideInfo
+          .getOverrideEligiblePushNotifications(
+            targetHistory,
+            candidate.target.params(FSParams.OverrideNotificationsLookbackDurationForNTab),
+            statsReceiver
+          ).filterNot { notification =>
+            nonEligibleOverrideTypes.contains(notification.commonRecommendationType)
+          }
+
+        val maxNumUnreadEntries =
+          candidate.target.params(FSParams.OverrideNotificationsMaxCountForNTab)
+        if (overrideNotifs.nonEmpty && overrideNotifs.size >= maxNumUnreadEntries) {
+          val oldestOverridableNotif = candidateEligibleForOverride(
+            targetHistory,
+            overrideNotifs
+          )
+          val oldestImpressionId = oldestOverridableNotif.impressionId
+          if (oldestImpressionId.nonEmpty) {
+            deleteNTabEntryFromGenericNotificationStore(
+              candidate.target.targetId,
+              oldestImpressionId.head)
+          } else {
+            ntabOverrideImpressionNotFound.incr()
+            Future.False
+          }
+        } else Future.False
+      }
+    } else {
+      Future.False
+    }
+  }
+
+  /** Override is disabled for send-handler recommendation types. */
+  private def shouldDisableNtabOverride(candidate: PushCandidate): Boolean =
+    RecTypes.isSendHandlerType(candidate.commonRecType)
+
+  /**
+   * Override-id path: deletes the ntab entry of the first previous eligible
+   * push notification found in the target's history.
+   *
+   * @return true when a delete was issued for a previously stored entry
+   */
+  private def overridePreviousEntry(candidate: PushCandidate): Future[Boolean] = {
+
+    if (shouldDisableNtabOverride(candidate)) {
+      nTabOverrideOverriddenStat.incr()
+      Future.False
+    } else {
+      val targetHistoryFut = candidate.target.history
+      targetHistoryFut.flatMap { targetHistory =>
+        val impressionIds = PushOverrideInfo.getImpressionIdsOfPrevEligiblePushNotif(
+          targetHistory,
+          candidate.target.params(FSParams.OverrideNotificationsLookbackDurationForImpressionId),
+          statsReceiver)
+
+        if (impressionIds.nonEmpty) {
+          deleteNTabEntryFromGenericNotificationStore(candidate.target.targetId, impressionIds.head)
+        } else {
+          ntabOverrideImpressionNotFound.incr()
+          Future.False // no deletes issued
+        }
+      }
+    }
+  }
+
+  /**
+   * Deletes the user's cached timeline, but only when an entry was actually
+   * overridden and `EnableDeletingNtabTimeline` is on.
+   */
+  private def deleteCachedApiTimeline(
+    candidate: PushCandidate,
+    isNotificationOverridden: Boolean
+  ): Future[Unit] = {
+    if (isNotificationOverridden && candidate.target.params(FSParams.EnableDeletingNtabTimeline)) {
+      val deleteTimelineRequest = DeleteCurrentTimelineForUserRequest(candidate.target.targetId)
+      nTabDeleteTimelineRequests.incr()
+      nTabDeleteTimeline(deleteTimelineRequest)
+    } else {
+      Future.Unit
+    }
+  }
+
+  /**
+   * Looks up the stored key for `(targetUserId, targetImpressionId)` and deletes
+   * the matching ntab entry.
+   *
+   * @return true once the delete completes; false when no key was stored
+   */
+  private def deleteNTabEntryFromGenericNotificationStore(
+    targetUserId: Long,
+    targetImpressionId: String
+  ): Future[Boolean] = {
+    val mhKey = (targetUserId, targetImpressionId)
+    val genericNotificationKeyFut = nTabHistoryStore.get(mhKey)
+    genericNotificationKeyFut.flatMap {
+      case Some(genericNotifOverrideKey) =>
+        val gnKey = GenericNotificationKey(
+          userId = genericNotifOverrideKey.userId,
+          hashKey = genericNotifOverrideKey.hashKey,
+          timestampMillis = genericNotifOverrideKey.timestampMillis
+        )
+        val deleteEntryRequest = DeleteGenericNotificationRequest(gnKey)
+        nTabDeleteRequests.incr()
+        nTabDelete(deleteEntryRequest).map(_ => true)
+      case _ =>
+        prevGenericNotifKeyNotFound.incr()
+        Future.False
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/CustomFSFields.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/CustomFSFields.scala
new file mode 100644
index 000000000..9690e9bad
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/CustomFSFields.scala
@@ -0,0 +1,98 @@
+package com.twitter.frigate.pushservice.target
+
+import com.twitter.featureswitches.FSCustomMapInput
+import com.twitter.featureswitches.parsing.DynMap
+import com.twitter.frigate.common.store.deviceinfo.DeviceInfo
+import com.twitter.frigate.pushservice.util.NsfwInfo
+import com.twitter.gizmoduck.thriftscala.User
+
+object CustomFSFields {
+  private val IsReturningUser = "is_returning_user"
+  private val DaysSinceSignup = "days_since_signup"
+  private val DaysSinceLogin = "days_since_login"
+  private val DaysSinceReactivation = "days_since_reactivation"
+  private val ReactivationDate = "reactivation_date"
+  private val FollowGraphSize = "follow_graph_size"
+  private val GizmoduckUserType = "gizmoduck_user_type"
+  private val UserAge = "mr_user_age"
+  private val SensitiveOptIn = "sensitive_opt_in"
+  private val NsfwFollowRatio = "nsfw_follow_ratio"
+  private val TotalFollows = "follow_count"
+  private val NsfwRealGraphScore =
"nsfw_real_graph_score" + private val NsfwProfileVisit = "nsfw_profile_visit" + private val TotalSearches = "total_searches" + private val NsfwSearchScore = "nsfw_search_score" + private val HasReportedNsfw = "nsfw_reported" + private val HasDislikedNsfw = "nsfw_disliked" + private val UserState = "user_state" + private val MrUserState = "mr_user_state" + private val NumDaysReceivedPushInLast30Days = + "num_days_received_push_in_last_30_days" + private val RecommendationsSetting = "recommendations_setting" + private val TopicsSetting = "topics_setting" + private val SpacesSetting = "spaces_setting" + private val NewsSetting = "news_setting" + private val LiveVideoSetting = "live_video_setting" + private val HasRecentPushableRebDevice = "has_recent_pushable_rweb_device" + private val RequestSource = "request_source" +} + +case class CustomFSFields( + isReactivatedUser: Boolean, + daysSinceSignup: Int, + numDaysReceivedPushInLast30Days: Int, + daysSinceLogin: Option[Int], + daysSinceReactivation: Option[Int], + user: Option[User], + userState: Option[String], + mrUserState: Option[String], + reactivationDate: Option[String], + requestSource: Option[String], + userAge: Option[Int], + nsfwInfo: Option[NsfwInfo], + deviceInfo: Option[DeviceInfo]) { + + import CustomFSFields._ + + private val keyValMap: Map[String, Any] = Map( + IsReturningUser -> isReactivatedUser, + DaysSinceSignup -> daysSinceSignup, + DaysSinceLogin -> daysSinceLogin, + NumDaysReceivedPushInLast30Days -> numDaysReceivedPushInLast30Days + ) ++ + daysSinceReactivation.map(DaysSinceReactivation -> _) ++ + reactivationDate.map(ReactivationDate -> _) ++ + user.flatMap(_.counts.map(counts => FollowGraphSize -> counts.following)) ++ + user.map(u => GizmoduckUserType -> u.userType.name) ++ + userState.map(UserState -> _) ++ + mrUserState.map(MrUserState -> _) ++ + requestSource.map(RequestSource -> _) ++ + userAge.map(UserAge -> _) ++ + nsfwInfo.flatMap(_.senstiveOptIn).map(SensitiveOptIn -> _) ++ + nsfwInfo 
+ .map { nsInfo => + Map[String, Any]( + NsfwFollowRatio -> nsInfo.nsfwFollowRatio, + TotalFollows -> nsInfo.totalFollowCount, + NsfwRealGraphScore -> nsInfo.realGraphScore, + NsfwProfileVisit -> nsInfo.nsfwProfileVisits, + TotalSearches -> nsInfo.totalSearches, + NsfwSearchScore -> nsInfo.searchNsfwScore, + HasReportedNsfw -> nsInfo.hasReported, + HasDislikedNsfw -> nsInfo.hasDisliked + ) + }.getOrElse(Map.empty[String, Any]) ++ + deviceInfo + .map { deviceInfo => + Map[String, Boolean]( + RecommendationsSetting -> deviceInfo.isRecommendationsEligible, + TopicsSetting -> deviceInfo.isTopicsEligible, + SpacesSetting -> deviceInfo.isSpacesEligible, + LiveVideoSetting -> deviceInfo.isBroadcastsEligible, + NewsSetting -> deviceInfo.isNewsEligible, + HasRecentPushableRebDevice -> deviceInfo.hasRecentPushableRWebDevice + ) + }.getOrElse(Map.empty[String, Boolean]) + + val fsMap = FSCustomMapInput(DynMap(keyValMap)) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/LoggedOutPushTargetUserBuilder.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/LoggedOutPushTargetUserBuilder.scala new file mode 100644 index 000000000..facbaede0 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/LoggedOutPushTargetUserBuilder.scala @@ -0,0 +1,182 @@ +package com.twitter.frigate.pushservice.target + +import com.twitter.abdecider.LoggingABDecider +import com.twitter.conversions.DurationOps._ +import com.twitter.decider.Decider +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.FeatureMap +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.history.HistoryStoreKeyContext +import com.twitter.frigate.common.history.MagicFanoutReasonHistory +import com.twitter.frigate.common.history.PushServiceHistoryStore +import com.twitter.frigate.common.history.RecItems +import com.twitter.frigate.common.store.deviceinfo.DeviceInfo +import 
com.twitter.frigate.common.util.ABDeciderWithOverride
+import com.twitter.frigate.common.util.LanguageLocaleUtil
+import com.twitter.frigate.data_pipeline.features_common.MrRequestContextForFeatureStore
+import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue
+import com.twitter.frigate.dau_model.thriftscala.DauProbability
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.thriftscala.PushContext
+import com.twitter.frigate.thriftscala.UserForPushTargeting
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.stp.thriftscala.STPResult
+import com.twitter.interests.thriftscala.InterestId
+import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue
+import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails
+import com.twitter.nrel.hydration.push.HydrationContext
+import com.twitter.permissions_storage.thriftscala.AppPermission
+import com.twitter.service.metastore.gen.thriftscala.Location
+import com.twitter.service.metastore.gen.thriftscala.UserLanguages
+import com.twitter.stitch.Stitch
+import com.twitter.storehaus.ReadableStore
+import com.twitter.strato.columns.frigate.logged_out_web_notifications.thriftscala.LOWebNotificationMetadata
+import com.twitter.timelines.configapi
+import com.twitter.timelines.configapi.Params
+import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphFeatures
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.wtf.scalding.common.thriftscala.UserFeatures
+
+/**
+ * Builds the push [[Target]] for a logged-out (guest) visitor.
+ *
+ * Only the 30-day notification history and the logged-out web-notification metadata are
+ * looked up; every other [[Target]] attribute is stubbed with an empty / `null` value.
+ *
+ * @param historyStore           store the guest's notification history is read from
+ * @param inputDecider           decider exposed on the built target
+ * @param inputAbDecider         AB decider wrapped by the target's `abDecider`
+ * @param loggedOutPushInfoStore metadata (language, country code) keyed by guest id
+ * @param globalStatsReceiver    stats receiver; counters live under "LORefreshForPushHandler"
+ */
+case class LoggedOutPushTargetUserBuilder(
+  historyStore: PushServiceHistoryStore,
+  inputDecider: Decider,
+  inputAbDecider: LoggingABDecider,
+  loggedOutPushInfoStore: ReadableStore[Long, LOWebNotificationMetadata]
+)(
+  globalStatsReceiver: StatsReceiver) {
+  private val stats = globalStatsReceiver.scope("LORefreshForPushHandler")
+  private val noHistoryCounter = stats.counter("no_logged_out_history")
+  private val historyFoundCounter = stats.counter("logged_out_history_counter")
+  private val noLoggedOutUserCounter = stats.counter("no_logged_out_user")
+  private val countryCodeCounter = stats.counter("country_counter")
+  private val noCountryCodeCounter = stats.counter("no_country_counter")
+  private val noLanguageCodeCounter = stats.counter("no_language_counter")
+
+  /**
+   * Looks up the guest's history and logged-out metadata and wraps them in a [[Target]].
+   *
+   * @param guestId          id of the logged-out visitor; used as both `targetId` and
+   *                         `targetGuestId`
+   * @param inputPushContext optional request context; its `useMemcacheForHistory` flag
+   *                         (default false) selects the history store key context
+   * @return the target, available once both store reads have completed
+   */
+  def buildTarget(
+    guestId: Long,
+    inputPushContext: Option[PushContext]
+  ): Future[Target] = {
+
+    val historyStoreKeyContext = HistoryStoreKeyContext(
+      guestId,
+      inputPushContext.flatMap(_.useMemcacheForHistory).getOrElse(false)
+    )
+    // Each store is read exactly once. The lookup counters are driven by the resolved
+    // values below: comparing the returned Future itself against `Future.None` never
+    // inspects the lookup result and only caused a second, redundant read per store.
+    Future
+      .join(
+        historyStore.get(historyStoreKeyContext, Some(30.days)),
+        loggedOutPushInfoStore.get(guestId)
+      ).map {
+        case (loNotifHistory, loggedOutUserPushInfo) =>
+          if (loNotifHistory == null) {
+            noHistoryCounter.incr()
+          } else {
+            historyFoundCounter.incr()
+          }
+          if (loggedOutUserPushInfo.isEmpty) {
+            noLoggedOutUserCounter.incr()
+          }
+          // Count on the resolved metadata: `isDefined` on a Future reports completion,
+          // not whether a country is present.
+          // NOTE(review): if `countryCode` is itself optional in the thrift struct this
+          // still only reflects metadata presence - confirm against the schema.
+          if (loggedOutUserPushInfo.map(_.countryCode).isDefined) {
+            countryCodeCounter.incr()
+          } else {
+            noCountryCodeCounter.incr()
+          }
+          new Target {
+            override lazy val stats: StatsReceiver = globalStatsReceiver
+            override val targetId: Long = guestId
+            override val targetGuestId = Some(guestId)
+            override lazy val decider: Decider = inputDecider
+            override lazy val loggedOutMetadata = Future.value(loggedOutUserPushInfo)
+            val rawLanguageFut = loggedOutMetadata.map { metadata => metadata.map(_.language) }
+            // Standardised language code; an empty standardised code is treated as
+            // "no language" and counted.
+            override val targetLanguage: Future[Option[String]] = rawLanguageFut.map {
+              _.flatMap { rawLang =>
+                val lang = LanguageLocaleUtil.getStandardLanguageCode(rawLang)
+                if (lang.isEmpty) {
+                  noLanguageCodeCounter.incr()
+                  None
+                } else {
+                  Option(lang)
+                }
+              }
+            }
+            val country = loggedOutMetadata.map(_.map(_.countryCode))
+            // Only the country code is known for a guest; the remaining Location fields
+            // are filled with empty placeholders.
+            override lazy val location: Future[Option[Location]] = country.map {
+              case Some(code) =>
+                Some(
+                  Location(
+                    city = "",
+                    region = "",
+                    countryCode = code,
+                    confidence = 0.0,
+                    lat = None,
+                    lon = None,
+                    metro = None,
+                    placeIds = None,
+                    weightedLocations = None,
+                    createdAtMsec = None,
+                    ip = None,
+                    isSignupIp = None,
+                    placeMap = None
+                  ))
+              case _ => None
+            }
+
+            override lazy val pushContext: Option[PushContext] = inputPushContext
+            override lazy val history: Future[History] = Future.value(loNotifHistory)
+            override lazy val magicFanoutReasonHistory30Days: Future[MagicFanoutReasonHistory] =
+              Future.value(null)
+            override lazy val globalStats: StatsReceiver = globalStatsReceiver
+            override lazy val pushTargeting: Future[Option[UserForPushTargeting]] = Future.None
+            override lazy val appPermissions: Future[Option[AppPermission]] = Future.None
+            override lazy val lastHTLVisitTimestamp: Future[Option[Long]] = Future.None
+            override lazy val pushRecItems: Future[RecItems] = Future.value(null)
+
+            override lazy val isNewSignup: Boolean = false
+            override lazy val metastoreLanguages: Future[Option[UserLanguages]] = Future.None
+            override lazy val optOutUserInterests: Future[Option[Seq[InterestId]]] = Future.None
+            override lazy val mrRequestContextForFeatureStore: MrRequestContextForFeatureStore =
+              null
+            override lazy val targetUser: Future[Option[User]] = Future.None
+            override lazy val notificationFeedbacks: Future[Option[Seq[FeedbackPromptValue]]] =
+              Future.None
+            override lazy val promptFeedbacks: Stitch[Seq[FeedbackPromptValue]] = null
+            override lazy val seedsWithWeight: Future[Option[Map[Long, Double]]] = Future.None
+            override lazy val tweetImpressionResults: Future[Seq[Long]] = Future.Nil
+            override lazy val params: configapi.Params = Params.Empty
+            override lazy val deviceInfo: Future[Option[DeviceInfo]] = Future.None
+            override lazy val userFeatures: Future[Option[UserFeatures]] = Future.None
+            override lazy val isOpenAppExperimentUser: Future[Boolean] = Future.False
+            override lazy val featureMap: Future[FeatureMap] = Future.value(null)
+            override lazy val dauProbability: Future[Option[DauProbability]] = Future.None
+            override lazy val labeledPushRecsHydrated: Future[Option[UserHistoryValue]] =
+              Future.None
+            override lazy val onlineLabeledPushRecs: Future[Option[UserHistoryValue]] = Future.None
+            override lazy val realGraphFeatures: Future[Option[RealGraphFeatures]] = Future.None
+            override lazy val stpResult: Future[Option[STPResult]] = Future.None
+            override lazy val globalOptoutProbabilities: Seq[Future[Option[Double]]] = Seq.empty
+            override lazy val bucketOptoutProbability: Future[Option[Double]] = Future.None
+            override lazy val utcOffset: Future[Option[Duration]] = Future.None
+            override lazy val abDecider: ABDeciderWithOverride =
+              ABDeciderWithOverride(inputAbDecider, ddgOverrideOption)(globalStatsReceiver)
+            override lazy val resurrectionDate: Future[Option[String]] = Future.None
+            override lazy val isResurrectedUser: Boolean = false
+            override lazy val timeSinceResurrection: Option[Duration] = None
+            override lazy val inlineActionHistory: Future[Seq[(Long, String)]] = Future.Nil
+            override lazy val caretFeedbacks: Future[Option[Seq[CaretFeedbackDetails]]] =
+              Future.None
+
+            override def targetHydrationContext: Future[HydrationContext] = Future.value(null)
+            override def isBlueVerified: Future[Option[Boolean]] = Future.None
+            override def isVerified: Future[Option[Boolean]] = Future.None
+            override def isSuperFollowCreator: Future[Option[Boolean]] = Future.None
+          }
+      }
+  }
+
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/PushTargetUserBuilder.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/PushTargetUserBuilder.scala
new file mode 100644
index 000000000..8378500af
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/PushTargetUserBuilder.scala
@@ -0,0 +1,694 @@
+package com.twitter.frigate.pushservice.target
+
+import
com.twitter.abdecider.LoggingABDecider
+import com.twitter.conversions.DurationOps._
+import com.twitter.decider.Decider
+import com.twitter.discovery.common.configapi.ConfigParamsBuilder
+import com.twitter.discovery.common.configapi.ExperimentOverride
+import com.twitter.featureswitches.Recipient
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base._
+import com.twitter.frigate.common.history._
+import com.twitter.frigate.common.logger.MRLogger
+import com.twitter.frigate.common.store.FeedbackRequest
+import com.twitter.frigate.common.store.PushRecItemsKey
+import com.twitter.frigate.common.store.deviceinfo.DeviceInfo
+import com.twitter.frigate.common.store.interests.UserId
+import com.twitter.frigate.common.util._
+import com.twitter.frigate.data_pipeline.features_common.MrRequestContextForFeatureStore
+import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue
+import com.twitter.frigate.dau_model.thriftscala.DauProbability
+import com.twitter.frigate.pushcap.thriftscala.PushcapInfo
+import com.twitter.frigate.pushcap.thriftscala.PushcapUserHistory
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.ml.HydrationContextBuilder
+import com.twitter.frigate.pushservice.ml.PushMLModelScorer
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.frigate.pushservice.store.LabeledPushRecsStoreKey
+import com.twitter.frigate.pushservice.store.OnlineUserHistoryKey
+import com.twitter.frigate.pushservice.util.NsfwInfo
+import com.twitter.frigate.pushservice.util.NsfwPersonalizationUtil
+import com.twitter.frigate.pushservice.util.PushAppPermissionUtil
+import com.twitter.frigate.pushservice.util.PushCapUtil.getMinimumRestrictedPushcapInfo
+import com.twitter.frigate.pushservice.thriftscala.PushContext
+import com.twitter.frigate.pushservice.thriftscala.RequestSource
+import com.twitter.frigate.thriftscala.SecondaryAccountsByUserState
+import com.twitter.frigate.thriftscala.UserForPushTargeting
+import com.twitter.frigate.user_states.thriftscala.MRUserHmmState
+import com.twitter.frigate.user_states.thriftscala.{UserState => MrUserState}
+import com.twitter.frontpage.stream.util.SnowflakeUtil
+import com.twitter.geoduck.common.thriftscala.Place
+import com.twitter.geoduck.service.thriftscala.LocationResponse
+import com.twitter.gizmoduck.thriftscala.User
+import com.twitter.hermit.model.user_state.UserState
+import com.twitter.hermit.model.user_state.UserState.UserState
+import com.twitter.hermit.stp.thriftscala.STPResult
+import com.twitter.ibis.thriftscala.ContentRecData
+import com.twitter.interests.thriftscala.InterestId
+import com.twitter.notificationservice.feedback.thriftscala.FeedbackInteraction
+import com.twitter.notificationservice.genericfeedbackstore.FeedbackPromptValue
+import com.twitter.notificationservice.genericfeedbackstore.GenericFeedbackStore
+import com.twitter.notificationservice.genericfeedbackstore.GenericFeedbackStoreException
+import com.twitter.notificationservice.model.service.DismissMenuFeedbackAction
+import com.twitter.notificationservice.scribe.manhattan.GenericNotificationsFeedbackRequest
+import com.twitter.notificationservice.thriftscala.CaretFeedbackDetails
+import com.twitter.nrel.heavyranker.FeatureHydrator
+import com.twitter.nrel.hydration.push.HydrationContext
+import com.twitter.permissions_storage.thriftscala.AppPermission
+import com.twitter.rux.common.strato.thriftscala.UserTargetingProperty
+import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWUserSegmentation
+import com.twitter.service.metastore.gen.thriftscala.Location
+import com.twitter.service.metastore.gen.thriftscala.UserLanguages
+import com.twitter.stitch.Stitch
+import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
+import com.twitter.storehaus.ReadableStore
+import com.twitter.timelines.configapi
+import com.twitter.timelines.real_graph.thriftscala.{RealGraphFeatures => RealGraphFeaturesUnion}
+import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphFeatures
+import com.twitter.ubs.thriftscala.SellerApplicationState
+import com.twitter.ubs.thriftscala.SellerTrack
+import com.twitter.user_session_store.thriftscala.UserSession
+import com.twitter.util.Duration
+import com.twitter.util.Future
+import com.twitter.util.Time
+import com.twitter.wtf.scalding.common.thriftscala.UserFeatures
+
+/**
+ * Builds the push [[Target]] for a logged-in user out of the injected stores.
+ *
+ * The first parameter list carries the stores, deciders and scorers the target reads from;
+ * the second carries the stats receiver, which is also exposed as the implicit
+ * `statsReceiver` for collaborators created inside this class.
+ */
+case class PushTargetUserBuilder(
+  historyStore: PushServiceHistoryStore,
+  emailHistoryStore: PushServiceHistoryStore,
+  labeledPushRecsStore: ReadableStore[LabeledPushRecsStoreKey, UserHistoryValue],
+  onlineUserHistoryStore: ReadableStore[OnlineUserHistoryKey, UserHistoryValue],
+  pushRecItemsStore: ReadableStore[PushRecItemsKey, RecItems],
+  userStore: ReadableStore[Long, User],
+  pushInfoStore: ReadableStore[Long, UserForPushTargeting],
+  userCountryStore: ReadableStore[Long, Location],
+  userUtcOffsetStore: ReadableStore[Long, Duration],
+  dauProbabilityStore: ReadableStore[Long, DauProbability],
+  nsfwConsumerStore: ReadableStore[Long, NSFWUserSegmentation],
+  userFeatureStore: ReadableStore[Long, UserFeatures],
+  userTargetingPropertyStore: ReadableStore[Long, UserTargetingProperty],
+  mrUserStateStore: ReadableStore[Long, MRUserHmmState],
+  tweetImpressionStore: ReadableStore[Long, Seq[Long]],
+  ntabCaretFeedbackStore: ReadableStore[GenericNotificationsFeedbackRequest, Seq[
+    CaretFeedbackDetails
+  ]],
+  genericFeedbackStore: ReadableStore[FeedbackRequest, Seq[FeedbackPromptValue]],
+  genericNotificationFeedbackStore: GenericFeedbackStore,
+  timelinesUserSessionStore: ReadableStore[Long, UserSession],
+  cachedTweetyPieStore: ReadableStore[Long, TweetyPieResult],
+  strongTiesStore: ReadableStore[Long, STPResult],
+  userHTLLastVisitStore: ReadableStore[Long, Seq[Long]],
+  userLanguagesStore: ReadableStore[Long, UserLanguages],
+  inputDecider: Decider,
+  inputAbDecider: LoggingABDecider,
+  realGraphScoresTop500InStore: ReadableStore[Long, Map[Long, Double]],
+  recentFollowsStore: ReadableStore[Long, Seq[Long]],
+  resurrectedUserStore: ReadableStore[Long, String],
+  configParamsBuilder: ConfigParamsBuilder,
+  optOutUserInterestsStore: ReadableStore[UserId, Seq[InterestId]],
+  deviceInfoStore: ReadableStore[Long, DeviceInfo],
+  pushcapDynamicPredictionStore: ReadableStore[Long, PushcapUserHistory],
+  appPermissionStore: ReadableStore[(Long, (String, String)), AppPermission],
+  optoutModelScorer: PushMLModelScorer,
+  inlineActionHistoryStore: ReadableStore[Long, Seq[(Long, String)]],
+  featureHydrator: FeatureHydrator,
+  openAppUserStore: ReadableStore[Long, Boolean],
+  openedPushByHourAggregatedStore: ReadableStore[Long, Map[Int, Int]],
+  geoduckStoreV2: ReadableStore[Long, LocationResponse],
+  superFollowEligibilityUserStore: ReadableStore[Long, Boolean],
+  superFollowApplicationStatusStore: ReadableStore[(Long, SellerTrack), SellerApplicationState]
+)(
+  globalStatsReceiver: StatsReceiver) {
+
+  implicit val statsReceiver: StatsReceiver = globalStatsReceiver
+
+  private val log = MRLogger("PushTargetUserBuilder")
+  private val recentFollowscounter = statsReceiver.counter("query_recent_follows")
+  private val isModelTrainingDataCounter =
+    statsReceiver.scope("TargetUserBuilder").counter("is_model_training")
+  private val feedbackStoreGenerationErr = statsReceiver.counter("feedback_store_generation_error")
+  private val newSignUpUserStats = statsReceiver.counter("new_signup_user")
+  private val pushcapSelectionStat = statsReceiver.scope("pushcap_modeling")
+  private val dormantUserCount = statsReceiver.counter("dormant_user_counter")
+  private val optoutModelStat = statsReceiver.scope("optout_modeling")
+  private val placeFoundStat = statsReceiver.scope("geoduck_v2").stat("places_found")
+  private val placesNotFound = statsReceiver.scope("geoduck_v2").counter("places_not_found")
+  // Email history store stats
+  private val emailHistoryStats = statsReceiver.scope("email_tweet_history")
+  private val emptyEmailHistoryCounter = emailHistoryStats.counter("empty")
+  private val nonEmptyEmailHistoryCounter = emailHistoryStats.counter("non_empty")
+
+  // Notification categories used to filter caret feedback (see `caretFeedbacks`).
+  private val MagicRecsCategory = "MagicRecs"
+  private val MomentsViaMagicRecsCategory = "MomentsViaMagicRecs"
+  private val MomentsCategory = "Moments"
+
+  /**
+   * Builds the [[Target]] for `userId`.
+   *
+   * Five lookups are joined up front (user, device info, push-targeting info, 30-day
+   * notification history, 7-day email history); their results feed `getCustomFSFields`
+   * and are captured by the target. Most other target attributes are `lazy val`s that
+   * query their store on first access.
+   *
+   * @param userId              id of the user to build the target for
+   * @param inputPushContext    optional request context (history store selection, request
+   *                            source, new-user-loop flag)
+   * @param forcedFeatureValues optional feature values forwarded to `configParamsBuilder`
+   * @return the target, available once the joined lookups and custom fields have resolved
+   */
+  def buildTarget(
+    userId: Long,
+    inputPushContext: Option[PushContext],
+    forcedFeatureValues: Option[Map[String, configapi.FeatureValue]] = None
+  ): Future[Target] = {
+    val historyStoreKeyContext = HistoryStoreKeyContext(
+      userId,
+      inputPushContext.flatMap(_.useMemcacheForHistory).getOrElse(false)
+    )
+    Future
+      .join(
+        userStore.get(userId),
+        deviceInfoStore.get(userId),
+        pushInfoStore.get(userId),
+        historyStore.get(historyStoreKeyContext, Some(30.days)),
+        emailHistoryStore.get(
+          HistoryStoreKeyContext(userId, useStoreB = false),
+          Some(7.days) // we only keep 7 days of email tweet history
+        )
+      ).flatMap {
+        case (userOpt, deviceInfoOpt, userForPushTargetingInfoOpt, notifHistory, emailHistory) =>
+          getCustomFSFields(
+            userId,
+            userOpt,
+            deviceInfoOpt,
+            userForPushTargetingInfoOpt,
+            notifHistory,
+            inputPushContext.flatMap(_.requestSource)).map { customFSField =>
+            new Target {
+
+              override lazy val stats: StatsReceiver = statsReceiver
+
+              override val targetId: Long = userId
+
+              override val targetUser: Future[Option[User]] = Future.value(userOpt)
+
+              // True only when the request source of the push context is Email.
+              override val isEmailUser: Boolean =
+                inputPushContext.flatMap(_.requestSource) match {
+                  case Some(source) if source == RequestSource.Email => true
+                  case _ => false
+                }
+
+              override val pushContext = inputPushContext
+
+              override def globalStats: StatsReceiver = globalStatsReceiver
+
+              override lazy val abDecider: ABDeciderWithOverride =
+                ABDeciderWithOverride(inputAbDecider, ddgOverrideOption)
+
+              override lazy val pushRecItems: Future[RecItems] =
+                pushRecItemsStore
+                  .get(PushRecItemsKey(historyStoreKeyContext, history))
+                  .map(_.getOrElse(RecItems.empty))
+
+              // List of past tweet candidates sent in the past through email with timestamp
+              override lazy val emailRecItems: Future[Seq[(Time, Long)]] = {
+                Future.value {
+                  emailHistory.sortedEmailHistory.flatMap {
+                    case (timeStamp, notification) =>
+                      notification.contentRecsNotification
+                        .map { notification =>
+                          notification.recommendations.contentRecCollections.flatMap {
+                            contentRecs =>
+                              contentRecs.contentRecModules.flatMap { contentRecModule =>
+                                contentRecModule.recData match {
+                                  case ContentRecData.TweetRec(tweetRec) =>
+                                    nonEmptyEmailHistoryCounter.incr()
+                                    Seq(tweetRec.tweetId)
+                                  case _ =>
+                                    emptyEmailHistoryCounter.incr()
+                                    Nil
+                                }
+                              }
+                          }
+                        }.getOrElse {
+                          emptyEmailHistoryCounter.incr()
+                          Nil
+                        }.map(timeStamp -> _)
+                  }
+                }
+              }
+
+              override lazy val history: Future[History] = Future.value(notifHistory)
+
+              override lazy val pushTargeting: Future[Option[UserForPushTargeting]] =
+                Future.value(userForPushTargetingInfoOpt)
+
+              override lazy val decider: Decider = inputDecider
+
+              override lazy val location: Future[Option[Location]] =
+                userCountryStore.get(userId)
+
+              override lazy val deviceInfo: Future[Option[DeviceInfo]] =
+                Future.value(deviceInfoOpt)
+
+              override lazy val targetLanguage: Future[Option[String]] = targetUser map { userOpt =>
+                userOpt.flatMap(_.account.map(_.language))
+              }
+
+              override lazy val targetAgeInYears: Future[Option[Int]] =
+                Future.value(customFSField.userAge)
+
+              override lazy val metastoreLanguages: Future[Option[UserLanguages]] =
+                userLanguagesStore.get(targetId)
+
+              override lazy val utcOffset: Future[Option[Duration]] =
+                userUtcOffsetStore.get(targetId)
+
+              override lazy val userFeatures: Future[Option[UserFeatures]] =
+                userFeatureStore.get(targetId)
+
+              // User states are carried as strings in the custom FS fields and parsed back
+              // into their enum form here.
+              override lazy val targetUserState: Future[Option[UserState]] =
+                Future.value(
+                  customFSField.userState
+                    .flatMap(userState => UserState.valueOf(userState)))
+
+              override lazy val targetMrUserState: Future[Option[MrUserState]] =
+                Future.value(
+                  customFSField.mrUserState
+                    .flatMap(mrUserState => MrUserState.valueOf(mrUserState)))
+
+              override lazy val accountStateWithDeviceInfo: Future[
+                Option[SecondaryAccountsByUserState]
+              ] = Future.None
+
+              override lazy val dauProbability: Future[Option[DauProbability]] = {
+                dauProbabilityStore.get(targetId)
+              }
+
+              override lazy val labeledPushRecsHydrated: Future[Option[UserHistoryValue]] =
+                labeledPushRecsStore.get(LabeledPushRecsStoreKey(this, historyStoreKeyContext))
+
+              override lazy val onlineLabeledPushRecs: Future[Option[UserHistoryValue]] =
+                labeledPushRecsHydrated.flatMap { labeledPushRecs =>
+                  history.flatMap { history =>
+                    onlineUserHistoryStore.get(
+                      OnlineUserHistoryKey(targetId, labeledPushRecs, Some(history))
+                    )
+                  }
+                }
+
+              override lazy val tweetImpressionResults: Future[Seq[Long]] =
+                tweetImpressionStore.get(targetId).map {
+                  case Some(impressionList) =>
+                    impressionList
+                  case _ => Nil
+                }
+
+              // Only the V1 variant of the real-graph features union is surfaced.
+              override lazy val realGraphFeatures: Future[Option[RealGraphFeatures]] =
+                timelinesUserSessionStore.get(targetId).map { userSessionOpt =>
+                  userSessionOpt.flatMap { userSession =>
+                    userSession.realGraphFeatures.collect {
+                      case RealGraphFeaturesUnion.V1(rGFeatures) =>
+                        rGFeatures
+                    }
+                  }
+                }
+
+              override lazy val stpResult: Future[Option[STPResult]] =
+                strongTiesStore.get(targetId)
+
+              // Largest stored visit timestamp; an absent or empty list yields None.
+              override lazy val lastHTLVisitTimestamp: Future[Option[Long]] =
+                userHTLLastVisitStore.get(targetId).map {
+                  case Some(lastVisitTimestamps) if lastVisitTimestamps.nonEmpty =>
+                    Some(lastVisitTimestamps.max)
+                  case _ => None
+                }
+
+              // "See less often" caret feedback from the last 365 days, restricted to the
+              // three categories declared on the builder.
+              override lazy val caretFeedbacks: Future[Option[Seq[CaretFeedbackDetails]]] = {
+                val scribeHistoryLookbackPeriod = 365.days
+                val now = Time.now
+                val request = GenericNotificationsFeedbackRequest(
+                  userId = targetId,
+                  eventStartTimestamp = now - scribeHistoryLookbackPeriod,
+                  eventEndTimestamp = now,
+                  filterCategory =
+                    Some(Set(MagicRecsCategory, MomentsViaMagicRecsCategory, MomentsCategory)),
+                  filterFeedbackActionText =
+                    Some(Set(DismissMenuFeedbackAction.FeedbackActionTextSeeLessOften))
+                )
+                ntabCaretFeedbackStore.get(request)
+              }
+
+              // NOTE(review): `now` below is never used - the request reads the clock again
+              // through `.ago` and `Time.now`. Harmless, but inconsistent with
+              // `caretFeedbacks` above.
+              override lazy val notificationFeedbacks: Future[
+                Option[Seq[FeedbackPromptValue]]
+              ] = {
+                val scribeHistoryLookbackPeriod = 30.days
+                val now = Time.now
+                val request = FeedbackRequest(
+                  userId = targetId,
+                  oldestTimestamp = scribeHistoryLookbackPeriod.ago,
+                  newestTimestamp = Time.now,
+                  feedbackInteraction = FeedbackInteraction.Feedback
+                )
+                genericFeedbackStore.get(request)
+              }
+
+              // DEPRECATED: Use notificationFeedbacks instead.
+              // This method will increase latency dramatically.
+              override lazy val promptFeedbacks: Stitch[Seq[FeedbackPromptValue]] = {
+                val scribeHistoryLookbackPeriod = 7.days
+
+                genericNotificationFeedbackStore
+                  .getAll(
+                    userId = targetId,
+                    oldestTimestamp = scribeHistoryLookbackPeriod.ago,
+                    newestTimestamp = Time.now,
+                    feedbackInteraction = FeedbackInteraction.Feedback
+                  ).handle {
+                    case _: GenericFeedbackStoreException => {
+                      feedbackStoreGenerationErr.incr()
+                      Seq.empty[FeedbackPromptValue]
+                    }
+                  }
+              }
+
+              override lazy val optOutUserInterests: Future[Option[Seq[InterestId]]] = {
+                optOutUserInterestsStore.get(targetId)
+              }
+
+              // A DDG override only becomes an experiment override when both the
+              // experiment and the bucket are present.
+              private val experimentOverride = ddgOverrideOption.map {
+                case DDGOverride(Some(exp), Some(bucket)) =>
+                  Set(ExperimentOverride(exp, bucket))
+                case _ => Set.empty[ExperimentOverride]
+              }
+
+              override val signupCountryCode =
+                Future.value(userOpt.flatMap(_.safety.flatMap(_.signupCountryCode)))
+
+              // Feature-switch params for this user: the recipient is assembled from the
+              // user, device info and custom FS fields fetched by buildTarget.
+              override lazy val params: configapi.Params = {
+                val fsRecipient = Recipient(
+                  userId = Some(targetId),
+                  userRoles = userOpt.flatMap(_.roles.map(_.roles.toSet)),
+                  clientApplicationId = deviceInfoOpt.flatMap(_.guessedPrimaryClientAppId),
+                  userAgent = deviceInfoOpt.flatMap(_.guessedPrimaryDeviceUserAgent),
+                  countryCode =
+                    userOpt.flatMap(_.account.flatMap(_.countryCode.map(_.toUpperCase))),
+                  customFields = Some(customFSField.fsMap),
+                  signupCountryCode =
+                    userOpt.flatMap(_.safety.flatMap(_.signupCountryCode.map(_.toUpperCase))),
+                  languageCode = deviceInfoOpt.flatMap {
+                    _.deviceLanguages.flatMap(IbisAppPushDeviceSettingsUtil.inferredDeviceLanguage)
+                  }
+                )
+
+                configParamsBuilder.build(
+                  userId = Some(targetId),
+                  experimentOverrides = experimentOverride,
+                  featureRecipient = Some(fsRecipient),
+                  forcedFeatureValues = forcedFeatureValues.getOrElse(Map.empty),
+                )
+              }
+
+              override lazy val mrRequestContextForFeatureStore =
+                MrRequestContextForFeatureStore(targetId, params, isModelTrainingData)
+
+              override lazy val dynamicPushcap: Future[Option[PushcapInfo]] = {
+                // Get the pushcap from the pushcap model prediction store
+                if (params(PushParams.EnableModelBasedPushcapAssignments)) {
+                  val originalPushcapInfoFut =
+                    PushCapUtil.getPushcapFromUserHistory(
+                      userId,
+                      pushcapDynamicPredictionStore,
+                      params(FeatureSwitchParams.PushcapModelType),
+                      params(FeatureSwitchParams.PushcapModelPredictionVersion),
+                      pushcapSelectionStat
+                    )
+                  // Modify the push cap info if there is a restricted min value for predicted push caps.
+                  val restrictedPushcap = params(PushFeatureSwitchParams.RestrictedMinModelPushcap)
+                  originalPushcapInfoFut.map {
+                    case Some(originalPushcapInfo) =>
+                      Some(
+                        getMinimumRestrictedPushcapInfo(
+                          restrictedPushcap,
+                          originalPushcapInfo,
+                          pushcapSelectionStat))
+                    case _ => None
+                  }
+                } else Future.value(None)
+              }
+
+              override lazy val targetHydrationContext: Future[HydrationContext] =
+                HydrationContextBuilder.build(this)
+
+              override lazy val featureMap: Future[FeatureMap] =
+                targetHydrationContext.flatMap { hydrationContext =>
+                  featureHydrator.hydrateTarget(
+                    hydrationContext,
+                    this.params,
+                    this.mrRequestContextForFeatureStore)
+                }
+
+              // One target-level prediction per configured global opt-out model.
+              override lazy val globalOptoutProbabilities: Seq[Future[Option[Double]]] = {
+                params(PushFeatureSwitchParams.GlobalOptoutModelParam).map { model_id =>
+                  optoutModelScorer
+                    .singlePredictionForTargetLevel(model_id, targetId, featureMap)
+                }
+              }
+
+              // The bucket model is scored only when some global score reaches the threshold
+              // at the same position; scores and thresholds are paired by index via zip.
+              override lazy val bucketOptoutProbability: Future[Option[Double]] = {
+                Future
+                  .collect(globalOptoutProbabilities).map {
+                    _.zip(params(PushFeatureSwitchParams.GlobalOptoutThresholdParam))
+                      .exists {
+                        case (Some(score), threshold) => score >= threshold
+                        case _ => false
+                      }
+                  }.flatMap {
+                    case true =>
+                      optoutModelScorer.singlePredictionForTargetLevel(
+                        params(PushFeatureSwitchParams.BucketOptoutModelParam),
+                        targetId,
+                        featureMap)
+                    case _ => Future.None
+                  }
+              }
+
+              // Picks the pushcap of the first slot whose threshold is >= the bucket score;
+              // negative pushcaps are treated as "no adjustment".
+              // NOTE(review): the pushcap list is indexed with the threshold list's index,
+              // which assumes both params have the same length - confirm.
+              override lazy val optoutAdjustedPushcap: Future[Option[Short]] = {
+                if (params(PushFeatureSwitchParams.EnableOptoutAdjustedPushcap)) {
+                  bucketOptoutProbability.map {
+                    case Some(score) =>
+                      val idx = params(PushFeatureSwitchParams.BucketOptoutSlotThresholdParam)
+                        .indexWhere(score <= _)
+                      if (idx >= 0) {
+                        val pushcap =
+                          params(PushFeatureSwitchParams.BucketOptoutSlotPushcapParam)(idx).toShort
+                        optoutModelStat.scope("adjusted_pushcap").counter(f"$pushcap").incr()
+                        if (pushcap >= 0) Some(pushcap)
+                        else None
+                      } else None
+                    case _ => None
+                  }
+                } else Future.None
+              }
+
+              override lazy val seedsWithWeight: Future[Option[Map[Long, Double]]] = {
+                Future
+                  .join(
+                    realGraphScoresTop500InStore.get(userId),
+                    targetUserState,
+                    targetUser
+                  )
+                  .flatMap {
+                    case (seedSetOpt, userState, gizmoduckUser) =>
+                      val seedSet = seedSetOpt.getOrElse(Map.empty[Long, Double])
+
+                      //If new sign_up or New user, combine recent_follows with real graph seedset
+                      val isNewUserEnabled = {
+                        val isNewerThan7days = customFSField.daysSinceSignup <= 7
+                        val isNewUserState = userState.contains(UserState.New)
+                        isNewUserState || isNewSignup || isNewerThan7days
+                      }
+
+                      // Recent follows are fetched only for new users or model-training
+                      // requests; otherwise the seed set is used on its own.
+                      val nonSeedSetFollowsFut = gizmoduckUser match {
+                        case Some(user) if isNewUserEnabled =>
+                          recentFollowscounter.incr()
+                          recentFollowsStore.get(user.id)
+
+                        case Some(user) if this.isModelTrainingData =>
+                          recentFollowscounter.incr()
+                          isModelTrainingDataCounter.incr()
+                          recentFollowsStore.get(user.id)
+
+                        case _ => Future.None
+                      }
+                      nonSeedSetFollowsFut.map { nonSeedSetFollows =>
+                        Some(
+                          SeedsetUtil.combineRecentFollowsWithWeightedSeedset(
+                            seedSet,
+                            nonSeedSetFollows.getOrElse(Nil)
+                          )
+                        )
+                      }
+                  }
+              }
+
+              override def magicFanoutReasonHistory30Days: Future[MagicFanoutReasonHistory] =
+                history.map(history => MagicFanoutReasonHistory(history))
+
+              override val isNewSignup: Boolean =
+                pushContext.flatMap(_.isFromNewUserLoopProcessor).getOrElse(false)
+
+              override lazy val resurrectionDate: Future[Option[String]] =
+                Future.value(customFSField.reactivationDate)
+
+              override lazy val isResurrectedUser: Boolean =
+                customFSField.daysSinceReactivation.isDefined
+
+              override lazy val timeSinceResurrection: Option[Duration] =
+                customFSField.daysSinceReactivation.map(Duration.fromDays)
+
+              override lazy val appPermissions: Future[Option[AppPermission]] =
+                PushAppPermissionUtil.getAppPermission(
+                  userId,
+                  PushAppPermissionUtil.AddressBookPermissionKey,
+                  deviceInfo,
+                  appPermissionStore)
+
+              override lazy val inlineActionHistory: Future[Seq[(Long, String)]] = {
+                inlineActionHistoryStore
+                  .get(userId).map {
+                    case Some(sortedInlineActionHistory) => sortedInlineActionHistory
+                    case _ => Seq.empty
+                  }
+              }
+
+              lazy val isOpenAppExperimentUser: Future[Boolean] =
+                openAppUserStore.get(userId).map(_.contains(true))
+
+              override lazy val openedPushByHourAggregated: Future[Option[Map[Int, Int]]] =
+                openedPushByHourAggregatedStore.get(userId)
+
+              // Places from the geoduck response; records the number found, or counts a
+              // miss when the response has none.
+              override lazy val places: Future[Seq[Place]] = {
+                geoduckStoreV2
+                  .get(targetId)
+                  .map(_.flatMap(_.places))
+                  .map {
+                    case Some(placeSeq) if placeSeq.nonEmpty =>
+                      placeFoundStat.add(placeSeq.size)
+                      placeSeq
+                    case _ =>
+                      placesNotFound.incr()
+                      Seq.empty
+                  }
+              }
+
+              override val isBlueVerified: Future[Option[Boolean]] =
+                Future.value(userOpt.flatMap(_.safety.flatMap(_.isBlueVerified)))
+
+              override val isVerified: Future[Option[Boolean]] =
+                Future.value(userOpt.flatMap(_.safety.map(_.verified)))
+
+              override lazy val isSuperFollowCreator: Future[Option[Boolean]] =
+                superFollowEligibilityUserStore.get(targetId)
+            }
+          }
+      }
+  }
+
+  /**
+   * Provide general way to add needed FS for target user, and package them in CustomFSFields.
+   * Custom Fields is a powerful feature that allows Feature Switch library users to define and
+   * match against any arbitrary fields.
+   **/
+  private def getCustomFSFields(
+    userId: Long,
+    userOpt: Option[User],
+    deviceInfo: Option[DeviceInfo],
+    userForPushTargetingInfo: Option[UserForPushTargeting],
+    notifHistory: History,
+    requestSource: Option[RequestSource]
+  ): Future[CustomFSFields] = {
+    // Inputs already fetched by the caller (user, device info, push-targeting info,
+    // notification history) are passed in; the remaining signals are looked up here and
+    // joined before the CustomFSFields value is assembled.
+    val reactivationDateFutOpt: Future[Option[String]] = resurrectedUserStore.get(userId)
+    val reactivationTimeFutOpt: Future[Option[Time]] =
+      reactivationDateFutOpt.map(_.map(dateStr => DateUtil.dateStrToTime(dateStr)))
+
+    // "Reactivated" means a reactivation date exists and is less than 30 days old.
+    val isReactivatedUserFut: Future[Boolean] = reactivationTimeFutOpt.map { timeOpt =>
+      timeOpt
+        .exists { time => Time.now - time < 30.days }
+    }
+
+    val daysSinceReactivationFut: Future[Option[Int]] =
+      reactivationTimeFutOpt.map(_.map(time => Time.now.since(time).inDays))
+
+    // Account age is derived from the timestamp embedded in the user id.
+    val daysSinceSignup: Int = (Time.now - SnowflakeUtil.timeFromId(userId)).inDays
+    if (daysSinceSignup < 14) newSignUpUserStats.incr()
+
+    val targetAgeInYears = userOpt.flatMap(_.extendedProfile.flatMap(_.ageInYears))
+
+    // Last login = the later of the latest home-timeline visit and the last app activity.
+    // The `nonEmpty` guard matters: `max` on an empty Seq throws, which would fail this
+    // Future and with it the whole target build. An empty visit list is therefore handled
+    // like a missing one, as `lastHTLVisitTimestamp` on the target already does.
+    // NOTE(review): when `userForPushTargetingInfo` is None the HTL visit time is dropped
+    // as well (outer flatMap) - confirm this is intended.
+    val lastLoginFut: Future[Option[Long]] =
+      userHTLLastVisitStore.get(userId).map {
+        case Some(lastHTLVisitTimes) if lastHTLVisitTimes.nonEmpty =>
+          val latestHTLVisitTime = lastHTLVisitTimes.max
+          userForPushTargetingInfo.flatMap(
+            _.lastActiveOnAppTimestamp
+              .map(_.max(latestHTLVisitTime)).orElse(Some(latestHTLVisitTime)))
+        case _ =>
+          userForPushTargetingInfo.flatMap(_.lastActiveOnAppTimestamp)
+      }
+
+    // Days since last login; users inactive for more than 21 days are counted as dormant.
+    val daysSinceLoginFut = lastLoginFut.map {
+      _.map { lastLoginTimestamp =>
+        val timeSinceLogin = Time.now - Time.fromMilliseconds(lastLoginTimestamp)
+        if (timeSinceLogin.inDays > 21) {
+          dormantUserCount.incr()
+        }
+        timeSinceLogin.inDays
+      }
+    }
+
+    /* Could add more custom FS here */
+    val userNSFWInfoFut: Future[Option[NsfwInfo]] =
+      nsfwConsumerStore
+        .get(userId).map(_.map(nsfwUserSegmentation => NsfwInfo(nsfwUserSegmentation)))
+
+    // Both user states are carried as their enum names (strings).
+    val userStateFut: Future[Option[String]] = userFeatureStore.get(userId).map { userFeaturesOpt =>
+      userFeaturesOpt.flatMap { uFeats =>
+        uFeats.userState.map(uState => uState.name)
+      }
+    }
+
+    val mrUserStateFut: Future[Option[String]] =
+      mrUserStateStore.get(userId).map { mrUserStateOpt =>
+        mrUserStateOpt.flatMap { mrUserState =>
+          mrUserState.userState.map(_.name)
+        }
+      }
+
+    Future
+      .join(
+        reactivationDateFutOpt,
+        isReactivatedUserFut,
+        userStateFut,
+        mrUserStateFut,
+        daysSinceLoginFut,
+        daysSinceReactivationFut,
+        userNSFWInfoFut
+      ).map {
+        case (
+              reactivationDate,
+              isReactivatedUser,
+              userState,
+              mrUserState,
+              daysSinceLogin,
+              daysSinceReactivation,
+              userNSFWInfo) =>
+          // Number of distinct days (by day index) on which a push was recorded.
+          val numDaysReceivedPushInLast30Days: Int =
+            notifHistory.history.keys.map(_.inDays).toSet.size
+
+          // Called for its side effect only; the result is not used here.
+          NsfwPersonalizationUtil.computeNsfwUserStats(userNSFWInfo)
+
+          CustomFSFields(
+            isReactivatedUser,
+            daysSinceSignup,
+            numDaysReceivedPushInLast30Days,
+            daysSinceLogin,
+            daysSinceReactivation,
+            userOpt,
+            userState,
+            mrUserState,
+            reactivationDate,
+            requestSource.map(_.name),
+            targetAgeInYears,
+            userNSFWInfo,
+            deviceInfo
+          )
+      }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/RFPHTargetPredicateGenerator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/RFPHTargetPredicateGenerator.scala
new file mode 100644
index 000000000..745e061fb
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/RFPHTargetPredicateGenerator.scala
@@ -0,0 +1,37 @@
+package com.twitter.frigate.pushservice.target
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.predicate.TargetPromptFeedbackFatiguePredicate
+import com.twitter.frigate.common.predicate.TargetUserPredicates
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.params.PushConstants
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+import com.twitter.frigate.pushservice.params.PushParams
+import com.twitter.frigate.pushservice.predicate.TargetNtabCaretClickFatiguePredicate
+import
com.twitter.frigate.pushservice.predicate.TargetPredicates +import com.twitter.hermit.predicate.NamedPredicate + +class RFPHTargetPredicateGenerator(implicit statsReceiver: StatsReceiver) { + val predicates: List[NamedPredicate[Target]] = List( + TargetPredicates.magicRecsMinDurationSinceSent(), + TargetPredicates.targetHTLVisitPredicate(), + TargetPredicates.inlineActionFatiguePredicate(), + TargetPredicates.targetFatiguePredicate(), + TargetUserPredicates.secondaryDormantAccountPredicate(), + TargetPredicates.targetValidMobileSDKPredicate, + TargetPredicates.targetPushBitEnabledPredicate, + TargetUserPredicates.targetUserExists(), + TargetPredicates.paramPredicate(PushFeatureSwitchParams.EnablePushRecommendationsParam), + TargetPromptFeedbackFatiguePredicate.responseNoPredicate( + PushParams.EnablePromptFeedbackFatigueResponseNoPredicate, + PushConstants.AcceptableTimeSinceLastNegativeResponse), + TargetPredicates.teamExceptedPredicate(TargetNtabCaretClickFatiguePredicate.apply()), + TargetPredicates.optoutProbPredicate(), + TargetPredicates.webNotifsHoldback() + ) +} + +object RFPHTargetPredicates { + def apply(implicit statsReceiver: StatsReceiver): List[NamedPredicate[Target]] = + new RFPHTargetPredicateGenerator().predicates +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetAppPermissions.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetAppPermissions.scala new file mode 100644 index 000000000..c84af1286 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetAppPermissions.scala @@ -0,0 +1,10 @@ +package com.twitter.frigate.pushservice.target + +import com.twitter.permissions_storage.thriftscala.AppPermission +import com.twitter.util.Future + +trait TargetAppPermissions { + + def appPermissions: Future[Option[AppPermission]] + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetScoringDetails.scala 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetScoringDetails.scala new file mode 100644 index 000000000..2a9c26e8b --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/target/TargetScoringDetails.scala @@ -0,0 +1,121 @@ +package com.twitter.frigate.pushservice.target + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.FeatureMap +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.candidate.TargetDecider +import com.twitter.frigate.common.candidate.UserDetails +import com.twitter.frigate.data_pipeline.thriftscala.UserHistoryValue +import com.twitter.frigate.dau_model.thriftscala.DauProbability +import com.twitter.frigate.scribe.thriftscala.SkipModelInfo +import com.twitter.hermit.stp.thriftscala.STPResult +import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphFeatures +import com.twitter.util.Future +import com.twitter.util.Time +import com.twitter.frigate.pushservice.params.DeciderKey +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.params.WeightedOpenOrNtabClickModel +import com.twitter.frigate.pushservice.util.PushDeviceUtil +import com.twitter.nrel.hydration.push.HydrationContext +import com.twitter.timelines.configapi.FSParam + +trait TargetScoringDetails { + tuc: TargetUser with TargetDecider with TargetABDecider with UserDetails => + + def stats: StatsReceiver + + /* + * We have 3 types of model training data: + * 1, skip ranker and model predicates + * controlled by decider frigate_notifier_quality_model_training_data + * the data distribution is same to the distribution in ranking + * 2, skip model predicates only + * controlled by decider skip_ml_model_predicate + * the data distribution is same to the distribution in filtering + * 3, no skip, 
only scribe features + * controlled by decider scribe_model_features + * the data distribution is same to production traffic + * The "miscellaneous" is used to store all misc information for selecting the data offline (e.g., ddg-bucket information) + * */ + lazy val skipModelInfo: Option[SkipModelInfo] = { + val trainingDataDeciderKey = DeciderKey.trainingDataDeciderKey.toString + val skipMlModelPredicateDeciderKey = DeciderKey.skipMlModelPredicateDeciderKey.toString + val scribeModelFeaturesDeciderKey = DeciderKey.scribeModelFeaturesDeciderKey.toString + val miscellaneous = None + + if (isDeciderEnabled(trainingDataDeciderKey, stats, useRandomRecipient = true)) { + Some( + SkipModelInfo( + skipPushOpenPredicate = Some(true), + skipPushRanker = Some(true), + miscellaneous = miscellaneous)) + } else if (isDeciderEnabled(skipMlModelPredicateDeciderKey, stats, useRandomRecipient = true)) { + Some( + SkipModelInfo( + skipPushOpenPredicate = Some(true), + skipPushRanker = Some(false), + miscellaneous = miscellaneous)) + } else if (isDeciderEnabled(scribeModelFeaturesDeciderKey, stats, useRandomRecipient = true)) { + Some(SkipModelInfo(noSkipButScribeFeatures = Some(true), miscellaneous = miscellaneous)) + } else { + Some(SkipModelInfo(miscellaneous = miscellaneous)) + } + } + + lazy val scribeFeatureForRequestScribe = + isDeciderEnabled( + DeciderKey.scribeModelFeaturesForRequestScribe.toString, + stats, + useRandomRecipient = true) + + lazy val rankingModelParam: Future[FSParam[WeightedOpenOrNtabClickModel.ModelNameType]] = + tuc.deviceInfo.map { deviceInfoOpt => + if (PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfoOpt) && + tuc.params(PushParams.AndroidOnlyRankingExperimentParam)) { + PushFeatureSwitchParams.WeightedOpenOrNtabClickRankingModelForAndroidParam + } else { + PushFeatureSwitchParams.WeightedOpenOrNtabClickRankingModelParam + } + } + + lazy val filteringModelParam: FSParam[WeightedOpenOrNtabClickModel.ModelNameType] = + 
PushFeatureSwitchParams.WeightedOpenOrNtabClickFilteringModelParam + + def skipMlRanker: Boolean = skipModelInfo.exists(_.skipPushRanker.contains(true)) + + def skipModelPredicate: Boolean = skipModelInfo.exists(_.skipPushOpenPredicate.contains(true)) + + def noSkipButScribeFeatures: Boolean = + skipModelInfo.exists(_.noSkipButScribeFeatures.contains(true)) + + def isModelTrainingData: Boolean = skipMlRanker || skipModelPredicate || noSkipButScribeFeatures + + def scribeFeatureWithoutHydratingNewFeatures: Boolean = + isDeciderEnabled( + DeciderKey.scribeModelFeaturesWithoutHydratingNewFeaturesDeciderKey.toString, + stats, + useRandomRecipient = true + ) + + def targetHydrationContext: Future[HydrationContext] + + def featureMap: Future[FeatureMap] + + def dauProbability: Future[Option[DauProbability]] + + def labeledPushRecsHydrated: Future[Option[UserHistoryValue]] + + def onlineLabeledPushRecs: Future[Option[UserHistoryValue]] + + def realGraphFeatures: Future[Option[RealGraphFeatures]] + + def stpResult: Future[Option[STPResult]] + + def globalOptoutProbabilities: Seq[Future[Option[Double]]] + + def bucketOptoutProbability: Future[Option[Double]] + + val sendTime: Long = Time.now.inMillis +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdaptorUtils.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdaptorUtils.scala new file mode 100644 index 000000000..dad918023 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdaptorUtils.scala @@ -0,0 +1,15 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object AdaptorUtils { + def getTweetyPieResults( + tweetIds: Set[Long], + tweetyPieStore: ReadableStore[Long, TweetyPieResult], + ): Future[Map[Long, Option[TweetyPieResult]]] = + FutureOps + 
.mapCollect(tweetyPieStore.multiGet(tweetIds)) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdhocStatsUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdhocStatsUtil.scala new file mode 100644 index 000000000..1d3ff461e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/AdhocStatsUtil.scala @@ -0,0 +1,104 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.CandidateResult +import com.twitter.frigate.common.base.Invalid +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.common.base.TweetCandidate +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams.ListOfAdhocIdsForStatsTracking + +class AdhocStatsUtil(stats: StatsReceiver) { + + private def getAdhocIds(candidate: PushCandidate): Set[Long] = + candidate.target.params(ListOfAdhocIdsForStatsTracking) + + private def isAdhocTweetCandidate(candidate: PushCandidate): Boolean = { + candidate match { + case tweetCandidate: RawCandidate with TweetCandidate with TweetAuthor => + tweetCandidate.authorId.exists(id => getAdhocIds(candidate).contains(id)) + case _ => false + } + } + + def getCandidateSourceStats(hydratedCandidates: Seq[CandidateDetails[PushCandidate]]): Unit = { + hydratedCandidates.foreach { hydratedCandidate => + if (isAdhocTweetCandidate(hydratedCandidate.candidate)) { + stats.scope("candidate_source").counter(hydratedCandidate.source).incr() + } + } + } + + def getPreRankingFilterStats( + preRankingFilteredCandidates: Seq[CandidateResult[PushCandidate, Result]] + ): Unit = { + 
preRankingFilteredCandidates.foreach { filteredCandidate => + if (isAdhocTweetCandidate(filteredCandidate.candidate)) { + filteredCandidate.result match { + case Invalid(reason) => + stats.scope("preranking_filter").counter(reason.getOrElse("unknown_reason")).incr() + case _ => + } + } + } + } + + def getLightRankingStats(lightRankedCandidates: Seq[CandidateDetails[PushCandidate]]): Unit = { + lightRankedCandidates.foreach { lightRankedCandidate => + if (isAdhocTweetCandidate(lightRankedCandidate.candidate)) { + stats.scope("light_ranker").counter("passed_light_ranking").incr() + } + } + } + + def getRankingStats(rankedCandidates: Seq[CandidateDetails[PushCandidate]]): Unit = { + rankedCandidates.zipWithIndex.foreach { + case (rankedCandidate, index) => + val rankerStats = stats.scope("heavy_ranker") + if (isAdhocTweetCandidate(rankedCandidate.candidate)) { + rankerStats.counter("ranked_candidates").incr() + rankerStats.stat("rank").add(index.toFloat) + rankedCandidate.candidate.modelScores.map { modelScores => + modelScores.foreach { + case (modelName, score) => + // multiply score by 100000 to not lose precision while converting to Float + val precisionScore = (score * 100000).toFloat + rankerStats.stat(modelName).add(precisionScore) + } + } + } + } + } + def getReRankingStats(rankedCandidates: Seq[CandidateDetails[PushCandidate]]): Unit = { + rankedCandidates.zipWithIndex.foreach { + case (rankedCandidate, index) => + val rankerStats = stats.scope("re_ranking") + if (isAdhocTweetCandidate(rankedCandidate.candidate)) { + rankerStats.counter("re_ranked_candidates").incr() + rankerStats.stat("re_rank").add(index.toFloat) + } + } + } + + def getTakeCandidateResultStats( + allTakeCandidateResults: Seq[CandidateResult[PushCandidate, Result]] + ): Unit = { + val takeStats = stats.scope("take_step") + allTakeCandidateResults.foreach { candidateResult => + if (isAdhocTweetCandidate(candidateResult.candidate)) { + candidateResult.result match { + case OK => + 
takeStats.counter("sent").incr() + case Invalid(reason) => + takeStats.counter(reason.getOrElse("unknown_reason")).incr() + case _ => + takeStats.counter("unknown_filter").incr() + } + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/Candidate2FrigateNotification.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/Candidate2FrigateNotification.scala new file mode 100644 index 000000000..6fa0bc288 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/Candidate2FrigateNotification.scala @@ -0,0 +1,119 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.thriftscala.FrigateNotification +import com.twitter.frigate.thriftscala.NotificationDisplayLocation + +object Candidate2FrigateNotification { + + def getFrigateNotification( + candidate: PushCandidate + )( + implicit statsReceiver: StatsReceiver + ): FrigateNotification = { + candidate match { + + case topicTweetCandidate: PushCandidate with BaseTopicTweetCandidate => + PushAdaptorUtil.getFrigateNotificationForTweet( + crt = topicTweetCandidate.commonRecType, + tweetId = topicTweetCandidate.tweetId, + scActions = Nil, + authorIdOpt = topicTweetCandidate.authorId, + pushCopyId = topicTweetCandidate.pushCopyId, + ntabCopyId = topicTweetCandidate.ntabCopyId, + simclusterId = None, + semanticCoreEntityIds = topicTweetCandidate.semanticCoreEntityId.map(List(_)), + candidateContent = topicTweetCandidate.content, + trendId = None + ) + + case trendTweetCandidate: PushCandidate with TrendTweetCandidate => + PushAdaptorUtil.getFrigateNotificationForTweet( + trendTweetCandidate.commonRecType, + trendTweetCandidate.tweetId, + Nil, + trendTweetCandidate.authorId, + trendTweetCandidate.pushCopyId, + trendTweetCandidate.ntabCopyId, + None, + None, + 
trendTweetCandidate.content, + Some(trendTweetCandidate.trendId) + ) + + case tripTweetCandidate: PushCandidate with OutOfNetworkTweetCandidate with TripCandidate => + PushAdaptorUtil.getFrigateNotificationForTweet( + crt = tripTweetCandidate.commonRecType, + tweetId = tripTweetCandidate.tweetId, + scActions = Nil, + authorIdOpt = tripTweetCandidate.authorId, + pushCopyId = tripTweetCandidate.pushCopyId, + ntabCopyId = tripTweetCandidate.ntabCopyId, + simclusterId = None, + semanticCoreEntityIds = None, + candidateContent = tripTweetCandidate.content, + trendId = None, + tweetTripDomain = tripTweetCandidate.tripDomain + ) + + case outOfNetworkTweetCandidate: PushCandidate with OutOfNetworkTweetCandidate => + PushAdaptorUtil.getFrigateNotificationForTweet( + crt = outOfNetworkTweetCandidate.commonRecType, + tweetId = outOfNetworkTweetCandidate.tweetId, + scActions = Nil, + authorIdOpt = outOfNetworkTweetCandidate.authorId, + pushCopyId = outOfNetworkTweetCandidate.pushCopyId, + ntabCopyId = outOfNetworkTweetCandidate.ntabCopyId, + simclusterId = None, + semanticCoreEntityIds = None, + candidateContent = outOfNetworkTweetCandidate.content, + trendId = None + ) + + case userCandidate: PushCandidate with UserCandidate with SocialContextActions => + PushAdaptorUtil.getFrigateNotificationForUser( + userCandidate.commonRecType, + userCandidate.userId, + userCandidate.socialContextActions, + userCandidate.pushCopyId, + userCandidate.ntabCopyId + ) + + case userCandidate: PushCandidate with UserCandidate => + PushAdaptorUtil.getFrigateNotificationForUser( + userCandidate.commonRecType, + userCandidate.userId, + Nil, + userCandidate.pushCopyId, + userCandidate.ntabCopyId + ) + + case tweetCandidate: PushCandidate with TweetCandidate with TweetDetails with SocialContextActions => + PushAdaptorUtil.getFrigateNotificationForTweetWithSocialContextActions( + tweetCandidate.commonRecType, + tweetCandidate.tweetId, + tweetCandidate.socialContextActions, + tweetCandidate.authorId, + 
tweetCandidate.pushCopyId, + tweetCandidate.ntabCopyId, + candidateContent = tweetCandidate.content, + semanticCoreEntityIds = None, + trendId = None + ) + case pushCandidate: PushCandidate => + FrigateNotification( + commonRecommendationType = pushCandidate.commonRecType, + notificationDisplayLocation = NotificationDisplayLocation.PushToMobileDevice, + pushCopyId = pushCandidate.pushCopyId, + ntabCopyId = pushCandidate.ntabCopyId + ) + + case _ => + statsReceiver + .scope(s"${candidate.commonRecType}").counter("frigate_notification_error").incr() + throw new IllegalStateException("Incorrect candidate type when create FrigateNotification") + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateHydrationUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateHydrationUtil.scala new file mode 100644 index 000000000..8b737fe67 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateHydrationUtil.scala @@ -0,0 +1,439 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.channels.common.thriftscala.ApiList +import com.twitter.escherbird.common.thriftscala.Domains +import com.twitter.escherbird.metadata.thriftscala.EntityMegadata +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base._ +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.magic_events.thriftscala.FanoutEvent +import com.twitter.frigate.magic_events.thriftscala.MagicEventsReason +import com.twitter.frigate.magic_events.thriftscala.TargetID +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model._ +import com.twitter.frigate.pushservice.model.FanoutReasonEntities +import com.twitter.frigate.pushservice.ml.PushMLModelScorer +import com.twitter.frigate.pushservice.model.candidate.CopyIds +import 
com.twitter.frigate.pushservice.store.EventRequest +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.gizmoduck.thriftscala.User +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.hermit.store.semantic_core.SemanticEntityForQuery +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.livevideo.timeline.domain.v2.{Event => LiveEvent} +import com.twitter.simclusters_v2.thriftscala.SimClustersInferredEntities +import com.twitter.storehaus.FutureOps +import com.twitter.storehaus.ReadableStore +import com.twitter.strato.client.UserId +import com.twitter.ubs.thriftscala.AudioSpace +import com.twitter.util.Future + +object CandidateHydrationUtil { + + def getAuthorIdFromTweetCandidate(tweetCandidate: TweetCandidate): Option[Long] = { + tweetCandidate match { + case candidate: TweetCandidate with TweetAuthor => + candidate.authorId + case _ => None + } + } + + private def getCandidateAuthorFromUserMap( + tweetCandidate: TweetCandidate, + userMap: Map[Long, User] + ): Option[User] = { + getAuthorIdFromTweetCandidate(tweetCandidate) match { + case Some(id) => + userMap.get(id) + case _ => + None + } + } + + private def getRelationshipMapForInNetworkCandidate( + candidate: RawCandidate with TweetAuthor, + relationshipMap: Map[RelationEdge, Boolean] + ): Map[RelationEdge, Boolean] = { + val relationEdges = + RelationshipUtil.getPreCandidateRelationshipsForInNetworkTweets(candidate).toSet + relationEdges.map { relationEdge => + (relationEdge, relationshipMap(relationEdge)) + }.toMap + } + + private def getTweetCandidateSocialContextUsers( + candidate: RawCandidate with SocialContextActions, + userMap: Map[Long, User] + ): Map[Long, Option[User]] = { + candidate.socialContextUserIds.map { userId => userId -> userMap.get(userId) }.toMap + } + + type TweetWithSocialContextTraits = TweetCandidate with TweetDetails with SocialContextActions + + def getHydratedCandidateForTweetRetweet( + 
candidate: RawCandidate with TweetWithSocialContextTraits, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): TweetRetweetPushCandidate = { + new TweetRetweetPushCandidate( + candidate = candidate, + socialContextUserMap = Future.value(getTweetCandidateSocialContextUsers(candidate, userMap)), + author = Future.value(getCandidateAuthorFromUserMap(candidate, userMap)), + copyIds: CopyIds + ) + } + + def getHydratedCandidateForTweetFavorite( + candidate: RawCandidate with TweetWithSocialContextTraits, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): TweetFavoritePushCandidate = { + new TweetFavoritePushCandidate( + candidate = candidate, + socialContextUserMap = Future.value(getTweetCandidateSocialContextUsers(candidate, userMap)), + author = Future.value(getCandidateAuthorFromUserMap(candidate, userMap)), + copyIds = copyIds + ) + } + + def getHydratedCandidateForF1FirstDegreeTweet( + candidate: RawCandidate with F1FirstDegree, + userMap: Map[Long, User], + relationshipMap: Map[RelationEdge, Boolean], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): F1TweetPushCandidate = { + new F1TweetPushCandidate( + candidate = candidate, + author = Future.value(getCandidateAuthorFromUserMap(candidate, userMap)), + socialGraphServiceResultMap = + getRelationshipMapForInNetworkCandidate(candidate, relationshipMap), + copyIds = copyIds + ) + } + def getHydratedTopicProofTweetCandidate( + candidate: RawCandidate with TopicProofTweetCandidate, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushMLModelScorer: PushMLModelScorer + ): TopicProofTweetPushCandidate = + new TopicProofTweetPushCandidate( + candidate, + getCandidateAuthorFromUserMap(candidate, userMap), + copyIds + ) + + def getHydratedSubscribedSearchTweetCandidate( + candidate: 
RawCandidate with SubscribedSearchTweetCandidate, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushMLModelScorer: PushMLModelScorer + ): SubscribedSearchTweetPushCandidate = + new SubscribedSearchTweetPushCandidate( + candidate, + getCandidateAuthorFromUserMap(candidate, userMap), + copyIds) + + def getHydratedListCandidate( + apiListStore: ReadableStore[Long, ApiList], + candidate: RawCandidate with ListPushCandidate, + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushMLModelScorer: PushMLModelScorer + ): ListRecommendationPushCandidate = { + new ListRecommendationPushCandidate(apiListStore, candidate, copyIds) + } + + def getHydratedCandidateForOutOfNetworkTweetCandidate( + candidate: RawCandidate with OutOfNetworkTweetCandidate with TopicCandidate, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): OutOfNetworkTweetPushCandidate = { + new OutOfNetworkTweetPushCandidate( + candidate: RawCandidate with OutOfNetworkTweetCandidate with TopicCandidate, + author = Future.value(getCandidateAuthorFromUserMap(candidate, userMap)), + copyIds: CopyIds + ) + } + + def getHydratedCandidateForTripTweetCandidate( + candidate: RawCandidate with OutOfNetworkTweetCandidate with TripCandidate, + userMap: Map[Long, User], + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): TripTweetPushCandidate = { + new TripTweetPushCandidate( + candidate: RawCandidate with OutOfNetworkTweetCandidate with TripCandidate, + author = Future.value(getCandidateAuthorFromUserMap(candidate, userMap)), + copyIds: CopyIds + ) + } + + def getHydratedCandidateForDiscoverTwitterCandidate( + candidate: RawCandidate with DiscoverTwitterCandidate, + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): DiscoverTwitterPushCandidate = { + new DiscoverTwitterPushCandidate( + candidate = candidate, 
+ copyIds = copyIds + ) + } + + /** + * Hydrates the data shared by magic fanout event candidates. + * This method can be reused for hydrating any event candidate. + * + * @param candidate event candidate whose eventId, pushId and magic-events reasons drive the lookups + * @param fanoutMetadataStore store queried with (candidate.eventId, candidate.pushId) + * @param semanticCoreMegadataStore store multi-queried for the SemanticCoreID reasons plus the event entity + * @return Future of MagicFanoutEventHydratedInfo built from the fanout event and semantic entity results + */ + private def hydrateMagicFanoutEventCandidate( + candidate: RawCandidate with MagicFanoutEventCandidate, + fanoutMetadataStore: ReadableStore[(Long, Long), FanoutEvent], + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata] + ): Future[MagicFanoutEventHydratedInfo] = { + + val fanoutEventFut = fanoutMetadataStore.get((candidate.eventId, candidate.pushId)) + + val semanticEntityForQueries: Seq[SemanticEntityForQuery] = { + val semanticCoreEntityIdQueries = candidate.candidateMagicEventsReasons match { + case magicEventsReasons: Seq[MagicEventsReason] => + magicEventsReasons.map(_.reason).collect { + case TargetID.SemanticCoreID(scInterest) => + SemanticEntityForQuery(domainId = scInterest.domainId, entityId = scInterest.entityId) + } + case _ => Seq.empty + } + val eventEntityQuery = SemanticEntityForQuery( + domainId = Domains.EventsEntityService.value, + entityId = candidate.eventId) + semanticCoreEntityIdQueries :+ eventEntityQuery + } + + val semanticEntityResultsFut = FutureOps.mapCollect( + semanticCoreMegadataStore.multiGet(semanticEntityForQueries.toSet) + ) + + Future + .join(fanoutEventFut, semanticEntityResultsFut).map { + case (fanoutEvent, semanticEntityResults) => + MagicFanoutEventHydratedInfo( + fanoutEvent, + semanticEntityResults + ) + case _ => + throw new IllegalArgumentException( + "event candidate hydration errors" + candidate.frigateNotification.toString) + } + } + + def getHydratedCandidateForMagicFanoutNewsEvent( + candidate: RawCandidate with MagicFanoutNewsEventCandidate, + copyIds: CopyIds, + lexServiceStore: ReadableStore[EventRequest, LiveEvent], + fanoutMetadataStore: ReadableStore[(Long, Long), 
FanoutEvent], + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata], + simClusterToEntityStore: ReadableStore[Int, SimClustersInferredEntities], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): Future[MagicFanoutNewsEventPushCandidate] = { + val magicFanoutEventHydratedInfoFut = hydrateMagicFanoutEventCandidate( + candidate, + fanoutMetadataStore, + semanticCoreMegadataStore + ) + + lazy val simClusterToEntityMappingFut: Future[Map[Int, Option[SimClustersInferredEntities]]] = + Future.collect { + simClusterToEntityStore.multiGet( + FanoutReasonEntities + .from(candidate.candidateMagicEventsReasons.map(_.reason)).simclusterIds.map( + _.clusterId) + ) + } + + Future + .join( + magicFanoutEventHydratedInfoFut, + simClusterToEntityMappingFut + ).map { + case (magicFanoutEventHydratedInfo, simClusterToEntityMapping) => + new MagicFanoutNewsEventPushCandidate( + candidate = candidate, + copyIds = copyIds, + fanoutEvent = magicFanoutEventHydratedInfo.fanoutEvent, + semanticEntityResults = magicFanoutEventHydratedInfo.semanticEntityResults, + simClusterToEntities = simClusterToEntityMapping, + lexServiceStore = lexServiceStore, + interestsLookupStore = interestsLookupStore, + uttEntityHydrationStore = uttEntityHydrationStore + ) + } + } + + def getHydratedCandidateForMagicFanoutSportsEvent( + candidate: RawCandidate + with MagicFanoutSportsEventCandidate + with MagicFanoutSportsScoreInformation, + copyIds: CopyIds, + lexServiceStore: ReadableStore[EventRequest, LiveEvent], + fanoutMetadataStore: ReadableStore[(Long, Long), FanoutEvent], + semanticCoreMegadataStore: ReadableStore[SemanticEntityForQuery, EntityMegadata], + interestsLookupStore: ReadableStore[InterestsLookupRequestWithContext, UserInterests], + uttEntityHydrationStore: UttEntityHydrationStore + )( + implicit 
stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): Future[MagicFanoutSportsPushCandidate] = { + val magicFanoutEventHydratedInfoFut = hydrateMagicFanoutEventCandidate( + candidate, + fanoutMetadataStore, + semanticCoreMegadataStore + ) + + magicFanoutEventHydratedInfoFut.map { magicFanoutEventHydratedInfo => + new MagicFanoutSportsPushCandidate( + candidate = candidate, + copyIds = copyIds, + fanoutEvent = magicFanoutEventHydratedInfo.fanoutEvent, + semanticEntityResults = magicFanoutEventHydratedInfo.semanticEntityResults, + simClusterToEntities = Map.empty, + lexServiceStore = lexServiceStore, + interestsLookupStore = interestsLookupStore, + uttEntityHydrationStore = uttEntityHydrationStore + ) + } + } + + def getHydratedCandidateForMagicFanoutProductLaunch( + candidate: RawCandidate with MagicFanoutProductLaunchCandidate, + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): Future[MagicFanoutProductLaunchPushCandidate] = + Future.value(new MagicFanoutProductLaunchPushCandidate(candidate, copyIds)) + + def getHydratedCandidateForMagicFanoutCreatorEvent( + candidate: RawCandidate with MagicFanoutCreatorEventCandidate, + safeUserStore: ReadableStore[Long, User], + copyIds: CopyIds, + creatorTweetCountStore: ReadableStore[UserId, Int] + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): Future[MagicFanoutCreatorEventPushCandidate] = { + safeUserStore.get(candidate.creatorId).map { hydratedCreatorUser => + new MagicFanoutCreatorEventPushCandidate( + candidate, + hydratedCreatorUser, + copyIds, + creatorTweetCountStore) + } + } + + def getHydratedCandidateForScheduledSpaceSubscriber( + candidate: RawCandidate with ScheduledSpaceSubscriberCandidate, + safeUserStore: ReadableStore[Long, User], + copyIds: CopyIds, + audioSpaceStore: ReadableStore[String, AudioSpace] + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): 
Future[ScheduledSpaceSubscriberPushCandidate] = { + + candidate.hostId match { + case Some(spaceHostId) => + safeUserStore.get(spaceHostId).map { hydratedHost => + new ScheduledSpaceSubscriberPushCandidate( + candidate = candidate, + hostUser = hydratedHost, + copyIds = copyIds, + audioSpaceStore = audioSpaceStore + ) + } + case _ => + Future.exception( + new IllegalStateException( + "Missing Space Host Id for hydrating ScheduledSpaceSubscriberCandidate")) + } + } + + def getHydratedCandidateForScheduledSpaceSpeaker( + candidate: RawCandidate with ScheduledSpaceSpeakerCandidate, + safeUserStore: ReadableStore[Long, User], + copyIds: CopyIds, + audioSpaceStore: ReadableStore[String, AudioSpace] + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): Future[ScheduledSpaceSpeakerPushCandidate] = { + + candidate.hostId match { + case Some(spaceHostId) => + safeUserStore.get(spaceHostId).map { hydratedHost => + new ScheduledSpaceSpeakerPushCandidate( + candidate = candidate, + hostUser = hydratedHost, + copyIds = copyIds, + audioSpaceStore = audioSpaceStore + ) + } + case _ => + Future.exception( + new RuntimeException( + "Missing Space Host Id for hydrating ScheduledSpaceSpeakerCandidate")) + } + } + + def getHydratedCandidateForTopTweetImpressionsCandidate( + candidate: RawCandidate with TopTweetImpressionsCandidate, + copyIds: CopyIds + )( + implicit stats: StatsReceiver, + pushModelScorer: PushMLModelScorer + ): TopTweetImpressionsPushCandidate = { + new TopTweetImpressionsPushCandidate( + candidate = candidate, + copyIds = copyIds + ) + } + + def isNsfwAccount(user: User, nsfwTokens: Seq[String]): Boolean = { + def hasNsfwToken(str: String): Boolean = nsfwTokens.exists(str.toLowerCase().contains(_)) + + val name = user.profile.map(_.name).getOrElse("") + val screenName = user.profile.map(_.screenName).getOrElse("") + val location = user.profile.map(_.location).getOrElse("") + val description = user.profile.map(_.description).getOrElse("") + 
val hasNsfwFlag = + user.safety.map(safety => safety.nsfwUser || safety.nsfwAdmin).getOrElse(false) + hasNsfwToken(name) || hasNsfwToken(screenName) || hasNsfwToken(location) || hasNsfwToken( + description) || hasNsfwFlag + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateUtil.scala new file mode 100644 index 000000000..a4802da45 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CandidateUtil.scala @@ -0,0 +1,138 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.contentrecommender.thriftscala.MetricTag +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate +import com.twitter.frigate.common.base.SocialContextAction +import com.twitter.frigate.common.base.SocialContextActions +import com.twitter.frigate.common.base.TargetInfo +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.base.TopicProofTweetCandidate +import com.twitter.frigate.common.base.TweetAuthorDetails +import com.twitter.frigate.common.candidate.TargetABDecider +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.params.CrtGroupEnum +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.CommonRecommendationType.TripGeoTweet +import com.twitter.frigate.thriftscala.CommonRecommendationType.TripHqTweet +import com.twitter.frigate.thriftscala.{SocialContextAction => TSocialContextAction} +import com.twitter.util.Future + +object CandidateUtil { + private val mrTwistlyMetricTags = + Seq(MetricTag.PushOpenOrNtabClick, 
MetricTag.RequestHealthFilterPushOpenBasedTweetEmbedding) + + def getSocialContextActionsFromCandidate(candidate: RawCandidate): Seq[TSocialContextAction] = { + candidate match { + case candidateWithSocialContex: RawCandidate with SocialContextActions => + candidateWithSocialContex.socialContextActions.map { scAction => + TSocialContextAction( + scAction.userId, + scAction.timestampInMillis, + scAction.tweetId + ) + } + case _ => Seq.empty + } + } + + /** + * Ranking Social Context based on the Real Graph weight + * @param socialContextActions Sequence of Social Context Actions + * @param seedsWithWeight Real Graph map consisting of User ID as key and RG weight as the value + * @param defaultToRecency Boolean to represent if we should use the timestamp of the SC to rank + * @return Returns the ranked sequence of SC Actions + */ + def getRankedSocialContext( + socialContextActions: Seq[SocialContextAction], + seedsWithWeight: Future[Option[Map[Long, Double]]], + defaultToRecency: Boolean + ): Future[Seq[SocialContextAction]] = { + seedsWithWeight.map { + case Some(followingsMap) => + socialContextActions.sortBy { action => -followingsMap.getOrElse(action.userId, 0.0) } + case _ => + if (defaultToRecency) socialContextActions.sortBy(-_.timestampInMillis) + else socialContextActions + } + } + + def shouldApplyHealthQualityFiltersForPrerankingPredicates( + candidate: TweetAuthorDetails with TargetInfo[TargetUser with TargetABDecider] + )( + implicit stats: StatsReceiver + ): Future[Boolean] = { + candidate.tweetAuthor.map { + case Some(user) => + val numFollowers: Double = user.counts.map(_.followers.toDouble).getOrElse(0.0) + numFollowers < candidate.target + .params(PushFeatureSwitchParams.NumFollowerThresholdForHealthAndQualityFiltersPreranking) + case _ => true + } + } + + def shouldApplyHealthQualityFilters( + candidate: PushCandidate + )( + implicit stats: StatsReceiver + ): Boolean = { + val numFollowers = + 
candidate.numericFeatures.getOrElse("RecTweetAuthor.User.ActiveFollowers", 0.0) + numFollowers < candidate.target + .params(PushFeatureSwitchParams.NumFollowerThresholdForHealthAndQualityFilters) + } + + def useAggressiveHealthThresholds(cand: PushCandidate): Boolean = + isMrTwistlyCandidate(cand) || + (cand.commonRecType == CommonRecommendationType.GeoPopTweet && cand.target.params( + PushFeatureSwitchParams.PopGeoTweetEnableAggressiveThresholds)) + + def isMrTwistlyCandidate(cand: PushCandidate): Boolean = + cand match { + case oonCandidate: PushCandidate with OutOfNetworkTweetCandidate => + oonCandidate.tagsCR + .getOrElse(Seq.empty).intersect(mrTwistlyMetricTags).nonEmpty && oonCandidate.tagsCR + .map(_.toSet.size).getOrElse(0) == 1 + case oonCandidate: PushCandidate with TopicProofTweetCandidate + if cand.target.params(PushFeatureSwitchParams.EnableHealthFiltersForTopicProofTweet) => + oonCandidate.tagsCR + .getOrElse(Seq.empty).intersect(mrTwistlyMetricTags).nonEmpty && oonCandidate.tagsCR + .map(_.toSet.size).getOrElse(0) == 1 + case _ => false + } + + def getTagsCRCount(cand: PushCandidate): Double = + cand match { + case oonCandidate: PushCandidate with OutOfNetworkTweetCandidate => + oonCandidate.tagsCR.map(_.toSet.size).getOrElse(0).toDouble + case oonCandidate: PushCandidate with TopicProofTweetCandidate + if cand.target.params(PushFeatureSwitchParams.EnableHealthFiltersForTopicProofTweet) => + oonCandidate.tagsCR.map(_.toSet.size).getOrElse(0).toDouble + case _ => 0.0 + } + + def isRelatedToMrTwistlyCandidate(cand: PushCandidate): Boolean = + cand match { + case oonCandidate: PushCandidate with OutOfNetworkTweetCandidate => + oonCandidate.tagsCR.getOrElse(Seq.empty).intersect(mrTwistlyMetricTags).nonEmpty + case oonCandidate: PushCandidate with TopicProofTweetCandidate + if cand.target.params(PushFeatureSwitchParams.EnableHealthFiltersForTopicProofTweet) => + oonCandidate.tagsCR.getOrElse(Seq.empty).intersect(mrTwistlyMetricTags).nonEmpty + case _ => 
false + } + + def getCrtGroup(commonRecType: CommonRecommendationType): CrtGroupEnum.Value = { + commonRecType match { + case crt if RecTypes.twistlyTweets(crt) => CrtGroupEnum.Twistly + case crt if RecTypes.frsTypes(crt) => CrtGroupEnum.Frs + case crt if RecTypes.f1RecTypes(crt) => CrtGroupEnum.F1 + case crt if crt == TripGeoTweet || crt == TripHqTweet => CrtGroupEnum.Trip + case crt if RecTypes.TopicTweetTypes(crt) => CrtGroupEnum.Topic + case crt if RecTypes.isGeoPopTweetType(crt) => CrtGroupEnum.GeoPop + case _ => CrtGroupEnum.Other + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CopyUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CopyUtil.scala new file mode 100644 index 000000000..95208c35e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/CopyUtil.scala @@ -0,0 +1,448 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FS} +import com.twitter.ibis2.lib.util.JsonMarshal +import com.twitter.util.Future +import com.twitter.util.Time + +object CopyUtil { + + /** + * Get a list of history feature copy along with metadata in the look back period, the metadata + * can be used to calculate number of copy pushed after the current feature copy + * @param candidate the candidate to be pushed to the user + * @return Future[Seq((..,))], which is a seq of the history FEATURE copy along with + * metadata within the look back period. In the tuple, the 3 elements represent: + * 1. Timestamp of the past feature copy + * 2. Option[Seq()] of copy feature names of the past copy + * 3.
Index of the particular feature copy in look back history if normal copy presents + */ + private def getPastCopyFeaturesList( + candidate: PushCandidate + ): Future[Seq[(Time, Option[Seq[String]], Int)]] = { + val target = candidate.target + + target.history.map { targetHistory => + val historyLookbackDuration = target.params(FS.CopyFeaturesHistoryLookbackDuration) + val notificationHistoryInLookbackDuration = targetHistory.sortedHistory + .takeWhile { + case (notifTimestamp, _) => historyLookbackDuration.ago < notifTimestamp + } + notificationHistoryInLookbackDuration.zipWithIndex + .filter { + case ((_, notification), _) => + notification.copyFeatures match { + case Some(copyFeatures) => copyFeatures.nonEmpty + case _ => false + } + } + .collect { + case ((timestamp, notification), notificationIndex) => + (timestamp, notification.copyFeatures, notificationIndex) + } + } + } + + private def getPastCopyFeaturesListForF1( + candidate: PushCandidate + ): Future[Seq[(Time, Option[Seq[String]], Int)]] = { + val target = candidate.target + target.history.map { targetHistory => + val historyLookbackDuration = target.params(FS.CopyFeaturesHistoryLookbackDuration) + val notificationHistoryInLookbackDuration = targetHistory.sortedHistory + .takeWhile { + case (notifTimestamp, _) => historyLookbackDuration.ago < notifTimestamp + } + notificationHistoryInLookbackDuration.zipWithIndex + .filter { + case ((_, notification), _) => + notification.copyFeatures match { + case Some(copyFeatures) => + RecTypes.isF1Type(notification.commonRecommendationType) && copyFeatures.nonEmpty + case _ => false + } + } + .collect { + case ((timestamp, notification), notificationIndex) => + (timestamp, notification.copyFeatures, notificationIndex) + } + } + } + + private def getPastCopyFeaturesListForOON( + candidate: PushCandidate + ): Future[Seq[(Time, Option[Seq[String]], Int)]] = { + val target = candidate.target + target.history.map { targetHistory => + val historyLookbackDuration = 
target.params(FS.CopyFeaturesHistoryLookbackDuration) + val notificationHistoryInLookbackDuration = targetHistory.sortedHistory + .takeWhile { + case (notifTimestamp, _) => historyLookbackDuration.ago < notifTimestamp + } + notificationHistoryInLookbackDuration.zipWithIndex + .filter { + case ((_, notification), _) => + notification.copyFeatures match { + case Some(copyFeatures) => + !RecTypes.isF1Type(notification.commonRecommendationType) && copyFeatures.nonEmpty + + case _ => false + } + } + .collect { + case ((timestamp, notification), notificationIndex) => + (timestamp, notification.copyFeatures, notificationIndex) + } + } + } + private def getEmojiFeaturesMap( + candidate: PushCandidate, + copyFeatureHistory: Seq[(Time, Option[Seq[String]], Int)], + lastHTLVisitTimestamp: Option[Long], + stats: StatsReceiver + ): Map[String, String] = { + val (emojiFatigueDuration, emojiFatigueNumOfPushes) = { + if (RecTypes.isF1Type(candidate.commonRecType)) { + ( + candidate.target.params(FS.F1EmojiCopyFatigueDuration), + candidate.target.params(FS.F1EmojiCopyNumOfPushesFatigue)) + } else { + ( + candidate.target.params(FS.OonEmojiCopyFatigueDuration), + candidate.target.params(FS.OonEmojiCopyNumOfPushesFatigue)) + } + } + + val scopedStats = stats + .scope("getEmojiFeaturesMap").scope(candidate.commonRecType.toString).scope( + emojiFatigueDuration.toString) + val addedEmojiCopyFeature = scopedStats.counter("added_emoji") + val fatiguedEmojiCopyFeature = scopedStats.counter("no_emoji") + + val copyFeatureType = PushConstants.EmojiFeatureNameForIbis2ModelValues + + val durationFatigueCarryFunc = () => + isUnderDurationFatigue(copyFeatureHistory, copyFeatureType, emojiFatigueDuration) + + val enableHTLBasedFatigueBasicRule = candidate.target.params(FS.EnableHTLBasedFatigueBasicRule) + val minDuration = candidate.target.params(FS.MinFatigueDurationSinceLastHTLVisit) + val lastHTLVisitBasedNonFatigueWindow = + candidate.target.params(FS.LastHTLVisitBasedNonFatigueWindow) + val 
htlBasedCopyFatigueCarryFunc = () => + isUnderHTLBasedFatigue(lastHTLVisitTimestamp, minDuration, lastHTLVisitBasedNonFatigueWindow) + + val isUnderFatigue = getIsUnderFatigue( + Seq( + (durationFatigueCarryFunc, true), + (htlBasedCopyFatigueCarryFunc, enableHTLBasedFatigueBasicRule), + ), + scopedStats + ) + + if (!isUnderFatigue) { + addedEmojiCopyFeature.incr() + Map(PushConstants.EmojiFeatureNameForIbis2ModelValues -> "true") + } else { + fatiguedEmojiCopyFeature.incr() + Map.empty[String, String] + } + } + + private def getTargetFeaturesMap( + candidate: PushCandidate, + copyFeatureHistory: Seq[(Time, Option[Seq[String]], Int)], + lastHTLVisitTimestamp: Option[Long], + stats: StatsReceiver + ): Map[String, String] = { + val targetFatigueDuration = { + if (RecTypes.isF1Type(candidate.commonRecType)) { + candidate.target.params(FS.F1TargetCopyFatigueDuration) + } else { + candidate.target.params(FS.OonTargetCopyFatigueDuration) + } + } + + val scopedStats = stats + .scope("getTargetFeaturesMap").scope(candidate.commonRecType.toString).scope( + targetFatigueDuration.toString) + val addedTargetCopyFeature = scopedStats.counter("added_target") + val fatiguedTargetCopyFeature = scopedStats.counter("no_target") + + val featureCopyType = PushConstants.TargetFeatureNameForIbis2ModelValues + val durationFatigueCarryFunc = () => + isUnderDurationFatigue(copyFeatureHistory, featureCopyType, targetFatigueDuration) + + val enableHTLBasedFatigueBasicRule = candidate.target.params(FS.EnableHTLBasedFatigueBasicRule) + val minDuration = candidate.target.params(FS.MinFatigueDurationSinceLastHTLVisit) + val lastHTLVisitBasedNonFatigueWindow = + candidate.target.params(FS.LastHTLVisitBasedNonFatigueWindow) + val htlBasedCopyFatigueCarryFunc = () => + isUnderHTLBasedFatigue(lastHTLVisitTimestamp, minDuration, lastHTLVisitBasedNonFatigueWindow) + + val isUnderFatigue = getIsUnderFatigue( + Seq( + (durationFatigueCarryFunc, true), + (htlBasedCopyFatigueCarryFunc, 
enableHTLBasedFatigueBasicRule), + ), + scopedStats + ) + + if (!isUnderFatigue) { + addedTargetCopyFeature.incr() + Map(PushConstants.TargetFeatureNameForIbis2ModelValues -> "true") + } else { + + fatiguedTargetCopyFeature.incr() + Map.empty[String, String] + } + } + + type FatigueRuleFlag = Boolean + type FatigueRuleFunc = () => Boolean + + def getIsUnderFatigue( + fatigueRulesWithFlags: Seq[(FatigueRuleFunc, FatigueRuleFlag)], + statsReceiver: StatsReceiver, + ): Boolean = { + val defaultFatigue = true + val finalFatigueRes = + fatigueRulesWithFlags.zipWithIndex.foldLeft(defaultFatigue)( + (fatigueSoFar, fatigueRuleFuncWithFlagAndIndex) => { + val ((fatigueRuleFunc, flag), index) = fatigueRuleFuncWithFlagAndIndex + val funcScopedStats = statsReceiver.scope(s"fatigueFunction${index}") + if (flag) { + val shouldFatigueForTheRule = fatigueRuleFunc() + funcScopedStats.scope(s"eval_${shouldFatigueForTheRule}").counter().incr() + val f = fatigueSoFar && shouldFatigueForTheRule + f + } else { + fatigueSoFar + } + }) + statsReceiver.scope(s"final_fatigue_${finalFatigueRes}").counter().incr() + finalFatigueRes + } + + private def isUnderDurationFatigue( + copyFeatureHistory: Seq[(Time, Option[Seq[String]], Int)], + copyFeatureType: String, + fatigueDuration: com.twitter.util.Duration, + ): Boolean = { + copyFeatureHistory.exists { + case (notifTimestamp, Some(copyFeatures), _) if copyFeatures.contains(copyFeatureType) => + notifTimestamp > fatigueDuration.ago + case _ => false + } + } + + private def isUnderHTLBasedFatigue( + lastHTLVisitTimestamp: Option[Long], + minDurationSinceLastHTLVisit: com.twitter.util.Duration, + lastHTLVisitBasedNonFatigueWindow: com.twitter.util.Duration, + ): Boolean = { + val lastHTLVisit = lastHTLVisitTimestamp.map(t => Time.fromMilliseconds(t)).getOrElse(Time.now) + val first = Time.now < (lastHTLVisit + minDurationSinceLastHTLVisit) + val second = + Time.now > (lastHTLVisit + minDurationSinceLastHTLVisit + 
lastHTLVisitBasedNonFatigueWindow) + first || second + } + /** Emits a `whole_template` model value selecting the high- or low-OONC tweet format when FS.EnableOONCBasedCopy is on and the candidate's mrWeightedOpenOrNtabClickRankingProbability crosses the matching threshold; otherwise an empty map. */ + def getOONCBasedFeature( + candidate: PushCandidate, + stats: StatsReceiver + ): Future[Map[String, String]] = { + val target = candidate.target + val metric = stats.scope("getOONCBasedFeature") + if (target.params(FS.EnableOONCBasedCopy)) { + candidate.mrWeightedOpenOrNtabClickRankingProbability.map { + case Some(score) if score >= target.params(FS.HighOONCThresholdForCopy) => + metric.counter("high_OONC").incr() + metric.counter(FS.HighOONCTweetFormat.toString).incr() + Map( + "whole_template" -> JsonMarshal.toJson( + Map( + target.params(FS.HighOONCTweetFormat).toString -> true + ))) + case Some(score) if score <= target.params(FS.LowOONCThresholdForCopy) => + metric.counter("low_OONC").incr() + metric.counter(FS.LowOONCTweetFormat.toString).incr() /* counted under the format param, as the high-OONC branch does */ + Map( + "whole_template" -> JsonMarshal.toJson( + Map( + target.params(FS.LowOONCTweetFormat).toString -> true + ))) + case _ => + metric.counter("not_in_OONC_range").incr() + Map.empty[String, String] + } + } else { + Future.value(Map.empty[String, String]) + } + } + + def getCopyFeatures( + candidate: PushCandidate, + stats: StatsReceiver, + ): Future[Map[String, String]] = { + if (candidate.target.isLoggedOutUser) { + Future.value(Map.empty[String, String]) + } else { + val featureMaps = getCopyBodyFeatures(candidate, stats) + for { + titleFeat <- getCopyTitleFeatures(candidate, stats) + nsfwFeat <- getNsfwCopyFeatures(candidate, stats) + ooncBasedFeature <- getOONCBasedFeature(candidate, stats) + } yield { + titleFeat ++ featureMaps ++ nsfwFeat ++ ooncBasedFeature + } + } + } + + private def getCopyTitleFeatures( + candidate: PushCandidate, + stats: StatsReceiver + ): Future[Map[String, String]] = { + val scopedStats = stats.scope("CopyUtil").scope("getCopyTitleFeatures") + + val target = candidate.target + + if ((RecTypes.isSimClusterBasedType(candidate.commonRecType) && target.params( + FS.EnableCopyFeaturesForOon)) ||
(RecTypes.isF1Type(candidate.commonRecType) && target + .params(FS.EnableCopyFeaturesForF1))) { + + val enableTargetAndEmojiSplitFatigue = target.params(FS.EnableTargetAndEmojiSplitFatigue) + val isTargetF1Type = RecTypes.isF1Type(candidate.commonRecType) + + val copyFeatureHistoryFuture = if (enableTargetAndEmojiSplitFatigue && isTargetF1Type) { + getPastCopyFeaturesListForF1(candidate) + } else if (enableTargetAndEmojiSplitFatigue && !isTargetF1Type) { + getPastCopyFeaturesListForOON(candidate) + } else { + getPastCopyFeaturesList(candidate) + } + + Future + .join( + copyFeatureHistoryFuture, + target.lastHTLVisitTimestamp, + ).map { + case (copyFeatureHistory, lastHTLVisitTimestamp) => + val emojiFeatures = { + if ((RecTypes.isF1Type(candidate.commonRecType) && target.params( + FS.EnableEmojiInF1Copy)) + || RecTypes.isSimClusterBasedType(candidate.commonRecType) && target.params( + FS.EnableEmojiInOonCopy)) { + getEmojiFeaturesMap( + candidate, + copyFeatureHistory, + lastHTLVisitTimestamp, + scopedStats) + } else Map.empty[String, String] + } + + val targetFeatures = { + if ((RecTypes.isF1Type(candidate.commonRecType) && target.params( + FS.EnableTargetInF1Copy)) || (RecTypes.isSimClusterBasedType( + candidate.commonRecType) && target.params(FS.EnableTargetInOonCopy))) { + getTargetFeaturesMap( + candidate, + copyFeatureHistory, + lastHTLVisitTimestamp, + scopedStats) + } else Map.empty[String, String] + } + + val baseCopyFeaturesMap = + if (emojiFeatures.nonEmpty || targetFeatures.nonEmpty) + Map(PushConstants.EnableCopyFeaturesForIbis2ModelValues -> "true") + else Map.empty[String, String] + baseCopyFeaturesMap ++ emojiFeatures ++ targetFeatures + case _ => + Map.empty[String, String] + } + } else Future.value(Map.empty[String, String]) + } + + private def getCopyBodyTruncateFeatures( + candidate: PushCandidate, + ): Map[String, String] = { + if (candidate.target.params(FS.EnableIosCopyBodyTruncate)) { + Map("enable_body_truncate_ios" -> "true") + } else { + 
Map.empty[String, String] + } + } + + private def getNsfwCopyFeatures( + candidate: PushCandidate, + stats: StatsReceiver + ): Future[Map[String, String]] = { + val scopedStats = stats.scope("CopyUtil").scope("getNsfwCopyBodyFeatures") + val hasNsfwScoreF1Counter = scopedStats.counter("f1_has_nsfw_score") + val hasNsfwScoreOonCounter = scopedStats.counter("oon_has_nsfw_score") + val noNsfwScoreCounter = scopedStats.counter("no_nsfw_score") + val nsfwScoreF1 = scopedStats.stat("f1_nsfw_score") + val nsfwScoreOon = scopedStats.stat("oon_nsfw_score") + val isNsfwF1Counter = scopedStats.counter("is_f1_nsfw") + val isNsfwOonCounter = scopedStats.counter("is_oon_nsfw") + + val target = candidate.target + val nsfwScoreFut = if (target.params(FS.EnableNsfwCopy)) { + candidate.mrNsfwScore + } else Future.None + + nsfwScoreFut.map { + case Some(nsfwScore) => + if (RecTypes.isF1Type(candidate.commonRecType)) { + hasNsfwScoreF1Counter.incr() + nsfwScoreF1.add(nsfwScore.toFloat * 10000) + if (nsfwScore > target.params(FS.NsfwScoreThresholdForF1Copy)) { + isNsfwF1Counter.incr() + Map("is_f1_nsfw" -> "true") + } else { + Map.empty[String, String] + } + } else if (RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType)) { + nsfwScoreOon.add(nsfwScore.toFloat * 10000) + hasNsfwScoreOonCounter.incr() + if (nsfwScore > target.params(FS.NsfwScoreThresholdForOONCopy)) { + isNsfwOonCounter.incr() + Map("is_oon_nsfw" -> "true") + } else { + Map.empty[String, String] + } + } else { + Map.empty[String, String] + } + case _ => + noNsfwScoreCounter.incr() + Map.empty[String, String] + } + } + + private def getCopyBodyFeatures( + candidate: PushCandidate, + stats: StatsReceiver + ): Map[String, String] = { + val target = candidate.target + val scopedStats = stats.scope("CopyUtil").scope("getCopyBodyFeatures") + + val copyBodyFeatures = { + if (RecTypes.isF1Type(candidate.commonRecType) && target.params(FS.EnableF1CopyBody)) { + scopedStats.counter("f1BodyExpEnabled").incr() + 
Map(PushConstants.CopyBodyExpIbisModelValues -> "true") + } else if (RecTypes.isOutOfNetworkTweetRecType(candidate.commonRecType) && target.params( + FS.EnableOONCopyBody)) { + scopedStats.counter("oonBodyExpEnabled").incr() + Map(PushConstants.CopyBodyExpIbisModelValues -> "true") + } else + Map.empty[String, String] + } + val copyBodyTruncateFeatures = getCopyBodyTruncateFeatures(candidate) + copyBodyFeatures ++ copyBodyTruncateFeatures + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/EmailLandingPageExperimentUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/EmailLandingPageExperimentUtil.scala new file mode 100644 index 000000000..34d5b9bae --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/EmailLandingPageExperimentUtil.scala @@ -0,0 +1,92 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.frigate.common.store.deviceinfo.DeviceInfo +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams.EnableRuxLandingPage +import com.twitter.frigate.pushservice.params.PushParams.EnableRuxLandingPageAndroidParam +import com.twitter.frigate.pushservice.params.PushParams.EnableRuxLandingPageIOSParam +import com.twitter.frigate.pushservice.params.PushParams.RuxLandingPageExperimentKeyAndroidParam +import com.twitter.frigate.pushservice.params.PushParams.RuxLandingPageExperimentKeyIOSParam +import com.twitter.frigate.pushservice.params.PushParams.ShowRuxLandingPageAsModalOnIOS +import com.twitter.rux.common.context.thriftscala.MagicRecsNTabTweet +import com.twitter.rux.common.context.thriftscala.MagicRecsPushTweet +import com.twitter.rux.common.context.thriftscala.RuxContext +import com.twitter.rux.common.context.thriftscala.Source +import com.twitter.rux.common.encode.RuxContextEncoder + +/** + * This class provides utility functions for email landing page for push + */ +object 
EmailLandingPageExperimentUtil { + val ruxCxtEncoder = new RuxContextEncoder() + + def getIbis2ModelValue( + deviceInfoOpt: Option[DeviceInfo], + target: Target, + tweetId: Long + ): Map[String, String] = { + val enable = enablePushEmailLanding(deviceInfoOpt, target) + if (enable) { + val ruxCxt = if (deviceInfoOpt.exists(_.isRuxLandingPageEligible)) { + val encodedCxt = getRuxContext(tweetId, target, deviceInfoOpt) + Map("rux_cxt" -> encodedCxt) + } else Map.empty[String, String] + val enableModal = if (showModalForIOS(deviceInfoOpt, target)) { + Map("enable_modal" -> "true") + } else Map.empty[String, String] + + Map("land_on_email_landing_page" -> "true") ++ ruxCxt ++ enableModal + } else Map.empty[String, String] + } + + def createNTabRuxLandingURI(screenName: String, tweetId: Long): String = { + val encodedCxt = + ruxCxtEncoder.encode(RuxContext(Some(Source.MagicRecsNTabTweet(MagicRecsNTabTweet(tweetId))))) + s"$screenName/status/${tweetId.toString}?cxt=$encodedCxt" + } + + private def getRuxContext( + tweetId: Long, + target: Target, + deviceInfoOpt: Option[DeviceInfo] + ): String = { + val isDeviceIOS = PushDeviceUtil.isPrimaryDeviceIOS(deviceInfoOpt) + val isDeviceAndroid = PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfoOpt) + val keyOpt = if (isDeviceIOS) { + target.params(RuxLandingPageExperimentKeyIOSParam) + } else if (isDeviceAndroid) { + target.params(RuxLandingPageExperimentKeyAndroidParam) + } else None + val context = RuxContext(Some(Source.MagicRecsTweet(MagicRecsPushTweet(tweetId))), None, keyOpt) + ruxCxtEncoder.encode(context) + } + + private def enablePushEmailLanding( + deviceInfoOpt: Option[DeviceInfo], + target: Target + ): Boolean = + deviceInfoOpt.exists(deviceInfo => + if (deviceInfo.isEmailLandingPageEligible) { + val isRuxLandingPageEnabled = target.params(EnableRuxLandingPage) + isRuxLandingPageEnabled && isRuxLandingEnabledBasedOnDeviceInfo(deviceInfoOpt, target) + } else false) + + private def showModalForIOS(deviceInfoOpt: 
Option[DeviceInfo], target: Target): Boolean = { + deviceInfoOpt.exists { deviceInfo => + deviceInfo.isRuxLandingPageAsModalEligible && target.params(ShowRuxLandingPageAsModalOnIOS) + } + } + + private def isRuxLandingEnabledBasedOnDeviceInfo( + deviceInfoOpt: Option[DeviceInfo], + target: Target + ): Boolean = { + val isDeviceIOS = PushDeviceUtil.isPrimaryDeviceIOS(deviceInfoOpt) + val isDeviceAndroid = PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfoOpt) + if (isDeviceIOS) { + target.params(EnableRuxLandingPageIOSParam) + } else if (isDeviceAndroid) { + target.params(EnableRuxLandingPageAndroidParam) + } else true + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/FunctionalUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/FunctionalUtil.scala new file mode 100644 index 000000000..a2721873e --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/FunctionalUtil.scala @@ -0,0 +1,12 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.Counter + +object FunctionalUtil { + def incr[T](counter: Counter): T => T = { x => + { + counter.incr() + x + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/IbisScribeTargets.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/IbisScribeTargets.scala new file mode 100644 index 000000000..de1108f56 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/IbisScribeTargets.scala @@ -0,0 +1,55 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.CommonRecommendationType._ + +object IbisScribeTargets { + val User2 = "magic_rec_user_2" + val User4 = "magic_rec_user_4" + val Tweet2 = "magic_rec_tweet_2" + val Tweet4 = "magic_rec_tweet_4" + val Tweet5 = "magic_rec_tweet_5" + val Tweet9 = "magic_rec_tweet_9" + val Tweet10 = 
"magic_rec_tweet_10" + val Tweet11 = "magic_rec_tweet_11" + val Tweet12 = "magic_rec_tweet_12" + val Tweet16 = "magic_rec_tweet_16" + val Hashtag = "magic_rec_hashtag" + val UnreadBadgeCount17 = "magic_rec_unread_badge_count_17" + val Highlights = "highlights" + val TweetAnalytics = "magic_rec_tweet_analytics" + val Untracked = "untracked" + + def crtToScribeTarget(crt: CommonRecommendationType): String = crt match { + case UserFollow => + User2 + case HermitUser => + User4 + case TweetRetweet | TweetFavorite => + Tweet2 + case TweetRetweetPhoto | TweetFavoritePhoto => + Tweet4 + case TweetRetweetVideo | TweetFavoriteVideo => + Tweet5 + case UrlTweetLanding => + Tweet9 + case F1FirstdegreeTweet | F1FirstdegreePhoto | F1FirstdegreeVideo => + Tweet10 + case AuthorTargetingTweet => + Tweet11 + case PeriscopeShare => + Tweet12 + case CommonRecommendationType.Highlights => + Highlights + case HashtagTweet | HashtagTweetRetweet => + Hashtag + case PinnedTweet => + Tweet16 + case UnreadBadgeCount => + UnreadBadgeCount17 + case TweetImpressions => + TweetAnalytics + case _ => + Untracked + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/InlineActionUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/InlineActionUtil.scala new file mode 100644 index 000000000..2900b7418 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/InlineActionUtil.scala @@ -0,0 +1,219 @@ +package com.twitter.frigate.pushservice.util + +import com.google.common.io.BaseEncoding +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.InlineActionsEnum +import com.twitter.frigate.pushservice.params.PushParams +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.ibis2.lib.util.JsonMarshal +import 
com.twitter.notifications.platform.thriftscala._ +import com.twitter.notificationservice.thriftscala.CreateGenericNotificationResponse +import com.twitter.scrooge.BinaryThriftStructSerializer +import com.twitter.util.Future + +/** + * This class provides utility functions for inline action for push + */ +object InlineActionUtil { + + def scopedStats(statsReceiver: StatsReceiver): StatsReceiver = + statsReceiver.scope(getClass.getSimpleName) + + /** + * Util function to build web inline actions for Ibis + * @param actions list of inline actions to be hydrated depending on the CRT + * @param enableForDesktopWeb if web inline actions should be shown on desktop RWeb, for experimentation purpose + * @param enableForMobileWeb if web inline actions should be shown on mobile RWeb, for experimentation purpose + * @return Params for web inline actions to be consumed by `smart.inline.actions.web.mustache` in Ibis + */ + def getGeneratedTweetInlineActionsForWeb( + actions: Seq[InlineActionsEnum.Value], + enableForDesktopWeb: Boolean, + enableForMobileWeb: Boolean + ): Map[String, String] = { + if (!enableForDesktopWeb && !enableForMobileWeb) { + Map.empty + } else { + val inlineActions = buildEnrichedInlineActionsMap(actions) ++ Map( + "enable_for_desktop_web" -> enableForDesktopWeb.toString, + "enable_for_mobile_web" -> enableForMobileWeb.toString + ) + Map( + "inline_action_details_web" -> JsonMarshal.toJson(inlineActions), + ) + } + } + + def getGeneratedTweetInlineActionsV1( + actions: Seq[InlineActionsEnum.Value] + ): Map[String, String] = { + val inlineActions = buildEnrichedInlineActionsMap(actions) + Map( + "inline_action_details" -> JsonMarshal.toJson(inlineActions) + ) + } + + private def buildEnrichedInlineActionsMap( + actions: Seq[InlineActionsEnum.Value] + ): Map[String, Seq[Map[String, Any]]] = { + Map( + "actions" -> actions + .map(_.toString.toLowerCase) + .zipWithIndex + .map { + case (a: String, i: Int) => + Map("action" -> a) ++ Map( +
s"use_${a}_stringcenter_key" -> true, + "last" -> (i == (actions.length - 1)) + ) + }.seq + ) + } + + def getGeneratedTweetInlineActionsV2( + actions: Seq[InlineActionsEnum.Value] + ): Map[String, String] = { + val v2CustomActions = actions + .map { + case InlineActionsEnum.Favorite => + NotificationCustomAction( + Some("mr_inline_favorite_title"), + CustomActionData.LegacyAction(LegacyAction(ActionIdentifier.Favorite)) + ) + case InlineActionsEnum.Follow => + NotificationCustomAction( + Some("mr_inline_follow_title"), + CustomActionData.LegacyAction(LegacyAction(ActionIdentifier.Follow))) + case InlineActionsEnum.Reply => + NotificationCustomAction( + Some("mr_inline_reply_title"), + CustomActionData.LegacyAction(LegacyAction(ActionIdentifier.Reply))) + case InlineActionsEnum.Retweet => + NotificationCustomAction( + Some("mr_inline_retweet_title"), + CustomActionData.LegacyAction(LegacyAction(ActionIdentifier.Retweet))) + case _ => + NotificationCustomAction( + Some("mr_inline_favorite_title"), + CustomActionData.LegacyAction(LegacyAction(ActionIdentifier.Favorite)) + ) + } + val notifications = NotificationCustomActions(v2CustomActions) + Map("serialized_inline_actions_v2" -> serializeActionsToBase64(notifications)) + } + + def getDislikeInlineAction( + candidate: PushCandidate, + ntabResponse: CreateGenericNotificationResponse + ): Option[NotificationCustomAction] = { + ntabResponse.successKey.map(successKey => { + val urlParams = Map[String, String]( + "answer" -> "dislike", + "notification_hash" -> successKey.hashKey.toString, + "upstream_uid" -> candidate.impressionId, + "notification_timestamp" -> successKey.timestampMillis.toString + ) + val urlParamsString = urlParams.map(kvp => f"${kvp._1}=${kvp._2}").mkString("&") + + val httpPostRequest = HttpRequest.PostRequest( + PostRequest(url = f"/2/notifications/feedback.json?$urlParamsString", bodyParams = None)) + val httpRequestAction = HttpRequestAction( + httpRequest = httpPostRequest, + scribeAction = 
Option("dislike_scribe_action"), + isAuthorizationRequired = Option(true), + isDestructive = Option(false), + undoable = None + ) + val dislikeAction = CustomActionData.HttpRequestAction(httpRequestAction) + NotificationCustomAction(title = Option("mr_inline_dislike_title"), action = dislikeAction) + }) + } + + /** + * Given a serialized inline action v2, update the action at index to the given new action. + * If given index is bigger than current action length, append the given inline action at the end. + * @param serialized_inline_actions_v2 the original action in serialized version + * @param actionOption an Option of the new action to replace the old one + * @param index the position where the old action will be replaced + * @return a new serialized inline action v2 + */ + def patchInlineActionAtPosition( + serialized_inline_actions_v2: String, + actionOption: Option[NotificationCustomAction], + index: Int + ): String = { + val originalActions: Seq[NotificationCustomAction] = deserializeActionsFromString( + serialized_inline_actions_v2).actions + val newActions = actionOption match { + case Some(action) if index >= originalActions.size => originalActions ++ Seq(action) + case Some(action) => originalActions.updated(index, action) + case _ => originalActions + } + serializeActionsToBase64(NotificationCustomActions(newActions)) + } + + /** + * Return list of available inline actions for ibis2 model + */ + def getGeneratedTweetInlineActions( + target: Target, + statsReceiver: StatsReceiver, + actions: Seq[InlineActionsEnum.Value], + ): Map[String, String] = { + val scopedStatsReceiver = scopedStats(statsReceiver) + val useV1 = target.params(PushFeatureSwitchParams.UseInlineActionsV1) + val useV2 = target.params(PushFeatureSwitchParams.UseInlineActionsV2) + if (useV1 && useV2) { + scopedStatsReceiver.counter("use_v1_and_use_v2").incr() + getGeneratedTweetInlineActionsV1(actions) ++ getGeneratedTweetInlineActionsV2(actions) + } else if (useV1 && !useV2) { + 
scopedStatsReceiver.counter("only_use_v1").incr() + getGeneratedTweetInlineActionsV1(actions) + } else if (!useV1 && useV2) { + scopedStatsReceiver.counter("only_use_v2").incr() + getGeneratedTweetInlineActionsV2(actions) + } else { + scopedStatsReceiver.counter("use_neither_v1_nor_v2").incr() + Map.empty[String, String] + } + } + + /** + * Return Tweet inline action ibis2 model values after applying experiment logic + */ + def getTweetInlineActionValue(target: Target): Future[Map[String, String]] = { + if (target.isLoggedOutUser) { + Future( + Map( + "show_inline_action" -> "false" + ) + ) + } else { + val showInlineAction: Boolean = target.params(PushParams.MRAndroidInlineActionOnPushCopyParam) + Future( + Map( + "show_inline_action" -> s"$showInlineAction" + ) + ) + } + } + + private val binaryThriftStructSerializer: BinaryThriftStructSerializer[ + NotificationCustomActions + ] = BinaryThriftStructSerializer.apply(NotificationCustomActions) + private val base64Encoding = BaseEncoding.base64() + + def serializeActionsToBase64(notificationCustomActions: NotificationCustomActions): String = { + val actionsAsByteArray: Array[Byte] = + binaryThriftStructSerializer.toBytes(notificationCustomActions) + base64Encoding.encode(actionsAsByteArray) + } + + def deserializeActionsFromString(serializedInlineActionV2: String): NotificationCustomActions = { + val actionAsByteArray = base64Encoding.decode(serializedInlineActionV2) + binaryThriftStructSerializer.fromBytes(actionAsByteArray) + } + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MediaAnnotationsUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MediaAnnotationsUtil.scala new file mode 100644 index 000000000..a3a3ecf50 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MediaAnnotationsUtil.scala @@ -0,0 +1,52 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import 
com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate + +object MediaAnnotationsUtil { + + val mediaIdToCategoryMapping = Map("0" -> "0") + + val nudityCategoryId = "0" + val beautyCategoryId = "0" + val singlePersonCategoryId = "0" + val sensitiveMediaCategoryFeatureName = + "tweet.mediaunderstanding.tweet_annotations.sensitive_category_probabilities" + + def updateMediaCategoryStats( + candidates: Seq[CandidateDetails[PushCandidate]] + )( + implicit statsReceiver: StatsReceiver + ) = { + + val statScope = statsReceiver.scope("mediaStats") + val filteredCandidates = candidates.filter { candidate => + !candidate.candidate.sparseContinuousFeatures + .getOrElse(sensitiveMediaCategoryFeatureName, Map.empty[String, Double]).contains( + nudityCategoryId) + } + + if (filteredCandidates.isEmpty) + statScope.counter("emptyCandidateListAfterNudityFilter").incr() + else + statScope.counter("nonEmptyCandidateListAfterNudityFilter").incr() + candidates.foreach { candidate => + statScope.counter("totalCandidates").incr() + val mediaFeature = candidate.candidate.sparseContinuousFeatures + .getOrElse(sensitiveMediaCategoryFeatureName, Map.empty[String, Double]) + if (mediaFeature.nonEmpty) { + val mediaCategoryByMaxScore = mediaFeature.maxBy(_._2)._1 + statScope + .scope("mediaCategoryByMaxScore").counter(mediaIdToCategoryMapping + .getOrElse(mediaCategoryByMaxScore, "undefined")).incr() + + mediaFeature.keys.map { feature => + statScope + .scope("mediaCategory").counter(mediaIdToCategoryMapping + .getOrElse(feature, "undefined")).incr() + } + } + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MinDurationModifierCalculator.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MinDurationModifierCalculator.scala new file mode 100644 index 000000000..e96ecb817 --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MinDurationModifierCalculator.scala @@ -0,0 +1,187 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.util.TimeUtil +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FSParams} +import com.twitter.util.Future +import com.twitter.util.Time +import java.util.Calendar +import java.util.TimeZone + +case class MinDurationModifierCalculator() { + + private def mapCountryCodeToTimeZone( + countryCode: String, + stats: StatsReceiver + ): Option[Calendar] = { + PushConstants.countryCodeToTimeZoneMap + .get(countryCode.toUpperCase).map(timezone => + Calendar.getInstance(TimeZone.getTimeZone(timezone))) + } + + private def transformToHour( + dayOfHour: Int + ): Int = { + if (dayOfHour < 0) dayOfHour + 24 + else dayOfHour + } + + private def getMinDurationByHourOfDay( + hourOfDay: Int, + startTimeList: Seq[Int], + endTimeList: Seq[Int], + minDurationTimeModifierConst: Seq[Int], + stats: StatsReceiver + ): Option[Int] = { + val scopedStats = stats.scope("getMinDurationByHourOfDay") + scopedStats.counter("request").incr() + val durationOpt = (startTimeList, endTimeList, minDurationTimeModifierConst).zipped.toList + .filter { + case (startTime, endTime, _) => + if (startTime <= endTime) hourOfDay >= startTime && hourOfDay < endTime + else (hourOfDay >= startTime) || hourOfDay < endTime + case _ => false + }.map { + case (_, _, modifier) => modifier + }.headOption + durationOpt match { + case Some(duration) => scopedStats.counter(s"$duration.minutes").incr() + case _ => scopedStats.counter("none").incr() + } + durationOpt + } + + def getMinDurationModifier( + target: Target, + calendar: Calendar, + stats: StatsReceiver + ): Option[Int] = { + val startTimeList = 
target.params(FSParams.MinDurationModifierStartHourList) + val endTimeList = target.params(FSParams.MinDurationModifierEndHourList) + val minDurationTimeModifierConst = target.params(FSParams.MinDurationTimeModifierConst) + if (startTimeList.length != endTimeList.length || minDurationTimeModifierConst.length != startTimeList.length) { + None + } else { + val hourOfDay = calendar.get(Calendar.HOUR_OF_DAY) + getMinDurationByHourOfDay( + hourOfDay, + startTimeList, + endTimeList, + minDurationTimeModifierConst, + stats) + } + } + + def getMinDurationModifier( + target: Target, + countryCodeOpt: Option[String], + stats: StatsReceiver + ): Option[Int] = { + val scopedStats = stats + .scope("getMinDurationModifier") + scopedStats.counter("total_requests").incr() + + countryCodeOpt match { + case Some(countryCode) => + scopedStats + .counter("country_code_exists").incr() + val calendarOpt = mapCountryCodeToTimeZone(countryCode, scopedStats) + calendarOpt.flatMap(calendar => getMinDurationModifier(target, calendar, scopedStats)) + case _ => None + } + } + + def getMinDurationModifier(target: Target, stats: StatsReceiver): Future[Option[Int]] = { + val scopedStats = stats + .scope("getMinDurationModifier") + scopedStats.counter("total_requests").incr() + + val startTimeList = target.params(FSParams.MinDurationModifierStartHourList) + val endTimeList = target.params(FSParams.MinDurationModifierEndHourList) + val minDurationTimeModifierConst = target.params(FSParams.MinDurationTimeModifierConst) + if (startTimeList.length != endTimeList.length || minDurationTimeModifierConst.length != startTimeList.length) { + Future.value(None) + } else { + target.localTimeInHHMM.map { + case (hourOfDay, _) => + getMinDurationByHourOfDay( + hourOfDay, + startTimeList, + endTimeList, + minDurationTimeModifierConst, + scopedStats) + case _ => None + } + } + } + + def getMinDurationModifierByUserOpenedHistory( + target: Target, + openedPushByHourAggregatedOpt: Option[Map[Int, Int]], + stats: 
StatsReceiver + ): Option[Int] = { + val scopedStats = stats + .scope("getMinDurationModifierByUserOpenedHistory") + scopedStats.counter("total_requests").incr() + openedPushByHourAggregatedOpt match { + case Some(openedPushByHourAggregated) => + if (openedPushByHourAggregated.isEmpty) { + scopedStats.counter("openedPushByHourAggregated_empty").incr() + None + } else { + val currentUTCHour = TimeUtil.hourOfDay(Time.now) + val utcHourWithMaxOpened = if (target.params(FSParams.EnableRandomHourForQuickSend)) { + (target.targetId % 24).toInt + } else { + openedPushByHourAggregated.maxBy(_._2)._1 + } + val numOfMaxOpened = openedPushByHourAggregated.maxBy(_._2)._2 + if (numOfMaxOpened >= target.params(FSParams.SendTimeByUserHistoryMaxOpenedThreshold)) { + scopedStats.counter("pass_experiment_bucket_threshold").incr() + if (numOfMaxOpened >= target + .params(FSParams.SendTimeByUserHistoryMaxOpenedThreshold)) { // only update if number of opened pushes meet threshold + scopedStats.counter("pass_max_threshold").incr() + val quickSendBeforeHours = + target.params(FSParams.SendTimeByUserHistoryQuickSendBeforeHours) + val quickSendAfterHours = + target.params(FSParams.SendTimeByUserHistoryQuickSendAfterHours) + + val hoursToLessSend = target.params(FSParams.SendTimeByUserHistoryNoSendsHours) + + val quickSendTimeMinDurationInMinute = + target.params(FSParams.SendTimeByUserHistoryQuickSendMinDurationInMinute) + val noSendTimeMinDuration = + target.params(FSParams.SendTimeByUserHistoryNoSendMinDuration) + + val startTimeForNoSend = transformToHour( + utcHourWithMaxOpened - quickSendBeforeHours - hoursToLessSend) + val startTimeForQuickSend = transformToHour( + utcHourWithMaxOpened - quickSendBeforeHours) + val endTimeForNoSend = + transformToHour(utcHourWithMaxOpened - quickSendBeforeHours) + val endTimeForQuickSend = + transformToHour(utcHourWithMaxOpened + quickSendAfterHours) + 1 + + val startTimeList = Seq(startTimeForNoSend, startTimeForQuickSend) + val endTimeList = 
Seq(endTimeForNoSend, endTimeForQuickSend) + val minDurationTimeModifierConst = + Seq(noSendTimeMinDuration, quickSendTimeMinDurationInMinute) + + getMinDurationByHourOfDay( + currentUTCHour, + startTimeList, + endTimeList, + minDurationTimeModifierConst, + scopedStats) + + } else None + } else None + } + case _ => + None + } + } + +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MrUserStateUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MrUserStateUtil.scala new file mode 100644 index 000000000..33333c4c4 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/MrUserStateUtil.scala @@ -0,0 +1,16 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.TargetUser + +object MrUserStateUtil { + def updateMrUserStateStats(target: TargetUser)(implicit statsReceiver: StatsReceiver) = { + statsReceiver.counter("AllUserStates").incr() + target.targetMrUserState.map { + case Some(state) => + statsReceiver.counter(state.name).incr() + case _ => + statsReceiver.counter("UnknownUserState").incr() + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/NsfwPersonalizationUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/NsfwPersonalizationUtil.scala new file mode 100644 index 000000000..01c8d0a72 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/NsfwPersonalizationUtil.scala @@ -0,0 +1,126 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.scio.nsfw_user_segmentation.thriftscala.NSFWUserSegmentation + +object NsfwPersonalizationUtil { + def computeNsfwUserStats( + targetNsfwInfo: Option[NsfwInfo] + )( + implicit statsReceiver: StatsReceiver + ): Unit = { + + def computeNsfwProfileVisitStats(sReceiver: StatsReceiver, nsfwProfileVisits: Long): Unit = { + 
if (nsfwProfileVisits >= 1) + sReceiver.counter("nsfwProfileVisits_gt_1").incr() + if (nsfwProfileVisits >= 2) + sReceiver.counter("nsfwProfileVisits_gt_2").incr() + if (nsfwProfileVisits >= 3) + sReceiver.counter("nsfwProfileVisits_gt_3").incr() + if (nsfwProfileVisits >= 5) + sReceiver.counter("nsfwProfileVisits_gt_5").incr() + if (nsfwProfileVisits >= 8) + sReceiver.counter("nsfwProfileVisits_gt_8").incr() + } + + def computeRatioStats( + sReceiver: StatsReceiver, + ratio: Int, + statName: String, + intervals: List[Double] = List(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9) + ): Unit = { + intervals.foreach { i => + if (ratio > i * 10000) + sReceiver.counter(f"${statName}_greater_than_${i}").incr() + } + } + val sReceiver = statsReceiver.scope("nsfw_personalization") + sReceiver.counter("AllUsers").incr() + + (targetNsfwInfo) match { + case (Some(nsfwInfo)) => + val sensitive = nsfwInfo.senstiveOptIn.getOrElse(false) + val nsfwFollowRatio = + nsfwInfo.nsfwFollowRatio + val totalFollows = nsfwInfo.totalFollowCount + val numNsfwProfileVisits = nsfwInfo.nsfwProfileVisits + val nsfwRealGraphScore = nsfwInfo.realGraphScore + val nsfwSearchScore = nsfwInfo.searchNsfwScore + val totalSearches = nsfwInfo.totalSearches + val realGraphScore = nsfwInfo.realGraphScore + val searchScore = nsfwInfo.searchNsfwScore + + if (sensitive) + sReceiver.counter("sensitiveOptInEnabled").incr() + else + sReceiver.counter("sensitiveOptInDisabled").incr() + + computeRatioStats(sReceiver, nsfwFollowRatio, "nsfwRatio") + computeNsfwProfileVisitStats(sReceiver, numNsfwProfileVisits) + computeRatioStats(sReceiver, nsfwRealGraphScore.toInt, "nsfwRealGraphScore") + + if (totalSearches >= 10) + computeRatioStats(sReceiver, nsfwSearchScore.toInt, "nsfwSearchScore") + if (searchScore == 0) + sReceiver.counter("lowSearchScore").incr() + if (realGraphScore < 500) + sReceiver.counter("lowRealScore").incr() + if (numNsfwProfileVisits == 0) + sReceiver.counter("lowProfileVisit").incr() + if 
(nsfwFollowRatio == 0) + sReceiver.counter("lowFollowScore").incr() + + if (totalSearches > 10 && searchScore > 5000) + sReceiver.counter("highSearchScore").incr() + if (realGraphScore > 7000) + sReceiver.counter("highRealScore").incr() + if (numNsfwProfileVisits > 5) + sReceiver.counter("highProfileVisit").incr() + if (totalFollows > 10 && nsfwFollowRatio > 7000) + sReceiver.counter("highFollowScore").incr() + + if (searchScore == 0 && realGraphScore <= 500 && numNsfwProfileVisits == 0 && nsfwFollowRatio == 0) + sReceiver.counter("lowIntent").incr() + if ((totalSearches > 10 && searchScore > 5000) || realGraphScore > 7000 || numNsfwProfileVisits > 5 || (totalFollows > 10 && nsfwFollowRatio > 7000)) + sReceiver.counter("highIntent").incr() + case _ => + } + } +} + +case class NsfwInfo(nsfwUserSegmentation: NSFWUserSegmentation) { + + val scalingFactor = 10000 // to convert float to int as custom fields cannot be float + val senstiveOptIn: Option[Boolean] = nsfwUserSegmentation.nsfwView + val totalFollowCount: Long = nsfwUserSegmentation.totalFollowCnt.getOrElse(0L) + val nsfwFollowCnt: Long = + nsfwUserSegmentation.nsfwAdminOrHighprecOrAgathaGtP98FollowsCnt.getOrElse(0L) + val nsfwFollowRatio: Int = { + if (totalFollowCount != 0) { + (nsfwFollowCnt * scalingFactor / totalFollowCount).toInt + } else 0 + } + val nsfwProfileVisits: Long = + nsfwUserSegmentation.nsfwAdminOrHighPrecOrAgathaGtP98Visits + .map(_.numProfilesInLast14Days).getOrElse(0L) + val realGraphScore: Int = + nsfwUserSegmentation.realGraphMetrics + .map { rm => + if (rm.totalOutboundRGScore != 0) + rm.totalNsfwAdmHPAgthGtP98OutboundRGScore * scalingFactor / rm.totalOutboundRGScore + else 0d + }.getOrElse(0d).toInt + val totalSearches: Long = + nsfwUserSegmentation.searchMetrics.map(_.numNonTrndSrchInLast14Days).getOrElse(0L) + val searchNsfwScore: Int = nsfwUserSegmentation.searchMetrics + .map { sm => + if (sm.numNonTrndNonHshtgSrchInLast14Days != 0) + 
sm.numNonTrndNonHshtgGlobalNsfwSrchInLast14Days.toDouble * scalingFactor / sm.numNonTrndNonHshtgSrchInLast14Days + else 0 + }.getOrElse(0d).toInt + val hasReported: Boolean = + nsfwUserSegmentation.notifFeedbackMetrics.exists(_.notifReportMetrics.exists(_.countTotal != 0)) + val hasDisliked: Boolean = + nsfwUserSegmentation.notifFeedbackMetrics + .exists(_.notifDislikeMetrics.exists(_.countTotal != 0)) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/OverrideNotificationUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/OverrideNotificationUtil.scala new file mode 100644 index 000000000..ac4aba8a7 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/OverrideNotificationUtil.scala @@ -0,0 +1,230 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.MagicFanoutEventCandidate +import com.twitter.frigate.common.history.History +import com.twitter.frigate.common.rec_types.RecTypes +import com.twitter.frigate.common.store.deviceinfo.DeviceInfo +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.pushservice.model.ibis.PushOverrideInfo +import com.twitter.frigate.pushservice.params.PushConstants +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.pushservice.params.{PushFeatureSwitchParams => FSParams} +import com.twitter.frigate.thriftscala.CollapseInfo +import com.twitter.frigate.thriftscala.CommonRecommendationType +import com.twitter.frigate.thriftscala.CommonRecommendationType.MagicFanoutSportsEvent +import com.twitter.frigate.thriftscala.OverrideInfo +import com.twitter.util.Future +import java.util.UUID + +object OverrideNotificationUtil { + + /** + * Gets Override Info for the 
current notification. + * @param candidate [[PushCandidate]] object representing the recommendation candidate + * @param stats StatsReceiver to track stats for this function as well as the subsequent funcs. called + * @return Returns OverrideInfo if CollapseInfo exists, else None + */ + + def getOverrideInfo( + candidate: PushCandidate, + stats: StatsReceiver + ): Future[Option[OverrideInfo]] = { + if (candidate.target.isLoggedOutUser) { + Future.None + } else if (isOverrideEnabledForCandidate(candidate)) + getCollapseInfo(candidate, stats).map(_.map(OverrideInfo(_))) + else Future.None + } + + private def getCollapseInfo( + candidate: PushCandidate, + stats: StatsReceiver + ): Future[Option[CollapseInfo]] = { + val target = candidate.target + for { + targetHistory <- target.history + deviceInfo <- target.deviceInfo + } yield getCollapseInfo(target, targetHistory, deviceInfo, stats) + } + + /** + * Get Collapse Info for the current notification. + * @param target Push Target - recipient of the notification + * @param targetHistory Target's History + * @param deviceInfoOpt `Option` of the Target's Device Info + * @param stats StatsReceiver to track stats for this function as well as the subsequent funcs. 
called + * @return Returns CollapseInfo if the Target is eligible for Override Notifs, else None + */ + def getCollapseInfo( + target: PushTypes.Target, + targetHistory: History, + deviceInfoOpt: Option[DeviceInfo], + stats: StatsReceiver + ): Option[CollapseInfo] = { + val overrideInfoOfLastNotif = + PushOverrideInfo.getOverrideInfoOfLastEligiblePushNotif( + targetHistory, + target.params(FSParams.OverrideNotificationsLookbackDurationForOverrideInfo), + stats) + overrideInfoOfLastNotif match { + case Some(prevOverrideInfo) if isOverrideEnabled(target, deviceInfoOpt, stats) => + val notifsInLastOverrideChain = + PushOverrideInfo.getMrPushNotificationsInOverrideChain( + targetHistory, + prevOverrideInfo.collapseInfo.overrideChainId, + stats) + val numNotifsInLastOverrideChain = notifsInLastOverrideChain.size + val timestampOfFirstNotifInOverrideChain = + PushOverrideInfo + .getTimestampInMillisForFrigateNotification( + notifsInLastOverrideChain.last, + targetHistory, + stats).getOrElse(PushConstants.DefaultLookBackForHistory.ago.inMilliseconds) + if (numNotifsInLastOverrideChain < target.params(FSParams.MaxMrPushSends24HoursParam) && + timestampOfFirstNotifInOverrideChain > PushConstants.DefaultLookBackForHistory.ago.inMilliseconds) { + Some(prevOverrideInfo.collapseInfo) + } else { + val prevCollapseId = prevOverrideInfo.collapseInfo.collapseId + val newOverrideChainId = UUID.randomUUID.toString.replaceAll("-", "") + Some(CollapseInfo(prevCollapseId, newOverrideChainId)) + } + case None if isOverrideEnabled(target, deviceInfoOpt, stats) => + val newOverrideChainId = UUID.randomUUID.toString.replaceAll("-", "") + Some(CollapseInfo("", newOverrideChainId)) + case _ => None // Override is disabled for everything else + } + } + + /** + * Gets the collapse and impression identifier for the current override notification + * @param target Push Target - recipient of the notification + * @param stats StatsReceiver to track stats for this function as well as the subsequent 
funcs. called + * @return A Future of Collapse ID as well as the Impression ID. + */ + def getCollapseAndImpressionIdForOverride( + candidate: PushCandidate + ): Future[Option[(String, Seq[String])]] = { + if (isOverrideEnabledForCandidate(candidate)) { + val target = candidate.target + val stats = candidate.statsReceiver + Future.join(target.history, target.deviceInfo).map { + case (targetHistory, deviceInfoOpt) => + val collapseInfoOpt = getCollapseInfo(target, targetHistory, deviceInfoOpt, stats) + + val impressionIds = candidate.commonRecType match { + case MagicFanoutSportsEvent + if target.params(FSParams.EnableEventIdBasedOverrideForSportsCandidates) => + PushOverrideInfo.getImpressionIdsForPrevEligibleMagicFanoutEventCandidates( + targetHistory, + target.params(FSParams.OverrideNotificationsLookbackDurationForImpressionId), + stats, + MagicFanoutSportsEvent, + candidate + .asInstanceOf[RawCandidate with MagicFanoutEventCandidate].eventId + ) + case _ => + PushOverrideInfo.getImpressionIdsOfPrevEligiblePushNotif( + targetHistory, + target.params(FSParams.OverrideNotificationsLookbackDurationForImpressionId), + stats) + } + + collapseInfoOpt match { + case Some(collapseInfo) if impressionIds.nonEmpty => + val notifsInLastOverrideChain = + PushOverrideInfo.getMrPushNotificationsInOverrideChain( + targetHistory, + collapseInfo.overrideChainId, + stats) + stats + .scope("OverrideNotificationUtil").stat("number_of_notifications_sent").add( + notifsInLastOverrideChain.size + 1) + Some((collapseInfo.collapseId, impressionIds)) + case _ => None + } + case _ => None + } + } else Future.None + } + + /** + * Checks to see if override notifications are enabled based on the Target's Device Info and Params + * @param target Push Target - recipient of the notification + * @param deviceInfoOpt `Option` of the Target's Device Info + * @param stats StatsReceiver to track stats for this function + * @return Returns True if Override Notifications are enabled for the provided + 
* Target, else False. + */ + private def isOverrideEnabled( + target: PushTypes.Target, + deviceInfoOpt: Option[DeviceInfo], + stats: StatsReceiver + ): Boolean = { + val scopedStats = stats.scope("OverrideNotificationUtil").scope("isOverrideEnabled") + val enabledForAndroidCounter = scopedStats.counter("android_enabled") + val disabledForAndroidCounter = scopedStats.counter("android_disabled") + val enabledForIosCounter = scopedStats.counter("ios_enabled") + val disabledForIosCounter = scopedStats.counter("ios_disabled") + val disabledForOtherDevicesCounter = scopedStats.counter("other_disabled") + + val isPrimaryDeviceAndroid = PushDeviceUtil.isPrimaryDeviceAndroid(deviceInfoOpt) + val isPrimaryDeviceIos = PushDeviceUtil.isPrimaryDeviceIOS(deviceInfoOpt) + + lazy val validAndroidDevice = + isPrimaryDeviceAndroid && target.params(FSParams.EnableOverrideNotificationsForAndroid) + lazy val validIosDevice = + isPrimaryDeviceIos && target.params(FSParams.EnableOverrideNotificationsForIos) + + if (isPrimaryDeviceAndroid) { + if (validAndroidDevice) enabledForAndroidCounter.incr() else disabledForAndroidCounter.incr() + } else if (isPrimaryDeviceIos) { + if (validIosDevice) enabledForIosCounter.incr() else disabledForIosCounter.incr() + } else { + disabledForOtherDevicesCounter.incr() + } + + validAndroidDevice || validIosDevice + } + + /** + * Checks if override is enabled for the currently supported types for SendHandler or not. + * This method is package private for unit testing. + * @param candidate [[PushCandidate]] + * @param stats StatsReceiver to track statistics for this function + * @return Returns True if override notifications are enabled for the current type, otherwise False. 
+ */ + private def isOverrideEnabledForSendHandlerCandidate( + candidate: PushCandidate + ): Boolean = { + val scopedStats = candidate.statsReceiver + .scope("OverrideNotificationUtil").scope("isOverrideEnabledForSendHandlerType") + + val overrideSupportedTypesForSpaces: Set[CommonRecommendationType] = Set( + CommonRecommendationType.SpaceSpeaker, + CommonRecommendationType.SpaceHost + ) + + val isOverrideSupportedForSpaces = { + overrideSupportedTypesForSpaces.contains(candidate.commonRecType) && + candidate.target.params(FSParams.EnableOverrideForSpaces) + } + + val isOverrideSupportedForSports = { + candidate.commonRecType == CommonRecommendationType.MagicFanoutSportsEvent && + candidate.target + .params(PushFeatureSwitchParams.EnableOverrideForSportsCandidates) + } + + val isOverrideSupported = isOverrideSupportedForSpaces || isOverrideSupportedForSports + + scopedStats.counter(s"$isOverrideSupported").incr() + isOverrideSupported + } + + private[util] def isOverrideEnabledForCandidate(candidate: PushCandidate) = + !RecTypes.isSendHandlerType( + candidate.commonRecType) || isOverrideEnabledForSendHandlerCandidate(candidate) +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAdaptorUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAdaptorUtil.scala new file mode 100644 index 000000000..7bc29cf01 --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAdaptorUtil.scala @@ -0,0 +1,151 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.contentrecommender.thriftscala.MetricTag +import com.twitter.frigate.common.base.AlgorithmScore +import com.twitter.frigate.common.base.OutOfNetworkTweetCandidate +import com.twitter.frigate.common.base.SocialContextAction +import com.twitter.frigate.common.base.TopicCandidate +import com.twitter.frigate.common.base.TripCandidate +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import 
com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.thriftscala.{SocialContextAction => TSocialContextAction} +import com.twitter.frigate.thriftscala.{CommonRecommendationType => CRT} +import com.twitter.frigate.thriftscala._ +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.topiclisting.utt.LocalizedEntity +import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain +import scala.collection.Seq + +case class MediaCRT( + crt: CRT, + photoCRT: CRT, + videoCRT: CRT) + +object PushAdaptorUtil { + + def getFrigateNotificationForUser( + crt: CRT, + userId: Long, + scActions: Seq[SocialContextAction], + pushCopyId: Option[Int], + ntabCopyId: Option[Int] + ): FrigateNotification = { + + val thriftSCActions = scActions.map { scAction => + TSocialContextAction( + scAction.userId, + scAction.timestampInMillis, + scAction.tweetId + ) + } + FrigateNotification( + crt, + NotificationDisplayLocation.PushToMobileDevice, + userNotification = Some(UserNotification(userId, thriftSCActions)), + pushCopyId = pushCopyId, + ntabCopyId = ntabCopyId + ) + } + + def getFrigateNotificationForTweet( + crt: CRT, + tweetId: Long, + scActions: Seq[TSocialContextAction], + authorIdOpt: Option[Long], + pushCopyId: Option[Int], + ntabCopyId: Option[Int], + simclusterId: Option[Int], + semanticCoreEntityIds: Option[List[Long]], + candidateContent: Option[CandidateContent], + trendId: Option[String], + tweetTripDomain: Option[scala.collection.Set[TripDomain]] = None + ): FrigateNotification = { + FrigateNotification( + crt, + NotificationDisplayLocation.PushToMobileDevice, + tweetNotification = Some( + TweetNotification( + tweetId, + scActions, + authorIdOpt, + simclusterId, + semanticCoreEntityIds, + trendId, + tripDomain = tweetTripDomain) + ), + pushCopyId = pushCopyId, + ntabCopyId = ntabCopyId, + candidateContent = candidateContent + ) + } + + def getFrigateNotificationForTweetWithSocialContextActions( + crt: CRT, + 
tweetId: Long, + scActions: Seq[SocialContextAction], + authorIdOpt: Option[Long], + pushCopyId: Option[Int], + ntabCopyId: Option[Int], + candidateContent: Option[CandidateContent], + semanticCoreEntityIds: Option[List[Long]], + trendId: Option[String] + ): FrigateNotification = { + + val thriftSCActions = scActions.map { scAction => + TSocialContextAction( + scAction.userId, + scAction.timestampInMillis, + scAction.tweetId + ) + } + + getFrigateNotificationForTweet( + crt = crt, + tweetId = tweetId, + scActions = thriftSCActions, + authorIdOpt = authorIdOpt, + pushCopyId = pushCopyId, + ntabCopyId = ntabCopyId, + simclusterId = None, + candidateContent = candidateContent, + semanticCoreEntityIds = semanticCoreEntityIds, + trendId = trendId + ) + } + + def generateOutOfNetworkTweetCandidates( + inputTarget: Target, + id: Long, + mediaCRT: MediaCRT, + result: Option[TweetyPieResult], + localizedEntity: Option[LocalizedEntity] = None, + isMrBackfillFromCR: Option[Boolean] = None, + tagsFromCR: Option[Seq[MetricTag]] = None, + score: Option[Double] = None, + algorithmTypeCR: Option[String] = None, + tripTweetDomain: Option[scala.collection.Set[TripDomain]] = None + ): RawCandidate + with OutOfNetworkTweetCandidate + with TopicCandidate + with TripCandidate + with AlgorithmScore = { + new RawCandidate + with OutOfNetworkTweetCandidate + with TopicCandidate + with TripCandidate + with AlgorithmScore { + override val tweetId: Long = id + override val target: Target = inputTarget + override val tweetyPieResult: Option[TweetyPieResult] = result + override val localizedUttEntity: Option[LocalizedEntity] = localizedEntity + override val semanticCoreEntityId: Option[Long] = localizedEntity.map(_.entityId) + override def commonRecType: CRT = + getMediaBasedCRT(mediaCRT.crt, mediaCRT.photoCRT, mediaCRT.videoCRT) + override def isMrBackfillCR: Option[Boolean] = isMrBackfillFromCR + override def tagsCR: Option[Seq[MetricTag]] = tagsFromCR + override def algorithmScore: 
Option[Double] = score + override def algorithmCR: Option[String] = algorithmTypeCR + override def tripDomain: Option[collection.Set[TripDomain]] = tripTweetDomain + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAppPermissionUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAppPermissionUtil.scala new file mode 100644 index 000000000..0afa90fed --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushAppPermissionUtil.scala @@ -0,0 +1,49 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.frigate.common.store.deviceinfo.DeviceInfo +import com.twitter.onboarding.task.service.models.external.PermissionState +import com.twitter.permissions_storage.thriftscala.AppPermission +import com.twitter.storehaus.ReadableStore +import com.twitter.util.Future + +object PushAppPermissionUtil { + + final val AddressBookPermissionKey = "addressBook" + final val SyncStateKey = "syncState" + final val SyncStateOnValue = "on" + + /** + * Obtains the specified target's App Permissions, based on their primary device. + * @param targetId Target's Identifier + * @param permissionName The permission type we are querying for (address book, geolocation, etc.) 
+   * @param deviceInfoFut Device info of the Target, presented as a Future
+   * @param appPermissionStore Readable Store which allows us to query the App Permission Strato Column
+   * @return Returns the AppPermission of the Target, presented as a Future
+   */
+  def getAppPermission(
+    targetId: Long,
+    permissionName: String,
+    deviceInfoFut: Future[Option[DeviceInfo]],
+    appPermissionStore: ReadableStore[(Long, (String, String)), AppPermission]
+  ): Future[Option[AppPermission]] =
+    deviceInfoFut.flatMap { deviceInfoOpt =>
+      // The store key is (target id, (primary device id, permission name)).
+      deviceInfoOpt.flatMap(_.primaryDeviceId) match {
+        case Some(deviceId) =>
+          appPermissionStore.get((targetId, (deviceId, permissionName)))
+        case _ =>
+          // No device info or no primary device id: nothing to query.
+          Future.None
+      }
+    }
+
+  /**
+   * True when the permission's system state is On and its "syncState" metadata is "on" (any case).
+   */
+  def hasTargetUploadedAddressBook(appPermissionOpt: Option[AppPermission]): Boolean =
+    appPermissionOpt.exists { permission =>
+      val syncStateOpt = permission.metadata.get(SyncStateKey)
+      val systemStateOn = permission.systemPermissionState == PermissionState.On
+      systemStateOn && syncStateOpt.exists(_.equalsIgnoreCase(SyncStateOnValue))
+    }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushCapUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushCapUtil.scala
new file mode 100644
index 000000000..d5d79c4fd
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushCapUtil.scala
@@ -0,0 +1,184 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.conversions.DurationOps._
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.base.TargetUser
+import com.twitter.frigate.common.candidate.FrigateHistory
+import com.twitter.frigate.common.candidate.ResurrectedUserDetails
+import com.twitter.frigate.common.candidate.TargetABDecider
+import com.twitter.frigate.common.candidate.UserDetails
+import com.twitter.frigate.pushcap.thriftscala.ModelType
+import
com.twitter.frigate.pushcap.thriftscala.PushcapInfo +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams +import com.twitter.frigate.scribe.thriftscala.PushCapInfo +import com.twitter.util.Duration +import com.twitter.util.Future + +case class PushCapFatigueInfo( + pushcap: Int, + fatigueInterval: Duration) {} + +object PushCapUtil { + + def getDefaultPushCap(target: Target): Future[Int] = { + Future.value(target.params(PushFeatureSwitchParams.MaxMrPushSends24HoursParam)) + } + + def getMinimumRestrictedPushcapInfo( + restrictedPushcap: Int, + originalPushcapInfo: PushcapInfo, + statsReceiver: StatsReceiver + ): PushcapInfo = { + if (originalPushcapInfo.pushcap < restrictedPushcap) { + statsReceiver + .scope("minModelPushcapRestrictions").counter( + f"num_users_adjusted_from_${originalPushcapInfo.pushcap}_to_${restrictedPushcap}").incr() + PushcapInfo( + pushcap = restrictedPushcap.toShort, + modelType = ModelType.NoModel, + timestamp = 0L, + fatigueMinutes = Some((24L / restrictedPushcap) * 60L) + ) + } else originalPushcapInfo + } + + def getPushCapFatigue( + target: Target, + statsReceiver: StatsReceiver + ): Future[PushCapFatigueInfo] = { + val pushCapStats = statsReceiver.scope("pushcap_stats") + target.dynamicPushcap + .map { dynamicPushcapOpt => + val pushCap: Int = dynamicPushcapOpt match { + case Some(pushcapInfo) => pushcapInfo.pushcap + case _ => target.params(PushFeatureSwitchParams.MaxMrPushSends24HoursParam) + } + + pushCapStats.stat("pushCapValueStats").add(pushCap) + pushCapStats + .scope("pushCapValueCount").counter(f"num_users_with_pushcap_$pushCap").incr() + + target.finalPushcapAndFatigue += "pushPushCap" -> PushCapInfo("pushPushCap", pushCap.toByte) + + PushCapFatigueInfo(pushCap, 24.hours) + } + } + + def getMinDurationsSincePushWithoutUsingPushCap( + target: TargetUser + with TargetABDecider + with FrigateHistory + with UserDetails + with ResurrectedUserDetails + 
)( + implicit statsReceiver: StatsReceiver + ): Duration = { + val minDurationSincePush = + if (target.params(PushFeatureSwitchParams.EnableGraduallyRampUpNotification)) { + val daysInterval = + target.params(PushFeatureSwitchParams.GraduallyRampUpPhaseDurationDays).inDays.toDouble + val daysSinceActivation = + if (target.isResurrectedUser && target.timeSinceResurrection.isDefined) { + target.timeSinceResurrection.map(_.inDays.toDouble).get + } else { + target.timeElapsedAfterSignup.inDays.toDouble + } + val phaseInterval = + Math.max( + 1, + Math.ceil(daysSinceActivation / daysInterval).toInt + ) + val minDuration = 24 / phaseInterval + val finalMinDuration = + Math.max(4, minDuration).hours + statsReceiver + .scope("GraduallyRampUpFinalMinDuration").counter(s"$finalMinDuration.hours").incr() + finalMinDuration + } else { + target.params(PushFeatureSwitchParams.MinDurationSincePushParam) + } + statsReceiver + .scope("minDurationsSincePushWithoutUsingPushCap").counter( + s"$minDurationSincePush.hours").incr() + minDurationSincePush + } + + def getMinDurationSincePush( + target: Target, + statsReceiver: StatsReceiver + ): Future[Duration] = { + val minDurationStats: StatsReceiver = statsReceiver.scope("pushcapMinDuration_stats") + val minDurationModifierCalculator = + MinDurationModifierCalculator() + val openedPushByHourAggregatedFut = + if (target.params(PushFeatureSwitchParams.EnableQueryUserOpenedHistory)) + target.openedPushByHourAggregated + else Future.None + Future + .join( + target.dynamicPushcap, + target.accountCountryCode, + openedPushByHourAggregatedFut + ) + .map { + case (dynamicPushcapOpt, countryCodeOpt, openedPushByHourAggregated) => + val minDurationSincePush: Duration = { + val isGraduallyRampingUpResurrected = target.isResurrectedUser && target.params( + PushFeatureSwitchParams.EnableGraduallyRampUpNotification) + if (isGraduallyRampingUpResurrected || target.params( + PushFeatureSwitchParams.EnableExplicitPushCap)) { + 
getMinDurationsSincePushWithoutUsingPushCap(target)(minDurationStats) + } else { + dynamicPushcapOpt match { + case Some(pushcapInfo) => + pushcapInfo.fatigueMinutes match { + case Some(fatigueMinutes) => (fatigueMinutes / 60).hours + case _ if pushcapInfo.pushcap > 0 => (24 / pushcapInfo.pushcap).hours + case _ => getMinDurationsSincePushWithoutUsingPushCap(target)(minDurationStats) + } + case _ => + getMinDurationsSincePushWithoutUsingPushCap(target)(minDurationStats) + } + } + } + + val modifiedMinDurationSincePush = + if (target.params(PushFeatureSwitchParams.EnableMinDurationModifier)) { + val modifierHourOpt = + minDurationModifierCalculator.getMinDurationModifier( + target, + countryCodeOpt, + statsReceiver.scope("MinDuration")) + modifierHourOpt match { + case Some(modifierHour) => modifierHour.hours + case _ => minDurationSincePush + } + } else if (target.params( + PushFeatureSwitchParams.EnableMinDurationModifierByUserHistory)) { + val modifierMinuteOpt = + minDurationModifierCalculator.getMinDurationModifierByUserOpenedHistory( + target, + openedPushByHourAggregated, + statsReceiver.scope("MinDuration")) + + modifierMinuteOpt match { + case Some(modifierMinute) => modifierMinute.minutes + case _ => minDurationSincePush + } + } else minDurationSincePush + + target.finalPushcapAndFatigue += "pushFatigue" -> PushCapInfo( + "pushFatigue", + modifiedMinDurationSincePush.inHours.toByte) + + minDurationStats + .stat("minDurationSincePushValueStats").add(modifiedMinDurationSincePush.inHours) + minDurationStats + .scope("minDurationSincePushValueCount").counter( + s"$modifiedMinDurationSincePush").incr() + + modifiedMinDurationSincePush + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushDeviceUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushDeviceUtil.scala new file mode 100644 index 000000000..d191d742a --- /dev/null +++ 
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushDeviceUtil.scala
@@ -0,0 +1,60 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.frigate.common.store.deviceinfo.DeviceInfo
+import com.twitter.frigate.common.store.deviceinfo.MobileClientType
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.util.Future
+import com.twitter.finagle.stats.NullStatsReceiver
+import com.twitter.finagle.stats.StatsReceiver
+
+/** Platform and eligibility checks derived from a push target's DeviceInfo. */
+object PushDeviceUtil {
+
+  /** True when the guessed primary client is Android or AndroidLite; false when unknown. */
+  def isPrimaryDeviceAndroid(deviceInfoOpt: Option[DeviceInfo]): Boolean = {
+    deviceInfoOpt.exists {
+      _.guessedPrimaryClient.exists { clientType =>
+        (clientType == MobileClientType.Android) || (clientType == MobileClientType.AndroidLite)
+      }
+    }
+  }
+
+  /** True when the guessed primary client is Iphone or Ipad; false when unknown. */
+  def isPrimaryDeviceIOS(deviceInfoOpt: Option[DeviceInfo]): Boolean = {
+    deviceInfoOpt.exists {
+      _.guessedPrimaryClient.exists { clientType =>
+        (clientType == MobileClientType.Iphone) || (clientType == MobileClientType.Ipad)
+      }
+    }
+  }
+
+  /** Resolves to the device's isRecommendationsEligible flag; false without DeviceInfo. */
+  def isPushRecommendationsEligible(target: Target): Future[Boolean] =
+    target.deviceInfo.map(_.exists(_.isRecommendationsEligible))
+
+  /**
+   * Resolves to the device's isTopicsEligible flag; false without DeviceInfo.
+   *
+   * @param target user whose device info is consulted
+   * @param statsReceiver not read by this check; retained so existing callers keep compiling
+   */
+  def isTopicsEligible(
+    target: Target,
+    statsReceiver: StatsReceiver = NullStatsReceiver
+  ): Future[Boolean] =
+    target.deviceInfo.map(_.exists(_.isTopicsEligible))
+
+  /** Resolves to the device's isSpacesEligible flag; false without DeviceInfo. */
+  def isSpacesEligible(target: Target): Future[Boolean] =
+    target.deviceInfo.map(_.exists(_.isSpacesEligible))
+
+  /** Ntab-only eligibility; always resolves to false. */
+  def isNtabOnlyEligible: Future[Boolean] = Future.False
+
+  /** Resolves to true when the target is push-recommendation eligible or ntab-only eligible. */
+  def isRecommendationsEligible(target: Target): Future[Boolean] =
+    Future.join(isPushRecommendationsEligible(target), isNtabOnlyEligible).map {
+      // The tuple pattern is irrefutable, so no fallback case is needed.
+      case (isPushRecommendation, isNtabOnly) => isPushRecommendation || isNtabOnly
+    }
+}
diff --git
a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushIbisUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushIbisUtil.scala
new file mode 100644
index 000000000..7567726bf
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushIbisUtil.scala
@@ -0,0 +1,35 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.util.Future
+
+/** Helpers that build and merge String-to-String model-value maps. */
+object PushIbisUtil {
+
+  /**
+   * Builds the "social_users" model value from the given user ids.
+   *
+   * Fewer than three ids are all listed, comma-separated (e.g. "1,2"). With three or more,
+   * only the first id is listed, followed by "+" and the number of remaining ids (e.g. "1+2").
+   */
+  def getSocialContextModelValues(socialContextUserIds: Seq[Long]): Map[String, String] = {
+    val totalUsers = socialContextUserIds.size
+    val socialUsers =
+      if (totalUsers >= 3) s"${socialContextUserIds.head}+${totalUsers - 1}"
+      else socialContextUserIds.mkString(",")
+    Map("social_users" -> socialUsers)
+  }
+
+  /** Waits for both futures and merges the maps; on duplicate keys the second map wins. */
+  def mergeFutModelValues(
+    mvFut1: Future[Map[String, String]],
+    mvFut2: Future[Map[String, String]]
+  ): Future[Map[String, String]] =
+    Future.join(mvFut1, mvFut2).map { case (first, second) => first ++ second }
+
+  /** Merges an already-available map into a future map; on duplicate keys `mv2` wins. */
+  def mergeModelValues(
+    mvFut1: Future[Map[String, String]],
+    mv2: Map[String, String]
+  ): Future[Map[String, String]] =
+    mvFut1.map(_ ++ mv2)
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushToHomeUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushToHomeUtil.scala
new file mode 100644
index 000000000..8f9fd63c3
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/PushToHomeUtil.scala
@@ -0,0 +1,24 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.common.store.deviceinfo.DeviceInfo
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
+object
PushToHomeUtil { + def getIbis2ModelValue( + deviceInfoOpt: Option[DeviceInfo], + target: Target, + stats: StatsReceiver + ): Option[Map[String, String]] = { + deviceInfoOpt.flatMap { deviceInfo => + val isAndroidEnabled = deviceInfo.isLandOnHomeAndroid && target.params( + PushFeatureSwitchParams.EnableTweetPushToHomeAndroid) + val isIOSEnabled = deviceInfo.isLandOnHomeiOS && target.params( + PushFeatureSwitchParams.EnableTweetPushToHomeiOS) + if (isAndroidEnabled || isIOSEnabled) { + stats.counter("enable_push_to_home").incr() + Some(Map("is_land_on_home" -> "true")) + } else None + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RFPHTakeStepUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RFPHTakeStepUtil.scala new file mode 100644 index 000000000..015e065ec --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RFPHTakeStepUtil.scala @@ -0,0 +1,114 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.finagle.stats.Counter +import com.twitter.finagle.stats.Stat +import com.twitter.finagle.stats.StatsReceiver +import com.twitter.frigate.common.base.Invalid +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.refresh_handler.ResultWithDebugInfo +import com.twitter.frigate.pushservice.predicate.BigFilteringEpsilonGreedyExplorationPredicate +import com.twitter.frigate.pushservice.predicate.MlModelsHoldbackExperimentPredicate +import com.twitter.frigate.pushservice.take.candidate_validator.RFPHCandidateValidator +import com.twitter.frigate.pushservice.thriftscala.PushStatus +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.util.Future + +class RFPHTakeStepUtil()(globalStats: StatsReceiver) { + + implicit val statsReceiver: StatsReceiver = + globalStats.scope("RefreshForPushHandler") + private val takeStats: StatsReceiver = 
statsReceiver.scope("take") + private val notifierStats = takeStats.scope("notifier") + private val validatorStats = takeStats.scope("validator") + private val validatorLatency: Stat = validatorStats.stat("latency") + + private val executedPredicatesInTandem: Counter = + takeStats.counter("predicates_executed_in_tandem") + + private val bigFilteringEpsGreedyPredicate: NamedPredicate[PushCandidate] = + BigFilteringEpsilonGreedyExplorationPredicate()(takeStats) + private val bigFilteringEpsGreedyStats: StatsReceiver = + takeStats.scope("big_filtering_eps_greedy_predicate") + + private val modelPredicate: NamedPredicate[PushCandidate] = + MlModelsHoldbackExperimentPredicate()(takeStats) + private val mlPredicateStats: StatsReceiver = takeStats.scope("ml_predicate") + + private def updateFilteredStatusExptStats(candidate: PushCandidate, predName: String): Unit = { + + val recTypeStat = globalStats.scope( + candidate.commonRecType.toString + ) + + recTypeStat.counter(PushStatus.Filtered.toString).incr() + recTypeStat + .scope(PushStatus.Filtered.toString) + .counter(predName) + .incr() + } + + def isCandidateValid( + candidate: PushCandidate, + candidateValidator: RFPHCandidateValidator + ): Future[ResultWithDebugInfo] = { + val predResultFuture = Stat.timeFuture(validatorLatency) { + Future + .join( + bigFilteringEpsGreedyPredicate.apply(Seq(candidate)), + modelPredicate.apply(Seq(candidate)) + ).flatMap { + case (Seq(true), Seq(true)) => + executedPredicatesInTandem.incr() + + bigFilteringEpsGreedyStats + .scope(candidate.commonRecType.toString) + .counter("passed") + .incr() + + mlPredicateStats + .scope(candidate.commonRecType.toString) + .counter("passed") + .incr() + candidateValidator.validateCandidate(candidate).map((_, Nil)) + case (Seq(false), _) => + bigFilteringEpsGreedyStats + .scope(candidate.commonRecType.toString) + .counter("filtered") + .incr() + Future.value((Some(bigFilteringEpsGreedyPredicate), Nil)) + case (_, _) => + mlPredicateStats + 
.scope(candidate.commonRecType.toString) + .counter("filtered") + .incr() + Future.value((Some(modelPredicate), Nil)) + } + } + + predResultFuture.map { + case (Some(pred: NamedPredicate[_]), candPredicateResults) => + takeStats.counter("filtered_by_named_general_predicate").incr() + updateFilteredStatusExptStats(candidate, pred.name) + ResultWithDebugInfo( + Invalid(Some(pred.name)), + candPredicateResults + ) + + case (Some(_), candPredicateResults) => + takeStats.counter("filtered_by_unnamed_general_predicate").incr() + updateFilteredStatusExptStats(candidate, predName = "unk") + ResultWithDebugInfo( + Invalid(Some("unnamed_candidate_predicate")), + candPredicateResults + ) + + case (None, candPredicateResults) => + takeStats.counter("accepted_push_ok").incr() + ResultWithDebugInfo( + OK, + candPredicateResults + ) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RelationshipUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RelationshipUtil.scala new file mode 100644 index 000000000..8f24756ae --- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/RelationshipUtil.scala @@ -0,0 +1,66 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.frigate.common.base.TweetAuthor +import com.twitter.frigate.pushservice.model.PushTypes.RawCandidate +import com.twitter.hermit.predicate.socialgraph.Edge +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.socialgraph.thriftscala.RelationshipType + +/** + * This class provides utility functions for relationshipEdge for each Candidate type. 
+ */
+object RelationshipUtil {
+
+  /**
+   * Builds one RelationEdge per requested relationship type between the target and the author.
+   * Returns an empty list when the candidate is not a TweetAuthor or carries no author id.
+   * @param candidate PushCandidate
+   * @param relationship relationshipTypes for different candidate types
+   * @return relationEdges for different candidate types
+   */
+  private def formRelationEdgeWithTargetIdAndAuthorId(
+    candidate: RawCandidate,
+    relationship: List[RelationshipType with Product]
+  ): List[RelationEdge] =
+    candidate match {
+      case authored: RawCandidate with TweetAuthor =>
+        authored.authorId
+          .map { authorId =>
+            // A single edge is reused for every relationship type.
+            val targetToAuthor = Edge(authored.target.targetId, authorId)
+            relationship.map(relationType => RelationEdge(targetToAuthor, relationType))
+          }
+          .getOrElse(List.empty[RelationEdge])
+      case _ =>
+        List.empty[RelationEdge]
+    }
+
+  /**
+   * Relation edges for the basic tweet relationships: DeviceFollowing, Blocking, BlockedBy,
+   * HideRecommendations and Muting.
+   * @param candidate PushCandidate
+   * @return List of relationEdges for basicTweetRelationShips
+   */
+  def getBasicTweetRelationships(candidate: RawCandidate): List[RelationEdge] =
+    formRelationEdgeWithTargetIdAndAuthorId(
+      candidate,
+      List(
+        RelationshipType.DeviceFollowing,
+        RelationshipType.Blocking,
+        RelationshipType.BlockedBy,
+        RelationshipType.HideRecommendations,
+        RelationshipType.Muting
+      )
+    )
+
+  /**
+   * Basic tweet relationship edges followed by a Following edge (F1tweetsRelationships).
+   * @param candidate PushCandidate
+   * @return List of relationEdges for F1tweetsRelationships
+   */
+  def getPreCandidateRelationshipsForInNetworkTweets(
+    candidate: RawCandidate
+  ): List[RelationEdge] =
+    getBasicTweetRelationships(candidate) ++
+      formRelationEdgeWithTargetIdAndAuthorId(candidate, List(RelationshipType.Following))
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/ResponseStatsTrackUtils.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/ResponseStatsTrackUtils.scala
new file mode 100644
index 000000000..1b16ec8c0
--- /dev/null
+++
b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/ResponseStatsTrackUtils.scala
@@ -0,0 +1,53 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.finagle.stats.BroadcastStatsReceiver
+import com.twitter.finagle.stats.StatsReceiver
+import com.twitter.frigate.pushservice.model.PushTypes.Target
+import com.twitter.frigate.pushservice.thriftscala.PushResponse
+import com.twitter.frigate.pushservice.thriftscala.PushStatus
+import com.twitter.frigate.thriftscala.CommonRecommendationType
+
+object ResponseStatsTrackUtils {
+
+  /**
+   * Increments counters describing the outcome of a push request.
+   *
+   * The status counter goes to `receivers` plus two scopes of `originalStats`
+   * ("is_model_training_data" and "scribe_target"). The training-data-sent counter and the
+   * per-filter counter go to `receivers` only.
+   *
+   * @param crt recommendation type, mapped to the scribe-target scope name
+   * @param target supplies the isModelTrainingData flag
+   * @param response response whose status and filteredBy are counted
+   * @param receivers receivers that get every counter
+   * @param originalStats receiver the two extra scopes are derived from
+   */
+  def trackStatsForResponseToRequest(
+    crt: CommonRecommendationType,
+    target: Target,
+    response: PushResponse,
+    receivers: Seq[StatsReceiver]
+  )(
+    originalStats: StatsReceiver
+  ): Unit = {
+    val trainingDataScope =
+      originalStats.scope("is_model_training_data").scope(target.isModelTrainingData.toString)
+    val scribeTargetScope =
+      originalStats.scope("scribe_target").scope(IbisScribeTargets.crtToScribeTarget(crt))
+
+    val baseStats = BroadcastStatsReceiver(receivers)
+    val statsWithExpts =
+      BroadcastStatsReceiver(Seq(trainingDataScope, scribeTargetScope) ++ receivers)
+
+    if (response.status == PushStatus.Sent && target.isModelTrainingData) {
+      baseStats.counter("num_training_data_recs_sent").incr()
+    }
+
+    statsWithExpts.counter(response.status.toString).incr()
+
+    if (response.status == PushStatus.Filtered) {
+      val filteredBy = response.filteredBy.getOrElse("None")
+      baseStats.scope(response.status.toString).counter(filteredBy).incr()
+    }
+  }
+}
diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/SendHandlerPredicateUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/SendHandlerPredicateUtil.scala
new file mode 100644
index 000000000..4174fa21c
--- /dev/null
+++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/SendHandlerPredicateUtil.scala
@@ -0,0 +1,129 @@
+package com.twitter.frigate.pushservice.util
+
+import com.twitter.finagle.stats.StatsReceiver
+import
com.twitter.frigate.common.base.CandidateDetails +import com.twitter.frigate.common.base.CandidateResult +import com.twitter.frigate.common.base.Invalid +import com.twitter.frigate.common.base.OK +import com.twitter.frigate.common.base.Result +import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate +import com.twitter.frigate.pushservice.refresh_handler.ResultWithDebugInfo +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPostCandidateValidator +import com.twitter.frigate.pushservice.take.candidate_validator.SendHandlerPreCandidateValidator +import com.twitter.frigate.pushservice.thriftscala.PushStatus +import com.twitter.hermit.predicate.NamedPredicate +import com.twitter.util.Future + +class SendHandlerPredicateUtil()(globalStats: StatsReceiver) { + implicit val statsReceiver: StatsReceiver = + globalStats.scope("SendHandler") + private val validateStats: StatsReceiver = statsReceiver.scope("validate") + + private def updateFilteredStatusExptStats(candidate: PushCandidate, predName: String): Unit = { + + val recTypeStat = globalStats.scope( + candidate.commonRecType.toString + ) + + recTypeStat.counter(PushStatus.Filtered.toString).incr() + recTypeStat + .scope(PushStatus.Filtered.toString) + .counter(predName) + .incr() + } + + /** + * Parsing the candidateValidtor result into desired format for preValidation before ml filtering + * @param hydratedCandidates + * @param candidateValidator + * @return + */ + def preValidationForCandidate( + hydratedCandidates: Seq[CandidateDetails[PushCandidate]], + candidateValidator: SendHandlerPreCandidateValidator + ): Future[ + (Seq[CandidateDetails[PushCandidate]], Seq[CandidateResult[PushCandidate, Result]]) + ] = { + val predResultFuture = + Future.collect( + hydratedCandidates.map(hydratedCandidate => + candidateValidator.validateCandidate(hydratedCandidate.candidate)) + ) + + predResultFuture.map { results => + results + .zip(hydratedCandidates) + .foldLeft( + ( + 
Seq.empty[CandidateDetails[PushCandidate]], + Seq.empty[CandidateResult[PushCandidate, Result]] + ) + ) { + case ((goodCandidates, filteredCandidates), (result, candidateDetails)) => + result match { + case None => + (goodCandidates :+ candidateDetails, filteredCandidates) + case Some(pred: NamedPredicate[_]) => + val r = Invalid(Some(pred.name)) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + case Some(_) => + val r = Invalid(Some("Filtered by un-named predicate")) + ( + goodCandidates, + filteredCandidates :+ CandidateResult[PushCandidate, Result]( + candidateDetails.candidate, + candidateDetails.source, + r + ) + ) + } + } + } + } + + /** + * Parsing the candidateValidtor result into desired format for postValidation including and after ml filtering + * @param candidate + * @param candidateValidator + * @return + */ + def postValidationForCandidate( + candidate: PushCandidate, + candidateValidator: SendHandlerPostCandidateValidator + ): Future[ResultWithDebugInfo] = { + val predResultFuture = + candidateValidator.validateCandidate(candidate) + + predResultFuture.map { + case (Some(pred: NamedPredicate[_])) => + validateStats.counter("filtered_by_named_general_predicate").incr() + updateFilteredStatusExptStats(candidate, pred.name) + ResultWithDebugInfo( + Invalid(Some(pred.name)), + Nil + ) + + case Some(_) => + validateStats.counter("filtered_by_unnamed_general_predicate").incr() + updateFilteredStatusExptStats(candidate, predName = "unk") + ResultWithDebugInfo( + Invalid(Some("unnamed_candidate_predicate")), + Nil + ) + + case _ => + validateStats.counter("accepted_push_ok").incr() + ResultWithDebugInfo( + OK, + Nil + ) + } + } +} diff --git a/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/TopicsUtil.scala b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/TopicsUtil.scala new file mode 100644 index 000000000..a5b7cf2c5 
--- /dev/null +++ b/pushservice/src/main/scala/com/twitter/frigate/pushservice/util/TopicsUtil.scala @@ -0,0 +1,340 @@ +package com.twitter.frigate.pushservice.util + +import com.twitter.contentrecommender.thriftscala.DisplayLocation +import com.twitter.finagle.stats.Stat +import com.twitter.frigate.common.base.TargetUser +import com.twitter.frigate.common.predicate.CommonOutNetworkTweetCandidatesSourcePredicates.authorNotBeingFollowedPredicate +import com.twitter.frigate.common.store.interests.InterestsLookupRequestWithContext +import com.twitter.frigate.pushservice.model.PushTypes.Target +import com.twitter.frigate.pushservice.model.PushTypes +import com.twitter.frigate.pushservice.store.UttEntityHydrationQuery +import com.twitter.frigate.pushservice.store.UttEntityHydrationStore +import com.twitter.hermit.predicate.Predicate +import com.twitter.hermit.predicate.socialgraph.RelationEdge +import com.twitter.interests.thriftscala.InterestRelationType +import com.twitter.interests.thriftscala.InterestRelationship +import com.twitter.interests.thriftscala.InterestedInInterestLookupContext +import com.twitter.interests.thriftscala.InterestedInInterestModel +import com.twitter.interests.thriftscala.ProductId +import com.twitter.interests.thriftscala.UserInterest +import com.twitter.interests.thriftscala.UserInterestData +import com.twitter.interests.thriftscala.UserInterests +import com.twitter.interests.thriftscala.{TopicListingViewerContext => TopicListingViewerContextCR} +import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult +import com.twitter.storehaus.ReadableStore +import com.twitter.timelines.configapi.Param +import com.twitter.topiclisting.TopicListingViewerContext +import com.twitter.topiclisting.utt.LocalizedEntity +import com.twitter.tsp.thriftscala.TopicListingSetting +import com.twitter.tsp.thriftscala.TopicSocialProofRequest +import com.twitter.tsp.thriftscala.TopicSocialProofResponse +import com.twitter.tsp.thriftscala.TopicWithScore +import 
com.twitter.util.Future +import scala.collection.Map + +case class TweetWithTopicProof( + tweetId: Long, + topicId: Long, + authorId: Option[Long], + score: Double, + tweetyPieResult: TweetyPieResult, + topicListingSetting: String, + algorithmCR: Option[String], + isOON: Boolean) + +object TopicsUtil { + + /** + * Obtains the Localized Entities for the provided SC Entity IDs + * @param target The target user for which we're obtaining candidates + * @param semanticCoreEntityIds The seq. of entity ids for which we would like to obtain the Localized Entities + * @param uttEntityHydrationStore Store to query the actual LocalizedEntities + * @return A Future Map consisting of the entity id as the key and LocalizedEntity as the value + */ + def getLocalizedEntityMap( + target: Target, + semanticCoreEntityIds: Set[Long], + uttEntityHydrationStore: UttEntityHydrationStore + ): Future[Map[Long, LocalizedEntity]] = { + buildTopicListingViewerContext(target) + .flatMap { topicListingViewerContext => + val query = UttEntityHydrationQuery(topicListingViewerContext, semanticCoreEntityIds.toSeq) + val localizedTopicEntitiesFut = + uttEntityHydrationStore.getLocalizedTopicEntities(query).map(_.flatten) + localizedTopicEntitiesFut.map { localizedTopicEntities => + localizedTopicEntities.map { localizedTopicEntity => + localizedTopicEntity.entityId -> localizedTopicEntity + }.toMap + } + } + } + + /** + * Fetch explict followed interests i.e Topics for targetUser + * + * @param targetUser: [[Target]] object representing a user eligible for MagicRecs notification + * @return: list of all Topics(Interests) Followed by targetUser + */ + def getTopicsFollowedByUser( + targetUser: Target, + interestsWithLookupContextStore: ReadableStore[ + InterestsLookupRequestWithContext, + UserInterests + ], + followedTopicsStats: Stat + ): Future[Option[Seq[UserInterest]]] = { + buildTopicListingViewerContext(targetUser).flatMap { topicListingViewerContext => + // explicit interests relation query + 
val explicitInterestsLookupRequest = InterestsLookupRequestWithContext( + targetUser.targetId, + Some( + InterestedInInterestLookupContext( + explicitContext = None, + inferredContext = None, + productId = Some(ProductId.Followable), + topicListingViewerContext = Some(topicListingViewerContext.toThrift), + disableExplicit = None, + disableImplicit = Some(true) + ) + ) + ) + + // filter explicit follow relationships from response + interestsWithLookupContextStore.get(explicitInterestsLookupRequest).map { + _.flatMap { userInterests => + val followedTopics = userInterests.interests.map { + _.filter { + case UserInterest(_, Some(interestData)) => + interestData match { + case UserInterestData.InterestedIn(interestedIn) => + interestedIn.exists { + case InterestedInInterestModel.ExplicitModel(explicitModel) => + explicitModel match { + case InterestRelationship.V1(v1) => + v1.relation == InterestRelationType.Followed + + case _ => false + } + + case _ => false + } + + case _ => false + } + + case _ => false // interestData unavailable + } + } + followedTopicsStats.add(followedTopics.getOrElse(Seq.empty[UserInterest]).size) + followedTopics + } + } + } + } + + /** + * + * @param target : [[Target]] object respresenting MagicRecs user + * + * @return: [[TopicListingViewerContext]] for querying topics + */ + def buildTopicListingViewerContext(target: Target): Future[TopicListingViewerContext] = { + Future.join(target.inferredUserDeviceLanguage, target.countryCode, target.targetUser).map { + case (inferredLanguage, countryCode, userInfo) => + TopicListingViewerContext( + userId = Some(target.targetId), + guestId = None, + deviceId = None, + clientApplicationId = None, + userAgent = None, + languageCode = inferredLanguage, + countryCode = countryCode, + userRoles = userInfo.flatMap(_.roles.map(_.roles.toSet)) + ) + } + } + + /** + * + * @param target : [[Target]] object respresenting MagicRecs user + * + * @return: [[TopicListingViewerContext]] for querying topics + */ + 
def buildTopicListingViewerContextForCR(target: Target): Future[TopicListingViewerContextCR] = { + TopicsUtil.buildTopicListingViewerContext(target).map(_.toThrift) + } + + /** + * + * @param inputTarget : [[Target]] object representing MagicRecs user + * @param tweets : [[Seq[TweetyPieResult]]] object representing Tweets to get TSP for + * @param topicSocialProofServiceStore: [[ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse]]] + * @param edgeStore: [[ReadableStore[RelationEdge, Boolean]]] + * + * @return: [[Future[Seq[TweetWithTopicProof]]]] Tweets with topic proof + */ + def getTopicSocialProofs( + inputTarget: Target, + tweets: Seq[TweetyPieResult], + topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + edgeStore: ReadableStore[RelationEdge, Boolean], + scoreThresholdParam: Param[Double] + ): Future[Seq[TweetWithTopicProof]] = { + buildTopicListingViewerContextForCR(inputTarget).flatMap { topicListingContext => + val tweetIds: Set[Long] = tweets.map(_.tweet.id).toSet + val tweetIdsToTweetyPie = tweets.map(tp => tp.tweet.id -> tp).toMap + val topicSocialProofRequest = + TopicSocialProofRequest( + inputTarget.targetId, + tweetIds, + DisplayLocation.MagicRecsRecommendTopicTweets, + TopicListingSetting.Followable, + topicListingContext) + + topicSocialProofServiceStore + .get(topicSocialProofRequest).flatMap { + case Some(topicSocialProofResponse) => + val topicProofCandidates = topicSocialProofResponse.socialProofs.collect { + case (tweetId, topicsWithScore) + if topicsWithScore.nonEmpty && topicsWithScore + .maxBy(_.score).score >= inputTarget + .params(scoreThresholdParam) => + // Get the topic with max score if there are any topics returned + val topicWithScore = topicsWithScore.maxBy(_.score) + TweetWithTopicProof( + tweetId, + topicWithScore.topicId, + tweetIdsToTweetyPie(tweetId).tweet.coreData.map(_.userId), + topicWithScore.score, + tweetIdsToTweetyPie(tweetId), +
topicWithScore.topicFollowType.map(_.name).getOrElse(""), + topicWithScore.algorithmType.map(_.name), + isOON = true + ) + }.toSeq + + hydrateTopicProofCandidatesWithEdgeStore(inputTarget, topicProofCandidates, edgeStore) + case _ => Future.value(Seq.empty[TweetWithTopicProof]) + } + } + } + + /** + * Obtain TopicWithScores for provided tweet candidates and target + * @param target target user + * @param Tweets tweet candidates represented in a (tweetId, TweetyPieResult) map + * @param topicSocialProofServiceStore store to query topic social proof + * @param enableTopicAnnotation whether to enable topic annotation + * @param topicScoreThreshold threshold for topic score + * @return a (tweetId, TopicWithScore) map where the topic with highest topic score (if exists) is chosen + */ + def getTopicsWithScoreMap( + target: PushTypes.Target, + Tweets: Map[Long, Option[TweetyPieResult]], + topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + enableTopicAnnotation: Boolean, + topicScoreThreshold: Double + ): Future[Option[Map[Long, TopicWithScore]]] = { + + if (enableTopicAnnotation) { + TopicsUtil + .buildTopicListingViewerContextForCR(target).flatMap { topicListingContext => + val tweetIds = Tweets.keySet + val topicSocialProofRequest = + TopicSocialProofRequest( + target.targetId, + tweetIds, + DisplayLocation.MagicRecsRecommendTopicTweets, + TopicListingSetting.Followable, + topicListingContext) + + topicSocialProofServiceStore + .get(topicSocialProofRequest).map { + _.map { topicSocialProofResponse => + topicSocialProofResponse.socialProofs + .collect { + case (tweetId, topicsWithScore) + if topicsWithScore.nonEmpty && Tweets(tweetId).nonEmpty + && topicsWithScore.maxBy(_.score).score >= topicScoreThreshold => + tweetId -> topicsWithScore.maxBy(_.score) + } + + } + } + } + } else { + Future.None + } + + } + + /** + * Obtain LocalizedEntities for provided tweet candidates and target + * @param target target user + * @param 
Tweets tweet candidates represented in a (tweetId, TweetyPieResult) map + * @param uttEntityHydrationStore store to query the actual LocalizedEntities + * @param topicSocialProofServiceStore store to query topic social proof + * @param enableTopicAnnotation whether to enable topic annotation + * @param topicScoreThreshold threshold for topic score + * @return a (tweetId, LocalizedEntity Option) Future map that stores Localized Entity (can be empty) for given tweetId + */ + def getTweetIdLocalizedEntityMap( + target: PushTypes.Target, + Tweets: Map[Long, Option[TweetyPieResult]], + uttEntityHydrationStore: UttEntityHydrationStore, + topicSocialProofServiceStore: ReadableStore[TopicSocialProofRequest, TopicSocialProofResponse], + enableTopicAnnotation: Boolean, + topicScoreThreshold: Double + ): Future[Map[Long, Option[LocalizedEntity]]] = { + + val topicWithScoreMap = getTopicsWithScoreMap( + target, + Tweets, + topicSocialProofServiceStore, + enableTopicAnnotation, + topicScoreThreshold) + + topicWithScoreMap.flatMap { topicWithScores => + topicWithScores match { + case Some(topics) => + val topicIds = topics.collect { case (_, topic) => topic.topicId }.toSet + val LocalizedEntityMapFut = + getLocalizedEntityMap(target, topicIds, uttEntityHydrationStore) + + LocalizedEntityMapFut.map { LocalizedEntityMap => + topics.map { + case (tweetId, topic) => + tweetId -> LocalizedEntityMap.get(topic.topicId) + } + } + case _ => Future.value(Map[Long, Option[LocalizedEntity]]()) + } + } + + } + + /** + * Hydrate TweetWithTopicProof candidates with isOON field info, + * based on the following relationship between target user and candidate author in edgeStore + * @return TweetWithTopicProof candidates with isOON field populated + */ + def hydrateTopicProofCandidatesWithEdgeStore( + inputTarget: TargetUser, + topicProofCandidates: Seq[TweetWithTopicProof], + edgeStore: ReadableStore[RelationEdge, Boolean], + ): Future[Seq[TweetWithTopicProof]] = { + // IDs of all authors of 
TopicProof candidates that are OON with respect to inputTarget + val validOONAuthorIdsFut = + Predicate.filter( + topicProofCandidates.flatMap(_.authorId).distinct, + authorNotBeingFollowedPredicate(inputTarget, edgeStore)) + + validOONAuthorIdsFut.map { validOONAuthorIds => + topicProofCandidates.map(candidate => { + candidate.copy(isOON = + candidate.authorId.isDefined && validOONAuthorIds.contains(candidate.authorId.get)) + }) + } + } + +} From fb54d8b54984f89f7dba90a18e7c3048421464c3 Mon Sep 17 00:00:00 2001 From: twitter-team <> Date: Fri, 19 May 2023 14:36:39 -0700 Subject: [PATCH 11/11] README updates - renames pushservice readme.md to README.md - Minor changes to main README.md --- README.md | 10 +++++----- pushservice/{readme.md => README.md} | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) rename pushservice/{readme.md => README.md} (97%) diff --git a/README.md b/README.md index 5bff49018..b872faef5 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Twitter's Recommendation Algorithm -Twitter's Recommendation Algorithm is a set of services and jobs that are responsible for serving feeds of Tweets and other content across all Twitter product surfaces (e.g. For You Timeline, Search, Explore). For an introduction to how the algorithm works, please refer to our [engineering blog](https://blog.twitter.com/engineering/en_us/topics/open-source/2023/twitter-recommendation-algorithm). +Twitter's Recommendation Algorithm is a set of services and jobs that are responsible for serving feeds of Tweets and other content across all Twitter product surfaces (e.g. For You Timeline, Search, Explore, Notifications). For an introduction to how the algorithm works, please refer to our [engineering blog](https://blog.twitter.com/engineering/en_us/topics/open-source/2023/twitter-recommendation-algorithm). 
## Architecture @@ -8,7 +8,8 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | Type | Component | Description | |------------|------------|------------| -| Data | [unified-user-actions](unified_user_actions/README.md) | Real-time stream of user actions on Twitter. | +| Data | [tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. | +| | [unified-user-actions](unified_user_actions/README.md) | Real-time stream of user actions on Twitter. | | | [user-signal-service](user-signal-service/README.md) | Centralized platform to retrieve explicit (e.g. likes, replies) and implicit (e.g. profile visits, tweet clicks) user signals. | | Model | [SimClusters](src/scala/com/twitter/simclusters_v2/README.md) | Community detection and sparse embeddings into those communities. | | | [TwHIN](https://github.com/twitter/the-algorithm-ml/blob/main/projects/twhin/README.md) | Dense knowledge graph embeddings for Users and Tweets. | @@ -24,9 +25,8 @@ Product surfaces at Twitter are built on a shared set of data, models, and softw | | [timelines-aggregation-framework](timelines/data_processing/ml_util/aggregation_framework/README.md) | Framework for generating aggregate features in batch or real time. | | | [representation-manager](representation-manager/README.md) | Service to retrieve embeddings (i.e. SimClusers and TwHIN). | | | [twml](twml/README.md) | Legacy machine learning framework built on TensorFlow v1. | -| | [Tweetypie](tweetypie/server/README.md) | Core Tweet service that handles the reading and writing of Tweet data. | -The product surface currently included in this repository is the For You Timeline. +The product surfaces currently included in this repository are the For You Timeline and Recommended Notifications. 
### For You Timeline @@ -50,7 +50,7 @@ The core components of the For You Timeline included in this repository are list ### Recommended Notifications -The core components that power Recommended Notifications included in this repository are listed below: +The core components of Recommended Notifications included in this repository are listed below: | Type | Component | Description | |------------|------------|------------| diff --git a/pushservice/readme.md b/pushservice/README.md similarity index 97% rename from pushservice/readme.md rename to pushservice/README.md index 99c20fcba..b1bad0a57 100644 --- a/pushservice/readme.md +++ b/pushservice/README.md @@ -17,9 +17,9 @@ RefreshForPushHandler follows these steps: - Fetch Candidates - Retrieves a list of potential candidates for the push by querying various candidate sources using the target - Candidate Hydration - - Hydrates the candidate details with batch calls to different downstream services. + - Hydrates the candidate details with batch calls to different downstream services - Pre-rank Filtering, also called Light Filtering - - Filters the hydrated candidates with lightweight RPC calls. + - Filters the hydrated candidates with lightweight RPC calls - Rank - Perform feature hydration for candidates and target user - Performs light ranking on candidates @@ -36,7 +36,7 @@ SendHandler follows these steps: - Building Target - Builds a target user object based on the given user ID - Candidate Hydration - - Hydrates the candidate details with batch calls to different downstream services. + - Hydrates the candidate details with batch calls to different downstream services - Feature Hydration - Perform feature hydration for candidates and target user - Take Step, also called Heavy Filtering