mirror of https://github.com/twitter/the-algorithm.git
synced 2024-11-14 07:35:10 +01:00
Delete cr-mixer directory
This commit is contained in:
parent 29a136d27d
commit 1a6792fd07
@@ -1,24 +0,0 @@
jvm_binary(
    name = "bin",
    basename = "cr-mixer",
    main = "com.twitter.cr_mixer.CrMixerServerMain",
    runtime_platform = "java11",
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/ch/qos/logback:logback-classic",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer",
        "finagle/finagle-zipkin-scribe/src/main/scala",
        "finatra/inject/inject-logback/src/main/scala",
        "loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
        "twitter-server-internal/src/main/scala",
        "twitter-server/logback-classic/src/main/scala",
    ],
)

# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
jvm_app(
    name = "cr-mixer-app",
    archive = "zip",
    binary = ":bin",
    tags = ["bazel-compatible"],
)
@@ -1,7 +0,0 @@
# CR-Mixer

CR-Mixer is a candidate generation service proposed as part of the Personalization Strategy vision for Twitter. Its aim is to speed up the iteration and development of candidate generation and light ranking. The service acts as a lightweight coordinating layer that delegates candidate generation tasks to underlying compute services. It focuses on Twitter's candidate generation use cases and offers a centralized platform for fetching, mixing, and managing candidate sources and light rankers. The overarching goal is to increase the speed and ease of testing and developing candidate generation pipelines, ultimately delivering more value to Twitter users.

CR-Mixer acts as a configurator and delegator, providing abstractions for the challenging parts of candidate generation and handling performance issues. It will offer a 1-stop-shop for fetching and mixing candidate sources, a managed and shared performant platform, a light ranking layer, a common filtering layer, a version control system, a co-owned feature switch set, and peripheral tooling.

CR-Mixer's pipeline consists of 4 steps: source signal extraction, candidate generation, filtering, and ranking. It also provides peripheral tooling like scribing, debugging, and monitoring. The service fetches source signals externally from stores like UserProfileService and RealGraph, calls external candidate generation services, and caches results. Filters are applied for deduping and pre-ranking, and a light ranking step follows.
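The deleted README above describes a four-step flow (source signal extraction, candidate generation, filtering, light ranking). The following is a minimal, self-contained Scala sketch of that flow for orientation only; every type and method name here is an illustrative placeholder, not one of the actual cr_mixer classes removed in this commit.

```scala
// Hypothetical sketch of the four-step CR-Mixer flow; names are illustrative only.
object PipelineSketch {
  final case class SourceSignal(userId: Long, signalTweetId: Long)
  final case class Candidate(tweetId: Long, score: Double)

  // 1. Source signal extraction (the real service reads stores like USS / RealGraph)
  def extractSignals(userId: Long): Seq[SourceSignal] =
    Seq(SourceSignal(userId, signalTweetId = 123L))

  // 2. Candidate generation from each source signal (delegated to external services in reality)
  def generateCandidates(signals: Seq[SourceSignal]): Seq[Candidate] =
    signals.flatMap(s => Seq(Candidate(s.signalTweetId + 1, 0.3), Candidate(s.signalTweetId + 2, 0.7)))

  // 3. Filtering: dedupe and pre-ranking filters
  def filter(candidates: Seq[Candidate]): Seq[Candidate] =
    candidates.distinct

  // 4. Light ranking
  def rank(candidates: Seq[Candidate]): Seq[Candidate] =
    candidates.sortBy(-_.score)

  def recommend(userId: Long): Seq[Candidate] =
    rank(filter(generateCandidates(extractSignals(userId))))

  def main(args: Array[String]): Unit =
    println(recommend(userId = 1L))
}
```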
@@ -1,8 +0,0 @@
resources(
    sources = [
        "*.xml",
        "*.yml",
        "config/*.yml",
    ],
    tags = ["bazel-compatible"],
)
@@ -1,146 +0,0 @@
# The keys in this file correspond to the DeciderValues defined in
# https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala

dark_traffic_filter:
  comment: Proportion of the requests that are forwarded as dark traffic to the proxy
  default_availability: 0

enable_tweet_recommendations_home_product:
  comment: Proportion of requests where we return an actual response for TweetRecommendations Home product
  default_availability: 10000

enable_tweet_health_score:
  comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute TweetHealthModelScore"
  default_availability: 10000

enable_user_agatha_score:
  comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute UserHealthModelScore"
  default_availability: 10000

enable_user_tweet_entity_graph_traffic:
  comment: "Enable the traffic to user entity tweet graph to fetch liked-by tweets candidates"
  default_availability: 10000

enable_user_tweet_graph_traffic:
  comment: "Enable the traffic to user tweet graph to fetch similar tweets candidates"
  default_availability: 10000

enable_user_video_graph_traffic:
  comment: "Enable the traffic to user video graph to fetch similar tweets candidates"
  default_availability: 10000

enable_user_ad_graph_traffic:
  comment: "Enable the traffic to user ad graph to fetch similar tweets candidates"
  default_availability: 10000

enable_qig_similar_tweets_traffic:
  comment: "Enable the traffic to QIG to fetch similar tweet candidates"
  default_availability: 0

enable_frs_traffic:
  comment: "Enable the traffic to FRS to fetch user follow recommendations"
  default_availability: 0

enable_hydra_dark_traffic:
  comment: "Enable dark traffic to hydra"
  default_availability: 0

enable_real_graph_mh_store:
  comment: "Enable traffic for the real graph manhattan based store"
  default_availability: 0

enable_simclusters_ann_experimental_dark_traffic:
  comment: "Enable dark traffic to simclusters-ann-experimental"
  default_availability: 0

enable_simclusters_ann_2_dark_traffic:
  comment: "Enable dark traffic to prod SimClustersANN2"
  default_availability: 0

enable_user_state_store:
  comment: "Enable traffic user state store to hydrate user state"
  default_availability: 0

upper_funnel_per_step_scribe_rate:
  comment: "Enable Upper Funnel Event Scribe Sampling (fetch, pre-rank, interleave etc.) for getTweetsRecommendations() endpoint"
  default_availability: 0

kafka_message_scribe_sample_rate:
  comment: "Gates the production of forked scribe messages to kafka for the async feature hydrator"
  default_availability: 0

top_level_api_ddg_metrics_scribe_rate:
  comment: "Enable Top Level API DDG Metrics Scribe Sampling for getTweetsRecommendations() endpoint"
  default_availability: 0

ads_recommendations_per_experiment_scribe_rate:
  comment: "Percentage of DDG traffic to Scribe for getAdsRecommendations() endpoint"
  default_availability: 0

enable_loadshedding_getTweetRecommendations:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Notifications:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getTweetRecommendations_Email:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryTweet_MoreTweetsModule:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryAuthor:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getRelatedTweetsForQueryAuthor_MoreTweetsModule:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getFrsBasedTweetRecommendations_Home:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_loadshedding_getFrsBasedTweetRecommendations_Notifications:
  comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
  default_availability: 0

enable_user_media_representation_store:
  comment: "Enable fetching user nudity rate signal from Media Understanding"
  default_availability: 0

enable_magic_recs_real_time_aggregates_store:
  comment: "Enable fetching real time aggregates features from Magic Recs memcache"
  default_availability: 0

enable_utg_realtime_tweet_engagement_score:
  comment: "Enable fetching real time tweet engagement score from utg-plus"
  default_availability: 0

get_tweet_recommendations_cache_rate:
  comment: "Proportion of users where getTweetRecommendations() request and responses will be cached"
  default_availability: 1000

enable_earlybird_traffic:
  comment: "Enable fetching tweet candidates from Earlybird"
  default_availability: 0

enable_scribe_for_blue_verified_tweet_candidates:
  comment: "Enable scribing for tweet candidates from Blue Verified users"
  default_availability: 0
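For context on the decider values above: the `default_availability` numbers are availabilities on a 0 to 10000 scale, so 10000 means roughly every request passes the gate, 1000 roughly 10%, and 0 turns the feature off. The sketch below only mimics that gating semantics in standalone Scala; it is not the service's actual decider client, whose API is not reproduced here.

```scala
// Illustrative only: maps a 0..10000 "default_availability" value to a per-request pass rate.
import scala.util.Random

object DeciderSketch {
  // Example values taken from the YAML above: key -> default_availability
  val availability: Map[String, Int] = Map(
    "enable_tweet_recommendations_home_product" -> 10000, // always on
    "get_tweet_recommendations_cache_rate" -> 1000,       // ~10% of requests
    "enable_earlybird_traffic" -> 0                        // off
  )

  def isAvailable(key: String): Boolean =
    Random.nextInt(10000) < availability.getOrElse(key, 0)

  def main(args: Array[String]): Unit =
    println(isAvailable("get_tweet_recommendations_cache_rate"))
}
```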
@@ -1,168 +0,0 @@
<configuration>
  <shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>

  <!-- ===================================================== -->
  <!-- Service Config -->
  <!-- ===================================================== -->
  <property name="DEFAULT_SERVICE_PATTERN"
            value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>

  <property name="DEFAULT_ACCESS_PATTERN"
            value="%msg"/>

  <!-- ===================================================== -->
  <!-- Common Config -->
  <!-- ===================================================== -->

  <!-- JUL/JDK14 to Logback bridge -->
  <contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
    <resetJUL>true</resetJUL>
  </contextListener>

  <!-- ====================================================================================== -->
  <!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
  <!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
  <!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
  <!-- ====================================================================================== -->

  <!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
  <appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>${log.service.output}</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
      <!-- keep 21 days' worth of history -->
      <maxHistory>21</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!-- Access Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
  <appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>${log.access.output}</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
      <!-- keep 21 days' worth of history -->
      <maxHistory>21</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!-- LogLens -->
  <appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
    <mdcAdditionalContext>true</mdcAdditionalContext>
    <category>${log.lens.category}</category>
    <index>${log.lens.index}</index>
    <tag>${log.lens.tag}/service</tag>
    <encoder>
      <pattern>%msg</pattern>
    </encoder>
  </appender>

  <!-- LogLens Access -->
  <appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
    <mdcAdditionalContext>true</mdcAdditionalContext>
    <category>${log.lens.category}</category>
    <index>${log.lens.index}</index>
    <tag>${log.lens.tag}/access</tag>
    <encoder>
      <pattern>%msg</pattern>
    </encoder>
  </appender>

  <!-- Pipeline Execution Logs -->
  <appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
    <file>allow_listed_pipeline_executions.log</file>
    <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
      <!-- daily rollover -->
      <fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
      <!-- keep 7 days' worth of history -->
      <maxHistory>7</maxHistory>
      <cleanHistoryOnStart>true</cleanHistoryOnStart>
    </rollingPolicy>
    <encoder>
      <pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
    </encoder>
  </appender>

  <!-- ===================================================== -->
  <!-- Primary Async Appenders -->
  <!-- ===================================================== -->

  <property name="async_queue_size" value="${queue.size:-50000}"/>
  <property name="async_max_flush_time" value="${max.flush.time:-0}"/>

  <appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="SERVICE"/>
  </appender>

  <appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="ACCESS"/>
  </appender>

  <appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
  </appender>

  <appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="LOGLENS"/>
  </appender>

  <appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
    <queueSize>${async_queue_size}</queueSize>
    <maxFlushTime>${async_max_flush_time}</maxFlushTime>
    <appender-ref ref="LOGLENS-ACCESS"/>
  </appender>

  <!-- ===================================================== -->
  <!-- Package Config -->
  <!-- ===================================================== -->

  <!-- Per-Package Config -->
  <logger name="com.twitter" level="info"/>
  <logger name="com.twitter.wilyns" level="warn"/>
  <logger name="com.twitter.configbus.client.file" level="off"/>
  <logger name="com.twitter.finagle.mux" level="warn"/>
  <logger name="com.twitter.finagle.serverset2" level="warn"/>
  <logger name="com.twitter.logging.ScribeHandler" level="off"/>
  <logger name="com.twitter.zookeeper.client.internal" level="warn"/>
  <logger name="io.netty.handler.ssl.SslHandler" level="OFF"/>

  <!-- Root Config -->
  <root level="${log_level:-INFO}">
    <appender-ref ref="ASYNC-SERVICE"/>
    <appender-ref ref="ASYNC-LOGLENS"/>
  </root>

  <!-- Access Logging -->
  <logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter"
          level="info"
          additivity="false">
    <appender-ref ref="ASYNC-ACCESS"/>
    <appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
  </logger>

  <!-- Pipeline Executions Log -->
  <logger name="com.twitter.product_mixer.core.service.pipeline_execution_logger"
          level="info"
          additivity="false">
    <appender-ref ref="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" />
  </logger>

</configuration>
@@ -1,48 +0,0 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/google/inject:guice",
        "3rdparty/jvm/javax/inject:javax.inject",
        "3rdparty/jvm/net/codingwell:scala-guice",
        "3rdparty/jvm/org/slf4j:slf4j-api",
        "cr-mixer/server/src/main/resources",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "decider/src/main/scala",
        "finagle/finagle-core/src/main",
        "finagle/finagle-http/src/main/scala",
        "finagle/finagle-thriftmux/src/main/scala",
        "finatra-internal/mtls-http/src/main/scala",
        "finatra-internal/mtls-thriftmux/src/main/scala",
        "finatra/http-core/src/main/java/com/twitter/finatra/http",
        "finatra/inject/inject-app/src/main/scala",
        "finatra/inject/inject-core/src/main/scala",
        "finatra/inject/inject-server/src/main/scala",
        "finatra/inject/inject-utils/src/main/scala",
        "finatra/utils/src/main/java/com/twitter/finatra/annotations",
        "hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config",
        "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/controllers",
        "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
        "src/thrift/com/twitter/timelines/render:thrift-scala",
        "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms",
        "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
        "timelines/src/main/scala/com/twitter/timelines/features/app",
        "twitter-server-internal",
        "twitter-server/server/src/main/scala",
        "util/util-app/src/main/scala",
        "util/util-core:scala",
        "util/util-slf4j-api/src/main/scala",
    ],
)
@@ -1,18 +0,0 @@
package com.twitter.cr_mixer

import com.twitter.finatra.http.routing.HttpWarmup
import com.twitter.finatra.httpclient.RequestBuilder._
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
class CrMixerHttpServerWarmupHandler @Inject() (warmup: HttpWarmup) extends Handler with Logging {

  override def handle(): Unit = {
    Try(warmup.send(get("/admin/cr-mixer/product-pipelines"), admin = true)())
      .onFailure(e => error(e.getMessage, e))
  }
}
@@ -1,229 +0,0 @@
package com.twitter.cr_mixer

import com.google.inject.Module
import com.twitter.cr_mixer.controller.CrMixerThriftController
import com.twitter.cr_mixer.featureswitch.SetImpressedBucketsLocalContextFilter
import com.twitter.cr_mixer.module.ActivePromotedTweetStoreModule
import com.twitter.cr_mixer.module.CertoStratoStoreModule
import com.twitter.cr_mixer.module.CrMixerParamConfigModule
import com.twitter.cr_mixer.module.EmbeddingStoreModule
import com.twitter.cr_mixer.module.FrsStoreModule
import com.twitter.cr_mixer.module.MHMtlsParamsModule
import com.twitter.cr_mixer.module.OfflineCandidateStoreModule
import com.twitter.cr_mixer.module.RealGraphStoreMhModule
import com.twitter.cr_mixer.module.RealGraphOonStoreModule
import com.twitter.cr_mixer.module.RepresentationManagerModule
import com.twitter.cr_mixer.module.RepresentationScorerModule
import com.twitter.cr_mixer.module.TweetInfoStoreModule
import com.twitter.cr_mixer.module.TweetRecentEngagedUserStoreModule
import com.twitter.cr_mixer.module.TweetRecommendationResultsStoreModule
import com.twitter.cr_mixer.module.TripCandidateStoreModule
import com.twitter.cr_mixer.module.TwhinCollabFilterStratoStoreModule
import com.twitter.cr_mixer.module.UserSignalServiceColumnModule
import com.twitter.cr_mixer.module.UserSignalServiceStoreModule
import com.twitter.cr_mixer.module.UserStateStoreModule
import com.twitter.cr_mixer.module.core.ABDeciderModule
import com.twitter.cr_mixer.module.core.CrMixerFlagModule
import com.twitter.cr_mixer.module.core.CrMixerLoggingABDeciderModule
import com.twitter.cr_mixer.module.core.FeatureContextBuilderModule
import com.twitter.cr_mixer.module.core.FeatureSwitchesModule
import com.twitter.cr_mixer.module.core.KafkaProducerModule
import com.twitter.cr_mixer.module.core.LoggerFactoryModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SimClustersANNSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedQigSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedTwHINSimlarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TwhinCollabFilterLookupSimilarityEngineModule
import com.twitter.cr_mixer.module.ConsumersBasedUserAdGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserTweetGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserVideoGraphStoreModule
import com.twitter.cr_mixer.module.DiffusionStoreModule
import com.twitter.cr_mixer.module.EarlybirdRecencyBasedCandidateStoreModule
import com.twitter.cr_mixer.module.TwiceClustersMembersStoreModule
import com.twitter.cr_mixer.module.StrongTiePredictionStoreModule
import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.EarlybirdSearchClientModule
import com.twitter.cr_mixer.module.thrift_client.FrsClientModule
import com.twitter.cr_mixer.module.thrift_client.QigServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.SimClustersAnnServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.TweetyPieClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphPlusClientModule
import com.twitter.cr_mixer.module.thrift_client.UserVideoGraphClientModule
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.finagle.Filter
import com.twitter.finatra.annotations.DarkTrafficFilterType
import com.twitter.finatra.decider.modules.DeciderModule
import com.twitter.finatra.http.HttpServer
import com.twitter.finatra.http.routing.HttpRouter
import com.twitter.finatra.jackson.modules.ScalaObjectMapperModule
import com.twitter.finatra.mtls.http.{Mtls => HttpMtls}
import com.twitter.finatra.mtls.thriftmux.Mtls
import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.finatra.thrift.filters._
import com.twitter.finatra.thrift.routing.ThriftRouter
import com.twitter.hydra.common.model_config.{ConfigModule => HydraConfigModule}
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.product_mixer.core.module.LoggingThrowableExceptionMapper
import com.twitter.product_mixer.core.module.StratoClientModule
import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule
import com.twitter.relevance_platform.common.filters.ClientStatsFilter
import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
import com.twitter.cr_mixer.module.SimClustersANNServiceNameToClientMapper
import com.twitter.cr_mixer.module.SkitStratoStoreModule
import com.twitter.cr_mixer.module.BlueVerifiedAnnotationStoreModule
import com.twitter.cr_mixer.module.core.TimeoutConfigModule
import com.twitter.cr_mixer.module.grpc_client.NaviGRPCClientModule
import com.twitter.cr_mixer.module.similarity_engine.CertoTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerBasedWalsSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.DiffusionBasedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.EarlybirdSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SkitTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.UserTweetEntityGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.thrift_client.HydraPartitionClientModule
import com.twitter.cr_mixer.module.thrift_client.HydraRootClientModule
import com.twitter.cr_mixer.module.thrift_client.UserAdGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetEntityGraphClientModule
import com.twitter.thriftwebforms.MethodOptions

object CrMixerServerMain extends CrMixerServer

class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls {
  override val name = "cr-mixer-server"

  private val coreModules = Seq(
    ABDeciderModule,
    CrMixerFlagModule,
    CrMixerLoggingABDeciderModule,
    CrMixerParamConfigModule,
    new DarkTrafficFilterModule[st.CrMixer.ReqRepServicePerEndpoint](),
    DeciderModule,
    FeatureContextBuilderModule,
    FeatureSwitchesModule,
    KafkaProducerModule,
    LoggerFactoryModule,
    MHMtlsParamsModule,
    ProductMixerFlagModule,
    ScalaObjectMapperModule,
    ThriftClientIdModule
  )

  private val thriftClientModules = Seq(
    AnnQueryServiceClientModule,
    EarlybirdSearchClientModule,
    FrsClientModule,
    HydraPartitionClientModule,
    HydraRootClientModule,
    QigServiceClientModule,
    SimClustersAnnServiceClientModule,
    TweetyPieClientModule,
    UserAdGraphClientModule,
    UserTweetEntityGraphClientModule,
    UserTweetGraphClientModule,
    UserTweetGraphPlusClientModule,
    UserVideoGraphClientModule,
  )

  private val grpcClientModules = Seq(
    NaviGRPCClientModule
  )

  // Modules sorted alphabetically, please keep the order when adding a new module
  override val modules: Seq[Module] =
    coreModules ++ thriftClientModules ++ grpcClientModules ++
      Seq(
        ActivePromotedTweetStoreModule,
        CertoStratoStoreModule,
        CertoTopicTweetSimilarityEngineModule,
        ConsumersBasedUserAdGraphSimilarityEngineModule,
        ConsumersBasedUserTweetGraphStoreModule,
        ConsumersBasedUserVideoGraphSimilarityEngineModule,
        ConsumersBasedUserVideoGraphStoreModule,
        ConsumerEmbeddingBasedTripSimilarityEngineModule,
        ConsumerEmbeddingBasedTwHINSimilarityEngineModule,
        ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule,
        ConsumersBasedUserAdGraphStoreModule,
        ConsumerBasedWalsSimilarityEngineModule,
        DiffusionStoreModule,
        EmbeddingStoreModule,
        EarlybirdSimilarityEngineModule,
        EarlybirdRecencyBasedCandidateStoreModule,
        FrsStoreModule,
        HydraConfigModule,
        OfflineCandidateStoreModule,
        ProducerBasedUnifiedSimilarityEngineModule,
        ProducerBasedUserAdGraphSimilarityEngineModule,
        ProducerBasedUserTweetGraphSimilarityEngineModule,
        RealGraphOonStoreModule,
        RealGraphStoreMhModule,
        RepresentationManagerModule,
        RepresentationScorerModule,
        SimClustersANNServiceNameToClientMapper,
        SimClustersANNSimilarityEngineModule,
        SkitStratoStoreModule,
        SkitTopicTweetSimilarityEngineModule,
        StratoClientModule,
        StrongTiePredictionStoreModule,
        TimeoutConfigModule,
        TripCandidateStoreModule,
        TwiceClustersMembersStoreModule,
        TweetBasedQigSimilarityEngineModule,
        TweetBasedTwHINSimlarityEngineModule,
        TweetBasedUnifiedSimilarityEngineModule,
        TweetBasedUserAdGraphSimilarityEngineModule,
        TweetBasedUserTweetGraphSimilarityEngineModule,
        TweetBasedUserVideoGraphSimilarityEngineModule,
        TweetInfoStoreModule,
        TweetRecentEngagedUserStoreModule,
        TweetRecommendationResultsStoreModule,
        TwhinCollabFilterStratoStoreModule,
        TwhinCollabFilterLookupSimilarityEngineModule,
        UserSignalServiceColumnModule,
        UserSignalServiceStoreModule,
        UserStateStoreModule,
        UserTweetEntityGraphSimilarityEngineModule,
        DiffusionBasedSimilarityEngineModule,
        BlueVerifiedAnnotationStoreModule,
        new MtlsThriftWebFormsModule[st.CrMixer.MethodPerEndpoint](this) {
          override protected def defaultMethodAccess: MethodOptions.Access = {
            MethodOptions.Access.ByLdapGroup(
              Seq(
                "cr-mixer-admins",
                "recosplat-sensitive-data-medium",
                "recos-platform-admins",
              ))
          }
        }
      )

  def configureThrift(router: ThriftRouter): Unit = {
    router
      .filter[LoggingMDCFilter]
      .filter[TraceIdMDCFilter]
      .filter[ThriftMDCFilter]
      .filter[ClientStatsFilter]
      .filter[AccessLoggingFilter]
      .filter[SetImpressedBucketsLocalContextFilter]
      .filter[ExceptionMappingFilter]
      .filter[Filter.TypeAgnostic, DarkTrafficFilterType]
      .exceptionMapper[LoggingThrowableExceptionMapper]
      .add[CrMixerThriftController]
  }

  override protected def warmup(): Unit = {
    handle[CrMixerThriftServerWarmupHandler]()
    handle[CrMixerHttpServerWarmupHandler]()
  }
}
@@ -1,75 +0,0 @@
package com.twitter.cr_mixer

import com.twitter.finagle.thrift.ClientId
import com.twitter.finatra.thrift.routing.ThriftWarmup
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.product_mixer.core.{thriftscala => pt}
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
class CrMixerThriftServerWarmupHandler @Inject() (warmup: ThriftWarmup)
    extends Handler
    with Logging {

  private val clientId = ClientId("thrift-warmup-client")

  def handle(): Unit = {
    val testIds = Seq(1, 2, 3)
    try {
      clientId.asCurrent {
        testIds.foreach { id =>
          val warmupReq = warmupQuery(id)
          info(s"Sending warm-up request to service with query: $warmupReq")
          warmup.sendRequest(
            method = st.CrMixer.GetTweetRecommendations,
            req = Request(st.CrMixer.GetTweetRecommendations.Args(warmupReq)))(assertWarmupResponse)
        }
      }
    } catch {
      case e: Throwable =>
        // we don't want a warmup failure to prevent start-up
        error(e.getMessage, e)
    }
    info("Warm-up done.")
  }

  private def warmupQuery(userId: Long): st.CrMixerTweetRequest = {
    val clientContext = pt.ClientContext(
      userId = Some(userId),
      guestId = None,
      appId = Some(258901L),
      ipAddress = Some("0.0.0.0"),
      userAgent = Some("FAKE_USER_AGENT_FOR_WARMUPS"),
      countryCode = Some("US"),
      languageCode = Some("en"),
      isTwoffice = None,
      userRoles = None,
      deviceId = Some("FAKE_DEVICE_ID_FOR_WARMUPS")
    )
    st.CrMixerTweetRequest(
      clientContext = clientContext,
      product = st.Product.Home,
      productContext = Some(st.ProductContext.HomeContext(st.HomeContext())),
    )
  }

  private def assertWarmupResponse(
    result: Try[Response[st.CrMixer.GetTweetRecommendations.SuccessType]]
  ): Unit = {
    // we collect and log any exceptions from the result.
    result match {
      case Return(_) => // ok
      case Throw(exception) =>
        warn("Error performing warm-up request.")
        error(exception.getMessage, exception)
    }
  }
}
@@ -1,77 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.mutable

@Singleton
case class AdsBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates by iteratively choosing InterestedIn candidates and TWISTLY candidates
   * in turn. InterestedIn candidates have no source signal, whereas TWISTLY candidates do. TWISTLY
   * candidates themselves are interleaved by source before equal blending with InterestedIn
   * candidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
  ): Future[Seq[BlendedAdsCandidate]] = {

    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)
    val (interestedInCandidates, twistlyCandidates) =
      candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty)
    // First interleave twistly candidates
    val interleavedTwistlyCandidates = InterleaveUtil.interleave(twistlyCandidates)

    val twistlyAndInterestedInCandidates =
      Seq(interestedInCandidates.flatten, interleavedTwistlyCandidates)

    // then interleave twistly candidates with interested in to make them even
    val interleavedCandidates = InterleaveUtil.interleave(twistlyAndInterestedInCandidates)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = buildBlendedAdsCandidate(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

  private def buildBlendedAdsCandidate(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
    interleavedCandidates: Seq[InitialAdsCandidate]
  ): Seq[BlendedAdsCandidate] = {
    val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates)
    interleavedCandidates.map { interleavedCandidate =>
      interleavedCandidate.toBlendedAdsCandidate(cgInfoLookupMap(interleavedCandidate.tweetId))
    }
  }

  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialAdsCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()

    candidateSeq.foreach { candidates =>
      candidates.foreach { candidate =>
        val candidateGenerationInfoSeq = {
          tweetIdMap.getOrElse(candidate.tweetId, Seq.empty)
        }
        val candidateGenerationInfo = candidate.candidateGenerationInfo
        tweetIdMap.put(
          candidate.tweetId,
          candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo))
      }
    }
    tweetIdMap.toMap
  }

}
@@ -1,20 +0,0 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/com/twitter/storehaus:core",
        "3rdparty/jvm/javax/inject:javax.inject",
        "3rdparty/src/jvm/com/twitter/storehaus:core",
        "configapi/configapi-core",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "snowflake/src/main/scala/com/twitter/snowflake/id",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
    ],
)
@@ -1,48 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import scala.collection.mutable

object BlendedCandidatesBuilder {

  /**
   * @param inputCandidates input candidate prior to interleaving
   * @param interleavedCandidates after interleaving. These tweets are de-duplicated.
   */
  def build(
    inputCandidates: Seq[Seq[InitialCandidate]],
    interleavedCandidates: Seq[InitialCandidate]
  ): Seq[BlendedCandidate] = {
    val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates)
    interleavedCandidates.map { interleavedCandidate =>
      interleavedCandidate.toBlendedCandidate(cgInfoLookupMap(interleavedCandidate.tweetId))
    }
  }

  /**
   * The same tweet can be generated by different sources.
   * This function tells you which CandidateGenerationInfo generated a given tweet
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()

    candidateSeq.foreach { candidates =>
      candidates.foreach { candidate =>
        val candidateGenerationInfoSeq = {
          tweetIdMap.getOrElse(candidate.tweetId, Seq.empty)
        }
        val candidateGenerationInfo = candidate.candidateGenerationInfo
        tweetIdMap.put(
          candidate.tweetId,
          candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo))
      }
    }
    tweetIdMap.toMap
  }

}
@@ -1,121 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject

case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Exposes multiple types of sorting relying only on content-based signals:
   * Candidate Recency, Random, FavoriteCount, and finally Standardized, which standardizes the
   * scores that come from the active SimilarityEngine and then sorts on the standardized scores.
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)
    val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency =>
        candidates.flatten.sortBy(c => getSnowflakeTimeStamp(c.tweetId)).reverse
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        candidates.flatten.sortBy(_ => scala.util.Random.nextDouble())
      case BlenderParams.ContentBasedSortingAlgorithmEnum.FavoriteCount =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SimilarityToSignalSorting =>
        standardizeAndSortByScore(flattenAndGroupByEngineTypeOrFirstContribEngine(candidates))
      case _ =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
    }

    stats.stat("candidates").add(sortedCandidates.size)

    val blendedCandidates =
      BlendedCandidatesBuilder.build(inputCandidates, removeDuplicates(sortedCandidates))
    Future.value(blendedCandidates)
  }

  private def removeDuplicates(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = {
    val seen = collection.mutable.Set.empty[Long]
    candidates.filter { c =>
      if (seen.contains(c.tweetId)) {
        false
      } else {
        seen += c.tweetId
        true
      }
    }
  }

  private def groupByEngineTypeOrFirstContribEngine(
    candidates: Seq[InitialCandidate]
  ): Map[SimilarityEngineType, Seq[InitialCandidate]] = {
    val grouped = candidates.groupBy { candidate =>
      val contrib = candidate.candidateGenerationInfo.contributingSimilarityEngines
      if (contrib.nonEmpty) {
        contrib.head.similarityEngineType
      } else {
        candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
      }
    }
    grouped
  }

  private def flattenAndGroupByEngineTypeOrFirstContribEngine(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[Seq[InitialCandidate]] = {
    val flat = candidates.flatten
    val grouped = groupByEngineTypeOrFirstContribEngine(flat)
    grouped.values.toSeq
  }

  private def standardizeAndSortByScore(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[InitialCandidate] = {
    candidates
      .map { innerSeq =>
        val meanScore = innerSeq
          .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
          .sum / innerSeq.length
        val stdDev = scala.math
          .sqrt(
            innerSeq
              .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
              .map(a => a - meanScore)
              .map(a => a * a)
              .sum / innerSeq.length)
        innerSeq
          .map(c =>
            (
              c,
              c.candidateGenerationInfo.similarityEngineInfo.score
                .map { score =>
                  if (stdDev != 0) (score - meanScore) / stdDev
                  else 0.0
                }
                .getOrElse(0.0)))
      }.flatten.sortBy { case (_, standardizedScore) => -standardizedScore }
      .map { case (candidate, _) => candidate }
  }

  private def getSnowflakeTimeStamp(tweetId: Long): Time = {
    val isSnowflake = SnowflakeId.isSnowflakeId(tweetId)
    if (isSnowflake) {
      SnowflakeId(tweetId).time
    } else {
      Time.fromMilliseconds(0L)
    }
  }
}
@@ -1,90 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.util.CountWeightedInterleaveUtil
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton

/**
 * A weighted round robin interleaving algorithm.
 * The weight of each blending group is based on the count of candidates in that group.
 * The more candidates under a blending group, the more candidates are selected from it during
 * round robin, which in effect prioritizes this group.
 *
 * Weights sum up to 1. For example:
 * total candidates = 8
 * Group              Weight
 * [A1, A2, A3, A4]   4/8 = 0.5   // select 50% of results from group A
 * [B1, B2]           2/8 = 0.25  // 25% from group B
 * [C1, C2]           2/8 = 0.25  // 25% from group C
 *
 * Blended results = [A1, A2, B1, C1, A3, A4, B2, C2]
 * See @linht's go/weighted-interleave
 */
@Singleton
case class CountWeightedInterleaveBlender @Inject() (globalStats: StatsReceiver) {
  import CountWeightedInterleaveBlender._

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  def blend(
    query: CrCandidateGeneratorQuery,
    inputCandidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[BlendedCandidate]] = {
    val weightedBlenderQuery = CountWeightedInterleaveBlender.paramToQuery(query.params)
    countWeightedInterleave(weightedBlenderQuery, inputCandidates)
  }

  private[blender] def countWeightedInterleave(
    query: WeightedBlenderQuery,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    val candidatesAndWeightKeyByIndexId: Seq[(Seq[InitialCandidate], Double)] = {
      CountWeightedInterleaveUtil.buildInitialCandidatesWithWeightKeyByFeature(
        inputCandidates,
        query.rankerWeightShrinkage)
    }

    val interleavedCandidates =
      InterleaveUtil.weightedInterleave(candidatesAndWeightKeyByIndexId, query.maxWeightAdjustments)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }
}

object CountWeightedInterleaveBlender {

  /**
   * We pass two parameters to the weighted interleaver:
   * @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we
   *                              stay to uniform sampling. The bigger the shrinkage the closer
   *                              we are to uniform round robin
   * @param maxWeightAdjustments  max number of weighted samplings to do prior to defaulting to
   *                              uniform. Set so that we avoid infinite loops (e.g. if weights
   *                              are 0)
   */
  case class WeightedBlenderQuery(
    rankerWeightShrinkage: Double,
    maxWeightAdjustments: Int)

  def paramToQuery(params: Params): WeightedBlenderQuery = {
    val rankerWeightShrinkage: Double =
      params(BlenderParams.RankingInterleaveWeightShrinkageParam)
    val maxWeightAdjustments: Int =
      params(BlenderParams.RankingInterleaveMaxWeightAdjustments)

    WeightedBlenderQuery(rankerWeightShrinkage, maxWeightAdjustments)
  }
}
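To make the worked example in the CountWeightedInterleaveBlender doc comment above concrete, here is a small standalone sketch of count-weighted round-robin interleaving. It is a deliberate simplification, assuming non-empty groups and a fixed per-pass quota; it is not the actual InterleaveUtil.weightedInterleave, which additionally applies the weight shrinkage and bounded weighted sampling configured by WeightedBlenderQuery.

```scala
// Simplified count-weighted interleave: each pass takes from every group a number of
// items proportional to that group's share of all candidates (at least one per pass).
object WeightedInterleaveSketch {
  def interleave[T](groups: Seq[Seq[T]]): Seq[T] = {
    val total = groups.map(_.size).sum.toDouble
    // Per-pass quota for each group, derived from its weight (count / total).
    val quotas = groups.map(g => math.max(1, math.round(g.size / total * groups.size).toInt))
    val queues = groups.map(g => scala.collection.mutable.Queue(g: _*))
    val out = scala.collection.mutable.ArrayBuffer.empty[T]
    while (queues.exists(_.nonEmpty)) {
      queues.zip(quotas).foreach { case (queue, quota) =>
        (1 to quota).foreach(_ => if (queue.nonEmpty) out += queue.dequeue())
      }
    }
    out.toSeq
  }

  def main(args: Array[String]): Unit = {
    val groups = Seq(Seq("A1", "A2", "A3", "A4"), Seq("B1", "B2"), Seq("C1", "C2"))
    // Prints the blended order from the doc comment: A1, A2, B1, C1, A3, A4, B2, C2
    println(interleave(groups).mkString(", "))
  }
}
```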
@@ -1,33 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
case class InterleaveBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves candidates, by taking 1 candidate from each Seq[Seq[InitialCandidate]] in sequence,
   * until we run out of candidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    val interleavedCandidates = InterleaveUtil.interleave(inputCandidates)

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

}
@@ -1,64 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypes
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypesWithVideo
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject

case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Partition the candidates based on source type
   * Interleave the two partitions of candidates separately
   * Then append the back fill candidates to the end
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {

    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)

    val backFillSourceTypes =
      if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo
      else BackFillSourceTypes
    // partition candidates based on their source types
    val (backFillCandidates, regularCandidates) =
      candidates.partition(
        _.head.candidateGenerationInfo.sourceInfoOpt
          .exists(sourceInfo => backFillSourceTypes.contains(sourceInfo.sourceType)))

    val interleavedRegularCandidates = InterleaveUtil.interleave(regularCandidates)
    val interleavedBackFillCandidates =
      InterleaveUtil.interleave(backFillCandidates)
    stats.stat("backFillCandidates").add(interleavedBackFillCandidates.size)
    // Append interleaved backfill candidates to the end
    val interleavedCandidates = interleavedRegularCandidates ++ interleavedBackFillCandidates

    stats.stat("candidates").add(interleavedCandidates.size)

    val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
    Future.value(blendedCandidates)
  }

}

object ImplicitSignalBackFillBlender {
  final val BackFillSourceTypesWithVideo: Set[SourceType] = Set(
    SourceType.UserRepeatedProfileVisit,
    SourceType.VideoTweetPlayback50,
    SourceType.VideoTweetQualityView)

  final val BackFillSourceTypes: Set[SourceType] = Set(SourceType.UserRepeatedProfileVisit)
}
@@ -1,81 +0,0 @@
package com.twitter.cr_mixer.blender

import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.param.BlenderParams.BlendingAlgorithmEnum
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton

@Singleton
case class SwitchBlender @Inject() (
  defaultBlender: InterleaveBlender,
  sourceTypeBackFillBlender: SourceTypeBackFillBlender,
  adsBlender: AdsBlender,
  contentSignalBlender: ContentSignalBlender,
  globalStats: StatsReceiver) {

  private val stats = globalStats.scope(this.getClass.getCanonicalName)

  def blend(
    params: Params,
    userState: UserState,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Take out empty seqs
    val nonEmptyCandidates = inputCandidates.collect {
      case candidates if candidates.nonEmpty =>
        candidates
    }
    stats.stat("num_of_sequences").add(inputCandidates.size)

    // Sort the seqs in an order
    val innerSignalSorting = params(BlenderParams.SignalTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SourceSignalRecency =>
        SwitchBlender.TimestampOrder
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting => SwitchBlender.RandomOrder
      case _ => SwitchBlender.TimestampOrder
    }

    val candidatesToBlend = nonEmptyCandidates.sortBy(_.head)(innerSignalSorting)
    // Blend based on specified blender rules
    params(BlenderParams.BlendingAlgorithmParam) match {
      case BlendingAlgorithmEnum.RoundRobin =>
        defaultBlender.blend(candidatesToBlend)
      case BlendingAlgorithmEnum.SourceTypeBackFill =>
        sourceTypeBackFillBlender.blend(params, candidatesToBlend)
      case BlendingAlgorithmEnum.SourceSignalSorting =>
        contentSignalBlender.blend(params, candidatesToBlend)
      case _ => defaultBlender.blend(candidatesToBlend)
    }
  }
}

object SwitchBlender {

  /**
   * Prefers candidates generated from sources with the latest timestamps.
   * The newer the source signal, the higher a candidate ranks.
   * This ordering biases against consumer-based candidates because their timestamp defaults to 0.
   *
   * Within a Seq[Seq[Candidate]], all candidates within an inner Seq
   * are guaranteed to have the same sourceInfo because they are grouped by (sourceInfo, SE model).
   * Hence, we can pick .headOption to represent the whole list when filtering by the internalId of the sourceInfoOpt.
   * But of course the similarityEngine score in a CGInfo could be different.
   */
  val TimestampOrder: Ordering[InitialCandidate] =
    math.Ordering
      .by[InitialCandidate, Time](
        _.candidateGenerationInfo.sourceInfoOpt
          .flatMap(_.sourceEventTime)
          .getOrElse(Time.fromMilliseconds(0L)))
      .reverse

  private val RandomOrder: Ordering[InitialCandidate] =
    Ordering.by[InitialCandidate, Double](_ => scala.util.Random.nextDouble())
}
@ -1,140 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.blender.AdsBlender
|
||||
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.BlendedAdsCandidate
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.RankedAdsCandidate
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.param.AdsParams
|
||||
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
|
||||
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
|
||||
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
|
||||
import com.twitter.cr_mixer.source_signal.UssSourceSignalFetcher
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class AdsCandidateGenerator @Inject() (
|
||||
ussSourceSignalFetcher: UssSourceSignalFetcher,
|
||||
realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
|
||||
adsCandidateSourceRouter: AdsCandidateSourcesRouter,
|
||||
adsBlender: AdsBlender,
|
||||
scribeLogger: AdsRecommendationsScribeLogger,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchSourcesStats = stats.scope("fetchSources")
|
||||
private val fetchRealGraphSeedsStats = stats.scope("fetchRealGraphSeeds")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val interleaveStats = stats.scope("interleave")
|
||||
private val rankStats = stats.scope("rank")
|
||||
|
||||
def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = {
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
for {
|
||||
// fetch source signals
|
||||
sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) {
|
||||
fetchSources(query)
|
||||
}
|
||||
realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) {
|
||||
fetchSeeds(query)
|
||||
}
|
||||
// get initial candidates from similarity engines
|
||||
// hydrate lineItemInfo and filter out non active ads
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query, sourceSignals, realGraphSeeds)
|
||||
}
|
||||
|
||||
// blend candidates
|
||||
blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
|
||||
interleave(initialCandidates)
|
||||
}
|
||||
|
||||
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
|
||||
rank(
|
||||
blendedCandidates,
|
||||
query.params(AdsParams.EnableScoreBoost),
|
||||
query.params(AdsParams.AdsCandidateGenerationScoreBoostFactor),
|
||||
rankStats)
|
||||
}
|
||||
} yield {
|
||||
rankedCandidates.take(query.maxNumResults)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
def fetchSources(
|
||||
query: AdsCandidateGeneratorQuery
|
||||
): Future[Set[SourceInfo]] = {
|
||||
val fetcherQuery =
|
||||
FetcherQuery(query.userId, query.product, query.userState, query.params)
|
||||
ussSourceSignalFetcher.get(fetcherQuery).map(_.getOrElse(Seq.empty).toSet)
|
||||
}
|
||||
|
||||
private def fetchCandidates(
|
||||
query: AdsCandidateGeneratorQuery,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
realGraphSeeds: Map[UserId, Double]
|
||||
): Future[Seq[Seq[InitialAdsCandidate]]] = {
|
||||
scribeLogger.scribeInitialAdsCandidates(
|
||||
query,
|
||||
adsCandidateSourceRouter
|
||||
.fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params),
|
||||
query.params(AdsParams.EnableScribe)
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
private def fetchSeeds(
|
||||
query: AdsCandidateGeneratorQuery
|
||||
): Future[Map[UserId, Double]] = {
|
||||
if (query.params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
|
||||
realGraphInSourceGraphFetcher
|
||||
.get(FetcherQuery(query.userId, query.product, query.userState, query.params))
|
||||
.map(_.map(_.seedWithScores).getOrElse(Map.empty))
|
||||
} else Future.value(Map.empty[UserId, Double])
|
||||
}
|
||||
|
||||
private def interleave(
|
||||
candidates: Seq[Seq[InitialAdsCandidate]]
|
||||
): Future[Seq[BlendedAdsCandidate]] = {
|
||||
adsBlender
|
||||
.blend(candidates)
|
||||
}
|
||||
|
||||
private def rank(
|
||||
candidates: Seq[BlendedAdsCandidate],
|
||||
enableScoreBoost: Boolean,
|
||||
scoreBoostFactor: Double,
|
||||
statsReceiver: StatsReceiver,
|
||||
): Future[Seq[RankedAdsCandidate]] = {
|
||||
|
||||
val candidateSize = candidates.size
|
||||
val rankedCandidates = candidates.zipWithIndex.map {
|
||||
case (candidate, index) =>
|
||||
val score = 0.5 + 0.5 * ((candidateSize - index).toDouble / candidateSize)
|
||||
val boostedScore = if (enableScoreBoost) {
|
||||
statsReceiver.stat("boostedScore").add((100.0 * score * scoreBoostFactor).toFloat)
|
||||
score * scoreBoostFactor
|
||||
} else {
|
||||
statsReceiver.stat("score").add((100.0 * score).toFloat)
|
||||
score
|
||||
}
|
||||
candidate.toRankedAdsCandidate(boostedScore)
|
||||
}
|
||||
Future.value(rankedCandidates)
|
||||
}
|
||||
}
|
@ -1,516 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.ModelConfig
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
|
||||
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
|
||||
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.InterestedInParams
|
||||
import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.SimClustersANNParams
|
||||
import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.FilterUtil
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class AdsCandidateSourcesRouter @Inject() (
|
||||
activePromotedTweetStore: ReadableStore[TweetId, Seq[LineItemInfo]],
|
||||
decider: CrMixerDecider,
|
||||
@Named(ModuleNames.SimClustersANNSimilarityEngine) simClustersANNSimilarityEngine: StandardSimilarityEngine[
|
||||
Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine)
|
||||
tweetBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine)
|
||||
consumersBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumersBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine)
|
||||
producerBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUserAdGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine)
|
||||
tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) consumerTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
|
||||
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumerBasedWalsSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
globalStats: StatsReceiver,
|
||||
) {
|
||||
|
||||
import AdsCandidateSourcesRouter._
|
||||
|
||||
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
|
||||
|
||||
def fetchCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
params: configapi.Params
|
||||
): Future[Seq[Seq[InitialAdsCandidate]]] = {
|
||||
|
||||
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
|
||||
|
||||
val tweetBasedSANNMinScore = params(
|
||||
TweetBasedCandidateGenerationParams.SimClustersMinScoreParam)
|
||||
val tweetBasedSANN1Candidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
tweetBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val tweetBasedSANN2Candidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
tweetBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val tweetBasedUagCandidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableUAGParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getTweetBasedUserAdGraphCandidates(Some(sourceInfo), params)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val realGraphInNetworkBasedUagCandidates =
|
||||
if (params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
|
||||
getRealGraphConsumersBasedUserAdGraphCandidates(realGraphSeeds, params).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val producerBasedUagCandidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableUAGParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getProducerBasedUserAdGraphCandidates(Some(sourceInfo), params)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val tweetBasedTwhinAdsCandidates =
|
||||
if (params(TweetBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getTwHINAdsCandidates(
|
||||
tweetBasedTwHINANNSimilarityEngine,
|
||||
SimilarityEngineType.TweetBasedTwHINANN,
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
ModelConfig.DebuggerDemo)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val producerBasedSANNMinScore = params(
|
||||
ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam)
|
||||
val producerBasedSANN1Candidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
producerBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
val producerBasedSANN2Candidates =
|
||||
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
|
||||
Future.collect(
|
||||
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
Some(sourceInfo),
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
producerBasedSANNMinScore)
|
||||
})
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val interestedInMinScore = params(InterestedInParams.MinScoreParam)
|
||||
val interestedInSANN1Candidates = if (params(InterestedInParams.EnableSimClustersANN1Param)) {
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
None,
|
||||
params,
|
||||
simClustersANN1ConfigId,
|
||||
interestedInMinScore).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val interestedInSANN2Candidates = if (params(InterestedInParams.EnableSimClustersANN2Param)) {
|
||||
getSimClustersANNCandidates(
|
||||
requestUserId,
|
||||
None,
|
||||
params,
|
||||
simClustersANN2ConfigId,
|
||||
interestedInMinScore).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val consumerTwHINAdsCandidates =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
getTwHINAdsCandidates(
|
||||
consumerTwHINANNSimilarityEngine,
|
||||
SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN,
|
||||
requestUserId,
|
||||
None,
|
||||
ModelConfig.DebuggerDemo).map(Seq(_))
|
||||
} else Future.value(Seq.empty)
|
||||
|
||||
val consumerBasedWalsCandidates =
|
||||
if (params(
|
||||
ConsumerBasedWalsParams.EnableSourceParam
|
||||
)) {
|
||||
getConsumerBasedWalsCandidates(sourceSignals, params)
|
||||
}.map {
|
||||
Seq(_)
|
||||
}
|
||||
else Future.value(Seq.empty)
|
||||
|
||||
Future
|
||||
.collect(Seq(
|
||||
tweetBasedSANN1Candidates,
|
||||
tweetBasedSANN2Candidates,
|
||||
tweetBasedUagCandidates,
|
||||
tweetBasedTwhinAdsCandidates,
|
||||
producerBasedUagCandidates,
|
||||
producerBasedSANN1Candidates,
|
||||
producerBasedSANN2Candidates,
|
||||
realGraphInNetworkBasedUagCandidates,
|
||||
interestedInSANN1Candidates,
|
||||
interestedInSANN2Candidates,
|
||||
consumerTwHINAdsCandidates,
|
||||
consumerBasedWalsCandidates,
|
||||
)).map(_.flatten).map { tweetsWithCGInfoSeq =>
|
||||
Future.collect(
|
||||
tweetsWithCGInfoSeq.map(candidates => convertToInitialCandidates(candidates, stats)))
|
||||
}.flatten.map { candidatesLists =>
|
||||
val result = candidatesLists.filter(_.nonEmpty)
|
||||
stats.stat("numOfSequences").add(result.size)
|
||||
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
stats: StatsReceiver
|
||||
): Future[Seq[InitialAdsCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
stats.stat("initialCandidateSizeBeforeLineItemFilter").add(tweetIds.size)
|
||||
Future.collect(activePromotedTweetStore.multiGet(tweetIds)).map { lineItemInfos =>
|
||||
/** *
|
||||
* If lineItemInfo does not exist, we will filter out the promoted tweet as it cannot be targeted and ranked in admixer
|
||||
*/
|
||||
val filteredCandidates = candidates.collect {
|
||||
case candidate if lineItemInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val lineItemInfo = lineItemInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialAdsCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
lineItemInfo = lineItemInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
stats.stat("initialCandidateSizeAfterLineItemFilter").add(filteredCandidates.size)
|
||||
filteredCandidates
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def getSimClustersANNCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params,
|
||||
configId: String,
|
||||
minScore: Double
|
||||
) = {
|
||||
|
||||
val simClustersModelVersion =
|
||||
ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))
|
||||
|
||||
val embeddingType =
|
||||
if (sourceInfo.isEmpty) {
|
||||
params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType
|
||||
} else getSimClustersANNEmbeddingType(sourceInfo.get)
|
||||
val query = SimClustersANNSimilarityEngine.fromParams(
|
||||
if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId,
|
||||
embeddingType,
|
||||
simClustersModelVersion,
|
||||
configId,
|
||||
params
|
||||
)
|
||||
|
||||
// dark traffic to simclusters-ann-2
|
||||
if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) {
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val sann2Query = SimClustersANNSimilarityEngine.fromParams(
|
||||
if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId,
|
||||
embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params
|
||||
)
|
||||
simClustersANNSimilarityEngine
|
||||
.getCandidates(sann2Query)
|
||||
}
|
||||
|
||||
simClustersANNSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.filter(_.score > minScore).map {
|
||||
tweetWithScore =>
|
||||
val similarityEngineInfo = SimClustersANNSimilarityEngine
|
||||
.toSimilarityEngineInfo(query, tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getProducerBasedUserAdGraphCandidates(
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = ProducerBasedUserAdGraphSimilarityEngine.fromParams(
|
||||
sourceInfo.get.internalId,
|
||||
params
|
||||
)
|
||||
producerBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = ProducerBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getTweetBasedUserAdGraphCandidates(
|
||||
sourceInfo: Option[SourceInfo],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = TweetBasedUserAdGraphSimilarityEngine.fromParams(
|
||||
sourceInfo.get.internalId,
|
||||
params
|
||||
)
|
||||
tweetBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = TweetBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getRealGraphConsumersBasedUserAdGraphCandidates(
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
params: configapi.Params
|
||||
) = {
|
||||
|
||||
val query = ConsumersBasedUserAdGraphSimilarityEngine
|
||||
.fromParams(realGraphSeeds, params)
|
||||
|
||||
// The internalId is a placeholder value. We do not plan to store the full seedUserId set.
|
||||
val sourceInfo = SourceInfo(
|
||||
sourceType = SourceType.RealGraphIn,
|
||||
internalId = InternalId.UserId(0L),
|
||||
sourceEventTime = None
|
||||
)
|
||||
consumersBasedUserAdGraphSimilarityEngine
|
||||
.getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore =>
|
||||
val similarityEngineInfo = ConsumersBasedUserAdGraphSimilarityEngine
|
||||
.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
Some(sourceInfo),
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getTwHINAdsCandidates(
|
||||
similarityEngine: HnswANNSimilarityEngine,
|
||||
similarityEngineType: SimilarityEngineType,
|
||||
requestUserId: UserId,
|
||||
sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine
|
||||
model: String
|
||||
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
|
||||
val internalId =
|
||||
if (sourceInfo.nonEmpty) sourceInfo.get.internalId else InternalId.UserId(requestUserId)
|
||||
similarityEngine
|
||||
.getCandidates(buildHnswANNQuery(internalId, model)).map(_.getOrElse(Seq.empty)).map(_.map {
|
||||
tweetWithScore =>
|
||||
val similarityEngineInfo = SimilarityEngineInfo(
|
||||
similarityEngineType = similarityEngineType,
|
||||
modelId = Some(model),
|
||||
score = Some(tweetWithScore.score))
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq(similarityEngineInfo)
|
||||
))
|
||||
})
|
||||
}
|
||||
|
||||
private[candidate_generation] def getConsumerBasedWalsCandidates(
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params
|
||||
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
|
||||
// Fetch source signals and filter them based on age.
|
||||
val signals = FilterUtil.tweetSourceAgeFilter(
|
||||
getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
|
||||
params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
|
||||
|
||||
val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates(
|
||||
ConsumerBasedWalsSimilarityEngine.fromParams(signals, params)
|
||||
)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
consumerBasedWalsSimilarityEngine.getScopedStats
|
||||
.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
} yield tweetsWithCandidateGenerationInfoOpt.toSeq.flatten
|
||||
}
|
||||
}
|
||||
|
||||
object AdsCandidateSourcesRouter {
|
||||
def getSimClustersANNEmbeddingType(
|
||||
sourceInfo: SourceInfo
|
||||
): EmbeddingType = {
|
||||
sourceInfo.sourceType match {
|
||||
case SourceType.TweetFavorite | SourceType.Retweet | SourceType.OriginalTweet |
|
||||
SourceType.Reply | SourceType.TweetShare | SourceType.NotificationClick |
|
||||
SourceType.GoodTweetClick | SourceType.VideoTweetQualityView |
|
||||
SourceType.VideoTweetPlayback50 =>
|
||||
EmbeddingType.LogFavLongestL2EmbeddingTweet
|
||||
case SourceType.UserFollow | SourceType.UserRepeatedProfileVisit | SourceType.RealGraphOon |
|
||||
SourceType.FollowRecommendation | SourceType.UserTrafficAttributionProfileVisit |
|
||||
SourceType.GoodProfileClick | SourceType.TwiceUserId =>
|
||||
EmbeddingType.FavBasedProducer
|
||||
case _ => throw new IllegalArgumentException("sourceInfo.sourceType not supported")
|
||||
}
|
||||
}
|
||||
|
||||
def buildHnswANNQuery(internalId: InternalId, modelId: String): HnswANNEngineQuery = {
|
||||
HnswANNEngineQuery(
|
||||
sourceId = internalId,
|
||||
modelId = modelId,
|
||||
params = Params.Empty
|
||||
)
|
||||
}
|
||||
|
||||
def getConsumerBasedWalsSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
val AllowedSourceTypesForConsumerBasedWalsSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.TweetDontLike.value, //currently no-op
|
||||
SourceType.TweetReport.value, //currently no-op
|
||||
SourceType.AccountMute.value, //currently no-op
|
||||
SourceType.AccountBlock.value //currently no-op
|
||||
)
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"ann/src/main/scala/com/twitter/ann/hnsw",
|
||||
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
|
||||
"configapi/configapi-core",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"cuad/projects/hashspace/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
|
||||
"frigate/frigate-common:base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/constants",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
|
||||
"src/scala/com/twitter/ml/featurestore/lib",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/ml/api:embedding-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
|
||||
"src/thrift/com/twitter/search:earlybird-scala",
|
||||
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"strato/config/columns/cuad/hashspace:hashspace-strato-client",
|
||||
],
|
||||
)
|
@ -1,536 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModelConfig
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.model.TripTweetWithScore
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
|
||||
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
|
||||
import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.EngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.FilterUtil
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TripEngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* Route the SourceInfo to the associated Candidate Engines.
|
||||
*/
|
||||
@Singleton
|
||||
case class CandidateSourcesRouter @Inject() (
|
||||
customizedRetrievalCandidateGeneration: CustomizedRetrievalCandidateGeneration,
|
||||
simClustersInterestedInCandidateGeneration: SimClustersInterestedInCandidateGeneration,
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine)
|
||||
tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine)
|
||||
producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine)
|
||||
consumerEmbeddingBasedTripSimilarityEngine: StandardSimilarityEngine[
|
||||
TripEngineQuery,
|
||||
TripTweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine)
|
||||
consumerBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine)
|
||||
consumerBasedTwoTowerSimilarityEngine: HnswANNSimilarityEngine,
|
||||
@Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine)
|
||||
consumersBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
UserTweetEntityGraphSimilarityEngine.Query,
|
||||
TweetWithScoreAndSocialProof
|
||||
],
|
||||
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
|
||||
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
|
||||
ConsumerBasedWalsSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver,
|
||||
) {
|
||||
|
||||
import CandidateSourcesRouter._
|
||||
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
|
||||
|
||||
def fetchCandidates(
|
||||
requestUserId: UserId,
|
||||
sourceSignals: Set[SourceInfo],
|
||||
sourceGraphs: Map[String, Option[GraphSourceInfo]],
|
||||
params: configapi.Params,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
val tweetBasedCandidatesFuture = getCandidates(
|
||||
getTweetBasedSourceInfo(sourceSignals),
|
||||
params,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParams,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
|
||||
val producerBasedCandidatesFuture =
|
||||
getCandidates(
|
||||
getProducerBasedSourceInfo(sourceSignals),
|
||||
params,
|
||||
ProducerBasedUnifiedSimilarityEngine.fromParams(_, _),
|
||||
producerBasedUnifiedSimilarityEngine.getCandidates
|
||||
)
|
||||
|
||||
val simClustersInterestedInBasedCandidatesFuture =
|
||||
getCandidatesPerSimilarityEngineModel(
|
||||
requestUserId,
|
||||
params,
|
||||
SimClustersInterestedInCandidateGeneration.fromParams,
|
||||
simClustersInterestedInCandidateGeneration.get)
|
||||
|
||||
val consumerEmbeddingBasedLogFavBasedTripCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerEmbeddingBasedCandidateGenerationParams.EnableLogFavBasedSimClustersTripParam)) {
|
||||
getSimClustersTripCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
|
||||
ModelConfig.ConsumerLogFavBasedInterestedInEmbedding,
|
||||
InternalId.UserId(requestUserId),
|
||||
params
|
||||
),
|
||||
consumerEmbeddingBasedTripSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else
|
||||
Future.Nil
|
||||
|
||||
val consumersBasedUvgRealGraphInCandidatesFuture =
|
||||
if (params(ConsumersBasedUserVideoGraphParams.EnableSourceParam)) {
|
||||
val realGraphInGraphSourceInfoOpt =
|
||||
getGraphSourceInfoBySourceType(SourceType.RealGraphIn.name, sourceGraphs)
|
||||
|
||||
getGraphBasedCandidates(
|
||||
params,
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine
|
||||
.fromParamsForRealGraphIn(
|
||||
realGraphInGraphSourceInfoOpt
|
||||
.map { graphSourceInfo => graphSourceInfo.seedWithScores }.getOrElse(Map.empty),
|
||||
params),
|
||||
consumersBasedUserVideoGraphSimilarityEngine,
|
||||
ConsumersBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo,
|
||||
realGraphInGraphSourceInfoOpt
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedFollowBasedTripCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerEmbeddingBasedCandidateGenerationParams.EnableFollowBasedSimClustersTripParam)) {
|
||||
getSimClustersTripCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
|
||||
ModelConfig.ConsumerFollowBasedInterestedInEmbedding,
|
||||
InternalId.UserId(requestUserId),
|
||||
params
|
||||
),
|
||||
consumerEmbeddingBasedTripSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else
|
||||
Future.Nil
|
||||
|
||||
val consumerBasedWalsCandidatesFuture =
|
||||
if (params(
|
||||
ConsumerBasedWalsParams.EnableSourceParam
|
||||
)) {
|
||||
getConsumerBasedWalsCandidates(sourceSignals, params)
|
||||
}.map { Seq(_) }
|
||||
else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedTwHINCandidatesFuture =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
|
||||
getHnswCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTwHINSimilarityEngine.fromParams(
|
||||
InternalId.UserId(requestUserId),
|
||||
params),
|
||||
consumerBasedTwHINANNSimilarityEngine
|
||||
).map { Seq(_) }
|
||||
} else Future.Nil
|
||||
|
||||
val consumerEmbeddingBasedTwoTowerCandidatesFuture =
|
||||
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwoTowerParam)) {
|
||||
getHnswCandidates(
|
||||
params,
|
||||
ConsumerEmbeddingBasedTwoTowerSimilarityEngine.fromParams(
|
||||
InternalId.UserId(requestUserId),
|
||||
params),
|
||||
consumerBasedTwoTowerSimilarityEngine
|
||||
).map {
|
||||
Seq(_)
|
||||
}
|
||||
} else Future.Nil
|
||||
|
||||
val customizedRetrievalBasedCandidatesFuture =
|
||||
getCandidatesPerSimilarityEngineModel(
|
||||
requestUserId,
|
||||
params,
|
||||
CustomizedRetrievalCandidateGeneration.fromParams,
|
||||
customizedRetrievalCandidateGeneration.get)
|
||||
|
||||
Future
|
||||
.collect(
|
||||
Seq(
|
||||
tweetBasedCandidatesFuture,
|
||||
producerBasedCandidatesFuture,
|
||||
simClustersInterestedInBasedCandidatesFuture,
|
||||
consumerBasedWalsCandidatesFuture,
|
||||
consumerEmbeddingBasedLogFavBasedTripCandidatesFuture,
|
||||
consumerEmbeddingBasedFollowBasedTripCandidatesFuture,
|
||||
consumerEmbeddingBasedTwHINCandidatesFuture,
|
||||
consumerEmbeddingBasedTwoTowerCandidatesFuture,
|
||||
consumersBasedUvgRealGraphInCandidatesFuture,
|
||||
customizedRetrievalBasedCandidatesFuture
|
||||
)).map { candidatesList =>
|
||||
// remove empty innerSeq
|
||||
val result = candidatesList.flatten.filter(_.nonEmpty)
|
||||
stats.stat("numOfSequences").add(result.size)
|
||||
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
|
||||
|
||||
result
|
||||
}
|
||||
}
|
||||
|
||||
private def getGraphBasedCandidates[QueryType](
|
||||
params: configapi.Params,
|
||||
query: EngineQuery[QueryType],
|
||||
engine: StandardSimilarityEngine[QueryType, TweetWithScore],
|
||||
toSimilarityEngineInfo: Double => SimilarityEngineInfo,
|
||||
graphSourceInfoOpt: Option[GraphSourceInfo] = None
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val candidatesOptFut = engine.getCandidates(query)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
engine.getScopedStats.stat("sortedCandidates_size").add(sortedCandidates.size)
|
||||
val tweetsWithCandidateGenerationInfo = sortedCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo = toSimilarityEngineInfo(tweetWithScore.score)
|
||||
val sourceInfo = graphSourceInfoOpt.map { graphSourceInfo =>
|
||||
// The internalId is a placeholder value. We do not plan to store the full seedUserId set.
|
||||
SourceInfo(
|
||||
sourceType = graphSourceInfo.sourceType,
|
||||
internalId = InternalId.UserId(0L),
|
||||
sourceEventTime = None
|
||||
)
|
||||
}
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
sourceInfo,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getCandidates[QueryType](
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params,
|
||||
fromParams: (SourceInfo, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
val queries = sourceSignals.map { sourceInfo =>
|
||||
fromParams(sourceInfo, params)
|
||||
}.toSeq
|
||||
|
||||
Future
|
||||
.collect {
|
||||
queries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(candidates.toSeq.flatten)
|
||||
} yield {
|
||||
prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def getConsumerBasedWalsCandidates(
|
||||
sourceSignals: Set[SourceInfo],
|
||||
params: configapi.Params
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
// Fetch source signals and filter them based on age.
|
||||
val signals = FilterUtil.tweetSourceAgeFilter(
|
||||
getConsumerBasedWalsSourceInfo(sourceSignals).toSeq,
|
||||
params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
|
||||
|
||||
val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates(
|
||||
ConsumerBasedWalsSimilarityEngine.fromParams(signals, params)
|
||||
)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
consumerBasedWalsSimilarityEngine.getScopedStats
|
||||
.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getSimClustersTripCandidates(
|
||||
params: configapi.Params,
|
||||
query: TripEngineQuery,
|
||||
engine: StandardSimilarityEngine[
|
||||
TripEngineQuery,
|
||||
TripTweetWithScore
|
||||
],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetsWithCandidatesGenerationInfoOptFut =
|
||||
engine.getCandidates(EngineQuery(query, params)).map {
|
||||
_.map {
|
||||
_.map { tweetWithScore =>
|
||||
// define filters
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
SimilarityEngineType.ExploreTripOfflineSimClustersTweets,
|
||||
None,
|
||||
Some(tweetWithScore.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidatesGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
private def getHnswCandidates(
|
||||
params: configapi.Params,
|
||||
query: HnswANNEngineQuery,
|
||||
engine: HnswANNSimilarityEngine,
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val candidatesOptFut = engine.getCandidates(query)
|
||||
val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map {
|
||||
_.map { tweetsWithScores =>
|
||||
val sortedCandidates = tweetsWithScores.sortBy(-_.score)
|
||||
val filteredCandidates =
|
||||
FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam))
|
||||
engine.getScopedStats.stat("filteredCandidates_size").add(filteredCandidates.size)
|
||||
val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore =>
|
||||
{
|
||||
val similarityEngineInfo =
|
||||
engine.toSimilarityEngineInfo(query, tweetWithScore.score)
|
||||
TweetWithCandidateGenerationInfo(
|
||||
tweetWithScore.tweetId,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
similarityEngineInfo,
|
||||
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam)
|
||||
tweetsWithCandidateGenerationInfo.take(maxCandidateNum)
|
||||
}
|
||||
}
|
||||
for {
|
||||
tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut
|
||||
initialCandidates <- convertToInitialCandidates(
|
||||
tweetsWithCandidateGenerationInfoOpt.toSeq.flatten)
|
||||
} yield initialCandidates
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns candidates from each similarity engine separately.
|
||||
* For 1 requestUserId, it will fetch results from each similarity engine e_i,
|
||||
* and returns Seq[Seq[TweetCandidate]].
|
||||
*/
|
||||
private def getCandidatesPerSimilarityEngineModel[QueryType](
|
||||
requestUserId: UserId,
|
||||
params: configapi.Params,
|
||||
fromParams: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[
|
||||
Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
val query = fromParams(InternalId.UserId(requestUserId), params)
|
||||
getFunc(query).flatMap { candidatesPerSimilarityEngineModelOpt =>
|
||||
val candidatesPerSimilarityEngineModel = candidatesPerSimilarityEngineModelOpt.toSeq.flatten
|
||||
Future.collect {
|
||||
candidatesPerSimilarityEngineModel.map(convertToInitialCandidates)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
object CandidateSourcesRouter {
|
||||
def getGraphSourceInfoBySourceType(
|
||||
sourceTypeStr: String,
|
||||
sourceGraphs: Map[String, Option[GraphSourceInfo]]
|
||||
): Option[GraphSourceInfo] = {
|
||||
sourceGraphs.getOrElse(sourceTypeStr, None)
|
||||
}
|
||||
|
||||
def getTweetBasedSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForTweetBasedUnifiedSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
def getProducerBasedSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForProducerBasedUnifiedSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
def getConsumerBasedWalsSourceInfo(
|
||||
sourceSignals: Set[SourceInfo]
|
||||
): Set[SourceInfo] = {
|
||||
sourceSignals.collect {
|
||||
case sourceInfo
|
||||
if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) =>
|
||||
sourceInfo
|
||||
}
|
||||
}
|
||||
|
||||
/***
|
||||
* Signal funneling should not exist in CG or even in any SimilarityEngine.
|
||||
* They will be in Router, or eventually, in CrCandidateGenerator.
|
||||
*/
|
||||
val AllowedSourceTypesForConsumerBasedWalsSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.TweetDontLike.value, //currently no-op
|
||||
SourceType.TweetReport.value, //currently no-op
|
||||
SourceType.AccountMute.value, //currently no-op
|
||||
SourceType.AccountBlock.value //currently no-op
|
||||
)
|
||||
val AllowedSourceTypesForTweetBasedUnifiedSE = Set(
|
||||
SourceType.TweetFavorite.value,
|
||||
SourceType.Retweet.value,
|
||||
SourceType.OriginalTweet.value,
|
||||
SourceType.Reply.value,
|
||||
SourceType.TweetShare.value,
|
||||
SourceType.NotificationClick.value,
|
||||
SourceType.GoodTweetClick.value,
|
||||
SourceType.VideoTweetQualityView.value,
|
||||
SourceType.VideoTweetPlayback50.value,
|
||||
SourceType.TweetAggregation.value,
|
||||
)
|
||||
val AllowedSourceTypesForProducerBasedUnifiedSE = Set(
|
||||
SourceType.UserFollow.value,
|
||||
SourceType.UserRepeatedProfileVisit.value,
|
||||
SourceType.RealGraphOon.value,
|
||||
SourceType.FollowRecommendation.value,
|
||||
SourceType.UserTrafficAttributionProfileVisit.value,
|
||||
SourceType.GoodProfileClick.value,
|
||||
SourceType.ProducerAggregation.value,
|
||||
)
|
||||
}
|
@ -1,350 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.blender.SwitchBlender
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.filter.PostRankFilterRunner
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
|
||||
import com.twitter.cr_mixer.model.BlendedCandidate
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.RankerParams
import com.twitter.cr_mixer.param.RecentNegativeSignalParams
import com.twitter.cr_mixer.ranker.SwitchRanker
import com.twitter.cr_mixer.source_signal.SourceInfoRouter
import com.twitter.cr_mixer.source_signal.UssStore.EnabledNegativeSourceTypes
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer

import javax.inject.Inject
import javax.inject.Singleton

/**
 * For now, the candidate generator performs the main steps as follows:
 * 1. Source signal (via USS, FRS) fetch
 * 2. Candidate generation
 * 3. Filtering
 * 4. Interleave blender
 * 5. Ranker
 * 6. Post-ranker filter
 * 7. Truncation
 */
@Singleton
class CrCandidateGenerator @Inject() (
  sourceInfoRouter: SourceInfoRouter,
  candidateSourceRouter: CandidateSourcesRouter,
  switchBlender: SwitchBlender,
  preRankFilterRunner: PreRankFilterRunner,
  postRankFilterRunner: PostRankFilterRunner,
  switchRanker: SwitchRanker,
  crMixerScribeLogger: CrMixerScribeLogger,
  timeoutConfig: TimeoutConfig,
  globalStats: StatsReceiver) {

  private val timer: Timer = new JavaTimer(true)

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)

  private val fetchSourcesStats = stats.scope("fetchSources")
  private val fetchPositiveSourcesStats = stats.scope("fetchPositiveSources")
  private val fetchNegativeSourcesStats = stats.scope("fetchNegativeSources")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val fetchCandidatesAfterFilterStats = stats.scope("fetchCandidatesAfterFilter")
  private val preRankFilterStats = stats.scope("preRankFilter")
  private val interleaveStats = stats.scope("interleave")
  private val rankStats = stats.scope("rank")
  private val postRankFilterStats = stats.scope("postRankFilter")
  private val blueVerifiedTweetStats = stats.scope("blueVerifiedTweetStats")
  private val blueVerifiedTweetStatsPerSimilarityEngine =
    stats.scope("blueVerifiedTweetStatsPerSimilarityEngine")

  def get(query: CrCandidateGeneratorQuery): Future[Seq[RankedCandidate]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)
    val perProductBlueVerifiedStats =
      blueVerifiedTweetStats.scope("perProduct", query.product.toString)

    StatsUtil.trackItemsStats(allStats) {
      trackResultStats(perProductStats) {
        StatsUtil.trackItemsStats(perProductStats) {
          val result = for {
            (sourceSignals, sourceGraphsMap) <- StatsUtil.trackBlockStats(fetchSourcesStats) {
              fetchSources(query)
            }
            initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) {
              // split the source signals into positive and negative signals
              val (positiveSignals, negativeSignals) = sourceSignals.partition { signal =>
                !EnabledNegativeSourceTypes.contains(signal.sourceType)
              }
              fetchPositiveSourcesStats.stat("size").add(positiveSignals.size)
              fetchNegativeSourcesStats.stat("size").add(negativeSignals.size)

              // find the positive signals to keep, removing blocked and muted users
              val filteredSourceInfo =
                if (negativeSignals.nonEmpty && query.params(
                    RecentNegativeSignalParams.EnableSourceParam)) {
                  filterSourceInfo(positiveSignals, negativeSignals)
                } else {
                  positiveSignals
                }

              // fetch candidates from the positive signals
              StatsUtil.trackBlockStats(fetchCandidatesStats) {
                fetchCandidates(query, filteredSourceInfo, sourceGraphsMap)
              }
            }
            filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
              preRankFilter(query, initialCandidates)
            }
            interleavedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
              interleave(query, filteredCandidates)
            }
            rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
              val candidatesToRank =
                interleavedCandidates.take(query.params(RankerParams.MaxCandidatesToRank))
              rank(query, candidatesToRank)
            }
            postRankFilterCandidates <- StatsUtil.trackItemsStats(postRankFilterStats) {
              postRankFilter(query, rankedCandidates)
            }
          } yield {
            trackTopKStats(
              800,
              postRankFilterCandidates,
              isQueryK = false,
              perProductBlueVerifiedStats)
            trackTopKStats(
              400,
              postRankFilterCandidates,
              isQueryK = false,
              perProductBlueVerifiedStats)
            trackTopKStats(
              query.maxNumResults,
              postRankFilterCandidates,
              isQueryK = true,
              perProductBlueVerifiedStats)

            val (blueVerifiedTweets, remainingTweets) =
              postRankFilterCandidates.partition(
                _.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
            val topKBlueVerified = blueVerifiedTweets.take(query.maxNumResults)
            val topKRemaining = remainingTweets.take(query.maxNumResults - topKBlueVerified.size)

            trackBlueVerifiedTweetStats(topKBlueVerified, perProductBlueVerifiedStats)

            if (topKBlueVerified.nonEmpty && query.params(RankerParams.EnableBlueVerifiedTopK)) {
              topKBlueVerified ++ topKRemaining
            } else {
              postRankFilterCandidates
            }
          }
          result.raiseWithin(timeoutConfig.serviceTimeout)(timer)
        }
      }
    }
  }

  private def fetchSources(
    query: CrCandidateGeneratorQuery
  ): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
    crMixerScribeLogger.scribeSignalSources(
      query,
      sourceInfoRouter
        .get(query.userId, query.product, query.userState, query.params))
  }

  private def filterSourceInfo(
    positiveSignals: Set[SourceInfo],
    negativeSignals: Set[SourceInfo]
  ): Set[SourceInfo] = {
    val filterUsers: Set[Long] = negativeSignals.flatMap {
      case SourceInfo(_, InternalId.UserId(userId), _) => Some(userId)
      case _ => None
    }

    positiveSignals.filter {
      case SourceInfo(_, InternalId.UserId(userId), _) => !filterUsers.contains(userId)
      case _ => true
    }
  }

  def fetchCandidates(
    query: CrCandidateGeneratorQuery,
    sourceSignals: Set[SourceInfo],
    sourceGraphs: Map[String, Option[GraphSourceInfo]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val initialCandidates = candidateSourceRouter
      .fetchCandidates(
        query.userId,
        sourceSignals,
        sourceGraphs,
        query.params
      )

    initialCandidates.map(_.flatten.map { candidate =>
      if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) {
        blueVerifiedTweetStatsPerSimilarityEngine
          .scope(query.product.toString).scope(
            candidate.candidateGenerationInfo.contributingSimilarityEngines.head.similarityEngineType.toString).counter(
            candidate.tweetInfo.authorId.toString).incr()
      }
    })

    crMixerScribeLogger.scribeInitialCandidates(
      query,
      initialCandidates
    )
  }

  private def preRankFilter(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    crMixerScribeLogger.scribePreRankFilterCandidates(
      query,
      preRankFilterRunner
        .runSequentialFilters(query, candidates))
  }

  private def postRankFilter(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[RankedCandidate]
  ): Future[Seq[RankedCandidate]] = {
    postRankFilterRunner.run(query, candidates)
  }

  private def interleave(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[BlendedCandidate]] = {
    crMixerScribeLogger.scribeInterleaveCandidates(
      query,
      switchBlender
        .blend(query.params, query.userState, candidates))
  }

  private def rank(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[BlendedCandidate],
  ): Future[Seq[RankedCandidate]] = {
    crMixerScribeLogger.scribeRankedCandidates(
      query,
      switchRanker.rank(query, candidates)
    )
  }

  private def trackResultStats(
    stats: StatsReceiver
  )(
    fn: => Future[Seq[RankedCandidate]]
  ): Future[Seq[RankedCandidate]] = {
    fn.onSuccess { candidates =>
      trackReasonChosenSourceTypeStats(candidates, stats)
      trackReasonChosenSimilarityEngineStats(candidates, stats)
      trackPotentialReasonsSourceTypeStats(candidates, stats)
      trackPotentialReasonsSimilarityEngineStats(candidates, stats)
    }
  }

  private def trackReasonChosenSourceTypeStats(
    candidates: Seq[RankedCandidate],
    stats: StatsReceiver
  ): Unit = {
    candidates
      .groupBy(_.reasonChosen.sourceInfoOpt.map(_.sourceType))
      .foreach {
        case (sourceTypeOpt, rankedCands) =>
          val sourceType = sourceTypeOpt.map(_.toString).getOrElse("RequesterId") // default
          stats.stat("reasonChosen", "sourceType", sourceType, "size").add(rankedCands.size)
      }
  }

  private def trackReasonChosenSimilarityEngineStats(
    candidates: Seq[RankedCandidate],
    stats: StatsReceiver
  ): Unit = {
    candidates
      .groupBy(_.reasonChosen.similarityEngineInfo.similarityEngineType)
      .foreach {
        case (seInfoType, rankedCands) =>
          stats
            .stat("reasonChosen", "similarityEngine", seInfoType.toString, "size").add(
              rankedCands.size)
      }
  }

  private def trackPotentialReasonsSourceTypeStats(
    candidates: Seq[RankedCandidate],
    stats: StatsReceiver
  ): Unit = {
    candidates
      .flatMap(_.potentialReasons.map(_.sourceInfoOpt.map(_.sourceType)))
      .groupBy(source => source)
      .foreach {
        case (sourceInfoOpt, seq) =>
          val sourceType = sourceInfoOpt.map(_.toString).getOrElse("RequesterId") // default
          stats.stat("potentialReasons", "sourceType", sourceType, "size").add(seq.size)
      }
  }

  private def trackPotentialReasonsSimilarityEngineStats(
    candidates: Seq[RankedCandidate],
    stats: StatsReceiver
  ): Unit = {
    candidates
      .flatMap(_.potentialReasons.map(_.similarityEngineInfo.similarityEngineType))
      .groupBy(se => se)
      .foreach {
        case (seType, seq) =>
          stats.stat("potentialReasons", "similarityEngine", seType.toString, "size").add(seq.size)
      }
  }

  private def trackBlueVerifiedTweetStats(
    candidates: Seq[RankedCandidate],
    statsReceiver: StatsReceiver
  ): Unit = {
    candidates.foreach { candidate =>
      if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) {
        statsReceiver.counter(candidate.tweetInfo.authorId.toString).incr()
        statsReceiver
          .scope(candidate.tweetInfo.authorId.toString).counter(candidate.tweetId.toString).incr()
      }
    }
  }

  private def trackTopKStats(
    k: Int,
    tweetCandidates: Seq[RankedCandidate],
    isQueryK: Boolean,
    statsReceiver: StatsReceiver
  ): Unit = {
    val (topK, beyondK) = tweetCandidates.splitAt(k)

    val blueVerifiedIds = tweetCandidates.collect {
      case candidate if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true) =>
        candidate.tweetInfo.authorId
    }.toSet

    blueVerifiedIds.foreach { blueVerifiedId =>
      val numTweetsTopK = topK.count(_.tweetInfo.authorId == blueVerifiedId)
      val numTweetsBeyondK = beyondK.count(_.tweetInfo.authorId == blueVerifiedId)

      if (isQueryK) {
        statsReceiver.scope(blueVerifiedId.toString).stat("topK").add(numTweetsTopK)
        statsReceiver
          .scope(blueVerifiedId.toString).stat("beyondK").add(numTweetsBeyondK)
      } else {
        statsReceiver.scope(blueVerifiedId.toString).stat(s"top$k").add(numTweetsTopK)
        statsReceiver
          .scope(blueVerifiedId.toString).stat(s"beyond$k").add(numTweetsBeyondK)
      }
    }
  }
}
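// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): how the seven pipeline
// steps documented above compose as a chain of Futures with an overall
// deadline. The types and step functions below (`Signals`, `Candidate`, etc.)
// are hypothetical placeholders, not the real cr-mixer types.
// ---------------------------------------------------------------------------
// import com.twitter.util.{Duration, Future, JavaTimer}
//
// object PipelineSketch {
//   final case class Signals(userIds: Seq[Long])
//   final case class Candidate(tweetId: Long, score: Double)
//
//   private val timer = new JavaTimer(true)
//
//   // Each step is asynchronous; the for-comprehension below sequences them.
//   def fetchSources(): Future[Signals] = Future.value(Signals(Seq(1L, 2L)))
//   def fetchCandidates(s: Signals): Future[Seq[Candidate]] =
//     Future.value(s.userIds.map(id => Candidate(id * 10, score = 1.0 / id)))
//   def preRankFilter(c: Seq[Candidate]): Future[Seq[Candidate]] =
//     Future.value(c.filter(_.score > 0.0))
//   def interleave(c: Seq[Candidate]): Future[Seq[Candidate]] = Future.value(c)
//   def rank(c: Seq[Candidate]): Future[Seq[Candidate]] =
//     Future.value(c.sortBy(-_.score))
//   def postRankFilter(c: Seq[Candidate]): Future[Seq[Candidate]] = Future.value(c)
//
//   def get(maxResults: Int, timeout: Duration): Future[Seq[Candidate]] = {
//     val result = for {
//       signals <- fetchSources()            // 1. source signal fetch
//       initial <- fetchCandidates(signals)  // 2. candidate generation
//       filtered <- preRankFilter(initial)   // 3. filtering
//       blended <- interleave(filtered)      // 4. interleave blender
//       ranked <- rank(blended)              // 5. ranker
//       kept <- postRankFilter(ranked)       // 6. post-ranker filter
//     } yield kept.take(maxResults)          // 7. truncation
//     result.raiseWithin(timeout)(timer)     // overall deadline, as in the real class
//   }
// }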
@ -1,345 +0,0 @@
package com.twitter.cr_mixer.candidate_generation

import com.twitter.cr_mixer.candidate_generation.CustomizedRetrievalCandidateGeneration.Query
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedCandidateGenerationParams._
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedTwhinParams._
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupEngineQuery
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.CandidateSource
import com.twitter.frigate.common.base.Stats
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.collection.mutable.ArrayBuffer

/**
 * A candidate generator that fetches similar tweets from multiple customized retrieval-based candidate sources.
 *
 * Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different
 * similarity engines without blending. In other words, this class shall not be thought of as a
 * Unified Similarity Engine. It is a CG that calls multiple singular Similarity Engines.
 */
@Singleton
case class CustomizedRetrievalCandidateGeneration @Inject() (
  @Named(ModuleNames.TwhinCollabFilterSimilarityEngine)
  twhinCollabFilterSimilarityEngine: LookupSimilarityEngine[
    TwhinCollabFilterSimilarityEngine.Query,
    TweetWithScore
  ],
  @Named(ModuleNames.DiffusionBasedSimilarityEngine)
  diffusionBasedSimilarityEngine: LookupSimilarityEngine[
    DiffusionBasedSimilarityEngine.Query,
    TweetWithScore
  ],
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      Query,
      Seq[TweetWithCandidateGenerationInfo]
    ] {

  override def name: String = this.getClass.getSimpleName

  private val stats = statsReceiver.scope(name)
  private val fetchCandidatesStat = stats.scope("fetchCandidates")

  /**
   * For each similarity engine model, returns a list of tweet candidates.
   */
  override def get(
    query: Query
  ): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
    query.internalId match {
      case InternalId.UserId(_) =>
        Stats.trackOption(fetchCandidatesStat) {
          val twhinCollabFilterForFollowCandidatesFut = if (query.enableTwhinCollabFilter) {
            twhinCollabFilterSimilarityEngine.getCandidates(query.twhinCollabFilterFollowQuery)
          } else Future.None

          val twhinCollabFilterForEngagementCandidatesFut =
            if (query.enableTwhinCollabFilter) {
              twhinCollabFilterSimilarityEngine.getCandidates(
                query.twhinCollabFilterEngagementQuery)
            } else Future.None

          val twhinMultiClusterForFollowCandidatesFut = if (query.enableTwhinMultiCluster) {
            twhinCollabFilterSimilarityEngine.getCandidates(query.twhinMultiClusterFollowQuery)
          } else Future.None

          val twhinMultiClusterForEngagementCandidatesFut =
            if (query.enableTwhinMultiCluster) {
              twhinCollabFilterSimilarityEngine.getCandidates(
                query.twhinMultiClusterEngagementQuery)
            } else Future.None

          val diffusionBasedSimilarityEngineCandidatesFut = if (query.enableRetweetBasedDiffusion) {
            diffusionBasedSimilarityEngine.getCandidates(query.diffusionBasedSimilarityEngineQuery)
          } else Future.None

          Future
            .join(
              twhinCollabFilterForFollowCandidatesFut,
              twhinCollabFilterForEngagementCandidatesFut,
              twhinMultiClusterForFollowCandidatesFut,
              twhinMultiClusterForEngagementCandidatesFut,
              diffusionBasedSimilarityEngineCandidatesFut
            ).map {
              case (
                    twhinCollabFilterForFollowCandidates,
                    twhinCollabFilterForEngagementCandidates,
                    twhinMultiClusterForFollowCandidates,
                    twhinMultiClusterForEngagementCandidates,
                    diffusionBasedSimilarityEngineCandidates) =>
                val maxCandidateNumPerSourceKey = 200
                val twhinCollabFilterForFollowWithCGInfo =
                  getTwhinCollabCandidatesWithCGInfo(
                    twhinCollabFilterForFollowCandidates,
                    maxCandidateNumPerSourceKey,
                    query.twhinCollabFilterFollowQuery,
                  )
                val twhinCollabFilterForEngagementWithCGInfo =
                  getTwhinCollabCandidatesWithCGInfo(
                    twhinCollabFilterForEngagementCandidates,
                    maxCandidateNumPerSourceKey,
                    query.twhinCollabFilterEngagementQuery,
                  )
                val twhinMultiClusterForFollowWithCGInfo =
                  getTwhinCollabCandidatesWithCGInfo(
                    twhinMultiClusterForFollowCandidates,
                    maxCandidateNumPerSourceKey,
                    query.twhinMultiClusterFollowQuery,
                  )
                val twhinMultiClusterForEngagementWithCGInfo =
                  getTwhinCollabCandidatesWithCGInfo(
                    twhinMultiClusterForEngagementCandidates,
                    maxCandidateNumPerSourceKey,
                    query.twhinMultiClusterEngagementQuery,
                  )
                val retweetBasedDiffusionWithCGInfo =
                  getDiffusionBasedCandidatesWithCGInfo(
                    diffusionBasedSimilarityEngineCandidates,
                    maxCandidateNumPerSourceKey,
                    query.diffusionBasedSimilarityEngineQuery,
                  )

                val twhinCollabCandidateSourcesToBeInterleaved =
                  ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                    twhinCollabFilterForFollowWithCGInfo,
                    twhinCollabFilterForEngagementWithCGInfo,
                  )

                val twhinMultiClusterCandidateSourcesToBeInterleaved =
                  ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                    twhinMultiClusterForFollowWithCGInfo,
                    twhinMultiClusterForEngagementWithCGInfo,
                  )

                val interleavedTwhinCollabCandidates =
                  InterleaveUtil.interleave(twhinCollabCandidateSourcesToBeInterleaved)

                val interleavedTwhinMultiClusterCandidates =
                  InterleaveUtil.interleave(twhinMultiClusterCandidateSourcesToBeInterleaved)

                val twhinCollabFilterResults =
                  if (interleavedTwhinCollabCandidates.nonEmpty) {
                    Some(interleavedTwhinCollabCandidates.take(maxCandidateNumPerSourceKey))
                  } else None

                val twhinMultiClusterResults =
                  if (interleavedTwhinMultiClusterCandidates.nonEmpty) {
                    Some(interleavedTwhinMultiClusterCandidates.take(maxCandidateNumPerSourceKey))
                  } else None

                val diffusionResults =
                  if (retweetBasedDiffusionWithCGInfo.nonEmpty) {
                    Some(retweetBasedDiffusionWithCGInfo.take(maxCandidateNumPerSourceKey))
                  } else None

                Some(
                  Seq(
                    twhinCollabFilterResults,
                    twhinMultiClusterResults,
                    diffusionResults
                  ).flatten)
            }
        }
      case _ =>
        throw new IllegalArgumentException("sourceId_is_not_userId_cnt")
    }
  }

  /** Returns the tweets that were created less than `maxTweetAgeHours` ago */
  private def tweetAgeFilter(
    candidates: Seq[TweetWithScore],
    maxTweetAgeHours: Duration
  ): Seq[TweetWithScore] = {
    // Tweet IDs are approximately chronological (see http://go/snowflake),
    // so we compute the earliest permitted tweet ID once.
    // The per-candidate check is then candidate.tweetId >= earliestTweetId, which is far cheaper.
    val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAgeHours)
    candidates.filter { candidate => candidate.tweetId >= earliestTweetId }
  }

  /**
   * Age-filters tweet candidates and records stats.
   * Only the age filter logic (via tweetAgeFilter) is effective here; this wrapper exists mostly for metric logging.
   */
  private def ageFilterWithStats(
    offlineInterestedInCandidates: Seq[TweetWithScore],
    maxTweetAgeHours: Duration,
    scopedStatsReceiver: StatsReceiver
  ): Seq[TweetWithScore] = {
    scopedStatsReceiver.stat("size").add(offlineInterestedInCandidates.size)
    val candidates = offlineInterestedInCandidates.map { candidate =>
      TweetWithScore(candidate.tweetId, candidate.score)
    }
    val filteredCandidates = tweetAgeFilter(candidates, maxTweetAgeHours)
    scopedStatsReceiver.stat("filtered_size").add(filteredCandidates.size)
    if (filteredCandidates.isEmpty) scopedStatsReceiver.counter("empty").incr()

    filteredCandidates
  }

  private def getTwhinCollabCandidatesWithCGInfo(
    tweetCandidates: Option[Seq[TweetWithScore]],
    maxCandidateNumPerSourceKey: Int,
    twhinCollabFilterQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
  ): Seq[TweetWithCandidateGenerationInfo] = {
    val twhinTweets = tweetCandidates match {
      case Some(tweetsWithScores) =>
        tweetsWithScores.map { tweetWithScore =>
          TweetWithCandidateGenerationInfo(
            tweetWithScore.tweetId,
            CandidateGenerationInfo(
              None,
              TwhinCollabFilterSimilarityEngine
                .toSimilarityEngineInfo(twhinCollabFilterQuery, tweetWithScore.score),
              Seq.empty
            )
          )
        }
      case _ => Seq.empty
    }
    twhinTweets.take(maxCandidateNumPerSourceKey)
  }

  private def getDiffusionBasedCandidatesWithCGInfo(
    tweetCandidates: Option[Seq[TweetWithScore]],
    maxCandidateNumPerSourceKey: Int,
    diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
      DiffusionBasedSimilarityEngine.Query
    ],
  ): Seq[TweetWithCandidateGenerationInfo] = {
    val diffusionTweets = tweetCandidates match {
      case Some(tweetsWithScores) =>
        tweetsWithScores.map { tweetWithScore =>
          TweetWithCandidateGenerationInfo(
            tweetWithScore.tweetId,
            CandidateGenerationInfo(
              None,
              DiffusionBasedSimilarityEngine
                .toSimilarityEngineInfo(diffusionBasedSimilarityEngineQuery, tweetWithScore.score),
              Seq.empty
            )
          )
        }
      case _ => Seq.empty
    }
    diffusionTweets.take(maxCandidateNumPerSourceKey)
  }
}

object CustomizedRetrievalCandidateGeneration {

  case class Query(
    internalId: InternalId,
    maxCandidateNumPerSourceKey: Int,
    maxTweetAgeHours: Duration,
    // twhinCollabFilter
    enableTwhinCollabFilter: Boolean,
    twhinCollabFilterFollowQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    twhinCollabFilterEngagementQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    // twhinMultiCluster
    enableTwhinMultiCluster: Boolean,
    twhinMultiClusterFollowQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    twhinMultiClusterEngagementQuery: LookupEngineQuery[
      TwhinCollabFilterSimilarityEngine.Query
    ],
    enableRetweetBasedDiffusion: Boolean,
    diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
      DiffusionBasedSimilarityEngine.Query
    ],
  )

  def fromParams(
    internalId: InternalId,
    params: configapi.Params
  ): Query = {
    val twhinCollabFilterFollowQuery =
      TwhinCollabFilterSimilarityEngine.fromParams(
        internalId,
        params(CustomizedRetrievalBasedTwhinCollabFilterFollowSource),
        params)

    val twhinCollabFilterEngagementQuery =
      TwhinCollabFilterSimilarityEngine.fromParams(
        internalId,
        params(CustomizedRetrievalBasedTwhinCollabFilterEngagementSource),
        params)

    val twhinMultiClusterFollowQuery =
      TwhinCollabFilterSimilarityEngine.fromParams(
        internalId,
        params(CustomizedRetrievalBasedTwhinMultiClusterFollowSource),
        params)

    val twhinMultiClusterEngagementQuery =
      TwhinCollabFilterSimilarityEngine.fromParams(
        internalId,
        params(CustomizedRetrievalBasedTwhinMultiClusterEngagementSource),
        params)

    val diffusionBasedSimilarityEngineQuery =
      DiffusionBasedSimilarityEngine.fromParams(
        internalId,
        params(CustomizedRetrievalBasedRetweetDiffusionSource),
        params)

    Query(
      internalId = internalId,
      maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam),
      maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam),
      // twhinCollabFilter
      enableTwhinCollabFilter = params(EnableTwhinCollabFilterClusterParam),
      twhinCollabFilterFollowQuery = twhinCollabFilterFollowQuery,
      twhinCollabFilterEngagementQuery = twhinCollabFilterEngagementQuery,
      enableTwhinMultiCluster = params(EnableTwhinMultiClusterParam),
      twhinMultiClusterFollowQuery = twhinMultiClusterFollowQuery,
      twhinMultiClusterEngagementQuery = twhinMultiClusterEngagementQuery,
      enableRetweetBasedDiffusion = params(EnableRetweetBasedDiffusionParam),
      diffusionBasedSimilarityEngineQuery = diffusionBasedSimilarityEngineQuery
    )
  }
}
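// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original file): the round-robin
// interleaving idea behind the InterleaveUtil.interleave calls above. This is
// a simplified stand-in, not the real InterleaveUtil implementation.
// ---------------------------------------------------------------------------
// object InterleaveSketch {
//   /** Takes one element from each non-empty source in turn until all are drained. */
//   def roundRobin[T](sources: Seq[Seq[T]]): Seq[T] = {
//     val iterators = sources.map(_.iterator)
//     val out = scala.collection.mutable.ArrayBuffer.empty[T]
//     var remaining = true
//     while (remaining) {
//       remaining = false
//       iterators.foreach { it =>
//         if (it.hasNext) {
//           out += it.next()
//           remaining = true
//         }
//       }
//     }
//     out.toSeq
//   }
//   // e.g. roundRobin(Seq(Seq(1, 3, 5), Seq(2, 4))) == Seq(1, 2, 3, 4, 5)
// }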
@ -1,220 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithAuthor
|
||||
import com.twitter.cr_mixer.param.FrsParams
|
||||
import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineRouter
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore
|
||||
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweet
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.hermit.constants.AlgorithmFeedbackTokens
|
||||
import com.twitter.hermit.constants.AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap
|
||||
import com.twitter.hermit.model.Algorithm
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* TweetCandidateGenerator based on FRS seed users. For now this candidate generator fetches seed
|
||||
* users from FRS, and retrieves the seed users' past tweets from Earlybird with Earlybird light
|
||||
* ranking models.
|
||||
*/
|
||||
@Singleton
|
||||
class FrsTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]],
|
||||
frsBasedSimilarityEngine: EarlybirdSimilarityEngineRouter,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
timeoutConfig: TimeoutConfig,
|
||||
globalStats: StatsReceiver) {
|
||||
import FrsTweetCandidateGenerator._
|
||||
|
||||
private val timer = DefaultTimer
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchSeedsStats = stats.scope("fetchSeeds")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val filterCandidatesStats = stats.scope("filterCandidates")
|
||||
private val hydrateCandidatesStats = stats.scope("hydrateCandidates")
|
||||
private val getCandidatesStats = stats.scope("getCandidates")
|
||||
|
||||
  /**
   * Retrieves candidates for the given user as follows:
   * 1. Seed user fetch from FRS.
   * 2. Candidate fetch from Earlybird.
   * 3. Filtering.
   * 4. Candidate hydration.
   * 5. Truncation.
   */
def get(
|
||||
frsTweetCandidateGeneratorQuery: FrsTweetCandidateGeneratorQuery
|
||||
): Future[Seq[FrsTweet]] = {
|
||||
val userId = frsTweetCandidateGeneratorQuery.userId
|
||||
val product = frsTweetCandidateGeneratorQuery.product
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", product.name)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
val result = for {
|
||||
seedAuthorWithScores <- StatsUtil.trackOptionItemMapStats(fetchSeedsStats) {
|
||||
fetchSeeds(
|
||||
userId,
|
||||
frsTweetCandidateGeneratorQuery.impressedUserList,
|
||||
frsTweetCandidateGeneratorQuery.languageCodeOpt,
|
||||
frsTweetCandidateGeneratorQuery.countryCodeOpt,
|
||||
frsTweetCandidateGeneratorQuery.params,
|
||||
)
|
||||
}
|
||||
tweetCandidates <- StatsUtil.trackOptionItemsStats(fetchCandidatesStats) {
|
||||
fetchCandidates(
|
||||
userId,
|
||||
seedAuthorWithScores.map(_.keys.toSeq).getOrElse(Seq.empty),
|
||||
frsTweetCandidateGeneratorQuery.impressedTweetList,
|
||||
seedAuthorWithScores.map(_.mapValues(_.score)).getOrElse(Map.empty),
|
||||
frsTweetCandidateGeneratorQuery.params
|
||||
)
|
||||
}
|
||||
filteredTweetCandidates <- StatsUtil.trackOptionItemsStats(filterCandidatesStats) {
|
||||
filterCandidates(
|
||||
tweetCandidates,
|
||||
frsTweetCandidateGeneratorQuery.params
|
||||
)
|
||||
}
|
||||
hydratedTweetCandidates <- StatsUtil.trackOptionItemsStats(hydrateCandidatesStats) {
|
||||
hydrateCandidates(
|
||||
seedAuthorWithScores,
|
||||
filteredTweetCandidates
|
||||
)
|
||||
}
|
||||
} yield {
|
||||
hydratedTweetCandidates
|
||||
.map(_.take(frsTweetCandidateGeneratorQuery.maxNumResults)).getOrElse(Seq.empty)
|
||||
}
|
||||
result.raiseWithin(timeoutConfig.frsBasedTweetEndpointTimeout)(timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch recommended seed users from FRS
|
||||
*/
|
||||
private def fetchSeeds(
|
||||
userId: UserId,
|
||||
userDenyList: Set[UserId],
|
||||
languageCodeOpt: Option[String],
|
||||
countryCodeOpt: Option[String],
|
||||
params: Params
|
||||
): Future[Option[Map[UserId, FrsQueryResult]]] = {
|
||||
frsStore
|
||||
.get(
|
||||
FrsStore.Query(
|
||||
userId,
|
||||
params(FrsParams.FrsBasedCandidateGenerationMaxSeedsNumParam),
|
||||
params(FrsParams.FrsBasedCandidateGenerationDisplayLocationParam).displayLocation,
|
||||
userDenyList.toSeq,
|
||||
languageCodeOpt,
|
||||
countryCodeOpt
|
||||
)).map {
|
||||
_.map { seedAuthors =>
|
||||
seedAuthors.map(user => user.userId -> user).toMap
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch tweet candidates from Earlybird
|
||||
*/
|
||||
private def fetchCandidates(
|
||||
searcherUserId: UserId,
|
||||
seedAuthors: Seq[UserId],
|
||||
impressedTweetList: Set[TweetId],
|
||||
frsUserToScores: Map[UserId, Double],
|
||||
params: Params
|
||||
): Future[Option[Seq[TweetWithAuthor]]] = {
|
||||
if (seedAuthors.nonEmpty) {
|
||||
// call earlybird
|
||||
val query = EarlybirdSimilarityEngineRouter.queryFromParams(
|
||||
Some(searcherUserId),
|
||||
seedAuthors,
|
||||
impressedTweetList,
|
||||
frsUserToScoresForScoreAdjustment = Some(frsUserToScores),
|
||||
params
|
||||
)
|
||||
frsBasedSimilarityEngine.get(query)
|
||||
} else Future.None
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter candidates that do not pass visibility filter policy
|
||||
*/
|
||||
private def filterCandidates(
|
||||
candidates: Option[Seq[TweetWithAuthor]],
|
||||
params: Params
|
||||
): Future[Option[Seq[TweetWithAuthor]]] = {
|
||||
val tweetIds = candidates.map(_.map(_.tweetId).toSet).getOrElse(Set.empty)
|
||||
if (params(FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam))
|
||||
Future
|
||||
.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
candidates.map {
|
||||
// If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
_.filter(candidate => tweetInfos.getOrElse(candidate.tweetId, None).isDefined)
|
||||
}
|
||||
}
|
||||
else {
|
||||
Future.value(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Hydrate the candidates with the FRS candidate sources and scores
|
||||
*/
|
||||
private def hydrateCandidates(
|
||||
frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]],
|
||||
candidates: Option[Seq[TweetWithAuthor]]
|
||||
): Future[Option[Seq[FrsTweet]]] = {
|
||||
Future.value {
|
||||
candidates.map {
|
||||
_.map { tweetWithAuthor =>
|
||||
val frsQueryResult = frsAuthorWithScores.flatMap(_.get(tweetWithAuthor.authorId))
|
||||
FrsTweet(
|
||||
tweetId = tweetWithAuthor.tweetId,
|
||||
authorId = tweetWithAuthor.authorId,
|
||||
frsPrimarySource = frsQueryResult.flatMap(_.primarySource),
|
||||
frsAuthorScore = frsQueryResult.map(_.score),
|
||||
frsCandidateSourceScores = frsQueryResult.flatMap { result =>
|
||||
result.sourceWithScores.map {
|
||||
_.collect {
|
||||
// see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
|
||||
// see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
|
||||
case (candidateSourceAlgoStr, score)
|
||||
if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains(
|
||||
candidateSourceAlgoStr) =>
|
||||
AlgorithmToFeedbackTokenMap.getOrElse(
|
||||
AlgorithmFeedbackTokens.TokenStrToAlgorithmMap
|
||||
.getOrElse(candidateSourceAlgoStr, DefaultAlgo),
|
||||
DefaultAlgoToken) -> score
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object FrsTweetCandidateGenerator {
|
||||
val DefaultAlgo: Algorithm.Value = Algorithm.Other
|
||||
// 9999 is the token for Algorithm.Other
|
||||
val DefaultAlgoToken: Int = AlgorithmToFeedbackTokenMap.getOrElse(DefaultAlgo, 9999)
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class RelatedTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
ProducerBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
preRankFilterRunner: PreRankFilterRunner,
|
||||
relatedTweetScribeLogger: RelatedTweetScribeLogger,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val preRankFilterStats = stats.scope("preRankFilter")
|
||||
|
||||
def get(
|
||||
query: RelatedTweetCandidateGeneratorQuery
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
for {
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query)
|
||||
}
|
||||
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
|
||||
preRankFilter(query, initialCandidates)
|
||||
}
|
||||
} yield {
|
||||
filteredCandidates.headOption
|
||||
.getOrElse(
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedTweetCandidateGenerator results invalid")
|
||||
).take(query.maxNumResults)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
relatedTweetScribeLogger.scribeInitialCandidates(
|
||||
query,
|
||||
query.internalId match {
|
||||
case InternalId.TweetId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case InternalId.UserId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
ProducerBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
|
||||
producerBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case _ =>
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedTweetCandidateGenerator gets invalid InternalId")
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
  /***
   * Fetches candidates from the TweetBased/ProducerBased Unified Similarity Engine
   * and applies the VF filter based on TweetInfoStore.
   * To align with the downstream processing (filter, rank), we return a Seq[Seq[InitialCandidate]]
   * rather than a flat Seq[InitialCandidate], even though only one inner Seq is present.
   */
private def getCandidatesFromSimilarityEngine[QueryType](
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
fromParamsForRelatedTweet: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
/***
|
||||
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
|
||||
* and as a result, it will return Seq[Seq[InitialCandidate]]
|
||||
*/
|
||||
val engineQueries =
|
||||
Seq(fromParamsForRelatedTweet(query.internalId, query.params))
|
||||
|
||||
Future
|
||||
.collect {
|
||||
engineQueries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(
|
||||
candidates.toSeq.flatten
|
||||
)
|
||||
} yield prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def preRankFilter(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
relatedTweetScribeLogger.scribePreRankFilterCandidates(
|
||||
query,
|
||||
preRankFilterRunner
|
||||
.runSequentialFilters(query, candidates))
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
* This tweetInfo filter also acts as the VF filter
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,139 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.filter.PreRankFilterRunner
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class RelatedVideoTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
|
||||
TweetBasedUnifiedSimilarityEngine.Query,
|
||||
TweetWithCandidateGenerationInfo
|
||||
],
|
||||
preRankFilterRunner: PreRankFilterRunner,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val preRankFilterStats = stats.scope("preRankFilter")
|
||||
|
||||
def get(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
for {
|
||||
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query)
|
||||
}
|
||||
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
|
||||
preRankFilter(query, initialCandidates)
|
||||
}
|
||||
} yield {
|
||||
filteredCandidates.headOption
|
||||
.getOrElse(
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedVideoTweetCandidateGenerator results invalid")
|
||||
).take(query.maxNumResults)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
query.internalId match {
|
||||
case InternalId.TweetId(_) =>
|
||||
getCandidatesFromSimilarityEngine(
|
||||
query,
|
||||
TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedVideoTweet,
|
||||
tweetBasedUnifiedSimilarityEngine.getCandidates)
|
||||
case _ =>
|
||||
throw new UnsupportedOperationException(
|
||||
"RelatedVideoTweetCandidateGenerator gets invalid InternalId")
|
||||
}
|
||||
}
|
||||
|
||||
  /***
   * Fetches candidates from the TweetBased/ProducerBased Unified Similarity Engine
   * and applies the VF filter based on TweetInfoStore.
   * To align with the downstream processing (filter, rank), we return a Seq[Seq[InitialCandidate]]
   * rather than a flat Seq[InitialCandidate], even though only one inner Seq is present.
   */
private def getCandidatesFromSimilarityEngine[QueryType](
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery,
|
||||
fromParamsForRelatedVideoTweet: (InternalId, configapi.Params) => QueryType,
|
||||
getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
/***
|
||||
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
|
||||
* and as a result, it will return Seq[Seq[InitialCandidate]]
|
||||
*/
|
||||
val engineQueries =
|
||||
Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params))
|
||||
|
||||
Future
|
||||
.collect {
|
||||
engineQueries.map { query =>
|
||||
for {
|
||||
candidates <- getFunc(query)
|
||||
prefilterCandidates <- convertToInitialCandidates(
|
||||
candidates.toSeq.flatten
|
||||
)
|
||||
} yield prefilterCandidates
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def preRankFilter(
|
||||
query: RelatedVideoTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[Seq[InitialCandidate]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
preRankFilterRunner
|
||||
.runSequentialFilters(query, candidates)
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithCandidateGenerationInfo],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/***
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
* This tweetInfo filter also acts as the VF filter
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
candidate.candidateGenerationInfo
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,640 +0,0 @@
|
||||
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScore
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.InterestedInParams
|
||||
import com.twitter.cr_mixer.param.SimClustersANNParams
|
||||
import com.twitter.cr_mixer.similarity_engine.EngineQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.base.CandidateSource
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.ModelVersions
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.timelines.configapi
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
import javax.inject.Named
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
|
||||
/**
 * This store looks up similar tweets for a given UserId by querying SimClustersANN with the
 * user's UserInterestedIn embedding. It will become a standalone CandidateGeneration class
 * moving forward.
 *
 * After the abstraction improvement (applying the SimilarityEngine trait),
 * this candidate generator is subject to change.
 */
@Singleton
|
||||
case class SimClustersInterestedInCandidateGeneration @Inject() (
|
||||
@Named(ModuleNames.SimClustersANNSimilarityEngine)
|
||||
simClustersANNSimilarityEngine: StandardSimilarityEngine[
|
||||
SimClustersANNSimilarityEngine.Query,
|
||||
TweetWithScore
|
||||
],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends CandidateSource[
|
||||
SimClustersInterestedInCandidateGeneration.Query,
|
||||
Seq[TweetWithCandidateGenerationInfo]
|
||||
] {
|
||||
|
||||
override def name: String = this.getClass.getSimpleName
|
||||
private val stats = statsReceiver.scope(name)
|
||||
private val fetchCandidatesStat = stats.scope("fetchCandidates")
|
||||
|
||||
override def get(
|
||||
query: SimClustersInterestedInCandidateGeneration.Query
|
||||
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
|
||||
|
||||
query.internalId match {
|
||||
case _: InternalId.UserId =>
|
||||
StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
|
||||
// UserInterestedIn Queries
|
||||
val userInterestedInCandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInExperimentalSANNCandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN1CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN2CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN3CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN5CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userInterestedInSANN4CandidateResultFut =
|
||||
if (query.enableUserInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.interestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
// UserNextInterestedIn Queries
|
||||
val userNextInterestedInCandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInExperimentalSANNCandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN1CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN2CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN3CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN5CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userNextInterestedInSANN4CandidateResultFut =
|
||||
if (query.enableUserNextInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.nextInterestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
// AddressBookInterestedIn Queries
|
||||
val userAddressBookInterestedInCandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookExperimentalSANNCandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInExperimentalSimClustersANNQuery,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN1CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN1SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN1Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN2CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN2SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN2Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN3CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN3SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN3Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN5CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN5SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN5Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
val userAddressBookSANN4CandidateResultFut =
|
||||
if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN4SimilarityEngine)
|
||||
getInterestedInCandidateResult(
|
||||
simClustersANNSimilarityEngine,
|
||||
query.addressbookInterestedInSimClustersANN4Query,
|
||||
query.simClustersInterestedInMinScore)
|
||||
else
|
||||
Future.None
|
||||
|
||||
Future
|
||||
.collect(
|
||||
Seq(
|
||||
userInterestedInCandidateResultFut,
|
||||
userNextInterestedInCandidateResultFut,
|
||||
userAddressBookInterestedInCandidateResultFut,
|
||||
userInterestedInExperimentalSANNCandidateResultFut,
|
||||
userNextInterestedInExperimentalSANNCandidateResultFut,
|
||||
userAddressBookExperimentalSANNCandidateResultFut,
|
||||
userInterestedInSANN1CandidateResultFut,
|
||||
userNextInterestedInSANN1CandidateResultFut,
|
||||
userAddressBookSANN1CandidateResultFut,
|
||||
userInterestedInSANN2CandidateResultFut,
|
||||
userNextInterestedInSANN2CandidateResultFut,
|
||||
userAddressBookSANN2CandidateResultFut,
|
||||
userInterestedInSANN3CandidateResultFut,
|
||||
userNextInterestedInSANN3CandidateResultFut,
|
||||
userAddressBookSANN3CandidateResultFut,
|
||||
userInterestedInSANN5CandidateResultFut,
|
||||
userNextInterestedInSANN5CandidateResultFut,
|
||||
userAddressBookSANN5CandidateResultFut,
|
||||
userInterestedInSANN4CandidateResultFut,
|
||||
userNextInterestedInSANN4CandidateResultFut,
|
||||
userAddressBookSANN4CandidateResultFut
|
||||
)
|
||||
).map { candidateResults =>
|
||||
Some(
|
||||
candidateResults.map(candidateResult => candidateResult.getOrElse(Seq.empty))
|
||||
)
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
stats.counter("sourceId_is_not_userId_cnt").incr()
|
||||
Future.None
|
||||
}
|
||||
}
|
||||
|
||||
private def simClustersCandidateMinScoreFilter(
|
||||
simClustersAnnCandidates: Seq[TweetWithScore],
|
||||
simClustersInterestedInMinScore: Double,
|
||||
simClustersANNConfigId: String
|
||||
): Seq[TweetWithScore] = {
|
||||
val filteredCandidates = simClustersAnnCandidates
|
||||
.filter { candidate =>
|
||||
candidate.score > simClustersInterestedInMinScore
|
||||
}
|
||||
|
||||
stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size)
|
||||
stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr()
|
||||
if (filteredCandidates.isEmpty)
|
||||
stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr()
|
||||
|
||||
filteredCandidates.map { candidate =>
|
||||
TweetWithScore(candidate.tweetId, candidate.score)
|
||||
}
|
||||
}
|
||||
|
||||
private def getInterestedInCandidateResult(
  simClustersANNSimilarityEngine: StandardSimilarityEngine[
    SimClustersANNSimilarityEngine.Query,
    TweetWithScore
  ],
  simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
  simClustersInterestedInMinScore: Double,
): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {
  val interestedInCandidatesFut =
    simClustersANNSimilarityEngine.getCandidates(simClustersANNQuery)

  val interestedInCandidateResultFut = interestedInCandidatesFut.map { interestedInCandidates =>
    stats.stat("candidateSize").add(interestedInCandidates.size)

    val embeddingCandidatesStat = stats.scope(
      simClustersANNQuery.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.name)

    embeddingCandidatesStat.stat("candidateSize").add(interestedInCandidates.size)
    if (interestedInCandidates.isEmpty) {
      embeddingCandidatesStat.counter("empty_results").incr()
    }
    embeddingCandidatesStat.counter("requests").incr()

    val filteredTweets = simClustersCandidateMinScoreFilter(
      interestedInCandidates.toSeq.flatten,
      simClustersInterestedInMinScore,
      simClustersANNQuery.storeQuery.simClustersANNConfigId)

    val interestedInTweetsWithCGInfo = filteredTweets.map { tweetWithScore =>
      TweetWithCandidateGenerationInfo(
        tweetWithScore.tweetId,
        CandidateGenerationInfo(
          None,
          SimClustersANNSimilarityEngine
            .toSimilarityEngineInfo(simClustersANNQuery, tweetWithScore.score),
          Seq.empty // SANN is an atomic SE, and hence it has no contributing SEs
        )
      )
    }

    val interestedInResults = if (interestedInTweetsWithCGInfo.nonEmpty) {
      Some(interestedInTweetsWithCGInfo)
    } else None
    interestedInResults
  }
  interestedInCandidateResultFut
}
}
|
||||
|
||||
object SimClustersInterestedInCandidateGeneration {
|
||||
|
||||
case class Query(
|
||||
internalId: InternalId,
|
||||
enableUserInterestedIn: Boolean,
|
||||
enableUserNextInterestedIn: Boolean,
|
||||
enableAddressBookNextInterestedIn: Boolean,
|
||||
enableProdSimClustersANNSimilarityEngine: Boolean,
|
||||
enableExperimentalSimClustersANNSimilarityEngine: Boolean,
|
||||
enableSimClustersANN1SimilarityEngine: Boolean,
|
||||
enableSimClustersANN2SimilarityEngine: Boolean,
|
||||
enableSimClustersANN3SimilarityEngine: Boolean,
|
||||
enableSimClustersANN5SimilarityEngine: Boolean,
|
||||
enableSimClustersANN4SimilarityEngine: Boolean,
|
||||
simClustersInterestedInMinScore: Double,
|
||||
simClustersNextInterestedInMinScore: Double,
|
||||
simClustersAddressBookInterestedInMinScore: Double,
|
||||
interestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInExperimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInExperimentalSimClustersANNQuery: EngineQuery[
|
||||
SimClustersANNSimilarityEngine.Query
|
||||
],
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery: EngineQuery[
|
||||
SimClustersANNSimilarityEngine.Query
|
||||
],
|
||||
interestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
interestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
nextInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
addressbookInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
|
||||
)
|
||||
|
||||
def fromParams(
|
||||
internalId: InternalId,
|
||||
params: configapi.Params,
|
||||
): Query = {
|
||||
// SimClusters common configs
|
||||
val simClustersModelVersion =
|
||||
ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))
|
||||
val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId)
|
||||
val experimentalSimClustersANNConfigId = params(
|
||||
SimClustersANNParams.ExperimentalSimClustersANNConfigId)
|
||||
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
|
||||
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
|
||||
val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId)
|
||||
val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId)
|
||||
val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId)
|
||||
|
||||
val simClustersInterestedInMinScore = params(InterestedInParams.MinScoreParam)
|
||||
val simClustersNextInterestedInMinScore = params(
|
||||
InterestedInParams.MinScoreSequentialModelParam)
|
||||
val simClustersAddressBookInterestedInMinScore = params(
|
||||
InterestedInParams.MinScoreAddressBookParam)
|
||||
|
||||
// InterestedIn embeddings parameters
|
||||
val interestedInEmbedding = params(InterestedInParams.InterestedInEmbeddingIdParam)
|
||||
val nextInterestedInEmbedding = params(InterestedInParams.NextInterestedInEmbeddingIdParam)
|
||||
val addressbookInterestedInEmbedding = params(
|
||||
InterestedInParams.AddressBookInterestedInEmbeddingIdParam)
|
||||
|
||||
// Prod SimClustersANN Query
|
||||
val interestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANNConfigId,
|
||||
params)
|
||||
|
||||
// Experimental SANN cluster Query
|
||||
val interestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInExperimentalSimClustersANNQuery =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
experimentalSimClustersANNConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 1 Query
|
||||
val interestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN1Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN1ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 2 Query
|
||||
val interestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN2Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN2ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 3 Query
|
||||
val interestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN3Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN3ConfigId,
|
||||
params)
|
||||
|
||||
// SimClusters ANN cluster 5 Query
|
||||
val interestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
// SimClusters ANN cluster 4 Query
|
||||
val interestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
interestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
|
||||
val nextInterestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
nextInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN5Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN5ConfigId,
|
||||
params)
|
||||
|
||||
val addressbookInterestedInSimClustersANN4Query =
|
||||
SimClustersANNSimilarityEngine.fromParams(
|
||||
internalId,
|
||||
addressbookInterestedInEmbedding.embeddingType,
|
||||
simClustersModelVersion,
|
||||
simClustersANN4ConfigId,
|
||||
params)
|
||||
|
||||
Query(
|
||||
internalId = internalId,
|
||||
enableUserInterestedIn = params(InterestedInParams.EnableSourceParam),
|
||||
enableUserNextInterestedIn = params(InterestedInParams.EnableSourceSequentialModelParam),
|
||||
enableAddressBookNextInterestedIn = params(InterestedInParams.EnableSourceAddressBookParam),
|
||||
enableProdSimClustersANNSimilarityEngine =
|
||||
params(InterestedInParams.EnableProdSimClustersANNParam),
|
||||
enableExperimentalSimClustersANNSimilarityEngine =
|
||||
params(InterestedInParams.EnableExperimentalSimClustersANNParam),
|
||||
enableSimClustersANN1SimilarityEngine = params(InterestedInParams.EnableSimClustersANN1Param),
|
||||
enableSimClustersANN2SimilarityEngine = params(InterestedInParams.EnableSimClustersANN2Param),
|
||||
enableSimClustersANN3SimilarityEngine = params(InterestedInParams.EnableSimClustersANN3Param),
|
||||
enableSimClustersANN5SimilarityEngine = params(InterestedInParams.EnableSimClustersANN5Param),
|
||||
enableSimClustersANN4SimilarityEngine = params(InterestedInParams.EnableSimClustersANN4Param),
|
||||
simClustersInterestedInMinScore = simClustersInterestedInMinScore,
|
||||
simClustersNextInterestedInMinScore = simClustersNextInterestedInMinScore,
|
||||
simClustersAddressBookInterestedInMinScore = simClustersAddressBookInterestedInMinScore,
|
||||
interestedInSimClustersANNQuery = interestedInSimClustersANNQuery,
|
||||
nextInterestedInSimClustersANNQuery = nextInterestedInSimClustersANNQuery,
|
||||
addressbookInterestedInSimClustersANNQuery = addressbookInterestedInSimClustersANNQuery,
|
||||
interestedInExperimentalSimClustersANNQuery = interestedInExperimentalSimClustersANNQuery,
|
||||
nextInterestedInExperimentalSimClustersANNQuery =
|
||||
nextInterestedInExperimentalSimClustersANNQuery,
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery =
|
||||
addressbookInterestedInExperimentalSimClustersANNQuery,
|
||||
interestedInSimClustersANN1Query = interestedInSimClustersANN1Query,
|
||||
nextInterestedInSimClustersANN1Query = nextInterestedInSimClustersANN1Query,
|
||||
addressbookInterestedInSimClustersANN1Query = addressbookInterestedInSimClustersANN1Query,
|
||||
interestedInSimClustersANN2Query = interestedInSimClustersANN2Query,
|
||||
nextInterestedInSimClustersANN2Query = nextInterestedInSimClustersANN2Query,
|
||||
addressbookInterestedInSimClustersANN2Query = addressbookInterestedInSimClustersANN2Query,
|
||||
interestedInSimClustersANN3Query = interestedInSimClustersANN3Query,
|
||||
nextInterestedInSimClustersANN3Query = nextInterestedInSimClustersANN3Query,
|
||||
addressbookInterestedInSimClustersANN3Query = addressbookInterestedInSimClustersANN3Query,
|
||||
interestedInSimClustersANN5Query = interestedInSimClustersANN5Query,
|
||||
nextInterestedInSimClustersANN5Query = nextInterestedInSimClustersANN5Query,
|
||||
addressbookInterestedInSimClustersANN5Query = addressbookInterestedInSimClustersANN5Query,
|
||||
interestedInSimClustersANN4Query = interestedInSimClustersANN4Query,
|
||||
nextInterestedInSimClustersANN4Query = nextInterestedInSimClustersANN4Query,
|
||||
addressbookInterestedInSimClustersANN4Query = addressbookInterestedInSimClustersANN4Query,
|
||||
)
|
||||
}
|
||||
}
|
@ -1,232 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.config.TimeoutConfig
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TopicTweetWithScore
|
||||
import com.twitter.cr_mixer.param.TopicTweetParams
|
||||
import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweet
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.servo.util.MemoizingStatsReceiver
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.snowflake.id.SnowflakeId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
 * Formerly CrTopic in legacy Content Recommender. This generator finds top Tweets per Topic.
 */
@Singleton
class TopicTweetCandidateGenerator @Inject() (
  certoTopicTweetSimilarityEngine: CertoTopicTweetSimilarityEngine,
  skitTopicTweetSimilarityEngine: SkitTopicTweetSimilarityEngine,
  skitHighPrecisionTopicTweetSimilarityEngine: SkitHighPrecisionTopicTweetSimilarityEngine,
  tweetInfoStore: ReadableStore[TweetId, TweetInfo],
  timeoutConfig: TimeoutConfig,
  globalStats: StatsReceiver) {
|
||||
private val timer = DefaultTimer
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val filterCandidatesStats = stats.scope("filterCandidates")
|
||||
private val tweetyPieFilteredStats = filterCandidatesStats.stat("tweetypie_filtered")
|
||||
private val memoizedStatsReceiver = new MemoizingStatsReceiver(stats)
|
||||
|
||||
def get(
|
||||
query: TopicTweetCandidateGeneratorQuery
|
||||
): Future[Map[Long, Seq[TopicTweet]]] = {
|
||||
val maxTweetAge = query.params(TopicTweetParams.MaxTweetAge)
|
||||
val product = query.product
|
||||
val allStats = memoizedStatsReceiver.scope("all")
|
||||
val perProductStats = memoizedStatsReceiver.scope("perProduct", product.name)
|
||||
StatsUtil.trackMapValueStats(allStats) {
|
||||
StatsUtil.trackMapValueStats(perProductStats) {
|
||||
val result = for {
|
||||
retrievedTweets <- fetchCandidates(query)
|
||||
initialTweetCandidates <- convertToInitialCandidates(retrievedTweets)
|
||||
filteredTweetCandidates <- filterCandidates(
|
||||
initialTweetCandidates,
|
||||
maxTweetAge,
|
||||
query.isVideoOnly,
|
||||
query.impressedTweetList)
|
||||
rankedTweetCandidates = rankCandidates(filteredTweetCandidates)
|
||||
hydratedTweetCandidates = hydrateCandidates(rankedTweetCandidates)
|
||||
} yield {
|
||||
hydratedTweetCandidates.map {
|
||||
case (topicId, topicTweets) =>
|
||||
val topKTweets = topicTweets.take(query.maxNumResults)
|
||||
topicId -> topKTweets
|
||||
}
|
||||
}
|
||||
result.raiseWithin(timeoutConfig.topicTweetEndpointTimeout)(timer)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def fetchCandidates(
|
||||
query: TopicTweetCandidateGeneratorQuery
|
||||
): Future[Map[TopicId, Option[Seq[TopicTweetWithScore]]]] = {
|
||||
Future.collect {
|
||||
query.topicIds.map { topicId =>
|
||||
topicId -> StatsUtil.trackOptionStats(fetchCandidatesStats) {
|
||||
Future
|
||||
.join(
|
||||
certoTopicTweetSimilarityEngine.get(CertoTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params)),
|
||||
skitTopicTweetSimilarityEngine
|
||||
.get(SkitTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params)),
|
||||
skitHighPrecisionTopicTweetSimilarityEngine
|
||||
.get(SkitHighPrecisionTopicTweetSimilarityEngine
|
||||
.fromParams(topicId, query.isVideoOnly, query.params))
|
||||
).map {
|
||||
case (certoTopicTweets, skitTfgTopicTweets, skitHighPrecisionTopicTweets) =>
|
||||
val uniqueCandidates = (certoTopicTweets.getOrElse(Nil) ++
|
||||
skitTfgTopicTweets.getOrElse(Nil) ++
|
||||
skitHighPrecisionTopicTweets.getOrElse(Nil))
|
||||
.groupBy(_.tweetId).map {
|
||||
case (_, dupCandidates) => dupCandidates.head
|
||||
}.toSeq
|
||||
Some(uniqueCandidates)
|
||||
}
|
||||
}
|
||||
}.toMap
|
||||
}
|
||||
}
|
||||
|
||||
private def convertToInitialCandidates(
|
||||
candidatesMap: Map[TopicId, Option[Seq[TopicTweetWithScore]]]
|
||||
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
|
||||
val initialCandidates = candidatesMap.map {
|
||||
case (topicId, candidatesOpt) =>
|
||||
val candidates = candidatesOpt.getOrElse(Nil)
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
val numTweetsPreFilter = tweetIds.size
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/** *
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
val tweetyPieFilteredInitialCandidates = candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
similarityEngineType = candidate.similarityEngineType,
|
||||
modelId = None,
|
||||
score = Some(candidate.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
val numTweetsPostFilter = tweetyPieFilteredInitialCandidates.size
|
||||
tweetyPieFilteredStats.add(numTweetsPreFilter - numTweetsPostFilter)
|
||||
topicId -> tweetyPieFilteredInitialCandidates
|
||||
}
|
||||
}
|
||||
|
||||
Future.collect(initialCandidates.toSeq).map(_.toMap)
|
||||
}
|
||||
|
||||
private def filterCandidates(
|
||||
topicTweetMap: Map[TopicId, Seq[InitialCandidate]],
|
||||
maxTweetAge: Duration,
|
||||
isVideoOnly: Boolean,
|
||||
excludeTweetIds: Set[TweetId]
|
||||
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
|
||||
|
||||
val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
|
||||
|
||||
val filteredResults = topicTweetMap.map {
|
||||
case (topicId, tweetsWithScore) =>
|
||||
topicId -> StatsUtil.trackItemsStats(filterCandidatesStats) {
|
||||
|
||||
val timeFilteredTweets =
|
||||
tweetsWithScore.filter { tweetWithScore =>
|
||||
tweetWithScore.tweetId >= earliestTweetId && !excludeTweetIds.contains(
|
||||
tweetWithScore.tweetId)
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("exclude_and_time_filtered").add(tweetsWithScore.size - timeFilteredTweets.size)
|
||||
|
||||
val tweetNudityFilteredTweets =
|
||||
timeFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.isPassTweetMediaNudityTag.contains(true) => tweet
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("tweet_nudity_filtered").add(
|
||||
timeFilteredTweets.size - tweetNudityFilteredTweets.size)
|
||||
|
||||
val userNudityFilteredTweets =
|
||||
tweetNudityFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.isPassUserNudityRateStrict.contains(true) => tweet
|
||||
}
|
||||
|
||||
filterCandidatesStats
|
||||
.stat("user_nudity_filtered").add(
|
||||
tweetNudityFilteredTweets.size - userNudityFilteredTweets.size)
|
||||
|
||||
val videoFilteredTweets = {
|
||||
if (isVideoOnly) {
|
||||
userNudityFilteredTweets.collect {
|
||||
case tweet if tweet.tweetInfo.hasVideo.contains(true) => tweet
|
||||
}
|
||||
} else {
|
||||
userNudityFilteredTweets
|
||||
}
|
||||
}
|
||||
|
||||
Future.value(videoFilteredTweets)
|
||||
}
|
||||
}
|
||||
Future.collect(filteredResults)
|
||||
}
|
||||
|
||||
private def rankCandidates(
|
||||
tweetCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
|
||||
): Map[TopicId, Seq[InitialCandidate]] = {
|
||||
tweetCandidatesMap.mapValues { tweetCandidates =>
|
||||
tweetCandidates.sortBy { candidate =>
|
||||
-candidate.tweetInfo.favCount
|
||||
}
|
||||
}
|
||||
}
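// Ranking here is a plain sort by descending favCount within each topic; no model-based
// light ranking happens inside this method.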
|
||||
|
||||
private def hydrateCandidates(
|
||||
topicCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
|
||||
): Map[Long, Seq[TopicTweet]] = {
|
||||
topicCandidatesMap.map {
|
||||
case (topicId, tweetsWithScore) =>
|
||||
topicId.entityId ->
|
||||
tweetsWithScore.map { tweetWithScore =>
|
||||
val similarityEngineType: SimilarityEngineType =
|
||||
tweetWithScore.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
|
||||
TopicTweet(
|
||||
tweetId = tweetWithScore.tweetId,
|
||||
score = tweetWithScore.getSimilarityScore,
|
||||
similarityEngineType = similarityEngineType
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,179 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
|
||||
import com.twitter.cr_mixer.filter.UtegFilterRunner
|
||||
import com.twitter.cr_mixer.model.CandidateGenerationInfo
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.SimilarityEngineInfo
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
|
||||
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
|
||||
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
|
||||
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
|
||||
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class UtegTweetCandidateGenerator @Inject() (
|
||||
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
|
||||
UserTweetEntityGraphSimilarityEngine.Query,
|
||||
TweetWithScoreAndSocialProof
|
||||
],
|
||||
utegTweetScribeLogger: UtegTweetScribeLogger,
|
||||
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
|
||||
realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
|
||||
utegFilterRunner: UtegFilterRunner,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val fetchSeedsStats = stats.scope("fetchSeeds")
|
||||
private val fetchCandidatesStats = stats.scope("fetchCandidates")
|
||||
private val utegFilterStats = stats.scope("utegFilter")
|
||||
private val rankStats = stats.scope("rank")
|
||||
|
||||
def get(
|
||||
query: UtegTweetCandidateGeneratorQuery
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
|
||||
val allStats = stats.scope("all")
|
||||
val perProductStats = stats.scope("perProduct", query.product.toString)
|
||||
StatsUtil.trackItemsStats(allStats) {
|
||||
StatsUtil.trackItemsStats(perProductStats) {
|
||||
|
||||
/**
 * The candidate we return in the end needs a social proof field, which isn't
 * supported by any existing Candidate type, so we created TweetWithScoreAndSocialProof
 * instead.
 *
 * However, filters and the light ranker expect a Candidate-typed param to work. To minimise the
 * changes to them, we convert to and from TweetWithScoreAndSocialProof and Candidate
 * in this method.
 */
|
||||
for {
|
||||
realGraphSeeds <- StatsUtil.trackItemMapStats(fetchSeedsStats) {
|
||||
fetchSeeds(query)
|
||||
}
|
||||
initialTweets <- StatsUtil.trackItemsStats(fetchCandidatesStats) {
|
||||
fetchCandidates(query, realGraphSeeds)
|
||||
}
|
||||
initialCandidates <- convertToInitialCandidates(initialTweets)
|
||||
filteredCandidates <- StatsUtil.trackItemsStats(utegFilterStats) {
|
||||
utegFilter(query, initialCandidates)
|
||||
}
|
||||
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
|
||||
rankCandidates(query, filteredCandidates)
|
||||
}
|
||||
} yield {
|
||||
val topTweets = rankedCandidates.take(query.maxNumResults)
|
||||
convertToTweets(topTweets, initialTweets.map(tweet => tweet.tweetId -> tweet).toMap)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def utegFilter(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
candidates: Seq[InitialCandidate]
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
utegFilterRunner.runSequentialFilters(query, Seq(candidates)).map(_.flatten)
|
||||
}
|
||||
|
||||
private def fetchSeeds(
|
||||
query: UtegTweetCandidateGeneratorQuery
|
||||
): Future[Map[UserId, Double]] = {
|
||||
realGraphInSourceGraphFetcher
|
||||
.get(FetcherQuery(query.userId, query.product, query.userState, query.params))
|
||||
.map(_.map(_.seedWithScores).getOrElse(Map.empty))
|
||||
}
|
||||
|
||||
private[candidate_generation] def rankCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
filteredCandidates: Seq[InitialCandidate],
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
val blendedCandidates = filteredCandidates.map(candidate =>
|
||||
candidate.toBlendedCandidate(Seq(candidate.candidateGenerationInfo)))
|
||||
|
||||
Future(
|
||||
blendedCandidates.map { candidate =>
|
||||
val score = candidate.getSimilarityScore
|
||||
candidate.toRankedCandidate(score)
|
||||
}
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
def fetchCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
realGraphSeeds: Map[UserId, Double],
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
val engineQuery = UserTweetEntityGraphSimilarityEngine.fromParams(
|
||||
query.userId,
|
||||
realGraphSeeds,
|
||||
Some(query.impressedTweetList.toSeq),
|
||||
query.params
|
||||
)
|
||||
|
||||
utegTweetScribeLogger.scribeInitialCandidates(
|
||||
query,
|
||||
userTweetEntityGraphSimilarityEngine.getCandidates(engineQuery).map(_.toSeq.flatten)
|
||||
)
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToInitialCandidates(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof],
|
||||
): Future[Seq[InitialCandidate]] = {
|
||||
val tweetIds = candidates.map(_.tweetId).toSet
|
||||
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
|
||||
/** *
|
||||
* If tweetInfo does not exist, we will filter out this tweet candidate.
|
||||
*/
|
||||
candidates.collect {
|
||||
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>
|
||||
val tweetInfo = tweetInfos(candidate.tweetId)
|
||||
.getOrElse(throw new IllegalStateException("Check previous line's condition"))
|
||||
|
||||
InitialCandidate(
|
||||
tweetId = candidate.tweetId,
|
||||
tweetInfo = tweetInfo,
|
||||
CandidateGenerationInfo(
|
||||
None,
|
||||
SimilarityEngineInfo(
|
||||
similarityEngineType = SimilarityEngineType.Uteg,
|
||||
modelId = None,
|
||||
score = Some(candidate.score)),
|
||||
Seq.empty
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private[candidate_generation] def convertToTweets(
|
||||
candidates: Seq[RankedCandidate],
|
||||
tweetMap: Map[TweetId, TweetWithScoreAndSocialProof]
|
||||
): Seq[TweetWithScoreAndSocialProof] = {
|
||||
candidates.map { candidate =>
|
||||
tweetMap
|
||||
.get(candidate.tweetId).map { tweet =>
|
||||
TweetWithScoreAndSocialProof(
|
||||
tweet.tweetId,
|
||||
candidate.predictionScore,
|
||||
tweet.socialProofByType
|
||||
)
|
||||
// The exception should never be thrown
|
||||
}.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets"))
|
||||
}
|
||||
}
|
||||
}
|
@ -1,13 +0,0 @@
scala_library(
    sources = ["*.scala"],
    tags = ["bazel-compatible"],
    dependencies = [
        "3rdparty/jvm/javax/inject:javax.inject",
        "configapi/configapi-core",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception",
        "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
        "finatra/inject/inject-core/src/main/scala",
        "simclusters-ann/thrift/src/main/thrift:thrift-scala",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
    ],
)
@ -1,473 +0,0 @@
package com.twitter.cr_mixer.config

import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.exception.InvalidSANNConfigException
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.{SimClustersANNConfig => ThriftSimClustersANNConfig}
import com.twitter.util.Duration

case class SimClustersANNConfig(
  maxNumResults: Int,
  minScore: Double,
  candidateEmbeddingType: EmbeddingType,
  maxTopTweetsPerCluster: Int,
  maxScanClusters: Int,
  maxTweetCandidateAge: Duration,
  minTweetCandidateAge: Duration,
  annAlgorithm: ScoringAlgorithm) {
  val toSANNConfigThrift: ThriftSimClustersANNConfig = ThriftSimClustersANNConfig(
    maxNumResults = maxNumResults,
    minScore = minScore,
    candidateEmbeddingType = candidateEmbeddingType,
    maxTopTweetsPerCluster = maxTopTweetsPerCluster,
    maxScanClusters = maxScanClusters,
    maxTweetCandidateAgeHours = maxTweetCandidateAge.inHours,
    minTweetCandidateAgeHours = minTweetCandidateAge.inHours,
    annAlgorithm = annAlgorithm,
  )
}
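// The thrift conversion above expresses the candidate-age bounds in whole hours (Duration.inHours),
// so any sub-hour component of maxTweetCandidateAge / minTweetCandidateAge is truncated.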
|
||||
|
||||
object SimClustersANNConfig {
|
||||
|
||||
final val DefaultConfig = SimClustersANNConfig(
|
||||
maxNumResults = 200,
|
||||
minScore = 0.0,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
|
||||
maxTopTweetsPerCluster = 800,
|
||||
maxScanClusters = 50,
|
||||
maxTweetCandidateAge = 24.hours,
|
||||
minTweetCandidateAge = 0.hours,
|
||||
annAlgorithm = ScoringAlgorithm.CosineSimilarity,
|
||||
)
|
||||
|
||||
/*
 SimClustersANNConfigId: String
 Format: Prod - “EmbeddingType_ModelVersion_Default”
 Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. Date: YYYYMMDD
 */
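// Illustrative examples of ids following this convention (both appear in DefaultConfigMappings below):
//   Prod:       "FavBasedProducer_Model20m145k2020_Default"
//   Experiment: "FavBasedProducer_Model20m145k2020_20220617_06" (date 20220617, serial 06)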
|
||||
|
||||
private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val FavBasedProducer_Model20m145k2020_20220617_06 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val FavBasedProducer_Model20m145k2020_20220801 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220810 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220818 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val FavBasedProducer_Model20m145k2020_20220819 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val FavBasedProducer_Model20m145k2020_20221221 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val FavBasedProducer_Model20m145k2020_20221220 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
// SANN-4 config
|
||||
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 =
|
||||
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220617_06 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220801 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_20220617_06.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220810 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220818 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20220819 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20221221 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val UnfilteredUserInterestedIn_Model20m145k2020_20221220 =
|
||||
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220 =
|
||||
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default =
|
||||
DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220 =
|
||||
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
|
||||
|
||||
// Chunnan's exp on maxTweetCandidateAgeDays 2
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220617_06 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxTweetCandidateAge = 48.hours,
|
||||
)
|
||||
|
||||
// Experimental SANN config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220801 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220810 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220818 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20220819 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20221221 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val UserNextInterestedIn_Model20m145k2020_20221220 =
|
||||
UserNextInterestedIn_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
// Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow
|
||||
private val FollowBasedProducer_Model20m145k2020_Default =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy()
|
||||
|
||||
// Experimental SANN config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220801 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-1 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220810 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-2 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220818 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
maxNumResults = 100,
|
||||
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
|
||||
maxTweetCandidateAge = 175200.hours,
|
||||
maxTopTweetsPerCluster = 1600
|
||||
)
|
||||
|
||||
// SANN-3 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20220819 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
|
||||
)
|
||||
|
||||
// SANN-5 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20221221 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
|
||||
maxTweetCandidateAge = 1.hours
|
||||
)
|
||||
|
||||
// SANN-4 config
|
||||
private val FollowBasedProducer_Model20m145k2020_20221220 =
|
||||
FavBasedProducer_Model20m145k2020_Default.copy(
|
||||
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
|
||||
maxTweetCandidateAge = 48.hours
|
||||
)
|
||||
val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map(
|
||||
"FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default,
|
||||
"FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06,
|
||||
"FavBasedProducer_Model20m145k2020_20220801" -> FavBasedProducer_Model20m145k2020_20220801,
|
||||
"FavBasedProducer_Model20m145k2020_20220810" -> FavBasedProducer_Model20m145k2020_20220810,
|
||||
"FavBasedProducer_Model20m145k2020_20220818" -> FavBasedProducer_Model20m145k2020_20220818,
|
||||
"FavBasedProducer_Model20m145k2020_20220819" -> FavBasedProducer_Model20m145k2020_20220819,
|
||||
"FavBasedProducer_Model20m145k2020_20221221" -> FavBasedProducer_Model20m145k2020_20221221,
|
||||
"FavBasedProducer_Model20m145k2020_20221220" -> FavBasedProducer_Model20m145k2020_20221220,
|
||||
"FollowBasedProducer_Model20m145k2020_Default" -> FollowBasedProducer_Model20m145k2020_Default,
|
||||
"FollowBasedProducer_Model20m145k2020_20220801" -> FollowBasedProducer_Model20m145k2020_20220801,
|
||||
"FollowBasedProducer_Model20m145k2020_20220810" -> FollowBasedProducer_Model20m145k2020_20220810,
|
||||
"FollowBasedProducer_Model20m145k2020_20220818" -> FollowBasedProducer_Model20m145k2020_20220818,
|
||||
"FollowBasedProducer_Model20m145k2020_20220819" -> FollowBasedProducer_Model20m145k2020_20220819,
|
||||
"FollowBasedProducer_Model20m145k2020_20221221" -> FollowBasedProducer_Model20m145k2020_20221221,
|
||||
"FollowBasedProducer_Model20m145k2020_20221220" -> FollowBasedProducer_Model20m145k2020_20221220,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221,
|
||||
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_Default" -> UnfilteredUserInterestedIn_Model20m145k2020_Default,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220617_06" -> UnfilteredUserInterestedIn_Model20m145k2020_20220617_06,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220801" -> UnfilteredUserInterestedIn_Model20m145k2020_20220801,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220810" -> UnfilteredUserInterestedIn_Model20m145k2020_20220810,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220818" -> UnfilteredUserInterestedIn_Model20m145k2020_20220818,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20220819" -> UnfilteredUserInterestedIn_Model20m145k2020_20220819,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20221221" -> UnfilteredUserInterestedIn_Model20m145k2020_20221221,
|
||||
"UnfilteredUserInterestedIn_Model20m145k2020_20221220" -> UnfilteredUserInterestedIn_Model20m145k2020_20221220,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221,
|
||||
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221,
|
||||
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220,
|
||||
"UserNextInterestedIn_Model20m145k2020_Default" -> UserNextInterestedIn_Model20m145k2020_Default,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220617_06" -> UserNextInterestedIn_Model20m145k2020_20220617_06,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220801" -> UserNextInterestedIn_Model20m145k2020_20220801,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220810" -> UserNextInterestedIn_Model20m145k2020_20220810,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220818" -> UserNextInterestedIn_Model20m145k2020_20220818,
|
||||
"UserNextInterestedIn_Model20m145k2020_20220819" -> UserNextInterestedIn_Model20m145k2020_20220819,
|
||||
"UserNextInterestedIn_Model20m145k2020_20221221" -> UserNextInterestedIn_Model20m145k2020_20221221,
|
||||
"UserNextInterestedIn_Model20m145k2020_20221220" -> UserNextInterestedIn_Model20m145k2020_20221220,
|
||||
)
|
||||
|
||||
def getConfig(
|
||||
embeddingType: String,
|
||||
modelVersion: String,
|
||||
id: String
|
||||
): SimClustersANNConfig = {
|
||||
val configName = embeddingType + "_" + modelVersion + "_" + id
|
||||
DefaultConfigMappings.get(configName) match {
|
||||
case Some(config) => config
|
||||
case None =>
|
||||
throw InvalidSANNConfigException(s"Incorrect config id passed in for SANN $configName")
|
||||
}
|
||||
}
|
||||
}
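// Added illustration, not part of the original file: a minimal sketch of resolving a
// SimClustersANNConfig, assuming getConfig is defined on the SimClustersANNConfig companion
// object as the surrounding file suggests. The lookup key is "<embeddingType>_<modelVersion>_<id>"
// and must match an entry in DefaultConfigMappings, otherwise InvalidSANNConfigException is
// thrown. The argument values below are examples that resolve to the
// "UnfilteredUserInterestedIn_Model20m145k2020_20221220" entry defined above.
object SimClustersANNConfigUsageExample {
  val exampleConfig: SimClustersANNConfig =
    SimClustersANNConfig.getConfig(
      embeddingType = "UnfilteredUserInterestedIn",
      modelVersion = "Model20m145k2020",
      id = "20221220"
    )
}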
|
@ -1,24 +0,0 @@
|
||||
package com.twitter.cr_mixer.config
|
||||
|
||||
import com.twitter.util.Duration
|
||||
|
||||
case class TimeoutConfig(
|
||||
/* Default timeouts for candidate generator */
|
||||
serviceTimeout: Duration,
|
||||
signalFetchTimeout: Duration,
|
||||
similarityEngineTimeout: Duration,
|
||||
annServiceClientTimeout: Duration,
|
||||
/* For Uteg Candidate Generator */
|
||||
utegSimilarityEngineTimeout: Duration,
|
||||
/* For User State Store */
|
||||
userStateUnderlyingStoreTimeout: Duration,
|
||||
userStateStoreTimeout: Duration,
|
||||
/* For FRS based tweets */
|
||||
// Timeout passed to EarlyBird server
|
||||
earlybirdServerTimeout: Duration,
|
||||
// Timeout set on CrMixer side
|
||||
earlybirdSimilarityEngineTimeout: Duration,
|
||||
frsBasedTweetEndpointTimeout: Duration,
|
||||
topicTweetEndpointTimeout: Duration,
|
||||
// Timeout Settings for Navi gRPC Client
|
||||
naviRequestTimeout: Duration)
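// Added illustration, not part of the original file: a minimal sketch of constructing a
// TimeoutConfig. The field names come from the case class above; the durations are hypothetical
// placeholders (real values are wired up by the server's configuration), and the sketch assumes
// the Duration DSL from com.twitter.conversions.DurationOps.
object TimeoutConfigExample {
  import com.twitter.conversions.DurationOps._

  val example: TimeoutConfig = TimeoutConfig(
    serviceTimeout = 400.milliseconds,
    signalFetchTimeout = 200.milliseconds,
    similarityEngineTimeout = 200.milliseconds,
    annServiceClientTimeout = 100.milliseconds,
    utegSimilarityEngineTimeout = 200.milliseconds,
    userStateUnderlyingStoreTimeout = 50.milliseconds,
    userStateStoreTimeout = 100.milliseconds,
    earlybirdServerTimeout = 300.milliseconds,
    earlybirdSimilarityEngineTimeout = 400.milliseconds,
    frsBasedTweetEndpointTimeout = 400.milliseconds,
    topicTweetEndpointTimeout = 400.milliseconds,
    naviRequestTimeout = 500.milliseconds
  )
}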
|
@ -1,48 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/debug",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"finagle/finagle-base-http/src/main",
|
||||
"finagle/finagle-core/src/main",
|
||||
"finagle/finagle-http/src/main/scala",
|
||||
"finatra/http-server/src/main/scala/com/twitter/finatra/http:controller",
|
||||
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/ads/schema:common-scala",
|
||||
"src/thrift/com/twitter/context:twitter-context-scala",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
|
||||
"src/thrift/com/twitter/recos:recos-common-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/timelines/render:thrift-scala",
|
||||
"src/thrift/com/twitter/timelines/timeline_logging:thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"stringcenter/client",
|
||||
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview",
|
||||
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview/funnelseries",
|
||||
"twitter-context/src/main/scala",
|
||||
"user-signal-service/thrift/src/main/thrift:thrift-scala",
|
||||
],
|
||||
)
|
@ -1,757 +0,0 @@
|
||||
package com.twitter.cr_mixer.controller
|
||||
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.cr_mixer.candidate_generation.AdsCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.CrCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.FrsTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.RelatedTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.RelatedVideoTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.TopicTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.candidate_generation.UtegTweetCandidateGenerator
|
||||
import com.twitter.cr_mixer.featureswitch.ParamsBuilder
|
||||
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
|
||||
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
|
||||
import com.twitter.cr_mixer.logging.RelatedTweetScribeMetadata
|
||||
import com.twitter.cr_mixer.logging.ScribeMetadata
|
||||
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedAdsCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.param.AdsParams
|
||||
import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.cr_mixer.param.RelatedTweetGlobalParams
|
||||
import com.twitter.cr_mixer.param.RelatedVideoTweetGlobalParams
|
||||
import com.twitter.cr_mixer.param.TopicTweetParams
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.param.decider.EndpointLoadShedder
|
||||
import com.twitter.cr_mixer.thriftscala.AdTweetRecommendation
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRequest
|
||||
import com.twitter.cr_mixer.thriftscala.AdsResponse
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.FrsTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweet
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweet
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweet
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.TopicTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.TweetRecommendation
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweet
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
|
||||
import com.twitter.cr_mixer.util.MetricTagUtil
|
||||
import com.twitter.cr_mixer.util.SignalTimestampStatsUtil
|
||||
import com.twitter.cr_mixer.{thriftscala => t}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finatra.thrift.Controller
|
||||
import com.twitter.hermit.store.common.ReadableWritableStore
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.timeline_logging.{thriftscala => thriftlog}
|
||||
import com.twitter.timelines.tracing.lensview.funnelseries.TweetScoreFunnelSeries
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import java.util.UUID
|
||||
import javax.inject.Inject
|
||||
import org.apache.commons.lang.exception.ExceptionUtils
|
||||
|
||||
class CrMixerThriftController @Inject() (
|
||||
crCandidateGenerator: CrCandidateGenerator,
|
||||
relatedTweetCandidateGenerator: RelatedTweetCandidateGenerator,
|
||||
relatedVideoTweetCandidateGenerator: RelatedVideoTweetCandidateGenerator,
|
||||
utegTweetCandidateGenerator: UtegTweetCandidateGenerator,
|
||||
frsTweetCandidateGenerator: FrsTweetCandidateGenerator,
|
||||
topicTweetCandidateGenerator: TopicTweetCandidateGenerator,
|
||||
crMixerScribeLogger: CrMixerScribeLogger,
|
||||
relatedTweetScribeLogger: RelatedTweetScribeLogger,
|
||||
utegTweetScribeLogger: UtegTweetScribeLogger,
|
||||
adsRecommendationsScribeLogger: AdsRecommendationsScribeLogger,
|
||||
adsCandidateGenerator: AdsCandidateGenerator,
|
||||
decider: CrMixerDecider,
|
||||
paramsBuilder: ParamsBuilder,
|
||||
endpointLoadShedder: EndpointLoadShedder,
|
||||
signalTimestampStatsUtil: SignalTimestampStatsUtil,
|
||||
tweetRecommendationResultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse],
|
||||
userStateStore: ReadableStore[UserId, UserState],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends Controller(t.CrMixer) {
|
||||
|
||||
lazy private val tweetScoreFunnelSeries = new TweetScoreFunnelSeries(statsReceiver)
|
||||
|
||||
private def logErrMessage(endpoint: String, e: Throwable): Unit = {
|
||||
val msg = Seq(
|
||||
s"Failed endpoint $endpoint: ${e.getLocalizedMessage}",
|
||||
ExceptionUtils.getStackTrace(e)
|
||||
).mkString("\n")
|
||||
|
||||
/**
* We use logger.info() to print the message instead of logger.error(), because
* logger.error() sometimes suppresses the detailed stack trace.
*/
|
||||
logger.info(msg)
|
||||
}
|
||||
|
||||
private def generateRequestUUID(): Long = {
|
||||
|
||||
/**
* We generate a unique, non-negative Long via bitwise operations on a random UUID: masking
* the most significant bits with Long.MaxValue clears the sign bit. See:
* https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid
*/
|
||||
UUID.randomUUID().getMostSignificantBits & Long.MaxValue
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args =>
|
||||
val endpointName = "getTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val userId = args.request.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val queryFut = buildCrCandidateGeneratorQuery(args.request, requestUUID, userId)
|
||||
queryFut.flatMap { query =>
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
|
||||
val response = crCandidateGenerator.get(query)
|
||||
|
||||
val blueVerifiedScribedResponse = response.flatMap { rankedCandidates =>
|
||||
val hasBlueVerifiedCandidate = rankedCandidates.exists { tweet =>
|
||||
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
|
||||
}
|
||||
|
||||
if (hasBlueVerifiedCandidate) {
|
||||
crMixerScribeLogger.scribeGetTweetRecommendationsForBlueVerified(
|
||||
scribeMetadata,
|
||||
response)
|
||||
} else {
|
||||
response
|
||||
}
|
||||
}
|
||||
|
||||
val thriftResponse = blueVerifiedScribedResponse.map { candidates =>
|
||||
if (query.product == t.Product.Home) {
|
||||
scribeTweetScoreFunnelSeries(candidates)
|
||||
}
|
||||
buildThriftResponse(candidates)
|
||||
}
|
||||
|
||||
cacheTweetRecommendationResults(args.request, thriftResponse)
|
||||
|
||||
crMixerScribeLogger.scribeGetTweetRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
thriftResponse)
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(CrMixerTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(CrMixerTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
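// Added sketch, not part of the original controller: the endpoints in this controller share a
// common degradation pattern, i.e. wrap the work in endpointLoadShedder and fall back to an
// empty response both when load is shed and on unexpected errors. The hypothetical helper below
// captures that shape; it assumes EndpointLoadShedder.apply takes the endpoint name, the product
// name, and a by-name Future, as in the call sites in this file.
private def shedOrFallback[T](
  endpointName: String,
  productName: String
)(
  work: => Future[T]
)(
  emptyResponse: => T
): Future[T] = {
  endpointLoadShedder(endpointName, productName)(work).rescue {
    case EndpointLoadShedder.LoadSheddingException =>
      Future.value(emptyResponse)
    case e =>
      logErrMessage(endpointName, e)
      Future.value(emptyResponse)
  }
}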
|
||||
|
||||
/**
* GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor do essentially the same
* thing: one passes in a TweetId, which routes to the TweetBased engine, while the other
* passes in an AuthorId, which routes to the ProducerBased engine.
*/
|
||||
handle(t.CrMixer.GetRelatedTweetsForQueryTweet) {
|
||||
args: t.CrMixer.GetRelatedTweetsForQueryTweet.Args =>
|
||||
val endpointName = "getRelatedTweetsForQueryTweet"
|
||||
getRelatedTweets(endpointName, args.request)
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetRelatedVideoTweetsForQueryTweet) {
|
||||
args: t.CrMixer.GetRelatedVideoTweetsForQueryTweet.Args =>
|
||||
val endpointName = "getRelatedVideoTweetsForQueryVideoTweet"
|
||||
getRelatedVideoTweets(endpointName, args.request)
|
||||
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetRelatedTweetsForQueryAuthor) {
|
||||
args: t.CrMixer.GetRelatedTweetsForQueryAuthor.Args =>
|
||||
val endpointName = "getRelatedTweetsForQueryAuthor"
|
||||
getRelatedTweets(endpointName, args.request)
|
||||
}
|
||||
|
||||
private def getRelatedTweets(
|
||||
endpointName: String,
|
||||
request: RelatedTweetRequest
|
||||
): Future[RelatedTweetResponse] = {
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val queryFut = buildRelatedTweetQuery(request, requestUUID)
|
||||
|
||||
queryFut.flatMap { query =>
|
||||
val relatedTweetScribeMetadata = RelatedTweetScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
relatedTweetScribeLogger.scribeGetRelatedTweets(
|
||||
request,
|
||||
startTime,
|
||||
relatedTweetScribeMetadata,
|
||||
relatedTweetCandidateGenerator
|
||||
.get(query)
|
||||
.map(buildRelatedTweetResponse))
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(RelatedTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(RelatedTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
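// Added sketch, not part of the original controller: as the comment above notes, the two
// related-tweet endpoints share getRelatedTweets and differ only in the InternalId carried by
// the request. The hypothetical helper below illustrates that routing; the InternalId.UserId
// arm is assumed to be the author-id member of the simclusters InternalId thrift union.
import com.twitter.simclusters_v2.thriftscala.InternalId
private def describeRelatedTweetEngine(internalId: InternalId): String =
  internalId match {
    case InternalId.TweetId(_) => "routes to the TweetBased similarity engine"
    case InternalId.UserId(_) => "routes to the ProducerBased similarity engine"
    case _ => "not supported by the related-tweet endpoints"
  }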
|
||||
|
||||
private def getRelatedVideoTweets(
|
||||
endpointName: String,
|
||||
request: RelatedVideoTweetRequest
|
||||
): Future[RelatedVideoTweetResponse] = {
|
||||
val requestUUID = generateRequestUUID()
|
||||
val queryFut = buildRelatedVideoTweetQuery(request, requestUUID)
|
||||
|
||||
queryFut.flatMap { query =>
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
relatedVideoTweetCandidateGenerator.get(query).map { initialCandidateSeq =>
|
||||
buildRelatedVideoTweetResponse(initialCandidateSeq)
|
||||
}
|
||||
}.rescue {
|
||||
case EndpointLoadShedder.LoadSheddingException =>
|
||||
Future(RelatedVideoTweetResponse(Seq.empty))
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(RelatedVideoTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetFrsBasedTweetRecommendations) {
|
||||
args: t.CrMixer.GetFrsBasedTweetRecommendations.Args =>
|
||||
val endpointName = "getFrsBasedTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val queryFut = buildFrsBasedTweetQuery(args.request, requestUUID)
|
||||
queryFut.flatMap { query =>
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
frsTweetCandidateGenerator.get(query).map(FrsTweetResponse(_))
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(FrsTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetTopicTweetRecommendations) {
|
||||
args: t.CrMixer.GetTopicTweetRecommendations.Args =>
|
||||
val endpointName = "getTopicTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val query = buildTopicTweetQuery(args.request, requestUUID)
|
||||
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
topicTweetCandidateGenerator.get(query).map(TopicTweetResponse(_))
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetUtegTweetRecommendations) {
|
||||
args: t.CrMixer.GetUtegTweetRecommendations.Args =>
|
||||
val endpointName = "getUtegTweetRecommendations"
|
||||
|
||||
val requestUUID = generateRequestUUID()
|
||||
val startTime = Time.now.inMilliseconds
|
||||
val queryFut = buildUtegTweetQuery(args.request, requestUUID)
|
||||
queryFut
|
||||
.flatMap { query =>
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
endpointLoadShedder(endpointName, query.product.originalName) {
|
||||
utegTweetScribeLogger.scribeGetUtegTweetRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
utegTweetCandidateGenerator
|
||||
.get(query)
|
||||
.map(buildUtegTweetResponse)
|
||||
)
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(UtegTweetResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
handle(t.CrMixer.GetAdsRecommendations) { args: t.CrMixer.GetAdsRecommendations.Args =>
|
||||
val endpointName = "getAdsRecommendations"
|
||||
val queryFut = buildAdsCandidateGeneratorQuery(args.request)
|
||||
val startTime = Time.now.inMilliseconds
|
||||
queryFut.flatMap { query =>
|
||||
{
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
val response = adsCandidateGenerator
|
||||
.get(query).map { candidates =>
|
||||
buildAdsResponse(candidates)
|
||||
}
|
||||
adsRecommendationsScribeLogger.scribeGetAdsRecommendations(
|
||||
args.request,
|
||||
startTime,
|
||||
scribeMetadata,
|
||||
response,
|
||||
query.params(AdsParams.EnableScribe)
|
||||
)
|
||||
}.rescue {
|
||||
case e =>
|
||||
logErrMessage(endpointName, e)
|
||||
Future(AdsResponse(Seq.empty))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildCrCandidateGeneratorQuery(
|
||||
thriftRequest: CrMixerTweetRequest,
|
||||
requestUUID: Long,
|
||||
userId: Long
|
||||
): Future[CrCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("CrMixerTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(9999)
|
||||
case (t.Product.Notifications, Some(t.ProductContext.NotificationsContext(cxt))) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.Email, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.ImmersiveMediaViewer, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case (t.Product.VideoCarousel, None) =>
|
||||
params(GlobalParams.MaxCandidatesPerRequestParam)
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
CrCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID,
|
||||
languageCode = thriftRequest.clientContext.languageCode
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedTweetQuery(
|
||||
thriftRequest: RelatedTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[RelatedTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("RelatedTweetRequest")
|
||||
val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match {
|
||||
case Some(userId) => userStateStore.get(userId)
|
||||
case None => Future.value(Some(UserState.EnumUnknownUserState(100)))
|
||||
}).map(_.getOrElse(UserState.EnumUnknownUserState(100)))
|
||||
|
||||
userStateFut.map { userState =>
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
// Currently, Home takes 10, and RUX takes 100
|
||||
val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam)
|
||||
|
||||
RelatedTweetCandidateGeneratorQuery(
|
||||
internalId = thriftRequest.internalId,
|
||||
clientContext = thriftRequest.clientContext,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildAdsCandidateGeneratorQuery(
|
||||
thriftRequest: AdsRequest
|
||||
): Future[AdsCandidateGeneratorQuery] = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val requestUUID = generateRequestUUID()
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
val maxNumResults = params(AdsParams.AdsCandidateGenerationMaxCandidatesNumParam)
|
||||
AdsCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
params = params,
|
||||
maxNumResults = maxNumResults,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedVideoTweetQuery(
|
||||
thriftRequest: RelatedVideoTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[RelatedVideoTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val product = thriftRequest.product
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("RelatedVideoTweetRequest")
|
||||
val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match {
|
||||
case Some(userId) => userStateStore.get(userId)
|
||||
case None => Future.value(Some(UserState.EnumUnknownUserState(100)))
|
||||
}).map(_.getOrElse(UserState.EnumUnknownUserState(100)))
|
||||
|
||||
userStateFut.map { userState =>
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState)
|
||||
|
||||
val maxNumResults = params(RelatedVideoTweetGlobalParams.MaxCandidatesPerRequestParam)
|
||||
|
||||
RelatedVideoTweetCandidateGeneratorQuery(
|
||||
internalId = thriftRequest.internalId,
|
||||
clientContext = thriftRequest.clientContext,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildUtegTweetQuery(
|
||||
thriftRequest: UtegTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[UtegTweetCandidateGeneratorQuery] = {
|
||||
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("UtegTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(9999)
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
UtegTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
userState = userState,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private def buildTopicTweetQuery(
|
||||
thriftRequest: TopicTweetRequest,
|
||||
requestUUID: Long
|
||||
): TopicTweetCandidateGeneratorQuery = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in the TopicTweetRequest clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
|
||||
// Specify product-specific behavior mapping here
|
||||
val isVideoOnly = (product, productContext) match {
|
||||
case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) =>
|
||||
context.isVideoOnly
|
||||
case (t.Product.TopicLandingPage, None) =>
|
||||
false
|
||||
case (t.Product.HomeTopicsBackfill, None) =>
|
||||
false
|
||||
case (t.Product.TopicTweetsStrato, None) =>
|
||||
false
|
||||
case _ =>
|
||||
throw new IllegalArgumentException(
|
||||
s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
|
||||
)
|
||||
}
|
||||
|
||||
statsReceiver.scope(product.toString).counter(TopicTweetRequest.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
product,
|
||||
UserState.EnumUnknownUserState(100)
|
||||
)
|
||||
|
||||
val topicIds = thriftRequest.topicIds.map { topicId =>
|
||||
TopicId(
|
||||
entityId = topicId,
|
||||
language = thriftRequest.clientContext.languageCode,
|
||||
country = None
|
||||
)
|
||||
}.toSet
|
||||
|
||||
TopicTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
topicIds = topicIds,
|
||||
product = product,
|
||||
maxNumResults = params(TopicTweetParams.MaxTopicTweetCandidatesParam),
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
requestUUID = requestUUID,
|
||||
isVideoOnly = isVideoOnly
|
||||
)
|
||||
}
|
||||
|
||||
private def buildFrsBasedTweetQuery(
|
||||
thriftRequest: FrsTweetRequest,
|
||||
requestUUID: Long
|
||||
): Future[FrsTweetCandidateGeneratorQuery] = {
|
||||
val userId = thriftRequest.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in the FrsTweetRequest clientContext")
|
||||
)
|
||||
val product = thriftRequest.product
|
||||
val productContext = thriftRequest.productContext
|
||||
|
||||
val scopedStats = statsReceiver
|
||||
.scope(product.toString).scope("FrsTweetRequest")
|
||||
|
||||
userStateStore
|
||||
.get(userId).map { userStateOpt =>
|
||||
val userState = userStateOpt
|
||||
.getOrElse(UserState.EnumUnknownUserState(100))
|
||||
scopedStats.scope("UserState").counter(userState.toString).incr()
|
||||
|
||||
val params =
|
||||
paramsBuilder.buildFromClientContext(
|
||||
thriftRequest.clientContext,
|
||||
thriftRequest.product,
|
||||
userState
|
||||
)
|
||||
val maxNumResults = (product, productContext) match {
|
||||
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
|
||||
homeContext.maxResults.getOrElse(
|
||||
params(FrsBasedCandidateGenerationMaxCandidatesNumParam))
|
||||
case _ =>
|
||||
params(FrsBasedCandidateGenerationMaxCandidatesNumParam)
|
||||
}
|
||||
|
||||
FrsTweetCandidateGeneratorQuery(
|
||||
userId = userId,
|
||||
product = product,
|
||||
maxNumResults = maxNumResults,
|
||||
impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
|
||||
impressedUserList = thriftRequest.excludedUserIds.getOrElse(Nil).toSet,
|
||||
params = params,
|
||||
languageCodeOpt = thriftRequest.clientContext.languageCode,
|
||||
countryCodeOpt = thriftRequest.clientContext.countryCode,
|
||||
requestUUID = requestUUID
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildThriftResponse(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): CrMixerTweetResponse = {
|
||||
|
||||
val tweets = candidates.map { candidate =>
|
||||
TweetRecommendation(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.predictionScore,
|
||||
metricTags = Some(MetricTagUtil.buildMetricTags(candidate)),
|
||||
latestSourceSignalTimestampInMillis =
|
||||
SignalTimestampStatsUtil.buildLatestSourceSignalTimestamp(candidate)
|
||||
)
|
||||
}
|
||||
signalTimestampStatsUtil.statsSignalTimestamp(tweets)
|
||||
CrMixerTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def scribeTweetScoreFunnelSeries(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Seq[RankedCandidate] = {
|
||||
// 202210210901 is an arbitrary marker used to locate this call site via code search in Lensview
|
||||
tweetScoreFunnelSeries.startNewSpan(
|
||||
name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType",
|
||||
codePtr = 202210210901L) {
|
||||
(
|
||||
candidates,
|
||||
candidates.map { candidate =>
|
||||
thriftlog.TweetDimensionMeasure(
|
||||
dimension = Some(
|
||||
thriftlog
|
||||
.RequestTweetDimension(
|
||||
candidate.tweetId,
|
||||
candidate.reasonChosen.similarityEngineInfo.similarityEngineType.value)),
|
||||
measure = Some(thriftlog.RequestTweetMeasure(candidate.predictionScore))
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildRelatedTweetResponse(candidates: Seq[InitialCandidate]): RelatedTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
RelatedTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
authorId = Some(candidate.tweetInfo.authorId)
|
||||
)
|
||||
}
|
||||
RelatedTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildRelatedVideoTweetResponse(
|
||||
candidates: Seq[InitialCandidate]
|
||||
): RelatedVideoTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
RelatedVideoTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = Some(candidate.getSimilarityScore)
|
||||
)
|
||||
}
|
||||
RelatedVideoTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildUtegTweetResponse(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof]
|
||||
): UtegTweetResponse = {
|
||||
val tweets = candidates.map { candidate =>
|
||||
UtegTweet(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.score,
|
||||
socialProofByType = candidate.socialProofByType
|
||||
)
|
||||
}
|
||||
UtegTweetResponse(tweets)
|
||||
}
|
||||
|
||||
private def buildAdsResponse(
|
||||
candidates: Seq[RankedAdsCandidate]
|
||||
): AdsResponse = {
|
||||
AdsResponse(ads = candidates.map { candidate =>
|
||||
AdTweetRecommendation(
|
||||
tweetId = candidate.tweetId,
|
||||
score = candidate.predictionScore,
|
||||
lineItems = Some(candidate.lineItemInfo))
|
||||
})
|
||||
}
|
||||
|
||||
private def cacheTweetRecommendationResults(
|
||||
request: CrMixerTweetRequest,
|
||||
response: Future[CrMixerTweetResponse]
|
||||
): Unit = {
|
||||
|
||||
val userId = request.clientContext.userId.getOrElse(
|
||||
throw new IllegalArgumentException(
|
||||
"userId must be present in getTweetRecommendations() Thrift clientContext"))
|
||||
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.getTweetRecommendationsCacheRate)) {
|
||||
response.map { crMixerTweetResponse =>
|
||||
{
|
||||
(
|
||||
request.product,
|
||||
request.clientContext.userId,
|
||||
crMixerTweetResponse.tweets.nonEmpty) match {
|
||||
case (t.Product.Home, Some(userId), true) =>
|
||||
tweetRecommendationResultsStore.put((userId, crMixerTweetResponse))
|
||||
case _ => Future.value(Unit)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [],
|
||||
)
|
@ -1,4 +0,0 @@
|
||||
package com.twitter.cr_mixer
package exception

case class InvalidSANNConfigException(msg: String) extends Exception(msg)
@ -1,35 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"abdecider/src/main/scala",
|
||||
"configapi/configapi-abdecider",
|
||||
"configapi/configapi-core",
|
||||
"configapi/configapi-featureswitches:v2",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
|
||||
"featureswitches/featureswitches-core",
|
||||
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
],
|
||||
)
|
@ -1,79 +0,0 @@
|
||||
package com.twitter.cr_mixer
|
||||
package featureswitch
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.abdecider.LoggingABDecider
|
||||
import com.twitter.abdecider.Recipient
|
||||
import com.twitter.abdecider.Bucket
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.util.Local
|
||||
import scala.collection.concurrent.{Map => ConcurrentMap}
|
||||
|
||||
/**
* Wraps a LoggingABDecider so that all impressed buckets are recorded to a request-scoped
* 'LocalContext'.
*
* Contexts (https://twitter.github.io/finagle/guide/Contexts.html) are Finagle's mechanism for
* storing state without having to pass variables all the way through the request path.
*
* For this class to work, the [[SetImpressedBucketsLocalContextFilter]] must be applied at the
* beginning of the request to initialize the concurrent map used to store impressed buckets.
*
* Whenever an A/B impression occurs, the bucket information is written to that concurrent map.
*/
|
||||
case class CrMixerLoggingABDecider(
|
||||
loggingAbDecider: LoggingABDecider,
|
||||
statsReceiver: StatsReceiver)
|
||||
extends LoggingABDecider {
|
||||
|
||||
private val scopedStatsReceiver = statsReceiver.scope("cr_logging_ab_decider")
|
||||
|
||||
override def impression(
|
||||
experimentName: String,
|
||||
recipient: Recipient
|
||||
): Option[Bucket] = {
|
||||
|
||||
StatsUtil.trackNonFutureBlockStats(scopedStatsReceiver.scope("log_impression")) {
|
||||
val maybeBuckets = loggingAbDecider.impression(experimentName, recipient)
|
||||
maybeBuckets.foreach { b =>
|
||||
scopedStatsReceiver.counter("impressions").incr()
|
||||
CrMixerImpressedBuckets.recordImpressedBucket(b)
|
||||
}
|
||||
maybeBuckets
|
||||
}
|
||||
}
|
||||
|
||||
override def track(
|
||||
experimentName: String,
|
||||
eventName: String,
|
||||
recipient: Recipient
|
||||
): Unit = {
|
||||
loggingAbDecider.track(experimentName, eventName, recipient)
|
||||
}
|
||||
|
||||
override def bucket(
|
||||
experimentName: String,
|
||||
recipient: Recipient
|
||||
): Option[Bucket] = {
|
||||
loggingAbDecider.bucket(experimentName, recipient)
|
||||
}
|
||||
|
||||
override def experiments: Seq[String] = loggingAbDecider.experiments
|
||||
|
||||
override def experiment(experimentName: String) =
|
||||
loggingAbDecider.experiment(experimentName)
|
||||
}
|
||||
|
||||
object CrMixerImpressedBuckets {
|
||||
private[featureswitch] val localImpressedBucketsMap = new Local[ConcurrentMap[Bucket, Boolean]]
|
||||
|
||||
/**
* Gets all impressed buckets for this request.
*/
|
||||
def getAllImpressedBuckets: Option[List[Bucket]] = {
|
||||
localImpressedBucketsMap.apply().map(_.map { case (k, _) => k }.toList)
|
||||
}
|
||||
|
||||
private[featureswitch] def recordImpressedBucket(bucket: Bucket) = {
|
||||
localImpressedBucketsMap().foreach { m => m += bucket -> true }
|
||||
}
|
||||
}
|
@ -1,151 +0,0 @@
|
||||
package com.twitter.cr_mixer.featureswitch
|
||||
|
||||
import com.twitter.abdecider.LoggingABDecider
|
||||
import com.twitter.abdecider.UserRecipient
|
||||
import com.twitter.cr_mixer.{thriftscala => t}
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.discovery.common.configapi.FeatureContextBuilder
|
||||
import com.twitter.featureswitches.FSRecipient
|
||||
import com.twitter.featureswitches.UserAgent
|
||||
import com.twitter.featureswitches.{Recipient => FeatureSwitchRecipient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.timelines.configapi.Config
|
||||
import com.twitter.timelines.configapi.FeatureValue
|
||||
import com.twitter.timelines.configapi.ForcedFeatureContext
|
||||
import com.twitter.timelines.configapi.OrElseFeatureContext
|
||||
import com.twitter.timelines.configapi.Params
|
||||
import com.twitter.timelines.configapi.RequestContext
|
||||
import com.twitter.timelines.configapi.abdecider.LoggingABDeciderExperimentContext
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/** Singleton object for building [[Params]] to override */
|
||||
@Singleton
|
||||
class ParamsBuilder @Inject() (
|
||||
globalStats: StatsReceiver,
|
||||
abDecider: LoggingABDecider,
|
||||
featureContextBuilder: FeatureContextBuilder,
|
||||
config: Config) {
|
||||
|
||||
private val stats = globalStats.scope("params")
|
||||
|
||||
def buildFromClientContext(
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState,
|
||||
userRoleOverride: Option[Set[String]] = None,
|
||||
featureOverrides: Map[String, FeatureValue] = Map.empty,
|
||||
): Params = {
|
||||
clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
val userRecipient = buildFeatureSwitchRecipient(
|
||||
userId,
|
||||
userRoleOverride,
|
||||
clientContext,
|
||||
product,
|
||||
userState
|
||||
)
|
||||
|
||||
val featureContext = OrElseFeatureContext(
|
||||
ForcedFeatureContext(featureOverrides),
|
||||
featureContextBuilder(
|
||||
Some(userId),
|
||||
Some(userRecipient)
|
||||
))
|
||||
|
||||
config(
|
||||
requestContext = RequestContext(
|
||||
userId = Some(userId),
|
||||
experimentContext = LoggingABDeciderExperimentContext(
|
||||
abDecider,
|
||||
Some(UserRecipient(userId, Some(userId)))),
|
||||
featureContext = featureContext
|
||||
),
|
||||
stats
|
||||
)
|
||||
case None =>
|
||||
val guestRecipient =
|
||||
buildFeatureSwitchRecipientWithGuestId(clientContext: ClientContext, product, userState)
|
||||
|
||||
val featureContext = OrElseFeatureContext(
|
||||
ForcedFeatureContext(featureOverrides),
|
||||
featureContextBuilder(
|
||||
clientContext.userId,
|
||||
Some(guestRecipient)
|
||||
)
|
||||
) // ExperimentContext with GuestRecipient is not supported, as there are no active use cases in CrMixer yet
|
||||
|
||||
config(
|
||||
requestContext = RequestContext(
|
||||
userId = clientContext.userId,
|
||||
featureContext = featureContext
|
||||
),
|
||||
stats
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private def buildFeatureSwitchRecipientWithGuestId(
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState
|
||||
): FeatureSwitchRecipient = {
|
||||
|
||||
val recipient = FSRecipient(
|
||||
userId = None,
|
||||
userRoles = None,
|
||||
deviceId = clientContext.deviceId,
|
||||
guestId = clientContext.guestId,
|
||||
languageCode = clientContext.languageCode,
|
||||
countryCode = clientContext.countryCode,
|
||||
userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
|
||||
isVerified = None,
|
||||
isTwoffice = None,
|
||||
tooClient = None,
|
||||
highWaterMark = None
|
||||
)
|
||||
|
||||
recipient.withCustomFields(
|
||||
(ParamsBuilder.ProductCustomField, product.toString),
|
||||
(ParamsBuilder.UserStateCustomField, userState.toString)
|
||||
)
|
||||
}
|
||||
|
||||
private def buildFeatureSwitchRecipient(
|
||||
userId: Long,
|
||||
userRolesOverride: Option[Set[String]],
|
||||
clientContext: ClientContext,
|
||||
product: t.Product,
|
||||
userState: UserState
|
||||
): FeatureSwitchRecipient = {
|
||||
val userRoles = userRolesOverride match {
|
||||
case Some(overrides) => Some(overrides)
|
||||
case _ => clientContext.userRoles.map(_.toSet)
|
||||
}
|
||||
|
||||
val recipient = FSRecipient(
|
||||
userId = Some(userId),
|
||||
userRoles = userRoles,
|
||||
deviceId = clientContext.deviceId,
|
||||
guestId = clientContext.guestId,
|
||||
languageCode = clientContext.languageCode,
|
||||
countryCode = clientContext.countryCode,
|
||||
userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
|
||||
isVerified = None,
|
||||
isTwoffice = None,
|
||||
tooClient = None,
|
||||
highWaterMark = None
|
||||
)
|
||||
|
||||
recipient.withCustomFields(
|
||||
(ParamsBuilder.ProductCustomField, product.toString),
|
||||
(ParamsBuilder.UserStateCustomField, userState.toString)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
object ParamsBuilder {
|
||||
private val ProductCustomField = "product_id"
|
||||
private val UserStateCustomField = "user_state"
|
||||
}
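// Added illustration, not part of the original file: a minimal sketch of a call site for
// ParamsBuilder.buildFromClientContext. It assumes every field of the product-mixer
// ClientContext is optional; the user id and language code are placeholders, and the unknown
// user state mirrors the fallback value used elsewhere in cr-mixer.
object ParamsBuilderUsageExample {
  def exampleParams(paramsBuilder: ParamsBuilder): Params =
    paramsBuilder.buildFromClientContext(
      clientContext = ClientContext(userId = Some(12L), languageCode = Some("en")),
      product = t.Product.Home,
      userState = UserState.EnumUnknownUserState(100)
    )
}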
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.cr_mixer.featureswitch
|
||||
|
||||
import com.twitter.finagle.Filter
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
import scala.collection.concurrent.TrieMap
|
||||
import com.twitter.abdecider.Bucket
|
||||
import com.twitter.finagle.Service
|
||||
|
||||
@Singleton
|
||||
class SetImpressedBucketsLocalContextFilter @Inject() () extends Filter.TypeAgnostic {
|
||||
override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
|
||||
(request: Req, service: Service[Req, Rep]) => {
|
||||
|
||||
val concurrentTrieMap = TrieMap
|
||||
.empty[Bucket, Boolean] // Trie map has no locks and O(1) inserts
|
||||
CrMixerImpressedBuckets.localImpressedBucketsMap.let(concurrentTrieMap) {
|
||||
service(request)
|
||||
}
|
||||
}
|
||||
|
||||
}
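// Added illustration, not part of the original file: how the pieces in this package compose.
// This filter seeds the request-scoped Local with an empty TrieMap; while the request is in
// flight, CrMixerLoggingABDecider.impression records impressed buckets into it, and
// CrMixerImpressedBuckets.getAllImpressedBuckets reads them back (e.g. for scribing). The
// wrapping helper below is hypothetical wiring, not the service's actual module setup.
object ImpressedBucketsWiringExample {
  def wrap[Req, Rep](underlying: Service[Req, Rep]): Service[Req, Rep] =
    new SetImpressedBucketsLocalContextFilter().toFilter[Req, Rep].andThen(underlying)
}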
|
@ -1,22 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"configapi/configapi-core",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"finagle/finagle-core/src/main",
|
||||
"frigate/frigate-common:util",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
],
|
||||
)
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.util.Future
|
||||
|
||||
trait FilterBase {
|
||||
def name: String
|
||||
|
||||
type ConfigType
|
||||
|
||||
def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]]
|
||||
|
||||
/**
* Build the config params here. Passing param() into the filter is strongly discouraged,
* because param() can be slow when called many times.
*/
|
||||
def requestToConfig[CGQueryType <: CandidateGeneratorQuery](request: CGQueryType): ConfigType
|
||||
}
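// Added illustration, not part of the original file: a minimal FilterBase implementation that
// follows the guidance above, resolving its configuration once in requestToConfig rather than
// calling param() per candidate. The filter and its score threshold are hypothetical; real
// filters read such thresholds from request.params(...).
case class MinScoreExampleFilter() extends FilterBase {
  override def name: String = this.getClass.getCanonicalName

  override type ConfigType = Double // minimum similarity score a candidate must have to be kept

  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] =
    Future.value(candidates.map(_.filter(_.getSimilarityScore >= config)))

  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType
  ): ConfigType = 0.0 // placeholder; a real filter would read request.params(SomeThresholdParam) here
}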
|
@ -1,63 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class ImpressedTweetlistFilter() extends FilterBase {
|
||||
import ImpressedTweetlistFilter._
|
||||
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
|
||||
override type ConfigType = FilterConfig
|
||||
|
||||
/*
|
||||
Filtering removes some candidates based on configurable criteria.
|
||||
*/
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: FilterConfig
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
// Remove candidates which match a source tweet, or which are passed in impressedTweetList
|
||||
val sourceTweetsMatch = candidates
|
||||
.flatMap {
|
||||
|
||||
/***
* Within a Seq[Seq[InitialCandidate]], all candidates within an inner Seq are guaranteed to
* have the same sourceInfo. Hence, we can pick .headOption to represent the whole list when
* filtering by the internalId of the sourceInfoOpt. The similarityEngineInfo, of course, may
* still differ.
*/
|
||||
_.headOption.flatMap { candidate =>
|
||||
candidate.candidateGenerationInfo.sourceInfoOpt.map(_.internalId)
|
||||
}
|
||||
}.collect {
|
||||
case InternalId.TweetId(id) => id
|
||||
}
|
||||
|
||||
val impressedTweetList: Set[TweetId] =
|
||||
config.impressedTweetList ++ sourceTweetsMatch
|
||||
|
||||
val filteredCandidateMap: Seq[Seq[InitialCandidate]] =
|
||||
candidates.map {
|
||||
_.filterNot { candidate =>
|
||||
impressedTweetList.contains(candidate.tweetId)
|
||||
}
|
||||
}
|
||||
Future.value(filteredCandidateMap)
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType
|
||||
): FilterConfig = {
|
||||
FilterConfig(request.impressedTweetList)
|
||||
}
|
||||
}
|
||||
|
||||
object ImpressedTweetlistFilter {
|
||||
case class FilterConfig(impressedTweetList: Set[TweetId])
|
||||
}
|
@ -1,80 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
* Filters in-network tweets
|
||||
*/
|
||||
@Singleton
|
||||
case class InNetworkFilter @Inject() (
|
||||
@Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq],
|
||||
globalStats: StatsReceiver)
|
||||
extends FilterBase {
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
import InNetworkFilter._
|
||||
|
||||
override type ConfigType = FilterConfig
|
||||
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
|
||||
private val filterCandidatesStats = stats.scope("filter_candidates")
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filterConfig: FilterConfig,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
StatsUtil.trackItemsStats(filterCandidatesStats) {
|
||||
filterCandidates(candidates, filterConfig)
|
||||
}
|
||||
}
|
||||
|
||||
private def filterCandidates(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filterConfig: FilterConfig,
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
|
||||
if (!filterConfig.enableInNetworkFilter) {
|
||||
Future.value(candidates)
|
||||
} else {
|
||||
filterConfig.userIdOpt match {
|
||||
case Some(userId) =>
|
||||
realGraphStoreMh
|
||||
.get(userId).map(_.map(_.candidates.map(_.userId)).getOrElse(Seq.empty).toSet).map {
|
||||
realGraphInNetworkAuthorsSet =>
|
||||
candidates.map(_.filterNot { candidate =>
|
||||
realGraphInNetworkAuthorsSet.contains(candidate.tweetInfo.authorId)
|
||||
})
|
||||
}
|
||||
case None => Future.value(candidates)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType
|
||||
): FilterConfig = {
|
||||
request match {
|
||||
case UtegTweetCandidateGeneratorQuery(userId, _, _, _, _, params, _) =>
|
||||
FilterConfig(Some(userId), params(UtegTweetGlobalParams.EnableInNetworkFilterParam))
|
||||
case _ => FilterConfig(None, false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
object InNetworkFilter {
|
||||
case class FilterConfig(
|
||||
userIdOpt: Option[UserId],
|
||||
enableInNetworkFilter: Boolean)
|
||||
}
|
@ -1,58 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.thriftscala.SourceType
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class PostRankFilterRunner @Inject() (
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
private val beforeCount = scopedStats.stat("candidate_count", "before")
|
||||
private val afterCount = scopedStats.stat("candidate_count", "after")
|
||||
|
||||
def run(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
|
||||
beforeCount.add(candidates.size)
|
||||
|
||||
Future(
|
||||
removeBadRecentNotificationCandidates(candidates)
|
||||
).map { results =>
|
||||
afterCount.add(results.size)
|
||||
results
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Remove "bad" quality candidates generated by recent notifications.
* A candidate is bad when it is generated by a single RecentNotification SourceKey.
* e.g.:
* tweetA {recent notification1} -> bad
* tweetB {recent notification1, recent notification2} -> good
* tweetC {recent notification1, recent follow1} -> bad
* See SD-19397.
*/
|
||||
private[filter] def removeBadRecentNotificationCandidates(
|
||||
candidates: Seq[RankedCandidate]
|
||||
): Seq[RankedCandidate] = {
|
||||
candidates.filterNot {
|
||||
isBadQualityRecentNotificationCandidate
|
||||
}
|
||||
}
|
||||
|
||||
private def isBadQualityRecentNotificationCandidate(candidate: RankedCandidate): Boolean = {
|
||||
candidate.potentialReasons.size == 1 &&
|
||||
candidate.potentialReasons.head.sourceInfoOpt.nonEmpty &&
|
||||
candidate.potentialReasons.head.sourceInfoOpt.get.sourceType == SourceType.NotificationClick
|
||||
}
|
||||
|
||||
}
|
@ -1,99 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
class PreRankFilterRunner @Inject() (
|
||||
impressedTweetListFilter: ImpressedTweetlistFilter,
|
||||
tweetAgeFilter: TweetAgeFilter,
|
||||
videoTweetFilter: VideoTweetFilter,
|
||||
tweetReplyFilter: ReplyFilter,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
/***
* The order of the filters does not matter as long as we do not apply .take(N) truncation
* across filters. In other words, it is fine to run tweetAgeFilter first and
* impressedTweetListFilter second, or the other way around. The same applies to the
* signal-based filters: it is fine to apply them before impressedTweetListFilter.
*
* We move all signal-based filters before tweetAgeFilter and impressedTweetListFilter
* as a set of early filters.
*/
|
||||
val orderedFilters = Seq(
|
||||
tweetAgeFilter,
|
||||
impressedTweetListFilter,
|
||||
videoTweetFilter,
|
||||
tweetReplyFilter
|
||||
)
|
||||
|
||||
def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
PreRankFilterRunner.runSequentialFilters(
|
||||
request,
|
||||
candidates,
|
||||
orderedFilters,
|
||||
scopedStats
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object PreRankFilterRunner {
|
||||
private def recordCandidateStatsBeforeFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "before").incr(
|
||||
candidates.count { _.isEmpty }
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "before").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def recordCandidateStatsAfterFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "after").incr(
|
||||
candidates.count { _.isEmpty }
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "after").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Helper function for running some candidates through a sequence of filters
|
||||
*/
|
||||
private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filters: Seq[FilterBase],
|
||||
statsReceiver: StatsReceiver
|
||||
): Future[Seq[Seq[InitialCandidate]]] =
|
||||
filters.foldLeft(Future.value(candidates)) {
|
||||
case (candsFut, filter) =>
|
||||
candsFut.flatMap { cands =>
|
||||
recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name))
|
||||
filter
|
||||
.filter(cands, filter.requestToConfig(request))
|
||||
.map { filteredCands =>
|
||||
recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name))
|
||||
filteredCands
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
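A minimal sketch of the sequential-filter pattern used by runSequentialFilters above: each filter only runs after the previous one's Future completes, via foldLeft. It assumes a simplified Filter trait in place of FilterBase and uses scala.concurrent.Future rather than com.twitter.util.Future; all names here are illustrative, not the real API.

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

object SequentialFilterSketch {
  case class Candidate(tweetId: Long, ageHours: Int, isReply: Boolean)

  trait Filter {
    def name: String
    def apply(candidates: Seq[Seq[Candidate]]): Future[Seq[Seq[Candidate]]]
  }

  val ageFilter: Filter = new Filter {
    val name = "age"
    def apply(cs: Seq[Seq[Candidate]]): Future[Seq[Seq[Candidate]]] =
      Future.successful(cs.map(_.filter(_.ageHours <= 48)))
  }
  val replyFilter: Filter = new Filter {
    val name = "reply"
    def apply(cs: Seq[Seq[Candidate]]): Future[Seq[Seq[Candidate]]] =
      Future.successful(cs.map(_.filterNot(_.isReply)))
  }

  // Chain the filters: each one sees the output of the previous one.
  def runSequentially(
    candidates: Seq[Seq[Candidate]],
    filters: Seq[Filter]
  ): Future[Seq[Seq[Candidate]]] =
    filters.foldLeft(Future.successful(candidates)) { (acc, filter) =>
      acc.flatMap(filter.apply)
    }

  def main(args: Array[String]): Unit = {
    val input = Seq(Seq(
      Candidate(1L, 10, isReply = false),
      Candidate(2L, 100, isReply = false),
      Candidate(3L, 5, isReply = true)))
    val out = Await.result(runSequentially(input, Seq(ageFilter, replyFilter)), 5.seconds)
    println(out.map(_.map(_.tweetId))) // List(List(1))
  }
}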
@ -1,40 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
* Filters candidates that are replies
|
||||
*/
|
||||
@Singleton
|
||||
case class ReplyFilter @Inject() () extends FilterBase {
|
||||
override def name: String = this.getClass.getCanonicalName
|
||||
override type ConfigType = Boolean
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
if (config) {
|
||||
Future.value(
|
||||
candidates.map { candidateSeq =>
|
||||
candidateSeq.filterNot { candidate =>
|
||||
candidate.tweetInfo.isReply.getOrElse(false)
|
||||
}
|
||||
}
|
||||
)
|
||||
} else {
|
||||
Future.value(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): ConfigType = {
|
||||
true
|
||||
}
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
* Filters candidates that are retweets
|
||||
*/
|
||||
@Singleton
|
||||
case class RetweetFilter @Inject() () extends FilterBase {
|
||||
override def name: String = this.getClass.getCanonicalName
|
||||
override type ConfigType = Boolean
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
if (config) {
|
||||
Future.value(
|
||||
candidates.map { candidateSeq =>
|
||||
candidateSeq.filterNot { candidate =>
|
||||
candidate.tweetInfo.isRetweet.getOrElse(false)
|
||||
}
|
||||
}
|
||||
)
|
||||
} else {
|
||||
Future.value(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): ConfigType = {
|
||||
query.params(UtegTweetGlobalParams.EnableRetweetFilterParam)
|
||||
}
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.param.GlobalParams
|
||||
import com.twitter.snowflake.id.SnowflakeId
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
import javax.inject.Singleton
|
||||
import com.twitter.conversions.DurationOps._
|
||||
|
||||
@Singleton
|
||||
case class TweetAgeFilter() extends FilterBase {
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
|
||||
override type ConfigType = Duration
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
maxTweetAge: Duration
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
if (maxTweetAge >= 720.hours) {
|
||||
Future.value(candidates)
|
||||
} else {
|
||||
// Tweet IDs are approximately chronological (see http://go/snowflake),
// so we build the earliest eligible tweet ID once
// and use it to filter candidates for each CandidateGenerationModel.
|
||||
val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
|
||||
Future.value(candidates.map(_.filter(_.tweetId >= earliestTweetId)))
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): Duration = {
|
||||
query.params(GlobalParams.MaxTweetAgeHoursParam)
|
||||
}
|
||||
}
|
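A rough sketch of how a Snowflake-style ID cutoff can be derived from a max age, mirroring the SnowflakeId.firstIdFor(Time.now - maxTweetAge) call above. The epoch constant and 22-bit shift follow the publicly documented Snowflake layout and are assumptions here; production code should keep using com.twitter.snowflake.id.SnowflakeId.

object TweetAgeCutoffSketch {
  private val SnowflakeEpochMs = 1288834974657L // assumed Twitter Snowflake epoch (ms)
  private val TimestampShift = 22               // worker + sequence bits below the timestamp

  // Smallest ID that could have been minted at or after the given wall-clock time.
  def firstIdOnOrAfter(timestampMs: Long): Long =
    (timestampMs - SnowflakeEpochMs) << TimestampShift

  // Keep only tweet IDs newer than the max-age cutoff, as TweetAgeFilter does.
  def filterByAge(
    tweetIds: Seq[Long],
    maxAgeMs: Long,
    nowMs: Long = System.currentTimeMillis()
  ): Seq[Long] = {
    val earliestTweetId = firstIdOnOrAfter(nowMs - maxAgeMs)
    tweetIds.filter(_ >= earliestTweetId)
  }
}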
@ -1,39 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.contentrecommender.thriftscala.TweetInfo
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.HealthThreshold
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
trait TweetInfoHealthFilterBase extends FilterBase {
|
||||
override def name: String = this.getClass.getCanonicalName
|
||||
override type ConfigType = HealthThreshold.Enum.Value
|
||||
def thresholdToPropertyMap: Map[HealthThreshold.Enum.Value, TweetInfo => Option[Boolean]]
|
||||
def getFilterParamFn: CandidateGeneratorQuery => HealthThreshold.Enum.Value
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: HealthThreshold.Enum.Value
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
Future.value(candidates.map { seq =>
|
||||
seq.filter(p => thresholdToPropertyMap(config)(p.tweetInfo).getOrElse(true))
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the config params here. Passing param() into the filter is strongly discouraged
* because param() can be slow when called many times.
|
||||
*/
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): HealthThreshold.Enum.Value = {
|
||||
query match {
|
||||
case q: CrCandidateGeneratorQuery => getFilterParamFn(q)
|
||||
case _ => HealthThreshold.Enum.Off
|
||||
}
|
||||
}
|
||||
}
|
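A small sketch of the threshold-to-predicate mapping that TweetInfoHealthFilterBase implementations supply via thresholdToPropertyMap, using a hypothetical Threshold enumeration and a plain case class in place of the contentrecommender TweetInfo thrift struct.

object HealthThresholdSketch {
  object Threshold extends Enumeration { val Off, Moderate, Strict = Value }

  case class TweetInfo(passModerate: Option[Boolean], passStrict: Option[Boolean])

  // Each threshold picks one optional health property off TweetInfo.
  val thresholdToPropertyMap: Map[Threshold.Value, TweetInfo => Option[Boolean]] = Map(
    Threshold.Off -> (_ => Some(true)),
    Threshold.Moderate -> (_.passModerate),
    Threshold.Strict -> (_.passStrict)
  )

  // Missing scores default to keeping the candidate, matching getOrElse(true) above.
  def keep(threshold: Threshold.Value)(info: TweetInfo): Boolean =
    thresholdToPropertyMap(threshold)(info).getOrElse(true)
}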
@ -1,96 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/***
|
||||
*
|
||||
* Run filters sequentially for the UTEG candidate generator. The structure is copied from PreRankFilterRunner.
|
||||
*/
|
||||
@Singleton
|
||||
class UtegFilterRunner @Inject() (
|
||||
inNetworkFilter: InNetworkFilter,
|
||||
utegHealthFilter: UtegHealthFilter,
|
||||
retweetFilter: RetweetFilter,
|
||||
globalStats: StatsReceiver) {
|
||||
|
||||
private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)
|
||||
|
||||
val orderedFilters: Seq[FilterBase] = Seq(
|
||||
inNetworkFilter,
|
||||
utegHealthFilter,
|
||||
retweetFilter
|
||||
)
|
||||
|
||||
def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
UtegFilterRunner.runSequentialFilters(
|
||||
request,
|
||||
candidates,
|
||||
orderedFilters,
|
||||
scopedStats
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object UtegFilterRunner {
|
||||
private def recordCandidateStatsBeforeFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "before").incr(
|
||||
candidates.count {
|
||||
_.isEmpty
|
||||
}
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "before").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
private def recordCandidateStatsAfterFilter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
statsReceiver
|
||||
.counter("empty_sources", "after").incr(
|
||||
candidates.count {
|
||||
_.isEmpty
|
||||
}
|
||||
)
|
||||
candidates.foreach { candidate =>
|
||||
statsReceiver.counter("candidates", "after").incr(candidate.size)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Helper function for running some candidates through a sequence of filters
|
||||
*/
|
||||
private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
|
||||
request: CGQueryType,
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
filters: Seq[FilterBase],
|
||||
statsReceiver: StatsReceiver
|
||||
): Future[Seq[Seq[InitialCandidate]]] =
|
||||
filters.foldLeft(Future.value(candidates)) {
|
||||
case (candsFut, filter) =>
|
||||
candsFut.flatMap { cands =>
|
||||
recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name))
|
||||
filter
|
||||
.filter(cands, filter.requestToConfig(request))
|
||||
.map { filteredCands =>
|
||||
recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name))
|
||||
filteredCands
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
|
||||
import com.twitter.util.Future
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Singleton
|
||||
|
||||
/**
|
||||
* Remove unhealthy candidates
|
||||
* Currently Timeline Ranker applies a check on the following three scores:
|
||||
* - toxicityScore
|
||||
* - pBlockScore
|
||||
* - pReportedTweetScore
|
||||
*
|
||||
* whereas isPassTweetHealthFilterStrict checks two additional scores with the same threshold:
|
||||
* - pSpammyTweetScore
|
||||
* - spammyTweetContentScore
|
||||
*
|
||||
* We've verified that both filters behave very similarly.
|
||||
*/
|
||||
@Singleton
|
||||
case class UtegHealthFilter @Inject() () extends FilterBase {
|
||||
override def name: String = this.getClass.getCanonicalName
|
||||
override type ConfigType = Boolean
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
if (config) {
|
||||
Future.value(
|
||||
candidates.map { candidateSeq =>
|
||||
candidateSeq.filter { candidate =>
|
||||
candidate.tweetInfo.isPassTweetHealthFilterStrict.getOrElse(false)
|
||||
}
|
||||
}
|
||||
)
|
||||
} else {
|
||||
Future.value(candidates)
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): ConfigType = {
|
||||
query.params(UtegTweetGlobalParams.EnableTLRHealthFilterParam)
|
||||
}
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
package com.twitter.cr_mixer.filter
|
||||
|
||||
import com.twitter.cr_mixer.filter.VideoTweetFilter.FilterConfig
|
||||
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.param.VideoTweetFilterParams
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class VideoTweetFilter() extends FilterBase {
|
||||
override val name: String = this.getClass.getCanonicalName
|
||||
|
||||
override type ConfigType = FilterConfig
|
||||
|
||||
override def filter(
|
||||
candidates: Seq[Seq[InitialCandidate]],
|
||||
config: ConfigType
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
Future.value(candidates.map {
|
||||
_.flatMap {
|
||||
candidate =>
|
||||
if (!config.enableVideoTweetFilter) {
|
||||
Some(candidate)
|
||||
} else {
|
||||
// if hasVideo is true, hasImage and hasGif should be false
|
||||
val hasVideo = checkTweetInfoAttribute(candidate.tweetInfo.hasVideo)
|
||||
val isHighMediaResolution =
|
||||
checkTweetInfoAttribute(candidate.tweetInfo.isHighMediaResolution)
|
||||
val isQuoteTweet = checkTweetInfoAttribute(candidate.tweetInfo.isQuoteTweet)
|
||||
val isReply = checkTweetInfoAttribute(candidate.tweetInfo.isReply)
|
||||
val hasMultipleMedia = checkTweetInfoAttribute(candidate.tweetInfo.hasMultipleMedia)
|
||||
val hasUrl = checkTweetInfoAttribute(candidate.tweetInfo.hasUrl)
|
||||
|
||||
if (hasVideo && isHighMediaResolution && !isQuoteTweet &&
|
||||
!isReply && !hasMultipleMedia && !hasUrl) {
|
||||
Some(candidate)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
def checkTweetInfoAttribute(attributeOpt: => Option[Boolean]): Boolean = {
|
||||
if (attributeOpt.isDefined)
|
||||
attributeOpt.get
|
||||
else {
|
||||
// Take Quoted Tweet (TweetInfo.isQuoteTweet) as an example:
// if attributeOpt is None, we treat the tweet as not being a quoted tweet by default.
// Similarly, if TweetInfo.hasVideo is None,
// we treat the tweet as not having video.
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
|
||||
query: CGQueryType
|
||||
): FilterConfig = {
|
||||
val enableVideoTweetFilter = query match {
|
||||
case _: CrCandidateGeneratorQuery | _: RelatedTweetCandidateGeneratorQuery |
|
||||
_: RelatedVideoTweetCandidateGeneratorQuery =>
|
||||
query.params(VideoTweetFilterParams.EnableVideoTweetFilterParam)
|
||||
case _ => false // e.g., GetRelatedTweets()
|
||||
}
|
||||
FilterConfig(
|
||||
enableVideoTweetFilter = enableVideoTweetFilter
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
object VideoTweetFilter {
|
||||
// Extend FilterConfig to add more flags if needed.
// For now, they are hardcoded to match the prod setting.
|
||||
case class FilterConfig(
|
||||
enableVideoTweetFilter: Boolean)
|
||||
}
|
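A compact sketch of the video-eligibility predicate applied above when the filter is enabled: keep a candidate only if it has video, is high resolution, and is not a quote, reply, multi-media, or URL tweet; missing attributes default to false, as in checkTweetInfoAttribute. TweetAttrs is a hypothetical stand-in for the real TweetInfo.

object VideoTweetFilterSketch {
  case class TweetAttrs(
    hasVideo: Option[Boolean] = None,
    isHighMediaResolution: Option[Boolean] = None,
    isQuoteTweet: Option[Boolean] = None,
    isReply: Option[Boolean] = None,
    hasMultipleMedia: Option[Boolean] = None,
    hasUrl: Option[Boolean] = None)

  // None defaults to false, so an unknown attribute never qualifies a tweet as video.
  private def attr(o: Option[Boolean]): Boolean = o.getOrElse(false)

  def keep(t: TweetAttrs): Boolean =
    attr(t.hasVideo) && attr(t.isHighMediaResolution) &&
      !attr(t.isQuoteTweet) && !attr(t.isReply) &&
      !attr(t.hasMultipleMedia) && !attr(t.hasUrl)
}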
@ -1,139 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialAdsCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
|
||||
import com.twitter.cr_mixer.thriftscala.AdsRequest
|
||||
import com.twitter.cr_mixer.thriftscala.AdsResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class AdsRecommendationsScribeLogger @Inject() (
|
||||
@Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
|
||||
/*
|
||||
* Scribe first-step results after fetching initial ads candidates
|
||||
* */
|
||||
def scribeInitialAdsCandidates(
|
||||
query: AdsCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
|
||||
enableScribe: Boolean // controlled by feature switch so that we can scribe for certain DDG
|
||||
): Future[Seq[Seq[InitialAdsCandidate]]] = {
|
||||
val scribeMetadata = ScribeMetadata.from(query)
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertFetchCandidatesResult(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (enableScribe && decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.adsRecommendationsPerExperimentScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Scribe top level API results
|
||||
* */
|
||||
def scribeGetAdsRecommendations(
|
||||
request: AdsRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[AdsResponse],
|
||||
enableScribe: Boolean
|
||||
): Future[AdsResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(
|
||||
AdsRecommendationTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (enableScribe && decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.adsRecommendationsPerExperimentScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialAdsCandidate]],
|
||||
requestUserId: UserId
|
||||
): AdsRecommendationsResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
AdsRecommendationsResult.FetchCandidatesResult(
|
||||
FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
result: AdsRecommendationsResult,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetAdsRecommendationsScribe = {
|
||||
GetAdsRecommendationsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
result = result,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetAdsRecommendationsScribe
|
||||
): Unit = {
|
||||
publish(
|
||||
logger = adsRecommendationsScribeLogger,
|
||||
codec = GetAdsRecommendationsScribe,
|
||||
message = scribeMsg)
|
||||
}
|
||||
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"abdecider/src/main/scala",
|
||||
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
|
||||
"finagle/finagle-core/src/main",
|
||||
"frigate/frigate-common:base",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
|
||||
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
|
||||
"scrooge/scrooge-serializer/src/main/scala",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/thrift/com/twitter/ml/api:data-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"timelines/src/main/scala/com/twitter/timelines/clientevent",
|
||||
"util-internal/scribe/src/main/scala/com/twitter/logging",
|
||||
],
|
||||
)
|
@ -1,489 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.google.common.base.CaseFormat
|
||||
import com.twitter.abdecider.ScribingABDeciderUtil
|
||||
import com.twitter.scribelib.marshallers.ClientDataProvider
|
||||
import com.twitter.scribelib.marshallers.ScribeSerialization
|
||||
import com.twitter.timelines.clientevent.MinimalClientDataProvider
|
||||
import com.twitter.cr_mixer.model.BlendedCandidate
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.RankedCandidate
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.GraphSourceInfo
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.scribe.ScribeCategories
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.InterleaveResult
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.cr_mixer.thriftscala.RankResult
|
||||
import com.twitter.cr_mixer.thriftscala.Result
|
||||
import com.twitter.cr_mixer.thriftscala.SourceSignal
|
||||
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
|
||||
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.SourceInfo
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.cr_mixer.util.MetricTagUtil
|
||||
import com.twitter.decider.SimpleRecipient
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.finatra.kafka.producers.KafkaProducerBase
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import com.twitter.util.Time
|
||||
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
import scala.util.Random
|
||||
|
||||
@Singleton
|
||||
case class CrMixerScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
|
||||
@Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
|
||||
kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {
|
||||
|
||||
import CrMixerScribeLogger._
|
||||
|
||||
private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
|
||||
private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
|
||||
private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")
|
||||
|
||||
private val serialization = new ScribeSerialization {}
|
||||
|
||||
def scribeSignalSources(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
|
||||
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchSignalSourcesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribePreRankFilterCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertPreRankFilterResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribeInterleaveCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[BlendedCandidate]]
|
||||
): Future[Seq[BlendedCandidate]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertInterleaveResult,
|
||||
enableKafkaScribe = true
|
||||
)
|
||||
}
|
||||
|
||||
def scribeRankedCandidates(
|
||||
query: CrCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[RankedCandidate]]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertRankResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the getTweetRecommendations() endpoint.
|
||||
*/
|
||||
def scribeGetTweetRecommendations(
|
||||
request: CrMixerTweetRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[CrMixerTweetResponse]
|
||||
): Future[CrMixerTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
// We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
|
||||
topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request)
|
||||
publishTopLevelDdgMetrics(
|
||||
logger = ddgMetricsLogger,
|
||||
topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata,
|
||||
latencyMs = latencyMs,
|
||||
candidateSize = response.tweets.length)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe all of the Blue Verified tweets that are candidates from cr-mixer
|
||||
* from the getTweetRecommendations() endpoint for stats tracking/debugging purposes.
|
||||
*/
|
||||
def scribeGetTweetRecommendationsForBlueVerified(
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[Seq[RankedCandidate]]
|
||||
): Future[Seq[RankedCandidate]] = {
|
||||
getResultFn.onSuccess { rankedCandidates =>
|
||||
if (decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)) {
|
||||
blueVerifiedTweetCandidatesStats.counter("process_request").incr()
|
||||
|
||||
val blueVerifiedTweetCandidates = rankedCandidates.filter { tweet =>
|
||||
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
|
||||
}
|
||||
|
||||
val impressedBuckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)
|
||||
|
||||
val blueVerifiedCandidateScribes = blueVerifiedTweetCandidates.map { candidate =>
|
||||
blueVerifiedTweetCandidatesStats
|
||||
.scope(scribeMetadata.product.name).counter(
|
||||
candidate.tweetInfo.authorId.toString).incr()
|
||||
VITTweetCandidateScribe(
|
||||
tweetId = candidate.tweetId,
|
||||
authorId = candidate.tweetInfo.authorId,
|
||||
score = candidate.predictionScore,
|
||||
metricTags = MetricTagUtil.buildMetricTags(candidate)
|
||||
)
|
||||
}
|
||||
|
||||
val blueVerifiedScribe =
|
||||
VITTweetCandidatesScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
candidates = blueVerifiedCandidateScribes,
|
||||
product = scribeMetadata.product,
|
||||
impressedBuckets = impressedBuckets
|
||||
)
|
||||
|
||||
publish(
|
||||
logger = blueVerifiedTweetRecsScribeLogger,
|
||||
codec = VITTweetCandidatesScribe,
|
||||
message = blueVerifiedScribe)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch signals, fetch candidates, filters, ranker, etc
|
||||
*/
|
||||
private[logging] def scribeResultsAndPerformanceMetrics[T](
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => Result,
|
||||
enableKafkaScribe: Boolean = false
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
|
||||
// forks the scribe as a Kafka message for async feature hydration
|
||||
if (enableKafkaScribe && shouldScribeKafkaMessage(
|
||||
scribeMetadata.userId,
|
||||
scribeMetadata.product)) {
|
||||
kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()
|
||||
|
||||
val batchedKafkaMessages = downsampleKafkaMessage(scribeMsg)
|
||||
batchedKafkaMessages.foreach { kafkaMessage =>
|
||||
kafkaProducer.send(
|
||||
topic = ScribeCategories.TweetsRecs.scribeCategory,
|
||||
key = traceId.toString,
|
||||
value = kafkaMessage,
|
||||
timestamp = Time.now.inMilliseconds
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: CrMixerTweetRequest,
|
||||
response: CrMixerTweetResponse,
|
||||
startTime: Long
|
||||
): Result = {
|
||||
Result.TopLevelApiResult(
|
||||
TopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchSignalSourcesResult(
|
||||
sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val sourceSignals = sourceInfoSetTuple._1.map { sourceInfo =>
|
||||
SourceSignal(id = Some(sourceInfo.internalId))
|
||||
}
|
||||
// For source graphs, we pass in requestUserId as a placeholder
|
||||
val sourceGraphs = sourceInfoSetTuple._2.map {
|
||||
case (_, _) =>
|
||||
SourceSignal(id = Some(InternalId.UserId(requestUserId)))
|
||||
}
|
||||
Result.FetchSignalSourcesResult(
|
||||
FetchSignalSourcesResult(
|
||||
signals = Some(sourceSignals ++ sourceGraphs)
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
Result.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertPreRankFilterResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None // not populated yet
|
||||
)
|
||||
}
|
||||
}
|
||||
Result.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
// We scribe InterleaveResult for unconstrained-dataset ML ranker training
|
||||
private def convertInterleaveResult(
|
||||
blendedCandidates: Seq[BlendedCandidate],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = blendedCandidates.map { blendedCandidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(blendedCandidate.reasonChosen, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = blendedCandidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
authorId = Some(blendedCandidate.tweetInfo.authorId), // for ML pipeline training
|
||||
score = Some(blendedCandidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = Some(blendedCandidate.potentialReasons.size)
|
||||
) // hydrate fields for light ranking training data
|
||||
}
|
||||
Result.InterleaveResult(InterleaveResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertRankResult(
|
||||
rankedCandidates: Seq[RankedCandidate],
|
||||
requestUserId: UserId
|
||||
): Result = {
|
||||
val tweetCandidatesWithMetadata = rankedCandidates.map { rankedCandidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(rankedCandidate.reasonChosen, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = rankedCandidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
score = Some(rankedCandidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = Some(rankedCandidate.potentialReasons.size)
|
||||
)
|
||||
}
|
||||
Result.RankResult(RankResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
result: Result,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetTweetsRecommendationsScribe = {
|
||||
GetTweetsRecommendationsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
result = result,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetTweetsRecommendationsScribe
|
||||
): Unit = {
|
||||
publish(
|
||||
logger = tweetRecsScribeLogger,
|
||||
codec = GetTweetsRecommendationsScribe,
|
||||
message = scribeMsg)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gate for producing messages to Kafka for async feature hydration
|
||||
*/
|
||||
private def shouldScribeKafkaMessage(
|
||||
userId: UserId,
|
||||
product: Product
|
||||
): Boolean = {
|
||||
val isEligibleUser = decider.isAvailable(
|
||||
DeciderConstants.kafkaMessageScribeSampleRate,
|
||||
Some(SimpleRecipient(userId)))
|
||||
val isHomeProduct = (product == Product.Home)
|
||||
isEligibleUser && isHomeProduct
|
||||
}
|
||||
|
||||
/**
|
||||
* Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled
|
||||
*/
|
||||
private[logging] def downsampleKafkaMessage(
|
||||
scribeMsg: GetTweetsRecommendationsScribe
|
||||
): Seq[GetTweetsRecommendationsScribe] = {
|
||||
val sampledResultSeq: Seq[Result] = scribeMsg.result match {
|
||||
case Result.InterleaveResult(interleaveResult) =>
|
||||
val sampledTweetsSeq = interleaveResult.tweets
|
||||
.map { tweets =>
|
||||
Random
|
||||
.shuffle(tweets).take(KafkaMaxTweetsPerMessage)
|
||||
.grouped(BatchSize).toSeq
|
||||
}.getOrElse(Seq.empty)
|
||||
|
||||
sampledTweetsSeq.map { sampledTweets =>
|
||||
Result.InterleaveResult(InterleaveResult(Some(sampledTweets)))
|
||||
}
|
||||
|
||||
// if it's an unrecognized type, err on the side of sending no candidates
|
||||
case _ =>
|
||||
kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
|
||||
Seq(Result.InterleaveResult(InterleaveResult(None)))
|
||||
}
|
||||
|
||||
sampledResultSeq.map { sampledResult =>
|
||||
GetTweetsRecommendationsScribe(
|
||||
uuid = scribeMsg.uuid,
|
||||
userId = scribeMsg.userId,
|
||||
result = sampledResult,
|
||||
traceId = scribeMsg.traceId,
|
||||
performanceMetrics = None,
|
||||
impressedBuckets = None
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles client_event serialization to log data into DDG metrics
|
||||
*/
|
||||
private[logging] def publishTopLevelDdgMetrics(
|
||||
logger: Logger,
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
|
||||
candidateSize: Long,
|
||||
latencyMs: Long,
|
||||
): Unit = {
|
||||
val data = Map[Any, Any](
|
||||
"latency_ms" -> latencyMs,
|
||||
"event_value" -> candidateSize
|
||||
)
|
||||
val label: (String, String) = ("tweetrec", "")
|
||||
val namespace = getNamespace(topLevelDdgMetricsMetadata, label) + ("action" -> "candidates")
|
||||
val message =
|
||||
serialization
|
||||
.serializeClientEvent(namespace, getClientData(topLevelDdgMetricsMetadata), data)
|
||||
logger.info(message)
|
||||
}
|
||||
|
||||
private def getClientData(
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
|
||||
): ClientDataProvider =
|
||||
MinimalClientDataProvider(
|
||||
userId = topLevelDdgMetricsMetadata.userId,
|
||||
guestId = None,
|
||||
clientApplicationId = topLevelDdgMetricsMetadata.clientApplicationId,
|
||||
countryCode = topLevelDdgMetricsMetadata.countryCode
|
||||
)
|
||||
|
||||
private def getNamespace(
|
||||
topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
|
||||
label: (String, String)
|
||||
): Map[String, String] = {
|
||||
val productName =
|
||||
CaseFormat.UPPER_CAMEL
|
||||
.to(CaseFormat.LOWER_UNDERSCORE, topLevelDdgMetricsMetadata.product.originalName)
|
||||
|
||||
Map(
|
||||
"client" -> ScribingABDeciderUtil.clientForAppId(
|
||||
topLevelDdgMetricsMetadata.clientApplicationId),
|
||||
"page" -> "cr-mixer",
|
||||
"section" -> productName,
|
||||
"component" -> label._1,
|
||||
"element" -> label._2
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
object CrMixerScribeLogger {
|
||||
val KafkaMaxTweetsPerMessage: Int = 200
|
||||
val BatchSize: Int = 20
|
||||
}
|
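A minimal sketch of the Kafka downsampling step in downsampleKafkaMessage above: randomly keep at most KafkaMaxTweetsPerMessage candidates, then split them into BatchSize-sized messages so each stays under the size limit. A plain Seq[Long] of tweet IDs stands in for TweetCandidateWithMetadata.

import scala.util.Random

object DownsampleSketch {
  val KafkaMaxTweetsPerMessage = 200
  val BatchSize = 20

  // Shuffle, cap, and batch — each inner Seq would become one Kafka message.
  def downsample(tweetIds: Seq[Long]): Seq[Seq[Long]] =
    Random
      .shuffle(tweetIds)
      .take(KafkaMaxTweetsPerMessage)
      .grouped(BatchSize)
      .toSeq

  def main(args: Array[String]): Unit = {
    val batches = downsample((1L to 1000L).toSeq)
    println(batches.size)        // 10 batches
    println(batches.map(_.size)) // each of size 20
  }
}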
@ -1,193 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.InitialCandidate
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class RelatedTweetScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
RelatedTweetScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
def scribePreRankFilterCandidates(
|
||||
query: RelatedTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[Seq[InitialCandidate]]]
|
||||
): Future[Seq[Seq[InitialCandidate]]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
RelatedTweetScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertPreRankFilterResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the getRelatedTweets endpoint.
|
||||
*/
|
||||
def scribeGetRelatedTweets(
|
||||
request: RelatedTweetRequest,
|
||||
startTime: Long,
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
getResultFn: => Future[RelatedTweetResponse]
|
||||
): Future[RelatedTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
relatedTweetScribeMetadata.clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId)
|
||||
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
case _ =>
|
||||
topLevelApiNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch candidates, filters.
|
||||
*/
|
||||
private def scribeResultsAndPerformanceMetrics[T](
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => RelatedTweetResult
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
relatedTweetScribeMetadata.clientContext.userId match {
|
||||
case Some(userId) =>
|
||||
if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId)
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
case _ =>
|
||||
upperFunnelsNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: RelatedTweetRequest,
|
||||
response: RelatedTweetResponse,
|
||||
startTime: Long
|
||||
): RelatedTweetResult = {
|
||||
RelatedTweetResult.RelatedTweetTopLevelApiResult(
|
||||
RelatedTweetTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): RelatedTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = None
|
||||
) // do not hydrate candidateGenerationKey to save cost
|
||||
}
|
||||
}
|
||||
RelatedTweetResult.FetchCandidatesResult(
|
||||
FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def convertPreRankFilterResult(
|
||||
candidatesSeq: Seq[Seq[InitialCandidate]],
|
||||
requestUserId: UserId
|
||||
): RelatedTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates =>
|
||||
candidates.map { candidate =>
|
||||
val candidateGenerationKey =
|
||||
CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = Some(candidateGenerationKey),
|
||||
authorId = Some(candidate.tweetInfo.authorId),
|
||||
score = Some(candidate.getSimilarityScore),
|
||||
numCandidateGenerationKeys = None
|
||||
)
|
||||
}
|
||||
}
|
||||
RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
relatedTweetResult: RelatedTweetResult,
|
||||
relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetRelatedTweetsScribe = {
|
||||
GetRelatedTweetsScribe(
|
||||
uuid = relatedTweetScribeMetadata.requestUUID,
|
||||
internalId = relatedTweetScribeMetadata.internalId,
|
||||
relatedTweetResult = relatedTweetResult,
|
||||
requesterId = relatedTweetScribeMetadata.clientContext.userId,
|
||||
guestId = relatedTweetScribeMetadata.clientContext.guestId,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetRelatedTweetsScribe
|
||||
): Unit = {
|
||||
publish(logger = relatedTweetsScribeLogger, codec = GetRelatedTweetsScribe, message = scribeMsg)
|
||||
}
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
|
||||
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.StatsUtil
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.scrooge.BinaryThriftStructSerializer
|
||||
import com.twitter.scrooge.ThriftStruct
|
||||
import com.twitter.scrooge.ThriftStructCodec
|
||||
|
||||
object ScribeLoggerUtils {
|
||||
|
||||
/**
|
||||
* Handles base64-encoding, serialization, and publish.
|
||||
*/
|
||||
private[logging] def publish[T <: ThriftStruct](
|
||||
logger: Logger,
|
||||
codec: ThriftStructCodec[T],
|
||||
message: T
|
||||
): Unit = {
|
||||
logger.info(BinaryThriftStructSerializer(codec).toString(message))
|
||||
}
|
||||
|
||||
private[logging] def getImpressedBuckets(
|
||||
scopedStats: StatsReceiver
|
||||
): Option[List[ImpressesedBucketInfo]] = {
|
||||
StatsUtil.trackNonFutureBlockStats(scopedStats.scope("getImpressedBuckets")) {
|
||||
CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets =>
|
||||
val listBucketsSet = listBuckets.toSet
|
||||
scopedStats.stat("impressed_buckets").add(listBucketsSet.size)
|
||||
listBucketsSet.map { bucket =>
|
||||
ImpressesedBucketInfo(
|
||||
experimentId = bucket.experiment.settings.experimentId.getOrElse(-1L),
|
||||
bucketName = bucket.name,
|
||||
version = bucket.experiment.settings.version,
|
||||
)
|
||||
}.toList
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,45 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
|
||||
case class ScribeMetadata(
|
||||
requestUUID: Long,
|
||||
userId: UserId,
|
||||
product: Product)
|
||||
|
||||
object ScribeMetadata {
|
||||
def from(query: CrCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
|
||||
def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
|
||||
def from(query: AdsCandidateGeneratorQuery): ScribeMetadata = {
|
||||
ScribeMetadata(query.requestUUID, query.userId, query.product)
|
||||
}
|
||||
}
|
||||
|
||||
case class RelatedTweetScribeMetadata(
|
||||
requestUUID: Long,
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext,
|
||||
product: Product)
|
||||
|
||||
object RelatedTweetScribeMetadata {
|
||||
def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata = {
|
||||
RelatedTweetScribeMetadata(
|
||||
query.requestUUID,
|
||||
query.internalId,
|
||||
query.clientContext,
|
||||
query.product)
|
||||
}
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
package com.twitter.cr_mixer
|
||||
package logging
|
||||
|
||||
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
|
||||
case class TopLevelDdgMetricsMetadata(
|
||||
userId: Option[Long],
|
||||
product: Product,
|
||||
clientApplicationId: Option[Long],
|
||||
countryCode: Option[String])
|
||||
|
||||
object TopLevelDdgMetricsMetadata {
|
||||
def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
|
||||
TopLevelDdgMetricsMetadata(
|
||||
userId = request.clientContext.userId,
|
||||
product = request.product,
|
||||
clientApplicationId = request.clientContext.appId,
|
||||
countryCode = request.clientContext.countryCode
|
||||
)
|
||||
}
|
||||
}
|
@ -1,147 +0,0 @@
|
||||
package com.twitter.cr_mixer.logging
|
||||
|
||||
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
|
||||
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
|
||||
import com.twitter.cr_mixer.param.decider.CrMixerDecider
|
||||
import com.twitter.cr_mixer.param.decider.DeciderConstants
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
|
||||
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
|
||||
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
|
||||
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
|
||||
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
|
||||
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.tracing.Trace
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Stopwatch
|
||||
import javax.inject.Inject
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
@Singleton
|
||||
case class UtegTweetScribeLogger @Inject() (
|
||||
decider: CrMixerDecider,
|
||||
statsReceiver: StatsReceiver,
|
||||
@Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {
|
||||
|
||||
private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
|
||||
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
|
||||
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
|
||||
|
||||
def scribeInitialCandidates(
|
||||
query: UtegTweetCandidateGeneratorQuery,
|
||||
getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
|
||||
): Future[Seq[TweetWithScoreAndSocialProof]] = {
|
||||
scribeResultsAndPerformanceMetrics(
|
||||
ScribeMetadata.from(query),
|
||||
getResultFn,
|
||||
convertToResultFn = convertFetchCandidatesResult
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Top Level API Request / Response and performance metrics
|
||||
* for the GetUtegTweetRecommendations() endpoint.
|
||||
*/
|
||||
def scribeGetUtegTweetRecommendations(
|
||||
request: UtegTweetRequest,
|
||||
startTime: Long,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[UtegTweetResponse]
|
||||
): Future[UtegTweetResponse] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { response =>
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertTopLevelAPIResult(request, response, startTime)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def convertTopLevelAPIResult(
|
||||
request: UtegTweetRequest,
|
||||
response: UtegTweetResponse,
|
||||
startTime: Long
|
||||
): UtegTweetResult = {
|
||||
UtegTweetResult.UtegTweetTopLevelApiResult(
|
||||
UtegTweetTopLevelApiResult(
|
||||
timestamp = startTime,
|
||||
request = request,
|
||||
response = response
|
||||
))
|
||||
}
|
||||
|
||||
private def buildScribeMessage(
|
||||
utegTweetResult: UtegTweetResult,
|
||||
scribeMetadata: ScribeMetadata,
|
||||
latencyMs: Long,
|
||||
traceId: Long
|
||||
): GetUtegTweetsScribe = {
|
||||
GetUtegTweetsScribe(
|
||||
uuid = scribeMetadata.requestUUID,
|
||||
userId = scribeMetadata.userId,
|
||||
utegTweetResult = utegTweetResult,
|
||||
traceId = Some(traceId),
|
||||
performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
|
||||
impressedBuckets = getImpressedBuckets(scopedStats)
|
||||
)
|
||||
}
|
||||
|
||||
private def scribeResult(
|
||||
scribeMsg: GetUtegTweetsScribe
|
||||
): Unit = {
|
||||
publish(logger = utegTweetScribeLogger, codec = GetUtegTweetsScribe, message = scribeMsg)
|
||||
}
|
||||
|
||||
private def convertFetchCandidatesResult(
|
||||
candidates: Seq[TweetWithScoreAndSocialProof],
|
||||
requestUserId: UserId
|
||||
): UtegTweetResult = {
|
||||
val tweetCandidatesWithMetadata = candidates.map { candidate =>
|
||||
TweetCandidateWithMetadata(
|
||||
tweetId = candidate.tweetId,
|
||||
candidateGenerationKey = None
|
||||
) // do not hydrate candidateGenerationKey to save cost
|
||||
}
|
||||
UtegTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
|
||||
}
|
||||
|
||||
/**
|
||||
* Scribe Per-step intermediate results and performance metrics
|
||||
* for each step: fetch candidates, filters.
|
||||
*/
|
||||
private def scribeResultsAndPerformanceMetrics[T](
|
||||
scribeMetadata: ScribeMetadata,
|
||||
getResultFn: => Future[T],
|
||||
convertToResultFn: (T, UserId) => UtegTweetResult
|
||||
): Future[T] = {
|
||||
val timer = Stopwatch.start()
|
||||
getResultFn.onSuccess { input =>
|
||||
if (decider.isAvailableForId(
|
||||
scribeMetadata.userId,
|
||||
DeciderConstants.upperFunnelPerStepScribeRate)) {
|
||||
upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
|
||||
val latencyMs = timer().inMilliseconds
|
||||
val result = convertToResultFn(input, scribeMetadata.userId)
|
||||
val traceId = Trace.id.traceId.toLong
|
||||
val scribeMsg =
|
||||
buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
|
||||
scribeResult(scribeMsg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
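// Illustrative usage sketch (not part of the original file): scribeInitialCandidates wraps a
// candidate fetch lazily, so the fetch runs once here and the intermediate result is scribed on
// success when the per-step decider allows it. `fetchFromUteg` stands in for the real UTEG call
// and is an assumption, not taken from the source.
object UtegTweetScribeLoggerSketch {
  def fetchAndScribe(
    scribeLogger: UtegTweetScribeLogger,
    query: UtegTweetCandidateGeneratorQuery,
    fetchFromUteg: UtegTweetCandidateGeneratorQuery => Future[Seq[TweetWithScoreAndSocialProof]]
  ): Future[Seq[TweetWithScoreAndSocialProof]] =
    // getResultFn is by-name, so the fetch is evaluated once here and scribed on success
    scribeLogger.scribeInitialCandidates(query, fetchFromUteg(query))
}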
|
@ -1,16 +0,0 @@
scala_library(
    sources = ["*.scala"],
    compiler_option_sets = ["fatal_warnings"],
    strict_deps = True,
    tags = ["bazel-compatible"],
    dependencies = [
        "configapi/configapi-core",
        "content-recommender/thrift/src/main/thrift:thrift-scala",
        "cr-mixer/thrift/src/main/thrift:thrift-scala",
        "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
        "src/scala/com/twitter/simclusters_v2/common",
        "src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
        "src/thrift/com/twitter/recos:recos-common-scala",
        "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
    ],
)
@ -1,200 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.simclusters_v2.common.TweetId

sealed trait Candidate {
  val tweetId: TweetId

  override def hashCode: Int = tweetId.toInt
}

case class TweetWithCandidateGenerationInfo(
  tweetId: TweetId,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  def getSimilarityScore: Double =
    candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)
}

case class InitialCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /***
   * Get the similarity score of a tweet from its CG info. For instance,
   * if it is from a UnifiedTweetBasedSimilarityEngine, the score is the weighted combined score,
   * and if it is from a SimClustersANNSimilarityEngine, the score is the SANN score.
   */
  def getSimilarityScore: Double =
    candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)

  /**
   * The same candidate can be generated by multiple algorithms.
   * During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
   * from different algorithms, we attach them to a list of potentialReasons.
   */
  def toBlendedCandidate(
    potentialReasons: Seq[CandidateGenerationInfo],
  ): BlendedCandidate = {
    BlendedCandidate(
      tweetId,
      tweetInfo,
      candidateGenerationInfo,
      potentialReasons,
    )
  }

  // for experimental purposes only when bypassing interleave / ranking
  def toRankedCandidate(): RankedCandidate = {
    RankedCandidate(
      tweetId,
      tweetInfo,
      0.0, // prediction score defaults to 0.0 to signal that ranking was a no-op
      candidateGenerationInfo,
      Seq(candidateGenerationInfo)
    )
  }
}
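// Illustrative sketch (not part of the original file) of the dedup described above: duplicate
// InitialCandidates produced by different algorithms for the same tweet collapse into one
// BlendedCandidate whose potentialReasons retains every CandidateGenerationInfo.
object BlendingSketch {
  def blend(candidates: Seq[InitialCandidate]): Seq[BlendedCandidate] =
    candidates
      .groupBy(_.tweetId)
      .values
      .map { duplicates =>
        // the chosen reason here is simply the first duplicate's CG info; the real blender may
        // pick differently, but it must stay within potentialReasons (see the assert further down)
        duplicates.head.toBlendedCandidate(duplicates.map(_.candidateGenerationInfo))
      }
      .toSeq
}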
|
||||
|
||||
case class InitialAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
candidateGenerationInfo: CandidateGenerationInfo)
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
/**
|
||||
* The same candidate can be generated by multiple algorithms.
|
||||
* During blending, candidate deduping happens. In order to retain the candidateGenerationInfo
|
||||
* from different algorithms, we attach them to a list of potentialReasons.
|
||||
*/
|
||||
def toBlendedAdsCandidate(
|
||||
potentialReasons: Seq[CandidateGenerationInfo],
|
||||
): BlendedAdsCandidate = {
|
||||
BlendedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
candidateGenerationInfo,
|
||||
potentialReasons,
|
||||
)
|
||||
}
|
||||
|
||||
// for experimental purposes only when bypassing interleave / ranking
|
||||
def toRankedAdsCandidate(): RankedAdsCandidate = {
|
||||
RankedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
0.0, // prediction score is default to 0.0 to help differentiate that it is a no-op
|
||||
candidateGenerationInfo,
|
||||
Seq(candidateGenerationInfo)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class BlendedCandidate(
|
||||
tweetId: TweetId,
|
||||
tweetInfo: TweetInfo,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
|
||||
def toRankedCandidate(predictionScore: Double): RankedCandidate = {
|
||||
RankedCandidate(
|
||||
tweetId,
|
||||
tweetInfo,
|
||||
predictionScore,
|
||||
reasonChosen,
|
||||
potentialReasons
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class BlendedAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
|
||||
def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate = {
|
||||
RankedAdsCandidate(
|
||||
tweetId,
|
||||
lineItemInfo,
|
||||
predictionScore,
|
||||
reasonChosen,
|
||||
potentialReasons
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
case class RankedCandidate(
|
||||
tweetId: TweetId,
|
||||
tweetInfo: TweetInfo,
|
||||
predictionScore: Double,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
}
|
||||
|
||||
case class RankedAdsCandidate(
|
||||
tweetId: TweetId,
|
||||
lineItemInfo: Seq[LineItemInfo],
|
||||
predictionScore: Double,
|
||||
reasonChosen: CandidateGenerationInfo,
|
||||
potentialReasons: Seq[CandidateGenerationInfo])
|
||||
extends Candidate {
|
||||
|
||||
/** *
|
||||
* Get the Similarity Score of a Tweet from its CG Info. For instance,
|
||||
* If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score
|
||||
* And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score
|
||||
*/
|
||||
def getSimilarityScore: Double =
|
||||
reasonChosen.similarityEngineInfo.score.getOrElse(0.0)
|
||||
|
||||
assert(potentialReasons.contains(reasonChosen))
|
||||
}
|
||||
|
||||
case class TripTweetWithScore(tweetId: TweetId, score: Double) extends Candidate
|
@ -1,67 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Time

/***
 * Tweet-level attributes. Represents the source used in candidate generation.
 * For legacy reasons, SourceType used to represent both SourceType and SimilarityEngineType.
 * Moving forward, SourceType will be used for SourceType ONLY, e.g., TweetFavorite, UserFollow, TwiceUserId.
 * At the same time, we created a new SimilarityEngineType to separate them, e.g., SimClustersANN.
 *
 * Currently, one special case is that we have TwiceUserId as a source, which is not necessarily a "signal".
 * @param sourceType, e.g., SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId
 * @param internalId, e.g., UserId(0L), TweetId(0L)
 */
case class SourceInfo(
  sourceType: SourceType,
  internalId: InternalId,
  sourceEventTime: Option[Time])

/***
 * Tweet-level attributes. Represents the source user graph used in candidate generation.
 * It is an intermediate product, and will not be stored, unlike SourceInfo.
 * Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as sources.
 * For instance, RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines such as
 * UTG or UTEG will leverage these sources to build candidates.
 *
 * We extended the definition of SourceType to cover both "Source Signal" and "Source Graph".
 * See [CrMixer] Graph Based Source Fetcher Abstraction Proposal:
 *
 * Consider making both SourceInfo and GraphSourceInfo extend the same trait to
 * have a unified interface.
 */
case class GraphSourceInfo(
  sourceType: SourceType,
  seedWithScores: Map[UserId, Double])

/***
 * Tweet-level attributes. Represents the similarity engine (the algorithm) used for
 * candidate generation along with its metadata.
 * @param similarityEngineType, e.g., SimClustersANN, UserTweetGraph
 * @param modelId, e.g., UserTweetGraphConsumerEmbedding_ALL_20210708
 * @param score - a score generated by this sim engine
 */
case class SimilarityEngineInfo(
  similarityEngineType: SimilarityEngineType,
  modelId: Option[String], // modelId can be None, e.g., for UTEG, UnifiedTweetBasedSE, etc.
  score: Option[Double])

/****
 * Tweet-level attributes. A combination of both SourceInfo and SimilarityEngineInfo.
 * A SimilarityEngine is a composition, and it can be composed of many leaf similarity engines.
 * For instance, the TweetBasedUnified SE could be a composition of both the UserTweetGraph SE and the SimClustersANN SE.
 * Note that a SimilarityEngine (composite) may call other SimilarityEngines (atomic, contributing)
 * to contribute to its final candidate list. We track these contributing SEs in the contributingSimilarityEngines list.
 *
 * @param sourceInfoOpt - optional, as many consumer-based CGs do not have a source
 * @param similarityEngineInfo - the similarity engine used in candidate generation (e.g., TweetBasedUnifiedSE). It can be an atomic SE or a composite SE
 * @param contributingSimilarityEngines - only a composite SE will have these (e.g., SANN, UTG). Otherwise it is an empty Seq. All contributing SEs must be atomic
 */
case class CandidateGenerationInfo(
  sourceInfoOpt: Option[SourceInfo],
  similarityEngineInfo: SimilarityEngineInfo,
  contributingSimilarityEngines: Seq[SimilarityEngineInfo])
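// Illustrative sketch only (not part of the original file): wiring the case classes above together
// for a tweet-favorite-sourced candidate served by an atomic SimClustersANN engine. The SourceType,
// SimilarityEngineType and InternalId members are the ones named in the comments above and are
// assumed to exist in the thrift definitions; the concrete model id is hypothetical.
object CandidateGenerationInfoSketch {
  val exampleCgInfo: CandidateGenerationInfo = CandidateGenerationInfo(
    sourceInfoOpt = Some(
      SourceInfo(
        sourceType = SourceType.TweetFavorite,
        internalId = InternalId.TweetId(1234L),
        sourceEventTime = Some(Time.now)
      )),
    similarityEngineInfo = SimilarityEngineInfo(
      similarityEngineType = SimilarityEngineType.SimClustersANN,
      modelId = Some("SimClustersANN_ALL_20230101"), // hypothetical "Algorithm_Product_Date" id
      score = Some(0.42)
    ),
    contributingSimilarityEngines = Seq.empty // atomic engine, so no contributing SEs
  )
}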
@ -1,96 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
import com.twitter.core_workflows.user_model.thriftscala.UserState
|
||||
import com.twitter.cr_mixer.thriftscala.Product
|
||||
import com.twitter.product_mixer.core.thriftscala.ClientContext
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.simclusters_v2.common.UserId
|
||||
import com.twitter.simclusters_v2.thriftscala.InternalId
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.timelines.configapi.Params
|
||||
|
||||
sealed trait CandidateGeneratorQuery {
|
||||
val product: Product
|
||||
val maxNumResults: Int
|
||||
val impressedTweetList: Set[TweetId]
|
||||
val params: Params
|
||||
val requestUUID: Long
|
||||
}
|
||||
|
||||
sealed trait HasUserId {
|
||||
val userId: UserId
|
||||
}
|
||||
|
||||
case class CrCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long,
|
||||
languageCode: Option[String] = None)
|
||||
extends CandidateGeneratorQuery
|
||||
with HasUserId
|
||||
|
||||
case class UtegTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
with HasUserId
|
||||
|
||||
case class RelatedTweetCandidateGeneratorQuery(
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext, // To scribe LogIn/LogOut requests
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class RelatedVideoTweetCandidateGeneratorQuery(
|
||||
internalId: InternalId,
|
||||
clientContext: ClientContext, // To scribe LogIn/LogOut requests
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class FrsTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedUserList: Set[UserId],
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
languageCodeOpt: Option[String] = None,
|
||||
countryCodeOpt: Option[String] = None,
|
||||
requestUUID: Long)
|
||||
extends CandidateGeneratorQuery
|
||||
|
||||
case class AdsCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
product: Product,
|
||||
userState: UserState,
|
||||
maxNumResults: Int,
|
||||
params: Params,
|
||||
requestUUID: Long)
|
||||
|
||||
case class TopicTweetCandidateGeneratorQuery(
|
||||
userId: UserId,
|
||||
topicIds: Set[TopicId],
|
||||
product: Product,
|
||||
maxNumResults: Int,
|
||||
impressedTweetList: Set[TweetId],
|
||||
params: Params,
|
||||
requestUUID: Long,
|
||||
isVideoOnly: Boolean)
|
||||
extends CandidateGeneratorQuery
|
@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model

sealed trait EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType
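// Illustrative sketch only (not part of the original file): the trait is sealed, so callers can
// match exhaustively over the three engine flavours, e.g. to pick a stats scope name per flavour.
object EarlybirdSimilarityEngineTypeSketch {
  def scopeName(engineType: EarlybirdSimilarityEngineType): String =
    engineType match {
      case EarlybirdSimilarityEngineType_RecencyBased    => "recency_based"
      case EarlybirdSimilarityEngineType_ModelBased      => "model_based"
      case EarlybirdSimilarityEngineType_TensorflowBased => "tensorflow_based"
    }
}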
@ -1,11 +0,0 @@
package com.twitter.cr_mixer.model

object HealthThreshold {
  object Enum extends Enumeration {
    val Off: Value = Value(1)
    val Moderate: Value = Value(2)
    val Strict: Value = Value(3)
    val Stricter: Value = Value(4)
    val StricterPlus: Value = Value(5)
  }
}
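// Illustrative sketch only (not part of the original file): because the levels are an Enumeration
// with increasing ids, a threshold can be resolved from its integer id (e.g. a param value) and
// compared by strictness via `id`.
object HealthThresholdSketch {
  val moderate: HealthThreshold.Enum.Value = HealthThreshold.Enum(2) // == HealthThreshold.Enum.Moderate
  val applyStrictFiltering: Boolean = moderate.id >= HealthThreshold.Enum.Strict.id // false
}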
@ -1,77 +0,0 @@
package com.twitter.cr_mixer.model

/**
 * A configuration class for all Model Based Candidate Sources.
 *
 * The model name guideline: please name your modelId as "Algorithm_Product_Date".
 * If your model is used for multiple product surfaces, name the product part "ALL".
 * Don't name your algorithm "MBCG" - all the algorithms here are MBCG =.=
 *
 * Don't forget to add your new models into the allHnswANNSimilarityEngineModelIds list.
 */
object ModelConfig {
  // Offline SimClusters CG Experiment related Model Ids
  val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414"
  val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50"
  val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50"
  val OfflineMTSConsumerEmbeddingsFav90P20M: String =
    "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"

  // Twhin Model Ids
  val ConsumerBasedTwHINRegularUpdateAll20221024: String =
    "ConsumerBasedTwHINRegularUpdate_All_20221024"

  // Averaged Twhin Model Ids
  val TweetBasedTwHINRegularUpdateAll20221024: String =
    "TweetBasedTwHINRegularUpdate_All_20221024"

  // Collaborative Filtering Twhin Model Ids
  val TwhinCollabFilterForFollow: String =
    "TwhinCollabFilterForFollow"
  val TwhinCollabFilterForEngagement: String =
    "TwhinCollabFilterForEngagement"
  val TwhinMultiClusterForFollow: String =
    "TwhinMultiClusterForFollow"
  val TwhinMultiClusterForEngagement: String =
    "TwhinMultiClusterForEngagement"

  // Two Tower model Ids
  val TwoTowerFavALL20220808: String =
    "TwoTowerFav_ALL_20220808"

  // Debugger Demo-Only Model Ids
  val DebuggerDemo: String = "DebuggerDemo"

  // ColdStartLookalike - this is not really a model name; it is a placeholder to
  // indicate the ColdStartLookalike candidate source, which is currently being plugged into
  // CustomizedRetrievalCandidateGeneration temporarily.
  val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"

  // consumersBasedUTG-RealGraphOon Model Id
  val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705"
  // consumersBasedUAG-RealGraphOon Model Id
  val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205"

  // FTR
  val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
  val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
  val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"

  // All Model Ids of HnswANNSimilarityEngines
  val allHnswANNSimilarityEngineModelIds = Seq(
    ConsumerBasedTwHINRegularUpdateAll20221024,
    TwoTowerFavALL20220808,
    DebuggerDemo
  )

  val ConsumerLogFavBasedInterestedInEmbedding: String =
    "ConsumerLogFavBasedInterestedIn_ALL_20221228"
  val ConsumerFollowBasedInterestedInEmbedding: String =
    "ConsumerFollowBasedInterestedIn_ALL_20221228"

  val RetweetBasedDiffusion: String =
    "RetweetBasedDiffusion"

}
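// Illustrative sketch only (not part of the original file): a hypothetical new model id that
// follows the "Algorithm_Product_Date" guideline above, with the product part set to "ALL" because
// it would serve multiple surfaces, and appended to the HnswANN registry as the comment requires.
object ModelConfigSketch {
  val TwoTowerClickAll20230101: String = "TwoTowerClick_ALL_20230101" // hypothetical model id

  val allHnswANNSimilarityEngineModelIds: Seq[String] =
    ModelConfig.allHnswANNSimilarityEngineModelIds :+ TwoTowerClickAll20230101
}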
@ -1,122 +0,0 @@
|
||||
package com.twitter.cr_mixer.model
|
||||
|
||||
/**
|
||||
* Define name annotated module names here
|
||||
*/
|
||||
object ModuleNames {
|
||||
|
||||
final val FrsStore = "FrsStore"
|
||||
final val UssStore = "UssStore"
|
||||
final val UssStratoColumn = "UssStratoColumn"
|
||||
final val RsxStore = "RsxStore"
|
||||
final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore"
|
||||
final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore"
|
||||
final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore"
|
||||
final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore"
|
||||
final val StpStore = "StpStore"
|
||||
final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
|
||||
final val TripCandidateStore = "TripCandidateStore"
|
||||
|
||||
final val ConsumerEmbeddingBasedTripSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTripSimilarityEngine"
|
||||
final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
|
||||
final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
|
||||
"ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
|
||||
final val ConsumersBasedUserAdGraphSimilarityEngine =
|
||||
"ConsumersBasedUserAdGraphSimilarityEngine"
|
||||
final val ConsumersBasedUserVideoGraphSimilarityEngine =
|
||||
"ConsumersBasedUserVideoGraphSimilarityEngine"
|
||||
|
||||
final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"
|
||||
|
||||
final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"
|
||||
|
||||
final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"
|
||||
|
||||
final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient"
|
||||
final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient"
|
||||
final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
|
||||
final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
|
||||
final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
|
||||
final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"
|
||||
final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
|
||||
final val UnifiedCache = "unifiedCache"
|
||||
final val MLScoreCache = "mlScoreCache"
|
||||
final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
|
||||
final val EarlybirdTweetsCache = "earlybirdTweetsCache"
|
||||
final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
|
||||
"earlybirdTweetsWithoutRetweetsRepliesCacheStore"
|
||||
final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
|
||||
"earlybirdTweetsWithRetweetsRepliesCacheStore"
|
||||
|
||||
final val AbDeciderLogger = "abDeciderLogger"
|
||||
final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
|
||||
final val TweetRecsLogger = "tweetRecsLogger"
|
||||
final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
|
||||
final val RelatedTweetsLogger = "relatedTweetsLogger"
|
||||
final val UtegTweetsLogger = "utegTweetsLogger"
|
||||
final val AdsRecommendationsLogger = "adsRecommendationLogger"
|
||||
|
||||
final val OfflineSimClustersANNInterestedInSimilarityEngine =
|
||||
"OfflineSimClustersANNInterestedInSimilarityEngine"
|
||||
|
||||
final val RealGraphOonStore = "RealGraphOonStore"
|
||||
final val RealGraphInStore = "RealGraphInStore"
|
||||
|
||||
final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
|
||||
final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore"
|
||||
final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore"
|
||||
final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore"
|
||||
final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore"
|
||||
final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"
|
||||
|
||||
final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
|
||||
final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
|
||||
"OfflineFtrAt5Pop1000RankDecay11CandidateStore"
|
||||
final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
|
||||
"OfflineFtrAt5Pop10000RankDecay11CandidateStore"
|
||||
|
||||
final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow"
|
||||
final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement"
|
||||
final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow"
|
||||
final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement"
|
||||
|
||||
final val ProducerBasedUserAdGraphSimilarityEngine =
|
||||
"ProducerBasedUserAdGraphSimilarityEngine"
|
||||
final val ProducerBasedUserTweetGraphSimilarityEngine =
|
||||
"ProducerBasedUserTweetGraphSimilarityEngine"
|
||||
final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine"
|
||||
|
||||
final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine"
|
||||
final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine"
|
||||
final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine"
|
||||
final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
|
||||
final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"
|
||||
|
||||
final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"
|
||||
|
||||
final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
|
||||
final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
|
||||
final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"
|
||||
|
||||
final val UserTweetEntityGraphSimilarityEngine =
|
||||
"UserTweetEntityGraphSimilarityEngine"
|
||||
|
||||
final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
|
||||
final val CertoStratoStoreName = "CertoStratoStore"
|
||||
|
||||
final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
|
||||
final val SkitHighPrecisionTopicTweetSimilarityEngine =
|
||||
"SkitHighPrecisionTopicTweetSimilarityEngine"
|
||||
final val SkitStratoStoreName = "SkitStratoStore"
|
||||
|
||||
final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
|
||||
final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
|
||||
final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"
|
||||
|
||||
final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
|
||||
final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"
|
||||
|
||||
final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"
|
||||
}
|
@ -1,13 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.simclusters_v2.common.TweetId

/***
 * Bind a tweetId with a raw score generated from one single Similarity Engine
 * @param similarityEngineType indicates which underlying topic source the topic tweet is from
 */
case class TopicTweetWithScore(
  tweetId: TweetId,
  score: Double,
  similarityEngineType: SimilarityEngineType)
@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId

case class TweetWithAuthor(tweetId: TweetId, authorId: UserId)
@ -1,8 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.simclusters_v2.common.TweetId

/***
 * Bind a tweetId with a raw score generated from one single Similarity Engine
 */
case class TweetWithScore(tweetId: TweetId, score: Double)
@ -1,12 +0,0 @@
package com.twitter.cr_mixer.model

import com.twitter.simclusters_v2.common.TweetId
import com.twitter.recos.recos_common.thriftscala.SocialProofType

/***
 * Bind a tweetId with a raw score and social proofs by type
 */
case class TweetWithScoreAndSocialProof(
  tweetId: TweetId,
  score: Double,
  socialProofByType: Map[SocialProofType, Seq[Long]])
@ -1,135 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.twitter.bijection.thrift.CompactThriftCodec
|
||||
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.thriftscala.LineItemInfo
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.DataType
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.GeneralTensor
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.common.TweetId
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.manhattan.Revenue
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object ActivePromotedTweetStoreModule extends TwitterModule {
|
||||
|
||||
case class ActivePromotedTweetStore(
|
||||
activePromotedTweetMHStore: ReadableStore[String, DataRecord],
|
||||
statsReceiver: StatsReceiver)
|
||||
extends ReadableStore[TweetId, Seq[LineItemInfo]] {
|
||||
override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] = {
|
||||
activePromotedTweetMHStore.get(tweetId.toString).map {
|
||||
_.map { dataRecord =>
|
||||
val richDataRecord = new RichDataRecord(dataRecord)
|
||||
val lineItemIdsFeature: Feature[GeneralTensor] =
|
||||
new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
|
||||
|
||||
val lineItemObjectivesFeature: Feature[GeneralTensor] =
|
||||
new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)
|
||||
|
||||
val lineItemIdsTensor: GeneralTensor = richDataRecord.getFeatureValue(lineItemIdsFeature)
|
||||
val lineItemObjectivesTensor: GeneralTensor =
|
||||
richDataRecord.getFeatureValue(lineItemObjectivesFeature)
|
||||
|
||||
val lineItemIds: Seq[Long] =
|
||||
if (lineItemIdsTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemIdsTensor.getInt64Tensor.isSetLongs) {
|
||||
lineItemIdsTensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
|
||||
} else Seq.empty
|
||||
|
||||
val lineItemObjectives: Seq[LineItemObjective] =
|
||||
if (lineItemObjectivesTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemObjectivesTensor.getInt64Tensor.isSetLongs) {
|
||||
lineItemObjectivesTensor.getInt64Tensor.getLongs.asScala.map(objective =>
|
||||
LineItemObjective(objective.toInt))
|
||||
} else Seq.empty
|
||||
|
||||
val lineItemInfo =
|
||||
if (lineItemIds.size == lineItemObjectives.size) {
|
||||
lineItemIds.zipWithIndex.map {
|
||||
case (lineItemId, index) =>
|
||||
LineItemInfo(
|
||||
lineItemId = lineItemId,
|
||||
lineItemObjective = lineItemObjectives(index)
|
||||
)
|
||||
}
|
||||
} else Seq.empty
|
||||
|
||||
lineItemInfo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
def providesActivePromotedTweetStore(
|
||||
manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
|
||||
@Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
|
||||
crMixerStatsReceiver: StatsReceiver
|
||||
): ReadableStore[TweetId, Seq[LineItemInfo]] = {
|
||||
|
||||
val mhConfig = new ManhattanROConfig {
|
||||
val hdfsPath = HDFSPath("")
|
||||
val applicationID = ApplicationID("ads_bigquery_features")
|
||||
val datasetName = DatasetName("active_promoted_tweets")
|
||||
val cluster = Revenue
|
||||
|
||||
override def statsReceiver: StatsReceiver =
|
||||
crMixerStatsReceiver.scope("active_promoted_tweets_mh")
|
||||
}
|
||||
val mhStore: ReadableStore[String, DataRecord] =
|
||||
ManhattanRO
|
||||
.getReadableStoreWithMtls[String, DataRecord](
|
||||
mhConfig,
|
||||
manhattanKVClientMtlsParams
|
||||
)(
|
||||
implicitly[Injection[String, Array[Byte]]],
|
||||
CompactThriftCodec[DataRecord]
|
||||
)
|
||||
|
||||
val underlyingStore =
|
||||
ActivePromotedTweetStore(mhStore, crMixerStatsReceiver.scope("ActivePromotedTweetStore"))
|
||||
val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
|
||||
backingStore = underlyingStore,
|
||||
cacheClient = crMixerUnifiedCacheClient,
|
||||
ttl = 60.minutes,
|
||||
asyncUpdate = false
|
||||
)(
|
||||
valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
|
||||
statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
|
||||
keyToString = { k: TweetId => s"apt/$k" }
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from(
|
||||
memcachedStore,
|
||||
ttl = 30.minutes,
|
||||
maxKeys = 250000, // size of promoted tweet is around 200,000
|
||||
windowSize = 10000L,
|
||||
cacheName = "active_promoted_tweet_cache",
|
||||
maxMultiGetSize = 20
|
||||
)(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,130 +0,0 @@
|
||||
scala_library(
|
||||
sources = [
|
||||
"*.scala",
|
||||
"core/*.scala",
|
||||
"grpc_client/*.scala",
|
||||
"similarity_engine/*.scala",
|
||||
"source_signal/*.scala",
|
||||
"thrift_client/*.scala",
|
||||
],
|
||||
compiler_option_sets = ["fatal_warnings"],
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/bijection:core",
|
||||
"3rdparty/jvm/com/twitter/bijection:scrooge",
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/jvm/com/twitter/storehaus:memcache",
|
||||
"3rdparty/jvm/io/grpc:grpc-api",
|
||||
"3rdparty/jvm/io/grpc:grpc-auth",
|
||||
"3rdparty/jvm/io/grpc:grpc-core",
|
||||
"3rdparty/jvm/io/grpc:grpc-netty",
|
||||
"3rdparty/jvm/io/grpc:grpc-protobuf",
|
||||
"3rdparty/jvm/io/grpc:grpc-stub",
|
||||
"3rdparty/jvm/javax/inject:javax.inject",
|
||||
"3rdparty/jvm/org/scalanlp:breeze",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"abdecider/src/main/scala",
|
||||
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
|
||||
"configapi/configapi-abdecider",
|
||||
"configapi/configapi-core",
|
||||
"configapi/configapi-featureswitches:v2",
|
||||
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
|
||||
"content-recommender/thrift/src/main/thrift:thrift-scala",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
|
||||
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
|
||||
"cr-mixer/thrift/src/main/thrift:thrift-scala",
|
||||
"decider/src/main/scala",
|
||||
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
|
||||
"featureswitches/featureswitches-core",
|
||||
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
|
||||
"finagle-internal/finagle-grpc/src/main/scala",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
|
||||
"finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
|
||||
"finatra-internal/mtls-thriftmux/src/main/scala",
|
||||
"finatra/inject/inject-core/src/main/scala",
|
||||
"finatra/inject/inject-modules/src/main/scala",
|
||||
"finatra/inject/inject-thrift-client",
|
||||
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
|
||||
"frigate/frigate-common:util",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"hydra/partition/thrift/src/main/thrift:thrift-scala",
|
||||
"hydra/root/thrift/src/main/thrift:thrift-scala",
|
||||
"mediaservices/commons/src/main/scala:futuretracker",
|
||||
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
|
||||
"qig-ranker/thrift/src/main/thrift:thrift-scala",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
|
||||
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
|
||||
"relevance-platform/thrift/src/main/thrift:thrift-scala",
|
||||
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
|
||||
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
|
||||
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
|
||||
"representation-manager/server/src/main/thrift:thrift-scala",
|
||||
"representation-scorer/server/src/main/thrift:thrift-scala",
|
||||
"servo/decider",
|
||||
"servo/util/src/main/scala",
|
||||
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
|
||||
"src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
|
||||
"src/scala/com/twitter/algebird_internal/injection",
|
||||
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
|
||||
"src/scala/com/twitter/ml/api/embedding",
|
||||
"src/scala/com/twitter/ml/featurestore/lib",
|
||||
"src/scala/com/twitter/scalding_internal/multiformat/format",
|
||||
"src/scala/com/twitter/simclusters_v2/candidate_source",
|
||||
"src/scala/com/twitter/simclusters_v2/common",
|
||||
"src/scala/com/twitter/storehaus_internal/manhattan",
|
||||
"src/scala/com/twitter/storehaus_internal/manhattan/config",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache/config",
|
||||
"src/scala/com/twitter/storehaus_internal/offline",
|
||||
"src/scala/com/twitter/storehaus_internal/util",
|
||||
"src/scala/com/twitter/topic_recos/stores",
|
||||
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
|
||||
"src/thrift/com/twitter/frigate:frigate-thrift-scala",
|
||||
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
|
||||
"src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
|
||||
"src/thrift/com/twitter/ml/api:data-java",
|
||||
"src/thrift/com/twitter/ml/api:embedding-scala",
|
||||
"src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
|
||||
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
|
||||
"src/thrift/com/twitter/recos:recos-common-scala",
|
||||
"src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
|
||||
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
|
||||
"src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
|
||||
"src/thrift/com/twitter/search:earlybird-scala",
|
||||
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
|
||||
"src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:service-scala",
|
||||
"src/thrift/com/twitter/twistly:twistly-scala",
|
||||
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
|
||||
"stitch/stitch-storehaus",
|
||||
"stitch/stitch-tweetypie/src/main/scala",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"user-signal-service/thrift/src/main/thrift:thrift-scala",
|
||||
"util-internal/scribe/src/main/scala/com/twitter/logging",
|
||||
"util/util-hashing",
|
||||
],
|
||||
)
|
@ -1,52 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Athena
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
|
||||
object BlueVerifiedAnnotationStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.BlueVerifiedAnnotationStore)
|
||||
def providesBlueVerifiedAnnotationStore(
|
||||
statsReceiver: StatsReceiver,
|
||||
manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
|
||||
): ReadableStore[String, BlueVerifiedAnnotationsV2] = {
|
||||
|
||||
implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)
|
||||
|
||||
val underlyingStore = ManhattanRO
|
||||
.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
|
||||
ManhattanROConfig(
|
||||
HDFSPath(""),
|
||||
ApplicationID("content_recommender_athena"),
|
||||
DatasetName("blue_verified_annotations"),
|
||||
Athena),
|
||||
manhattanKVClientMtlsParams
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from(
|
||||
underlyingStore,
|
||||
ttl = 24.hours,
|
||||
maxKeys = 100000,
|
||||
windowSize = 10000L,
|
||||
cacheName = "blue_verified_annotation_cache"
|
||||
)(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore"))
|
||||
}
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.google.inject.Singleton
|
||||
import com.google.inject.name.Named
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
|
||||
import com.twitter.finagle.memcached.{Client => MemcachedClient}
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.hermit.store.common.ObservedCachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
|
||||
import com.twitter.hermit.store.common.ObservedReadableStore
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.relevance_platform.common.injection.LZ4Injection
|
||||
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
|
||||
import com.twitter.simclusters_v2.thriftscala.TopicId
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.strato.client.Client
|
||||
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
|
||||
import com.twitter.topic_recos.thriftscala.TweetWithScores
|
||||
|
||||
object CertoStratoStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.CertoStratoStoreName)
|
||||
def providesCertoStratoStore(
|
||||
@Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
|
||||
stratoClient: Client,
|
||||
statsReceiver: StatsReceiver
|
||||
): ReadableStore[TopicId, Seq[TweetWithScores]] = {
|
||||
val certoStore = ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
|
||||
statsReceiver.scope(ModuleNames.CertoStratoStoreName)).mapValues { topKTweetsWithScores =>
|
||||
topKTweetsWithScores.topTweetsByFollowerL2NormalizedCosineSimilarityScore
|
||||
}
|
||||
|
||||
val memCachedStore = ObservedMemcachedReadableStore
|
||||
.fromCacheClient(
|
||||
backingStore = certoStore,
|
||||
cacheClient = crMixerUnifiedCacheClient,
|
||||
ttl = 10.minutes
|
||||
)(
|
||||
valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
|
||||
statsReceiver = statsReceiver.scope("memcached_certo_store"),
|
||||
keyToString = { k => s"certo:${keyHasher.hashKey(k.toString.getBytes)}" }
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
|
||||
memCachedStore,
|
||||
ttl = 5.minutes,
|
||||
maxKeys = 100000, // ~150MB max
|
||||
cacheName = "certo_in_memory_cache",
|
||||
windowSize = 10000L
|
||||
)(statsReceiver.scope("certo_in_memory_cache"))
|
||||
}
|
||||
}
|
@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton

object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserAdGraphStore)
  def providesConsumerBasedUserAdGraphStore(
    userAdGraphService: UserAdGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] = {
    new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
      override def get(
        k: ConsumersBasedRelatedAdRequest
      ): Future[Option[RelatedAdResponse]] = {
        userAdGraphService.consumersBasedRelatedAds(k).map(Some(_))
      }
    }
  }
}
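// Illustrative sketch only (not part of the original file): the ConsumersBased*StoreModule objects
// in this commit all follow the same shape, adapting a single thrift endpoint into a ReadableStore.
// A generic helper expressing that pattern (hypothetical, not present in the codebase) would be:
object EndpointReadableStoreSketch {
  import com.twitter.storehaus.ReadableStore
  import com.twitter.util.Future

  def fromEndpoint[K, V](endpoint: K => Future[V]): ReadableStore[K, V] =
    new ReadableStore[K, V] {
      override def get(k: K): Future[Option[V]] = endpoint(k).map(Some(_))
    }

  // e.g. fromEndpoint(userAdGraphService.consumersBasedRelatedAds _)
}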
|
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
|
||||
def providesConsumerBasedUserTweetGraphStore(
|
||||
userTweetGraphService: UserTweetGraph.MethodPerEndpoint
|
||||
): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = {
|
||||
new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
|
||||
override def get(
|
||||
k: ConsumersBasedRelatedTweetRequest
|
||||
): Future[Option[RelatedTweetResponse]] = {
|
||||
userTweetGraphService.consumersBasedRelatedTweets(k).map(Some(_))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,30 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
|
||||
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
|
||||
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
|
||||
def providesConsumerBasedUserVideoGraphStore(
|
||||
userVideoGraphService: UserVideoGraph.MethodPerEndpoint
|
||||
): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = {
|
||||
new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
|
||||
override def get(
|
||||
k: ConsumersBasedRelatedTweetRequest
|
||||
): Future[Option[RelatedTweetResponse]] = {
|
||||
userVideoGraphService.consumersBasedRelatedTweets(k).map(Some(_))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,16 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.timelines.configapi.Config
import com.twitter.cr_mixer.param.CrMixerParamConfig
import com.twitter.inject.TwitterModule
import javax.inject.Singleton

object CrMixerParamConfigModule extends TwitterModule {

  @Provides
  @Singleton
  def provideConfig(): Config = {
    CrMixerParamConfig.config
  }
}
@ -1,54 +0,0 @@
|
||||
package com.twitter.cr_mixer.module
|
||||
|
||||
import com.google.inject.Provides
|
||||
import com.twitter.bijection.Injection
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.cr_mixer.model.ModuleNames
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.inject.TwitterModule
|
||||
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
|
||||
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.storehaus_internal.manhattan.Apollo
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanRO
|
||||
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
|
||||
import com.twitter.storehaus_internal.util.ApplicationID
|
||||
import com.twitter.storehaus_internal.util.DatasetName
|
||||
import com.twitter.storehaus_internal.util.HDFSPath
|
||||
import javax.inject.Named
|
||||
import javax.inject.Singleton
|
||||
|
||||
object DiffusionStoreModule extends TwitterModule {
|
||||
type UserId = Long
|
||||
implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]
|
||||
implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
|
||||
BinaryScalaCodec(TweetsWithScore)
|
||||
|
||||
@Provides
|
||||
@Singleton
|
||||
@Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
|
||||
def retweetBasedDiffusionRecsMhStore(
|
||||
serviceIdentifier: ServiceIdentifier
|
||||
): ReadableStore[Long, TweetsWithScore] = {
|
||||
val manhattanROConfig = ManhattanROConfig(
|
||||
HDFSPath(""), // not needed
|
||||
ApplicationID("cr_mixer_apollo"),
|
||||
DatasetName("diffusion_retweet_tweet_recs"),
|
||||
Apollo
|
||||
)
|
||||
|
||||
buildTweetRecsStore(serviceIdentifier, manhattanROConfig)
|
||||
}
|
||||
|
||||
private def buildTweetRecsStore(
|
||||
serviceIdentifier: ServiceIdentifier,
|
||||
manhattanROConfig: ManhattanROConfig
|
||||
): ReadableStore[Long, TweetsWithScore] = {
|
||||
|
||||
ManhattanRO
|
||||
.getReadableStoreWithMtls[Long, TweetsWithScore](
|
||||
manhattanROConfig,
|
||||
ManhattanKVClientMtlsParams(serviceIdentifier)
|
||||
)(longCodec, tweetRecsInjection)
|
||||
}
|
||||
}
|
@ -1,189 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
import com.twitter.search.earlybird.thriftscala.EarlybirdService
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import javax.inject.Named

object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            timeoutConfig.earlybirdServerTimeout)
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true,
    )

  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val earlybirdSearchResult =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          statsReceiver.scope("result").stat("size").add(earlybirdSearchResult.size)
          earlybirdSearchResult
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          Some(Seq.empty)
      }
    }

}
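The memcache keys above are the FNV1A_64 hash of the seed user id rendered in uppercase hex, with short prefixes ("uEBRBHM" vs "uEBRBN") keeping the Home and Notifications variants apart in the shared cache. A small sketch of that key construction, assuming only the open-source util-hashing API (the helper name is illustrative):

import com.twitter.hashing.KeyHasher

// Illustrative: mirrors the keyToString lambda used for the Home variant.
def homeCacheKey(userId: Long): String =
  f"uEBRBHM:${KeyHasher.FNV1A_64.hashKey(userId.toString.getBytes)}%X"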
@ -1,195 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.{thriftscala => api}
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton

object EmbeddingStoreModule extends TwitterModule {
  type UserId = Long
  implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
    CompactScalaCodec(api.Embedding)
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"
  @Provides
  @Singleton
  @Named(TwHINEmbeddingRegularUpdateMhStoreName)
  def twHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[TweetId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("twhin_regular_update_tweet_embedding_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.TweetId(tweetId) =>
          tweetId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
    "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"
  @Provides
  @Singleton
  @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
  def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("twhin_user_embedding_regular_update_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"
  @Provides
  @Singleton
  @Named(TwoTowerFavConsumerEmbeddingMhStoreName)
  def twoTowerFavConsumerEmbeddingMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("cr_mixer_apollo"),
          DatasetName("two_tower_fav_user_embedding_apollo"),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"
  @Provides
  @Singleton
  @Named(DebuggerDemoUserEmbeddingMhStoreName)
  def debuggerDemoUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    val manhattanROConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName("experimental_user_embedding"),
      Apollo
    )
    buildUserEmbeddingStore(serviceIdentifier, manhattanROConfig)
  }

  final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"
  @Provides
  @Singleton
  @Named(DebuggerDemoTweetEmbeddingMhStoreName)
  def debuggerDemoTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] = {
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    val manhattanROConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName("experimental_tweet_embedding"),
      Apollo
    )
    buildTweetEmbeddingStore(serviceIdentifier, manhattanROConfig)
  }

  private def buildUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]
    ManhattanRO
      .getReadableStoreWithMtls[UserId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.UserId(userId) =>
          userId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }

  private def buildTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)

    val longCodec = implicitly[Injection[Long, Array[Byte]]]

    ManhattanRO
      .getReadableStoreWithMtls[TweetId, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] {
        case InternalId.TweetId(tweetId) =>
          tweetId
        case _ =>
          throw new UnsupportedOperationException("Invalid Internal Id")
      }
  }
}
@ -1,29 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.storehaus.ReadableStore
import javax.inject.Named

object FrsStoreModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    ObservedReadableStore(FrsStore(frsClient, statsReceiver, decider))(
      statsReceiver.scope("follow_recommendations_store"))
  }
}
@ -1,17 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import javax.inject.Singleton

object MHMtlsParamsModule extends TwitterModule {
  @Singleton
  @Provides
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = {
    ManhattanKVClientMtlsParams(serviceIdentifier)
  }
}
@ -1,150 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton

object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_decayed_sum"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1"
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] = {
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1"
    )
  }

  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    ManhattanRO
      .getReadableStoreWithMtls[Long, CandidateTweetsList](
        ManhattanROConfig(
          HDFSPath(""), // not needed
          ApplicationID("multi_type_simclusters"),
          DatasetName(datasetName),
          Apollo
        ),
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )
  }

}
@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.wtf.candidate.thriftscala.CandidateSeq

object RealGraphOonStoreModule extends TwitterModule {

  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val realGraphOonStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, CandidateSeq](stratoClient, userRealGraphOonColumnPath())

    ObservedReadableStore(
      realGraphOonStratoFetchableStore
    )(statsReceiver.scope("user_real_graph_oon_store"))
  }
}
@ -1,67 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.wtf.candidate.thriftscala.CandidateSeq

object RealGraphStoreMhModule extends TwitterModule {

  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
  ): ReadableStore[UserId, CandidateSeq] = {

    implicit val valueCodec = new BinaryScalaCodec(CandidateSeq)
    val underlyingStore = ManhattanRO
      .getReadableStoreWithMtls[UserId, CandidateSeq](
        ManhattanROConfig(
          HDFSPath(""),
          ApplicationID("cr_mixer_apollo"),
          DatasetName("real_graph_scores_apollo"),
          Apollo),
        manhattanKVClientMtlsParams
      )

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 24.hours,
      )(
        valueInjection = valueCodec,
        statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
        keyToString = { k: UserId => s"uRGraph/$k" }
      )

    DeciderableReadableStore(
      memCachedStore,
      decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey),
      statsReceiver.scope("RealGraphMh")
    )
  }
}
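The DeciderableReadableStore wrapper above only consults the memcached RealGraph store when the decider gate passes; otherwise lookups short-circuit. A hedged sketch of that gating idea with a plain predicate, using only the open-source storehaus and util APIs (the gated helper is illustrative and omits the stats the internal class records):

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future

// Illustrative gate: when enabled() is false the store behaves as always-empty,
// mirroring how the decider key can switch the RealGraph lookup off.
def gated[K, V](underlying: ReadableStore[K, V], enabled: () => Boolean): ReadableStore[K, V] =
  new ReadableStore[K, V] {
    override def get(k: K): Future[Option[V]] =
      if (enabled()) underlying.get(k) else Future.None
  }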
@ -1,107 +0,0 @@
package com.twitter.cr_mixer.module

import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.google.inject.Provides
import com.google.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}

object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersTweetColPath,
          SimClustersEmbeddingView(
            EmbeddingType.LogFavLongestL2EmbeddingTweet,
            ModelVersion.Model20m145k2020))
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_tweet_log_fav_longest_l2_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.FavBasedProducer,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_fav_based_producer_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.LogFavBasedUserInterestedIn,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_log_fav_interestedin_store"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          SimclustersUserColPath,
          SimClustersEmbeddingView(
            EmbeddingType.FollowBasedUserInterestedIn,
            ModelVersion.Model20m145k2020
          )
        )
        .mapValues(SimClustersEmbedding(_)))(
      statsReceiver.scope("rms_user_follow_interestedin_store"))
  }
}
@ -1,56 +0,0 @@
package com.twitter.cr_mixer.module

import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.storehaus.ReadableStore
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.hermit.store.common.ObservedReadableStore
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.representationscorer.thriftscala.ListScoreId

object RepresentationScorerModule extends TwitterModule {

  private val rsxColumnPath = "recommendations/representation_scorer/listScore"

  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath).composeKeyMapping[(
          UserId,
          TweetId
        )] { key =>
          representationScorerStoreKeyMapping(key._1, key._2)
        }
    )(statsReceiver.scope("rsx_store"))
  }

  private def representationScorerStoreKeyMapping(t1: TweetId, t2: TweetId): ListScoreId = {
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = InternalId.TweetId(t1),
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = Seq(InternalId.TweetId(t2))
    )
  }
}
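The store above is keyed by a (UserId, TweetId) pair, but the underlying Strato column is keyed by ListScoreId; the private helper builds that id before the fetch. A hedged sketch of the same key-adapter idea expressed with plain storehaus primitives (the names and key encoding are illustrative, not from the original file):

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future

// Illustrative only: adapt a store keyed by String into one keyed by (Long, Long)
// by mapping the composite key, mirroring representationScorerStoreKeyMapping.
def keyAdapted(underlying: ReadableStore[String, Double]): ReadableStore[(Long, Long), Double] =
  new ReadableStore[(Long, Long), Double] {
    override def get(key: (Long, Long)): Future[Option[Double]] =
      underlying.get(s"${key._1}:${key._2}")
  }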
@ -1,90 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton

/**
 * In this example we build a [[StandardSimilarityEngine]] to wrap a dummy store
 */
object SimpleSimilarityEngineModule extends TwitterModule {
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStore = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
        3L -> Seq((300L, 3.0), (301L, 3.0))
      ))

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = dummyStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = GatingConfig(
          deciderConfig = None,
          enableFeatureSwitch = None
        )
      )
    )
  }
}

/**
 * In this example we build a [[LookupSimilarityEngine]] to wrap a dummy store with 2 versions
 */
object LookupSimilarityEngineModule extends TwitterModule {
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStoreV1 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      ))

    val dummyStoreV2 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      ))

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = Map(
        "V1" -> dummyStoreV1,
        "V2" -> dummyStoreV2
      ),
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = SimilarityEngineConfig(
        timeout = timeoutConfig.similarityEngineTimeout,
        gatingConfig = GatingConfig(
          deciderConfig = None,
          enableFeatureSwitch = None
        )
      )
    )
  }

}
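Both example modules above wrap in-memory dummy stores built with ReadableStore.fromMap, keyed by user id and valued with (tweetId, score) pairs. A small sketch of what those dummies return, assuming only the open-source storehaus and util APIs (the engine classes themselves are internal and not exercised here):

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Await

val dummy = ReadableStore.fromMap(Map(1L -> Seq((100L, 1.0), (101L, 1.0))))
// get() yields Future[Option[Seq[(Long, Double)]]]; key 1L hits, key 5L misses.
assert(Await.result(dummy.get(1L)).contains(Seq((100L, 1.0), (101L, 1.0))))
assert(Await.result(dummy.get(5L)).isEmpty)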
@ -1,33 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import javax.inject.Named

object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] = {
    Map[String, SimClustersANNService.MethodPerEndpoint](
      "simclusters-ann" -> simClustersANNServiceProd,
      "simclusters-ann-experimental" -> simClustersANNServiceExperimental,
      "simclusters-ann-1" -> simClustersANNService1,
      "simclusters-ann-2" -> simClustersANNService2,
      "simclusters-ann-3" -> simClustersANNService3,
      "simclusters-ann-5" -> simClustersANNService5,
      "simclusters-ann-4" -> simClustersANNService4
    )
  }
}
@ -1,65 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.thriftscala.TopicTopTweets
import com.twitter.topic_recos.thriftscala.TopicTweet
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey

/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job
 */
object SkitStratoStoreModule extends TwitterModule {

  val column = "recommendations/topic_recos/topicTopTweets"

  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    val skitStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column))(
      statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues { topicTopTweets =>
      topicTopTweets.topTweets
    }

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = skitStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        keyToString = { k => s"skit:${keyHasher.hashKey(k.toString.getBytes)}" }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memCachedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}
@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.hermit.stp.thriftscala.STPResult
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Named

object StrongTiePredictionStoreModule extends TwitterModule {

  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] = {
    val strongTiePredictionStratoFetchableStore = StratoFetchableStore
      .withUnitView[UserId, STPResult](stratoClient, strongTiePredictionColumnPath())

    ObservedReadableStore(
      strongTiePredictionStratoFetchableStore
    )(statsReceiver.scope("strong_tie_prediction_big_store"))
  }
}
@ -1,34 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
import javax.inject.Named

object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  @Provides
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}
@ -1,205 +0,0 @@
package com.twitter.cr_mixer.module

import com.google.inject.Module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.frigate.common.store.health.TweetHealthModelStore
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
import com.twitter.frigate.common.store.health.UserHealthModelStore
import com.twitter.frigate.thriftscala.TweetHealthScores
import com.twitter.frigate.thriftscala.UserAgathaScores
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.contentrecommender.store.TweetInfoStore
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.tweetypie.thriftscala.TweetService
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer

import javax.inject.Named

object TweetInfoStoreModule extends TwitterModule {
  implicit val timer: Timer = new JavaTimer(true)
  override def modules: Seq[Module] = Seq(UnifiedCacheClient)

  @Provides
  @Singleton
  def providesTweetInfoStore(
    statsReceiver: StatsReceiver,
    serviceIdentifier: ServiceIdentifier,
    stratoClient: StratoClient,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    tweetyPieService: TweetService.MethodPerEndpoint,
    userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
    @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
      String,
      BlueVerifiedAnnotationsV2
    ],
    decider: CrMixerDecider
  ): ReadableStore[TweetId, TweetInfo] = {

    val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = {
      val underlyingStore =
        ObservedReadableStore(new ReadableStore[TweetId, TweetEngagementScores] {
          override def get(
            k: TweetId
          ): Future[Option[TweetEngagementScores]] = {
            userTweetGraphPlusService.tweetEngagementScore(k).map {
              Some(_)
            }
          }
        })(statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))

      DeciderableReadableStore(
        underlyingStore,
        decider.deciderGateBuilder.idGate(
          DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
      )

    }

    val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = {
      val underlyingStore = TweetHealthModelStore.buildReadableStore(
        stratoClient,
        Some(
          TweetHealthModelStoreConfig(
            enablePBlock = true,
            enableToxicity = true,
            enablePSpammy = true,
            enablePReported = true,
            enableSpammyTweetContent = true,
            enablePNegMultimodal = true,
          ))
      )(statsReceiver.scope("UnderlyingTweetHealthModelStore"))

      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 2.hours
        )(
          valueInjection = BinaryScalaCodec(TweetHealthScores),
          statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
          keyToString = { k: TweetId => s"tHMS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
        statsReceiver.scope("TweetHealthModelStore")
      ) // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache
    }

    val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = {
      val underlyingStore = UserHealthModelStore.buildReadableStore(stratoClient)(
        statsReceiver.scope("UnderlyingUserHealthModelStore"))
      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 18.hours
        )(
          valueInjection = BinaryScalaCodec(UserAgathaScores),
          statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
          keyToString = { k: UserId => s"uHMS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
        statsReceiver.scope("UserHealthModelStore")
      )
    }

    val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = {
      val underlyingStore =
        UserMediaRepresentationHealthStore.buildReadableStore(
          manhattanKVClientMtlsParams,
          statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
        )
      DeciderableReadableStore(
        ObservedMemcachedReadableStore.fromCacheClient(
          backingStore = underlyingStore,
          cacheClient = crMixerUnifiedCacheClient,
          ttl = 12.hours
        )(
          valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
          statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
          keyToString = { k: UserId => s"uMRHS/$k" }
        ),
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
        statsReceiver.scope("UserMediaRepresentationHealthStore")
      )
    }

    val magicRecsRealTimeAggregatesStore: ReadableStore[
      TweetId,
      MagicRecsRealTimeAggregatesScores
    ] = {
      val underlyingStore =
        MagicRecsRealTimeAggregatesStore.buildReadableStore(
          serviceIdentifier,
          statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
        )
      DeciderableReadableStore(
        underlyingStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
        statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
      )
    }

    val tweetInfoStore: ReadableStore[TweetId, TweetInfo] = {
      val underlyingStore = TweetInfoStore(
        TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
        userMediaRepresentationHealthStore,
        magicRecsRealTimeAggregatesStore,
        tweetEngagementScoreStore,
        blueVerifiedAnnotationStore
      )(statsReceiver.scope("tweetInfoStore"))

      val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 15.minutes,
        // Hydrating tweetInfo is now a required step for all candidates,
        // hence we needed to tune these thresholds.
        asyncUpdate = serviceIdentifier.environment == "prod"
      )(
        valueInjection = BinaryScalaCodec(TweetInfo),
        statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
        keyToString = { k: TweetId => s"tIS/$k" }
      )

      ObservedCachedReadableStore.from(
        memcachedStore,
        ttl = 15.minutes,
        maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
        windowSize = 10000L,
        cacheName = "tweet_info_cache",
        maxMultiGetSize = 20
      )(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
    }
    tweetInfoStore
  }
}
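providesTweetInfoStore layers three reads: an in-process ObservedCachedReadableStore in front of a Memcached-backed store in front of the TweetyPie-backed TweetInfoStore. A minimal sketch of that read-through layering with plain storehaus types, as an illustration only (the Observed* wrappers are internal and add TTLs, eviction, and stats that this sketch omits):

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import scala.collection.concurrent.TrieMap

// Illustrative two-level read-through: consult a local map first, then fall
// back to the backing store and remember the answer. No TTL or size bound,
// unlike the production wrappers used above.
def cached[K, V](backing: ReadableStore[K, V]): ReadableStore[K, V] =
  new ReadableStore[K, V] {
    private val local = TrieMap.empty[K, Option[V]]
    override def get(k: K): Future[Option[V]] =
      local.get(k) match {
        case Some(hit) => Future.value(hit)
        case None => backing.get(k).onSuccess(v => local.put(k, v))
      }
  }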
@ -1,42 +0,0 @@
|
||||
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers

object TweetRecentEngagedUserStoreModule extends TwitterModule {

  private val tweetRecentEngagedUsersStoreDefaultVersion =
    0 // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )
  private type Version = Long

  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    val tweetRecentEngagedUsersStratoFetchableStore = StratoFetchableStore
      .withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath()).composeKeyMapping[TweetId](tweetId =>
        (tweetId, tweetRecentEngagedUsersStoreDefaultVersion))

    ObservedReadableStore(
      tweetRecentEngagedUsersStratoFetchableStore
    )(statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}
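
A minimal sketch of the key mapping used above: callers supply only a TweetId, and composeKeyMapping appends the default version before the Strato column is consulted. The in-memory store and String value type below stand in for the Strato-backed store purely for illustration.

import com.twitter.storehaus.ReadableStore
import com.twitter.util.Await

val versionedBackingStore: ReadableStore[(Long, Long), String] =
  ReadableStore.fromMap(Map((123L, 0L) -> "recent-engaged-users-blob"))

// Same shape as the module: hide the (tweetId, version) key behind a tweetId-only key.
val byTweetId: ReadableStore[Long, String] =
  versionedBackingStore.composeKeyMapping[Long](tweetId => (tweetId, 0L))

// Await.result(byTweetId.get(123L)) == Some("recent-engaged-users-blob")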
@ -1,32 +0,0 @@
|
||||
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
import com.twitter.simclusters_v2.common.UserId
import javax.inject.Named

object TweetRecommendationResultsStoreModule extends TwitterModule {
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours)(
      valueInjection = BinaryScalaCodec(CrMixerTweetResponse),
      statsReceiver = statsReceiver.scope("TweetRecommendationResultsMemcacheStore"),
      keyToString = { k: UserId => k.toString }
    )
  }
}
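
An illustrative sketch of how the results store above might serve a cached response before recomputing one, assuming the store exposes get(key): Future[Option[V]] and put((key, value)): Future[Unit] in the storehaus style; the function name is hypothetical.

import com.twitter.util.Future

def serveCachedOrCompute(
  resultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse],
  userId: UserId,
  compute: () => Future[CrMixerTweetResponse]
): Future[CrMixerTweetResponse] =
  resultsStore.get(userId).flatMap {
    case Some(cached) => Future.value(cached) // hit: served from memcache within the 24h TTL
    case None =>
      compute().flatMap { fresh =>
        resultsStore.put((userId, fresh)).map(_ => fresh) // miss: cache the fresh response
      }
  }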
@ -1,67 +0,0 @@
|
||||
package com.twitter.cr_mixer.module

import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named

object TwhinCollabFilterStratoStoreModule extends TwitterModule {

  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("follow_2022_03_10_c_500K")
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("engagement_2022_04_10_c_500K"))
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("multiclusterFollow20220921")
    )
  }

  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] = {
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = TwhinCollabFilterView("multiclusterEng20220921"))
  }
}
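
An illustrative sketch of how one of the named bindings above might be consumed; the consumer class and method are hypothetical, and only the store type and the @Named key come from the module.

import javax.inject.{Inject, Named, Singleton}
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future

@Singleton
class TwhinCollabFilterFollowSource @Inject() (
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  followStore: ReadableStore[Long, Seq[TweetId]]) {

  // Precomputed collaborative-filtering candidates for this user, or empty when the
  // Strato column has no row for the key.
  def candidatesFor(userId: Long): Future[Seq[TweetId]] =
    followStore.get(userId).map(_.getOrElse(Seq.empty))
}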