Delete cr-mixer directory

This commit is contained in:
dogemanttv 2024-01-10 17:06:38 -06:00 committed by GitHub
parent 29a136d27d
commit 1a6792fd07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
285 changed files with 0 additions and 25871 deletions

View File

@ -1,24 +0,0 @@
# Runnable JVM binary for the cr-mixer server. The entry point is the
# CrMixerServerMain object and the declared runtime platform is Java 11.
jvm_binary(
name = "bin",
basename = "cr-mixer",
main = "com.twitter.cr_mixer.CrMixerServerMain",
runtime_platform = "java11",
tags = ["bazel-compatible"],
# The server library itself plus logging/tracing-related targets
# (logback, loglens, zipkin-scribe) and the twitter-server targets.
dependencies = [
"3rdparty/jvm/ch/qos/logback:logback-classic",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer",
"finagle/finagle-zipkin-scribe/src/main/scala",
"finatra/inject/inject-logback/src/main/scala",
"loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback",
"twitter-server-internal/src/main/scala",
"twitter-server/logback-classic/src/main/scala",
],
)
# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app
# This target wraps the ":bin" binary and declares a zip archive as its output format.
jvm_app(
name = "cr-mixer-app",
archive = "zip",
binary = ":bin",
tags = ["bazel-compatible"],
)

View File

@ -1,7 +0,0 @@
# CR-Mixer
CR-Mixer is a candidate generation service proposed as part of the Personalization Strategy vision for Twitter. Its aim is to speed up the iteration and development of candidate generation and light ranking. The service acts as a lightweight coordinating layer that delegates candidate generation tasks to underlying compute services. It focuses on Twitter's candidate generation use cases and offers a centralized platform for fetching, mixing, and managing candidate sources and light rankers. The overarching goal is to increase the speed and ease of testing and developing candidate generation pipelines, ultimately delivering more value to Twitter users.
CR-Mixer acts as a configurator and delegator, providing abstractions for the challenging parts of candidate generation and handling performance issues. It will offer a one-stop shop for fetching and mixing candidate sources, a managed and shared performant platform, a light ranking layer, a common filtering layer, a version control system, a co-owned feature switch set, and peripheral tooling.
CR-Mixer's pipeline consists of 4 steps: source signal extraction, candidate generation, filtering, and ranking. It also provides peripheral tooling like scribing, debugging, and monitoring. The service fetches source signals externally from stores like UserProfileService and RealGraph, calls external candidate generation services, and caches results. Filters are applied for deduping and pre-ranking, and a light ranking step follows.

View File

@ -1,8 +0,0 @@
# Resource target for the server: picks up the XML and YAML files in this
# directory, plus the YAML files directly under config/.
resources(
sources = [
"*.xml",
"*.yml",
"config/*.yml",
],
tags = ["bazel-compatible"],
)

View File

@ -1,146 +0,0 @@
# The keys in this file correspond to the DeciderValues defined in
# https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala
dark_traffic_filter:
comment: Proportion of the requests that are forwarded as dark traffic to the proxy
default_availability: 0
enable_tweet_recommendations_home_product:
comment: Proportion of requests where we return an actual response for TweetRecommendations Home product
default_availability: 10000
enable_tweet_health_score:
comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute TweetHealthModelScore"
default_availability: 10000
enable_user_agatha_score:
comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute UserHealthModelScore"
default_availability: 10000
enable_user_tweet_entity_graph_traffic:
comment: "Enable the traffic to user tweet entity graph to fetch liked-by tweets candidates"
default_availability: 10000
enable_user_tweet_graph_traffic:
comment: "Enable the traffic to user tweet graph to fetch similar tweets candidates"
default_availability: 10000
enable_user_video_graph_traffic:
comment: "Enable the traffic to user video graph to fetch similar tweets candidates"
default_availability: 10000
enable_user_ad_graph_traffic:
comment: "Enable the traffic to user ad graph to fetch similar tweets candidates"
default_availability: 10000
enable_qig_similar_tweets_traffic:
comment: "Enable the traffic to QIG to fetch similar tweet candidates"
default_availability: 0
enable_frs_traffic:
comment: "Enable the traffic to FRS to fetch user follow recommendations"
default_availability: 0
enable_hydra_dark_traffic:
comment: "Enable dark traffic to hydra"
default_availability: 0
enable_real_graph_mh_store:
comment: "Enable traffic for the real graph manhattan based store"
default_availability: 0
enable_simclusters_ann_experimental_dark_traffic:
comment: "Enable dark traffic to simclusters-ann-experimental"
default_availability: 0
enable_simclusters_ann_2_dark_traffic:
comment: "Enable dark traffic to prod SimClustersANN2"
default_availability: 0
enable_user_state_store:
comment: "Enable traffic to user state store to hydrate user state"
default_availability: 0
upper_funnel_per_step_scribe_rate:
comment: "Enable Upper Funnel Event Scribe Sampling (fetch, pre-rank, interleave etc.) for getTweetsRecommendations() endpoint"
default_availability: 0
kafka_message_scribe_sample_rate:
comment: "Gates the production of forked scribe messages to kafka for the async feature hydrator"
default_availability: 0
top_level_api_ddg_metrics_scribe_rate:
comment: "Enable Top Level API DDG Metrics Scribe Sampling for getTweetsRecommendations() endpoint"
default_availability: 0
ads_recommendations_per_experiment_scribe_rate:
comment: "Percentage of DDG traffic to Scribe for getAdsRecommendations() endpoint"
default_availability: 0
enable_loadshedding_getTweetRecommendations:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Notifications:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getTweetRecommendations_Email:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryTweet_MoreTweetsModule:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryAuthor:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getRelatedTweetsForQueryAuthor_MoreTweetsModule:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getFrsBasedTweetRecommendations_Home:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_loadshedding_getFrsBasedTweetRecommendations_Notifications:
comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response"
default_availability: 0
enable_user_media_representation_store:
comment: "Enable fetching user nudity rate signal from Media Understanding"
default_availability: 0
enable_magic_recs_real_time_aggregates_store:
comment: "Enable fetching real time aggregates features from Magic Recs memcache"
default_availability: 0
enable_utg_realtime_tweet_engagement_score:
comment: "Enable fetching real time tweet engagement score from utg-plus"
default_availability: 0
get_tweet_recommendations_cache_rate:
comment: "Proportion of users where getTweetRecommendations() request and responses will be cached"
default_availability: 1000
enable_earlybird_traffic:
comment: "Enable fetching tweet candidates from Earlybird"
default_availability: 0
enable_scribe_for_blue_verified_tweet_candidates:
comment: "Enable scribing for tweet candidates from Blue Verified users"
default_availability: 0

View File

@ -1,168 +0,0 @@
<configuration>
<shutdownHook class="ch.qos.logback.core.hook.DelayingShutdownHook"/>
<!-- ===================================================== -->
<!-- Service Config -->
<!-- ===================================================== -->
<property name="DEFAULT_SERVICE_PATTERN"
value="%-16X{traceId} %-12X{clientId:--} %-16X{method} %-25logger{0} %msg"/>
<property name="DEFAULT_ACCESS_PATTERN"
value="%msg"/>
<!-- ===================================================== -->
<!-- Common Config -->
<!-- ===================================================== -->
<!-- JUL/JDK14 to Logback bridge -->
<contextListener class="ch.qos.logback.classic.jul.LevelChangePropagator">
<resetJUL>true</resetJUL>
</contextListener>
<!-- ====================================================================================== -->
<!-- NOTE: The following appenders use a simple TimeBasedRollingPolicy configuration. -->
<!-- You may want to consider using a more advanced SizeAndTimeBasedRollingPolicy. -->
<!-- See: https://logback.qos.ch/manual/appenders.html#SizeAndTimeBasedRollingPolicy -->
<!-- ====================================================================================== -->
<!-- Service Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="SERVICE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.service.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.service.output}.%d.gz</fileNamePattern>
<!-- keep 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- Access Log (rollover daily, keep maximum of 21 days of gzip compressed logs) -->
<appender name="ACCESS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${log.access.output}</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>${log.access.output}.%d.gz</fileNamePattern>
<!-- keep 21 days' worth of history -->
<maxHistory>21</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>${DEFAULT_ACCESS_PATTERN}%n</pattern>
</encoder>
</appender>
<!--LogLens -->
<appender name="LOGLENS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/service</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- LogLens Access -->
<appender name="LOGLENS-ACCESS" class="com.twitter.loglens.logback.LoglensAppender">
<mdcAdditionalContext>true</mdcAdditionalContext>
<category>${log.lens.category}</category>
<index>${log.lens.index}</index>
<tag>${log.lens.tag}/access</tag>
<encoder>
<pattern>%msg</pattern>
</encoder>
</appender>
<!-- Pipeline Execution Logs -->
<appender name="ALLOW-LISTED-PIPELINE-EXECUTIONS" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>allow_listed_pipeline_executions.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<!-- daily rollover -->
<fileNamePattern>allow_listed_pipeline_executions.log.%d.gz</fileNamePattern>
<!-- keep 7 days' worth of history -->
<maxHistory>7</maxHistory>
<cleanHistoryOnStart>true</cleanHistoryOnStart>
</rollingPolicy>
<encoder>
<pattern>%date %.-3level ${DEFAULT_SERVICE_PATTERN}%n</pattern>
</encoder>
</appender>
<!-- ===================================================== -->
<!-- Primary Async Appenders -->
<!-- ===================================================== -->
<!-- Queue size and max flush time referenced by every async appender defined below.
     The ${name:-default} form falls back to the value after the ":-" separator when the
     variable is not set, so queue.size falls back to 50000 and max.flush.time to 0. -->
<property name="async_queue_size" value="${queue.size:-50000}"/>
<property name="async_max_flush_time" value="${max.flush.time:-0}"/>
<appender name="ASYNC-SERVICE" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="SERVICE"/>
</appender>
<appender name="ASYNC-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ACCESS"/>
</appender>
<appender name="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="ALLOW-LISTED-PIPELINE-EXECUTIONS"/>
</appender>
<appender name="ASYNC-LOGLENS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS"/>
</appender>
<appender name="ASYNC-LOGLENS-ACCESS" class="com.twitter.inject.logback.AsyncAppender">
<queueSize>${async_queue_size}</queueSize>
<maxFlushTime>${async_max_flush_time}</maxFlushTime>
<appender-ref ref="LOGLENS-ACCESS"/>
</appender>
<!-- ===================================================== -->
<!-- Package Config -->
<!-- ===================================================== -->
<!-- Per-Package Config -->
<logger name="com.twitter" level="info"/>
<logger name="com.twitter.wilyns" level="warn"/>
<logger name="com.twitter.configbus.client.file" level="off"/>
<logger name="com.twitter.finagle.mux" level="warn"/>
<logger name="com.twitter.finagle.serverset2" level="warn"/>
<logger name="com.twitter.logging.ScribeHandler" level="off"/>
<logger name="com.twitter.zookeeper.client.internal" level="warn"/>
<logger name="io.netty.handler.ssl.SslHandler" level="OFF"/>
<!-- Root Config -->
<root level="${log_level:-INFO}">
<appender-ref ref="ASYNC-SERVICE"/>
<appender-ref ref="ASYNC-LOGLENS"/>
</root>
<!-- Access Logging -->
<logger name="com.twitter.finatra.thrift.filters.AccessLoggingFilter"
level="info"
additivity="false">
<appender-ref ref="ASYNC-ACCESS"/>
<appender-ref ref="ASYNC-LOGLENS-ACCESS"/>
</logger>
<!-- Pipeline Executions Log -->
<logger name="com.twitter.product_mixer.core.service.pipeline_execution_logger"
level="info"
additivity="false">
<appender-ref ref="ASYNC-ALLOW-LISTED-PIPELINE-EXECUTIONS" />
</logger>
</configuration>

View File

@ -1,48 +0,0 @@
# Library target for the cr-mixer server package: compiles every Scala source in
# this directory. The "fatal_warnings" option set and strict_deps are enabled, so
# (presumably) compiler warnings fail the build and only directly declared
# dependencies are visible to the compiler - confirm against the build tool docs.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
# Third-party DI/logging libraries, the cr-mixer sub-packages and thrift
# definitions, then the Finagle/Finatra/product-mixer/util targets.
dependencies = [
"3rdparty/jvm/com/google/inject:guice",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/net/codingwell:scala-guice",
"3rdparty/jvm/org/slf4j:slf4j-api",
"cr-mixer/server/src/main/resources",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"finagle/finagle-core/src/main",
"finagle/finagle-http/src/main/scala",
"finagle/finagle-thriftmux/src/main/scala",
"finatra-internal/mtls-http/src/main/scala",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/http-core/src/main/java/com/twitter/finatra/http",
"finatra/inject/inject-app/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-server/src/main/scala",
"finatra/inject/inject-utils/src/main/scala",
"finatra/utils/src/main/java/com/twitter/finatra/annotations",
"hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/controllers",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters",
"src/thrift/com/twitter/timelines/render:thrift-scala",
"thrift-web-forms/src/main/scala/com/twitter/thriftwebforms",
"thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view",
"timelines/src/main/scala/com/twitter/timelines/features/app",
"twitter-server-internal",
"twitter-server/server/src/main/scala",
"util/util-app/src/main/scala",
"util/util-core:scala",
"util/util-slf4j-api/src/main/scala",
],
)

View File

@ -1,18 +0,0 @@
package com.twitter.cr_mixer
import com.twitter.finatra.http.routing.HttpWarmup
import com.twitter.finatra.httpclient.RequestBuilder._
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Warm-up handler for the HTTP side of the server.
 *
 * Issues a single GET against the admin product-pipelines endpoint. Any failure captured by
 * the `Try` is logged at error level and otherwise ignored, so it never propagates to the
 * caller.
 */
@Singleton
class CrMixerHttpServerWarmupHandler @Inject() (warmup: HttpWarmup) extends Handler with Logging {

  override def handle(): Unit = {
    // The request is built and sent inside the Try so that both steps are covered by it.
    val outcome = Try {
      warmup.send(get("/admin/cr-mixer/product-pipelines"), admin = true)()
    }
    outcome.onFailure { cause =>
      error(cause.getMessage, cause)
    }
  }
}

View File

@ -1,229 +0,0 @@
package com.twitter.cr_mixer
import com.google.inject.Module
import com.twitter.cr_mixer.controller.CrMixerThriftController
import com.twitter.cr_mixer.featureswitch.SetImpressedBucketsLocalContextFilter
import com.twitter.cr_mixer.module.ActivePromotedTweetStoreModule
import com.twitter.cr_mixer.module.CertoStratoStoreModule
import com.twitter.cr_mixer.module.CrMixerParamConfigModule
import com.twitter.cr_mixer.module.EmbeddingStoreModule
import com.twitter.cr_mixer.module.FrsStoreModule
import com.twitter.cr_mixer.module.MHMtlsParamsModule
import com.twitter.cr_mixer.module.OfflineCandidateStoreModule
import com.twitter.cr_mixer.module.RealGraphStoreMhModule
import com.twitter.cr_mixer.module.RealGraphOonStoreModule
import com.twitter.cr_mixer.module.RepresentationManagerModule
import com.twitter.cr_mixer.module.RepresentationScorerModule
import com.twitter.cr_mixer.module.TweetInfoStoreModule
import com.twitter.cr_mixer.module.TweetRecentEngagedUserStoreModule
import com.twitter.cr_mixer.module.TweetRecommendationResultsStoreModule
import com.twitter.cr_mixer.module.TripCandidateStoreModule
import com.twitter.cr_mixer.module.TwhinCollabFilterStratoStoreModule
import com.twitter.cr_mixer.module.UserSignalServiceColumnModule
import com.twitter.cr_mixer.module.UserSignalServiceStoreModule
import com.twitter.cr_mixer.module.UserStateStoreModule
import com.twitter.cr_mixer.module.core.ABDeciderModule
import com.twitter.cr_mixer.module.core.CrMixerFlagModule
import com.twitter.cr_mixer.module.core.CrMixerLoggingABDeciderModule
import com.twitter.cr_mixer.module.core.FeatureContextBuilderModule
import com.twitter.cr_mixer.module.core.FeatureSwitchesModule
import com.twitter.cr_mixer.module.core.KafkaProducerModule
import com.twitter.cr_mixer.module.core.LoggerFactoryModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SimClustersANNSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUnifiedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedQigSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedTwHINSimlarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserAdGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserTweetGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserVideoGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.TwhinCollabFilterLookupSimilarityEngineModule
import com.twitter.cr_mixer.module.ConsumersBasedUserAdGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserTweetGraphStoreModule
import com.twitter.cr_mixer.module.ConsumersBasedUserVideoGraphStoreModule
import com.twitter.cr_mixer.module.DiffusionStoreModule
import com.twitter.cr_mixer.module.EarlybirdRecencyBasedCandidateStoreModule
import com.twitter.cr_mixer.module.TwiceClustersMembersStoreModule
import com.twitter.cr_mixer.module.StrongTiePredictionStoreModule
import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.EarlybirdSearchClientModule
import com.twitter.cr_mixer.module.thrift_client.FrsClientModule
import com.twitter.cr_mixer.module.thrift_client.QigServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.SimClustersAnnServiceClientModule
import com.twitter.cr_mixer.module.thrift_client.TweetyPieClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphPlusClientModule
import com.twitter.cr_mixer.module.thrift_client.UserVideoGraphClientModule
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.finagle.Filter
import com.twitter.finatra.annotations.DarkTrafficFilterType
import com.twitter.finatra.decider.modules.DeciderModule
import com.twitter.finatra.http.HttpServer
import com.twitter.finatra.http.routing.HttpRouter
import com.twitter.finatra.jackson.modules.ScalaObjectMapperModule
import com.twitter.finatra.mtls.http.{Mtls => HttpMtls}
import com.twitter.finatra.mtls.thriftmux.Mtls
import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule
import com.twitter.finatra.thrift.ThriftServer
import com.twitter.finatra.thrift.filters._
import com.twitter.finatra.thrift.routing.ThriftRouter
import com.twitter.hydra.common.model_config.{ConfigModule => HydraConfigModule}
import com.twitter.inject.thrift.modules.ThriftClientIdModule
import com.twitter.product_mixer.core.module.LoggingThrowableExceptionMapper
import com.twitter.product_mixer.core.module.StratoClientModule
import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule
import com.twitter.relevance_platform.common.filters.ClientStatsFilter
import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule
import com.twitter.cr_mixer.module.SimClustersANNServiceNameToClientMapper
import com.twitter.cr_mixer.module.SkitStratoStoreModule
import com.twitter.cr_mixer.module.BlueVerifiedAnnotationStoreModule
import com.twitter.cr_mixer.module.core.TimeoutConfigModule
import com.twitter.cr_mixer.module.grpc_client.NaviGRPCClientModule
import com.twitter.cr_mixer.module.similarity_engine.CertoTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.ConsumerBasedWalsSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.DiffusionBasedSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.EarlybirdSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.SkitTopicTweetSimilarityEngineModule
import com.twitter.cr_mixer.module.similarity_engine.UserTweetEntityGraphSimilarityEngineModule
import com.twitter.cr_mixer.module.thrift_client.HydraPartitionClientModule
import com.twitter.cr_mixer.module.thrift_client.HydraRootClientModule
import com.twitter.cr_mixer.module.thrift_client.UserAdGraphClientModule
import com.twitter.cr_mixer.module.thrift_client.UserTweetEntityGraphClientModule
import com.twitter.thriftwebforms.MethodOptions
// Singleton instance of CrMixerServer; this is the object named as `main` by the
// cr-mixer `bin` build target.
object CrMixerServerMain extends CrMixerServer
/**
 * CR-Mixer server definition.
 *
 * Combines a Thrift server and an HTTP server (both with their Mtls traits mixed in),
 * declares the full list of injection modules the service needs, and wires the Thrift
 * filter chain, exception mapper and controller.
 */
class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls {
override val name = "cr-mixer-server"
// Service-wide infrastructure modules: deciders, feature switches, flags, logging,
// Kafka producer, object mapper, client id, and the dark-traffic filter module.
private val coreModules = Seq(
ABDeciderModule,
CrMixerFlagModule,
CrMixerLoggingABDeciderModule,
CrMixerParamConfigModule,
new DarkTrafficFilterModule[st.CrMixer.ReqRepServicePerEndpoint](),
DeciderModule,
FeatureContextBuilderModule,
FeatureSwitchesModule,
KafkaProducerModule,
LoggerFactoryModule,
MHMtlsParamsModule,
ProductMixerFlagModule,
ScalaObjectMapperModule,
ThriftClientIdModule
)
// Modules from the `thrift_client` package, one per downstream Thrift service.
private val thriftClientModules = Seq(
AnnQueryServiceClientModule,
EarlybirdSearchClientModule,
FrsClientModule,
HydraPartitionClientModule,
HydraRootClientModule,
QigServiceClientModule,
SimClustersAnnServiceClientModule,
TweetyPieClientModule,
UserAdGraphClientModule,
UserTweetEntityGraphClientModule,
UserTweetGraphClientModule,
UserTweetGraphPlusClientModule,
UserVideoGraphClientModule,
)
// Modules from the `grpc_client` package.
private val grpcClientModules = Seq(
NaviGRPCClientModule
)
// Modules sorted alphabetically, please keep the order when adding a new module
// NOTE(review): the list below is only approximately alphabetical - for example
// DiffusionBasedSimilarityEngineModule and BlueVerifiedAnnotationStoreModule sit at the
// end, and several Consumer*/Twice*/Tweet* entries are out of order.
//
// Final module list: the three groups above, followed by the store and similarity-engine
// modules and the Thrift web forms module.
override val modules: Seq[Module] =
coreModules ++ thriftClientModules ++ grpcClientModules ++
Seq(
ActivePromotedTweetStoreModule,
CertoStratoStoreModule,
CertoTopicTweetSimilarityEngineModule,
ConsumersBasedUserAdGraphSimilarityEngineModule,
ConsumersBasedUserTweetGraphStoreModule,
ConsumersBasedUserVideoGraphSimilarityEngineModule,
ConsumersBasedUserVideoGraphStoreModule,
ConsumerEmbeddingBasedTripSimilarityEngineModule,
ConsumerEmbeddingBasedTwHINSimilarityEngineModule,
ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule,
ConsumersBasedUserAdGraphStoreModule,
ConsumerBasedWalsSimilarityEngineModule,
DiffusionStoreModule,
EmbeddingStoreModule,
EarlybirdSimilarityEngineModule,
EarlybirdRecencyBasedCandidateStoreModule,
FrsStoreModule,
HydraConfigModule,
OfflineCandidateStoreModule,
ProducerBasedUnifiedSimilarityEngineModule,
ProducerBasedUserAdGraphSimilarityEngineModule,
ProducerBasedUserTweetGraphSimilarityEngineModule,
RealGraphOonStoreModule,
RealGraphStoreMhModule,
RepresentationManagerModule,
RepresentationScorerModule,
SimClustersANNServiceNameToClientMapper,
SimClustersANNSimilarityEngineModule,
SkitStratoStoreModule,
SkitTopicTweetSimilarityEngineModule,
StratoClientModule,
StrongTiePredictionStoreModule,
TimeoutConfigModule,
TripCandidateStoreModule,
TwiceClustersMembersStoreModule,
TweetBasedQigSimilarityEngineModule,
TweetBasedTwHINSimlarityEngineModule,
TweetBasedUnifiedSimilarityEngineModule,
TweetBasedUserAdGraphSimilarityEngineModule,
TweetBasedUserTweetGraphSimilarityEngineModule,
TweetBasedUserVideoGraphSimilarityEngineModule,
TweetInfoStoreModule,
TweetRecentEngagedUserStoreModule,
TweetRecommendationResultsStoreModule,
TwhinCollabFilterStratoStoreModule,
TwhinCollabFilterLookupSimilarityEngineModule,
UserSignalServiceColumnModule,
UserSignalServiceStoreModule,
UserStateStoreModule,
UserTweetEntityGraphSimilarityEngineModule,
DiffusionBasedSimilarityEngineModule,
BlueVerifiedAnnotationStoreModule,
// Thrift web forms for the CrMixer service; the default method access is overridden
// to be by LDAP group, using the three groups listed below.
new MtlsThriftWebFormsModule[st.CrMixer.MethodPerEndpoint](this) {
override protected def defaultMethodAccess: MethodOptions.Access = {
MethodOptions.Access.ByLdapGroup(
Seq(
"cr-mixer-admins",
"recosplat-sensitive-data-medium",
"recos-platform-admins",
))
}
}
)
// Registers the Thrift filters, the exception mapper and the controller on the router.
// NOTE(review): filters are registered in the order written here; presumably that is also
// the order in which they wrap a request, so do not reorder without checking ThriftRouter.
def configureThrift(router: ThriftRouter): Unit = {
router
.filter[LoggingMDCFilter]
.filter[TraceIdMDCFilter]
.filter[ThriftMDCFilter]
.filter[ClientStatsFilter]
.filter[AccessLoggingFilter]
.filter[SetImpressedBucketsLocalContextFilter]
.filter[ExceptionMappingFilter]
.filter[Filter.TypeAgnostic, DarkTrafficFilterType]
.exceptionMapper[LoggingThrowableExceptionMapper]
.add[CrMixerThriftController]
}
// Runs the Thrift warm-up handler first, then the HTTP warm-up handler.
override protected def warmup(): Unit = {
handle[CrMixerThriftServerWarmupHandler]()
handle[CrMixerHttpServerWarmupHandler]()
}
}

View File

@ -1,75 +0,0 @@
package com.twitter.cr_mixer
import com.twitter.finagle.thrift.ClientId
import com.twitter.finatra.thrift.routing.ThriftWarmup
import com.twitter.inject.Logging
import com.twitter.inject.utils.Handler
import com.twitter.product_mixer.core.{thriftscala => pt}
import com.twitter.cr_mixer.{thriftscala => st}
import com.twitter.scrooge.Request
import com.twitter.scrooge.Response
import com.twitter.util.Return
import com.twitter.util.Throw
import com.twitter.util.Try
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Warm-up handler for the Thrift side of the server.
 *
 * Sends one synthetic `GetTweetRecommendations` request (Home product) per test user id
 * through [[ThriftWarmup]], under a dedicated warm-up client id. Warm-up is best effort:
 * non-fatal failures are logged and never propagated to the caller.
 */
@Singleton
class CrMixerThriftServerWarmupHandler @Inject() (warmup: ThriftWarmup)
    extends Handler
    with Logging {

  private val clientId = ClientId("thrift-warmup-client")

  /**
   * Sends the warm-up requests and logs completion.
   *
   * Non-fatal exceptions raised while sending are logged at error level and swallowed.
   * Fatal errors (as classified by `scala.util.control.NonFatal`, e.g. out-of-memory or
   * thread interruption) are deliberately left to propagate instead of being hidden.
   */
  override def handle(): Unit = {
    // Long literals: warmupQuery takes a Long user id, so avoid relying on Int widening.
    val testIds = Seq(1L, 2L, 3L)
    try {
      clientId.asCurrent {
        testIds.foreach { id =>
          val warmupReq = warmupQuery(id)
          info(s"Sending warm-up request to service with query: $warmupReq")
          warmup.sendRequest(
            method = st.CrMixer.GetTweetRecommendations,
            req = Request(st.CrMixer.GetTweetRecommendations.Args(warmupReq)))(assertWarmupResponse)
        }
      }
    } catch {
      // we don't want a warmup failure to prevent start-up
      case scala.util.control.NonFatal(e) =>
        error(e.getMessage, e)
    }
    info("Warm-up done.")
  }

  /**
   * Builds the synthetic Home-product request used for warm-up.
   *
   * @param userId user id placed in the request's client context
   * @return a request carrying placeholder client-context values and an empty `HomeContext`
   */
  private def warmupQuery(userId: Long): st.CrMixerTweetRequest = {
    val clientContext = pt.ClientContext(
      userId = Some(userId),
      guestId = None,
      appId = Some(258901L),
      ipAddress = Some("0.0.0.0"),
      userAgent = Some("FAKE_USER_AGENT_FOR_WARMUPS"),
      countryCode = Some("US"),
      languageCode = Some("en"),
      isTwoffice = None,
      userRoles = None,
      deviceId = Some("FAKE_DEVICE_ID_FOR_WARMUPS")
    )
    st.CrMixerTweetRequest(
      clientContext = clientContext,
      product = st.Product.Home,
      productContext = Some(st.ProductContext.HomeContext(st.HomeContext())),
    )
  }

  /**
   * Response callback for a warm-up request: successes are ignored, failures are logged.
   *
   * @param result outcome of a single warm-up request
   */
  private def assertWarmupResponse(
    result: Try[Response[st.CrMixer.GetTweetRecommendations.SuccessType]]
  ): Unit = {
    // we collect and log any exceptions from the result.
    result match {
      case Return(_) => // ok
      case Throw(exception) =>
        warn("Error performing warm-up request.")
        error(exception.getMessage, exception)
    }
  }
}

View File

@ -1,77 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.mutable
/**
 * Blends several groups of ads candidates into a single interleaved sequence and attaches,
 * to every blended candidate, all candidate-generation infos recorded for its tweet id
 * across the input.
 */
@Singleton
case class AdsBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Alternates between InterestedIn candidates and TWISTLY candidates.
   *
   * A group counts as InterestedIn when its first candidate carries no source signal, and as
   * TWISTLY otherwise. TWISTLY groups are interleaved with each other first; that result is
   * then interleaved with the flattened InterestedIn candidates so both sides are blended
   * evenly.
   *
   * @param inputCandidates groups of candidates; empty groups are ignored for interleaving
   * @return the blended candidates, already wrapped in a completed Future
   */
  def blend(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
  ): Future[Seq[BlendedAdsCandidate]] = {
    // Drop empty groups up front so that `head` below is always defined.
    val nonEmptyGroups = inputCandidates.filterNot(_.isEmpty)
    val (interestedInGroups, twistlyGroups) = nonEmptyGroups.partition { group =>
      group.head.candidateGenerationInfo.sourceInfoOpt.isEmpty
    }

    // Step 1: interleave the TWISTLY groups among themselves.
    val twistlyInterleaved = InterleaveUtil.interleave(twistlyGroups)

    // Step 2: interleave the flattened InterestedIn candidates with the TWISTLY result.
    val bothSides = Seq(interestedInGroups.flatten, twistlyInterleaved)
    val blendOrder = InterleaveUtil.interleave(bothSides)

    stats.stat("candidates").add(blendOrder.size)

    Future.value(buildBlendedAdsCandidate(inputCandidates, blendOrder))
  }

  /**
   * Converts each interleaved candidate into a blended candidate, passing along every
   * candidate-generation info found for its tweet id in the original input.
   */
  private def buildBlendedAdsCandidate(
    inputCandidates: Seq[Seq[InitialAdsCandidate]],
    interleavedCandidates: Seq[InitialAdsCandidate]
  ): Seq[BlendedAdsCandidate] = {
    val infosByTweetId = buildCandidateToCGInfosMap(inputCandidates)
    for (candidate <- interleavedCandidates)
      yield candidate.toBlendedAdsCandidate(infosByTweetId(candidate.tweetId))
  }

  /**
   * Groups candidate-generation infos by tweet id, keeping them in the order in which the
   * candidates are encountered while walking the input groups.
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialAdsCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val infosByTweetId = mutable.HashMap.empty[TweetId, Seq[CandidateGenerationInfo]]
    for {
      group <- candidateSeq
      candidate <- group
    } {
      val seenSoFar = infosByTweetId.getOrElse(candidate.tweetId, Seq.empty)
      infosByTweetId(candidate.tweetId) = seenSoFar :+ candidate.candidateGenerationInfo
    }
    infosByTweetId.toMap
  }
}

View File

@ -1,20 +0,0 @@
# Scala library target built from every *.scala file in this directory.
# Compiled with the "fatal_warnings" option set and with strict_deps enabled.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
# NOTE(review): storehaus:core is listed under both 3rdparty/jvm and 3rdparty/src/jvm below;
# confirm that both entries are actually required.
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
# Sibling cr-mixer packages.
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
],
)

View File

@ -1,48 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import scala.collection.mutable
object BlendedCandidatesBuilder {

  /**
   * Turns interleaved candidates into blended candidates, attaching to each one every
   * CandidateGenerationInfo that produced its tweet.
   *
   * The lookup uses `Map.apply`, so an interleaved tweet id that never appears in
   * `inputCandidates` raises a NoSuchElementException.
   *
   * @param inputCandidates input candidates prior to interleaving
   * @param interleavedCandidates candidates after interleaving. These tweets are de-duplicated.
   */
  def build(
    inputCandidates: Seq[Seq[InitialCandidate]],
    interleavedCandidates: Seq[InitialCandidate]
  ): Seq[BlendedCandidate] = {
    val cgInfosByTweetId = buildCandidateToCGInfosMap(inputCandidates)
    for (candidate <- interleavedCandidates)
      yield candidate.toBlendedCandidate(cgInfosByTweetId(candidate.tweetId))
  }

  /**
   * The same tweet can be generated by different sources. This builds the lookup from a tweet id
   * to all CandidateGenerationInfos that generated it, in encounter order.
   */
  private def buildCandidateToCGInfosMap(
    candidateSeq: Seq[Seq[InitialCandidate]],
  ): Map[TweetId, Seq[CandidateGenerationInfo]] = {
    val infosByTweetId = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]()
    for {
      group <- candidateSeq
      candidate <- group
    } {
      val alreadySeen = infosByTweetId.getOrElse(candidate.tweetId, Seq.empty)
      infosByTweetId.update(candidate.tweetId, alreadySeen :+ candidate.candidateGenerationInfo)
    }
    infosByTweetId.toMap
  }
}

View File

@ -1,121 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Orders candidates using content-based signals only. The algorithm is chosen by
   * `BlenderParams.ContentBlenderTypeSortingAlgorithmParam`:
   *
   *  - CandidateRecency: newest tweet first, based on the snowflake timestamp of the tweet id
   *  - RandomSorting: random order
   *  - FavoriteCount: highest favorite count first (also used for any other value)
   *  - SimilarityToSignalSorting: scores are standardized per similarity engine and candidates
   *    are sorted on the standardized score, highest first
   *
   * After sorting, duplicate tweet ids are removed (first occurrence wins) and the survivors are
   * converted to blended candidates. The "candidates" stat records the size before
   * de-duplication.
   *
   * @param params          request params carrying the sorting algorithm
   * @param inputCandidates candidate groups; empty groups are ignored
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Filter out empty candidate sequence
    val candidates = inputCandidates.filter(_.nonEmpty)

    val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency =>
        candidates.flatten.sortBy(c => getSnowflakeTimeStamp(c.tweetId)).reverse
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        // Shuffle rather than sort on a random key: `sortBy` re-evaluates the key on every
        // comparison, so a random key gives the underlying sort an inconsistent comparator,
        // which the JDK sort may reject with IllegalArgumentException and which does not
        // produce a uniform permutation.
        scala.util.Random.shuffle(candidates.flatten)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.FavoriteCount =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SimilarityToSignalSorting =>
        standardizeAndSortByScore(flattenAndGroupByEngineTypeOrFirstContribEngine(candidates))
      case _ =>
        candidates.flatten.sortBy(-_.tweetInfo.favCount)
    }

    stats.stat("candidates").add(sortedCandidates.size)

    val blendedCandidates =
      BlendedCandidatesBuilder.build(inputCandidates, removeDuplicates(sortedCandidates))
    Future.value(blendedCandidates)
  }

  /** Keeps the first candidate seen for each tweet id, preserving the incoming order. */
  private def removeDuplicates(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = {
    val seen = collection.mutable.Set.empty[Long]
    candidates.filter { c =>
      if (seen.contains(c.tweetId)) {
        false
      } else {
        seen += c.tweetId
        true
      }
    }
  }

  /**
   * Groups candidates by similarity engine type. When a candidate lists contributing
   * similarity engines, the type of the first one is used; otherwise the type of its own
   * similarity engine info is used.
   */
  private def groupByEngineTypeOrFirstContribEngine(
    candidates: Seq[InitialCandidate]
  ): Map[SimilarityEngineType, Seq[InitialCandidate]] = {
    candidates.groupBy { candidate =>
      val contrib = candidate.candidateGenerationInfo.contributingSimilarityEngines
      if (contrib.nonEmpty) {
        contrib.head.similarityEngineType
      } else {
        candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
      }
    }
  }

  /** Flattens the groups and regroups the candidates so that each group shares an engine type. */
  private def flattenAndGroupByEngineTypeOrFirstContribEngine(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[Seq[InitialCandidate]] = {
    val flat = candidates.flatten
    val grouped = groupByEngineTypeOrFirstContribEngine(flat)
    grouped.values.toSeq
  }

  /**
   * Standardizes scores within each group as (score - mean) / stdDev and returns all candidates
   * sorted by standardized score, highest first.
   *
   * A missing score counts as 0.0 when computing the group mean and standard deviation, and the
   * candidate itself receives a standardized score of 0.0. A group whose standard deviation is
   * 0 gives every member a standardized score of 0.0.
   */
  private def standardizeAndSortByScore(
    candidates: Seq[Seq[InitialCandidate]]
  ): Seq[InitialCandidate] = {
    candidates
      .map { innerSeq =>
        // Raw scores for the group, extracted once and reused for both statistics.
        val rawScores = innerSeq
          .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0))
        val meanScore = rawScores.sum / innerSeq.length
        val stdDev = scala.math
          .sqrt(
            rawScores
              .map(a => a - meanScore)
              .map(a => a * a)
              .sum / innerSeq.length)

        innerSeq
          .map(c =>
            (
              c,
              c.candidateGenerationInfo.similarityEngineInfo.score
                .map { score =>
                  if (stdDev != 0) (score - meanScore) / stdDev
                  else 0.0
                }
                .getOrElse(0.0)))
      }.flatten.sortBy { case (_, standardizedScore) => -standardizedScore }
      .map { case (candidate, _) => candidate }
  }

  /**
   * Returns the creation time encoded in a snowflake tweet id. Ids that are not snowflake ids
   * map to the epoch (0 ms), so they sort as the oldest tweets.
   */
  private def getSnowflakeTimeStamp(tweetId: Long): Time = {
    val isSnowflake = SnowflakeId.isSnowflakeId(tweetId)
    if (isSnowflake) {
      SnowflakeId(tweetId).time
    } else {
      Time.fromMilliseconds(0L)
    }
  }
}

View File

@ -1,90 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.util.CountWeightedInterleaveUtil
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * A weighted round robin interleaving algorithm.
 * The weight of each blending group is based on the number of candidates in that group.
 * The more candidates a blending group holds, the more candidates are selected from it during
 * round robin, which in effect prioritizes this group.
 *
 * Weights sum up to 1. For example:
 * total candidates = 8
 *             Group                       Weight
 *         [A1, A2, A3, A4]          4/8 = 0.5  // select 50% of results from group A
 *         [B1, B2]                  2/8 = 0.25 // 25% from group B
 *         [C1, C2]                  2/8 = 0.25 // 25% from group C
 *
 * Blended results = [A1, A2, B1, C1, A3, A4, B2, C2]
 * See @linht's go/weighted-interleave
 */
@Singleton
case class CountWeightedInterleaveBlender @Inject() (globalStats: StatsReceiver) {
  import CountWeightedInterleaveBlender._

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Blends `inputCandidates` with the weighted interleaver, reading the interleaver settings
   * from `query.params`.
   */
  def blend(
    query: CrCandidateGeneratorQuery,
    inputCandidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[BlendedCandidate]] =
    countWeightedInterleave(paramToQuery(query.params), inputCandidates)

  /**
   * Pairs every candidate group with a weight, runs the weighted interleave over those pairs,
   * records the number of interleaved candidates and converts them to blended candidates.
   */
  private[blender] def countWeightedInterleave(
    query: WeightedBlenderQuery,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    val weightedGroups: Seq[(Seq[InitialCandidate], Double)] =
      CountWeightedInterleaveUtil.buildInitialCandidatesWithWeightKeyByFeature(
        inputCandidates,
        query.rankerWeightShrinkage)

    val interleavedCandidates =
      InterleaveUtil.weightedInterleave(weightedGroups, query.maxWeightAdjustments)
    stats.stat("candidates").add(interleavedCandidates.size)

    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates))
  }
}
object CountWeightedInterleaveBlender {

  /**
   * The two settings handed to the weighted interleaver.
   *
   * @param rankerWeightShrinkage shrinkage parameter in [0, 1] that determines how close we stay
   *                              to uniform sampling. The bigger the shrinkage, the closer we
   *                              are to uniform round robin
   * @param maxWeightAdjustments  max number of weighted samplings to do before defaulting to
   *                              uniform. Set so that we avoid infinite loops (e.g. if weights
   *                              are 0)
   */
  case class WeightedBlenderQuery(
    rankerWeightShrinkage: Double,
    maxWeightAdjustments: Int)

  /** Builds a [[WeightedBlenderQuery]] from the two ranking-interleave params. */
  def paramToQuery(params: Params): WeightedBlenderQuery =
    WeightedBlenderQuery(
      rankerWeightShrinkage = params(BlenderParams.RankingInterleaveWeightShrinkageParam),
      maxWeightAdjustments = params(BlenderParams.RankingInterleaveMaxWeightAdjustments)
    )
}

View File

@ -1,33 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
case class InterleaveBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Interleaves the candidate groups via `InterleaveUtil.interleave`, i.e. by taking 1 candidate
   * from each inner Seq in turn until we run out of candidates, records the number of
   * interleaved candidates and converts them to blended candidates.
   */
  def blend(
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    val interleaved = InterleaveUtil.interleave(inputCandidates)
    stats.stat("candidates").add(interleaved.size)
    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleaved))
  }
}

View File

@ -1,64 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypes
import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypesWithVideo
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {

  private val name: String = this.getClass.getCanonicalName
  private val stats: StatsReceiver = globalStats.scope(name)

  /**
   * Splits the candidate groups into regular and back fill groups by source type, interleaves
   * each partition on its own, and appends the back fill candidates after the regular ones.
   *
   * @param params          decides whether the video source types count as back fill
   * @param inputCandidates candidate groups; empty groups are ignored
   */
  def blend(
    params: Params,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Empty groups are dropped up front, which makes the `.head` access below safe.
    val nonEmptyGroups = inputCandidates.filter(_.nonEmpty)

    val backFillSourceTypes =
      if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) {
        BackFillSourceTypesWithVideo
      } else {
        BackFillSourceTypes
      }

    // A group is classified by the source type of its first candidate; groups without a source
    // signal are always regular.
    val (backFillGroups, regularGroups) = nonEmptyGroups.partition { group =>
      group.head.candidateGenerationInfo.sourceInfoOpt match {
        case Some(sourceInfo) => backFillSourceTypes.contains(sourceInfo.sourceType)
        case None => false
      }
    }

    val regularInterleaved = InterleaveUtil.interleave(regularGroups)
    val backFillInterleaved = InterleaveUtil.interleave(backFillGroups)
    stats.stat("backFillCandidates").add(backFillInterleaved.size)

    // Back fill candidates always come after the regular ones.
    val interleavedCandidates = regularInterleaved ++ backFillInterleaved
    stats.stat("candidates").add(interleavedCandidates.size)

    Future.value(BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates))
  }
}
object ImplicitSignalBackFillBlender {

  /** Source types treated as back fill when video back fill is disabled. */
  final val BackFillSourceTypes: Set[SourceType] = Set(SourceType.UserRepeatedProfileVisit)

  /** Source types treated as back fill when video back fill is enabled. */
  final val BackFillSourceTypesWithVideo: Set[SourceType] =
    BackFillSourceTypes ++ Set(
      SourceType.VideoTweetPlayback50,
      SourceType.VideoTweetQualityView)
}

View File

@ -1,81 +0,0 @@
package com.twitter.cr_mixer.blender
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.BlenderParams
import com.twitter.cr_mixer.param.BlenderParams.BlendingAlgorithmEnum
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
case class SwitchBlender @Inject() (
  defaultBlender: InterleaveBlender,
  sourceTypeBackFillBlender: SourceTypeBackFillBlender,
  adsBlender: AdsBlender,
  contentSignalBlender: ContentSignalBlender,
  globalStats: StatsReceiver) {

  private val stats = globalStats.scope(this.getClass.getCanonicalName)

  /**
   * Orders the candidate groups and hands them to the blender selected by
   * `BlenderParams.BlendingAlgorithmParam`.
   *
   * Group ordering is selected by `BlenderParams.SignalTypeSortingAlgorithmParam`: random order
   * for RandomSorting, otherwise newest source signal first (see
   * [[SwitchBlender.TimestampOrder]]).
   *
   * @param params          request params carrying both algorithm choices
   * @param userState       not used by this method
   * @param inputCandidates candidate groups; empty groups are dropped before blending
   */
  def blend(
    params: Params,
    userState: UserState,
    inputCandidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[BlendedCandidate]] = {
    // Take out empty seq
    val nonEmptyCandidates = inputCandidates.filter(_.nonEmpty)
    // The stat counts every incoming sequence, including the empty ones.
    stats.stat("num_of_sequences").add(inputCandidates.size)

    // Each group is represented by its head, which is safe because empty groups were removed.
    def newestSignalFirst: Seq[Seq[InitialCandidate]] =
      nonEmptyCandidates.sortBy(_.head)(SwitchBlender.TimestampOrder)

    // Sort the seqs in an order
    val candidatesToBlend = params(BlenderParams.SignalTypeSortingAlgorithmParam) match {
      case BlenderParams.ContentBasedSortingAlgorithmEnum.SourceSignalRecency =>
        newestSignalFirst
      case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting =>
        // Shuffle instead of sorting with a random Ordering: an Ordering that draws a new
        // random key on every comparison is inconsistent, which the JDK sort may reject with
        // IllegalArgumentException and which does not produce a uniform permutation.
        scala.util.Random.shuffle(nonEmptyCandidates)
      case _ =>
        newestSignalFirst
    }

    // Blend based on specified blender rules
    params(BlenderParams.BlendingAlgorithmParam) match {
      case BlendingAlgorithmEnum.RoundRobin =>
        defaultBlender.blend(candidatesToBlend)
      case BlendingAlgorithmEnum.SourceTypeBackFill =>
        sourceTypeBackFillBlender.blend(params, candidatesToBlend)
      case BlendingAlgorithmEnum.SourceSignalSorting =>
        contentSignalBlender.blend(params, candidatesToBlend)
      case _ => defaultBlender.blend(candidatesToBlend)
    }
  }
}

object SwitchBlender {

  /**
   * Prefers candidates generated from sources with the latest timestamps.
   * The newer the source signal, the higher a candidate ranks.
   * This ordering biases against consumer-based candidates because their timestamp defaults to 0
   *
   * Within a Seq[Seq[Candidate]], all candidates within an inner Seq
   * are guaranteed to have the same sourceInfo because they are grouped by (sourceInfo, SE model).
   * Hence, we can pick .head to represent the whole list when ordering by the sourceEventTime of
   * the sourceInfoOpt. But of course the similarityEngine score in a CGInfo could be different.
   */
  val TimestampOrder: Ordering[InitialCandidate] =
    math.Ordering
      .by[InitialCandidate, Time](
        _.candidateGenerationInfo.sourceInfoOpt
          .flatMap(_.sourceEventTime)
          .getOrElse(Time.fromMilliseconds(0L)))
      .reverse
}

View File

@ -1,140 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.blender.AdsBlender
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.BlendedAdsCandidate
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.RankedAdsCandidate
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.AdsParams
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
import com.twitter.cr_mixer.source_signal.UssSourceSignalFetcher
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
@Singleton
class AdsCandidateGenerator @Inject() (
  ussSourceSignalFetcher: UssSourceSignalFetcher,
  realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
  adsCandidateSourceRouter: AdsCandidateSourcesRouter,
  adsBlender: AdsBlender,
  scribeLogger: AdsRecommendationsScribeLogger,
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val fetchSourcesStats = stats.scope("fetchSources")
  private val fetchRealGraphSeedsStats = stats.scope("fetchRealGraphSeeds")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val interleaveStats = stats.scope("interleave")
  private val rankStats = stats.scope("rank")

  /**
   * Runs the ads candidate pipeline for one request: fetch source signals, fetch RealGraph
   * seeds, fetch candidates, blend them, rank them, and return at most `query.maxNumResults`
   * ranked candidates. Each stage is wrapped in its own stats tracker, and the whole pipeline is
   * tracked both under "all" and under "perProduct" scoped by the product name.
   */
  def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)

    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {
        for {
          // Stage 1: source signals
          signals <- StatsUtil.trackBlockStats(fetchSourcesStats) {
            fetchSources(query)
          }
          // Stage 2: RealGraph seeds
          seeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) {
            fetchSeeds(query)
          }
          // Stage 3: initial candidates from the similarity engines; lineItemInfo is hydrated
          // and non-active ads are filtered out
          fetched <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
            fetchCandidates(query, signals, seeds)
          }
          // Stage 4: blending
          blended <- StatsUtil.trackItemsStats(interleaveStats) {
            adsBlender.blend(fetched)
          }
          // Stage 5: ranking
          ranked <- StatsUtil.trackItemsStats(rankStats) {
            rank(
              blended,
              query.params(AdsParams.EnableScoreBoost),
              query.params(AdsParams.AdsCandidateGenerationScoreBoostFactor),
              rankStats)
          }
        } yield ranked.take(query.maxNumResults)
      }
    }
  }

  /**
   * Fetches the source signals for the request user. A missing result is treated as no signals.
   */
  def fetchSources(
    query: AdsCandidateGeneratorQuery
  ): Future[Set[SourceInfo]] = {
    val fetcherQuery =
      FetcherQuery(query.userId, query.product, query.userState, query.params)
    ussSourceSignalFetcher.get(fetcherQuery).map { maybeSignals =>
      maybeSignals.getOrElse(Seq.empty).toSet
    }
  }

  /**
   * Fetches candidates from the source router and passes them through the scribe logger, which
   * receives the `AdsParams.EnableScribe` flag.
   */
  private def fetchCandidates(
    query: AdsCandidateGeneratorQuery,
    sourceSignals: Set[SourceInfo],
    realGraphSeeds: Map[UserId, Double]
  ): Future[Seq[Seq[InitialAdsCandidate]]] =
    scribeLogger.scribeInitialAdsCandidates(
      query,
      adsCandidateSourceRouter
        .fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params),
      query.params(AdsParams.EnableScribe)
    )

  /**
   * Fetches the RealGraph seeds with their scores. Returns an empty map when the
   * consumers-based user-ad-graph source is disabled or when the fetcher yields nothing.
   */
  private def fetchSeeds(
    query: AdsCandidateGeneratorQuery
  ): Future[Map[UserId, Double]] = {
    if (!query.params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
      Future.value(Map.empty[UserId, Double])
    } else {
      realGraphInSourceGraphFetcher
        .get(FetcherQuery(query.userId, query.product, query.userState, query.params))
        .map { maybeGraph =>
          maybeGraph.map(_.seedWithScores).getOrElse(Map.empty)
        }
    }
  }

  /**
   * Assigns a position-based score to every blended candidate: the first candidate scores 1.0
   * and scores decrease linearly with position, staying above 0.5. When `enableScoreBoost` is
   * set the score is multiplied by `scoreBoostFactor`. The score (times 100) is recorded under
   * the "boostedScore" or "score" stat respectively.
   */
  private def rank(
    candidates: Seq[BlendedAdsCandidate],
    enableScoreBoost: Boolean,
    scoreBoostFactor: Double,
    statsReceiver: StatsReceiver,
  ): Future[Seq[RankedAdsCandidate]] = {
    val total = candidates.size
    val ranked = candidates.zipWithIndex.map {
      case (candidate, position) =>
        val positionScore = 0.5 + 0.5 * ((total - position).toDouble / total)
        val finalScore =
          if (enableScoreBoost) {
            statsReceiver
              .stat("boostedScore").add((100.0 * positionScore * scoreBoostFactor).toFloat)
            positionScore * scoreBoostFactor
          } else {
            statsReceiver.stat("score").add((100.0 * positionScore).toFloat)
            positionScore
          }
        candidate.toRankedAdsCandidate(finalScore)
    }
    Future.value(ranked)
  }
}

View File

@ -1,516 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.ModelConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.InterestedInParams
import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.SimClustersANNParams
import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.FilterUtil
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
case class AdsCandidateSourcesRouter @Inject() (
activePromotedTweetStore: ReadableStore[TweetId, Seq[LineItemInfo]],
decider: CrMixerDecider,
@Named(ModuleNames.SimClustersANNSimilarityEngine) simClustersANNSimilarityEngine: StandardSimilarityEngine[
Query,
TweetWithScore
],
@Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine)
tweetBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
TweetBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine)
consumersBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
ConsumersBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine)
producerBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUserAdGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine)
tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) consumerTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
ConsumerBasedWalsSimilarityEngine.Query,
TweetWithScore
],
globalStats: StatsReceiver,
) {
import AdsCandidateSourcesRouter._
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
/**
 * Queries every enabled ads candidate source and returns the surviving candidates, grouped by
 * the call that produced them.
 *
 * Each source is guarded by its own enable param; a disabled source contributes an empty
 * sequence. Tweet-based and producer-based sources issue one call per matching source signal,
 * while the RealGraph, InterestedIn, consumer TwHIN and WALS sources issue a single call each.
 * All results are then converted with convertToInitialCandidates and groups that end up empty
 * are dropped.
 *
 * @param requestUserId  user id passed to the SimClusters ANN and TwHIN lookups
 * @param sourceSignals  source signals; narrowed to tweet-based or producer-based signals per
 *                       source, and passed whole to the WALS source
 * @param realGraphSeeds seeds passed to the RealGraph consumers-based user-ad-graph source
 * @param params         request params carrying the enable flags, config ids and min scores
 * @return the non-empty groups of initial ads candidates
 */
def fetchCandidates(
requestUserId: UserId,
sourceSignals: Set[SourceInfo],
realGraphSeeds: Map[UserId, Double],
params: configapi.Params
): Future[Seq[Seq[InitialAdsCandidate]]] = {
// --- Tweet-based sources: one call per tweet-based source signal ---
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
val tweetBasedSANNMinScore = params(
TweetBasedCandidateGenerationParams.SimClustersMinScoreParam)
val tweetBasedSANN1Candidates =
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN1ConfigId,
tweetBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
val tweetBasedSANN2Candidates =
if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN2ConfigId,
tweetBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val tweetBasedUagCandidates =
if (params(TweetBasedCandidateGenerationParams.EnableUAGParam)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getTweetBasedUserAdGraphCandidates(Some(sourceInfo), params)
})
} else Future.value(Seq.empty)
// --- RealGraph seeds: a single call, wrapped as one group ---
val realGraphInNetworkBasedUagCandidates =
if (params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) {
getRealGraphConsumersBasedUserAdGraphCandidates(realGraphSeeds, params).map(Seq(_))
} else Future.value(Seq.empty)
// --- Producer-based user-ad-graph: one call per producer-based source signal ---
val producerBasedUagCandidates =
if (params(ProducerBasedCandidateGenerationParams.EnableUAGParam)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getProducerBasedUserAdGraphCandidates(Some(sourceInfo), params)
})
} else Future.value(Seq.empty)
// NOTE(review): both TwHIN lookups in this method pass the fixed ModelConfig.DebuggerDemo
// rather than a value read from params; confirm this is intended.
val tweetBasedTwhinAdsCandidates =
if (params(TweetBasedCandidateGenerationParams.EnableTwHINParam)) {
Future.collect(
CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getTwHINAdsCandidates(
tweetBasedTwHINANNSimilarityEngine,
SimilarityEngineType.TweetBasedTwHINANN,
requestUserId,
Some(sourceInfo),
ModelConfig.DebuggerDemo)
})
} else Future.value(Seq.empty)
// --- Producer-based SimClusters ANN: one call per producer-based source signal ---
val producerBasedSANNMinScore = params(
ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam)
val producerBasedSANN1Candidates =
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN1ConfigId,
producerBasedSANNMinScore)
})
} else Future.value(Seq.empty)
val producerBasedSANN2Candidates =
if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param)) {
Future.collect(
CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo =>
getSimClustersANNCandidates(
requestUserId,
Some(sourceInfo),
params,
simClustersANN2ConfigId,
producerBasedSANNMinScore)
})
} else Future.value(Seq.empty)
// --- InterestedIn: no source signal (None), a single call per config, one group each ---
val interestedInMinScore = params(InterestedInParams.MinScoreParam)
val interestedInSANN1Candidates = if (params(InterestedInParams.EnableSimClustersANN1Param)) {
getSimClustersANNCandidates(
requestUserId,
None,
params,
simClustersANN1ConfigId,
interestedInMinScore).map(Seq(_))
} else Future.value(Seq.empty)
val interestedInSANN2Candidates = if (params(InterestedInParams.EnableSimClustersANN2Param)) {
getSimClustersANNCandidates(
requestUserId,
None,
params,
simClustersANN2ConfigId,
interestedInMinScore).map(Seq(_))
} else Future.value(Seq.empty)
// --- Consumer embedding TwHIN: no source signal, a single call ---
val consumerTwHINAdsCandidates =
if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) {
getTwHINAdsCandidates(
consumerTwHINANNSimilarityEngine,
SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN,
requestUserId,
None,
ModelConfig.DebuggerDemo).map(Seq(_))
} else Future.value(Seq.empty)
// --- Consumer-based WALS: a single call over all source signals, wrapped as one group ---
val consumerBasedWalsCandidates =
if (params(
ConsumerBasedWalsParams.EnableSourceParam
)) {
getConsumerBasedWalsCandidates(sourceSignals, params)
}.map {
Seq(_)
}
else Future.value(Seq.empty)
// Wait for every source, flatten the per-source lists of groups into one list of groups,
// then convert each group to initial ads candidates.
Future
.collect(Seq(
tweetBasedSANN1Candidates,
tweetBasedSANN2Candidates,
tweetBasedUagCandidates,
tweetBasedTwhinAdsCandidates,
producerBasedUagCandidates,
producerBasedSANN1Candidates,
producerBasedSANN2Candidates,
realGraphInNetworkBasedUagCandidates,
interestedInSANN1Candidates,
interestedInSANN2Candidates,
consumerTwHINAdsCandidates,
consumerBasedWalsCandidates,
)).map(_.flatten).map { tweetsWithCGInfoSeq =>
Future.collect(
tweetsWithCGInfoSeq.map(candidates => convertToInitialCandidates(candidates, stats)))
}.flatten.map { candidatesLists =>
// Drop groups that are empty after conversion, then record group and candidate counts
// (the candidate count may include duplicates across groups).
val result = candidatesLists.filter(_.nonEmpty)
stats.stat("numOfSequences").add(result.size)
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
result
}
}
/**
 * Looks up the line item info of every candidate tweet in the active promoted tweet store and
 * keeps only the candidates for which it exists, preserving their order.
 *
 * If lineItemInfo does not exist, we filter out the promoted tweet as it cannot be targeted and
 * ranked in admixer. The number of distinct tweet ids before the filter and the number of
 * candidates after it are recorded as stats.
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
  stats: StatsReceiver
): Future[Seq[InitialAdsCandidate]] = {
  val uniqueTweetIds = candidates.map(_.tweetId).toSet
  stats.stat("initialCandidateSizeBeforeLineItemFilter").add(uniqueTweetIds.size)

  Future.collect(activePromotedTweetStore.multiGet(uniqueTweetIds)).map { lineItemInfoByTweetId =>
    val adsCandidates = candidates.flatMap { candidate =>
      lineItemInfoByTweetId.get(candidate.tweetId) match {
        // The tweet id was looked up and the store returned line item info for it.
        case Some(Some(lineItemInfo)) =>
          Some(
            InitialAdsCandidate(
              tweetId = candidate.tweetId,
              lineItemInfo = lineItemInfo,
              candidate.candidateGenerationInfo
            ))
        case _ => None
      }
    }
    stats.stat("initialCandidateSizeAfterLineItemFilter").add(adsCandidates.size)
    adsCandidates
  }
}
/**
 * Queries the SimClusters ANN similarity engine and returns the tweets whose score is strictly
 * greater than `minScore`, each tagged with its candidate generation info.
 *
 * With a source signal, the query is keyed on the signal's internal id and the embedding type
 * derived from the signal. Without one, it is keyed on the request user id and the InterestedIn
 * embedding type from `params`.
 *
 * When the dark traffic decider is on, an additional query using the SimClustersANN2 config id
 * is sent to the same engine and its result is not used.
 */
private[candidate_generation] def getSimClustersANNCandidates(
  requestUserId: UserId,
  sourceInfo: Option[SourceInfo],
  params: configapi.Params,
  configId: String,
  minScore: Double
) = {
  val simClustersModelVersion =
    ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))

  val embeddingType = sourceInfo match {
    case Some(info) => getSimClustersANNEmbeddingType(info)
    case None => params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType
  }
  val queryInternalId = sourceInfo match {
    case Some(info) => info.internalId
    case None => InternalId.UserId(requestUserId)
  }

  val query = SimClustersANNSimilarityEngine.fromParams(
    queryInternalId,
    embeddingType,
    simClustersModelVersion,
    configId,
    params
  )

  // dark traffic to simclusters-ann-2
  if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) {
    val darkTrafficConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
    val darkTrafficQuery = SimClustersANNSimilarityEngine.fromParams(
      queryInternalId,
      embeddingType,
      simClustersModelVersion,
      darkTrafficConfigId,
      params
    )
    simClustersANNSimilarityEngine
      .getCandidates(darkTrafficQuery)
  }

  simClustersANNSimilarityEngine
    .getCandidates(query)
    .map { maybeTweets =>
      maybeTweets.getOrElse(Seq.empty).filter(_.score > minScore).map { tweetWithScore =>
        val engineInfo = SimClustersANNSimilarityEngine
          .toSimilarityEngineInfo(query, tweetWithScore.score)
        TweetWithCandidateGenerationInfo(
          tweetWithScore.tweetId,
          CandidateGenerationInfo(
            sourceInfo,
            engineInfo,
            Seq(engineInfo)
          ))
      }
    }
}
/**
 * Fetches candidates from the producer-based UserAdGraph similarity engine.
 *
 * `sourceInfo` must be defined: its internal id is read unconditionally, so passing None
 * throws a NoSuchElementException. An absent engine response is treated as no candidates.
 * Each returned tweet carries `sourceInfo` plus an engine info built from its score, which is
 * also listed as the only contributing engine.
 */
private[candidate_generation] def getProducerBasedUserAdGraphCandidates(
  sourceInfo: Option[SourceInfo],
  params: configapi.Params
) = {
  val engineQuery =
    ProducerBasedUserAdGraphSimilarityEngine.fromParams(sourceInfo.get.internalId, params)
  for {
    scoredTweetsOpt <- producerBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery)
  } yield scoredTweetsOpt.getOrElse(Seq.empty).map { scoredTweet =>
    val engineInfo =
      ProducerBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(scoredTweet.score)
    TweetWithCandidateGenerationInfo(
      scoredTweet.tweetId,
      CandidateGenerationInfo(sourceInfo, engineInfo, Seq(engineInfo)))
  }
}
/**
 * Fetches candidates from the tweet-based UserAdGraph similarity engine.
 *
 * `sourceInfo` must be defined: its internal id is read unconditionally, so passing None
 * throws a NoSuchElementException. An absent engine response is treated as no candidates.
 * Each returned tweet carries `sourceInfo` plus an engine info built from its score, which is
 * also listed as the only contributing engine.
 */
private[candidate_generation] def getTweetBasedUserAdGraphCandidates(
  sourceInfo: Option[SourceInfo],
  params: configapi.Params
) = {
  val engineQuery =
    TweetBasedUserAdGraphSimilarityEngine.fromParams(sourceInfo.get.internalId, params)
  for {
    scoredTweetsOpt <- tweetBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery)
  } yield scoredTweetsOpt.getOrElse(Seq.empty).map { scoredTweet =>
    val engineInfo =
      TweetBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(scoredTweet.score)
    TweetWithCandidateGenerationInfo(
      scoredTweet.tweetId,
      CandidateGenerationInfo(sourceInfo, engineInfo, Seq(engineInfo)))
  }
}
/**
 * Fetches candidates from the consumers-based UserAdGraph similarity engine, seeded with
 * RealGraph users and their scores.
 *
 * Every returned tweet is attributed to a synthetic RealGraphIn source and has no
 * contributing similarity engines. An absent engine response is treated as no candidates.
 */
private[candidate_generation] def getRealGraphConsumersBasedUserAdGraphCandidates(
  realGraphSeeds: Map[UserId, Double],
  params: configapi.Params
) = {
  // The internalId is a placeholder value. We do not plan to store the full seedUserId set.
  val realGraphInSource = SourceInfo(
    sourceType = SourceType.RealGraphIn,
    internalId = InternalId.UserId(0L),
    sourceEventTime = None
  )
  val engineQuery = ConsumersBasedUserAdGraphSimilarityEngine.fromParams(realGraphSeeds, params)

  for {
    scoredTweetsOpt <- consumersBasedUserAdGraphSimilarityEngine.getCandidates(engineQuery)
  } yield scoredTweetsOpt.getOrElse(Seq.empty).map { scoredTweet =>
    val engineInfo =
      ConsumersBasedUserAdGraphSimilarityEngine.toSimilarityEngineInfo(scoredTweet.score)
    TweetWithCandidateGenerationInfo(
      scoredTweet.tweetId,
      CandidateGenerationInfo(
        Some(realGraphInSource),
        engineInfo,
        Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
      )
    )
  }
}
/**
 * Fetches TwHIN ads candidates from the given HNSW ANN similarity engine.
 *
 * The query id is the source's internal id when `sourceInfo` is defined, otherwise the
 * request user id. An absent engine response is treated as no candidates.
 *
 * @param similarityEngine     engine to query
 * @param similarityEngineType engine type recorded on each candidate
 * @param requestUserId        user the request is made for
 * @param sourceInfo           optional source signal; None means consumer-based retrieval
 * @param model                model id used for the query and recorded on each candidate
 */
private[candidate_generation] def getTwHINAdsCandidates(
  similarityEngine: HnswANNSimilarityEngine,
  similarityEngineType: SimilarityEngineType,
  requestUserId: UserId,
  sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine
  model: String
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
  // Resolve the Option with combinators rather than a nonEmpty check followed by .get.
  val internalId: InternalId =
    sourceInfo.map(_.internalId).getOrElse(InternalId.UserId(requestUserId))
  similarityEngine
    .getCandidates(buildHnswANNQuery(internalId, model)).map(_.getOrElse(Seq.empty)).map(_.map {
      tweetWithScore =>
        val similarityEngineInfo = SimilarityEngineInfo(
          similarityEngineType = similarityEngineType,
          modelId = Some(model),
          score = Some(tweetWithScore.score))
        TweetWithCandidateGenerationInfo(
          tweetWithScore.tweetId,
          CandidateGenerationInfo(
            // NOTE(review): the source is recorded as None even when `sourceInfo` is defined,
            // unlike the other source-based fetchers in this file — confirm this is intended.
            None,
            similarityEngineInfo,
            Seq(similarityEngineInfo)
          ))
    })
}
/**
 * Fetches candidates from the consumer-based WALS similarity engine.
 *
 * Source signals are restricted to the WALS-allowed source types and filtered by signal age.
 * Engine results are sorted by descending score, filtered by tweet age, and truncated to the
 * per-source-key maximum. An absent engine response yields an empty result.
 */
private[candidate_generation] def getConsumerBasedWalsCandidates(
  sourceSignals: Set[SourceInfo],
  params: configapi.Params
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
  // Fetch source signals and filter them based on age.
  val eligibleSignals = getConsumerBasedWalsSourceInfo(sourceSignals).toSeq
  val freshSignals = FilterUtil.tweetSourceAgeFilter(
    eligibleSignals,
    params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
  val walsQuery = ConsumerBasedWalsSimilarityEngine.fromParams(freshSignals, params)

  consumerBasedWalsSimilarityEngine.getCandidates(walsQuery).map {
    case Some(scoredTweets) =>
      val freshByScoreDesc = FilterUtil.tweetAgeFilter(
        scoredTweets.sortBy(-_.score),
        params(GlobalParams.MaxTweetAgeHoursParam))
      consumerBasedWalsSimilarityEngine.getScopedStats
        .stat("filteredCandidates_size").add(freshByScoreDesc.size)
      freshByScoreDesc
        .map { scoredTweet =>
          TweetWithCandidateGenerationInfo(
            scoredTweet.tweetId,
            CandidateGenerationInfo(
              None,
              ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(scoredTweet.score),
              Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
            )
          )
        }
        .take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
    case None =>
      Seq.empty
  }
}
}
/**
 * Stateless helpers for the ads candidate sources router: embedding type selection,
 * HNSW query construction, and source-signal selection for the WALS engine.
 */
object AdsCandidateSourcesRouter {

  // Source type values accepted by the consumer-based WALS similarity engine.
  // Built once at object initialisation instead of on every call.
  private val ConsumerBasedWalsAllowedSourceTypeValues = Set(
    SourceType.TweetFavorite.value,
    SourceType.Retweet.value,
    SourceType.TweetDontLike.value, //currently no-op
    SourceType.TweetReport.value, //currently no-op
    SourceType.AccountMute.value, //currently no-op
    SourceType.AccountBlock.value //currently no-op
  )

  /**
   * Picks the SimClusters ANN embedding type for a source signal: tweet-type sources map to
   * the tweet embedding, user/producer-type sources map to the producer embedding.
   *
   * @throws IllegalArgumentException for any other source type
   */
  def getSimClustersANNEmbeddingType(
    sourceInfo: SourceInfo
  ): EmbeddingType = {
    sourceInfo.sourceType match {
      case SourceType.TweetFavorite | SourceType.Retweet | SourceType.OriginalTweet |
          SourceType.Reply | SourceType.TweetShare | SourceType.NotificationClick |
          SourceType.GoodTweetClick | SourceType.VideoTweetQualityView |
          SourceType.VideoTweetPlayback50 =>
        EmbeddingType.LogFavLongestL2EmbeddingTweet
      case SourceType.UserFollow | SourceType.UserRepeatedProfileVisit | SourceType.RealGraphOon |
          SourceType.FollowRecommendation | SourceType.UserTrafficAttributionProfileVisit |
          SourceType.GoodProfileClick | SourceType.TwiceUserId =>
        EmbeddingType.FavBasedProducer
      case _ => throw new IllegalArgumentException("sourceInfo.sourceType not supported")
    }
  }

  /** Builds an HNSW ANN engine query for the given id and model, with empty params. */
  def buildHnswANNQuery(internalId: InternalId, modelId: String): HnswANNEngineQuery = {
    HnswANNEngineQuery(
      sourceId = internalId,
      modelId = modelId,
      params = Params.Empty
    )
  }

  /** Keeps only the source signals whose type is allowed for the consumer-based WALS engine. */
  def getConsumerBasedWalsSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] = {
    sourceSignals.filter { sourceInfo =>
      ConsumerBasedWalsAllowedSourceTypeValues.contains(sourceInfo.sourceType.value)
    }
  }
}

View File

@ -1,51 +0,0 @@
# Scala library target built from every *.scala file matched by the `sources` glob below.
# NOTE(review): "fatal_warnings" presumably promotes compiler warnings to errors — confirm
# against the build tool's option-set definitions.
# NOTE(review): storehaus:core is listed under both 3rdparty/jvm and 3rdparty/src/jvm, and
# frigate-common is referenced both as ":base" and by source path — verify both are needed.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"ann/src/main/scala/com/twitter/ann/hnsw",
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"cuad/projects/hashspace/thrift:thrift-scala",
"decider/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
"frigate/frigate-common:base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/constants",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/model",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
"src/scala/com/twitter/ml/featurestore/lib",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/ml/api:embedding-scala",
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"strato/config/columns/cuad/hashspace:hashspace-strato-client",
],
)

View File

@ -1,536 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModelConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.model.TripTweetWithScore
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.param.ConsumerBasedWalsParams
import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams
import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.EngineQuery
import com.twitter.cr_mixer.similarity_engine.FilterUtil
import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery
import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TripEngineQuery
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
* Route the SourceInfo to the associated Candidate Engines.
*/
@Singleton
case class CandidateSourcesRouter @Inject() (
customizedRetrievalCandidateGeneration: CustomizedRetrievalCandidateGeneration,
simClustersInterestedInCandidateGeneration: SimClustersInterestedInCandidateGeneration,
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine)
tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
TweetBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine)
producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine)
consumerEmbeddingBasedTripSimilarityEngine: StandardSimilarityEngine[
TripEngineQuery,
TripTweetWithScore
],
@Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine)
consumerBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine)
consumerBasedTwoTowerSimilarityEngine: HnswANNSimilarityEngine,
@Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine)
consumersBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[
ConsumersBasedUserVideoGraphSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
UserTweetEntityGraphSimilarityEngine.Query,
TweetWithScoreAndSocialProof
],
@Named(ModuleNames.ConsumerBasedWalsSimilarityEngine)
consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[
ConsumerBasedWalsSimilarityEngine.Query,
TweetWithScore
],
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
globalStats: StatsReceiver,
) {
import CandidateSourcesRouter._
val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName)
/**
 * Fans the request out to every candidate source and gathers the results.
 *
 * Tweet-based, producer-based, SimClusters InterestedIn and customized-retrieval sources are
 * always queried; the remaining sources are queried only when their enabling param is set.
 * Each inner sequence of the result comes from one source query; empty sequences are removed.
 *
 * @param requestUserId user the request is made for
 * @param sourceSignals source signals to route to the signal-based engines
 * @param sourceGraphs  graph sources keyed by source type name
 * @param params        request params
 */
def fetchCandidates(
  requestUserId: UserId,
  sourceSignals: Set[SourceInfo],
  sourceGraphs: Map[String, Option[GraphSourceInfo]],
  params: configapi.Params,
): Future[Seq[Seq[InitialCandidate]]] = {
  // A param-gated source contributes its result as one sequence, or nothing when disabled.
  def singleSequenceWhen(enabled: Boolean)(
    fetch: => Future[Seq[InitialCandidate]]
  ): Future[Seq[Seq[InitialCandidate]]] =
    if (enabled) fetch.map(Seq(_)) else Future.Nil

  val tweetBasedFut = getCandidates(
    getTweetBasedSourceInfo(sourceSignals),
    params,
    TweetBasedUnifiedSimilarityEngine.fromParams,
    tweetBasedUnifiedSimilarityEngine.getCandidates)

  val producerBasedFut = getCandidates(
    getProducerBasedSourceInfo(sourceSignals),
    params,
    ProducerBasedUnifiedSimilarityEngine.fromParams(_, _),
    producerBasedUnifiedSimilarityEngine.getCandidates)

  val simClustersInterestedInFut = getCandidatesPerSimilarityEngineModel(
    requestUserId,
    params,
    SimClustersInterestedInCandidateGeneration.fromParams,
    simClustersInterestedInCandidateGeneration.get)

  val logFavBasedTripFut = singleSequenceWhen(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableLogFavBasedSimClustersTripParam)
  ) {
    getSimClustersTripCandidates(
      params,
      ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
        ModelConfig.ConsumerLogFavBasedInterestedInEmbedding,
        InternalId.UserId(requestUserId),
        params
      ),
      consumerEmbeddingBasedTripSimilarityEngine
    )
  }

  val uvgRealGraphInFut = singleSequenceWhen(
    params(ConsumersBasedUserVideoGraphParams.EnableSourceParam)
  ) {
    val realGraphInSourceOpt =
      getGraphSourceInfoBySourceType(SourceType.RealGraphIn.name, sourceGraphs)
    getGraphBasedCandidates(
      params,
      ConsumersBasedUserVideoGraphSimilarityEngine
        .fromParamsForRealGraphIn(
          realGraphInSourceOpt.map(_.seedWithScores).getOrElse(Map.empty),
          params),
      consumersBasedUserVideoGraphSimilarityEngine,
      ConsumersBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo,
      realGraphInSourceOpt
    )
  }

  val followBasedTripFut = singleSequenceWhen(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableFollowBasedSimClustersTripParam)
  ) {
    getSimClustersTripCandidates(
      params,
      ConsumerEmbeddingBasedTripSimilarityEngine.fromParams(
        ModelConfig.ConsumerFollowBasedInterestedInEmbedding,
        InternalId.UserId(requestUserId),
        params
      ),
      consumerEmbeddingBasedTripSimilarityEngine
    )
  }

  val walsFut = singleSequenceWhen(params(ConsumerBasedWalsParams.EnableSourceParam)) {
    getConsumerBasedWalsCandidates(sourceSignals, params)
  }

  val twHINFut = singleSequenceWhen(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)
  ) {
    getHnswCandidates(
      params,
      ConsumerEmbeddingBasedTwHINSimilarityEngine.fromParams(
        InternalId.UserId(requestUserId),
        params),
      consumerBasedTwHINANNSimilarityEngine
    )
  }

  val twoTowerFut = singleSequenceWhen(
    params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwoTowerParam)
  ) {
    getHnswCandidates(
      params,
      ConsumerEmbeddingBasedTwoTowerSimilarityEngine.fromParams(
        InternalId.UserId(requestUserId),
        params),
      consumerBasedTwoTowerSimilarityEngine
    )
  }

  val customizedRetrievalFut = getCandidatesPerSimilarityEngineModel(
    requestUserId,
    params,
    CustomizedRetrievalCandidateGeneration.fromParams,
    customizedRetrievalCandidateGeneration.get)

  // The order below determines the order of the sequences in the result.
  val perSourceFutures = Seq(
    tweetBasedFut,
    producerBasedFut,
    simClustersInterestedInFut,
    walsFut,
    logFavBasedTripFut,
    followBasedTripFut,
    twHINFut,
    twoTowerFut,
    uvgRealGraphInFut,
    customizedRetrievalFut
  )

  Future.collect(perSourceFutures).map { perSourceSequences =>
    // remove empty innerSeq
    val nonEmptySequences = for {
      sourceSequences <- perSourceSequences
      sequence <- sourceSequences
      if sequence.nonEmpty
    } yield sequence
    stats.stat("numOfSequences").add(nonEmptySequences.size)
    stats.stat("flattenCandidatesWithDup").add(nonEmptySequences.flatten.size)
    nonEmptySequences
  }
}
/**
 * Queries a graph-based similarity engine and converts its results to initial candidates.
 *
 * Results are sorted by descending score, truncated to the per-source-key maximum and then
 * resolved through `convertToInitialCandidates`. When `graphSourceInfoOpt` is defined, every
 * candidate is attributed to a synthetic source of that graph's source type.
 *
 * @param toSimilarityEngineInfo builds the engine info recorded for a candidate from its score
 */
private def getGraphBasedCandidates[QueryType](
  params: configapi.Params,
  query: EngineQuery[QueryType],
  engine: StandardSimilarityEngine[QueryType, TweetWithScore],
  toSimilarityEngineInfo: Double => SimilarityEngineInfo,
  graphSourceInfoOpt: Option[GraphSourceInfo] = None
): Future[Seq[InitialCandidate]] = {
  // The internalId is a placeholder value. We do not plan to store the full seedUserId set.
  val graphSourceOpt = graphSourceInfoOpt.map { graphSourceInfo =>
    SourceInfo(
      sourceType = graphSourceInfo.sourceType,
      internalId = InternalId.UserId(0L),
      sourceEventTime = None
    )
  }

  engine.getCandidates(query).flatMap { scoredTweetsOpt =>
    val withGenerationInfo: Seq[TweetWithCandidateGenerationInfo] = scoredTweetsOpt match {
      case Some(scoredTweets) =>
        val byScoreDesc = scoredTweets.sortBy(-_.score)
        engine.getScopedStats.stat("sortedCandidates_size").add(byScoreDesc.size)
        byScoreDesc
          .map { scoredTweet =>
            TweetWithCandidateGenerationInfo(
              scoredTweet.tweetId,
              CandidateGenerationInfo(
                graphSourceOpt,
                toSimilarityEngineInfo(scoredTweet.score),
                Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
              )
            )
          }
          .take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
      case None =>
        Seq.empty
    }
    convertToInitialCandidates(withGenerationInfo)
  }
}
/**
 * Builds one query per source signal, runs each through `getFunc`, and converts every result
 * to initial candidates. The result holds one sequence per query.
 *
 * @param fromParams builds an engine query from a source signal and the request params
 * @param getFunc    fetches candidates for one query; None is treated as no candidates
 */
private def getCandidates[QueryType](
  sourceSignals: Set[SourceInfo],
  params: configapi.Params,
  fromParams: (SourceInfo, configapi.Params) => QueryType,
  getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // Mapping over the Set (before toSeq) collapses signals that produce equal queries.
  val distinctQueries: Seq[QueryType] = sourceSignals.map(fromParams(_, params)).toSeq

  val perQueryCandidates = distinctQueries.map { query =>
    getFunc(query).flatMap { candidatesOpt =>
      convertToInitialCandidates(candidatesOpt.getOrElse(Seq.empty))
    }
  }
  Future.collect(perQueryCandidates)
}
/**
 * Fetches candidates from the consumer-based WALS similarity engine and converts them to
 * initial candidates.
 *
 * Source signals are restricted to the WALS-allowed source types and filtered by signal age.
 * Engine results are sorted by descending score, filtered by tweet age, and truncated to the
 * per-source-key maximum before being resolved through `convertToInitialCandidates`.
 */
private def getConsumerBasedWalsCandidates(
  sourceSignals: Set[SourceInfo],
  params: configapi.Params
): Future[Seq[InitialCandidate]] = {
  // Fetch source signals and filter them based on age.
  val eligibleSignals = getConsumerBasedWalsSourceInfo(sourceSignals).toSeq
  val freshSignals = FilterUtil.tweetSourceAgeFilter(
    eligibleSignals,
    params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam))
  val walsQuery = ConsumerBasedWalsSimilarityEngine.fromParams(freshSignals, params)

  consumerBasedWalsSimilarityEngine.getCandidates(walsQuery).flatMap { scoredTweetsOpt =>
    val withGenerationInfo: Seq[TweetWithCandidateGenerationInfo] = scoredTweetsOpt match {
      case Some(scoredTweets) =>
        val freshByScoreDesc = FilterUtil.tweetAgeFilter(
          scoredTweets.sortBy(-_.score),
          params(GlobalParams.MaxTweetAgeHoursParam))
        consumerBasedWalsSimilarityEngine.getScopedStats
          .stat("filteredCandidates_size").add(freshByScoreDesc.size)
        freshByScoreDesc
          .map { scoredTweet =>
            TweetWithCandidateGenerationInfo(
              scoredTweet.tweetId,
              CandidateGenerationInfo(
                None,
                ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(scoredTweet.score),
                Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
              )
            )
          }
          .take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
      case None =>
        Seq.empty
    }
    convertToInitialCandidates(withGenerationInfo)
  }
}
/**
 * Queries the TRIP similarity engine and converts its results to initial candidates.
 *
 * Every candidate is recorded with the ExploreTripOfflineSimClustersTweets engine type, no
 * model id, its score, no source and no contributing engines. An absent engine response is
 * treated as no candidates.
 */
private def getSimClustersTripCandidates(
  params: configapi.Params,
  query: TripEngineQuery,
  engine: StandardSimilarityEngine[
    TripEngineQuery,
    TripTweetWithScore
  ],
): Future[Seq[InitialCandidate]] = {
  engine.getCandidates(EngineQuery(query, params)).flatMap { scoredTweetsOpt =>
    val withGenerationInfo: Seq[TweetWithCandidateGenerationInfo] =
      scoredTweetsOpt.getOrElse(Seq.empty).map { scoredTweet =>
        // define filters
        val tripEngineInfo = SimilarityEngineInfo(
          SimilarityEngineType.ExploreTripOfflineSimClustersTweets,
          None,
          Some(scoredTweet.score))
        TweetWithCandidateGenerationInfo(
          scoredTweet.tweetId,
          CandidateGenerationInfo(None, tripEngineInfo, Seq.empty)
        )
      }
    convertToInitialCandidates(withGenerationInfo)
  }
}
/**
 * Queries an HNSW ANN similarity engine and converts its results to initial candidates.
 *
 * Results are sorted by descending score, filtered by tweet age and truncated to the
 * per-source-key maximum before being resolved through `convertToInitialCandidates`.
 * An absent engine response is treated as no candidates.
 */
private def getHnswCandidates(
  params: configapi.Params,
  query: HnswANNEngineQuery,
  engine: HnswANNSimilarityEngine,
): Future[Seq[InitialCandidate]] = {
  engine.getCandidates(query).flatMap { scoredTweetsOpt =>
    val withGenerationInfo: Seq[TweetWithCandidateGenerationInfo] = scoredTweetsOpt match {
      case Some(scoredTweets) =>
        val freshByScoreDesc = FilterUtil.tweetAgeFilter(
          scoredTweets.sortBy(-_.score),
          params(GlobalParams.MaxTweetAgeHoursParam))
        engine.getScopedStats.stat("filteredCandidates_size").add(freshByScoreDesc.size)
        freshByScoreDesc
          .map { scoredTweet =>
            TweetWithCandidateGenerationInfo(
              scoredTweet.tweetId,
              CandidateGenerationInfo(
                None,
                engine.toSimilarityEngineInfo(query, scoredTweet.score),
                Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
              )
            )
          }
          .take(params(GlobalParams.MaxCandidateNumPerSourceKeyParam))
      case None =>
        Seq.empty
    }
    convertToInitialCandidates(withGenerationInfo)
  }
}
/**
 * Fetches candidates for the request user from a generator that returns one candidate list
 * per similarity engine model, keeping those lists separate.
 *
 * Each per-model list is resolved through `convertToInitialCandidates`, so the result is a
 * Seq[Seq[InitialCandidate]] with one inner sequence per model. An absent generator response
 * yields an empty outer sequence.
 *
 * @param fromParams builds the generator query from the user's internal id and the params
 * @param getFunc    runs the generator for one query
 */
private def getCandidatesPerSimilarityEngineModel[QueryType](
  requestUserId: UserId,
  params: configapi.Params,
  fromParams: (InternalId, configapi.Params) => QueryType,
  getFunc: QueryType => Future[
    Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]
  ]
): Future[Seq[Seq[InitialCandidate]]] = {
  val userQuery = fromParams(InternalId.UserId(requestUserId), params)
  for {
    perModelCandidatesOpt <- getFunc(userQuery)
    perModelInitialCandidates <- Future.collect(
      perModelCandidatesOpt
        .getOrElse(Seq.empty)
        .map(candidates => convertToInitialCandidates(candidates)))
  } yield perModelInitialCandidates
}
/**
 * Turns raw tweet candidates into [[InitialCandidate]]s by attaching the tweet info looked
 * up in `tweetInfoStore`.
 *
 * Candidates whose tweet info does not exist are filtered out. The input order of the
 * surviving candidates is preserved.
 *
 * @param candidates tweets together with their candidate generation info
 * @return the candidates for which tweet info was found
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo],
): Future[Seq[InitialCandidate]] = {
  val tweetIds = candidates.map(_.tweetId).toSet
  Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
    // A single lookup per candidate: a missing key and a stored None both yield no candidate,
    // so no second lookup or "cannot happen" exception is needed.
    candidates.flatMap { candidate =>
      tweetInfos.getOrElse(candidate.tweetId, None).map { tweetInfo =>
        InitialCandidate(
          tweetId = candidate.tweetId,
          tweetInfo = tweetInfo,
          candidate.candidateGenerationInfo
        )
      }
    }
  }
}
}
/**
 * Source-type allow-lists and the signal selectors built on them, used to decide which
 * source signals are routed to which similarity engine.
 */
object CandidateSourcesRouter {

  /***
   * Signal funneling should not exist in CG or even in any SimilarityEngine.
   * They will be in Router, or eventually, in CrCandidateGenerator.
   */
  val AllowedSourceTypesForConsumerBasedWalsSE = Set(
    SourceType.TweetFavorite.value,
    SourceType.Retweet.value,
    SourceType.TweetDontLike.value, //currently no-op
    SourceType.TweetReport.value, //currently no-op
    SourceType.AccountMute.value, //currently no-op
    SourceType.AccountBlock.value //currently no-op
  )

  val AllowedSourceTypesForTweetBasedUnifiedSE = Set(
    SourceType.TweetFavorite.value,
    SourceType.Retweet.value,
    SourceType.OriginalTweet.value,
    SourceType.Reply.value,
    SourceType.TweetShare.value,
    SourceType.NotificationClick.value,
    SourceType.GoodTweetClick.value,
    SourceType.VideoTweetQualityView.value,
    SourceType.VideoTweetPlayback50.value,
    SourceType.TweetAggregation.value,
  )

  val AllowedSourceTypesForProducerBasedUnifiedSE = Set(
    SourceType.UserFollow.value,
    SourceType.UserRepeatedProfileVisit.value,
    SourceType.RealGraphOon.value,
    SourceType.FollowRecommendation.value,
    SourceType.UserTrafficAttributionProfileVisit.value,
    SourceType.GoodProfileClick.value,
    SourceType.ProducerAggregation.value,
  )

  /** Looks up the graph source registered under the given source type name, if any. */
  def getGraphSourceInfoBySourceType(
    sourceTypeStr: String,
    sourceGraphs: Map[String, Option[GraphSourceInfo]]
  ): Option[GraphSourceInfo] =
    sourceGraphs.get(sourceTypeStr) match {
      case Some(graphSourceInfoOpt) => graphSourceInfoOpt
      case None => None
    }

  /** Keeps the signals whose source type is allowed for the tweet-based unified engine. */
  def getTweetBasedSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForTweetBasedUnifiedSE.contains(signal.sourceType.value)
    }

  /** Keeps the signals whose source type is allowed for the producer-based unified engine. */
  def getProducerBasedSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForProducerBasedUnifiedSE.contains(signal.sourceType.value)
    }

  /** Keeps the signals whose source type is allowed for the consumer-based WALS engine. */
  def getConsumerBasedWalsSourceInfo(
    sourceSignals: Set[SourceInfo]
  ): Set[SourceInfo] =
    sourceSignals.filter { signal =>
      AllowedSourceTypesForConsumerBasedWalsSE.contains(signal.sourceType.value)
    }
}

View File

@ -1,350 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.blender.SwitchBlender
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.filter.PostRankFilterRunner
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.param.RankerParams
import com.twitter.cr_mixer.param.RecentNegativeSignalParams
import com.twitter.cr_mixer.ranker.SwitchRanker
import com.twitter.cr_mixer.source_signal.SourceInfoRouter
import com.twitter.cr_mixer.source_signal.UssStore.EnabledNegativeSourceTypes
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
import javax.inject.Inject
import javax.inject.Singleton
/**
* For now it performs the main steps as follows:
* 1. Source signal (via USS, FRS) fetch
* 2. Candidate generation
* 3. Filtering
* 4. Interleave blender
* 5. Ranker
* 6. Post-ranker filter
* 7. Truncation
*/
@Singleton
class CrCandidateGenerator @Inject() (
sourceInfoRouter: SourceInfoRouter,
candidateSourceRouter: CandidateSourcesRouter,
switchBlender: SwitchBlender,
preRankFilterRunner: PreRankFilterRunner,
postRankFilterRunner: PostRankFilterRunner,
switchRanker: SwitchRanker,
crMixerScribeLogger: CrMixerScribeLogger,
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
private val timer: Timer = new JavaTimer(true)
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchSourcesStats = stats.scope("fetchSources")
private val fetchPositiveSourcesStats = stats.scope("fetchPositiveSources")
private val fetchNegativeSourcesStats = stats.scope("fetchNegativeSources")
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val fetchCandidatesAfterFilterStats = stats.scope("fetchCandidatesAfterFilter")
private val preRankFilterStats = stats.scope("preRankFilter")
private val interleaveStats = stats.scope("interleave")
private val rankStats = stats.scope("rank")
private val postRankFilterStats = stats.scope("postRankFilter")
private val blueVerifiedTweetStats = stats.scope("blueVerifiedTweetStats")
private val blueVerifiedTweetStatsPerSimilarityEngine =
stats.scope("blueVerifiedTweetStatsPerSimilarityEngine")
/**
 * Runs the candidate generation pipeline for one query: fetch sources, fetch candidates,
 * pre-rank filter, interleave, rank, post-rank filter, then optional blue-verified top-K.
 *
 * The whole pipeline is bounded by `timeoutConfig.serviceTimeout` via `raiseWithin`, and each
 * stage runs inside a StatsUtil tracking wrapper under its own stats scope.
 * NOTE(review): the exact metrics recorded by StatsUtil.trackItemsStats / trackBlockStats and
 * by trackResultStats / trackTopKStats are defined outside this view.
 *
 * @param query the request, including user, product, params and maxNumResults
 * @return the post-rank-filtered candidates; when blue-verified top-K applies, at most
 *         `query.maxNumResults` candidates with blue-verified tweets first
 */
def get(query: CrCandidateGeneratorQuery): Future[Seq[RankedCandidate]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
val perProductBlueVerifiedStats =
blueVerifiedTweetStats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
trackResultStats(perProductStats) {
StatsUtil.trackItemsStats(perProductStats) {
val result = for {
(sourceSignals, sourceGraphsMap) <- StatsUtil.trackBlockStats(fetchSourcesStats) {
fetchSources(query)
}
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) {
// find the positive and negative signals
// (a signal is negative when its source type is in EnabledNegativeSourceTypes)
val (positiveSignals, negativeSignals) = sourceSignals.partition { signal =>
!EnabledNegativeSourceTypes.contains(signal.sourceType)
}
fetchPositiveSourcesStats.stat("size").add(positiveSignals.size)
fetchNegativeSourcesStats.stat("size").add(negativeSignals.size)
// find the positive signals to keep, removing block and muted users
// (only when negative signals exist and RecentNegativeSignalParams.EnableSourceParam is set)
val filteredSourceInfo =
if (negativeSignals.nonEmpty && query.params(
RecentNegativeSignalParams.EnableSourceParam)) {
filterSourceInfo(positiveSignals, negativeSignals)
} else {
positiveSignals
}
// fetch candidates from the positive signals
StatsUtil.trackBlockStats(fetchCandidatesStats) {
fetchCandidates(query, filteredSourceInfo, sourceGraphsMap)
}
}
filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
preRankFilter(query, initialCandidates)
}
interleavedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
interleave(query, filteredCandidates)
}
rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
// only the first MaxCandidatesToRank interleaved candidates are handed to the ranker
val candidatesToRank =
interleavedCandidates.take(query.params(RankerParams.MaxCandidatesToRank))
rank(query, candidatesToRank)
}
postRankFilterCandidates <- StatsUtil.trackItemsStats(postRankFilterStats) {
postRankFilter(query, rankedCandidates)
}
} yield {
// record top-K stats at two fixed cut-offs (800, 400) and at the query's own maxNumResults
trackTopKStats(
800,
postRankFilterCandidates,
isQueryK = false,
perProductBlueVerifiedStats)
trackTopKStats(
400,
postRankFilterCandidates,
isQueryK = false,
perProductBlueVerifiedStats)
trackTopKStats(
query.maxNumResults,
postRankFilterCandidates,
isQueryK = true,
perProductBlueVerifiedStats)
// split into blue-verified tweets and the rest, each keeping its ranked order
val (blueVerifiedTweets, remainingTweets) =
postRankFilterCandidates.partition(
_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
val topKBlueVerified = blueVerifiedTweets.take(query.maxNumResults)
// fill whatever is left of the maxNumResults budget with non-blue-verified tweets
val topKRemaining = remainingTweets.take(query.maxNumResults - topKBlueVerified.size)
trackBlueVerifiedTweetStats(topKBlueVerified, perProductBlueVerifiedStats)
// when the feature is off or there are no blue-verified tweets, the full post-rank list
// is returned without truncation here
if (topKBlueVerified.nonEmpty && query.params(RankerParams.EnableBlueVerifiedTopK)) {
topKBlueVerified ++ topKRemaining
} else {
postRankFilterCandidates
}
}
result.raiseWithin(timeoutConfig.serviceTimeout)(timer)
}
}
}
}
/**
 * Asks the source-info router for the user's source signals and graphs, passing the lookup
 * through the scribe logger.
 */
private def fetchSources(
  query: CrCandidateGeneratorQuery
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
  // A def (not a val) so the lookup is evaluated exactly where the logger evaluates its
  // argument, whether that parameter is by-name or by-value.
  def routedSources =
    sourceInfoRouter.get(query.userId, query.product, query.userState, query.params)

  crMixerScribeLogger.scribeSignalSources(query, routedSources)
}
/**
 * Drops every positive signal whose internal id is a user id that also appears as a user id
 * among the negative signals. Signals keyed by anything other than a user id are always kept.
 */
private def filterSourceInfo(
  positiveSignals: Set[SourceInfo],
  negativeSignals: Set[SourceInfo]
): Set[SourceInfo] = {
  val excludedUserIds: Set[Long] = negativeSignals.collect {
    case SourceInfo(_, InternalId.UserId(userId), _) => userId
  }

  positiveSignals.filterNot {
    case SourceInfo(_, InternalId.UserId(userId), _) => excludedUserIds.contains(userId)
    case _ => false
  }
}
/**
 * Fetches initial candidates for the given source signals and graphs and passes the resulting
 * future to the scribe logger.
 *
 * As a side effect, once the candidates are available, a counter is incremented for every
 * blue-verified candidate. The counter is scoped by product and by the type of the candidate's
 * first contributing similarity engine, and named after the author id.
 *
 * @param query         the request (user id, product and params are read)
 * @param sourceSignals source signals to fetch candidates for
 * @param sourceGraphs  source graphs to fetch candidates for
 * @return the candidates as returned by the scribe logger
 */
def fetchCandidates(
  query: CrCandidateGeneratorQuery,
  sourceSignals: Set[SourceInfo],
  sourceGraphs: Map[String, Option[GraphSourceInfo]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val initialCandidates = candidateSourceRouter
    .fetchCandidates(
      query.userId,
      sourceSignals,
      sourceGraphs,
      query.params
    )

  // Stats only: the derived future is intentionally discarded, so nothing here may affect the
  // returned candidates.
  initialCandidates.map { candidatesPerSource =>
    for {
      candidate <- candidatesPerSource.flatten
      if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
      // contributingSimilarityEngines may be empty; `.head` would throw and silently abort
      // the stats for all remaining candidates, so such candidates are skipped instead.
      firstEngine <- candidate.candidateGenerationInfo.contributingSimilarityEngines.headOption
    } {
      blueVerifiedTweetStatsPerSimilarityEngine
        .scope(query.product.toString)
        .scope(firstEngine.similarityEngineType.toString)
        .counter(candidate.tweetInfo.authorId.toString)
        .incr()
    }
  }

  crMixerScribeLogger.scribeInitialCandidates(
    query,
    initialCandidates
  )
}
/** Runs the sequential pre-rank filters over the candidates, via the scribe logger. */
private def preRankFilter(
  query: CrCandidateGeneratorQuery,
  candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // A def so evaluation happens wherever the logger evaluates its argument.
  def filtered = preRankFilterRunner.runSequentialFilters(query, candidates)

  crMixerScribeLogger.scribePreRankFilterCandidates(query, filtered)
}
/** Delegates to the post-rank filter runner; nothing is scribed at this stage. */
private def postRankFilter(
  query: CrCandidateGeneratorQuery,
  candidates: Seq[RankedCandidate]
): Future[Seq[RankedCandidate]] =
  postRankFilterRunner.run(query, candidates)
/** Blends the per-source candidate lists with the switch blender, via the scribe logger. */
private def interleave(
  query: CrCandidateGeneratorQuery,
  candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[BlendedCandidate]] = {
  // A def so evaluation happens wherever the logger evaluates its argument.
  def blended = switchBlender.blend(query.params, query.userState, candidates)

  crMixerScribeLogger.scribeInterleaveCandidates(query, blended)
}
/** Ranks the blended candidates with the switch ranker, via the scribe logger. */
private def rank(
  query: CrCandidateGeneratorQuery,
  candidates: Seq[BlendedCandidate]
): Future[Seq[RankedCandidate]] = {
  // A def so evaluation happens wherever the logger evaluates its argument.
  def ranked = switchRanker.rank(query, candidates)

  crMixerScribeLogger.scribeRankedCandidates(query, ranked)
}
/**
 * Evaluates `fn` and, on success, records the reason-chosen and potential-reason breakdowns
 * (by source type and by similarity engine) of the returned candidates under `stats`.
 */
private def trackResultStats(
  stats: StatsReceiver
)(
  fn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  // Applied in this order; a failure in one tracker skips the ones after it.
  val trackers: Seq[(Seq[RankedCandidate], StatsReceiver) => Unit] = Seq(
    trackReasonChosenSourceTypeStats,
    trackReasonChosenSimilarityEngineStats,
    trackPotentialReasonsSourceTypeStats,
    trackPotentialReasonsSimilarityEngineStats
  )

  fn.onSuccess { candidates =>
    trackers.foreach(track => track(candidates, stats))
  }
}
/**
 * Records how many candidates were chosen per source type. Candidates without source info are
 * reported under "RequesterId".
 */
private def trackReasonChosenSourceTypeStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val bySourceType = candidates.groupBy(_.reasonChosen.sourceInfoOpt.map(_.sourceType))

  for ((sourceTypeOpt, group) <- bySourceType) {
    val label = sourceTypeOpt.fold("RequesterId")(_.toString) // default
    stats.stat("reasonChosen", "sourceType", label, "size").add(group.size)
  }
}
/** Records how many candidates were chosen per similarity engine type. */
private def trackReasonChosenSimilarityEngineStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val byEngineType =
    candidates.groupBy(_.reasonChosen.similarityEngineInfo.similarityEngineType)

  for ((engineType, group) <- byEngineType) {
    stats.stat("reasonChosen", "similarityEngine", engineType.toString, "size").add(group.size)
  }
}
/**
 * Records, per source type, how often it appears among the candidates' potential reasons.
 * Reasons without source info are reported under "RequesterId".
 */
private def trackPotentialReasonsSourceTypeStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val sourceTypeOpts =
    candidates.flatMap(_.potentialReasons.map(_.sourceInfoOpt.map(_.sourceType)))

  for ((sourceTypeOpt, occurrences) <- sourceTypeOpts.groupBy(identity)) {
    val label = sourceTypeOpt.fold("RequesterId")(_.toString) // default
    stats.stat("potentialReasons", "sourceType", label, "size").add(occurrences.size)
  }
}
/**
 * Records, per similarity engine type, how often it appears among the candidates' potential
 * reasons.
 */
private def trackPotentialReasonsSimilarityEngineStats(
  candidates: Seq[RankedCandidate],
  stats: StatsReceiver
): Unit = {
  val engineTypes =
    candidates.flatMap(_.potentialReasons.map(_.similarityEngineInfo.similarityEngineType))

  for ((engineType, occurrences) <- engineTypes.groupBy(identity)) {
    stats
      .stat("potentialReasons", "similarityEngine", engineType.toString, "size")
      .add(occurrences.size)
  }
}
/**
 * For every blue-verified candidate, increments a counter named after the author id and a
 * counter named after the tweet id under a scope named after the author id.
 */
private def trackBlueVerifiedTweetStats(
  candidates: Seq[RankedCandidate],
  statsReceiver: StatsReceiver
): Unit = {
  candidates
    .withFilter(_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
    .foreach { blueVerified =>
      val authorKey = blueVerified.tweetInfo.authorId.toString
      statsReceiver.counter(authorKey).incr()
      statsReceiver.scope(authorKey).counter(blueVerified.tweetId.toString).incr()
    }
}
/**
 * For each author with at least one blue-verified candidate, records how many of that author's
 * candidates fall within the first `k` positions and how many fall beyond them.
 *
 * @param k               cutoff position
 * @param tweetCandidates candidates in their final order
 * @param isQueryK        when true the stats are named "topK"/"beyondK"; otherwise the value
 *                        of `k` is embedded in the name ("top<k>"/"beyond<k>")
 * @param statsReceiver   receiver under which a scope per author id is created
 */
private def trackTopKStats(
  k: Int,
  tweetCandidates: Seq[RankedCandidate],
  isQueryK: Boolean,
  statsReceiver: StatsReceiver
): Unit = {
  val (withinK, pastK) = tweetCandidates.splitAt(k)

  val blueVerifiedAuthorIds = tweetCandidates
    .filter(_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
    .map(_.tweetInfo.authorId)
    .toSet

  val (withinName, pastName) =
    if (isQueryK) ("topK", "beyondK") else (s"top$k", s"beyond$k")

  for (authorId <- blueVerifiedAuthorIds) {
    val authorStats = statsReceiver.scope(authorId.toString)
    authorStats.stat(withinName).add(withinK.count(_.tweetInfo.authorId == authorId))
    authorStats.stat(pastName).add(pastK.count(_.tweetInfo.authorId == authorId))
  }
}
}

View File

@ -1,345 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.candidate_generation.CustomizedRetrievalCandidateGeneration.Query
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedCandidateGenerationParams._
import com.twitter.cr_mixer.param.CustomizedRetrievalBasedTwhinParams._
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupEngineQuery
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine
import com.twitter.cr_mixer.util.InterleaveUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.CandidateSource
import com.twitter.frigate.common.base.Stats
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.timelines.configapi
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.collection.mutable.ArrayBuffer
/**
* A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources
*
* Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different
* similarity engines without blending. In other words, this class shall not be thought of as a
* Unified Similarity Engine. It is a CG that calls multiple singular Similarity Engines.
*/
@Singleton
case class CustomizedRetrievalCandidateGeneration @Inject() (
@Named(ModuleNames.TwhinCollabFilterSimilarityEngine)
twhinCollabFilterSimilarityEngine: LookupSimilarityEngine[
TwhinCollabFilterSimilarityEngine.Query,
TweetWithScore
],
@Named(ModuleNames.DiffusionBasedSimilarityEngine)
diffusionBasedSimilarityEngine: LookupSimilarityEngine[
DiffusionBasedSimilarityEngine.Query,
TweetWithScore
],
statsReceiver: StatsReceiver)
extends CandidateSource[
Query,
Seq[TweetWithCandidateGenerationInfo]
] {
// Candidate source name: the simple class name, also used as the stats scope below.
override def name: String = this.getClass.getSimpleName
private val stats = statsReceiver.scope(name)
// Scope passed to Stats.trackOption around the whole fetch in get().
private val fetchCandidatesStat = stats.scope("fetchCandidates")
/**
 * For each similarity engine family, returns a list of tweet candidates.
 *
 * Only `InternalId.UserId` queries are supported. Every enabled lookup is issued before the
 * results are joined. The follow and engagement results of the TwHIN collab-filter family are
 * interleaved with each other, as are those of the TwHIN multi-cluster family; the
 * diffusion-based results stay a list of their own. Families that produced nothing are left
 * out of the result.
 *
 * NOTE(review): the per-source cap is the literal 200 below; `query.maxCandidateNumPerSourceKey`
 * is not read in this method -- confirm that is intended.
 *
 * @param query lookup queries and enable flags for each engine
 * @return the non-empty candidate lists, each holding at most 200 entries
 * @throws IllegalArgumentException synchronously, when `query.internalId` is not a user id
 */
override def get(
  query: Query
): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
  query.internalId match {
    case InternalId.UserId(_) =>
      Stats.trackOption(fetchCandidatesStat) {
        val collabFollowFut =
          if (query.enableTwhinCollabFilter) {
            twhinCollabFilterSimilarityEngine.getCandidates(query.twhinCollabFilterFollowQuery)
          } else Future.None
        val collabEngagementFut =
          if (query.enableTwhinCollabFilter) {
            twhinCollabFilterSimilarityEngine.getCandidates(
              query.twhinCollabFilterEngagementQuery)
          } else Future.None
        // The multi-cluster lookups go through the same engine with their own queries.
        val multiClusterFollowFut =
          if (query.enableTwhinMultiCluster) {
            twhinCollabFilterSimilarityEngine.getCandidates(query.twhinMultiClusterFollowQuery)
          } else Future.None
        val multiClusterEngagementFut =
          if (query.enableTwhinMultiCluster) {
            twhinCollabFilterSimilarityEngine.getCandidates(
              query.twhinMultiClusterEngagementQuery)
          } else Future.None
        val diffusionFut =
          if (query.enableRetweetBasedDiffusion) {
            diffusionBasedSimilarityEngine.getCandidates(
              query.diffusionBasedSimilarityEngineQuery)
          } else Future.None

        Future
          .join(
            collabFollowFut,
            collabEngagementFut,
            multiClusterFollowFut,
            multiClusterEngagementFut,
            diffusionFut
          ).map {
            case (
                  collabFollow,
                  collabEngagement,
                  multiClusterFollow,
                  multiClusterEngagement,
                  diffusion) =>
              val perSourceCap = 200

              // None for an empty list, otherwise the list truncated to the cap.
              def capIfNonEmpty(
                tweets: Seq[TweetWithCandidateGenerationInfo]
              ): Option[Seq[TweetWithCandidateGenerationInfo]] =
                if (tweets.nonEmpty) Some(tweets.take(perSourceCap)) else None

              val collabFollowWithCGInfo = getTwhinCollabCandidatesWithCGInfo(
                collabFollow,
                perSourceCap,
                query.twhinCollabFilterFollowQuery
              )
              val collabEngagementWithCGInfo = getTwhinCollabCandidatesWithCGInfo(
                collabEngagement,
                perSourceCap,
                query.twhinCollabFilterEngagementQuery
              )
              val multiClusterFollowWithCGInfo = getTwhinCollabCandidatesWithCGInfo(
                multiClusterFollow,
                perSourceCap,
                query.twhinMultiClusterFollowQuery
              )
              val multiClusterEngagementWithCGInfo = getTwhinCollabCandidatesWithCGInfo(
                multiClusterEngagement,
                perSourceCap,
                query.twhinMultiClusterEngagementQuery
              )
              val diffusionWithCGInfo = getDiffusionBasedCandidatesWithCGInfo(
                diffusion,
                perSourceCap,
                query.diffusionBasedSimilarityEngineQuery
              )

              val interleavedCollab = InterleaveUtil.interleave(
                ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                  collabFollowWithCGInfo,
                  collabEngagementWithCGInfo
                ))
              val interleavedMultiCluster = InterleaveUtil.interleave(
                ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](
                  multiClusterFollowWithCGInfo,
                  multiClusterEngagementWithCGInfo
                ))

              Some(
                Seq(
                  capIfNonEmpty(interleavedCollab),
                  capIfNonEmpty(interleavedMultiCluster),
                  capIfNonEmpty(diffusionWithCGInfo)
                ).flatten)
          }
      }
    case _ =>
      throw new IllegalArgumentException("sourceId_is_not_userId_cnt")
  }
}
/**
 * Returns the candidates whose tweet id is at or above the first Snowflake id for
 * `Time.now - maxTweetAgeHours`.
 *
 * @param candidates       tweets to filter
 * @param maxTweetAgeHours maximum allowed tweet age, as a Duration
 */
private def tweetAgeFilter(
  candidates: Seq[TweetWithScore],
  maxTweetAgeHours: Duration
): Seq[TweetWithScore] = {
  // Tweet IDs are approximately chronological (see http://go/snowflake), so the earliest
  // permitted id is built once and each candidate is checked with a plain id comparison,
  // which is far cheaper than deriving a timestamp per candidate.
  val cutoff = Time.now - maxTweetAgeHours
  val earliestPermittedTweetId = SnowflakeId.firstIdFor(cutoff)
  candidates.filter(_.tweetId >= earliestPermittedTweetId)
}
/**
 * Applies tweetAgeFilter and reports sizes along the way: "size" before filtering,
 * "filtered_size" after, and an "empty" counter when nothing survives.
 * The filtering itself is done entirely by tweetAgeFilter.
 */
private def ageFilterWithStats(
  offlineInterestedInCandidates: Seq[TweetWithScore],
  maxTweetAgeHours: Duration,
  scopedStatsReceiver: StatsReceiver
): Seq[TweetWithScore] = {
  scopedStatsReceiver.stat("size").add(offlineInterestedInCandidates.size)

  // Each candidate is rebuilt from its id and score before filtering.
  val rebuilt = offlineInterestedInCandidates.map(c => TweetWithScore(c.tweetId, c.score))
  val recent = tweetAgeFilter(rebuilt, maxTweetAgeHours)

  scopedStatsReceiver.stat("filtered_size").add(recent.size)
  if (recent.isEmpty) {
    scopedStatsReceiver.counter("empty").incr()
  }
  recent
}
/**
 * Wraps each scored tweet with candidate generation info built from the TwHIN collab-filter
 * query and the tweet's score, keeping at most `maxCandidateNumPerSourceKey` entries.
 * A missing result (`None`) yields an empty list.
 */
private def getTwhinCollabCandidatesWithCGInfo(
  tweetCandidates: Option[Seq[TweetWithScore]],
  maxCandidateNumPerSourceKey: Int,
  twhinCollabFilterQuery: LookupEngineQuery[
    TwhinCollabFilterSimilarityEngine.Query
  ]
): Seq[TweetWithCandidateGenerationInfo] = {
  val withCGInfo: Seq[TweetWithCandidateGenerationInfo] =
    tweetCandidates.getOrElse(Seq.empty).map { scoredTweet =>
      val engineInfo = TwhinCollabFilterSimilarityEngine
        .toSimilarityEngineInfo(twhinCollabFilterQuery, scoredTweet.score)
      TweetWithCandidateGenerationInfo(
        scoredTweet.tweetId,
        CandidateGenerationInfo(None, engineInfo, Seq.empty)
      )
    }

  withCGInfo.take(maxCandidateNumPerSourceKey)
}
/**
 * Wraps each scored tweet with candidate generation info built from the diffusion-based query
 * and the tweet's score, keeping at most `maxCandidateNumPerSourceKey` entries.
 * A missing result (`None`) yields an empty list.
 */
private def getDiffusionBasedCandidatesWithCGInfo(
  tweetCandidates: Option[Seq[TweetWithScore]],
  maxCandidateNumPerSourceKey: Int,
  diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
    DiffusionBasedSimilarityEngine.Query
  ]
): Seq[TweetWithCandidateGenerationInfo] = {
  val withCGInfo: Seq[TweetWithCandidateGenerationInfo] =
    tweetCandidates.getOrElse(Seq.empty).map { scoredTweet =>
      val engineInfo = DiffusionBasedSimilarityEngine
        .toSimilarityEngineInfo(diffusionBasedSimilarityEngineQuery, scoredTweet.score)
      TweetWithCandidateGenerationInfo(
        scoredTweet.tweetId,
        CandidateGenerationInfo(None, engineInfo, Seq.empty)
      )
    }

  withCGInfo.take(maxCandidateNumPerSourceKey)
}
}
/**
 * Companion holding the query type of [[CustomizedRetrievalCandidateGeneration]] and the
 * factory that builds it from params.
 */
object CustomizedRetrievalCandidateGeneration {
/**
 * Everything a lookup needs: the id being queried, the limits, and for each engine family an
 * enable flag plus the prepared lookup queries.
 */
case class Query(
internalId: InternalId,
maxCandidateNumPerSourceKey: Int,
maxTweetAgeHours: Duration,
// twhinCollabFilter
enableTwhinCollabFilter: Boolean,
twhinCollabFilterFollowQuery: LookupEngineQuery[
TwhinCollabFilterSimilarityEngine.Query
],
twhinCollabFilterEngagementQuery: LookupEngineQuery[
TwhinCollabFilterSimilarityEngine.Query
],
// twhinMultiCluster
enableTwhinMultiCluster: Boolean,
twhinMultiClusterFollowQuery: LookupEngineQuery[
TwhinCollabFilterSimilarityEngine.Query
],
twhinMultiClusterEngagementQuery: LookupEngineQuery[
TwhinCollabFilterSimilarityEngine.Query
],
// retweet-based diffusion
enableRetweetBasedDiffusion: Boolean,
diffusionBasedSimilarityEngineQuery: LookupEngineQuery[
DiffusionBasedSimilarityEngine.Query
],
)
/**
 * Builds a [[Query]] for `internalId`, resolving every engine query, enable flag and limit
 * from `params`.
 *
 * @param internalId the id to look candidates up for
 * @param params     the parameter set the sources, flags and limits are read from
 */
def fromParams(
internalId: InternalId,
params: configapi.Params
): Query = {
// All four TwHIN queries use the same engine factory and differ only in the source param.
val twhinCollabFilterFollowQuery =
TwhinCollabFilterSimilarityEngine.fromParams(
internalId,
params(CustomizedRetrievalBasedTwhinCollabFilterFollowSource),
params)
val twhinCollabFilterEngagementQuery =
TwhinCollabFilterSimilarityEngine.fromParams(
internalId,
params(CustomizedRetrievalBasedTwhinCollabFilterEngagementSource),
params)
val twhinMultiClusterFollowQuery =
TwhinCollabFilterSimilarityEngine.fromParams(
internalId,
params(CustomizedRetrievalBasedTwhinMultiClusterFollowSource),
params)
val twhinMultiClusterEngagementQuery =
TwhinCollabFilterSimilarityEngine.fromParams(
internalId,
params(CustomizedRetrievalBasedTwhinMultiClusterEngagementSource),
params)
val diffusionBasedSimilarityEngineQuery =
DiffusionBasedSimilarityEngine.fromParams(
internalId,
params(CustomizedRetrievalBasedRetweetDiffusionSource),
params)
Query(
internalId = internalId,
maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam),
maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam),
// twhinCollabFilter
enableTwhinCollabFilter = params(EnableTwhinCollabFilterClusterParam),
twhinCollabFilterFollowQuery = twhinCollabFilterFollowQuery,
twhinCollabFilterEngagementQuery = twhinCollabFilterEngagementQuery,
// twhinMultiCluster
enableTwhinMultiCluster = params(EnableTwhinMultiClusterParam),
twhinMultiClusterFollowQuery = twhinMultiClusterFollowQuery,
twhinMultiClusterEngagementQuery = twhinMultiClusterEngagementQuery,
// retweet-based diffusion
enableRetweetBasedDiffusion = params(EnableRetweetBasedDiffusionParam),
diffusionBasedSimilarityEngineQuery = diffusionBasedSimilarityEngineQuery
)
}
}

View File

@ -1,220 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithAuthor
import com.twitter.cr_mixer.param.FrsParams
import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineRouter
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.cr_mixer.thriftscala.FrsTweet
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.hermit.constants.AlgorithmFeedbackTokens
import com.twitter.hermit.constants.AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap
import com.twitter.hermit.model.Algorithm
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi.Params
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
* TweetCandidateGenerator based on FRS seed users. For now this candidate generator fetches seed
* users from FRS, and retrieves the seed users' past tweets from Earlybird with Earlybird light
* ranking models.
*/
@Singleton
class FrsTweetCandidateGenerator @Inject() (
@Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]],
frsBasedSimilarityEngine: EarlybirdSimilarityEngineRouter,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
import FrsTweetCandidateGenerator._
// Timer used by get() to bound the whole pipeline with raiseWithin.
private val timer = DefaultTimer
// Root stats scope for this generator, named after the canonical class name.
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
// One scope per pipeline stage in get().
private val fetchSeedsStats = stats.scope("fetchSeeds")
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val filterCandidatesStats = stats.scope("filterCandidates")
private val hydrateCandidatesStats = stats.scope("hydrateCandidates")
// NOTE(review): not referenced by any method visible in this class -- confirm it is still needed.
private val getCandidatesStats = stats.scope("getCandidates")
/**
 * The function retrieves the candidates for the given user as follows:
 * 1. Seed user fetch from FRS.
 * 2. Candidate fetch from Earlybird.
 * 3. Filtering.
 * 4. Candidate hydration.
 * 5. Truncation to `maxNumResults`.
 *
 * Item stats are recorded under the "all" and "perProduct" scopes, and the composed future is
 * bounded by `timeoutConfig.frsBasedTweetEndpointTimeout` through `raiseWithin`.
 *
 * @param frsTweetCandidateGeneratorQuery the request (user, product, impressed users/tweets,
 *                                        language/country codes, params and result limit)
 * @return at most `maxNumResults` hydrated tweets; empty when no stage produced candidates
 */
def get(
  frsTweetCandidateGeneratorQuery: FrsTweetCandidateGeneratorQuery
): Future[Seq[FrsTweet]] = {
  val userId = frsTweetCandidateGeneratorQuery.userId
  val product = frsTweetCandidateGeneratorQuery.product
  val allStats = stats.scope("all")
  val perProductStats = stats.scope("perProduct", product.name)
  StatsUtil.trackItemsStats(allStats) {
    StatsUtil.trackItemsStats(perProductStats) {
      val result = for {
        seedAuthorWithScores <- StatsUtil.trackOptionItemMapStats(fetchSeedsStats) {
          fetchSeeds(
            userId,
            frsTweetCandidateGeneratorQuery.impressedUserList,
            frsTweetCandidateGeneratorQuery.languageCodeOpt,
            frsTweetCandidateGeneratorQuery.countryCodeOpt,
            frsTweetCandidateGeneratorQuery.params
          )
        }
        tweetCandidates <- StatsUtil.trackOptionItemsStats(fetchCandidatesStats) {
          val seeds = seedAuthorWithScores.getOrElse(Map.empty[UserId, FrsQueryResult])
          // Built strictly: `mapValues` would hand over a lazy view that re-reads `score` on
          // every lookup and is not serializable.
          val seedScores = seeds.map { case (authorId, frsResult) => authorId -> frsResult.score }
          fetchCandidates(
            userId,
            seeds.keys.toSeq,
            frsTweetCandidateGeneratorQuery.impressedTweetList,
            seedScores,
            frsTweetCandidateGeneratorQuery.params
          )
        }
        filteredTweetCandidates <- StatsUtil.trackOptionItemsStats(filterCandidatesStats) {
          filterCandidates(
            tweetCandidates,
            frsTweetCandidateGeneratorQuery.params
          )
        }
        hydratedTweetCandidates <- StatsUtil.trackOptionItemsStats(hydrateCandidatesStats) {
          hydrateCandidates(
            seedAuthorWithScores,
            filteredTweetCandidates
          )
        }
      } yield {
        hydratedTweetCandidates
          .map(_.take(frsTweetCandidateGeneratorQuery.maxNumResults)).getOrElse(Seq.empty)
      }
      result.raiseWithin(timeoutConfig.frsBasedTweetEndpointTimeout)(timer)
    }
  }
}
/**
 * Fetches recommended seed users from FRS and indexes them by user id.
 *
 * @param userId          the requesting user
 * @param userDenyList    users passed to FRS as a deny list
 * @param languageCodeOpt optional language code forwarded to FRS
 * @param countryCodeOpt  optional country code forwarded to FRS
 * @param params          source of the max seed count and the display location
 * @return the FRS results keyed by user id, or None when the store returns nothing
 */
private def fetchSeeds(
  userId: UserId,
  userDenyList: Set[UserId],
  languageCodeOpt: Option[String],
  countryCodeOpt: Option[String],
  params: Params
): Future[Option[Map[UserId, FrsQueryResult]]] = {
  val frsQuery = FrsStore.Query(
    userId,
    params(FrsParams.FrsBasedCandidateGenerationMaxSeedsNumParam),
    params(FrsParams.FrsBasedCandidateGenerationDisplayLocationParam).displayLocation,
    userDenyList.toSeq,
    languageCodeOpt,
    countryCodeOpt
  )

  frsStore.get(frsQuery).map { seedAuthorsOpt =>
    seedAuthorsOpt.map(_.map(seed => seed.userId -> seed).toMap)
  }
}
/**
 * Fetches tweet candidates from Earlybird for the given seed authors.
 * Returns `Future.None` without calling Earlybird when there are no seed authors.
 */
private def fetchCandidates(
  searcherUserId: UserId,
  seedAuthors: Seq[UserId],
  impressedTweetList: Set[TweetId],
  frsUserToScores: Map[UserId, Double],
  params: Params
): Future[Option[Seq[TweetWithAuthor]]] = {
  if (seedAuthors.isEmpty) Future.None
  else {
    // call earlybird
    val earlybirdQuery = EarlybirdSimilarityEngineRouter.queryFromParams(
      Some(searcherUserId),
      seedAuthors,
      impressedTweetList,
      frsUserToScoresForScoreAdjustment = Some(frsUserToScores),
      params
    )
    frsBasedSimilarityEngine.get(earlybirdQuery)
  }
}
/**
 * Filters out candidates that do not pass the visibility filter policy: when the visibility
 * filtering param is enabled, only candidates for which the tweet info store returns a value
 * are kept. When it is disabled the candidates are returned untouched.
 */
private def filterCandidates(
  candidates: Option[Seq[TweetWithAuthor]],
  params: Params
): Future[Option[Seq[TweetWithAuthor]]] = {
  val tweetIds = candidates.map(_.map(_.tweetId).toSet).getOrElse(Set.empty)
  val visibilityFilteringEnabled =
    params(FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam)

  if (!visibilityFilteringEnabled) {
    Future.value(candidates)
  } else {
    Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
      // If tweetInfo does not exist, we will filter out this tweet candidate.
      candidates.map { tweets =>
        tweets.filter(tweet => tweetInfos.get(tweet.tweetId).exists(_.isDefined))
      }
    }
  }
}
/**
 * Hydrates the candidates with the FRS candidate sources and scores of their authors.
 *
 * For each candidate the author's FRS result (if any) supplies the primary source, the author
 * score and the per-source scores. Source names are translated to feedback tokens; names that
 * are not in `TokenStrToAlgorithmMap` are dropped.
 */
private def hydrateCandidates(
  frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]],
  candidates: Option[Seq[TweetWithAuthor]]
): Future[Option[Seq[FrsTweet]]] = {

  def toFrsTweet(tweetWithAuthor: TweetWithAuthor): FrsTweet = {
    val authorResult = frsAuthorWithScores.flatMap(_.get(tweetWithAuthor.authorId))
    FrsTweet(
      tweetId = tweetWithAuthor.tweetId,
      authorId = tweetWithAuthor.authorId,
      frsPrimarySource = authorResult.flatMap(_.primarySource),
      frsAuthorScore = authorResult.map(_.score),
      frsCandidateSourceScores = authorResult.flatMap { result =>
        result.sourceWithScores.map {
          _.collect {
            // see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
            // see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
            case (candidateSourceAlgoStr, score)
                if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains(
                  candidateSourceAlgoStr) =>
              val algorithm = AlgorithmFeedbackTokens.TokenStrToAlgorithmMap
                .getOrElse(candidateSourceAlgoStr, DefaultAlgo)
              val feedbackToken =
                AlgorithmToFeedbackTokenMap.getOrElse(algorithm, DefaultAlgoToken)
              feedbackToken -> score
          }
        }
      }
    )
  }

  Future.value(candidates.map(_.map(toFrsTweet)))
}
}
/** Fallback values used when translating FRS candidate source names to feedback tokens. */
object FrsTweetCandidateGenerator {
// Algorithm used when a source name has no entry in TokenStrToAlgorithmMap.
val DefaultAlgo: Algorithm.Value = Algorithm.Other
// Feedback token used when an algorithm has no entry in AlgorithmToFeedbackTokenMap.
// 9999 is the token for Algorithm.Other
val DefaultAlgoToken: Int = AlgorithmToFeedbackTokenMap.getOrElse(DefaultAlgo, 9999)
}

View File

@ -1,156 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
class RelatedTweetCandidateGenerator @Inject() (
@Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
TweetBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
@Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
ProducerBasedUnifiedSimilarityEngine.Query,
TweetWithCandidateGenerationInfo
],
preRankFilterRunner: PreRankFilterRunner,
relatedTweetScribeLogger: RelatedTweetScribeLogger,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
globalStats: StatsReceiver) {
// Root stats scope for this generator, named after the canonical class name.
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
// Scopes for the two pipeline stages run by get().
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val preRankFilterStats = stats.scope("preRankFilter")
/**
 * Fetches related-tweet candidates for the query, runs the pre-rank filters and returns at
 * most `query.maxNumResults` candidates from the first candidate group.
 *
 * The returned future fails with UnsupportedOperationException when the filtered result
 * contains no candidate group at all.
 */
def get(
  query: RelatedTweetCandidateGeneratorQuery
): Future[Seq[InitialCandidate]] = {
  val allStats = stats.scope("all")
  val perProductStats = stats.scope("perProduct", query.product.toString)

  StatsUtil.trackItemsStats(allStats) {
    StatsUtil.trackItemsStats(perProductStats) {
      StatsUtil
        .trackBlockStats(fetchCandidatesStats) {
          fetchCandidates(query)
        }.flatMap { initialCandidates =>
          StatsUtil
            .trackBlockStats(preRankFilterStats) {
              preRankFilter(query, initialCandidates)
            }.map { filteredCandidates =>
              filteredCandidates.headOption match {
                case Some(firstGroup) => firstGroup.take(query.maxNumResults)
                case None =>
                  throw new UnsupportedOperationException(
                    "RelatedTweetCandidateGenerator results invalid")
              }
            }
        }
    }
  }
}
/**
 * Fetches candidates from the tweet-based engine for tweet ids and from the producer-based
 * engine for user ids, passing the lookup through the scribe logger.
 *
 * Any other kind of internal id raises UnsupportedOperationException.
 */
def fetchCandidates(
  query: RelatedTweetCandidateGeneratorQuery
): Future[Seq[Seq[InitialCandidate]]] = {
  // A def so the lookup (and a possible throw) happens wherever the logger evaluates its
  // argument, whether that parameter is by-name or by-value.
  def candidatesForInternalId: Future[Seq[Seq[InitialCandidate]]] =
    query.internalId match {
      case InternalId.TweetId(_) =>
        getCandidatesFromSimilarityEngine(
          query,
          TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
          tweetBasedUnifiedSimilarityEngine.getCandidates)
      case InternalId.UserId(_) =>
        getCandidatesFromSimilarityEngine(
          query,
          ProducerBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet,
          producerBasedUnifiedSimilarityEngine.getCandidates)
      case _ =>
        throw new UnsupportedOperationException(
          "RelatedTweetCandidateGenerator gets invalid InternalId")
    }

  relatedTweetScribeLogger.scribeInitialCandidates(query, candidatesForInternalId)
}
/**
 * Fetches candidates from the TweetBased/ProducerBased Unified Similarity Engine and applies
 * the VF filter based on the TweetInfoStore (through convertToInitialCandidates).
 *
 * To align with the downstream processing (filter, rank), the result is a
 * Seq[Seq[InitialCandidate]] even though it currently holds a single inner Seq.
 *
 * @param query                     the related-tweet request
 * @param fromParamsForRelatedTweet builds the engine query from the internal id and params
 * @param getFunc                   runs the engine query
 */
private def getCandidatesFromSimilarityEngine[QueryType](
  query: RelatedTweetCandidateGeneratorQuery,
  fromParamsForRelatedTweet: (InternalId, configapi.Params) => QueryType,
  getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // The single query is wrapped in a Seq to keep candidate generation evolvable; that is why
  // the result is a Seq[Seq[InitialCandidate]].
  val engineQueries = Seq(fromParamsForRelatedTweet(query.internalId, query.params))

  val candidatesPerQuery = engineQueries.map { engineQuery =>
    getFunc(engineQuery).flatMap { candidatesOpt =>
      convertToInitialCandidates(candidatesOpt.toSeq.flatten)
    }
  }

  Future.collect(candidatesPerQuery)
}
/** Runs the sequential pre-rank filters over the candidates, via the scribe logger. */
private def preRankFilter(
  query: RelatedTweetCandidateGeneratorQuery,
  candidates: Seq[Seq[InitialCandidate]]
): Future[Seq[Seq[InitialCandidate]]] = {
  // A def so evaluation happens wherever the logger evaluates its argument.
  def filtered = preRankFilterRunner.runSequentialFilters(query, candidates)

  relatedTweetScribeLogger.scribePreRankFilterCandidates(query, filtered)
}
/**
 * Looks up tweet info for every candidate and turns those that have it into InitialCandidates.
 *
 * If tweetInfo does not exist, the tweet candidate is filtered out.
 * This tweetInfo filter also acts as the VF filter.
 */
private[candidate_generation] def convertToInitialCandidates(
  candidates: Seq[TweetWithCandidateGenerationInfo]
): Future[Seq[InitialCandidate]] = {
  val tweetIds = candidates.map(_.tweetId).toSet

  Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
    for {
      candidate <- candidates
      // an absent key and a stored None are both treated as "no tweet info"
      tweetInfo <- tweetInfos.getOrElse(candidate.tweetId, None)
    } yield InitialCandidate(
      tweetId = candidate.tweetId,
      tweetInfo = tweetInfo,
      candidate.candidateGenerationInfo
    )
  }
}
}

View File

@ -1,139 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.filter.PreRankFilterRunner
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Candidate generator for related video tweets.
 *
 * For a tweet-id source it fetches candidates from the tweet-based unified similarity
 * engine, drops candidates without TweetInfo, runs the pre-rank filters and returns the
 * first resulting candidate list truncated to `query.maxNumResults`.
 */
@Singleton
class RelatedVideoTweetCandidateGenerator @Inject() (
  @Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[
    TweetBasedUnifiedSimilarityEngine.Query,
    TweetWithCandidateGenerationInfo
  ],
  preRankFilterRunner: PreRankFilterRunner,
  tweetInfoStore: ReadableStore[TweetId, TweetInfo],
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val preRankFilterStats = stats.scope("preRankFilter")

  /**
   * Fetches, filters and truncates related video tweet candidates.
   *
   * The returned future fails with UnsupportedOperationException when the filter step
   * yields no candidate list at all.
   *
   * @param query the related-video request (source id, product, params, result limit)
   * @return at most `query.maxNumResults` candidates from the first filtered list
   */
  def get(
    query: RelatedVideoTweetCandidateGeneratorQuery
  ): Future[Seq[InitialCandidate]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)
    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {
        for {
          initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
            fetchCandidates(query)
          }
          filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) {
            preRankFilter(query, initialCandidates)
          }
        } yield {
          filteredCandidates.headOption
            .getOrElse(
              throw new UnsupportedOperationException(
                "RelatedVideoTweetCandidateGenerator results invalid")
            ).take(query.maxNumResults)
        }
      }
    }
  }

  /**
   * Dispatches on the source id type. Only tweet ids are supported.
   *
   * @throws UnsupportedOperationException (synchronously) for any other InternalId variant
   */
  def fetchCandidates(
    query: RelatedVideoTweetCandidateGeneratorQuery
  ): Future[Seq[Seq[InitialCandidate]]] = {
    query.internalId match {
      case InternalId.TweetId(_) =>
        getCandidatesFromSimilarityEngine(
          query,
          TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedVideoTweet,
          tweetBasedUnifiedSimilarityEngine.getCandidates)
      case _ =>
        throw new UnsupportedOperationException(
          "RelatedVideoTweetCandidateGenerator gets invalid InternalId")
    }
  }

  /**
   * Fetch candidates from TweetBased/ProducerBased Unified Similarity Engine,
   * and apply VF filter based on TweetInfoStore.
   * To align with the downstream processing (filter, rank), we return a
   * Seq[Seq[InitialCandidate]] instead of a Seq[Candidate] even though we only have one Seq in it.
   *
   * @param query                          the related-video request
   * @param fromParamsForRelatedVideoTweet builds the engine query from the source id and params
   * @param getFunc                        runs one engine query
   */
  private def getCandidatesFromSimilarityEngine[QueryType](
    query: RelatedVideoTweetCandidateGeneratorQuery,
    fromParamsForRelatedVideoTweet: (InternalId, configapi.Params) => QueryType,
    getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of
    // candidate generation; as a result this returns Seq[Seq[InitialCandidate]].
    val engineQueries =
      Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params))
    Future
      .collect {
        // Named engineQuery so it does not shadow the request-level `query`.
        engineQueries.map { engineQuery =>
          for {
            candidates <- getFunc(engineQuery)
            prefilterCandidates <- convertToInitialCandidates(
              candidates.toSeq.flatten
            )
          } yield prefilterCandidates
        }
      }
  }

  /** Runs the sequential pre-rank filters over the fetched candidate lists. */
  private def preRankFilter(
    query: RelatedVideoTweetCandidateGeneratorQuery,
    candidates: Seq[Seq[InitialCandidate]]
  ): Future[Seq[Seq[InitialCandidate]]] = {
    preRankFilterRunner
      .runSequentialFilters(query, candidates)
  }

  /**
   * Looks up TweetInfo for every candidate and keeps, in input order, the candidates
   * that have one.
   *
   * If tweetInfo does not exist, we will filter out this tweet candidate.
   * This tweetInfo filter also acts as the VF filter.
   */
  private[candidate_generation] def convertToInitialCandidates(
    candidates: Seq[TweetWithCandidateGenerationInfo],
  ): Future[Seq[InitialCandidate]] = {
    val tweetIds = candidates.map(_.tweetId).toSet
    Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
      candidates.flatMap { candidate =>
        // A single lookup: a missing key and a stored None both drop the candidate.
        tweetInfos.get(candidate.tweetId).flatten.map { tweetInfo =>
          InitialCandidate(
            tweetId = candidate.tweetId,
            tweetInfo = tweetInfo,
            candidate.candidateGenerationInfo
          )
        }
      }
    }
  }
}

View File

@ -1,640 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo
import com.twitter.cr_mixer.model.TweetWithScore
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.InterestedInParams
import com.twitter.cr_mixer.param.SimClustersANNParams
import com.twitter.cr_mixer.similarity_engine.EngineQuery
import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.base.CandidateSource
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.ModelVersions
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.timelines.configapi
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
/**
 * This store looks for similar tweets for a given UserId that generates UserInterestedIn
 * from SimClustersANN. It will be a standalone CandidateGeneration class moving forward.
 *
 * For a UserId source, `get` queries three source embeddings (InterestedIn,
 * NextInterestedIn, AddressBookInterestedIn) against seven SANN configs
 * (prod, experimental, 1, 2, 3, 5, 4). Each combination is gated by a per-source and a
 * per-config enable flag on the Query.
 *
 * After the abstraction improvement (apply SimilarityEngine trait)
 * these CG will be subjected to change.
 */
@Singleton
case class SimClustersInterestedInCandidateGeneration @Inject() (
  @Named(ModuleNames.SimClustersANNSimilarityEngine)
  simClustersANNSimilarityEngine: StandardSimilarityEngine[
    SimClustersANNSimilarityEngine.Query,
    TweetWithScore
  ],
  statsReceiver: StatsReceiver)
    extends CandidateSource[
      SimClustersInterestedInCandidateGeneration.Query,
      Seq[TweetWithCandidateGenerationInfo]
    ] {

  override def name: String = this.getClass.getSimpleName
  private val stats = statsReceiver.scope(name)
  private val fetchCandidatesStat = stats.scope("fetchCandidates")

  /**
   * Fetches candidates for every enabled (source embedding, SANN config) combination.
   *
   * @param query flags, score thresholds and the pre-built SANN queries
   * @return for a UserId source, Some of 21 candidate lists ordered config by config
   *         (prod, experimental, 1, 2, 3, 5, 4) and, within a config, InterestedIn,
   *         NextInterestedIn, AddressBook; disabled or empty combinations contribute an
   *         empty list. For any other source id, None.
   */
  override def get(
    query: SimClustersInterestedInCandidateGeneration.Query
  ): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = {
    query.internalId match {
      case _: InternalId.UserId =>
        StatsUtil.trackOptionItemsStats(fetchCandidatesStat) {
          // One row per SANN config:
          // (config enabled, InterestedIn query, NextInterestedIn query, AddressBook query).
          // Row order defines the order of the returned candidate lists.
          val sannConfigs = Seq(
            (
              query.enableProdSimClustersANNSimilarityEngine,
              query.interestedInSimClustersANNQuery,
              query.nextInterestedInSimClustersANNQuery,
              query.addressbookInterestedInSimClustersANNQuery),
            (
              query.enableExperimentalSimClustersANNSimilarityEngine,
              query.interestedInExperimentalSimClustersANNQuery,
              query.nextInterestedInExperimentalSimClustersANNQuery,
              query.addressbookInterestedInExperimentalSimClustersANNQuery),
            (
              query.enableSimClustersANN1SimilarityEngine,
              query.interestedInSimClustersANN1Query,
              query.nextInterestedInSimClustersANN1Query,
              query.addressbookInterestedInSimClustersANN1Query),
            (
              query.enableSimClustersANN2SimilarityEngine,
              query.interestedInSimClustersANN2Query,
              query.nextInterestedInSimClustersANN2Query,
              query.addressbookInterestedInSimClustersANN2Query),
            (
              query.enableSimClustersANN3SimilarityEngine,
              query.interestedInSimClustersANN3Query,
              query.nextInterestedInSimClustersANN3Query,
              query.addressbookInterestedInSimClustersANN3Query),
            (
              query.enableSimClustersANN5SimilarityEngine,
              query.interestedInSimClustersANN5Query,
              query.nextInterestedInSimClustersANN5Query,
              query.addressbookInterestedInSimClustersANN5Query),
            (
              query.enableSimClustersANN4SimilarityEngine,
              query.interestedInSimClustersANN4Query,
              query.nextInterestedInSimClustersANN4Query,
              query.addressbookInterestedInSimClustersANN4Query)
          )

          val candidateResultFuts = sannConfigs.flatMap {
            case (configEnabled, interestedInQuery, nextInterestedInQuery, addressBookQuery) =>
              // Each source is filtered with its own threshold. Previously the InterestedIn
              // threshold was applied to all three sources, leaving the NextInterestedIn and
              // AddressBook thresholds carried by the Query unused.
              Seq(
                fetchIfEnabled(
                  query.enableUserInterestedIn && configEnabled,
                  interestedInQuery,
                  query.simClustersInterestedInMinScore),
                fetchIfEnabled(
                  query.enableUserNextInterestedIn && configEnabled,
                  nextInterestedInQuery,
                  query.simClustersNextInterestedInMinScore),
                fetchIfEnabled(
                  query.enableAddressBookNextInterestedIn && configEnabled,
                  addressBookQuery,
                  query.simClustersAddressBookInterestedInMinScore)
              )
          }

          Future.collect(candidateResultFuts).map { candidateResults =>
            Some(
              candidateResults.map(candidateResult => candidateResult.getOrElse(Seq.empty))
            )
          }
        }
      case _ =>
        stats.counter("sourceId_is_not_userId_cnt").incr()
        Future.None
    }
  }

  /** Issues the SANN query only when `enabled`; otherwise resolves to None without a call. */
  private def fetchIfEnabled(
    enabled: Boolean,
    simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    minScore: Double
  ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] =
    if (enabled)
      getInterestedInCandidateResult(simClustersANNSimilarityEngine, simClustersANNQuery, minScore)
    else
      Future.None

  /**
   * Keeps candidates whose score is strictly greater than the threshold and records, under
   * the given config id, the surviving count, one request, and whether nothing survived.
   *
   * @param simClustersAnnCandidates        candidates returned by SANN
   * @param simClustersInterestedInMinScore exclusive lower bound on the score
   * @param simClustersANNConfigId          stat-name prefix identifying the SANN config
   */
  private def simClustersCandidateMinScoreFilter(
    simClustersAnnCandidates: Seq[TweetWithScore],
    simClustersInterestedInMinScore: Double,
    simClustersANNConfigId: String
  ): Seq[TweetWithScore] = {
    val filteredCandidates = simClustersAnnCandidates
      .filter { candidate =>
        candidate.score > simClustersInterestedInMinScore
      }
    stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size)
    stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr()
    if (filteredCandidates.isEmpty)
      stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr()
    filteredCandidates.map { candidate =>
      TweetWithScore(candidate.tweetId, candidate.score)
    }
  }

  /**
   * Runs one SANN query, applies the min-score filter and attaches candidate generation
   * info to every surviving tweet.
   *
   * @param simClustersANNSimilarityEngine  engine used to run the query
   * @param simClustersANNQuery             the SANN query to run
   * @param simClustersInterestedInMinScore exclusive lower bound on the candidate score
   * @return Some(candidates) when at least one tweet survives the filter, otherwise None
   */
  private def getInterestedInCandidateResult(
    simClustersANNSimilarityEngine: StandardSimilarityEngine[
      SimClustersANNSimilarityEngine.Query,
      TweetWithScore
    ],
    simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    simClustersInterestedInMinScore: Double,
  ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = {
    simClustersANNSimilarityEngine.getCandidates(simClustersANNQuery).map {
      interestedInCandidatesOpt =>
        // Flatten first: `.size` on the Option itself is only ever 0 or 1, which made
        // the candidateSize stats report presence rather than the number of candidates.
        val interestedInCandidates = interestedInCandidatesOpt.toSeq.flatten
        stats.stat("candidateSize").add(interestedInCandidates.size)

        val embeddingCandidatesStat = stats.scope(
          simClustersANNQuery.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.name)
        embeddingCandidatesStat.stat("candidateSize").add(interestedInCandidates.size)
        // Counts both a missing result and a present-but-empty candidate list.
        if (interestedInCandidates.isEmpty) {
          embeddingCandidatesStat.counter("empty_results").incr()
        }
        embeddingCandidatesStat.counter("requests").incr()

        val filteredTweets = simClustersCandidateMinScoreFilter(
          interestedInCandidates,
          simClustersInterestedInMinScore,
          simClustersANNQuery.storeQuery.simClustersANNConfigId)

        val interestedInTweetsWithCGInfo = filteredTweets.map { tweetWithScore =>
          TweetWithCandidateGenerationInfo(
            tweetWithScore.tweetId,
            CandidateGenerationInfo(
              None,
              SimClustersANNSimilarityEngine
                .toSimilarityEngineInfo(simClustersANNQuery, tweetWithScore.score),
              Seq.empty // SANN is an atomic SE, and hence it has no contributing SEs
            )
          )
        }

        Some(interestedInTweetsWithCGInfo).filter(_.nonEmpty)
    }
  }
}
object SimClustersInterestedInCandidateGeneration {

  /**
   * Everything one candidate generation request needs: the source id, the per-source and
   * per-SANN-config enable flags, the min-score thresholds, and one pre-built SANN query
   * for each (source embedding, SANN config) combination.
   */
  case class Query(
    internalId: InternalId,
    enableUserInterestedIn: Boolean,
    enableUserNextInterestedIn: Boolean,
    enableAddressBookNextInterestedIn: Boolean,
    enableProdSimClustersANNSimilarityEngine: Boolean,
    enableExperimentalSimClustersANNSimilarityEngine: Boolean,
    enableSimClustersANN1SimilarityEngine: Boolean,
    enableSimClustersANN2SimilarityEngine: Boolean,
    enableSimClustersANN3SimilarityEngine: Boolean,
    enableSimClustersANN5SimilarityEngine: Boolean,
    enableSimClustersANN4SimilarityEngine: Boolean,
    simClustersInterestedInMinScore: Double,
    simClustersNextInterestedInMinScore: Double,
    simClustersAddressBookInterestedInMinScore: Double,
    interestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    interestedInExperimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInExperimentalSimClustersANNQuery: EngineQuery[
      SimClustersANNSimilarityEngine.Query
    ],
    addressbookInterestedInExperimentalSimClustersANNQuery: EngineQuery[
      SimClustersANNSimilarityEngine.Query
    ],
    interestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    interestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    interestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    interestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    interestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    nextInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
    addressbookInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query],
  )

  /**
   * Builds a [[Query]] from request params: reads the model version, the three source
   * embedding types, the seven SANN config ids, the enable flags and the min scores, and
   * creates one SANN query per (embedding type, config id) pair.
   *
   * @param internalId the source id the SANN queries are built for
   * @param params     the params every setting is read from
   */
  def fromParams(
    internalId: InternalId,
    params: configapi.Params,
  ): Query = {
    // SimClusters common configs
    val modelVersion =
      ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))

    // Source embedding types, one per InterestedIn flavour
    val interestedInType = params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType
    val nextInterestedInType =
      params(InterestedInParams.NextInterestedInEmbeddingIdParam).embeddingType
    val addressbookInterestedInType =
      params(InterestedInParams.AddressBookInterestedInEmbeddingIdParam).embeddingType

    // SANN config ids, one per cluster
    val prodConfigId = params(SimClustersANNParams.SimClustersANNConfigId)
    val experimentalConfigId = params(SimClustersANNParams.ExperimentalSimClustersANNConfigId)
    val ann1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
    val ann2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
    val ann3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId)
    val ann5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId)
    val ann4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId)

    Query(
      internalId = internalId,
      enableUserInterestedIn = params(InterestedInParams.EnableSourceParam),
      enableUserNextInterestedIn = params(InterestedInParams.EnableSourceSequentialModelParam),
      enableAddressBookNextInterestedIn = params(InterestedInParams.EnableSourceAddressBookParam),
      enableProdSimClustersANNSimilarityEngine =
        params(InterestedInParams.EnableProdSimClustersANNParam),
      enableExperimentalSimClustersANNSimilarityEngine =
        params(InterestedInParams.EnableExperimentalSimClustersANNParam),
      enableSimClustersANN1SimilarityEngine = params(InterestedInParams.EnableSimClustersANN1Param),
      enableSimClustersANN2SimilarityEngine = params(InterestedInParams.EnableSimClustersANN2Param),
      enableSimClustersANN3SimilarityEngine = params(InterestedInParams.EnableSimClustersANN3Param),
      enableSimClustersANN5SimilarityEngine = params(InterestedInParams.EnableSimClustersANN5Param),
      enableSimClustersANN4SimilarityEngine = params(InterestedInParams.EnableSimClustersANN4Param),
      simClustersInterestedInMinScore = params(InterestedInParams.MinScoreParam),
      simClustersNextInterestedInMinScore = params(InterestedInParams.MinScoreSequentialModelParam),
      simClustersAddressBookInterestedInMinScore =
        params(InterestedInParams.MinScoreAddressBookParam),
      // Prod SimClustersANN queries
      interestedInSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, prodConfigId, params),
      nextInterestedInSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, prodConfigId, params),
      addressbookInterestedInSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, prodConfigId, params),
      // Experimental SANN cluster queries
      interestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, experimentalConfigId, params),
      nextInterestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, experimentalConfigId, params),
      addressbookInterestedInExperimentalSimClustersANNQuery =
        SimClustersANNSimilarityEngine.fromParams(
          internalId, addressbookInterestedInType, modelVersion, experimentalConfigId, params),
      // SimClusters ANN cluster 1 queries
      interestedInSimClustersANN1Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, ann1ConfigId, params),
      nextInterestedInSimClustersANN1Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, ann1ConfigId, params),
      addressbookInterestedInSimClustersANN1Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, ann1ConfigId, params),
      // SimClusters ANN cluster 2 queries
      interestedInSimClustersANN2Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, ann2ConfigId, params),
      nextInterestedInSimClustersANN2Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, ann2ConfigId, params),
      addressbookInterestedInSimClustersANN2Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, ann2ConfigId, params),
      // SimClusters ANN cluster 3 queries
      interestedInSimClustersANN3Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, ann3ConfigId, params),
      nextInterestedInSimClustersANN3Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, ann3ConfigId, params),
      addressbookInterestedInSimClustersANN3Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, ann3ConfigId, params),
      // SimClusters ANN cluster 5 queries
      interestedInSimClustersANN5Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, ann5ConfigId, params),
      nextInterestedInSimClustersANN5Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, ann5ConfigId, params),
      addressbookInterestedInSimClustersANN5Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, ann5ConfigId, params),
      // SimClusters ANN cluster 4 queries
      interestedInSimClustersANN4Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, interestedInType, modelVersion, ann4ConfigId, params),
      nextInterestedInSimClustersANN4Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, nextInterestedInType, modelVersion, ann4ConfigId, params),
      addressbookInterestedInSimClustersANN4Query = SimClustersANNSimilarityEngine.fromParams(
        internalId, addressbookInterestedInType, modelVersion, ann4ConfigId, params),
    )
  }
}

View File

@ -1,232 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TopicTweetWithScore
import com.twitter.cr_mixer.param.TopicTweetParams
import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.TopicTweet
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.servo.util.MemoizingStatsReceiver
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Singleton
/**
* Formerly CrTopic in legacy Content Recommender. This generator finds top Tweets per Topic.
*/
@Singleton
class TopicTweetCandidateGenerator @Inject() (
certoTopicTweetSimilarityEngine: CertoTopicTweetSimilarityEngine,
skitTopicTweetSimilarityEngine: SkitTopicTweetSimilarityEngine,
skitHighPrecisionTopicTweetSimilarityEngine: SkitHighPrecisionTopicTweetSimilarityEngine,
tweetInfoStore: ReadableStore[TweetId, TweetInfo],
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
// Timer handed to `raiseWithin` in `get` to enforce the endpoint timeout.
private val timer = DefaultTimer
// All stats of this generator live under a scope named after the class.
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchCandidatesStats = stats.scope("fetchCandidates")
private val filterCandidatesStats = stats.scope("filterCandidates")
// Receives, per topic, (unique fetched tweet ids - candidates that had a TweetInfo).
private val tweetyPieFilteredStats = filterCandidatesStats.stat("tweetypie_filtered")
// Wraps `stats`; `get` derives its "all" and "perProduct" scopes from this receiver.
private val memoizedStatsReceiver = new MemoizingStatsReceiver(stats)
/**
 * Returns, for each requested topic, at most `query.maxNumResults` tweets.
 *
 * Pipeline: fetch candidates, attach TweetInfo, filter, rank, hydrate, then truncate
 * per topic. The whole pipeline is bounded by `timeoutConfig.topicTweetEndpointTimeout`.
 *
 * @param query topic ids, params, product and filtering inputs of the request
 * @return the top tweets keyed by topic id
 */
def get(
  query: TopicTweetCandidateGeneratorQuery
): Future[Map[Long, Seq[TopicTweet]]] = {
  val maxTweetAge = query.params(TopicTweetParams.MaxTweetAge)
  val productName = query.product.name
  val allStats = memoizedStatsReceiver.scope("all")
  val perProductStats = memoizedStatsReceiver.scope("perProduct", productName)

  StatsUtil.trackMapValueStats(allStats) {
    StatsUtil.trackMapValueStats(perProductStats) {
      val topTweetsPerTopicFut = fetchCandidates(query)
        .flatMap { retrievedTweets =>
          convertToInitialCandidates(retrievedTweets)
        }
        .flatMap { initialTweetCandidates =>
          filterCandidates(
            initialTweetCandidates,
            maxTweetAge,
            query.isVideoOnly,
            query.impressedTweetList)
        }
        .map { filteredTweetCandidates =>
          val hydratedTweetCandidates = hydrateCandidates(rankCandidates(filteredTweetCandidates))
          hydratedTweetCandidates.map {
            case (topicId, topicTweets) =>
              topicId -> topicTweets.take(query.maxNumResults)
          }
        }
      topTweetsPerTopicFut.raiseWithin(timeoutConfig.topicTweetEndpointTimeout)(timer)
    }
  }
}
/**
 * Queries the Certo, Skit and Skit high-precision engines for every requested topic and
 * merges their results, keeping one candidate per tweet id.
 *
 * @param query topic ids plus the video-only flag and params passed to each engine
 * @return per topic, Some(merged candidates); an engine that returns None contributes nothing
 */
private def fetchCandidates(
  query: TopicTweetCandidateGeneratorQuery
): Future[Map[TopicId, Option[Seq[TopicTweetWithScore]]]] = {
  val candidatesFutByTopic = query.topicIds.map { topicId =>
    val mergedCandidatesFut = StatsUtil.trackOptionStats(fetchCandidatesStats) {
      val certoFut = certoTopicTweetSimilarityEngine.get(
        CertoTopicTweetSimilarityEngine.fromParams(topicId, query.isVideoOnly, query.params))
      val skitTfgFut = skitTopicTweetSimilarityEngine.get(
        SkitTopicTweetSimilarityEngine.fromParams(topicId, query.isVideoOnly, query.params))
      val skitHighPrecisionFut = skitHighPrecisionTopicTweetSimilarityEngine.get(
        SkitHighPrecisionTopicTweetSimilarityEngine
          .fromParams(topicId, query.isVideoOnly, query.params))

      Future.join(certoFut, skitTfgFut, skitHighPrecisionFut).map {
        case (certoTopicTweets, skitTfgTopicTweets, skitHighPrecisionTopicTweets) =>
          val allCandidates = certoTopicTweets.getOrElse(Nil) ++
            skitTfgTopicTweets.getOrElse(Nil) ++
            skitHighPrecisionTopicTweets.getOrElse(Nil)
          // For duplicated tweet ids the first occurrence wins (Certo, then Skit, then
          // Skit high-precision).
          val uniqueCandidates = allCandidates
            .groupBy(_.tweetId).map {
              case (_, dupCandidates) => dupCandidates.head
            }.toSeq
          Some(uniqueCandidates)
      }
    }
    topicId -> mergedCandidatesFut
  }.toMap

  Future.collect(candidatesFutByTopic)
}
/**
 * Converts the raw per-topic engine results into [[InitialCandidate]]s by
 * looking up each tweet's TweetInfo in `tweetInfoStore`.
 *
 * A candidate is dropped when the store has no entry for its tweet id or the
 * entry is `None`. The number of dropped candidates per topic is recorded on
 * `tweetyPieFilteredStats`.
 *
 * @param candidatesMap engine results keyed by topic; a `None` value is treated
 *                      as an empty candidate list
 * @return a future map from topic id to the candidates that have a TweetInfo
 */
private def convertToInitialCandidates(
  candidatesMap: Map[TopicId, Option[Seq[TopicTweetWithScore]]]
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
  val initialCandidates = candidatesMap.map {
    case (topicId, candidatesOpt) =>
      val candidates = candidatesOpt.getOrElse(Nil)
      val tweetIds = candidates.map(_.tweetId).toSet
      Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
        // One lookup per candidate: a missing key and a `None` value both
        // short-circuit the for-comprehension, which filters the candidate out.
        val tweetyPieFilteredInitialCandidates = candidates.flatMap { candidate =>
          for {
            tweetInfoOpt <- tweetInfos.get(candidate.tweetId)
            tweetInfo <- tweetInfoOpt
          } yield InitialCandidate(
            tweetId = candidate.tweetId,
            tweetInfo = tweetInfo,
            CandidateGenerationInfo(
              None,
              SimilarityEngineInfo(
                similarityEngineType = candidate.similarityEngineType,
                modelId = None,
                score = Some(candidate.score)),
              Seq.empty
            )
          )
        }
        // Compare like with like: both counts are taken over the candidate
        // sequence, so the recorded value can never go negative.
        val numTweetsPreFilter = candidates.size
        val numTweetsPostFilter = tweetyPieFilteredInitialCandidates.size
        tweetyPieFilteredStats.add(numTweetsPreFilter - numTweetsPostFilter)
        topicId -> tweetyPieFilteredInitialCandidates
      }
  }
  Future.collect(initialCandidates.toSeq).map(_.toMap)
}
/**
 * Applies the topic-tweet filters to every topic's candidate list.
 *
 * Filters run in this order, each recording how many candidates it removed on
 * `filterCandidatesStats`:
 *  1. tweet age (via Snowflake id) and the exclude list,
 *  2. tweet media nudity tag,
 *  3. strict user nudity rate,
 *  4. video-only (applied only when `isVideoOnly` is true; no stat recorded).
 *
 * @param topicTweetMap   candidates keyed by topic
 * @param maxTweetAge     tweets older than `Time.now - maxTweetAge` are dropped
 * @param isVideoOnly     when true, keep only tweets whose `hasVideo` is `Some(true)`
 * @param excludeTweetIds tweet ids to drop unconditionally
 * @return a future map from topic id to the surviving candidates
 */
private def filterCandidates(
  topicTweetMap: Map[TopicId, Seq[InitialCandidate]],
  maxTweetAge: Duration,
  isVideoOnly: Boolean,
  excludeTweetIds: Set[TweetId]
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
  val oldestAllowedTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)

  // Runs the filter chain for one topic's candidates, updating stats as it goes.
  def applyFilters(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = {
    val freshAndNotExcluded = candidates.filter { candidate =>
      candidate.tweetId >= oldestAllowedTweetId && !excludeTweetIds.contains(candidate.tweetId)
    }
    filterCandidatesStats
      .stat("exclude_and_time_filtered")
      .add(candidates.size - freshAndNotExcluded.size)

    val passedTweetNudity =
      freshAndNotExcluded.filter(_.tweetInfo.isPassTweetMediaNudityTag.contains(true))
    filterCandidatesStats
      .stat("tweet_nudity_filtered")
      .add(freshAndNotExcluded.size - passedTweetNudity.size)

    val passedUserNudity =
      passedTweetNudity.filter(_.tweetInfo.isPassUserNudityRateStrict.contains(true))
    filterCandidatesStats
      .stat("user_nudity_filtered")
      .add(passedTweetNudity.size - passedUserNudity.size)

    if (isVideoOnly) passedUserNudity.filter(_.tweetInfo.hasVideo.contains(true))
    else passedUserNudity
  }

  val filteredByTopic = topicTweetMap.map {
    case (topicId, candidates) =>
      topicId -> StatsUtil.trackItemsStats(filterCandidatesStats) {
        Future.value(applyFilters(candidates))
      }
  }
  Future.collect(filteredByTopic)
}
/**
 * Orders each topic's candidates by favorite count, highest first.
 *
 * Uses a strict `map` rather than `mapValues`: in Scala 2.12 `mapValues`
 * returns a lazy view that would re-run the sort on every access, and it is
 * deprecated in Scala 2.13.
 *
 * @param tweetCandidatesMap candidates keyed by topic
 * @return the same topics, each with its candidates sorted by descending `favCount`
 */
private def rankCandidates(
  tweetCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
): Map[TopicId, Seq[InitialCandidate]] = {
  tweetCandidatesMap.map {
    case (topicId, tweetCandidates) =>
      // Negating the key turns the ascending sortBy into a descending sort.
      topicId -> tweetCandidates.sortBy(candidate => -candidate.tweetInfo.favCount)
  }
}
/**
 * Converts internal candidates into the [[TopicTweet]] response shape.
 *
 * The result is keyed by `topicId.entityId`; each candidate contributes its
 * tweet id, similarity score and the similarity engine type recorded in its
 * candidate generation info.
 *
 * @param topicCandidatesMap candidates keyed by topic
 * @return topic tweets keyed by the topic's entity id
 */
private def hydrateCandidates(
  topicCandidatesMap: Map[TopicId, Seq[InitialCandidate]]
): Map[Long, Seq[TopicTweet]] = {

  // Builds the response entry for a single candidate.
  def toTopicTweet(candidate: InitialCandidate): TopicTweet = {
    val engineType: SimilarityEngineType =
      candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType
    TopicTweet(
      tweetId = candidate.tweetId,
      score = candidate.getSimilarityScore,
      similarityEngineType = engineType
    )
  }

  topicCandidatesMap.map {
    case (topicId, candidates) => topicId.entityId -> candidates.map(toTopicTweet)
  }
}
}

View File

@ -1,179 +0,0 @@
package com.twitter.cr_mixer.candidate_generation
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
import com.twitter.cr_mixer.filter.UtegFilterRunner
import com.twitter.cr_mixer.model.CandidateGenerationInfo
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.SimilarityEngineInfo
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher
import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Generates tweet candidates from the User Tweet Entity Graph (UTEG) similarity engine.
 *
 * The pipeline run by `get` is: fetch real-graph seeds, fetch candidates from the
 * UTEG engine, attach TweetInfo, run the UTEG filters, assign ranking scores, and
 * finally truncate to `query.maxNumResults`.
 */
@Singleton
class UtegTweetCandidateGenerator @Inject() (
  @Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[
    UserTweetEntityGraphSimilarityEngine.Query,
    TweetWithScoreAndSocialProof
  ],
  utegTweetScribeLogger: UtegTweetScribeLogger,
  tweetInfoStore: ReadableStore[TweetId, TweetInfo],
  realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher,
  utegFilterRunner: UtegFilterRunner,
  globalStats: StatsReceiver) {

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val fetchSeedsStats = stats.scope("fetchSeeds")
  private val fetchCandidatesStats = stats.scope("fetchCandidates")
  private val utegFilterStats = stats.scope("utegFilter")
  private val rankStats = stats.scope("rank")

  /**
   * Runs the full UTEG candidate generation pipeline for one request.
   *
   * @param query the UTEG request (user, product, params, impressed tweets, ...)
   * @return at most `query.maxNumResults` tweets, each carrying its ranking score
   *         and the social proof returned by the engine
   */
  def get(
    query: UtegTweetCandidateGeneratorQuery
  ): Future[Seq[TweetWithScoreAndSocialProof]] = {
    val allStats = stats.scope("all")
    val perProductStats = stats.scope("perProduct", query.product.toString)
    StatsUtil.trackItemsStats(allStats) {
      StatsUtil.trackItemsStats(perProductStats) {

        /**
         * The candidate we return in the end needs a social proof field, which isn't
         * supported by any existing Candidate type, so we created
         * TweetWithScoreAndSocialProof instead.
         *
         * However, filters and light ranker expect Candidate-typed params to work. In order
         * to minimise the changes to them, we are doing conversions from/to
         * TweetWithScoreAndSocialProof to/from Candidate in this method.
         */
        for {
          realGraphSeeds <- StatsUtil.trackItemMapStats(fetchSeedsStats) {
            fetchSeeds(query)
          }
          initialTweets <- StatsUtil.trackItemsStats(fetchCandidatesStats) {
            fetchCandidates(query, realGraphSeeds)
          }
          initialCandidates <- convertToInitialCandidates(initialTweets)
          filteredCandidates <- StatsUtil.trackItemsStats(utegFilterStats) {
            utegFilter(query, initialCandidates)
          }
          rankedCandidates <- StatsUtil.trackItemsStats(rankStats) {
            rankCandidates(query, filteredCandidates)
          }
        } yield {
          val topTweets = rankedCandidates.take(query.maxNumResults)
          // Index the engine output by tweet id so social proof can be re-attached.
          convertToTweets(topTweets, initialTweets.map(tweet => tweet.tweetId -> tweet).toMap)
        }
      }
    }
  }

  /** Runs the sequential UTEG filters over a single candidate group and flattens the result. */
  private def utegFilter(
    query: UtegTweetCandidateGeneratorQuery,
    candidates: Seq[InitialCandidate]
  ): Future[Seq[InitialCandidate]] = {
    utegFilterRunner.runSequentialFilters(query, Seq(candidates)).map(_.flatten)
  }

  /**
   * Fetches the seed users and their scores from the real-graph in-source fetcher.
   * Resolves to an empty map when the fetcher returns `None`.
   */
  private def fetchSeeds(
    query: UtegTweetCandidateGeneratorQuery
  ): Future[Map[UserId, Double]] = {
    realGraphInSourceGraphFetcher
      .get(FetcherQuery(query.userId, query.product, query.userState, query.params))
      .map(_.map(_.seedWithScores).getOrElse(Map.empty))
  }

  /**
   * Turns filtered candidates into ranked candidates, using each candidate's
   * similarity score as its ranking score. The input order is kept as-is; no
   * sorting happens here.
   */
  private[candidate_generation] def rankCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    filteredCandidates: Seq[InitialCandidate],
  ): Future[Seq[RankedCandidate]] = {
    val blendedCandidates = filteredCandidates.map(candidate =>
      candidate.toBlendedCandidate(Seq(candidate.candidateGenerationInfo)))
    Future(
      blendedCandidates.map { candidate =>
        val score = candidate.getSimilarityScore
        candidate.toRankedCandidate(score)
      }
    )
  }

  /**
   * Queries the UTEG similarity engine with the given seeds and the user's
   * impressed tweets, and hands the pending result to
   * `utegTweetScribeLogger.scribeInitialCandidates`, whose returned future is
   * the result of this method. A `None` engine response becomes an empty sequence.
   */
  def fetchCandidates(
    query: UtegTweetCandidateGeneratorQuery,
    realGraphSeeds: Map[UserId, Double],
  ): Future[Seq[TweetWithScoreAndSocialProof]] = {
    val engineQuery = UserTweetEntityGraphSimilarityEngine.fromParams(
      query.userId,
      realGraphSeeds,
      Some(query.impressedTweetList.toSeq),
      query.params
    )
    utegTweetScribeLogger.scribeInitialCandidates(
      query,
      userTweetEntityGraphSimilarityEngine.getCandidates(engineQuery).map(_.toSeq.flatten)
    )
  }

  /**
   * Attaches TweetInfo from `tweetInfoStore` to each engine result.
   *
   * A candidate is dropped when the store has no entry for its tweet id or the
   * entry is `None`.
   */
  private[candidate_generation] def convertToInitialCandidates(
    candidates: Seq[TweetWithScoreAndSocialProof],
  ): Future[Seq[InitialCandidate]] = {
    val tweetIds = candidates.map(_.tweetId).toSet
    Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
      // One lookup per candidate: a missing key and a `None` value both
      // short-circuit the for-comprehension, which filters the candidate out.
      candidates.flatMap { candidate =>
        for {
          tweetInfoOpt <- tweetInfos.get(candidate.tweetId)
          tweetInfo <- tweetInfoOpt
        } yield InitialCandidate(
          tweetId = candidate.tweetId,
          tweetInfo = tweetInfo,
          CandidateGenerationInfo(
            None,
            SimilarityEngineInfo(
              similarityEngineType = SimilarityEngineType.Uteg,
              modelId = None,
              score = Some(candidate.score)),
            Seq.empty
          )
        )
      }
    }
  }

  /**
   * Converts ranked candidates back to [[TweetWithScoreAndSocialProof]], taking
   * the score from the ranked candidate and the social proof from the matching
   * original engine result.
   *
   * @param candidates ranked candidates to convert
   * @param tweetMap   original engine results keyed by tweet id
   * @throws Exception if a ranked candidate's tweet id is absent from `tweetMap`
   */
  private[candidate_generation] def convertToTweets(
    candidates: Seq[RankedCandidate],
    tweetMap: Map[TweetId, TweetWithScoreAndSocialProof]
  ): Seq[TweetWithScoreAndSocialProof] = {
    candidates.map { candidate =>
      tweetMap
        .get(candidate.tweetId).map { tweet =>
          TweetWithScoreAndSocialProof(
            tweet.tweetId,
            candidate.predictionScore,
            tweet.socialProofByType
          )
          // The exception should never be thrown
        }.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets"))
    }
  }
}

View File

@ -1,13 +0,0 @@
# Scala library target built from every *.scala file matched by the `sources` glob.
scala_library(
sources = ["*.scala"],
tags = ["bazel-compatible"],
# Targets this library depends on (javax.inject, configapi, sibling cr-mixer
# packages, finatra inject-core, and the SimClusters thrift definitions).
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"configapi/configapi-core",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"finatra/inject/inject-core/src/main/scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -1,473 +0,0 @@
package com.twitter.cr_mixer.config
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.exception.InvalidSANNConfigException
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclustersann.thriftscala.ScoringAlgorithm
import com.twitter.simclustersann.thriftscala.{SimClustersANNConfig => ThriftSimClustersANNConfig}
import com.twitter.util.Duration
/**
 * Scala-side description of a SimClusters ANN (SANN) query configuration.
 *
 * Tweet age bounds are held as [[com.twitter.util.Duration]]s here and are
 * converted to whole hours when the thrift config is built.
 */
case class SimClustersANNConfig(
  maxNumResults: Int,
  minScore: Double,
  candidateEmbeddingType: EmbeddingType,
  maxTopTweetsPerCluster: Int,
  maxScanClusters: Int,
  maxTweetCandidateAge: Duration,
  minTweetCandidateAge: Duration,
  annAlgorithm: ScoringAlgorithm) {

  /** Thrift form of this config, computed once when the instance is constructed. */
  val toSANNConfigThrift: ThriftSimClustersANNConfig = {
    val maxAgeInHours = maxTweetCandidateAge.inHours
    val minAgeInHours = minTweetCandidateAge.inHours
    ThriftSimClustersANNConfig(
      maxNumResults = maxNumResults,
      minScore = minScore,
      candidateEmbeddingType = candidateEmbeddingType,
      maxTopTweetsPerCluster = maxTopTweetsPerCluster,
      maxScanClusters = maxScanClusters,
      maxTweetCandidateAgeHours = maxAgeInHours,
      minTweetCandidateAgeHours = minAgeInHours,
      annAlgorithm = annAlgorithm,
    )
  }
}
/**
 * Registry of the named SimClusters ANN configurations.
 *
 * Each family below (one per query embedding type) defines a `_Default` config
 * plus dated experiment variants created with `copy`. All of them are exposed
 * by name through `DefaultConfigMappings` and looked up via `getConfig`.
 */
object SimClustersANNConfig {
// Baseline that every family's `_Default` config is copied from.
final val DefaultConfig = SimClustersANNConfig(
maxNumResults = 200,
minScore = 0.0,
candidateEmbeddingType = EmbeddingType.LogFavBasedTweet,
maxTopTweetsPerCluster = 800,
maxScanClusters = 50,
maxTweetCandidateAge = 24.hours,
minTweetCandidateAge = 0.hours,
annAlgorithm = ScoringAlgorithm.CosineSimilarity,
)
/*
SimClustersANNConfigId: String
Format: Prod - EmbeddingType_ModelVersion_Default
Format: Experiment - EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number. Date : YYYYMMDD
*/
// ---- FavBasedProducer family ----
private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val FavBasedProducer_Model20m145k2020_20220617_06 =
FavBasedProducer_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
private val FavBasedProducer_Model20m145k2020_20220801 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config (175200 hours = 7300 days)
private val FavBasedProducer_Model20m145k2020_20220810 =
FavBasedProducer_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val FavBasedProducer_Model20m145k2020_20220818 =
FavBasedProducer_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val FavBasedProducer_Model20m145k2020_20220819 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val FavBasedProducer_Model20m145k2020_20221221 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val FavBasedProducer_Model20m145k2020_20221220 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- LogFavLongestL2EmbeddingTweet family ----
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- UnfilteredUserInterestedIn family ----
private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val UnfilteredUserInterestedIn_Model20m145k2020_20220617_06 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
// NOTE(review): unlike the 20220801 variant of every other family, this one is
// derived from the _20220617_06 config (48h max age) rather than from _Default
// (24h max age). Confirm whether that is intentional or a copy-paste slip.
private val UnfilteredUserInterestedIn_Model20m145k2020_20220801 =
UnfilteredUserInterestedIn_Model20m145k2020_20220617_06.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val UnfilteredUserInterestedIn_Model20m145k2020_20220810 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val UnfilteredUserInterestedIn_Model20m145k2020_20220818 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val UnfilteredUserInterestedIn_Model20m145k2020_20220819 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val UnfilteredUserInterestedIn_Model20m145k2020_20221221 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val UnfilteredUserInterestedIn_Model20m145k2020_20221220 =
UnfilteredUserInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- LogFavBasedUserInterestedInFromAPE family ----
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220 =
LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE family ----
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default =
DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220 =
LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- UserNextInterestedIn family ----
private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's experiment: maxTweetCandidateAge raised to 2 days (48 hours)
private val UserNextInterestedIn_Model20m145k2020_20220617_06 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
maxTweetCandidateAge = 48.hours,
)
// Experimental SANN config
private val UserNextInterestedIn_Model20m145k2020_20220801 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val UserNextInterestedIn_Model20m145k2020_20220810 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val UserNextInterestedIn_Model20m145k2020_20220818 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val UserNextInterestedIn_Model20m145k2020_20220819 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val UserNextInterestedIn_Model20m145k2020_20221221 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val UserNextInterestedIn_Model20m145k2020_20221220 =
UserNextInterestedIn_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// ---- FollowBasedProducer family ----
// Every config in this family is copied from FavBasedProducer_Model20m145k2020_Default,
// and there is no _20220617_06 variant.
// Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow
private val FollowBasedProducer_Model20m145k2020_Default =
FavBasedProducer_Model20m145k2020_Default.copy()
// Experimental SANN config
private val FollowBasedProducer_Model20m145k2020_20220801 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet,
)
// SANN-1 config
private val FollowBasedProducer_Model20m145k2020_20220810 =
FavBasedProducer_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-2 config
private val FollowBasedProducer_Model20m145k2020_20220818 =
FavBasedProducer_Model20m145k2020_Default.copy(
maxNumResults = 100,
candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet,
maxTweetCandidateAge = 175200.hours,
maxTopTweetsPerCluster = 1600
)
// SANN-3 config
private val FollowBasedProducer_Model20m145k2020_20220819 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet,
)
// SANN-5 config
private val FollowBasedProducer_Model20m145k2020_20221221 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val FollowBasedProducer_Model20m145k2020_20221220 =
FavBasedProducer_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// Lookup table from config id ("EmbeddingType_ModelVersion_Id") to config.
// Each key is the same text as the name of the val it maps to.
val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map(
"FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default,
"FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06,
"FavBasedProducer_Model20m145k2020_20220801" -> FavBasedProducer_Model20m145k2020_20220801,
"FavBasedProducer_Model20m145k2020_20220810" -> FavBasedProducer_Model20m145k2020_20220810,
"FavBasedProducer_Model20m145k2020_20220818" -> FavBasedProducer_Model20m145k2020_20220818,
"FavBasedProducer_Model20m145k2020_20220819" -> FavBasedProducer_Model20m145k2020_20220819,
"FavBasedProducer_Model20m145k2020_20221221" -> FavBasedProducer_Model20m145k2020_20221221,
"FavBasedProducer_Model20m145k2020_20221220" -> FavBasedProducer_Model20m145k2020_20221220,
"FollowBasedProducer_Model20m145k2020_Default" -> FollowBasedProducer_Model20m145k2020_Default,
"FollowBasedProducer_Model20m145k2020_20220801" -> FollowBasedProducer_Model20m145k2020_20220801,
"FollowBasedProducer_Model20m145k2020_20220810" -> FollowBasedProducer_Model20m145k2020_20220810,
"FollowBasedProducer_Model20m145k2020_20220818" -> FollowBasedProducer_Model20m145k2020_20220818,
"FollowBasedProducer_Model20m145k2020_20220819" -> FollowBasedProducer_Model20m145k2020_20220819,
"FollowBasedProducer_Model20m145k2020_20221221" -> FollowBasedProducer_Model20m145k2020_20221221,
"FollowBasedProducer_Model20m145k2020_20221220" -> FollowBasedProducer_Model20m145k2020_20221220,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221,
"LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220,
"UnfilteredUserInterestedIn_Model20m145k2020_Default" -> UnfilteredUserInterestedIn_Model20m145k2020_Default,
"UnfilteredUserInterestedIn_Model20m145k2020_20220617_06" -> UnfilteredUserInterestedIn_Model20m145k2020_20220617_06,
"UnfilteredUserInterestedIn_Model20m145k2020_20220801" -> UnfilteredUserInterestedIn_Model20m145k2020_20220801,
"UnfilteredUserInterestedIn_Model20m145k2020_20220810" -> UnfilteredUserInterestedIn_Model20m145k2020_20220810,
"UnfilteredUserInterestedIn_Model20m145k2020_20220818" -> UnfilteredUserInterestedIn_Model20m145k2020_20220818,
"UnfilteredUserInterestedIn_Model20m145k2020_20220819" -> UnfilteredUserInterestedIn_Model20m145k2020_20220819,
"UnfilteredUserInterestedIn_Model20m145k2020_20221221" -> UnfilteredUserInterestedIn_Model20m145k2020_20221221,
"UnfilteredUserInterestedIn_Model20m145k2020_20221220" -> UnfilteredUserInterestedIn_Model20m145k2020_20221220,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221,
"LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221,
"LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220,
"UserNextInterestedIn_Model20m145k2020_Default" -> UserNextInterestedIn_Model20m145k2020_Default,
"UserNextInterestedIn_Model20m145k2020_20220617_06" -> UserNextInterestedIn_Model20m145k2020_20220617_06,
"UserNextInterestedIn_Model20m145k2020_20220801" -> UserNextInterestedIn_Model20m145k2020_20220801,
"UserNextInterestedIn_Model20m145k2020_20220810" -> UserNextInterestedIn_Model20m145k2020_20220810,
"UserNextInterestedIn_Model20m145k2020_20220818" -> UserNextInterestedIn_Model20m145k2020_20220818,
"UserNextInterestedIn_Model20m145k2020_20220819" -> UserNextInterestedIn_Model20m145k2020_20220819,
"UserNextInterestedIn_Model20m145k2020_20221221" -> UserNextInterestedIn_Model20m145k2020_20221221,
"UserNextInterestedIn_Model20m145k2020_20221220" -> UserNextInterestedIn_Model20m145k2020_20221220,
)
/**
 * Looks up a registered config by its three-part id.
 *
 * The lookup key is `embeddingType + "_" + modelVersion + "_" + id`.
 *
 * @param embeddingType first segment of the config id
 * @param modelVersion  second segment of the config id
 * @param id            last segment (e.g. "Default" or a date such as "20220801")
 * @return the config registered under the composed key in `DefaultConfigMappings`
 * @throws InvalidSANNConfigException if no config is registered under that key
 */
def getConfig(
embeddingType: String,
modelVersion: String,
id: String
): SimClustersANNConfig = {
val configName = embeddingType + "_" + modelVersion + "_" + id
DefaultConfigMappings.get(configName) match {
case Some(config) => config
case None =>
throw InvalidSANNConfigException(s"Incorrect config id passed in for SANN $configName")
}
}
}

View File

@ -1,24 +0,0 @@
package com.twitter.cr_mixer.config
import com.twitter.util.Duration
/**
 * Bundle of the timeout durations used across cr-mixer.
 *
 * This class only carries the values; where each timeout is enforced is decided
 * by the code that reads the corresponding field. The grouping comments below
 * describe which feature area each field belongs to.
 */
case class TimeoutConfig(
/* Default timeouts for candidate generator */
serviceTimeout: Duration,
signalFetchTimeout: Duration,
similarityEngineTimeout: Duration,
annServiceClientTimeout: Duration,
/* For Uteg Candidate Generator */
utegSimilarityEngineTimeout: Duration,
/* For User State Store */
userStateUnderlyingStoreTimeout: Duration,
userStateStoreTimeout: Duration,
/* For FRS based tweets */
// Timeout passed to EarlyBird server
earlybirdServerTimeout: Duration,
// Timeout set on CrMixer side
earlybirdSimilarityEngineTimeout: Duration,
frsBasedTweetEndpointTimeout: Duration,
topicTweetEndpointTimeout: Duration,
// Timeout Settings for Navi gRPC Client
naviRequestTimeout: Duration)

View File

@ -1,48 +0,0 @@
# Build target compiling every *.scala file in this directory with fatal warnings and strict deps.
# NOTE(review): presumably the target for the com.twitter.cr_mixer.controller package (it
# immediately precedes that package's source in this listing) — confirm against the file path.
# NOTE(review): storehaus:core is listed under both 3rdparty/jvm and 3rdparty/src/jvm — confirm
# both entries are required.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/debug",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"finagle/finagle-base-http/src/main",
"finagle/finagle-core/src/main",
"finagle/finagle-http/src/main/scala",
"finatra/http-server/src/main/scala/com/twitter/finatra/http:controller",
"finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/ads/schema:common-scala",
"src/thrift/com/twitter/context:twitter-context-scala",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/timelines/render:thrift-scala",
"src/thrift/com/twitter/timelines/timeline_logging:thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"stringcenter/client",
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview",
"timelines/src/main/scala/com/twitter/timelines/tracing/lensview/funnelseries",
"twitter-context/src/main/scala",
"user-signal-service/thrift/src/main/thrift:thrift-scala",
],
)

View File

@ -1,757 +0,0 @@
package com.twitter.cr_mixer.controller
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.candidate_generation.AdsCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.CrCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.FrsTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.RelatedTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.RelatedVideoTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.TopicTweetCandidateGenerator
import com.twitter.cr_mixer.candidate_generation.UtegTweetCandidateGenerator
import com.twitter.cr_mixer.featureswitch.ParamsBuilder
import com.twitter.cr_mixer.logging.CrMixerScribeLogger
import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger
import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger
import com.twitter.cr_mixer.logging.RelatedTweetScribeMetadata
import com.twitter.cr_mixer.logging.ScribeMetadata
import com.twitter.cr_mixer.logging.UtegTweetScribeLogger
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedAdsCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.AdsParams
import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.cr_mixer.param.RelatedTweetGlobalParams
import com.twitter.cr_mixer.param.RelatedVideoTweetGlobalParams
import com.twitter.cr_mixer.param.TopicTweetParams
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.param.decider.EndpointLoadShedder
import com.twitter.cr_mixer.thriftscala.AdTweetRecommendation
import com.twitter.cr_mixer.thriftscala.AdsRequest
import com.twitter.cr_mixer.thriftscala.AdsResponse
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.cr_mixer.thriftscala.FrsTweetRequest
import com.twitter.cr_mixer.thriftscala.FrsTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedTweet
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweet
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetResponse
import com.twitter.cr_mixer.thriftscala.TopicTweet
import com.twitter.cr_mixer.thriftscala.TopicTweetRequest
import com.twitter.cr_mixer.thriftscala.TopicTweetResponse
import com.twitter.cr_mixer.thriftscala.TweetRecommendation
import com.twitter.cr_mixer.thriftscala.UtegTweet
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
import com.twitter.cr_mixer.util.MetricTagUtil
import com.twitter.cr_mixer.util.SignalTimestampStatsUtil
import com.twitter.cr_mixer.{thriftscala => t}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finatra.thrift.Controller
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.storehaus.ReadableStore
import com.twitter.timelines.timeline_logging.{thriftscala => thriftlog}
import com.twitter.timelines.tracing.lensview.funnelseries.TweetScoreFunnelSeries
import com.twitter.util.Future
import com.twitter.util.Time
import java.util.UUID
import javax.inject.Inject
import org.apache.commons.lang.exception.ExceptionUtils
/**
 * Finatra Thrift controller for the `t.CrMixer` service.
 *
 * It registers one handler per Thrift method (tweet, related tweet, related video tweet,
 * FRS-based, topic, UTEG and ads recommendations). Each handler converts the Thrift request
 * into a candidate-generator query, invokes the matching injected generator, and maps the
 * candidates back into the Thrift response type. Failures inside a handler's rescued section
 * are logged and answered with an empty response.
 *
 * All collaborators (generators, scribe loggers, decider, params builder, load shedder,
 * stores and stats receiver) are constructor-injected.
 */
class CrMixerThriftController @Inject() (
crCandidateGenerator: CrCandidateGenerator,
relatedTweetCandidateGenerator: RelatedTweetCandidateGenerator,
relatedVideoTweetCandidateGenerator: RelatedVideoTweetCandidateGenerator,
utegTweetCandidateGenerator: UtegTweetCandidateGenerator,
frsTweetCandidateGenerator: FrsTweetCandidateGenerator,
topicTweetCandidateGenerator: TopicTweetCandidateGenerator,
crMixerScribeLogger: CrMixerScribeLogger,
relatedTweetScribeLogger: RelatedTweetScribeLogger,
utegTweetScribeLogger: UtegTweetScribeLogger,
adsRecommendationsScribeLogger: AdsRecommendationsScribeLogger,
adsCandidateGenerator: AdsCandidateGenerator,
decider: CrMixerDecider,
paramsBuilder: ParamsBuilder,
endpointLoadShedder: EndpointLoadShedder,
signalTimestampStatsUtil: SignalTimestampStatsUtil,
tweetRecommendationResultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse],
userStateStore: ReadableStore[UserId, UserState],
statsReceiver: StatsReceiver)
extends Controller(t.CrMixer) {
// Funnel-series recorder used by scribeTweetScoreFunnelSeries; constructed on first access.
lazy private val tweetScoreFunnelSeries = new TweetScoreFunnelSeries(statsReceiver)
/**
 * Logs a failed endpoint call as one message: a summary line with the endpoint name and the
 * exception's localized message, followed by the exception's full stack trace.
 *
 * logger.info() is used on purpose instead of logger.error(), because logger.error
 * sometimes suppresses the detailed stacktrace.
 *
 * @param endpoint name of the endpoint that failed
 * @param e        the failure to report
 */
private def logErrMessage(endpoint: String, e: Throwable): Unit = {
  val summary = s"Failed endpoint $endpoint: ${e.getLocalizedMessage}"
  val stackTrace = ExceptionUtils.getStackTrace(e)
  val msg = summary + "\n" + stackTrace
  logger.info(msg)
}
/**
 * Produces a non-negative Long request id from a random UUID.
 *
 * The id is the most significant 64 bits of the UUID with the sign bit cleared. See
 * https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid
 */
private def generateRequestUUID(): Long = {
  val mostSignificantBits = UUID.randomUUID().getMostSignificantBits
  // Masking with Long.MaxValue clears the sign bit, so the result is never negative.
  mostSignificantBits & Long.MaxValue
}
// Handler for GetTweetRecommendations.
// Requires clientContext.userId (IllegalArgumentException otherwise), builds the candidate
// generator query, and inside the load shedder: fetches ranked candidates, scribes them
// separately when any candidate is annotated as Blue Verified, records the funnel series for
// the Home product, converts to the Thrift response, offers it to the results cache, and
// scribes the request. Failures in that section yield an empty CrMixerTweetResponse.
handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args =>
val endpointName = "getTweetRecommendations"
val requestUUID = generateRequestUUID()
val startTime = Time.now.inMilliseconds
val userId = args.request.clientContext.userId.getOrElse(
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
)
val queryFut = buildCrCandidateGeneratorQuery(args.request, requestUUID, userId)
queryFut.flatMap { query =>
val scribeMetadata = ScribeMetadata.from(query)
endpointLoadShedder(endpointName, query.product.originalName) {
val response = crCandidateGenerator.get(query)
// Only route through the Blue Verified scribe when at least one candidate carries the
// annotation; otherwise the original response future is passed along untouched.
val blueVerifiedScribedResponse = response.flatMap { rankedCandidates =>
val hasBlueVerifiedCandidate = rankedCandidates.exists { tweet =>
tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true)
}
if (hasBlueVerifiedCandidate) {
crMixerScribeLogger.scribeGetTweetRecommendationsForBlueVerified(
scribeMetadata,
response)
} else {
response
}
}
val thriftResponse = blueVerifiedScribedResponse.map { candidates =>
// The funnel series is recorded for Home only; its return value is not used here.
if (query.product == t.Product.Home) {
scribeTweetScoreFunnelSeries(candidates)
}
buildThriftResponse(candidates)
}
// Returns Unit; any store write it starts is not awaited by this handler.
cacheTweetRecommendationResults(args.request, thriftResponse)
crMixerScribeLogger.scribeGetTweetRecommendations(
args.request,
startTime,
scribeMetadata,
thriftResponse)
}.rescue {
// Load shedding is answered with an empty response and is not logged.
case EndpointLoadShedder.LoadSheddingException =>
Future(CrMixerTweetResponse(Seq.empty))
// Any other failure is logged and also answered with an empty response.
case e =>
logErrMessage(endpointName, e)
Future(CrMixerTweetResponse(Seq.empty))
}
}
}
/** *
* GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor are essentially
* doing very similar things, except that one passes in TweetId which calls TweetBased engine,
* and the other passes in AuthorId which calls ProducerBased engine.
*/
// Handler for GetRelatedTweetsForQueryTweet: delegates to getRelatedTweets under the endpoint
// name "getRelatedTweetsForQueryTweet".
handle(t.CrMixer.GetRelatedTweetsForQueryTweet) {
  args: t.CrMixer.GetRelatedTweetsForQueryTweet.Args =>
    getRelatedTweets("getRelatedTweetsForQueryTweet", args.request)
}
// Handler for GetRelatedVideoTweetsForQueryTweet: delegates to getRelatedVideoTweets.
// NOTE(review): the endpoint name string ends in "ForQueryVideoTweet" while the Thrift method
// ends in "ForQueryTweet"; it is passed through unchanged because it is a runtime key.
handle(t.CrMixer.GetRelatedVideoTweetsForQueryTweet) {
  args: t.CrMixer.GetRelatedVideoTweetsForQueryTweet.Args =>
    getRelatedVideoTweets("getRelatedVideoTweetsForQueryVideoTweet", args.request)
}
// Handler for GetRelatedTweetsForQueryAuthor: delegates to getRelatedTweets under the endpoint
// name "getRelatedTweetsForQueryAuthor".
handle(t.CrMixer.GetRelatedTweetsForQueryAuthor) {
  args: t.CrMixer.GetRelatedTweetsForQueryAuthor.Args =>
    getRelatedTweets("getRelatedTweetsForQueryAuthor", args.request)
}
/**
 * Shared implementation of the related-tweet endpoints.
 *
 * Builds the related-tweet query, then, inside the load shedder, fetches candidates, converts
 * them to a RelatedTweetResponse and passes that future to the related-tweet scribe logger.
 * Failures in that section yield an empty RelatedTweetResponse; failures while building the
 * query are not rescued here.
 *
 * @param endpointName name given to the load shedder and used in error logs
 * @param request      the Thrift request
 */
private def getRelatedTweets(
endpointName: String,
request: RelatedTweetRequest
): Future[RelatedTweetResponse] = {
val requestUUID = generateRequestUUID()
val startTime = Time.now.inMilliseconds
val queryFut = buildRelatedTweetQuery(request, requestUUID)
queryFut.flatMap { query =>
val relatedTweetScribeMetadata = RelatedTweetScribeMetadata.from(query)
endpointLoadShedder(endpointName, query.product.originalName) {
relatedTweetScribeLogger.scribeGetRelatedTweets(
request,
startTime,
relatedTweetScribeMetadata,
relatedTweetCandidateGenerator
.get(query)
.map(buildRelatedTweetResponse))
}.rescue {
// Load shedding is answered with an empty response and is not logged.
case EndpointLoadShedder.LoadSheddingException =>
Future(RelatedTweetResponse(Seq.empty))
// Any other failure is logged and also answered with an empty response.
case e =>
logErrMessage(endpointName, e)
Future(RelatedTweetResponse(Seq.empty))
}
}
}
/**
 * Implementation of the related-video-tweet endpoint.
 *
 * Builds the query, then fetches candidates inside the load shedder and converts them to a
 * RelatedVideoTweetResponse. Failures in that section yield an empty response; only
 * non-load-shedding failures are logged.
 *
 * @param endpointName name given to the load shedder and used in error logs
 * @param request      the Thrift request
 */
private def getRelatedVideoTweets(
  endpointName: String,
  request: RelatedVideoTweetRequest
): Future[RelatedVideoTweetResponse] = {
  val requestUUID = generateRequestUUID()
  buildRelatedVideoTweetQuery(request, requestUUID).flatMap { query =>
    val guardedResponse = endpointLoadShedder(endpointName, query.product.originalName) {
      relatedVideoTweetCandidateGenerator.get(query).map(buildRelatedVideoTweetResponse)
    }
    guardedResponse.rescue {
      case EndpointLoadShedder.LoadSheddingException =>
        Future(RelatedVideoTweetResponse(Seq.empty))
      case e =>
        logErrMessage(endpointName, e)
        Future(RelatedVideoTweetResponse(Seq.empty))
    }
  }
}
// Handler for GetFrsBasedTweetRecommendations.
// Builds the FRS query, fetches candidates inside the load shedder and wraps them in an
// FrsTweetResponse. Failures in that section yield an empty response.
handle(t.CrMixer.GetFrsBasedTweetRecommendations) {
  args: t.CrMixer.GetFrsBasedTweetRecommendations.Args =>
    val endpointName = "getFrsBasedTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val queryFut = buildFrsBasedTweetQuery(args.request, requestUUID)
    queryFut.flatMap { query =>
      endpointLoadShedder(endpointName, query.product.originalName) {
        frsTweetCandidateGenerator.get(query).map(FrsTweetResponse(_))
      }.rescue {
        // Load shedding is expected under pressure: answer with an empty response without
        // logging a stack trace, as the other load-shed endpoints in this controller do.
        case EndpointLoadShedder.LoadSheddingException =>
          Future(FrsTweetResponse(Seq.empty))
        case e =>
          logErrMessage(endpointName, e)
          Future(FrsTweetResponse(Seq.empty))
      }
    }
}
// Handler for GetTopicTweetRecommendations.
// buildTopicTweetQuery runs synchronously, so the IllegalArgumentExceptions it throws surface
// from this handler body and do not go through the rescue below. Candidate fetching happens
// inside the load shedder; failures in that section yield an empty response.
handle(t.CrMixer.GetTopicTweetRecommendations) {
  args: t.CrMixer.GetTopicTweetRecommendations.Args =>
    val endpointName = "getTopicTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val query = buildTopicTweetQuery(args.request, requestUUID)
    endpointLoadShedder(endpointName, query.product.originalName) {
      topicTweetCandidateGenerator.get(query).map(TopicTweetResponse(_))
    }.rescue {
      // Load shedding is expected under pressure: answer with an empty response without
      // logging a stack trace, as the other load-shed endpoints in this controller do.
      case EndpointLoadShedder.LoadSheddingException =>
        Future(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
      case e =>
        logErrMessage(endpointName, e)
        Future(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]]))
    }
}
// Handler for GetUtegTweetRecommendations.
// Builds the UTEG query, then inside the load shedder fetches candidates, converts them to a
// UtegTweetResponse and passes that future to the UTEG scribe logger. Failures in that section
// yield an empty response.
handle(t.CrMixer.GetUtegTweetRecommendations) {
  args: t.CrMixer.GetUtegTweetRecommendations.Args =>
    val endpointName = "getUtegTweetRecommendations"
    val requestUUID = generateRequestUUID()
    val startTime = Time.now.inMilliseconds
    val queryFut = buildUtegTweetQuery(args.request, requestUUID)
    queryFut
      .flatMap { query =>
        val scribeMetadata = ScribeMetadata.from(query)
        endpointLoadShedder(endpointName, query.product.originalName) {
          utegTweetScribeLogger.scribeGetUtegTweetRecommendations(
            args.request,
            startTime,
            scribeMetadata,
            utegTweetCandidateGenerator
              .get(query)
              .map(buildUtegTweetResponse)
          )
        }.rescue {
          // Load shedding is expected under pressure: answer with an empty response without
          // logging a stack trace, as the other load-shed endpoints in this controller do.
          case EndpointLoadShedder.LoadSheddingException =>
            Future(UtegTweetResponse(Seq.empty))
          case e =>
            logErrMessage(endpointName, e)
            Future(UtegTweetResponse(Seq.empty))
        }
      }
}
// Handler for GetAdsRecommendations.
// Builds the ads query, fetches ranked ad candidates, converts them to an AdsResponse and
// passes that future to the ads scribe logger together with the EnableScribe param. Any
// failure of the scribed response is logged and answered with an empty AdsResponse. This
// endpoint does not go through the load shedder.
handle(t.CrMixer.GetAdsRecommendations) { args: t.CrMixer.GetAdsRecommendations.Args =>
  val endpointName = "getAdsRecommendations"
  val queryFut = buildAdsCandidateGeneratorQuery(args.request)
  val startTime = Time.now.inMilliseconds
  queryFut.flatMap { query =>
    val scribeMetadata = ScribeMetadata.from(query)
    val response = adsCandidateGenerator.get(query).map(buildAdsResponse)
    val scribedResponse = adsRecommendationsScribeLogger.scribeGetAdsRecommendations(
      args.request,
      startTime,
      scribeMetadata,
      response,
      query.params(AdsParams.EnableScribe)
    )
    scribedResponse.rescue {
      case e =>
        logErrMessage(endpointName, e)
        Future(AdsResponse(Seq.empty))
    }
  }
}
/**
 * Converts a CrMixerTweetRequest into a CrCandidateGeneratorQuery.
 *
 * Looks up the user's state (falling back to EnumUnknownUserState(100) when the store has no
 * entry), counts it under `<product>/CrMixerTweetRequest/UserState`, builds the params for the
 * request, and picks the result cap from the (product, productContext) pair.
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 * @param userId        the requesting user
 * @return a future of the query; it fails with IllegalArgumentException when the
 *         (product, productContext) pair is not one of the supported combinations
 */
private def buildCrCandidateGeneratorQuery(
  thriftRequest: CrMixerTweetRequest,
  requestUUID: Long,
  userId: Long
): Future[CrCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("CrMixerTweetRequest")

  userStateStore.get(userId).map { maybeUserState =>
    val userState = maybeUserState.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // Specify product-specific behavior mapping here
    val maxNumResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults.getOrElse(9999)
      case (t.Product.Notifications, Some(t.ProductContext.NotificationsContext(_))) |
          (t.Product.Email | t.Product.ImmersiveMediaViewer | t.Product.VideoCarousel, None) =>
        params(GlobalParams.MaxCandidatesPerRequestParam)
      case _ =>
        throw new IllegalArgumentException(
          s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
        )
    }

    CrCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID,
      languageCode = thriftRequest.clientContext.languageCode
    )
  }
}
/**
 * Converts a RelatedTweetRequest into a RelatedTweetCandidateGeneratorQuery.
 *
 * The user state is read from the store when the client context carries a userId; with no
 * userId, or no stored entry, it is EnumUnknownUserState(100). The state is counted under
 * `<product>/RelatedTweetRequest/UserState`.
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 */
private def buildRelatedTweetQuery(
  thriftRequest: RelatedTweetRequest,
  requestUUID: Long
): Future[RelatedTweetCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val requestStats = statsReceiver.scope(product.toString).scope("RelatedTweetRequest")
  val unknownUserState: UserState = UserState.EnumUnknownUserState(100)

  val userStateFut: Future[UserState] = thriftRequest.clientContext.userId match {
    case Some(userId) => userStateStore.get(userId).map(_.getOrElse(unknownUserState))
    case None => Future.value(unknownUserState)
  }

  userStateFut.map { userState =>
    requestStats.scope("UserState").counter(userState.toString).incr()
    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)

    // Specify product-specific behavior mapping here
    // Currently, Home takes 10, and RUX takes 100
    val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam)

    RelatedTweetCandidateGeneratorQuery(
      internalId = thriftRequest.internalId,
      clientContext = thriftRequest.clientContext,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Converts an AdsRequest into an AdsCandidateGeneratorQuery.
 *
 * Generates its own request id, reads the user state (EnumUnknownUserState(100) when the store
 * has no entry), builds the params and takes the result cap from
 * AdsParams.AdsCandidateGenerationMaxCandidatesNumParam.
 *
 * @param thriftRequest the incoming Thrift request
 * @throws IllegalArgumentException synchronously, when the client context has no userId
 */
private def buildAdsCandidateGeneratorQuery(
  thriftRequest: AdsRequest
): Future[AdsCandidateGeneratorQuery] = {
  val clientContext = thriftRequest.clientContext
  val userId = clientContext.userId.getOrElse(
    throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
  )
  val product = thriftRequest.product
  val requestUUID = generateRequestUUID()

  userStateStore.get(userId).map { maybeUserState =>
    val userState = maybeUserState.getOrElse(UserState.EnumUnknownUserState(100))
    val params = paramsBuilder.buildFromClientContext(clientContext, product, userState)
    val maxNumResults = params(AdsParams.AdsCandidateGenerationMaxCandidatesNumParam)

    AdsCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      params = params,
      maxNumResults = maxNumResults,
      requestUUID = requestUUID
    )
  }
}
/**
 * Converts a RelatedVideoTweetRequest into a RelatedVideoTweetCandidateGeneratorQuery.
 *
 * The user state is read from the store when the client context carries a userId; with no
 * userId, or no stored entry, it is EnumUnknownUserState(100). The state is counted under
 * `<product>/RelatedVideoTweetRequest/UserState`.
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 */
private def buildRelatedVideoTweetQuery(
  thriftRequest: RelatedVideoTweetRequest,
  requestUUID: Long
): Future[RelatedVideoTweetCandidateGeneratorQuery] = {
  val product = thriftRequest.product
  val requestStats = statsReceiver.scope(product.toString).scope("RelatedVideoTweetRequest")
  val unknownUserState: UserState = UserState.EnumUnknownUserState(100)

  val userStateFut: Future[UserState] = thriftRequest.clientContext.userId match {
    case Some(userId) => userStateStore.get(userId).map(_.getOrElse(unknownUserState))
    case None => Future.value(unknownUserState)
  }

  userStateFut.map { userState =>
    requestStats.scope("UserState").counter(userState.toString).incr()
    val params =
      paramsBuilder.buildFromClientContext(thriftRequest.clientContext, product, userState)
    val maxNumResults = params(RelatedVideoTweetGlobalParams.MaxCandidatesPerRequestParam)

    RelatedVideoTweetCandidateGeneratorQuery(
      internalId = thriftRequest.internalId,
      clientContext = thriftRequest.clientContext,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Converts a UtegTweetRequest into a UtegTweetCandidateGeneratorQuery.
 *
 * Reads the user state (EnumUnknownUserState(100) when the store has no entry), counts it under
 * `<product>/UtegTweetRequest/UserState`, and builds the params. Only the Home product with a
 * HomeContext is accepted; the result cap is that context's maxResults, or 9999 when unset.
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 * @return a future of the query; it fails with IllegalArgumentException for any other
 *         (product, productContext) pair
 * @throws IllegalArgumentException synchronously, when the client context has no userId
 */
private def buildUtegTweetQuery(
  thriftRequest: UtegTweetRequest,
  requestUUID: Long
): Future[UtegTweetCandidateGeneratorQuery] = {
  val clientContext = thriftRequest.clientContext
  val userId = clientContext.userId.getOrElse(
    throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
  )
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("UtegTweetRequest")

  userStateStore.get(userId).map { maybeUserState =>
    val userState = maybeUserState.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params = paramsBuilder.buildFromClientContext(clientContext, product, userState)

    // Specify product-specific behavior mapping here
    val maxNumResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults.getOrElse(9999)
      case _ =>
        throw new IllegalArgumentException(
          s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
        )
    }

    UtegTweetCandidateGeneratorQuery(
      userId = userId,
      product = product,
      userState = userState,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      params = params,
      requestUUID = requestUUID
    )
  }
}
/**
 * Converts a TopicTweetRequest into a TopicTweetCandidateGeneratorQuery.
 *
 * Unlike the other query builders in this controller, this one is synchronous and does not
 * read the user state store; params are built with EnumUnknownUserState(100).
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 * @throws IllegalArgumentException when the client context has no userId, or when the
 *                                  (product, productContext) pair is not a supported combination
 */
private def buildTopicTweetQuery(
  thriftRequest: TopicTweetRequest,
  requestUUID: Long
): TopicTweetCandidateGeneratorQuery = {
  val userId = thriftRequest.clientContext.userId.getOrElse(
    throw new IllegalArgumentException(
      "userId must be present in the TopicTweetRequest clientContext")
  )
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext

  // Specify product-specific behavior mapping here
  val isVideoOnly = (product, productContext) match {
    case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) =>
      context.isVideoOnly
    case (
          t.Product.TopicLandingPage | t.Product.HomeTopicsBackfill | t.Product.TopicTweetsStrato,
          None) =>
      false
    case _ =>
      throw new IllegalArgumentException(
        s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer"
      )
  }

  // The counter name is a fixed string, like the request-name scopes used by the other query
  // builders. The previous `TopicTweetRequest.toString` took the toString of the generated
  // companion object, which is presumably not a stable name (default Object.toString carries
  // an identity hash) — NOTE(review): confirm no dashboard relies on the old counter name.
  statsReceiver.scope(product.toString).counter("TopicTweetRequest").incr()

  val params =
    paramsBuilder.buildFromClientContext(
      thriftRequest.clientContext,
      product,
      UserState.EnumUnknownUserState(100)
    )

  // Every requested topic id is qualified with the request language; country is left unset.
  val topicIds = thriftRequest.topicIds.map { topicId =>
    TopicId(
      entityId = topicId,
      language = thriftRequest.clientContext.languageCode,
      country = None
    )
  }.toSet

  TopicTweetCandidateGeneratorQuery(
    userId = userId,
    topicIds = topicIds,
    product = product,
    maxNumResults = params(TopicTweetParams.MaxTopicTweetCandidatesParam),
    impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
    params = params,
    requestUUID = requestUUID,
    isVideoOnly = isVideoOnly
  )
}
/**
 * Converts an FrsTweetRequest into an FrsTweetCandidateGeneratorQuery.
 *
 * Reads the user state (EnumUnknownUserState(100) when the store has no entry), counts it under
 * `<product>/FrsTweetRequest/UserState`, and builds the params. The result cap is the Home
 * context's maxResults when the product is Home and that value is set; otherwise it is
 * FrsBasedCandidateGenerationMaxCandidatesNumParam.
 *
 * @param thriftRequest the incoming Thrift request
 * @param requestUUID   id attached to the query
 * @throws IllegalArgumentException synchronously, when the client context has no userId
 */
private def buildFrsBasedTweetQuery(
  thriftRequest: FrsTweetRequest,
  requestUUID: Long
): Future[FrsTweetCandidateGeneratorQuery] = {
  val clientContext = thriftRequest.clientContext
  val userId = clientContext.userId.getOrElse(
    throw new IllegalArgumentException(
      "userId must be present in the FrsTweetRequest clientContext")
  )
  val product = thriftRequest.product
  val productContext = thriftRequest.productContext
  val requestStats = statsReceiver.scope(product.toString).scope("FrsTweetRequest")

  userStateStore.get(userId).map { maybeUserState =>
    val userState = maybeUserState.getOrElse(UserState.EnumUnknownUserState(100))
    requestStats.scope("UserState").counter(userState.toString).incr()

    val params = paramsBuilder.buildFromClientContext(clientContext, product, userState)

    // Only a Home request with a HomeContext can carry an explicit cap.
    val requestedMaxResults = (product, productContext) match {
      case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
        homeContext.maxResults
      case _ =>
        None
    }
    // getOrElse takes its default by name, so the param is read only when no cap was requested.
    val maxNumResults =
      requestedMaxResults.getOrElse(params(FrsBasedCandidateGenerationMaxCandidatesNumParam))

    FrsTweetCandidateGeneratorQuery(
      userId = userId,
      product = product,
      maxNumResults = maxNumResults,
      impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet,
      impressedUserList = thriftRequest.excludedUserIds.getOrElse(Nil).toSet,
      params = params,
      languageCodeOpt = clientContext.languageCode,
      countryCodeOpt = clientContext.countryCode,
      requestUUID = requestUUID
    )
  }
}
/**
 * Maps ranked candidates to a CrMixerTweetResponse.
 *
 * Each candidate becomes a TweetRecommendation carrying its tweet id, prediction score, metric
 * tags and latest source-signal timestamp. The recommendations are also handed to
 * signalTimestampStatsUtil before being returned.
 */
private def buildThriftResponse(
  candidates: Seq[RankedCandidate]
): CrMixerTweetResponse = {
  def toRecommendation(ranked: RankedCandidate): TweetRecommendation =
    TweetRecommendation(
      tweetId = ranked.tweetId,
      score = ranked.predictionScore,
      metricTags = Some(MetricTagUtil.buildMetricTags(ranked)),
      latestSourceSignalTimestampInMillis =
        SignalTimestampStatsUtil.buildLatestSourceSignalTimestamp(ranked)
    )

  val recommendations = candidates.map(toRecommendation)
  signalTimestampStatsUtil.statsSignalTimestamp(recommendations)
  CrMixerTweetResponse(recommendations)
}
/**
 * Records one dimension/measure pair per candidate in a new funnel-series span.
 *
 * The dimension is the tweet id with the similarity engine type value of the candidate's
 * reasonChosen; the measure is the prediction score. The span block yields the candidates
 * together with those measures.
 */
private def scribeTweetScoreFunnelSeries(
  candidates: Seq[RankedCandidate]
): Seq[RankedCandidate] = {
  // 202210210901 is a random number for code search of Lensview
  tweetScoreFunnelSeries.startNewSpan(
    name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType",
    codePtr = 202210210901L) {
    val dimensionMeasures = candidates.map { ranked =>
      val dimension = thriftlog.RequestTweetDimension(
        ranked.tweetId,
        ranked.reasonChosen.similarityEngineInfo.similarityEngineType.value)
      val measure = thriftlog.RequestTweetMeasure(ranked.predictionScore)
      thriftlog.TweetDimensionMeasure(
        dimension = Some(dimension),
        measure = Some(measure)
      )
    }
    (candidates, dimensionMeasures)
  }
}
/**
 * Maps initial candidates to a RelatedTweetResponse; each RelatedTweet carries the tweet id,
 * the candidate's similarity score and the tweet's author id.
 */
private def buildRelatedTweetResponse(candidates: Seq[InitialCandidate]): RelatedTweetResponse =
  RelatedTweetResponse(candidates.map { initialCandidate =>
    RelatedTweet(
      tweetId = initialCandidate.tweetId,
      score = Some(initialCandidate.getSimilarityScore),
      authorId = Some(initialCandidate.tweetInfo.authorId)
    )
  })
/**
 * Maps initial candidates to a RelatedVideoTweetResponse; each RelatedVideoTweet carries the
 * tweet id and the candidate's similarity score.
 */
private def buildRelatedVideoTweetResponse(
  candidates: Seq[InitialCandidate]
): RelatedVideoTweetResponse =
  RelatedVideoTweetResponse(candidates.map { initialCandidate =>
    RelatedVideoTweet(
      tweetId = initialCandidate.tweetId,
      score = Some(initialCandidate.getSimilarityScore)
    )
  })
/**
 * Maps scored candidates to a UtegTweetResponse; each UtegTweet carries the tweet id, score
 * and social proof grouped by type.
 */
private def buildUtegTweetResponse(
  candidates: Seq[TweetWithScoreAndSocialProof]
): UtegTweetResponse =
  UtegTweetResponse(candidates.map { scored =>
    UtegTweet(
      tweetId = scored.tweetId,
      score = scored.score,
      socialProofByType = scored.socialProofByType
    )
  })
/**
 * Maps ranked ad candidates to an AdsResponse; each AdTweetRecommendation carries the tweet
 * id, prediction score and line item info.
 */
private def buildAdsResponse(
  candidates: Seq[RankedAdsCandidate]
): AdsResponse = {
  val adRecommendations = candidates.map { rankedAd =>
    AdTweetRecommendation(
      tweetId = rankedAd.tweetId,
      score = rankedAd.predictionScore,
      lineItems = Some(rankedAd.lineItemInfo))
  }
  AdsResponse(ads = adRecommendations)
}
/**
 * Best-effort write of a tweet recommendation response into tweetRecommendationResultsStore.
 *
 * The response is stored only when the decider enables caching for the user, the product is
 * Home, and the response contains at least one tweet. The write is started once `response`
 * succeeds; this method returns immediately and does not wait for it.
 *
 * @param request  the Thrift request the response belongs to
 * @param response the future response to cache
 * @throws IllegalArgumentException when the client context has no userId
 */
private def cacheTweetRecommendationResults(
  request: CrMixerTweetRequest,
  response: Future[CrMixerTweetResponse]
): Unit = {
  val userId = request.clientContext.userId.getOrElse(
    throw new IllegalArgumentException(
      "userId must be present in getTweetRecommendations() Thrift clientContext"))
  if (decider.isAvailableForId(userId, DeciderConstants.getTweetRecommendationsCacheRate)) {
    // flatMap keeps the store write in a single future instead of a nested Future[Future[_]].
    response.flatMap { crMixerTweetResponse =>
      if (request.product == t.Product.Home && crMixerTweetResponse.tweets.nonEmpty) {
        tweetRecommendationResultsStore.put((userId, crMixerTweetResponse))
      } else {
        // Future.Unit is the completed Future[Unit]; Future.value(Unit) would wrap the
        // Unit companion object instead of the unit value.
        Future.Unit
      }
    }
  }
}
}

View File

@ -1,7 +0,0 @@
# Build target compiling every *.scala file in this directory with fatal warnings and strict
# deps; it declares no dependencies.
# NOTE(review): presumably the target for the com.twitter.cr_mixer.exception package (it
# immediately precedes that package's source in this listing) — confirm against the file path.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [],
)

View File

@ -1,4 +0,0 @@
package com.twitter.cr_mixer
package exception
/**
 * Exception carrying a plain message, raised for an invalid SANN config.
 *
 * @param msg the exception message, passed through to [[Exception]]
 */
case class InvalidSANNConfigException(msg: String) extends Exception(msg)

View File

@ -1,35 +0,0 @@
# Build target compiling every *.scala file in this directory with fatal warnings and strict deps.
# NOTE(review): presumably the target for the com.twitter.cr_mixer.featureswitch package (it
# immediately precedes that package's sources in this listing) — confirm against the file path.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"abdecider/src/main/scala",
"configapi/configapi-abdecider",
"configapi/configapi-core",
"configapi/configapi-featureswitches:v2",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
"featureswitches/featureswitches-core",
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -1,79 +0,0 @@
package com.twitter.cr_mixer
package featureswitch
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.abdecider.LoggingABDecider
import com.twitter.abdecider.Recipient
import com.twitter.abdecider.Bucket
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.util.Local
import scala.collection.concurrent.{Map => ConcurrentMap}
/**
 * Decorator around a [[LoggingABDecider]] that additionally records every impressed bucket
 * into the request-local map held by [[CrMixerImpressedBuckets]].
 *
 * The request-local map must have been initialized earlier in the request by
 * [[SetImpressedBucketsLocalContextFilter]]; without it the bucket is not recorded. See
 * https://twitter.github.io/finagle/guide/Contexts.html for Finagle's mechanism for carrying
 * request-scoped state without threading it through every call.
 *
 * Only `impression` adds behavior (stats plus recording); every other member forwards
 * directly to the wrapped decider.
 */
case class CrMixerLoggingABDecider(
  loggingAbDecider: LoggingABDecider,
  statsReceiver: StatsReceiver)
    extends LoggingABDecider {

  private val abDeciderStats = statsReceiver.scope("cr_logging_ab_decider")

  /** Forwards the impression, counting and recording the bucket when one is returned. */
  override def impression(
    experimentName: String,
    recipient: Recipient
  ): Option[Bucket] = {
    StatsUtil.trackNonFutureBlockStats(abDeciderStats.scope("log_impression")) {
      val impressedBucket = loggingAbDecider.impression(experimentName, recipient)
      for (bucket <- impressedBucket) {
        abDeciderStats.counter("impressions").incr()
        CrMixerImpressedBuckets.recordImpressedBucket(bucket)
      }
      impressedBucket
    }
  }

  override def track(
    experimentName: String,
    eventName: String,
    recipient: Recipient
  ): Unit =
    loggingAbDecider.track(experimentName, eventName, recipient)

  override def bucket(
    experimentName: String,
    recipient: Recipient
  ): Option[Bucket] =
    loggingAbDecider.bucket(experimentName, recipient)

  override def experiments: Seq[String] = loggingAbDecider.experiments

  override def experiment(experimentName: String) =
    loggingAbDecider.experiment(experimentName)
}
/**
 * Holder of the request-local map of impressed buckets.
 *
 * The map lives in a [[com.twitter.util.Local]]; when the Local is unset, reads return None
 * and recording is a no-op.
 */
object CrMixerImpressedBuckets {
  private[featureswitch] val localImpressedBucketsMap = new Local[ConcurrentMap[Bucket, Boolean]]

  /**
   * Gets all impressed buckets for this request, or None when the Local is unset.
   */
  def getAllImpressedBuckets: Option[List[Bucket]] =
    localImpressedBucketsMap().map(_.keysIterator.toList)

  /** Marks the bucket as impressed in the request-local map, if one is set. */
  private[featureswitch] def recordImpressedBucket(bucket: Bucket) =
    for (impressedBuckets <- localImpressedBucketsMap()) {
      impressedBuckets.update(bucket, true)
    }
}

View File

@ -1,151 +0,0 @@
package com.twitter.cr_mixer.featureswitch
import com.twitter.abdecider.LoggingABDecider
import com.twitter.abdecider.UserRecipient
import com.twitter.cr_mixer.{thriftscala => t}
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.discovery.common.configapi.FeatureContextBuilder
import com.twitter.featureswitches.FSRecipient
import com.twitter.featureswitches.UserAgent
import com.twitter.featureswitches.{Recipient => FeatureSwitchRecipient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.timelines.configapi.Config
import com.twitter.timelines.configapi.FeatureValue
import com.twitter.timelines.configapi.ForcedFeatureContext
import com.twitter.timelines.configapi.OrElseFeatureContext
import com.twitter.timelines.configapi.Params
import com.twitter.timelines.configapi.RequestContext
import com.twitter.timelines.configapi.abdecider.LoggingABDeciderExperimentContext
import javax.inject.Inject
import javax.inject.Singleton
/** Singleton object for building [[Params]] to override */
@Singleton
class ParamsBuilder @Inject() (
  globalStats: StatsReceiver,
  abDecider: LoggingABDecider,
  featureContextBuilder: FeatureContextBuilder,
  config: Config) {

  private val stats = globalStats.scope("params")

  /**
   * Builds the request [[Params]] from a client context.
   *
   * When `clientContext.userId` is defined, the feature-switch recipient carries the user id and
   * user roles, and the request context also gets an A/B experiment context for that user.
   * When it is absent, a recipient without user id and roles is used and no experiment context
   * is attached.
   *
   * @param clientContext    client context of the request
   * @param product          product, exposed to feature switches as a custom field
   * @param userState        user state, exposed to feature switches as a custom field
   * @param userRoleOverride when defined, replaces the roles found on `clientContext`
   * @param featureOverrides forced feature values, consulted before the regular feature context
   */
  def buildFromClientContext(
    clientContext: ClientContext,
    product: t.Product,
    userState: UserState,
    userRoleOverride: Option[Set[String]] = None,
    featureOverrides: Map[String, FeatureValue] = Map.empty,
  ): Params = {
    clientContext.userId match {
      case Some(userId) =>
        val userRecipient = buildFeatureSwitchRecipient(
          userId = Some(userId),
          userRoles = resolveUserRoles(userRoleOverride, clientContext),
          clientContext = clientContext,
          product = product,
          userState = userState
        )
        val featureContext = OrElseFeatureContext(
          ForcedFeatureContext(featureOverrides),
          featureContextBuilder(
            Some(userId),
            Some(userRecipient)
          ))

        config(
          requestContext = RequestContext(
            userId = Some(userId),
            experimentContext = LoggingABDeciderExperimentContext(
              abDecider,
              Some(UserRecipient(userId, Some(userId)))),
            featureContext = featureContext
          ),
          stats
        )

      case None =>
        val guestRecipient = buildFeatureSwitchRecipient(
          userId = None,
          userRoles = None,
          clientContext = clientContext,
          product = product,
          userState = userState
        )
        // clientContext.userId is known to be None in this branch.
        val featureContext = OrElseFeatureContext(
          ForcedFeatureContext(featureOverrides),
          featureContextBuilder(
            clientContext.userId,
            Some(guestRecipient)
          )
        ) //ExperimentContext with GuestRecipient is not supported as there is no active use-cases yet in CrMixer

        config(
          requestContext = RequestContext(
            userId = clientContext.userId,
            featureContext = featureContext
          ),
          stats
        )
    }
  }

  /** An explicit override wins; otherwise fall back to the roles on the client context. */
  private def resolveUserRoles(
    userRolesOverride: Option[Set[String]],
    clientContext: ClientContext
  ): Option[Set[String]] = {
    val rolesFromContext: Option[Set[String]] = clientContext.userRoles.map(_.toSet)
    userRolesOverride.orElse(rolesFromContext)
  }

  /**
   * Single place where the feature-switch recipient is assembled, for both logged-in and
   * logged-out requests. Device, guest, language, country and user agent always come from
   * `clientContext`; product and user state are attached as custom fields.
   */
  private def buildFeatureSwitchRecipient(
    userId: Option[Long],
    userRoles: Option[Set[String]],
    clientContext: ClientContext,
    product: t.Product,
    userState: UserState
  ): FeatureSwitchRecipient = {
    val recipient = FSRecipient(
      userId = userId,
      userRoles = userRoles,
      deviceId = clientContext.deviceId,
      guestId = clientContext.guestId,
      languageCode = clientContext.languageCode,
      countryCode = clientContext.countryCode,
      userAgent = clientContext.userAgent.flatMap(UserAgent(_)),
      isVerified = None,
      isTwoffice = None,
      tooClient = None,
      highWaterMark = None
    )

    recipient.withCustomFields(
      (ParamsBuilder.ProductCustomField, product.toString),
      (ParamsBuilder.UserStateCustomField, userState.toString)
    )
  }
}

object ParamsBuilder {
  // Names of the custom fields attached to every feature-switch recipient built above.
  private val ProductCustomField = "product_id"
  private val UserStateCustomField = "user_state"
}

View File

@ -1,22 +0,0 @@
package com.twitter.cr_mixer.featureswitch
import com.twitter.finagle.Filter
import javax.inject.Inject
import javax.inject.Singleton
import scala.collection.concurrent.TrieMap
import com.twitter.abdecider.Bucket
import com.twitter.finagle.Service
/**
 * Type-agnostic filter that installs a fresh, empty bucket map into
 * `CrMixerImpressedBuckets.localImpressedBucketsMap` for the duration of the wrapped
 * service call.
 */
@Singleton
class SetImpressedBucketsLocalContextFilter @Inject() () extends Filter.TypeAgnostic {

  override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] =
    (req: Req, next: Service[Req, Rep]) => {
      // Original author's note: Trie map has no locks and O(1) inserts
      val impressedBuckets = TrieMap.empty[Bucket, Boolean]
      CrMixerImpressedBuckets.localImpressedBucketsMap.let(impressedBuckets) {
        next(req)
      }
    }
}

View File

@ -1,22 +0,0 @@
# Scala library target covering every *.scala file in this directory.
# Compiler warnings are fatal and dependency checking is strict, so each
# package used by the sources must be listed explicitly below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/javax/inject:javax.inject",
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"finagle/finagle-core/src/main",
"frigate/frigate-common:util",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
],
)

View File

@ -1,22 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
/**
 * Contract shared by all candidate filters: a filter derives a config value from the request
 * once, then applies it to the grouped candidates.
 */
trait FilterBase {

  /** Per-request configuration consumed by [[filter]]; each implementation chooses its type. */
  type ConfigType

  /** Identifier of this filter. */
  def name: String

  /**
   * Build the config params here. Passing param() into the filter is strongly discouraged
   * because param() can be slow when called many times.
   *
   * @param request the candidate generator query the config is derived from
   */
  def requestToConfig[CGQueryType <: CandidateGeneratorQuery](request: CGQueryType): ConfigType

  /**
   * Applies the filter.
   *
   * @param candidates grouped candidates, one inner sequence per group
   * @param config     value previously produced by [[requestToConfig]]
   * @return the filtered groups
   */
  def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]]
}

View File

@ -1,63 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * Removes candidates the request lists as already impressed, and candidates that are
 * themselves one of the source tweets.
 */
@Singleton
case class ImpressedTweetlistFilter() extends FilterBase {
  import ImpressedTweetlistFilter._

  override val name: String = this.getClass.getCanonicalName

  override type ConfigType = FilterConfig

  /**
   * Drops every candidate whose tweet id is in `config.impressedTweetList`, or equals the
   * tweet id of the source found on the first candidate of any inner sequence.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: FilterConfig
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // All candidates of one inner Seq are guaranteed to share the same sourceInfo (their
    // similarityEngineInfo may differ), so the first candidate represents the whole group.
    // Only sources identified by a tweet id contribute an id here.
    val sourceTweetIds = candidates.flatMap { sameSourceCandidates =>
      sameSourceCandidates.headOption
        .flatMap(_.candidateGenerationInfo.sourceInfoOpt)
        .map(_.internalId)
        .collect { case InternalId.TweetId(id) => id }
    }

    val excludedTweetIds: Set[TweetId] = config.impressedTweetList ++ sourceTweetIds

    val remaining: Seq[Seq[InitialCandidate]] =
      candidates.map { sameSourceCandidates =>
        sameSourceCandidates.filterNot(candidate => excludedTweetIds.contains(candidate.tweetId))
      }

    Future.value(remaining)
  }

  /** The config is simply the impressed tweet list carried by the request. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType
  ): FilterConfig =
    FilterConfig(request.impressedTweetList)
}

object ImpressedTweetlistFilter {
  /** @param impressedTweetList tweet ids to exclude from the candidates */
  case class FilterConfig(impressedTweetList: Set[TweetId])
}

View File

@ -1,80 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Filters in-network tweets: candidates whose author appears in the requesting user's
 * real-graph candidate list are removed.
 */
@Singleton
case class InNetworkFilter @Inject() (
  @Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq],
  globalStats: StatsReceiver)
    extends FilterBase {

  override val name: String = this.getClass.getCanonicalName

  import InNetworkFilter._

  override type ConfigType = FilterConfig

  private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
  private val filterCandidatesStats = stats.scope("filter_candidates")

  /** Runs [[filterCandidates]] wrapped in `StatsUtil.trackItemsStats`. */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    filterConfig: FilterConfig
  ): Future[Seq[Seq[InitialCandidate]]] =
    StatsUtil.trackItemsStats(filterCandidatesStats) {
      filterCandidates(candidates, filterConfig)
    }

  /**
   * Candidates pass through untouched unless the filter is enabled and a user id is known.
   * Otherwise the user's real-graph entry is fetched and candidates authored by any user in
   * it are dropped; a missing entry removes nothing.
   */
  private def filterCandidates(
    candidates: Seq[Seq[InitialCandidate]],
    filterConfig: FilterConfig
  ): Future[Seq[Seq[InitialCandidate]]] =
    filterConfig match {
      case FilterConfig(Some(userId), true) =>
        realGraphStoreMh.get(userId).map { realGraphCandidatesOpt =>
          val inNetworkAuthorIds =
            realGraphCandidatesOpt.toSeq.flatMap(_.candidates).map(_.userId).toSet
          candidates.map { sourceCandidates =>
            sourceCandidates.filterNot { candidate =>
              inNetworkAuthorIds.contains(candidate.tweetInfo.authorId)
            }
          }
        }
      case _ =>
        Future.value(candidates)
    }

  /**
   * Only UTEG queries can enable this filter; every other query type yields a disabled
   * config without a user id.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType
  ): FilterConfig =
    request match {
      case UtegTweetCandidateGeneratorQuery(userId, _, _, _, _, params, _) =>
        FilterConfig(
          userIdOpt = Some(userId),
          enableInNetworkFilter = params(UtegTweetGlobalParams.EnableInNetworkFilterParam))
      case _ =>
        FilterConfig(userIdOpt = None, enableInNetworkFilter = false)
    }
}

object InNetworkFilter {
  /**
   * @param userIdOpt             user whose real-graph entry is looked up
   * @param enableInNetworkFilter whether the filter should remove anything at all
   */
  case class FilterConfig(
    userIdOpt: Option[UserId],
    enableInNetworkFilter: Boolean)
}

View File

@ -1,58 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Applies the post-rank filters to ranked candidates and records the candidate count before
 * and after filtering.
 */
@Singleton
case class PostRankFilterRunner @Inject() (
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)

  private val beforeCount = scopedStats.stat("candidate_count", "before")
  private val afterCount = scopedStats.stat("candidate_count", "after")

  /**
   * @param query      the request; not consulted by the current filters
   * @param candidates ranked candidates to filter
   * @return the candidates that survive [[removeBadRecentNotificationCandidates]]
   */
  def run(
    query: CrCandidateGeneratorQuery,
    candidates: Seq[RankedCandidate]
  ): Future[Seq[RankedCandidate]] = {
    beforeCount.add(candidates.size)

    Future(
      removeBadRecentNotificationCandidates(candidates)
    ).map { results =>
      afterCount.add(results.size)
      results
    }
  }

  /**
   * Remove "bad" quality candidates generated by recent notifications.
   * A candidate is bad when it has exactly one potential reason and that reason's source is a
   * notification click.
   * e.g.:
   * tweetA {recent notification1} -> bad
   * tweetB {recent notification1 recent notification2} -> good
   * tweetC {recent notification1 recent follow1} -> good (two reasons, so the size check fails)
   * SD-19397
   *
   * NOTE(review): the earlier version of this comment labelled tweetC as bad, which the
   * predicate below does not implement - confirm the intended rule against SD-19397.
   */
  private[filter] def removeBadRecentNotificationCandidates(
    candidates: Seq[RankedCandidate]
  ): Seq[RankedCandidate] =
    candidates.filterNot(isBadQualityRecentNotificationCandidate)

  /** True when the candidate's only potential reason comes from a notification click. */
  private def isBadQualityRecentNotificationCandidate(candidate: RankedCandidate): Boolean =
    candidate.potentialReasons.size == 1 &&
      candidate.potentialReasons.headOption
        .flatMap(_.sourceInfoOpt)
        .exists(_.sourceType == SourceType.NotificationClick)
}

View File

@ -1,99 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Runs the pre-rank filters, one after another, over the grouped initial candidates.
 */
@Singleton
class PreRankFilterRunner @Inject() (
  impressedTweetListFilter: ImpressedTweetlistFilter,
  tweetAgeFilter: TweetAgeFilter,
  videoTweetFilter: VideoTweetFilter,
  tweetReplyFilter: ReplyFilter,
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)

  /**
   * The order of the filters does not matter as long as we do not apply .take(N) truncation
   * across all filters. In other words, it is fine that we first do tweetAgeFilter, and then
   * we do impressedTweetListFilter, or the other way around.
   *
   * NOTE(review): an earlier version of this comment described signal based filters being
   * placed ahead of tweetAgeFilter; no such filter is part of this list.
   */
  val orderedFilters: Seq[FilterBase] = Seq(
    tweetAgeFilter,
    impressedTweetListFilter,
    videoTweetFilter,
    tweetReplyFilter
  )

  /**
   * Applies [[orderedFilters]] in order, recording stats under this class's scope.
   *
   * @param request    query from which each filter derives its config
   * @param candidates grouped candidates to filter
   */
  def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[Seq[InitialCandidate]]] =
    PreRankFilterRunner.runSequentialFilters(
      request,
      candidates,
      orderedFilters,
      scopedStats
    )
}
object PreRankFilterRunner {

  /**
   * Records, under the given `stage` suffix ("before" or "after"), how many groups are empty
   * and how many candidates each group holds.
   */
  private def recordCandidateStats(
    candidates: Seq[Seq[InitialCandidate]],
    statsReceiver: StatsReceiver,
    stage: String
  ): Unit = {
    statsReceiver
      .counter("empty_sources", stage).incr(
        candidates.count { _.isEmpty }
      )
    candidates.foreach { sourceCandidates =>
      statsReceiver.counter("candidates", stage).incr(sourceCandidates.size)
    }
  }

  /**
   * Helper function for running some candidates through a sequence of filters.
   *
   * Filters run strictly one after another: each filter receives the output of the previous
   * one. Candidate stats are recorded before and after every filter, scoped by the filter name.
   */
  private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
    filters: Seq[FilterBase],
    statsReceiver: StatsReceiver
  ): Future[Seq[Seq[InitialCandidate]]] =
    filters.foldLeft(Future.value(candidates)) {
      case (candsFut, filter) =>
        candsFut.flatMap { cands =>
          // One scoped receiver per filter, shared by the before and after recordings.
          val filterStats = statsReceiver.scope(filter.name)
          recordCandidateStats(cands, filterStats, "before")
          filter
            .filter(cands, filter.requestToConfig(request))
            .map { filteredCands =>
              recordCandidateStats(filteredCands, filterStats, "after")
              filteredCands
            }
        }
    }
}

View File

@ -1,40 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Filters candidates that are replies.
 */
@Singleton
case class ReplyFilter @Inject() () extends FilterBase {

  override def name: String = this.getClass.getCanonicalName

  override type ConfigType = Boolean

  /**
   * When `config` is true, removes every candidate whose `tweetInfo.isReply` is `Some(true)`;
   * candidates with an unknown reply status are kept. When false, nothing is removed.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val kept =
      if (!config) candidates
      else candidates.map(_.filterNot(_.tweetInfo.isReply.contains(true)))
    Future.value(kept)
  }

  /** The filter is enabled for every request. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType = true
}

View File

@ -1,41 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Filters candidates that are retweets.
 */
@Singleton
case class RetweetFilter @Inject() () extends FilterBase {

  override def name: String = this.getClass.getCanonicalName

  override type ConfigType = Boolean

  /**
   * With `config` set, drops candidates whose `tweetInfo.isRetweet` is `Some(true)` and keeps
   * those with an unknown retweet status; otherwise returns the candidates unchanged.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] =
    Future.value {
      if (config) {
        candidates.map { sourceCandidates =>
          sourceCandidates.filterNot(_.tweetInfo.isRetweet.contains(true))
        }
      } else {
        candidates
      }
    }

  /** Reads the on/off switch from `UtegTweetGlobalParams.EnableRetweetFilterParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType =
    query.params(UtegTweetGlobalParams.EnableRetweetFilterParam)
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.GlobalParams
import com.twitter.snowflake.id.SnowflakeId
import com.twitter.util.Duration
import com.twitter.util.Future
import com.twitter.util.Time
import javax.inject.Singleton
import com.twitter.conversions.DurationOps._
/**
 * Removes candidates older than the maximum tweet age configured for the request.
 */
@Singleton
case class TweetAgeFilter() extends FilterBase {

  override val name: String = this.getClass.getCanonicalName

  override type ConfigType = Duration

  /**
   * A max age of 720 hours or more leaves the candidates untouched. Otherwise only candidates
   * whose tweet id is at least the first snowflake id of `now - maxTweetAge` are kept.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    maxTweetAge: Duration
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val recentEnough =
      if (maxTweetAge >= 720.hours) {
        candidates
      } else {
        // Tweet IDs are approximately chronological (see http://go/snowflake), so the cut-off
        // id is computed once and reused for every group of candidates.
        val oldestAllowedTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
        candidates.map { sourceCandidates =>
          sourceCandidates.filterNot(_.tweetId < oldestAllowedTweetId)
        }
      }
    Future.value(recentEnough)
  }

  /** Reads the maximum age from `GlobalParams.MaxTweetAgeHoursParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): Duration =
    query.params(GlobalParams.MaxTweetAgeHoursParam)
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.HealthThreshold
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * Base for health filters that keep or drop a candidate based on one boolean property of its
 * `TweetInfo`, selected by a health threshold.
 */
@Singleton
trait TweetInfoHealthFilterBase extends FilterBase {

  override def name: String = this.getClass.getCanonicalName

  override type ConfigType = HealthThreshold.Enum.Value

  /** For each threshold, the `TweetInfo` property that decides whether a tweet passes. */
  def thresholdToPropertyMap: Map[HealthThreshold.Enum.Value, TweetInfo => Option[Boolean]]

  /** Extracts the threshold to apply from a query. */
  def getFilterParamFn: CandidateGeneratorQuery => HealthThreshold.Enum.Value

  /**
   * Drops a candidate only when the property selected by `config` is `Some(false)`;
   * candidates for which the property is undefined are kept.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: HealthThreshold.Enum.Value
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val passing = candidates.map { sourceCandidates =>
      sourceCandidates.filterNot { candidate =>
        thresholdToPropertyMap(config)(candidate.tweetInfo).contains(false)
      }
    }
    Future.value(passing)
  }

  /**
   * Build the config params here. Passing param() into the filter is strongly discouraged
   * because param() can be slow when called many times.
   *
   * Only [[CrCandidateGeneratorQuery]] requests consult `getFilterParamFn`; every other query
   * type maps to `HealthThreshold.Enum.Off`.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): HealthThreshold.Enum.Value =
    query match {
      case crQuery: CrCandidateGeneratorQuery => getFilterParamFn(crQuery)
      case _ => HealthThreshold.Enum.Off
    }
}

View File

@ -1,96 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Runs filters sequentially for the UTEG candidate generator. The structure is copied from
 * PreRankFilterRunner.
 */
@Singleton
class UtegFilterRunner @Inject() (
  inNetworkFilter: InNetworkFilter,
  utegHealthFilter: UtegHealthFilter,
  retweetFilter: RetweetFilter,
  globalStats: StatsReceiver) {

  private val scopedStats = globalStats.scope(this.getClass.getCanonicalName)

  /** Filters in the order they are applied. */
  val orderedFilters: Seq[FilterBase] =
    Seq(inNetworkFilter, utegHealthFilter, retweetFilter)

  /**
   * Applies [[orderedFilters]] in order, recording stats under this class's scope.
   *
   * @param request    query from which each filter derives its config
   * @param candidates grouped candidates to filter
   */
  def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
  ): Future[Seq[Seq[InitialCandidate]]] =
    UtegFilterRunner.runSequentialFilters(request, candidates, orderedFilters, scopedStats)
}
object UtegFilterRunner {

  /**
   * Records the number of empty groups and the size of every group, using `stage`
   * ("before" or "after") as the last component of the counter names.
   */
  private def recordCandidateStats(
    candidates: Seq[Seq[InitialCandidate]],
    statsReceiver: StatsReceiver,
    stage: String
  ): Unit = {
    statsReceiver
      .counter("empty_sources", stage).incr(
        candidates.count {
          _.isEmpty
        }
      )
    candidates.foreach { sourceCandidates =>
      statsReceiver.counter("candidates", stage).incr(sourceCandidates.size)
    }
  }

  /**
   * Helper function for running some candidates through a sequence of filters.
   *
   * Each filter consumes the previous filter's output. Stats are recorded before and after
   * every filter under a scope named after that filter.
   */
  private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery](
    request: CGQueryType,
    candidates: Seq[Seq[InitialCandidate]],
    filters: Seq[FilterBase],
    statsReceiver: StatsReceiver
  ): Future[Seq[Seq[InitialCandidate]]] =
    filters.foldLeft(Future.value(candidates)) {
      case (candsFut, filter) =>
        candsFut.flatMap { cands =>
          // Resolve the per-filter scope once and reuse it for both recordings.
          val filterStats = statsReceiver.scope(filter.name)
          recordCandidateStats(cands, filterStats, "before")
          filter
            .filter(cands, filter.requestToConfig(request))
            .map { filteredCands =>
              recordCandidateStats(filteredCands, filterStats, "after")
              filteredCands
            }
        }
    }
}

View File

@ -1,51 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.param.UtegTweetGlobalParams
import com.twitter.util.Future
import javax.inject.Inject
import javax.inject.Singleton
/**
 * Remove unhealthy candidates.
 * Currently Timeline Ranker applies a check on the following three scores:
 *  - toxicityScore
 *  - pBlockScore
 *  - pReportedTweetScore
 *
 * Where isPassTweetHealthFilterStrict checks two additional scores with the same threshold:
 *  - pSpammyTweetScore
 *  - spammyTweetContentScore
 *
 * We've verified that both filters behave very similarly.
 */
@Singleton
case class UtegHealthFilter @Inject() () extends FilterBase {

  override def name: String = this.getClass.getCanonicalName

  override type ConfigType = Boolean

  /**
   * With `config` set, keeps only candidates whose `isPassTweetHealthFilterStrict` is
   * `Some(true)`; an undefined value counts as not passing. Otherwise nothing is removed.
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    val healthy =
      if (!config) candidates
      else
        candidates.map { sourceCandidates =>
          sourceCandidates.filter(_.tweetInfo.isPassTweetHealthFilterStrict.contains(true))
        }
    Future.value(healthy)
  }

  /** Reads the on/off switch from `UtegTweetGlobalParams.EnableTLRHealthFilterParam`. */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): ConfigType =
    query.params(UtegTweetGlobalParams.EnableTLRHealthFilterParam)
}

View File

@ -1,81 +0,0 @@
package com.twitter.cr_mixer.filter
import com.twitter.cr_mixer.filter.VideoTweetFilter.FilterConfig
import com.twitter.cr_mixer.model.CandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.param.VideoTweetFilterParams
import com.twitter.util.Future
import javax.inject.Singleton
/**
 * When enabled, keeps only candidates that are high-resolution, single-media video tweets
 * which are neither quote tweets nor replies and carry no URL.
 */
@Singleton
case class VideoTweetFilter() extends FilterBase {

  override val name: String = this.getClass.getCanonicalName

  override type ConfigType = FilterConfig

  /**
   * @param candidates grouped candidates to filter
   * @param config     when `enableVideoTweetFilter` is false the candidates are returned as-is
   */
  override def filter(
    candidates: Seq[Seq[InitialCandidate]],
    config: ConfigType
  ): Future[Seq[Seq[InitialCandidate]]] = {
    // The switch is per request, so it is checked once rather than once per candidate.
    val kept =
      if (!config.enableVideoTweetFilter) candidates
      else candidates.map(_.filter(isEligibleVideoTweet))
    Future.value(kept)
  }

  /** All six tweet attributes must line up for a candidate to survive the filter. */
  private def isEligibleVideoTweet(candidate: InitialCandidate): Boolean = {
    val tweetInfo = candidate.tweetInfo
    // if hasVideo is true, hasImage, hasGif should be false
    checkTweetInfoAttribute(tweetInfo.hasVideo) &&
    checkTweetInfoAttribute(tweetInfo.isHighMediaResolution) &&
    !checkTweetInfoAttribute(tweetInfo.isQuoteTweet) &&
    !checkTweetInfoAttribute(tweetInfo.isReply) &&
    !checkTweetInfoAttribute(tweetInfo.hasMultipleMedia) &&
    !checkTweetInfoAttribute(tweetInfo.hasUrl)
  }

  /**
   * Reads an optional boolean attribute, treating an undefined value as false.
   *
   * Taking Quoted Tweet (TweetInfo.isQuoteTweet) as an example: if the attribute is None, we
   * by default say it is not a quoted tweet; similarly, if TweetInfo.hasVideo is None, we say
   * it does not have video.
   *
   * @param attributeOpt by-name attribute; evaluated exactly once
   */
  def checkTweetInfoAttribute(attributeOpt: => Option[Boolean]): Boolean =
    attributeOpt.getOrElse(false)

  /**
   * The filter can only be switched on for CR, related-tweet and related-video-tweet queries;
   * every other query type disables it.
   */
  override def requestToConfig[CGQueryType <: CandidateGeneratorQuery](
    query: CGQueryType
  ): FilterConfig = {
    val enableVideoTweetFilter = query match {
      case _: CrCandidateGeneratorQuery | _: RelatedTweetCandidateGeneratorQuery |
          _: RelatedVideoTweetCandidateGeneratorQuery =>
        query.params(VideoTweetFilterParams.EnableVideoTweetFilterParam)
      case _ => false // e.g., GetRelatedTweets()
    }
    FilterConfig(
      enableVideoTweetFilter = enableVideoTweetFilter
    )
  }
}

object VideoTweetFilter {
  // extend the filterConfig to add more flags if needed.
  // now they are hardcoded according to the prod setting
  case class FilterConfig(
    enableVideoTweetFilter: Boolean)
}

View File

@ -1,139 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialAdsCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult
import com.twitter.cr_mixer.thriftscala.AdsRequest
import com.twitter.cr_mixer.thriftscala.AdsResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
/**
 * Scribes ads recommendation results, both the initially fetched candidates and the top level
 * API response, once the wrapped future succeeds.
 */
@Singleton
case class AdsRecommendationsScribeLogger @Inject() (
  @Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger,
  decider: CrMixerDecider,
  statsReceiver: StatsReceiver) {

  private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName)
  private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
  private val topLevelApiStats = scopedStats.scope("TopLevelApi")

  /**
   * Scribe first step results after fetching initial ads candidates.
   *
   * @param query        request the scribe metadata is derived from
   * @param getResultFn  by-name future producing the candidates; returned to the caller
   * @param enableScribe controlled by feature switch so that we can scribe for certain DDG
   */
  def scribeInitialAdsCandidates(
    query: AdsCandidateGeneratorQuery,
    getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]],
    enableScribe: Boolean
  ): Future[Seq[Seq[InitialAdsCandidate]]] = {
    val scribeMetadata = ScribeMetadata.from(query)
    val timer = Stopwatch.start()
    getResultFn.onSuccess { fetchedCandidates =>
      // Latency is read first so that it excludes the conversion work below.
      val latencyMs = timer().inMilliseconds
      val result = convertFetchCandidatesResult(fetchedCandidates, scribeMetadata.userId)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
      scribeIfSampled(enableScribe, scribeMetadata, upperFunnelsStats, scribeMsg)
    }
  }

  /**
   * Scribe top level API results.
   *
   * @param request        the original ads request, embedded in the scribe message
   * @param startTime      value written to the message's `timestamp` field
   * @param scribeMetadata metadata identifying request, user and product
   * @param getResultFn    by-name future producing the response; returned to the caller
   * @param enableScribe   whether scribing is enabled for this request
   */
  def scribeGetAdsRecommendations(
    request: AdsRequest,
    startTime: Long,
    scribeMetadata: ScribeMetadata,
    getResultFn: => Future[AdsResponse],
    enableScribe: Boolean
  ): Future[AdsResponse] = {
    val timer = Stopwatch.start()
    getResultFn.onSuccess { adsResponse =>
      val latencyMs = timer().inMilliseconds
      val topLevelResult = AdsRecommendationTopLevelApiResult(
        timestamp = startTime,
        request = request,
        response = adsResponse
      )
      val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult(topLevelResult)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)
      scribeIfSampled(enableScribe, scribeMetadata, topLevelApiStats, scribeMsg)
    }
  }

  /**
   * Publishes `scribeMsg` only when scribing is enabled and the decider admits the user at
   * the per-experiment scribe rate; each published message bumps a counter named after the
   * product under `stageStats`.
   */
  private def scribeIfSampled(
    enableScribe: Boolean,
    scribeMetadata: ScribeMetadata,
    stageStats: StatsReceiver,
    scribeMsg: GetAdsRecommendationsScribe
  ): Unit =
    if (enableScribe && decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.adsRecommendationsPerExperimentScribeRate)) {
      stageStats.counter(scribeMetadata.product.originalName).incr()
      scribeResult(scribeMsg)
    }

  /** Flattens the grouped candidates into thrift candidates with metadata. */
  private def convertFetchCandidatesResult(
    candidatesSeq: Seq[Seq[InitialAdsCandidate]],
    requestUserId: UserId
  ): AdsRecommendationsResult = {
    val tweetCandidatesWithMetadata = for {
      candidates <- candidatesSeq
      candidate <- candidates
    } yield TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(
        CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = None // not populated yet
    )

    AdsRecommendationsResult.FetchCandidatesResult(
      FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
  }

  /** Assembles the scribe record, attaching latency, trace id and impressed buckets. */
  private def buildScribeMessage(
    result: AdsRecommendationsResult,
    scribeMetadata: ScribeMetadata,
    latencyMs: Long,
    traceId: Long
  ): GetAdsRecommendationsScribe =
    GetAdsRecommendationsScribe(
      uuid = scribeMetadata.requestUUID,
      userId = scribeMetadata.userId,
      result = result,
      traceId = Some(traceId),
      performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))),
      impressedBuckets = getImpressedBuckets(scopedStats)
    )

  /** Hands the message to `publish` with the ads recommendations logger. */
  private def scribeResult(
    scribeMsg: GetAdsRecommendationsScribe
  ): Unit =
    publish(
      logger = adsRecommendationsScribeLogger,
      codec = GetAdsRecommendationsScribe,
      message = scribeMsg)
}

View File

@ -1,34 +0,0 @@
# Scala library target covering every *.scala file in this directory.
# Compiler warnings are fatal and dependency checking is strict, so each
# package used by the sources must be listed explicitly below.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/javax/inject:javax.inject",
"abdecider/src/main/scala",
"content-recommender/thrift/src/main/thrift:content-recommender-common-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"featureswitches/featureswitches-core/src/main/scala:experimentation-settings",
"finagle/finagle-core/src/main",
"frigate/frigate-common:base",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers",
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
"scrooge/scrooge-serializer/src/main/scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/ml/api:data-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"timelines/src/main/scala/com/twitter/timelines/clientevent",
"util-internal/scribe/src/main/scala/com/twitter/logging",
],
)

View File

@ -1,489 +0,0 @@
package com.twitter.cr_mixer.logging
import com.google.common.base.CaseFormat
import com.twitter.abdecider.ScribingABDeciderUtil
import com.twitter.scribelib.marshallers.ClientDataProvider
import com.twitter.scribelib.marshallers.ScribeSerialization
import com.twitter.timelines.clientevent.MinimalClientDataProvider
import com.twitter.cr_mixer.model.BlendedCandidate
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.RankedCandidate
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.GraphSourceInfo
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.scribe.ScribeCategories
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult
import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe
import com.twitter.cr_mixer.thriftscala.InterleaveResult
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.cr_mixer.thriftscala.RankResult
import com.twitter.cr_mixer.thriftscala.Result
import com.twitter.cr_mixer.thriftscala.SourceSignal
import com.twitter.cr_mixer.thriftscala.TopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe
import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.SourceInfo
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.cr_mixer.util.MetricTagUtil
import com.twitter.decider.SimpleRecipient
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.finatra.kafka.producers.KafkaProducerBase
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import com.twitter.util.Time
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
import scala.util.Random
@Singleton
case class CrMixerScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger,
@Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger,
@Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger,
kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) {
import CrMixerScribeLogger._
private val scopedStats = statsReceiver.scope("CrMixerScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
private val kafkaMessagesStats = scopedStats.scope("KafkaMessages")
private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics")
private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates")
private val serialization = new ScribeSerialization {}
/**
 * Runs the signal-source fetch and scribes its output and latency as a per-step record.
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the source infos and graph source infos
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeSignalSources(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])]
): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchSignalSourcesResult)
}
/**
 * Runs the initial candidate fetch and scribes its output and latency as a per-step record.
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the fetched candidates, grouped in nested Seqs
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeInitialCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Runs the pre-rank filter step and scribes its output and latency as a per-step record.
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the filtered candidates, grouped in nested Seqs
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribePreRankFilterCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult)
}
/**
 * Runs the interleave (blending) step and scribes its output and latency as a per-step record.
 * This is the only step that asks for the additional Kafka scribe (`enableKafkaScribe = true`).
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the blended candidates
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeInterleaveCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[BlendedCandidate]]
): Future[Seq[BlendedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertInterleaveResult,
    enableKafkaScribe = true)
}
/**
 * Runs the ranking step and scribes its output and latency as a per-step record.
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the ranked candidates
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeRankedCandidates(
  query: CrCandidateGeneratorQuery,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertRankResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getTweetRecommendations() endpoint.
 *
 * Two independent, decider-gated outputs are produced once `getResultFn` succeeds:
 *  - the full request/response scribe, gated by `upperFunnelPerStepScribeRate`;
 *  - the DDG metrics client event, gated by `topLevelApiDdgMetricsScribeRate`.
 *
 * The scribe message (which embeds the whole request and response and looks up the impressed
 * buckets) is only built when the scribe gate is open, so requests that are sampled out do not
 * pay for it. The DDG metrics path never needed that message.
 *
 * @param request        the top level API request, embedded in the scribe record
 * @param startTime      timestamp recorded on the scribe record
 * @param scribeMetadata request UUID, user id and product attached to the record
 * @param getResultFn    by-name computation producing the API response
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeGetTweetRecommendations(
  request: CrMixerTweetRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[CrMixerTweetResponse]
): Future[CrMixerTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    // Read the stopwatch first so both outputs report the same latency.
    val latencyMs = timer().inMilliseconds

    // We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs
    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.upperFunnelPerStepScribeRate)) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      // Built lazily: only sampled-in requests need the converted result and message.
      val result = convertTopLevelAPIResult(request, response, startTime)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }

    if (decider.isAvailableForId(
        scribeMetadata.userId,
        DeciderConstants.topLevelApiDdgMetricsScribeRate)) {
      topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr()
      val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request)
      publishTopLevelDdgMetrics(
        logger = ddgMetricsLogger,
        topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata,
        latencyMs = latencyMs,
        candidateSize = response.tweets.length)
    }
  }
}
/**
 * Scribes every Blue Verified tweet among the ranked candidates returned by `getResultFn`,
 * for stats tracking / debugging of the getTweetRecommendations() endpoint.
 *
 * Nothing is scribed unless the `enableScribeForBlueVerifiedTweetCandidates` decider is
 * available. When it is, one message is published per request, even if no candidate carries
 * the Blue Verified annotation (the candidate list is then empty).
 *
 * @param scribeMetadata request UUID, user id and product attached to the record
 * @param getResultFn    by-name computation producing the ranked candidates
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeGetTweetRecommendationsForBlueVerified(
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[Seq[RankedCandidate]]
): Future[Seq[RankedCandidate]] = {
  getResultFn.onSuccess { rankedCandidates =>
    val scribeEnabled =
      decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)
    if (scribeEnabled) {
      blueVerifiedTweetCandidatesStats.counter("process_request").incr()

      val blueVerifiedOnly =
        rankedCandidates.filter(_.tweetInfo.hasBlueVerifiedAnnotation.contains(true))
      val buckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil)

      val candidateScribes = blueVerifiedOnly.map { candidate =>
        val authorId = candidate.tweetInfo.authorId
        // One counter per (product, author id) pair.
        blueVerifiedTweetCandidatesStats
          .scope(scribeMetadata.product.name)
          .counter(authorId.toString)
          .incr()
        VITTweetCandidateScribe(
          tweetId = candidate.tweetId,
          authorId = authorId,
          score = candidate.predictionScore,
          metricTags = MetricTagUtil.buildMetricTags(candidate)
        )
      }

      publish(
        logger = blueVerifiedTweetRecsScribeLogger,
        codec = VITTweetCandidatesScribe,
        message = VITTweetCandidatesScribe(
          uuid = scribeMetadata.requestUUID,
          userId = scribeMetadata.userId,
          candidates = candidateScribes,
          product = scribeMetadata.product,
          impressedBuckets = buckets
        )
      )
    }
  }
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch signals, fetch candidates, filters, ranker, etc
 *
 * Both sampling gates are evaluated before any conversion work. The step output is converted
 * and the scribe message built only when at least one sink (the scribe logger or Kafka) will
 * actually receive it, so sampled-out requests skip the per-candidate conversion and the
 * impressed-bucket lookup.
 *
 * @param scribeMetadata    request UUID, user id and product attached to the record
 * @param getResultFn       by-name computation producing the step output
 * @param convertToResultFn converts the step output (plus the requesting user id) to a `Result`
 * @param enableKafkaScribe when true, the message may additionally be sent to Kafka, subject to
 *                          `shouldScribeKafkaMessage`
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
private[logging] def scribeResultsAndPerformanceMetrics[T](
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => Result,
  enableKafkaScribe: Boolean = false
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    val latencyMs = timer().inMilliseconds

    val shouldScribe = decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.upperFunnelPerStepScribeRate)
    val shouldSendToKafka =
      enableKafkaScribe && shouldScribeKafkaMessage(scribeMetadata.userId, scribeMetadata.product)

    if (shouldScribe || shouldSendToKafka) {
      val result = convertToResultFn(input, scribeMetadata.userId)
      val traceId = Trace.id.traceId.toLong
      val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId)

      if (shouldScribe) {
        upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
        scribeResult(scribeMsg)
      }

      // forks the scribe as a Kafka message for async feature hydration
      if (shouldSendToKafka) {
        kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr()
        val batchedKafkaMessages = downsampleKafkaMessage(scribeMsg)
        batchedKafkaMessages.foreach { kafkaMessage =>
          // NOTE(review): the value returned by send is discarded, so a failed send is not
          // observed here — confirm that is intended.
          kafkaProducer.send(
            topic = ScribeCategories.TweetsRecs.scribeCategory,
            key = traceId.toString,
            value = kafkaMessage,
            timestamp = Time.now.inMilliseconds
          )
        }
      }
    }
  }
}
/**
 * Wraps the top level request/response pair into the `TopLevelApiResult` variant of `Result`.
 *
 * @param request   the API request
 * @param response  the API response
 * @param startTime value stored in the record's `timestamp` field
 */
private def convertTopLevelAPIResult(
  request: CrMixerTweetRequest,
  response: CrMixerTweetResponse,
  startTime: Long
): Result = {
  val apiResult = TopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  Result.TopLevelApiResult(apiResult)
}
/**
 * Converts the fetched signal sources into the `FetchSignalSourcesResult` variant of `Result`.
 *
 * Each source info contributes a signal carrying its internal id. Each graph source entry
 * contributes a signal carrying the requesting user's id as a placeholder.
 *
 * @param sourceInfoSetTuple the source infos and the graph source infos keyed by name
 * @param requestUserId      id of the requesting user, used as the graph-source placeholder
 */
private def convertFetchSignalSourcesResult(
  sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]),
  requestUserId: UserId
): Result = {
  val (sourceInfos, graphSourceInfos) = sourceInfoSetTuple

  val signalsFromSources = sourceInfos.map { info =>
    SourceSignal(id = Some(info.internalId))
  }
  // For source graphs, we pass in requestUserId as a placeholder
  val signalsFromGraphs = graphSourceInfos.map {
    case (_, _) =>
      SourceSignal(id = Some(InternalId.UserId(requestUserId)))
  }

  val fetchResult = FetchSignalSourcesResult(
    signals = Some(signalsFromSources ++ signalsFromGraphs)
  )
  Result.FetchSignalSourcesResult(fetchResult)
}
/**
 * Flattens the nested candidate lists into thrift `TweetCandidateWithMetadata` records.
 * Shared by the fetch-candidates and pre-rank-filter conversions, which build identical records.
 *
 * @param candidatesSeq candidates grouped in nested Seqs; the grouping is discarded
 * @param requestUserId id of the requesting user, passed to the candidate generation key builder
 */
private def toTweetCandidatesWithMetadata(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Seq[TweetCandidateWithMetadata] =
  candidatesSeq.flatMap { candidates =>
    candidates.map { candidate =>
      TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = Some(
          CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)),
        score = Some(candidate.getSimilarityScore),
        numCandidateGenerationKeys = None // not populated yet
      )
    }
  }

/**
 * Converts the fetched candidates into the `FetchCandidatesResult` variant of `Result`.
 *
 * @param candidatesSeq candidates grouped in nested Seqs
 * @param requestUserId id of the requesting user
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result = {
  val tweetCandidatesWithMetadata = toTweetCandidatesWithMetadata(candidatesSeq, requestUserId)
  Result.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata)))
}

/**
 * Converts the pre-rank-filtered candidates into the `PreRankFilterResult` variant of `Result`.
 *
 * @param candidatesSeq candidates grouped in nested Seqs
 * @param requestUserId id of the requesting user
 */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): Result = {
  val tweetCandidatesWithMetadata = toTweetCandidatesWithMetadata(candidatesSeq, requestUserId)
  Result.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata)))
}
/**
 * Converts blended candidates into the `InterleaveResult` variant of `Result`.
 *
 * We take InterleaveResult for Unconstrained dataset ML ranker training, so author id and the
 * number of potential reasons are hydrated here as light ranking training data.
 *
 * @param blendedCandidates output of the interleave step
 * @param requestUserId     id of the requesting user, passed to the generation key builder
 */
private def convertInterleaveResult(
  blendedCandidates: Seq[BlendedCandidate],
  requestUserId: UserId
): Result = {
  val candidatesWithMetadata = blendedCandidates.map { candidate =>
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.reasonChosen, requestUserId)
    val reasonCount = candidate.potentialReasons.size
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      authorId = Some(candidate.tweetInfo.authorId), // for ML pipeline training
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = Some(reasonCount)
    )
  }
  val interleaved = InterleaveResult(Some(candidatesWithMetadata))
  Result.InterleaveResult(interleaved)
}
/**
 * Converts ranked candidates into the `RankResult` variant of `Result`.
 *
 * @param rankedCandidates output of the ranking step
 * @param requestUserId    id of the requesting user, passed to the generation key builder
 */
private def convertRankResult(
  rankedCandidates: Seq[RankedCandidate],
  requestUserId: UserId
): Result = {
  val candidatesWithMetadata = rankedCandidates.map { candidate =>
    val generationKey =
      CandidateGenerationKeyUtil.toThrift(candidate.reasonChosen, requestUserId)
    val reasonCount = candidate.potentialReasons.size
    TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = Some(generationKey),
      score = Some(candidate.getSimilarityScore),
      numCandidateGenerationKeys = Some(reasonCount)
    )
  }
  val ranked = RankResult(Some(candidatesWithMetadata))
  Result.RankResult(ranked)
}
/**
 * Assembles the `GetTweetsRecommendationsScribe` record for one step or API call.
 *
 * @param result         the converted step / API result
 * @param scribeMetadata supplies the request UUID and user id
 * @param latencyMs      latency stored in the performance metrics
 * @param traceId        trace id stored on the record
 */
private def buildScribeMessage(
  result: Result,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetTweetsRecommendationsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetTweetsRecommendationsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    result = result,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes one tweet-recommendations scribe message to the tweet recs scribe logger.
 *
 * @param scribeMsg the message to publish
 */
private def scribeResult(
  scribeMsg: GetTweetsRecommendationsScribe
): Unit =
  publish(tweetRecsScribeLogger, GetTweetsRecommendationsScribe, scribeMsg)
/**
 * Gate for producing messages to Kafka for async feature hydration.
 *
 * A message is produced only when the user is sampled in by the
 * `kafkaMessageScribeSampleRate` decider and the product is `Product.Home`.
 *
 * @param userId  id of the requesting user, used as the decider recipient
 * @param product product of the request
 */
private def shouldScribeKafkaMessage(
  userId: UserId,
  product: Product
): Boolean = {
  val recipient = Some(SimpleRecipient(userId))
  // The decider is consulted before the product check, regardless of the product.
  val userSampledIn =
    decider.isAvailable(DeciderConstants.kafkaMessageScribeSampleRate, recipient)
  val servingHome = product == Product.Home
  userSampledIn && servingHome
}
/**
 * Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled.
 *
 * For an interleave result, the tweets are shuffled, at most `KafkaMaxTweetsPerMessage` are
 * kept, and these are split into batches of `BatchSize`; one message is emitted per batch.
 * An interleave result without tweets produces no messages. Any other result type produces a
 * single message with no candidates and bumps the "InvalidKafkaMessageResultType" counter.
 *
 * Emitted messages keep the uuid, user id and trace id of `scribeMsg`; performance metrics
 * and impressed buckets are dropped.
 *
 * @param scribeMsg the full scribe message to downsample
 */
private[logging] def downsampleKafkaMessage(
  scribeMsg: GetTweetsRecommendationsScribe
): Seq[GetTweetsRecommendationsScribe] = {
  val sampledResults: Seq[Result] = scribeMsg.result match {
    case Result.InterleaveResult(interleaveResult) =>
      interleaveResult.tweets match {
        case Some(tweets) =>
          Random
            .shuffle(tweets)
            .take(KafkaMaxTweetsPerMessage)
            .grouped(BatchSize)
            .toSeq
            .map(batch => Result.InterleaveResult(InterleaveResult(Some(batch))))
        case None =>
          Seq.empty
      }
    // if it's an unrecognized type, err on the side of sending no candidates
    case _ =>
      kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr()
      Seq(Result.InterleaveResult(InterleaveResult(None)))
  }

  sampledResults.map { sampled =>
    GetTweetsRecommendationsScribe(
      uuid = scribeMsg.uuid,
      userId = scribeMsg.userId,
      result = sampled,
      traceId = scribeMsg.traceId,
      performanceMetrics = None,
      impressedBuckets = None
    )
  }
}
/**
 * Handles client_event serialization to log data into DDG metrics.
 *
 * The event carries the latency ("latency_ms") and the candidate count ("event_value"), under
 * a namespace with component "tweetrec", an empty element and action "candidates".
 *
 * @param logger                     logger the serialized event is written to at info level
 * @param topLevelDdgMetricsMetadata user / product / client details for the event
 * @param candidateSize              number of candidates returned
 * @param latencyMs                  request latency in milliseconds
 */
private[logging] def publishTopLevelDdgMetrics(
  logger: Logger,
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  candidateSize: Long,
  latencyMs: Long
): Unit = {
  val eventData = Map[Any, Any](
    "latency_ms" -> latencyMs,
    "event_value" -> candidateSize
  )
  val componentAndElement: (String, String) = ("tweetrec", "")
  val baseNamespace = getNamespace(topLevelDdgMetricsMetadata, componentAndElement)
  val namespace = baseNamespace + ("action" -> "candidates")
  val clientData = getClientData(topLevelDdgMetricsMetadata)
  val serialized = serialization.serializeClientEvent(namespace, clientData, eventData)
  logger.info(serialized)
}
/**
 * Builds the client data attached to a DDG metrics event from the request metadata.
 * The guest id is always left empty.
 *
 * @param topLevelDdgMetricsMetadata supplies user id, client application id and country code
 */
private def getClientData(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata
): ClientDataProvider = {
  val metadata = topLevelDdgMetricsMetadata
  MinimalClientDataProvider(
    userId = metadata.userId,
    guestId = None,
    clientApplicationId = metadata.clientApplicationId,
    countryCode = metadata.countryCode
  )
}
/**
 * Builds the client event namespace for a DDG metrics event.
 *
 * The page is always "cr-mixer"; the section is the product's original name converted from
 * UpperCamel to lower_underscore; the client is derived from the client application id.
 *
 * @param topLevelDdgMetricsMetadata supplies the product and client application id
 * @param label                      (component, element) pair for the namespace
 */
private def getNamespace(
  topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata,
  label: (String, String)
): Map[String, String] = {
  val (component, element) = label
  val section = CaseFormat.UPPER_CAMEL.to(
    CaseFormat.LOWER_UNDERSCORE,
    topLevelDdgMetricsMetadata.product.originalName)
  Map(
    "client" -> ScribingABDeciderUtil.clientForAppId(
      topLevelDdgMetricsMetadata.clientApplicationId),
    "page" -> "cr-mixer",
    "section" -> section,
    "component" -> component,
    "element" -> element
  )
}
}
/**
 * Constants used by `CrMixerScribeLogger.downsampleKafkaMessage` when splitting a scribe
 * message into Kafka messages.
 */
object CrMixerScribeLogger {
// Maximum number of tweets kept (after shuffling) from one scribe message.
val KafkaMaxTweetsPerMessage: Int = 200
// Number of tweets carried by each Kafka message batch.
val BatchSize: Int = 20
}

View File

@ -1,193 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.InitialCandidate
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.PreRankFilterResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest
import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse
import com.twitter.cr_mixer.thriftscala.RelatedTweetResult
import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
case class RelatedTweetScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) {
private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId")
/**
 * Runs the related-tweet candidate fetch and scribes its output and latency as a per-step record.
 *
 * @param query       request whose metadata is attached to the record
 * @param getResultFn by-name computation yielding the fetched candidates, grouped in nested Seqs
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeInitialCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Runs the related-tweet pre-rank filter step and scribes its output and latency as a
 * per-step record.
 *
 * @param query       request whose metadata is attached to the record
 * @param getResultFn by-name computation yielding the filtered candidates, grouped in nested Seqs
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribePreRankFilterCandidates(
  query: RelatedTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[Seq[InitialCandidate]]]
): Future[Seq[Seq[InitialCandidate]]] = {
  val metadata = RelatedTweetScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    relatedTweetScribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertPreRankFilterResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the getRelatedTweets endpoint.
 *
 * Scribing requires a user id in the client context: requests without one only bump the
 * "TopLevelApiNoUserId" counter. Requests with a user id are scribed when the
 * `upperFunnelPerStepScribeRate` decider is available for that id.
 *
 * @param request                    the API request, embedded in the scribe record
 * @param startTime                  timestamp recorded on the scribe record
 * @param relatedTweetScribeMetadata request metadata attached to the record
 * @param getResultFn                by-name computation producing the API response
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeGetRelatedTweets(
  request: RelatedTweetRequest,
  startTime: Long,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[RelatedTweetResponse]
): Future[RelatedTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    relatedTweetScribeMetadata.clientContext.userId match {
      case Some(userId)
          if decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate) =>
        topLevelApiStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
        val latencyMs = timer().inMilliseconds
        val result = convertTopLevelAPIResult(request, response, startTime)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId))
      case Some(_) =>
        () // user id present but sampled out by the decider
      case _ =>
        topLevelApiNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
    }
  }
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch candidates, filters.
 *
 * Scribing requires a user id in the client context: requests without one only bump the
 * "UpperFunnelsNoUserId" counter. Requests with a user id are scribed when the
 * `upperFunnelPerStepScribeRate` decider is available for that id.
 *
 * @param relatedTweetScribeMetadata request metadata attached to the record
 * @param getResultFn                by-name computation producing the step output
 * @param convertToResultFn          converts the step output (plus the user id) to a result
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
private def scribeResultsAndPerformanceMetrics[T](
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => RelatedTweetResult
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    relatedTweetScribeMetadata.clientContext.userId match {
      case Some(userId)
          if decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate) =>
        upperFunnelsStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
        val latencyMs = timer().inMilliseconds
        val result = convertToResultFn(input, userId)
        val traceId = Trace.id.traceId.toLong
        scribeResult(buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId))
      case Some(_) =>
        () // user id present but sampled out by the decider
      case _ =>
        upperFunnelsNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr()
    }
  }
}
/**
 * Wraps the related-tweet request/response pair into the `RelatedTweetTopLevelApiResult`
 * variant of `RelatedTweetResult`.
 *
 * @param request   the API request
 * @param response  the API response
 * @param startTime value stored in the record's `timestamp` field
 */
private def convertTopLevelAPIResult(
  request: RelatedTweetRequest,
  response: RelatedTweetResponse,
  startTime: Long
): RelatedTweetResult = {
  val apiResult = RelatedTweetTopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  RelatedTweetResult.RelatedTweetTopLevelApiResult(apiResult)
}
/**
 * Converts the fetched candidates into the `FetchCandidatesResult` variant of
 * `RelatedTweetResult`. Only the tweet id is recorded for each candidate.
 *
 * @param candidatesSeq candidates grouped in nested Seqs; the grouping is discarded
 * @param requestUserId unused here; present so the method fits the converter function shape
 */
private def convertFetchCandidatesResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val candidatesWithMetadata =
    for {
      candidates <- candidatesSeq
      candidate <- candidates
    } yield TweetCandidateWithMetadata(
      tweetId = candidate.tweetId,
      candidateGenerationKey = None // do not hydrate candidateGenerationKey to save cost
    )
  val fetched = FetchCandidatesResult(Some(candidatesWithMetadata))
  RelatedTweetResult.FetchCandidatesResult(fetched)
}
/**
 * Converts the pre-rank-filtered candidates into the `PreRankFilterResult` variant of
 * `RelatedTweetResult`, hydrating the generation key, author id and similarity score.
 *
 * @param candidatesSeq candidates grouped in nested Seqs; the grouping is discarded
 * @param requestUserId id of the requesting user, passed to the generation key builder
 */
private def convertPreRankFilterResult(
  candidatesSeq: Seq[Seq[InitialCandidate]],
  requestUserId: UserId
): RelatedTweetResult = {
  val candidatesWithMetadata =
    for {
      candidates <- candidatesSeq
      candidate <- candidates
    } yield {
      val generationKey =
        CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)
      TweetCandidateWithMetadata(
        tweetId = candidate.tweetId,
        candidateGenerationKey = Some(generationKey),
        authorId = Some(candidate.tweetInfo.authorId),
        score = Some(candidate.getSimilarityScore),
        numCandidateGenerationKeys = None
      )
    }
  val filtered = PreRankFilterResult(Some(candidatesWithMetadata))
  RelatedTweetResult.PreRankFilterResult(filtered)
}
/**
 * Assembles the `GetRelatedTweetsScribe` record for one step or API call.
 *
 * @param relatedTweetResult         the converted step / API result
 * @param relatedTweetScribeMetadata supplies request UUID, internal id and client context
 * @param latencyMs                  latency stored in the performance metrics
 * @param traceId                    trace id stored on the record
 */
private def buildScribeMessage(
  relatedTweetResult: RelatedTweetResult,
  relatedTweetScribeMetadata: RelatedTweetScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetRelatedTweetsScribe = {
  val context = relatedTweetScribeMetadata.clientContext
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetRelatedTweetsScribe(
    uuid = relatedTweetScribeMetadata.requestUUID,
    internalId = relatedTweetScribeMetadata.internalId,
    relatedTweetResult = relatedTweetResult,
    requesterId = context.userId,
    guestId = context.guestId,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes one related-tweets scribe message to the related tweets scribe logger.
 *
 * @param scribeMsg the message to publish
 */
private def scribeResult(
  scribeMsg: GetRelatedTweetsScribe
): Unit =
  publish(relatedTweetsScribeLogger, GetRelatedTweetsScribe, scribeMsg)
}

View File

@ -1,43 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets
import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.StatsUtil
import com.twitter.logging.Logger
import com.twitter.scrooge.BinaryThriftStructSerializer
import com.twitter.scrooge.ThriftStruct
import com.twitter.scrooge.ThriftStructCodec
/** Helpers shared by the scribe loggers in this package. */
object ScribeLoggerUtils {

  /**
   * Handles base64-encoding, serialization, and publish: the message is rendered to a string
   * by a binary Thrift serializer built from `codec` and written to `logger` at info level.
   *
   * @param logger  destination logger
   * @param codec   Thrift codec for the message type
   * @param message the Thrift struct to publish
   */
  private[logging] def publish[T <: ThriftStruct](
    logger: Logger,
    codec: ThriftStructCodec[T],
    message: T
  ): Unit = {
    val serializer = BinaryThriftStructSerializer(codec)
    logger.info(serializer.toString(message))
  }

  /**
   * Returns the de-duplicated impressed buckets as thrift `ImpressesedBucketInfo` records, or
   * whatever empty value `CrMixerImpressedBuckets.getAllImpressedBuckets` yields when there
   * are none. The number of distinct buckets is recorded on the "impressed_buckets" stat, and
   * the whole lookup runs inside the "getImpressedBuckets" stats scope.
   *
   * A bucket whose experiment has no id is reported with experiment id -1.
   *
   * @param scopedStats stats receiver the stat and the tracking scope are created under
   */
  private[logging] def getImpressedBuckets(
    scopedStats: StatsReceiver
  ): Option[List[ImpressesedBucketInfo]] = {
    val trackingScope = scopedStats.scope("getImpressedBuckets")
    StatsUtil.trackNonFutureBlockStats(trackingScope) {
      CrMixerImpressedBuckets.getAllImpressedBuckets.map { buckets =>
        val distinctBuckets = buckets.toSet
        scopedStats.stat("impressed_buckets").add(distinctBuckets.size)
        val bucketInfos = distinctBuckets.map { bucket =>
          val settings = bucket.experiment.settings
          ImpressesedBucketInfo(
            experimentId = settings.experimentId.getOrElse(-1L),
            bucketName = bucket.name,
            version = settings.version
          )
        }
        bucketInfos.toList
      }
    }
  }
}

View File

@ -1,45 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery
import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery
import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
/**
 * Request-level metadata attached to scribe records.
 *
 * @param requestUUID UUID of the request being scribed
 * @param userId      id of the requesting user
 * @param product     product the request was made for
 */
case class ScribeMetadata(
requestUUID: Long,
userId: UserId,
product: Product)
/** Factories building [[ScribeMetadata]] from the supported candidate generator queries. */
object ScribeMetadata {

  /** Copies the request UUID, user id and product from a CrMixer candidate generator query. */
  def from(query: CrCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies the request UUID, user id and product from a UTEG candidate generator query. */
  def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)

  /** Copies the request UUID, user id and product from an ads candidate generator query. */
  def from(query: AdsCandidateGeneratorQuery): ScribeMetadata =
    ScribeMetadata(
      requestUUID = query.requestUUID,
      userId = query.userId,
      product = query.product)
}
/**
 * Request-level metadata attached to related-tweet scribe records.
 *
 * @param requestUUID   UUID of the request being scribed
 * @param internalId    internal id taken from the related-tweet query
 * @param clientContext client context of the request; its user id and guest id are optional
 * @param product       product the request was made for
 */
case class RelatedTweetScribeMetadata(
requestUUID: Long,
internalId: InternalId,
clientContext: ClientContext,
product: Product)
/** Factory building [[RelatedTweetScribeMetadata]] from a related-tweet query. */
object RelatedTweetScribeMetadata {

  /** Copies the request UUID, internal id, client context and product from the query. */
  def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata =
    RelatedTweetScribeMetadata(
      requestUUID = query.requestUUID,
      internalId = query.internalId,
      clientContext = query.clientContext,
      product = query.product
    )
}

View File

@ -1,22 +0,0 @@
package com.twitter.cr_mixer
package logging
import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest
import com.twitter.cr_mixer.thriftscala.Product
/**
 * Request details needed to emit a top level DDG metrics event.
 *
 * @param userId              id of the requesting user, if any
 * @param product             product the request was made for
 * @param clientApplicationId client application id of the request, if any
 * @param countryCode         country code of the request, if any
 */
case class TopLevelDdgMetricsMetadata(
userId: Option[Long],
product: Product,
clientApplicationId: Option[Long],
countryCode: Option[String])
/** Factory building [[TopLevelDdgMetricsMetadata]] from a top level tweet request. */
object TopLevelDdgMetricsMetadata {

  /**
   * Takes the user id, app id and country code from the request's client context and the
   * product from the request itself.
   */
  def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = {
    val context = request.clientContext
    TopLevelDdgMetricsMetadata(
      userId = context.userId,
      product = request.product,
      clientApplicationId = context.appId,
      countryCode = context.countryCode
    )
  }
}

View File

@ -1,147 +0,0 @@
package com.twitter.cr_mixer.logging
import com.twitter.cr_mixer.logging.ScribeLoggerUtils._
import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderConstants
import com.twitter.cr_mixer.thriftscala.UtegTweetRequest
import com.twitter.cr_mixer.thriftscala.UtegTweetResponse
import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult
import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe
import com.twitter.cr_mixer.thriftscala.PerformanceMetrics
import com.twitter.cr_mixer.thriftscala.UtegTweetResult
import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult
import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.tracing.Trace
import com.twitter.logging.Logger
import com.twitter.simclusters_v2.common.UserId
import com.twitter.util.Future
import com.twitter.util.Stopwatch
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
case class UtegTweetScribeLogger @Inject() (
decider: CrMixerDecider,
statsReceiver: StatsReceiver,
@Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) {
private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger")
private val topLevelApiStats = scopedStats.scope("TopLevelApi")
private val upperFunnelsStats = scopedStats.scope("UpperFunnels")
/**
 * Runs the UTEG candidate fetch and scribes its output and latency as a per-step record.
 *
 * @param query       request whose UUID, user id and product are attached to the record
 * @param getResultFn by-name computation yielding the fetched tweets with score and social proof
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeInitialCandidates(
  query: UtegTweetCandidateGeneratorQuery,
  getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]]
): Future[Seq[TweetWithScoreAndSocialProof]] = {
  val metadata = ScribeMetadata.from(query)
  scribeResultsAndPerformanceMetrics(
    scribeMetadata = metadata,
    getResultFn = getResultFn,
    convertToResultFn = convertFetchCandidatesResult)
}
/**
 * Scribe Top Level API Request / Response and performance metrics
 * for the GetUtegTweetRecommendations() endpoint.
 *
 * The record is only built and published when the `upperFunnelPerStepScribeRate` decider is
 * available for the requesting user.
 *
 * @param request        the API request, embedded in the scribe record
 * @param startTime      timestamp recorded on the scribe record
 * @param scribeMetadata request UUID, user id and product attached to the record
 * @param getResultFn    by-name computation producing the API response
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
def scribeGetUtegTweetRecommendations(
  request: UtegTweetRequest,
  startTime: Long,
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[UtegTweetResponse]
): Future[UtegTweetResponse] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { response =>
    val sampledIn = decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.upperFunnelPerStepScribeRate)
    if (sampledIn) {
      topLevelApiStats.counter(scribeMetadata.product.originalName).incr()
      val latencyMs = timer().inMilliseconds
      val result = convertTopLevelAPIResult(request, response, startTime)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }
  }
}
/**
 * Wraps the UTEG request/response pair into the `UtegTweetTopLevelApiResult` variant of
 * `UtegTweetResult`.
 *
 * @param request   the API request
 * @param response  the API response
 * @param startTime value stored in the record's `timestamp` field
 */
private def convertTopLevelAPIResult(
  request: UtegTweetRequest,
  response: UtegTweetResponse,
  startTime: Long
): UtegTweetResult = {
  val apiResult = UtegTweetTopLevelApiResult(
    timestamp = startTime,
    request = request,
    response = response
  )
  UtegTweetResult.UtegTweetTopLevelApiResult(apiResult)
}
/**
 * Assembles the `GetUtegTweetsScribe` record for one step or API call.
 *
 * @param utegTweetResult the converted step / API result
 * @param scribeMetadata  supplies the request UUID and user id
 * @param latencyMs       latency stored in the performance metrics
 * @param traceId         trace id stored on the record
 */
private def buildScribeMessage(
  utegTweetResult: UtegTweetResult,
  scribeMetadata: ScribeMetadata,
  latencyMs: Long,
  traceId: Long
): GetUtegTweetsScribe = {
  val metrics = PerformanceMetrics(Some(latencyMs))
  val buckets = getImpressedBuckets(scopedStats)
  GetUtegTweetsScribe(
    uuid = scribeMetadata.requestUUID,
    userId = scribeMetadata.userId,
    utegTweetResult = utegTweetResult,
    traceId = Some(traceId),
    performanceMetrics = Some(metrics),
    impressedBuckets = buckets
  )
}
/**
 * Publishes one UTEG tweets scribe message to the UTEG tweet scribe logger.
 *
 * @param scribeMsg the message to publish
 */
private def scribeResult(
  scribeMsg: GetUtegTweetsScribe
): Unit =
  publish(utegTweetScribeLogger, GetUtegTweetsScribe, scribeMsg)
/**
 * Converts the fetched UTEG candidates into the `FetchCandidatesResult` variant of
 * `UtegTweetResult`. Only the tweet id is recorded for each candidate.
 *
 * @param candidates    fetched tweets with score and social proof
 * @param requestUserId unused here; present so the method fits the converter function shape
 */
private def convertFetchCandidatesResult(
  candidates: Seq[TweetWithScoreAndSocialProof],
  requestUserId: UserId
): UtegTweetResult = {
  val candidatesWithMetadata = candidates.map { tweet =>
    // do not hydrate candidateGenerationKey to save cost
    TweetCandidateWithMetadata(tweetId = tweet.tweetId, candidateGenerationKey = None)
  }
  val fetched = FetchCandidatesResult(Some(candidatesWithMetadata))
  UtegTweetResult.FetchCandidatesResult(fetched)
}
/**
 * Scribe Per-step intermediate results and performance metrics
 * for each step: fetch candidates, filters.
 *
 * The record is only built and published when the `upperFunnelPerStepScribeRate` decider is
 * available for the requesting user.
 *
 * @param scribeMetadata    request UUID, user id and product attached to the record
 * @param getResultFn       by-name computation producing the step output
 * @param convertToResultFn converts the step output (plus the user id) to a `UtegTweetResult`
 * @return a future carrying the result of `getResultFn`; scribing happens as a success callback
 */
private def scribeResultsAndPerformanceMetrics[T](
  scribeMetadata: ScribeMetadata,
  getResultFn: => Future[T],
  convertToResultFn: (T, UserId) => UtegTweetResult
): Future[T] = {
  val timer = Stopwatch.start()
  getResultFn.onSuccess { input =>
    val sampledIn = decider.isAvailableForId(
      scribeMetadata.userId,
      DeciderConstants.upperFunnelPerStepScribeRate)
    if (sampledIn) {
      upperFunnelsStats.counter(scribeMetadata.product.originalName).incr()
      val latencyMs = timer().inMilliseconds
      val result = convertToResultFn(input, scribeMetadata.userId)
      val traceId = Trace.id.traceId.toLong
      scribeResult(buildScribeMessage(result, scribeMetadata, latencyMs, traceId))
    }
  }
}
}

View File

@ -1,16 +0,0 @@
# Scala library target that compiles every *.scala file in this directory with the
# "fatal_warnings" compiler option set.
# NOTE(review): presumably this is the cr-mixer `model` package, judging by the
# neighbouring sources — confirm against the directory this BUILD file lives in.
scala_library(
sources = ["*.scala"],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"configapi/configapi-core",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"src/scala/com/twitter/simclusters_v2/common",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
],
)

View File

@ -1,200 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.simclusters_v2.common.TweetId
/**
 * Common base of the candidate models declared in this file; every candidate exposes the id
 * of the tweet it refers to.
 */
sealed trait Candidate {
// Id of the tweet this candidate refers to.
val tweetId: TweetId
// The hash is derived from tweetId alone, narrowed with `toInt`.
// NOTE(review): presumably TweetId is a Long alias, in which case only the low 32 bits of the
// id contribute to the hash — confirm. Also confirm that the case-class subclasses are meant
// to inherit this override rather than a compiler-synthesized hashCode.
override def hashCode: Int = tweetId.toInt
}
/**
 * A tweet id paired with the candidate generation info that produced it.
 *
 * @param tweetId                 id of the candidate tweet
 * @param candidateGenerationInfo how the candidate was generated
 */
case class TweetWithCandidateGenerationInfo(
  tweetId: TweetId,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /** Score reported by the similarity engine, or 0.0 when the engine reported none. */
  def getSimilarityScore: Double = {
    val engineScore = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * A candidate as first produced by candidate generation: the tweet id, its tweet info and the
 * candidate generation info describing how it was found.
 */
case class InitialCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Similarity score taken from the candidate generation info, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Converts this candidate into a [[BlendedCandidate]].
   *
   * The same candidate can be generated by several algorithms and is deduplicated during
   * blending; `potentialReasons` keeps the candidate generation info from each of them. This
   * candidate's own info becomes the chosen reason.
   */
  def toBlendedCandidate(
    potentialReasons: Seq[CandidateGenerationInfo],
  ): BlendedCandidate =
    BlendedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  /**
   * Converts this candidate straight into a [[RankedCandidate]].
   * For experimental purposes only, when bypassing interleave / ranking.
   */
  def toRankedCandidate(): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      // The prediction score defaults to 0.0 to signal that no ranking actually took place.
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/**
 * Ads counterpart of the initial candidate: the tweet id, the line items attached to it and the
 * candidate generation info describing how it was found.
 */
case class InitialAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  candidateGenerationInfo: CandidateGenerationInfo)
    extends Candidate {

  /**
   * Similarity score taken from the candidate generation info, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = candidateGenerationInfo.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /**
   * Converts this candidate into a [[BlendedAdsCandidate]].
   *
   * The same candidate can be generated by several algorithms and is deduplicated during
   * blending; `potentialReasons` keeps the candidate generation info from each of them. This
   * candidate's own info becomes the chosen reason.
   */
  def toBlendedAdsCandidate(
    potentialReasons: Seq[CandidateGenerationInfo],
  ): BlendedAdsCandidate =
    BlendedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = potentialReasons
    )

  /**
   * Converts this candidate straight into a [[RankedAdsCandidate]].
   * For experimental purposes only, when bypassing interleave / ranking.
   */
  def toRankedAdsCandidate(): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      // The prediction score defaults to 0.0 to signal that no ranking actually took place.
      predictionScore = 0.0,
      reasonChosen = candidateGenerationInfo,
      potentialReasons = Seq(candidateGenerationInfo)
    )
}
/**
 * A candidate after blending: one chosen candidate generation reason plus every reason that
 * could have produced the tweet.
 */
case class BlendedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason must be one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the chosen reason, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /** Attaches `predictionScore` and carries every other field over into a [[RankedCandidate]]. */
  def toRankedCandidate(predictionScore: Double): RankedCandidate =
    RankedCandidate(
      tweetId = tweetId,
      tweetInfo = tweetInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * An ads candidate after blending: one chosen candidate generation reason plus every reason that
 * could have produced the tweet.
 */
case class BlendedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason must be one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the chosen reason, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }

  /** Attaches `predictionScore` and carries every other field over into a [[RankedAdsCandidate]]. */
  def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate =
    RankedAdsCandidate(
      tweetId = tweetId,
      lineItemInfo = lineItemInfo,
      predictionScore = predictionScore,
      reasonChosen = reasonChosen,
      potentialReasons = potentialReasons
    )
}
/**
 * A candidate carrying a prediction score, together with its chosen candidate generation reason
 * and every potential reason.
 */
case class RankedCandidate(
  tweetId: TweetId,
  tweetInfo: TweetInfo,
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason must be one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the chosen reason, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * An ads candidate carrying a prediction score, together with its chosen candidate generation
 * reason and every potential reason.
 */
case class RankedAdsCandidate(
  tweetId: TweetId,
  lineItemInfo: Seq[LineItemInfo],
  predictionScore: Double,
  reasonChosen: CandidateGenerationInfo,
  potentialReasons: Seq[CandidateGenerationInfo])
    extends Candidate {

  // Construction-time invariant: the chosen reason must be one of the potential reasons.
  assert(potentialReasons.contains(reasonChosen))

  /**
   * Similarity score of the chosen reason, defaulting to 0.0 when absent.
   * For instance, for a UnifiedTweetBasedSimilarityEngine the score is the weighted combined
   * score, and for a SimClustersANNSimilarityEngine it is the SANN score.
   */
  def getSimilarityScore: Double = {
    val engineScore: Option[Double] = reasonChosen.similarityEngineInfo.score
    engineScore.getOrElse(0.0)
  }
}
/**
 * A tweet id paired with a score; the only Candidate here that carries no candidate generation
 * info.
 * NOTE(review): presumably produced by the Trip candidate source (see the Trip* module names) —
 * confirm against the caller.
 */
case class TripTweetWithScore(tweetId: TweetId, score: Double) extends Candidate

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.cr_mixer.thriftscala.SourceType
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.util.Time
/**
 * Tweet-level attributes. Represents the source used in candidate generation.
 *
 * For legacy reasons, SourceType used to represent both SourceType and SimilarityEngineType.
 * Moving forward, SourceType is used for source types ONLY, e.g., TweetFavorite, UserFollow,
 * TwiceUserId. A separate SimilarityEngineType now covers the engines, e.g., SimClustersANN.
 *
 * One current special case: TwiceUserId is a source, although it is not necessarily a "signal".
 *
 * @param sourceType e.g., SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId
 * @param internalId e.g., UserId(0L), TweetId(0L)
 * @param sourceEventTime optional timestamp attached to the source
 *                        (NOTE(review): presumably when the source event occurred — confirm)
 */
case class SourceInfo(
sourceType: SourceType,
internalId: InternalId,
sourceEventTime: Option[Time])
/**
 * Tweet-level attributes. Represents the source user graph used in candidate generation.
 *
 * Unlike SourceInfo, this is an intermediate product and is not stored. Essentially, CrMixer
 * queries a graph and the graph returns a list of users to be used as sources, for instance
 * RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines such as UTG or UTEG
 * use these sources to build candidates.
 *
 * The definition of SourceType was extended to cover both "Source Signal" and "Source Graph";
 * see the "[CrMixer] Graph Based Source Fetcher Abstraction Proposal" design doc (the link is
 * not included here).
 *
 * TODO: consider making both SourceInfo and GraphSourceInfo extend the same trait to have a
 * unified interface.
 *
 * @param sourceType the kind of source graph
 * @param seedWithScores seed user ids mapped to a score
 */
case class GraphSourceInfo(
sourceType: SourceType,
seedWithScores: Map[UserId, Double])
/**
 * Tweet-level attributes. Represents the similarity engine (the algorithm) used for candidate
 * generation, along with its metadata.
 *
 * @param similarityEngineType e.g., SimClustersANN, UserTweetGraph
 * @param modelId e.g., UserTweetGraphConsumerEmbedding_ALL_20210708
 * @param score a score generated by this similarity engine, if any
 */
case class SimilarityEngineInfo(
similarityEngineType: SimilarityEngineType,
modelId: Option[String], // modelId can be None, e.g., for UTEG, UnifiedTweetBasedSE, etc.
score: Option[Double])
/**
 * Tweet-level attributes. A combination of SourceInfo and SimilarityEngineInfo.
 *
 * A SimilarityEngine can be a composition of many leaf similarity engines. For instance, the
 * TweetBasedUnified SE could be a composition of both the UserTweetGraph SE and the
 * SimClustersANN SE. A composite SimilarityEngine may call other (atomic, contributing)
 * SimilarityEngines to build its final candidate list; those contributing SEs are tracked in the
 * contributingSimilarityEngines list.
 *
 * @param sourceInfoOpt optional, as many consumer-based CGs do not have a source
 * @param similarityEngineInfo the similarity engine used in candidate generation
 *                             (e.g., TweetBasedUnifiedSE); it can be an atomic SE or a composite SE
 * @param contributingSimilarityEngines only a composite SE has these (e.g., SANN, UTG); otherwise
 *                                      it is an empty Seq. All contributing SEs must be atomic
 */
case class CandidateGenerationInfo(
sourceInfoOpt: Option[SourceInfo],
similarityEngineInfo: SimilarityEngineInfo,
contributingSimilarityEngines: Seq[SimilarityEngineInfo])

View File

@ -1,96 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.core_workflows.user_model.thriftscala.UserState
import com.twitter.cr_mixer.thriftscala.Product
import com.twitter.product_mixer.core.thriftscala.ClientContext
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.timelines.configapi.Params
/**
 * Fields shared by the candidate generator queries in this file. The trait is sealed, so every
 * implementation is declared in this file.
 */
sealed trait CandidateGeneratorQuery {
// Product surface the request is made for.
val product: Product
// Upper bound on the number of results requested.
val maxNumResults: Int
// Ids of tweets already impressed (NOTE(review): presumably used to filter results — confirm).
val impressedTweetList: Set[TweetId]
// Config parameters resolved for this request.
val params: Params
// Identifier of the request.
val requestUUID: Long
}
/** Mixin for queries that are made on behalf of a specific user. */
sealed trait HasUserId {
val userId: UserId
}
/**
 * User-keyed candidate generation query. In addition to the common query fields it carries the
 * user's state and an optional language code (defaulting to None).
 */
case class CrCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long,
languageCode: Option[String] = None)
extends CandidateGeneratorQuery
with HasUserId
/**
 * User-keyed query for the UTEG tweet candidate generator; carries the common query fields plus
 * the user's state.
 */
case class UtegTweetCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
with HasUserId
/**
 * Related-tweet query keyed by an InternalId rather than by a user id; the client context is
 * carried along for scribing.
 */
case class RelatedTweetCandidateGeneratorQuery(
internalId: InternalId,
clientContext: ClientContext, // To scribe LogIn/LogOut requests
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * Related-video-tweet query. It has the same fields as RelatedTweetCandidateGeneratorQuery: it is
 * keyed by an InternalId and carries the client context for scribing.
 */
case class RelatedVideoTweetCandidateGeneratorQuery(
internalId: InternalId,
clientContext: ClientContext, // To scribe LogIn/LogOut requests
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * Query for the FRS tweet candidate generator. Besides the common query fields it carries the set
 * of impressed users and optional language / country codes.
 *
 * NOTE(review): this query has a `userId` but does not mix in HasUserId — confirm that is
 * intentional.
 * NOTE(review): `languageCodeOpt` and `countryCodeOpt` have defaults but are followed by the
 * required `requestUUID`, so the defaults only apply when callers use named arguments.
 */
case class FrsTweetCandidateGeneratorQuery(
userId: UserId,
product: Product,
maxNumResults: Int,
impressedUserList: Set[UserId],
impressedTweetList: Set[TweetId],
params: Params,
languageCodeOpt: Option[String] = None,
countryCodeOpt: Option[String] = None,
requestUUID: Long)
extends CandidateGeneratorQuery
/**
 * Query for ads candidate generation.
 *
 * Unlike the other queries in this file it does not extend CandidateGeneratorQuery; it has no
 * `impressedTweetList`, which that trait requires.
 */
case class AdsCandidateGeneratorQuery(
userId: UserId,
product: Product,
userState: UserState,
maxNumResults: Int,
params: Params,
requestUUID: Long)
/**
 * Query for topic tweet candidate generation: the common query fields plus the requesting user,
 * the set of topic ids and an `isVideoOnly` flag.
 *
 * NOTE(review): this query has a `userId` but does not mix in HasUserId — confirm that is
 * intentional.
 */
case class TopicTweetCandidateGeneratorQuery(
userId: UserId,
topicIds: Set[TopicId],
product: Product,
maxNumResults: Int,
impressedTweetList: Set[TweetId],
params: Params,
requestUUID: Long,
isVideoOnly: Boolean)
extends CandidateGeneratorQuery

View File

@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Closed set of Earlybird similarity engine variants, each represented by a singleton object.
 * The trait is sealed, so the three objects below are the only possible values.
 */
sealed trait EarlybirdSimilarityEngineType
// Recency-based variant.
object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType
// Model-based variant.
object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType
// Tensorflow-based variant.
object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType

View File

@ -1,11 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Namespace for the health threshold enumeration.
 */
object HealthThreshold {
// Threshold levels with explicit ids 1 through 5.
// NOTE(review): the names suggest increasing strictness from Off to StricterPlus — confirm
// against the code that consumes these values.
object Enum extends Enumeration {
val Off: Value = Value(1)
val Moderate: Value = Value(2)
val Strict: Value = Value(3)
val Stricter: Value = Value(4)
val StricterPlus: Value = Value(5)
}
}

View File

@ -1,77 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Model ids for all model-based candidate sources.
 *
 * Naming guideline: name your modelId as "Algorithm_Product_Date".
 *  - If the model is used on multiple product surfaces, use "ALL" as the product.
 *  - Don't name your algorithm "MBCG"; every algorithm listed here is model-based candidate
 *    generation.
 *
 * Don't forget to add new HnswANN models to [[allHnswANNSimilarityEngineModelIds]].
 */
object ModelConfig {

  // ---- Offline SimClusters CG experiment model ids ----
  val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414"
  val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15"
  val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50"
  val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50"
  val OfflineMTSConsumerEmbeddingsFav90P20M: String =
    "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414"

  // ---- TwHIN model ids ----
  val ConsumerBasedTwHINRegularUpdateAll20221024: String =
    "ConsumerBasedTwHINRegularUpdate_All_20221024"

  // ---- Averaged TwHIN model ids ----
  val TweetBasedTwHINRegularUpdateAll20221024: String = "TweetBasedTwHINRegularUpdate_All_20221024"

  // ---- Collaborative-filtering TwHIN model ids ----
  val TwhinCollabFilterForFollow: String = "TwhinCollabFilterForFollow"
  val TwhinCollabFilterForEngagement: String = "TwhinCollabFilterForEngagement"
  val TwhinMultiClusterForFollow: String = "TwhinMultiClusterForFollow"
  val TwhinMultiClusterForEngagement: String = "TwhinMultiClusterForEngagement"

  // ---- Two Tower model ids ----
  val TwoTowerFavALL20220808: String = "TwoTowerFav_ALL_20220808"

  // ---- Debugger demo-only model ids ----
  val DebuggerDemo: String = "DebuggerDemo"

  // ColdStartLookalike: not really a model name. It is a placeholder that indicates the
  // ColdStartLookalike candidate source, which is temporarily plugged into
  // CustomizedRetrievalCandidateGeneration.
  val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707"

  // consumersBasedUTG-RealGraphOon model id
  val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705"

  // consumersBasedUAG-RealGraphOon model id
  val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205"

  // ---- FTR ----
  val OfflineFavDecayedSum: String = "OfflineFavDecayedSum"
  val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11"
  val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11"

  // All model ids of HnswANNSimilarityEngines. This must stay below the vals it references so
  // that they are already initialized when the Seq is built.
  val allHnswANNSimilarityEngineModelIds: Seq[String] = Seq(
    ConsumerBasedTwHINRegularUpdateAll20221024,
    TwoTowerFavALL20220808,
    DebuggerDemo
  )

  val ConsumerLogFavBasedInterestedInEmbedding: String =
    "ConsumerLogFavBasedInterestedIn_ALL_20221228"
  val ConsumerFollowBasedInterestedInEmbedding: String =
    "ConsumerFollowBasedInterestedIn_ALL_20221228"

  val RetweetBasedDiffusion: String = "RetweetBasedDiffusion"
}

View File

@ -1,122 +0,0 @@
package com.twitter.cr_mixer.model
/**
 * Names used to distinguish name-annotated bindings in the injection modules.
 *
 * Every constant is deliberately a `final val` WITHOUT a type annotation: that makes it a
 * compile-time constant, which is required for it to be usable as an annotation argument such as
 * `@Named(ModuleNames.UnifiedCache)`. Do not add `: String` to these definitions.
 */
object ModuleNames {

  // ---- Stores ----
  final val FrsStore = "FrsStore"
  final val UssStore = "UssStore"
  final val UssStratoColumn = "UssStratoColumn"
  final val RsxStore = "RsxStore"
  final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore"
  final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore"
  final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore"
  final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore"
  final val StpStore = "StpStore"
  final val TwiceClustersMembersStore = "TwiceClustersMembersStore"
  final val TripCandidateStore = "TripCandidateStore"

  // ---- Consumer-based / ANN similarity engines ----
  final val ConsumerEmbeddingBasedTripSimilarityEngine =
    "ConsumerEmbeddingBasedTripSimilarityEngine"
  final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwHINANNSimilarityEngine"
  final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine =
    "ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine"
  final val ConsumersBasedUserAdGraphSimilarityEngine =
    "ConsumersBasedUserAdGraphSimilarityEngine"
  final val ConsumersBasedUserVideoGraphSimilarityEngine =
    "ConsumersBasedUserVideoGraphSimilarityEngine"
  final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine"
  final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine"
  final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine"

  // ---- SimClusters ANN service clients ----
  final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient"
  final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient"
  final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1"
  final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2"
  final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3"
  final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4"
  final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5"

  // ---- Caches ----
  final val UnifiedCache = "unifiedCache"
  final val MLScoreCache = "mlScoreCache"
  final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache"
  final val EarlybirdTweetsCache = "earlybirdTweetsCache"
  final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithoutRetweetsRepliesCacheStore"
  final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache =
    "earlybirdTweetsWithRetweetsRepliesCacheStore"

  // ---- Loggers ----
  final val AbDeciderLogger = "abDeciderLogger"
  final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger"
  final val TweetRecsLogger = "tweetRecsLogger"
  final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger"
  final val RelatedTweetsLogger = "relatedTweetsLogger"
  final val UtegTweetsLogger = "utegTweetsLogger"
  // The value intentionally stays "adsRecommendationLogger" (no plural "s"); it is a runtime name.
  final val AdsRecommendationsLogger = "adsRecommendationLogger"

  // ---- Offline SimClusters / RealGraph / offline candidate stores ----
  final val OfflineSimClustersANNInterestedInSimilarityEngine =
    "OfflineSimClustersANNInterestedInSimilarityEngine"
  final val RealGraphOonStore = "RealGraphOonStore"
  final val RealGraphInStore = "RealGraphInStore"
  final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore"
  final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore"
  final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore"
  final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore"
  final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore"
  final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore"
  final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore"
  final val OfflineFtrAt5Pop1000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop1000RankDecay11CandidateStore"
  final val OfflineFtrAt5Pop10000RankDecay11CandidateStore =
    "OfflineFtrAt5Pop10000RankDecay11CandidateStore"

  // ---- TwHIN strato stores ----
  final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow"
  final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement"
  final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow"
  final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement"

  // ---- Producer-based / tweet-based similarity engines ----
  final val ProducerBasedUserAdGraphSimilarityEngine =
    "ProducerBasedUserAdGraphSimilarityEngine"
  final val ProducerBasedUserTweetGraphSimilarityEngine =
    "ProducerBasedUserTweetGraphSimilarityEngine"
  final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine"
  final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine"
  final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine"
  final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine"
  final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine"
  final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine"
  final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine"

  // ---- Consumer-based graph stores ----
  final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore"
  final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore"
  final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore"

  final val UserTweetEntityGraphSimilarityEngine =
    "UserTweetEntityGraphSimilarityEngine"

  // ---- Topic tweet similarity engines and their strato stores ----
  final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine"
  final val CertoStratoStoreName = "CertoStratoStore"
  final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine"
  final val SkitHighPrecisionTopicTweetSimilarityEngine =
    "SkitHighPrecisionTopicTweetSimilarityEngine"
  final val SkitStratoStoreName = "SkitStratoStore"

  // ---- Navi gRPC clients ----
  final val HomeNaviGRPCClient = "HomeNaviGRPCClient"
  final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient"
  final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient"

  // ---- Diffusion / Blue Verified ----
  final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore"
  final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine"
  final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore"
}

View File

@ -1,13 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId to a raw score generated by one single similarity engine.
 *
 * @param tweetId the tweet the score belongs to
 * @param score the raw score
 * @param similarityEngineType which underlying topic source the topic tweet is from
 */
case class TopicTweetWithScore(
tweetId: TweetId,
score: Double,
similarityEngineType: SimilarityEngineType)

View File

@ -1,6 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
/** Binds a tweetId to the user id of its author. */
case class TweetWithAuthor(tweetId: TweetId, authorId: UserId)

View File

@ -1,8 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
/**
 * Binds a tweetId to a raw score generated by one single similarity engine.
 */
case class TweetWithScore(tweetId: TweetId, score: Double)

View File

@ -1,12 +0,0 @@
package com.twitter.cr_mixer.model
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.recos.recos_common.thriftscala.SocialProofType
/**
 * Binds a tweetId to a raw score and to its social proofs, grouped by social proof type.
 *
 * @param tweetId the tweet the score and social proofs belong to
 * @param score the raw score
 * @param socialProofByType ids grouped by social proof type
 *                          (NOTE(review): the Long ids are presumably user ids — confirm)
 */
case class TweetWithScoreAndSocialProof(
tweetId: TweetId,
score: Double,
socialProofByType: Map[SocialProofType, Seq[Long]])

View File

@ -1,135 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.thrift.CompactThriftCodec
import com.twitter.ads.entities.db.thriftscala.LineItemObjective
import com.twitter.bijection.Injection
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.LineItemInfo
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.DataRecord
import com.twitter.ml.api.DataType
import com.twitter.ml.api.Feature
import com.twitter.ml.api.GeneralTensor
import com.twitter.ml.api.RichDataRecord
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.manhattan.Revenue
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.util.Future
import javax.inject.Named
import scala.collection.JavaConverters._
/**
 * Provides the store that maps a tweet id to the line items of its active promotions.
 *
 * The provided store is layered: an in-memory cache in front of a memcached cache in front of a
 * read-only Manhattan dataset whose records are decoded by [[ActivePromotedTweetStore]].
 */
object ActivePromotedTweetStoreModule extends TwitterModule {

  // Descriptors of the two tensor features read from every record. They do not depend on the
  // request, so they are built once instead of on every lookup.
  private val LineItemIdsFeature: Feature[GeneralTensor] =
    new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64)
  private val LineItemObjectivesFeature: Feature[GeneralTensor] =
    new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64)

  /**
   * Returns the longs held by `tensor`, or an empty Seq unless the tensor is an INT64 tensor
   * with its longs set.
   */
  private def int64Values(tensor: GeneralTensor): Seq[Long] =
    if (tensor.getSetField == GeneralTensor._Fields.INT64_TENSOR &&
      tensor.getInt64Tensor.isSetLongs) {
      tensor.getInt64Tensor.getLongs.asScala.map(_.toLong)
    } else Seq.empty

  /**
   * Decodes the line item ids and objectives stored in a Manhattan DataRecord into
   * [[LineItemInfo]] values.
   *
   * @param activePromotedTweetMHStore backing store keyed by the tweet id rendered as a string
   * @param statsReceiver              not used by this class; kept so the constructor signature
   *                                   stays unchanged
   */
  case class ActivePromotedTweetStore(
    activePromotedTweetMHStore: ReadableStore[String, DataRecord],
    statsReceiver: StatsReceiver)
      extends ReadableStore[TweetId, Seq[LineItemInfo]] {

    /**
     * Looks up the record for `tweetId` and pairs each line item id with its objective.
     *
     * @return None when the backing store has no record; otherwise the decoded line items, which
     *         are empty when either tensor is not a populated INT64 tensor or when the two
     *         tensors differ in length
     */
    override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] =
      activePromotedTweetMHStore.get(tweetId.toString).map { recordOpt =>
        recordOpt.map { dataRecord =>
          val richDataRecord = new RichDataRecord(dataRecord)
          // NOTE(review): if getFeatureValue can return null for a missing feature, the
          // extraction below throws and fails the future, as it did before — confirm intended.
          val lineItemIdsTensor: GeneralTensor =
            richDataRecord.getFeatureValue(LineItemIdsFeature)
          val lineItemObjectivesTensor: GeneralTensor =
            richDataRecord.getFeatureValue(LineItemObjectivesFeature)

          val lineItemIds: Seq[Long] = int64Values(lineItemIdsTensor)
          // Objectives are converted before the size check, so an objective value the enum
          // rejects still surfaces as a failure.
          val lineItemObjectives: Seq[LineItemObjective] =
            int64Values(lineItemObjectivesTensor).map(objective =>
              LineItemObjective(objective.toInt))

          // The two tensors are parallel arrays; without a 1:1 correspondence there is no
          // reliable pairing, so nothing is returned.
          val lineItemInfo: Seq[LineItemInfo] =
            if (lineItemIds.size == lineItemObjectives.size) {
              lineItemIds.zip(lineItemObjectives).map {
                case (lineItemId, lineItemObjective) =>
                  LineItemInfo(
                    lineItemId = lineItemId,
                    lineItemObjective = lineItemObjective
                  )
              }
            } else Seq.empty
          lineItemInfo
        }
      }
  }

  /**
   * Builds the cached store of active promoted tweets.
   *
   * Lookups go through an in-memory cache (30 minute TTL, up to 250000 keys), then memcached
   * (60 minute TTL, keys prefixed with "apt/"), then the "active_promoted_tweets" Manhattan
   * dataset.
   */
  @Provides
  @Singleton
  def providesActivePromotedTweetStore(
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    crMixerStatsReceiver: StatsReceiver
  ): ReadableStore[TweetId, Seq[LineItemInfo]] = {

    val mhConfig = new ManhattanROConfig {
      val hdfsPath = HDFSPath("")
      val applicationID = ApplicationID("ads_bigquery_features")
      val datasetName = DatasetName("active_promoted_tweets")
      val cluster = Revenue

      override def statsReceiver: StatsReceiver =
        crMixerStatsReceiver.scope("active_promoted_tweets_mh")
    }

    val mhStore: ReadableStore[String, DataRecord] =
      ManhattanRO
        .getReadableStoreWithMtls[String, DataRecord](
          mhConfig,
          manhattanKVClientMtlsParams
        )(
          implicitly[Injection[String, Array[Byte]]],
          CompactThriftCodec[DataRecord]
        )

    val underlyingStore =
      ActivePromotedTweetStore(mhStore, crMixerStatsReceiver.scope("ActivePromotedTweetStore"))

    val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 60.minutes,
      asyncUpdate = false
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()),
      statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"),
      keyToString = { k: TweetId => s"apt/$k" }
    )

    ObservedCachedReadableStore.from(
      memcachedStore,
      ttl = 30.minutes,
      maxKeys = 250000, // size of promoted tweet is around 200,000
      windowSize = 10000L,
      cacheName = "active_promoted_tweet_cache",
      maxMultiGetSize = 20
    )(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore"))
  }
}

View File

@ -1,130 +0,0 @@
scala_library(
sources = [
"*.scala",
"core/*.scala",
"grpc_client/*.scala",
"similarity_engine/*.scala",
"source_signal/*.scala",
"thrift_client/*.scala",
],
compiler_option_sets = ["fatal_warnings"],
strict_deps = True,
tags = ["bazel-compatible"],
dependencies = [
"3rdparty/jvm/com/twitter/bijection:core",
"3rdparty/jvm/com/twitter/bijection:scrooge",
"3rdparty/jvm/com/twitter/storehaus:core",
"3rdparty/jvm/com/twitter/storehaus:memcache",
"3rdparty/jvm/io/grpc:grpc-api",
"3rdparty/jvm/io/grpc:grpc-auth",
"3rdparty/jvm/io/grpc:grpc-core",
"3rdparty/jvm/io/grpc:grpc-netty",
"3rdparty/jvm/io/grpc:grpc-protobuf",
"3rdparty/jvm/io/grpc:grpc-stub",
"3rdparty/jvm/javax/inject:javax.inject",
"3rdparty/jvm/org/scalanlp:breeze",
"3rdparty/src/jvm/com/twitter/storehaus:core",
"abdecider/src/main/scala",
"ann/src/main/thrift/com/twitter/ann/common:ann-common-scala",
"configapi/configapi-abdecider",
"configapi/configapi-core",
"configapi/configapi-featureswitches:v2",
"content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps",
"content-recommender/thrift/src/main/thrift:thrift-scala",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal",
"cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util",
"cr-mixer/thrift/src/main/thrift:thrift-scala",
"decider/src/main/scala",
"discovery-common/src/main/scala/com/twitter/discovery/common/configapi",
"featureswitches/featureswitches-core",
"featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder",
"finagle-internal/finagle-grpc/src/main/scala",
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
"finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers",
"finatra-internal/mtls-thriftmux/src/main/scala",
"finatra/inject/inject-core/src/main/scala",
"finatra/inject/inject-modules/src/main/scala",
"finatra/inject/inject-thrift-client",
"follow-recommendations-service/thrift/src/main/thrift:thrift-scala",
"frigate/frigate-common:util",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests",
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato",
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
"hydra/partition/thrift/src/main/thrift:thrift-scala",
"hydra/root/thrift/src/main/thrift:thrift-scala",
"mediaservices/commons/src/main/scala:futuretracker",
"product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala",
"qig-ranker/thrift/src/main/thrift:thrift-scala",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store",
"relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection",
"relevance-platform/thrift/src/main/thrift:thrift-scala",
"representation-manager/client/src/main/scala/com/twitter/representation_manager",
"representation-manager/client/src/main/scala/com/twitter/representation_manager/config",
"representation-manager/server/src/main/scala/com/twitter/representation_manager/migration",
"representation-manager/server/src/main/thrift:thrift-scala",
"representation-scorer/server/src/main/thrift:thrift-scala",
"servo/decider",
"servo/util/src/main/scala",
"simclusters-ann/thrift/src/main/thrift:thrift-scala",
"snowflake/src/main/scala/com/twitter/snowflake/id",
"src/java/com/twitter/ml/api:api-base",
"src/java/com/twitter/search/queryparser/query:core-query-nodes",
"src/java/com/twitter/search/queryparser/query/search:search-query-nodes",
"src/scala/com/twitter/algebird_internal/injection",
"src/scala/com/twitter/cortex/ml/embeddings/common:Helpers",
"src/scala/com/twitter/ml/api/embedding",
"src/scala/com/twitter/ml/featurestore/lib",
"src/scala/com/twitter/scalding_internal/multiformat/format",
"src/scala/com/twitter/simclusters_v2/candidate_source",
"src/scala/com/twitter/simclusters_v2/common",
"src/scala/com/twitter/storehaus_internal/manhattan",
"src/scala/com/twitter/storehaus_internal/manhattan/config",
"src/scala/com/twitter/storehaus_internal/memcache",
"src/scala/com/twitter/storehaus_internal/memcache/config",
"src/scala/com/twitter/storehaus_internal/offline",
"src/scala/com/twitter/storehaus_internal/util",
"src/scala/com/twitter/topic_recos/stores",
"src/thrift/com/twitter/core_workflows/user_model:user_model-scala",
"src/thrift/com/twitter/frigate:frigate-common-thrift-scala",
"src/thrift/com/twitter/frigate:frigate-thrift-scala",
"src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala",
"src/thrift/com/twitter/hermit/stp:hermit-stp-scala",
"src/thrift/com/twitter/ml/api:data-java",
"src/thrift/com/twitter/ml/api:embedding-scala",
"src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala",
"src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala",
"src/thrift/com/twitter/recos:recos-common-scala",
"src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala",
"src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala",
"src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala",
"src/thrift/com/twitter/search:earlybird-scala",
"src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala",
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
"src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala",
"src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala",
"src/thrift/com/twitter/tweetypie:service-scala",
"src/thrift/com/twitter/twistly:twistly-scala",
"src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala",
"stitch/stitch-storehaus",
"stitch/stitch-tweetypie/src/main/scala",
"strato/src/main/scala/com/twitter/strato/client",
"user-signal-service/thrift/src/main/thrift:thrift-scala",
"util-internal/scribe/src/main/scala/com/twitter/logging",
"util/util-hashing",
],
)

View File

@ -1,52 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Athena
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.hermit.store.common.ObservedCachedReadableStore
/**
 * Guice module exposing the read-only Blue Verified annotation store.
 */
object BlueVerifiedAnnotationStoreModule extends TwitterModule {

  /**
   * Builds the store bound under `ModuleNames.BlueVerifiedAnnotationStore`: a Manhattan
   * read-only dataset wrapped by `ObservedCachedReadableStore`.
   *
   * @param statsReceiver root receiver; the cache wrapper reports under the
   *                      "inMemoryCachedBlueVerifiedAnnotationStore" scope
   * @param manhattanKVClientMtlsParams mTLS parameters passed to the Manhattan client
   * @return a store from String keys to `BlueVerifiedAnnotationsV2` values
   */
  @Provides
  @Singleton
  @Named(ModuleNames.BlueVerifiedAnnotationStore)
  def providesBlueVerifiedAnnotationStore(
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
  ): ReadableStore[String, BlueVerifiedAnnotationsV2] = {
    // Implicit so that getReadableStoreWithMtls can pick it up as the value injection.
    implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2)

    val athenaDataset = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("content_recommender_athena"),
      DatasetName("blue_verified_annotations"),
      Athena
    )
    val manhattanStore =
      ManhattanRO.getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2](
        athenaDataset,
        manhattanKVClientMtlsParams
      )
    val cacheStats = statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore")

    ObservedCachedReadableStore.from(
      manhattanStore,
      ttl = 24.hours,
      maxKeys = 100000,
      windowSize = 10000L,
      cacheName = "blue_verified_annotation_cache"
    )(cacheStats)
  }
}

View File

@ -1,57 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.simclusters_v2.thriftscala.TopicId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore
import com.twitter.topic_recos.thriftscala.TweetWithScores
/**
 * Guice module providing the Certo topic -> top tweets store, fronted by memcached and an
 * in-process cache.
 */
object CertoStratoStoreModule extends TwitterModule {

  /**
   * Layers, from the backing source outwards:
   *  1. the Strato-backed `CertoTopicTopKTweetsStore.prodStore`, observed under the
   *     `ModuleNames.CertoStratoStoreName` scope and projected to
   *     `topTweetsByFollowerL2NormalizedCosineSimilarityScore`;
   *  2. a memcached layer with a 10 minute TTL;
   *  3. an in-memory cache with a 5 minute TTL.
   *
   * @param crMixerUnifiedCacheClient memcached client bound under `ModuleNames.UnifiedCache`
   * @param stratoClient Strato client used by the backing store
   * @param statsReceiver root receiver from which each layer's scope is derived
   */
  @Provides
  @Singleton
  @Named(ModuleNames.CertoStratoStoreName)
  def providesCertoStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicId, Seq[TweetWithScores]] = {
    val topTweetsStore =
      ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))(
        statsReceiver.scope(ModuleNames.CertoStratoStoreName)
      ).mapValues { topK =>
        topK.topTweetsByFollowerL2NormalizedCosineSimilarityScore
      }

    val memcachedCertoStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = topTweetsStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 10.minutes
    )(
      valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()),
      statsReceiver = statsReceiver.scope("memcached_certo_store"),
      // Memcache key is the "certo:" prefix followed by the hash of the topic id's toString.
      keyToString = { topicId => s"certo:${keyHasher.hashKey(topicId.toString.getBytes)}" }
    )

    ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]](
      memcachedCertoStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      windowSize = 10000L,
      cacheName = "certo_in_memory_cache"
    )(statsReceiver.scope("certo_in_memory_cache"))
  }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest
import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse
import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module adapting the UserAdGraph thrift client to a `ReadableStore`.
 */
object ConsumersBasedUserAdGraphStoreModule extends TwitterModule {

  /**
   * Wraps `consumersBasedRelatedAds` so each `get` issues one thrift call and returns the
   * response wrapped in `Some`.
   *
   * @param userAdGraphService thrift client for the UserAdGraph service
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserAdGraphStore)
  def providesConsumerBasedUserAdGraphStore(
    userAdGraphService: UserAdGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] =
    new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] {
      override def get(
        request: ConsumersBasedRelatedAdRequest
      ): Future[Option[RelatedAdResponse]] =
        for (response <- userAdGraphService.consumersBasedRelatedAds(request))
          yield Some(response)
    }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module adapting the UserTweetGraph thrift client to a `ReadableStore`.
 */
object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule {

  /**
   * Wraps `consumersBasedRelatedTweets` so each `get` issues one thrift call and returns
   * the response wrapped in `Some`.
   *
   * @param userTweetGraphService thrift client for the UserTweetGraph service
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserTweetGraphStore)
  def providesConsumerBasedUserTweetGraphStore(
    userTweetGraphService: UserTweetGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        for (response <- userTweetGraphService.consumersBasedRelatedTweets(request))
          yield Some(response)
    }
}

View File

@ -1,30 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest
import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse
import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module adapting the UserVideoGraph thrift client to a `ReadableStore`.
 */
object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule {

  /**
   * Wraps `consumersBasedRelatedTweets` so each `get` issues one thrift call and returns
   * the response wrapped in `Some`.
   *
   * @param userVideoGraphService thrift client for the UserVideoGraph service
   */
  @Provides
  @Singleton
  @Named(ModuleNames.ConsumerBasedUserVideoGraphStore)
  def providesConsumerBasedUserVideoGraphStore(
    userVideoGraphService: UserVideoGraph.MethodPerEndpoint
  ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] =
    new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] {
      override def get(
        request: ConsumersBasedRelatedTweetRequest
      ): Future[Option[RelatedTweetResponse]] =
        for (response <- userVideoGraphService.consumersBasedRelatedTweets(request))
          yield Some(response)
    }
}

View File

@ -1,16 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.timelines.configapi.Config
import com.twitter.cr_mixer.param.CrMixerParamConfig
import com.twitter.inject.TwitterModule
import javax.inject.Singleton
/**
 * Guice module binding the configapi `Config` used by CR-Mixer.
 */
object CrMixerParamConfigModule extends TwitterModule {

  /** Exposes `CrMixerParamConfig.config` as the singleton `Config` binding. */
  @Provides
  @Singleton
  def provideConfig(): Config = CrMixerParamConfig.config
}

View File

@ -1,54 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.TweetsWithScore
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module providing Manhattan-backed diffusion recommendation stores.
 */
object DiffusionStoreModule extends TwitterModule {
  type UserId = Long

  // NOTE(review): this val has no type ascription. Adding one would likely make the val
  // eligible as its own implicit inside the right-hand side; confirm before annotating.
  implicit val longCodec = implicitly[Injection[Long, Array[Byte]]]
  implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] =
    BinaryScalaCodec(TweetsWithScore)

  /**
   * Store over the "diffusion_retweet_tweet_recs" dataset of the "cr_mixer_apollo"
   * Manhattan application.
   *
   * @param serviceIdentifier identity used to build the Manhattan mTLS parameters
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore)
  def retweetBasedDiffusionRecsMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[Long, TweetsWithScore] =
    buildTweetRecsStore(
      serviceIdentifier,
      ManhattanROConfig(
        HDFSPath(""), // not needed
        ApplicationID("cr_mixer_apollo"),
        DatasetName("diffusion_retweet_tweet_recs"),
        Apollo
      )
    )

  /** Opens a read-only Manhattan store for `manhattanROConfig`, passing both codecs explicitly. */
  private def buildTweetRecsStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[Long, TweetsWithScore] = {
    val mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
    ManhattanRO.getReadableStoreWithMtls[Long, TweetsWithScore](
      manhattanROConfig,
      mtlsParams
    )(longCodec, tweetRecsInjection)
  }
}

View File

@ -1,189 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery
import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.util.SeqLongInjection
import com.twitter.hashing.KeyHasher
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams
import com.twitter.search.earlybird.thriftscala.EarlybirdRequest
import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode
import com.twitter.search.earlybird.thriftscala.EarlybirdService
import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery
import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Duration
import com.twitter.util.Future
import javax.inject.Named
/**
 * Guice module providing memcache-fronted stores that map a user id to tweet ids returned
 * by an Earlybird search ranked by recency, using the user id as the from-user filter.
 */
object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule {

  private val keyHasher: KeyHasher = KeyHasher.FNV1A_64

  /**
   * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry.
   * If the value changes, it will increase the size of the memcache.
   */
  private val DefaultMaxNumTweetPerUser: Int = 100
  private val FilterOutRetweetsAndReplies = true
  private val NotFilterOutRetweetsAndReplies = false
  private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15)

  /**
   * Store of recent tweet ids per user with retweets and replies filtered out.
   *
   * @param statsReceiver root receiver; search stats go under
   *                      "EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore" and cache
   *                      stats under "earlybird_recency_based_tweets_home_memcache"
   * @param earlybirdSearchClient Earlybird thrift client
   * @param earlybirdRecencyBasedTweetsCache memcached client bound under
   *                                         `ModuleNames.EarlybirdTweetsCache`
   * @param timeoutConfig supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        // Home based EB filters out retweets and replies
        val earlybirdRequest =
          buildEarlybirdRequest(
            userId,
            FilterOutRetweetsAndReplies,
            DefaultMaxNumTweetPerUser,
            processingTimeout = timeoutConfig.earlybirdServerTimeout
          )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"),
      keyToString = { k =>
        f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe
      }
    )
  }

  /**
   * Store of recent tweet ids per user that keeps retweets and replies.
   *
   * @param statsReceiver root receiver; search stats go under
   *                      "EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore" and cache
   *                      stats under "earlybird_recency_based_tweets_notifications_memcache"
   * @param earlybirdSearchClient Earlybird thrift client
   * @param earlybirdRecencyBasedTweetsCache memcached client bound under
   *                                         `ModuleNames.EarlybirdTweetsCache`
   * @param timeoutConfig supplies `earlybirdServerTimeout` for the request
   */
  @Provides
  @Singleton
  @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache)
  def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore(
    statsReceiver: StatsReceiver,
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient,
    timeoutConfig: TimeoutConfig
  ): ReadableStore[UserId, Seq[TweetId]] = {
    val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore")
    val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] {
      override def get(userId: UserId): Future[Option[Seq[TweetId]]] = {
        val earlybirdRequest = buildEarlybirdRequest(
          userId,
          // Notifications based EB keeps retweets and replies
          NotFilterOutRetweetsAndReplies,
          DefaultMaxNumTweetPerUser,
          processingTimeout = timeoutConfig.earlybirdServerTimeout
        )
        getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats)
      }
    }
    ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = underlyingStore,
      cacheClient = earlybirdRecencyBasedTweetsCache,
      ttl = MemcacheKeyTimeToLiveDuration,
      asyncUpdate = true
    )(
      valueInjection = SeqLongInjection,
      statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"),
      keyToString = { k =>
        f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications
      }
    )
  }

  /**
   * Assembles the Earlybird request for one seed user.
   *
   * @param seedUserId user whose tweets are requested
   * @param filterOutRetweetsAndReplies forwarded to the query builder
   * @param maxNumTweetsPerSeedUser requested result count
   * @param processingTimeout used both as the request `timeoutMs` and for the collector
   *                          termination params
   */
  private def buildEarlybirdRequest(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): EarlybirdRequest =
    EarlybirdRequest(
      searchQuery = getThriftSearchQuery(
        seedUserId = seedUserId,
        filterOutRetweetsAndReplies = filterOutRetweetsAndReplies,
        maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser,
        processingTimeout = processingTimeout
      ),
      clientId = Some(EarlybirdClientId),
      timeoutMs = processingTimeout.inMilliseconds.intValue(),
      getOlderResults = Some(false),
      adjustedProtectedRequestParams = None,
      adjustedFullArchiveRequestParams = None,
      getProtectedTweetsOnly = Some(false),
      skipVeryRecentTweets = true,
    )

  /** Builds the recency-ranked search query restricted to tweets from `seedUserId`. */
  private def getThriftSearchQuery(
    seedUserId: UserId,
    filterOutRetweetsAndReplies: Boolean,
    maxNumTweetsPerSeedUser: Int,
    processingTimeout: Duration
  ): ThriftSearchQuery = ThriftSearchQuery(
    serializedQuery = GetEarlybirdQuery(
      None,
      None,
      Set.empty,
      filterOutRetweetsAndReplies
    ).map(_.serialize),
    fromUserIDFilter64 = Some(Seq(seedUserId)),
    numResults = maxNumTweetsPerSeedUser,
    rankingMode = ThriftSearchRankingMode.Recency,
    collectorParams = Some(
      CollectorParams(
        // numResultsToReturn defines how many results each EB shard will return to search root
        numResultsToReturn = maxNumTweetsPerSeedUser,
        // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching.
        terminationParams =
          GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout)
      )),
    facetFieldNames = Some(FacetsToFetch),
    resultMetadataOptions = Some(MetadataOptions),
    searchStatusIds = None
  )

  /**
   * Runs the search and extracts the tweet ids from the response.
   *
   * On a `Success` response code the ids of `searchResults` are returned (`None` when the
   * response carries no `searchResults`) and the number of ids is recorded in the
   * "result/size" stat. Any other response code increments a counter under "failures"
   * named after the code's class and yields `Some(Seq.empty)`.
   *
   * @param earlybirdSearchClient Earlybird thrift client
   * @param request request to send
   * @param statsReceiver receiver under which the "result" and "failures" scopes are created
   */
  private def getEarlybirdSearchResult(
    earlybirdSearchClient: EarlybirdService.MethodPerEndpoint,
    request: EarlybirdRequest,
    statsReceiver: StatsReceiver
  ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient
    .search(request)
    .map { response =>
      response.responseCode match {
        case EarlybirdResponseCode.Success =>
          val earlybirdSearchResult =
            response.searchResults
              .map {
                _.results
                  .map(searchResult => searchResult.id)
              }
          // Record the number of tweet ids. Calling `.size` on the Option itself would only
          // ever report 0 or 1.
          val numTweetIds = earlybirdSearchResult.fold(0)(_.size)
          statsReceiver.scope("result").stat("size").add(numTweetIds)
          earlybirdSearchResult
        case e =>
          statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr()
          // NOTE(review): an empty result is returned for failed searches, so the memcache
          // layer wrapping this store may cache it for the TTL — confirm this is intended.
          Some(Seq.empty)
      }
    }
}

View File

@ -1,195 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.ml.api.{thriftscala => api}
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module providing Manhattan-backed embedding stores keyed by `InternalId`.
 *
 * Every store reads from the "cr_mixer_apollo" application on the Apollo cluster and
 * decodes values with `BinaryScalaCodec(api.Embedding)`. The underlying datasets are keyed
 * by Long; the `InternalId` key is unwrapped to a tweet id or user id, and any other
 * variant raises `UnsupportedOperationException`.
 */
object EmbeddingStoreModule extends TwitterModule {
  type UserId = Long

  implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
    CompactScalaCodec(api.Embedding)
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  // The binding-name constants below are intentionally left without a type ascription so
  // they remain constant value definitions usable inside the @Named annotations.

  final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore"

  /** Tweet-keyed store over the "twhin_regular_update_tweet_embedding_apollo" dataset. */
  @Provides
  @Singleton
  @Named(TwHINEmbeddingRegularUpdateMhStoreName)
  def twHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildTweetEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_regular_update_tweet_embedding_apollo")
    )

  final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName =
    "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore"

  /** User-keyed store over the "twhin_user_embedding_regular_update_apollo" dataset. */
  @Provides
  @Singleton
  @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName)
  def consumerBasedTwHINEmbeddingRegularUpdateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("twhin_user_embedding_regular_update_apollo")
    )

  final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore"

  /** User-keyed store over the "two_tower_fav_user_embedding_apollo" dataset. */
  @Provides
  @Singleton
  @Named(TwoTowerFavConsumerEmbeddingMhStoreName)
  def twoTowerFavConsumerEmbeddingMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("two_tower_fav_user_embedding_apollo")
    )

  // NOTE(review): unlike its siblings, this binding name's value ends in "Name". It is left
  // untouched because changing the string would change the Guice binding key.
  final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName"

  /** User-keyed store over the "experimental_user_embedding" dataset. */
  @Provides
  @Singleton
  @Named(DebuggerDemoUserEmbeddingMhStoreName)
  def debuggerDemoUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildUserEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("experimental_user_embedding")
    )

  final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore"

  /** Tweet-keyed store over the "experimental_tweet_embedding" dataset. */
  @Provides
  @Singleton
  @Named(DebuggerDemoTweetEmbeddingMhStoreName)
  def debuggerDemoTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[InternalId, api.Embedding] =
    // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql
    // Change the above sql if you want to use a diff embedding
    buildTweetEmbeddingStore(
      serviceIdentifier,
      crMixerApolloConfig("experimental_tweet_embedding")
    )

  /** Read-only config for `datasetName` in the "cr_mixer_apollo" application on Apollo. */
  private def crMixerApolloConfig(datasetName: String): ManhattanROConfig =
    ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("cr_mixer_apollo"),
      DatasetName(datasetName),
      Apollo
    )

  /** Embedding store that only accepts `InternalId.UserId` keys. */
  private def buildUserEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] =
    buildLongKeyedEmbeddingStore(serviceIdentifier, manhattanROConfig) {
      case InternalId.UserId(userId) =>
        userId
      case _ =>
        throw new UnsupportedOperationException("Invalid Internal Id")
    }

  /** Embedding store that only accepts `InternalId.TweetId` keys. */
  private def buildTweetEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  ): ReadableStore[InternalId, api.Embedding] =
    buildLongKeyedEmbeddingStore(serviceIdentifier, manhattanROConfig) {
      case InternalId.TweetId(tweetId) =>
        tweetId
      case _ =>
        throw new UnsupportedOperationException("Invalid Internal Id")
    }

  /**
   * Opens the Long-keyed Manhattan dataset described by `manhattanROConfig` and re-keys it
   * by `InternalId` through `toLongKey`.
   *
   * The binary embedding codec is passed explicitly, so the implicit compact codec declared
   * on this object is not used here.
   */
  private def buildLongKeyedEmbeddingStore(
    serviceIdentifier: ServiceIdentifier,
    manhattanROConfig: ManhattanROConfig
  )(
    toLongKey: InternalId => Long
  ): ReadableStore[InternalId, api.Embedding] = {
    val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] =
      BinaryScalaCodec(api.Embedding)
    val longCodec = implicitly[Injection[Long, Array[Byte]]]
    ManhattanRO
      .getReadableStoreWithMtls[Long, api.Embedding](
        manhattanROConfig,
        ManhattanKVClientMtlsParams(serviceIdentifier)
      )(longCodec, binaryEmbeddingInjection)
      .composeKeyMapping[InternalId](toLongKey)
  }
}

View File

@ -1,29 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.source_signal.FrsStore
import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/**
 * Guice module providing the observed Follow Recommendations (FRS) store.
 */
object FrsStoreModule extends TwitterModule {

  /**
   * Builds an `FrsStore` from the thrift client and wraps it in `ObservedReadableStore`
   * under the "follow_recommendations_store" scope.
   *
   * @param frsClient thrift client for the follow recommendations service
   * @param statsReceiver passed to `FrsStore` and used as the root for the observed scope
   * @param decider passed through to `FrsStore`
   */
  @Provides
  @Singleton
  @Named(ModuleNames.FrsStore)
  def providesFrsStore(
    frsClient: FollowRecommendationsThriftService.MethodPerEndpoint,
    statsReceiver: StatsReceiver,
    decider: CrMixerDecider
  ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = {
    val frsStore = FrsStore(frsClient, statsReceiver, decider)
    ObservedReadableStore(frsStore)(statsReceiver.scope("follow_recommendations_store"))
  }
}

View File

@ -1,17 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import javax.inject.Singleton
/**
 * Guice module binding the Manhattan mTLS client parameters.
 */
object MHMtlsParamsModule extends TwitterModule {

  /** Derives the singleton `ManhattanKVClientMtlsParams` from the injected service identifier. */
  @Singleton
  @Provides
  def providesManhattanMtlsParams(
    serviceIdentifier: ServiceIdentifier
  ): ManhattanKVClientMtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
}

View File

@ -1,150 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.bijection.Injection
import com.twitter.bijection.scrooge.CompactScalaCodec
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import javax.inject.Named
import javax.inject.Singleton
/**
 * Guice module providing user-keyed stores of offline tweet recommendations.
 *
 * Each provider reads a different dataset of the "multi_type_simclusters" Manhattan
 * application on Apollo; they differ only in binding name and dataset name.
 */
object OfflineCandidateStoreModule extends TwitterModule {
  type UserId = Long

  // Implicit value injection used when opening the Manhattan stores below.
  implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] =
    CompactScalaCodec(CandidateTweetsList)

  /** Store over "offline_tweet_recommendations_from_interestedin_2020". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020CandidateStore)
  def offlineTweet2020CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore)
  def offlineTweet2020Hl0El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore)
  def offlineTweet2020Hl2El15CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore)
  def offlineTweet2020Hl2El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50")

  /** Store over "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore)
  def offlineTweet2020Hl8El50CandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50")

  /** Store over "offline_tweet_recommendations_from_mts_consumer_embeddings". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineTweetMTSCandidateStore)
  def offlineTweetMTSCandidateMhStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings")

  /** Store over "offline_tweet_recommendations_from_decayed_sum". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFavDecayedSumCandidateStore)
  def offlineFavDecayedSumCandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_decayed_sum")

  /** Store over "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore)
  def offlineFtrAt5Pop1000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1")

  /** Store over "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1". */
  @Provides
  @Singleton
  @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore)
  def offlineFtrAt5Pop10000RankDecay11CandidateStore(
    serviceIdentifier: ServiceIdentifier
  ): ReadableStore[UserId, CandidateTweetsList] =
    buildOfflineCandidateStore(
      serviceIdentifier,
      datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1")

  /**
   * Opens the read-only Manhattan dataset `datasetName` of the "multi_type_simclusters"
   * application on Apollo.
   */
  private def buildOfflineCandidateStore(
    serviceIdentifier: ServiceIdentifier,
    datasetName: String
  ): ReadableStore[UserId, CandidateTweetsList] = {
    val datasetConfig = ManhattanROConfig(
      HDFSPath(""), // not needed
      ApplicationID("multi_type_simclusters"),
      DatasetName(datasetName),
      Apollo
    )
    ManhattanRO.getReadableStoreWithMtls[Long, CandidateTweetsList](
      datasetConfig,
      ManhattanKVClientMtlsParams(serviceIdentifier)
    )
  }
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
import javax.inject.Singleton
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/**
 * Guice module providing the Strato-backed user real graph OON store.
 */
object RealGraphOonStoreModule extends TwitterModule {

  // Flag for the Strato column path; read when the provider method runs.
  private val userRealGraphOonColumnPath: Flag[String] = flag[String](
    name = "crMixer.userRealGraphOonColumnPath",
    default = "recommendations/twistly/userRealgraphOon",
    help = "Strato column path for user real graph OON Store"
  )

  /**
   * Builds a unit-view `StratoFetchableStore` on the flagged column path and wraps it in
   * `ObservedReadableStore` under the "user_real_graph_oon_store" scope.
   *
   * @param stratoClient Strato client used to fetch the column
   * @param statsReceiver root receiver for the observed scope
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphOonStore)
  def providesRealGraphOonStore(
    stratoClient: StratoClient,
    statsReceiver: StatsReceiver
  ): ReadableStore[UserId, CandidateSeq] = {
    val columnPath = userRealGraphOonColumnPath()
    val stratoStore =
      StratoFetchableStore.withUnitView[UserId, CandidateSeq](stratoClient, columnPath)
    ObservedReadableStore(stratoStore)(statsReceiver.scope("user_real_graph_oon_store"))
  }
}

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.storehaus.ReadableStore
import com.twitter.storehaus_internal.manhattan.Apollo
import com.twitter.storehaus_internal.manhattan.ManhattanRO
import com.twitter.storehaus_internal.manhattan.ManhattanROConfig
import com.twitter.storehaus_internal.util.ApplicationID
import com.twitter.storehaus_internal.util.DatasetName
import com.twitter.storehaus_internal.util.HDFSPath
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.wtf.candidate.thriftscala.CandidateSeq
/**
 * Guice module exposing the store bound under [[ModuleNames.RealGraphInStore]].
 *
 * The store is layered, innermost first:
 *  1. a read-only Manhattan dataset (`real_graph_scores_apollo`),
 *  2. a memcached read-through layer with a 24 hour TTL,
 *  3. a [[DeciderableReadableStore]] gated by
 *     [[DeciderKey.enableRealGraphMhStoreDeciderKey]].
 */
object RealGraphStoreMhModule extends TwitterModule {

  /**
   * @param decider                     source of the decider gate guarding the store
   * @param statsReceiver               parent stats receiver for the cache and gate scopes
   * @param manhattanKVClientMtlsParams mTLS parameters for the Manhattan client
   * @param crMixerUnifiedCacheClient   memcached client shared across cr-mixer stores
   * @return a read-only store from user id to [[CandidateSeq]]
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RealGraphInStore)
  def providesRealGraphStoreMh(
    decider: CrMixerDecider,
    statsReceiver: StatsReceiver,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
  ): ReadableStore[UserId, CandidateSeq] = {
    // Explicitly typed: implicit definitions without a declared type are fragile under
    // Scala 2 (resolution depends on inference order) and rejected by Scala 3.
    implicit val valueCodec: BinaryScalaCodec[CandidateSeq] =
      new BinaryScalaCodec(CandidateSeq)

    val manhattanConfig = ManhattanROConfig(
      HDFSPath(""),
      ApplicationID("cr_mixer_apollo"),
      DatasetName("real_graph_scores_apollo"),
      Apollo
    )

    val underlyingStore = ManhattanRO
      .getReadableStoreWithMtls[UserId, CandidateSeq](
        manhattanConfig,
        manhattanKVClientMtlsParams
      )

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = underlyingStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 24.hours,
      )(
        valueInjection = valueCodec,
        statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"),
        // Short key prefix for entries in the unified cache.
        keyToString = { k: UserId => s"uRGraph/$k" }
      )

    DeciderableReadableStore(
      memCachedStore,
      decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey),
      statsReceiver.scope("RealGraphMh")
    )
  }
}

View File

@ -1,107 +0,0 @@
package com.twitter.cr_mixer.module
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.SimClustersEmbedding
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.google.inject.Provides
import com.google.inject.Singleton
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding}
/**
 * Guice module exposing SimClusters embedding stores served by the
 * representation-manager Strato columns.
 *
 * Every provider builds the same pipeline: a Strato fetch with a
 * [[SimClustersEmbeddingView]], conversion of the thrift value to
 * [[SimClustersEmbedding]], and an [[ObservedReadableStore]] wrapper. The
 * providers differ only in the column, the view and the stats scope, so the
 * shared construction lives in [[observedEmbeddingStore]].
 */
object RepresentationManagerModule extends TwitterModule {
  private val ColPathPrefix = "recommendations/representation_manager/"
  private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet"
  private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User"

  /**
   * Builds an observed embedding store for one Strato column and view.
   *
   * @param stratoClient  client used to fetch from Strato
   * @param columnPath    Strato column to read
   * @param view          embedding type / model version selector sent with each fetch
   * @param statsReceiver parent stats receiver
   * @param statsScope    scope name under which the store reports
   */
  private def observedEmbeddingStore(
    stratoClient: StratoClient,
    columnPath: String,
    view: SimClustersEmbeddingView,
    statsReceiver: StatsReceiver,
    statsScope: String
  ): ReadableStore[Long, SimClustersEmbedding] = {
    ObservedReadableStore(
      StratoFetchableStore
        .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding](
          stratoClient,
          columnPath,
          view)
        .mapValues(SimClustersEmbedding(_))
    )(statsReceiver.scope(statsScope))
  }

  /** Tweet embeddings of type `LogFavLongestL2EmbeddingTweet`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore)
  def providesRepresentationManagerTweetStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, SimClustersEmbedding] = {
    observedEmbeddingStore(
      stratoClient,
      SimclustersTweetColPath,
      SimClustersEmbeddingView(
        EmbeddingType.LogFavLongestL2EmbeddingTweet,
        ModelVersion.Model20m145k2020),
      statsReceiver,
      "rms_tweet_log_fav_longest_l2_store"
    )
  }

  /** User embeddings of type `FavBasedProducer`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore)
  def providesRepresentationManagerUserFavBasedProducerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      SimClustersEmbeddingView(
        EmbeddingType.FavBasedProducer,
        ModelVersion.Model20m145k2020),
      statsReceiver,
      "rms_user_fav_based_producer_store"
    )
  }

  /** User embeddings of type `LogFavBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore)
  def providesRepresentationManagerUserLogFavConsumerEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      SimClustersEmbeddingView(
        EmbeddingType.LogFavBasedUserInterestedIn,
        ModelVersion.Model20m145k2020),
      statsReceiver,
      "rms_user_log_fav_interestedin_store"
    )
  }

  /** User embeddings of type `FollowBasedUserInterestedIn`. */
  @Provides
  @Singleton
  @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore)
  def providesRepresentationManagerUserFollowInterestedInEmbeddingStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, SimClustersEmbedding] = {
    observedEmbeddingStore(
      stratoClient,
      SimclustersUserColPath,
      SimClustersEmbeddingView(
        EmbeddingType.FollowBasedUserInterestedIn,
        ModelVersion.Model20m145k2020),
      statsReceiver,
      "rms_user_follow_interestedin_store"
    )
  }
}

View File

@ -1,56 +0,0 @@
package com.twitter.cr_mixer.module
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.simclusters_v2.common.UserId
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.storehaus.ReadableStore
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.hermit.store.common.ObservedReadableStore
import javax.inject.Named
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.representationscorer.thriftscala.ListScoreId
/**
 * Guice module exposing the store bound under [[ModuleNames.RsxStore]]: a
 * Strato-backed scorer that maps a pair of ids to a `Double` score via the
 * representation-scorer `listScore` column.
 */
object RepresentationScorerModule extends TwitterModule {

  private val rsxColumnPath = "recommendations/representation_scorer/listScore"

  private final val SimClusterModelVersion = ModelVersion.Model20m145k2020
  private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet

  /**
   * Builds the observed scorer store.
   *
   * NOTE(review): the key is declared as `(UserId, TweetId)`, yet the key mapping
   * below sends the first element as an `InternalId.TweetId` with a tweet
   * embedding type. Both aliases come from the same package, so this compiles
   * either way -- confirm which id kind callers actually pass.
   *
   * @param statsReceiver parent stats receiver; the store reports under `rsx_store`
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @Singleton
  @Named(ModuleNames.RsxStore)
  def providesRepresentationScorerStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[(UserId, TweetId), Double] = {
    val listScoreStore =
      StratoFetchableStore.withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath)

    // Translate the caller-facing pair into the column's ListScoreId key.
    val pairKeyedStore = listScoreStore.composeKeyMapping[(UserId, TweetId)] {
      case (targetId, candidateId) =>
        representationScorerStoreKeyMapping(targetId, candidateId)
    }

    ObservedReadableStore(pairKeyedStore)(statsReceiver.scope("rsx_store"))
  }

  /**
   * Builds the `ListScoreId` for scoring a single candidate against a target,
   * using the fixed scoring algorithm, model version and embedding type declared
   * in this module.
   */
  private def representationScorerStoreKeyMapping(
    targetTweetId: TweetId,
    candidateTweetId: TweetId
  ): ListScoreId =
    ListScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity,
      modelVersion = SimClusterModelVersion,
      targetEmbeddingType = TweetEmbeddingType,
      targetId = InternalId.TweetId(targetTweetId),
      candidateEmbeddingType = TweetEmbeddingType,
      candidateIds = Seq(InternalId.TweetId(candidateTweetId))
    )
}

View File

@ -1,90 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.config.TimeoutConfig
import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig
import com.twitter.cr_mixer.thriftscala.SimilarityEngineType
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import javax.inject.Singleton
/**
 * Example module: wires a [[StandardSimilarityEngine]] around a hard-coded,
 * in-memory dummy store. Replace the dummy store with a real
 * [[ReadableStore]] when building an actual engine.
 */
object SimpleSimilarityEngineModule extends TwitterModule {

  /**
   * @param timeoutConfig supplies the similarity-engine timeout
   * @param globalStats   stats receiver handed to the engine
   * @return an engine with no decider or feature-switch gating configured
   */
  @Provides
  @Singleton
  def providesSimpleSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): StandardSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyCandidates = Map(
      1L -> Seq((100L, 1.0), (101L, 1.0)),
      2L -> Seq((200L, 2.0), (201L, 2.0)),
      3L -> Seq((300L, 3.0), (301L, 3.0))
    )
    val dummyStore = ReadableStore.fromMap(dummyCandidates)

    // Neither a decider nor a feature switch gates this example engine.
    val ungated = GatingConfig(
      deciderConfig = None,
      enableFeatureSwitch = None
    )
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = ungated
    )

    new StandardSimilarityEngine[UserId, (TweetId, Double)](
      implementingStore = dummyStore,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997),
      globalStats = globalStats,
      engineConfig = config
    )
  }
}
/**
 * Example module: wires a [[LookupSimilarityEngine]] around two hard-coded,
 * in-memory dummy stores registered under the version names "V1" and "V2".
 */
object LookupSimilarityEngineModule extends TwitterModule {

  /**
   * @param timeoutConfig supplies the similarity-engine timeout
   * @param globalStats   stats receiver handed to the engine
   * @return an engine with no decider or feature-switch gating configured
   */
  @Provides
  @Singleton
  def providesLookupSimilarityEngine(
    timeoutConfig: TimeoutConfig,
    globalStats: StatsReceiver
  ): LookupSimilarityEngine[UserId, (TweetId, Double)] = {
    // Inject your readableStore implementation here
    val dummyStoreV1 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      )
    )
    // The two versions hold the same data in this example.
    val dummyStoreV2 = ReadableStore.fromMap(
      Map(
        1L -> Seq((100L, 1.0), (101L, 1.0)),
        2L -> Seq((200L, 2.0), (201L, 2.0)),
      )
    )

    val storesByVersion = Map(
      "V1" -> dummyStoreV1,
      "V2" -> dummyStoreV2
    )
    val config = SimilarityEngineConfig(
      timeout = timeoutConfig.similarityEngineTimeout,
      gatingConfig = GatingConfig(deciderConfig = None, enableFeatureSwitch = None)
    )

    new LookupSimilarityEngine[UserId, (TweetId, Double)](
      versionedStoreMap = storesByVersion,
      identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998),
      globalStats = globalStats,
      engineConfig = config
    )
  }
}

View File

@ -1,33 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.inject.TwitterModule
import com.twitter.simclustersann.thriftscala.SimClustersANNService
import javax.inject.Named
/**
 * Guice module that collects the individually named SimClusters ANN service
 * clients into a single map keyed by service name.
 */
object SimClustersANNServiceNameToClientMapper extends TwitterModule {

  /**
   * @return a map from service name (e.g. `simclusters-ann`, `simclusters-ann-1`)
   *         to the injected client bound under the corresponding [[ModuleNames]] key
   */
  @Provides
  @Singleton
  def providesSimClustersANNServiceNameToClientMapping(
    @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint,
    @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint
  ): Map[String, SimClustersANNService.MethodPerEndpoint] = {
    val clientsByServiceName: Seq[(String, SimClustersANNService.MethodPerEndpoint)] = Seq(
      "simclusters-ann" -> simClustersANNServiceProd,
      "simclusters-ann-experimental" -> simClustersANNServiceExperimental,
      "simclusters-ann-1" -> simClustersANNService1,
      "simclusters-ann-2" -> simClustersANNService2,
      "simclusters-ann-3" -> simClustersANNService3,
      "simclusters-ann-5" -> simClustersANNService5,
      "simclusters-ann-4" -> simClustersANNService4
    )
    clientsByServiceName.toMap
  }
}

View File

@ -1,65 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.google.inject.name.Named
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.relevance_platform.common.injection.LZ4Injection
import com.twitter.relevance_platform.common.injection.SeqObjectInjection
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.Client
import com.twitter.topic_recos.thriftscala.TopicTopTweets
import com.twitter.topic_recos.thriftscala.TopicTweet
import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey
/**
 * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job.
 *
 * The provided store is layered, innermost first:
 *  1. an observed Strato fetch of [[TopicTopTweets]], projected to its `topTweets`,
 *  2. a memcached read-through layer (10 minute TTL, LZ4-compressed values),
 *  3. an in-process cache (5 minute TTL, at most 100000 keys).
 */
object SkitStratoStoreModule extends TwitterModule {
  val column = "recommendations/topic_recos/topicTopTweets"

  /**
   * @param crMixerUnifiedCacheClient memcached client shared across cr-mixer stores
   * @param stratoClient              client used to fetch from the Strato column
   * @param statsReceiver             parent stats receiver for all three layers
   * @return a read-only store from partition key to that partition's top tweets
   */
  @Provides
  @Singleton
  @Named(ModuleNames.SkitStratoStoreName)
  def providesSkitStratoStore(
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    stratoClient: Client,
    statsReceiver: StatsReceiver
  ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = {
    val skitStore = ObservedReadableStore(
      StratoFetchableStore
        .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column)
    )(statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues(_.topTweets)

    val memCachedStore = ObservedMemcachedReadableStore
      .fromCacheClient(
        backingStore = skitStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 10.minutes
      )(
        valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()),
        statsReceiver = statsReceiver.scope("memcached_skit_store"),
        keyToString = { k =>
          // Encode with an explicit charset: the no-arg getBytes uses the JVM's
          // platform default, which would make the hashed cache key depend on
          // host configuration for any non-ASCII key content.
          val keyBytes = k.toString.getBytes(java.nio.charset.StandardCharsets.UTF_8)
          s"skit:${keyHasher.hashKey(keyBytes)}"
        }
      )

    ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]](
      memCachedStore,
      ttl = 5.minutes,
      maxKeys = 100000, // ~150MB max
      cacheName = "skit_in_memory_cache",
      windowSize = 10000L
    )(statsReceiver.scope("skit_in_memory_cache"))
  }
}

View File

@ -1,39 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.UserId
import com.twitter.hermit.stp.thriftscala.STPResult
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import javax.inject.Named
/**
 * Guice module exposing the store bound under [[ModuleNames.StpStore]]: a
 * Strato-backed lookup from a user id to an [[STPResult]], wrapped in an
 * [[ObservedReadableStore]].
 */
object StrongTiePredictionStoreModule extends TwitterModule {

  // Strato column to read from; the flag allows the path to be overridden at startup.
  private val strongTiePredictionColumnPath: Flag[String] = flag[String](
    name = "crMixer.strongTiePredictionColumnPath",
    default = "onboarding/userrecs/strong_tie_prediction_big",
    help = "Strato column path for StrongTiePredictionStore"
  )

  /**
   * @param statsReceiver parent stats receiver; the store reports under the
   *                      `strong_tie_prediction_big_store` scope
   * @param stratoClient  client used to fetch from the Strato column
   * @return a read-only store keyed by user id
   */
  @Provides
  @Singleton
  @Named(ModuleNames.StpStore)
  def providesStrongTiePredictionStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[UserId, STPResult] = {
    val columnPath = strongTiePredictionColumnPath()
    val stratoStore =
      StratoFetchableStore.withUnitView[UserId, STPResult](stratoClient, columnPath)
    val scopedStats = statsReceiver.scope("strong_tie_prediction_big_store")

    ObservedReadableStore(stratoStore)(scopedStats)
  }
}

View File

@ -1,34 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets
import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain
import javax.inject.Named
/**
 * Guice module exposing the store bound under [[ModuleNames.TripCandidateStore]]:
 * a Strato-backed lookup from a [[TripDomain]] to the `tweets` of the fetched
 * [[TripTweets]], wrapped in an [[ObservedReadableStore]].
 */
object TripCandidateStoreModule extends TwitterModule {
  private val stratoColumn = "trends/trip/tripTweetsDataflowProd"

  /**
   * Scoped as a singleton so that a single observed store is shared by every
   * injection point; an unscoped `@Provides` method is re-invoked for each
   * injection. The annotation is fully qualified because this file does not
   * import `Singleton`.
   *
   * @param statsReceiver parent stats receiver; the store reports under the
   *                      `simclusters_trip_candidate_store` scope
   * @param stratoClient  client used to fetch from the Strato column
   */
  @Provides
  @com.google.inject.Singleton
  @Named(ModuleNames.TripCandidateStore)
  def providesSimClustersTripCandidateStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient
  ): ReadableStore[TripDomain, Seq[TripTweet]] = {
    val tripCandidateStratoFetchableStore =
      StratoFetchableStore
        .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn)
        .mapValues(_.tweets)

    ObservedReadableStore(
      tripCandidateStratoFetchableStore
    )(statsReceiver.scope("simclusters_trip_candidate_store"))
  }
}

View File

@ -1,205 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.contentrecommender.thriftscala.TweetInfo
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
import com.twitter.frigate.common.store.health.TweetHealthModelStore
import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig
import com.twitter.frigate.common.store.health.UserHealthModelStore
import com.twitter.frigate.thriftscala.TweetHealthScores
import com.twitter.frigate.thriftscala.UserAgathaScores
import com.twitter.hermit.store.common.DeciderableReadableStore
import com.twitter.hermit.store.common.ObservedCachedReadableStore
import com.twitter.hermit.store.common.ObservedMemcachedReadableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.simclusters_v2.common.UserId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.contentrecommender.store.TweetInfoStore
import com.twitter.contentrecommender.store.TweetyPieFieldsStore
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.param.decider.CrMixerDecider
import com.twitter.cr_mixer.param.decider.DeciderKey
import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2
import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus
import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores
import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore
import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore
import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores
import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores
import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams
import com.twitter.tweetypie.thriftscala.TweetService
import com.twitter.util.Future
import com.twitter.util.JavaTimer
import com.twitter.util.Timer
import javax.inject.Named
/**
 * Guice module that assembles the `ReadableStore[TweetId, TweetInfo]` used to
 * hydrate tweet candidates.
 *
 * The provider builds several auxiliary stores (engagement scores, health
 * models, media representation, real-time aggregates), composes a
 * [[TweetInfoStore]] from them, and fronts it with a memcached layer and an
 * in-process cache.
 */
object TweetInfoStoreModule extends TwitterModule {
  // Implicit timer in scope for any store construction below that requires one.
  implicit val timer: Timer = new JavaTimer(true)

  // The unified memcached client is supplied by this dependent module.
  override def modules: Seq[Module] = Seq(UnifiedCacheClient)

  /**
   * NOTE(review): `tweetHealthModelStore` and `userHealthModelStore` are constructed
   * here but are not passed to `TweetInfoStore` in this method. They are kept so that
   * construction-time behavior is unchanged -- confirm whether they are still needed.
   *
   * @param statsReceiver               parent stats receiver for every layer
   * @param serviceIdentifier           service identity; its environment toggles async cache updates
   * @param stratoClient                client for the Strato-backed health stores
   * @param crMixerUnifiedCacheClient   memcached client shared across cr-mixer stores
   * @param manhattanKVClientMtlsParams mTLS parameters for Manhattan-backed stores
   * @param tweetyPieService            TweetyPie client used for tweet fields
   * @param userTweetGraphPlusService   client used for tweet engagement scores
   * @param blueVerifiedAnnotationStore store of blue-verified annotations
   * @param decider                     source of the decider gates guarding individual stores
   */
  @Provides
  @Singleton
  def providesTweetInfoStore(
    statsReceiver: StatsReceiver,
    serviceIdentifier: ServiceIdentifier,
    stratoClient: StratoClient,
    @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient,
    manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams,
    tweetyPieService: TweetService.MethodPerEndpoint,
    userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint,
    @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[
      String,
      BlueVerifiedAnnotationsV2
    ],
    decider: CrMixerDecider
  ): ReadableStore[TweetId, TweetInfo] = {

    // Engagement scores fetched per tweet from the UserTweetGraphPlus service.
    val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = {
      val serviceBackedStore = new ReadableStore[TweetId, TweetEngagementScores] {
        override def get(k: TweetId): Future[Option[TweetEngagementScores]] =
          userTweetGraphPlusService.tweetEngagementScore(k).map(Some(_))
      }
      val observedStore = ObservedReadableStore(serviceBackedStore)(
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore"))

      DeciderableReadableStore(
        observedStore,
        decider.deciderGateBuilder.idGate(
          DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey),
        statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")
      )
    }

    // Tweet health scores: Strato-backed, memcached for 2 hours, decider-gated.
    val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = {
      val allSignalsEnabled = TweetHealthModelStoreConfig(
        enablePBlock = true,
        enableToxicity = true,
        enablePSpammy = true,
        enablePReported = true,
        enableSpammyTweetContent = true,
        enablePNegMultimodal = true,
      )
      val stratoBackedStore = TweetHealthModelStore.buildReadableStore(
        stratoClient,
        Some(allSignalsEnabled)
      )(statsReceiver.scope("UnderlyingTweetHealthModelStore"))

      // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache
      val cachedStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = stratoBackedStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 2.hours
      )(
        valueInjection = BinaryScalaCodec(TweetHealthScores),
        statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"),
        keyToString = { k: TweetId => s"tHMS/$k" }
      )

      DeciderableReadableStore(
        cachedStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey),
        statsReceiver.scope("TweetHealthModelStore")
      )
    }

    // User health scores: Strato-backed, memcached for 18 hours, decider-gated.
    val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = {
      val stratoBackedStore = UserHealthModelStore.buildReadableStore(stratoClient)(
        statsReceiver.scope("UnderlyingUserHealthModelStore"))

      val cachedStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = stratoBackedStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 18.hours
      )(
        valueInjection = BinaryScalaCodec(UserAgathaScores),
        statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"),
        keyToString = { k: UserId => s"uHMS/$k" }
      )

      DeciderableReadableStore(
        cachedStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey),
        statsReceiver.scope("UserHealthModelStore")
      )
    }

    // User media representation scores: memcached for 12 hours, decider-gated.
    val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = {
      val backingHealthStore = UserMediaRepresentationHealthStore.buildReadableStore(
        manhattanKVClientMtlsParams,
        statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore")
      )

      val cachedStore = ObservedMemcachedReadableStore.fromCacheClient(
        backingStore = backingHealthStore,
        cacheClient = crMixerUnifiedCacheClient,
        ttl = 12.hours
      )(
        valueInjection = BinaryScalaCodec(UserMediaRepresentationScores),
        statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"),
        keyToString = { k: UserId => s"uMRHS/$k" }
      )

      DeciderableReadableStore(
        cachedStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey),
        statsReceiver.scope("UserMediaRepresentationHealthStore")
      )
    }

    // Real-time aggregates: no memcached layer, only a decider gate.
    val magicRecsRealTimeAggregatesStore: ReadableStore[
      TweetId,
      MagicRecsRealTimeAggregatesScores
    ] = {
      val aggregatesStore = MagicRecsRealTimeAggregatesStore.buildReadableStore(
        serviceIdentifier,
        statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores")
      )

      DeciderableReadableStore(
        aggregatesStore,
        decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore),
        statsReceiver.scope("MagicRecsRealTimeAggregatesStore")
      )
    }

    // Compose the TweetInfo store from the pieces above.
    val composedTweetInfoStore = TweetInfoStore(
      TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService),
      userMediaRepresentationHealthStore,
      magicRecsRealTimeAggregatesStore,
      tweetEngagementScoreStore,
      blueVerifiedAnnotationStore
    )(statsReceiver.scope("tweetInfoStore"))

    val memcachedTweetInfoStore = ObservedMemcachedReadableStore.fromCacheClient(
      backingStore = composedTweetInfoStore,
      cacheClient = crMixerUnifiedCacheClient,
      ttl = 15.minutes,
      // Hydrating tweetInfo is now a required step for all candidates,
      // hence we needed to tune these thresholds.
      asyncUpdate = serviceIdentifier.environment == "prod"
    )(
      valueInjection = BinaryScalaCodec(TweetInfo),
      statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"),
      keyToString = { k: TweetId => s"tIS/$k" }
    )

    // Outermost layer: in-process cache in front of memcached.
    ObservedCachedReadableStore.from(
      memcachedTweetInfoStore,
      ttl = 15.minutes,
      maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB
      windowSize = 10000L,
      cacheName = "tweet_info_cache",
      maxMultiGetSize = 20
    )(statsReceiver.scope("inMemoryCachedTweetInfoStore"))
  }
}

View File

@ -1,42 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.app.Flag
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.hermit.store.common.ObservedReadableStore
import com.twitter.inject.TwitterModule
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers
/**
 * Guice module exposing a store from a tweet id to its
 * [[TweetRecentEngagedUsers]], backed by a Strato column whose key is
 * `(tweetId, version)`.
 */
object TweetRecentEngagedUserStoreModule extends TwitterModule {

  private type Version = Long

  // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion)
  private val tweetRecentEngagedUsersStoreDefaultVersion = 0

  // Strato column to read from; the flag allows the path to be overridden at startup.
  private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String](
    name = "crMixer.tweetRecentEngagedUsersColumnPath",
    default = "recommendations/twistly/tweetRecentEngagedUsers",
    help = "Strato column path for TweetRecentEngagedUsersStore"
  )

  /**
   * @param statsReceiver parent stats receiver; the store reports under the
   *                      `tweet_recent_engaged_users_store` scope
   * @param stratoClient  client used to fetch from the Strato column
   * @return a read-only store keyed by tweet id; the default version is filled in
   *         before each lookup
   */
  @Provides
  @Singleton
  def providesTweetRecentEngagedUserStore(
    statsReceiver: StatsReceiver,
    stratoClient: StratoClient,
  ): ReadableStore[TweetId, TweetRecentEngagedUsers] = {
    val versionedStore = StratoFetchableStore
      .withUnitView[(TweetId, Version), TweetRecentEngagedUsers](
        stratoClient,
        tweetRecentEngagedUsersColumnPath())

    // Callers only supply the tweet id; pair it with the default version.
    val tweetKeyedStore = versionedStore.composeKeyMapping[TweetId] { tweetId =>
      (tweetId, tweetRecentEngagedUsersStoreDefaultVersion)
    }

    ObservedReadableStore(tweetKeyedStore)(
      statsReceiver.scope("tweet_recent_engaged_users_store"))
  }
}

View File

@ -1,32 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.bijection.scrooge.BinaryScalaCodec
import com.twitter.conversions.DurationOps._
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse
import com.twitter.finagle.memcached.{Client => MemcachedClient}
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.inject.TwitterModule
import com.twitter.hermit.store.common.ReadableWritableStore
import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore
import com.twitter.simclusters_v2.common.UserId
import javax.inject.Named
/**
 * Guice module exposing a memcached-backed, readable and writable store of
 * [[CrMixerTweetResponse]] values keyed by user id.
 */
object TweetRecommendationResultsStoreModule extends TwitterModule {

  /**
   * @param tweetRecommendationResultsCacheClient memcached client dedicated to recommendation results
   * @param statsReceiver parent stats receiver; the store reports under the
   *                      `TweetRecommendationResultsMemcacheStore` scope
   * @return a store whose entries are written with a 24 hour TTL
   */
  @Provides
  @Singleton
  def providesTweetRecommendationResultsStore(
    @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient,
    statsReceiver: StatsReceiver
  ): ReadableWritableStore[UserId, CrMixerTweetResponse] = {
    val cacheStats = statsReceiver.scope("TweetRecommendationResultsMemcacheStore")

    ObservedReadableWritableMemcacheStore.fromCacheClient(
      cacheClient = tweetRecommendationResultsCacheClient,
      ttl = 24.hours
    )(
      valueInjection = BinaryScalaCodec(CrMixerTweetResponse),
      statsReceiver = cacheStats,
      // The cache key is the bare user id, with no prefix.
      keyToString = { userId: UserId => userId.toString }
    )
  }
}

View File

@ -1,67 +0,0 @@
package com.twitter.cr_mixer.module
import com.google.inject.Provides
import com.google.inject.Singleton
import com.twitter.inject.TwitterModule
import com.twitter.cr_mixer.model.ModuleNames
import com.twitter.frigate.common.store.strato.StratoFetchableStore
import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView
import com.twitter.strato.client.{Client => StratoClient}
import com.twitter.simclusters_v2.common.TweetId
import com.twitter.storehaus.ReadableStore
import javax.inject.Named
/**
 * Guice module exposing four Strato stores over the same TwHIN collab-filter
 * column. The stores differ only in the [[TwhinCollabFilterView]] sent with each
 * fetch, so the shared construction lives in [[collabFilterStore]].
 */
object TwhinCollabFilterStratoStoreModule extends TwitterModule {

  val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User"

  /**
   * Builds a store over [[stratoColumnPath]] that sends `view` with every fetch.
   *
   * @param stratoClient client used to fetch from the Strato column
   * @param view         selects which candidate set the column returns
   */
  private def collabFilterStore(
    stratoClient: StratoClient,
    view: TwhinCollabFilterView
  ): ReadableStore[Long, Seq[TweetId]] =
    StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]](
      stratoClient,
      column = stratoColumnPath,
      view = view
    )

  /** Collab-filter candidates for the `follow_2022_03_10_c_500K` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow)
  def providesTwhinCollabFilterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, TwhinCollabFilterView("follow_2022_03_10_c_500K"))

  /** Collab-filter candidates for the `engagement_2022_04_10_c_500K` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement)
  def providesTwhinCollabFilterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, TwhinCollabFilterView("engagement_2022_04_10_c_500K"))

  /** Multi-cluster candidates for the `multiclusterFollow20220921` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow)
  def providesTwhinMultiClusterStratoStoreForFollow(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, TwhinCollabFilterView("multiclusterFollow20220921"))

  /** Multi-cluster candidates for the `multiclusterEng20220921` view. */
  @Provides
  @Singleton
  @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement)
  def providesTwhinMultiClusterStratoStoreForEngagement(
    stratoClient: StratoClient
  ): ReadableStore[Long, Seq[TweetId]] =
    collabFilterStore(stratoClient, TwhinCollabFilterView("multiclusterEng20220921"))
}

Some files were not shown because too many files have changed in this diff Show More