From 1a6792fd072443bdb2bdee69cd4d8acc706b7a09 Mon Sep 17 00:00:00 2001 From: dogemanttv <80775876+dogemanttv@users.noreply.github.com> Date: Wed, 10 Jan 2024 17:06:38 -0600 Subject: [PATCH] Delete cr-mixer directory --- cr-mixer/BUILD.bazel | 24 - cr-mixer/README.md | 7 - .../server/src/main/resources/BUILD.bazel | 8 - .../src/main/resources/config/decider.yml | 146 --- .../server/src/main/resources/logback.xml | 168 --- .../scala/com/twitter/cr_mixer/BUILD.bazel | 48 - .../CrMixerHttpServerWarmupHandler.scala | 18 - .../com/twitter/cr_mixer/CrMixerServer.scala | 229 ----- .../CrMixerThriftServerWarmupHandler.scala | 75 -- .../twitter/cr_mixer/blender/AdsBlender.scala | 77 -- .../scala/com/twitter/cr_mixer/blender/BUILD | 20 - .../blender/BlendedCandidatesBuilder.scala | 48 - .../blender/ContentSignalBlender.scala | 121 --- .../CountWeightedInterleaveBlender.scala | 90 -- .../cr_mixer/blender/InterleaveBlender.scala | 33 - .../blender/SourceTypeBackFillBlender.scala | 64 -- .../cr_mixer/blender/SwitchBlender.scala | 81 -- .../AdsCandidateGenerator.scala | 140 --- .../AdsCandidateSourcesRouter.scala | 516 ---------- .../cr_mixer/candidate_generation/BUILD | 51 - .../CandidateSourcesRouter.scala | 536 ---------- .../CrCandidateGenerator.scala | 350 ------- ...stomizedRetrievalCandidateGeneration.scala | 345 ------- .../FrsTweetCandidateGenerator.scala | 220 ---- .../RelatedTweetCandidateGenerator.scala | 156 --- .../RelatedVideoTweetCandidateGenerator.scala | 139 --- ...stersInterestedInCandidateGeneration.scala | 640 ------------ .../TopicTweetCandidateGenerator.scala | 232 ----- .../UtegTweetCandidateGenerator.scala | 179 ---- .../scala/com/twitter/cr_mixer/config/BUILD | 13 - .../config/SimClustersANNConfig.scala | 473 --------- .../cr_mixer/config/TimeoutConfig.scala | 24 - .../twitter/cr_mixer/controller/BUILD.bazel | 48 - .../controller/CrMixerThriftController.scala | 757 -------------- .../com/twitter/cr_mixer/exception/BUILD | 7 - .../InvalidSANNConfigException.scala | 4 - .../com/twitter/cr_mixer/featureswitch/BUILD | 35 - .../CrMixerLoggingABDecider.scala | 79 -- .../featureswitch/ParamsBuilder.scala | 151 --- ...etImpressedBucketsLocalContextFilter.scala | 22 - .../scala/com/twitter/cr_mixer/filter/BUILD | 22 - .../twitter/cr_mixer/filter/FilterBase.scala | 22 - .../filter/ImpressedTweetlistFilter.scala | 63 -- .../cr_mixer/filter/InNetworkFilter.scala | 80 -- .../filter/PostRankFilterRunner.scala | 58 -- .../cr_mixer/filter/PreRankFilterRunner.scala | 99 -- .../twitter/cr_mixer/filter/ReplyFilter.scala | 40 - .../cr_mixer/filter/RetweetFilter.scala | 41 - .../cr_mixer/filter/TweetAgeFilter.scala | 39 - .../filter/TweetInfoHealthFilterBase.scala | 39 - .../cr_mixer/filter/UtegFilterRunner.scala | 96 -- .../cr_mixer/filter/UtegHealthFilter.scala | 51 - .../cr_mixer/filter/VideoTweetFilter.scala | 81 -- .../AdsRecommendationsScribeLogger.scala | 139 --- .../scala/com/twitter/cr_mixer/logging/BUILD | 34 - .../logging/CrMixerScribeLogger.scala | 489 --------- .../logging/RelatedTweetScribeLogger.scala | 193 ---- .../cr_mixer/logging/ScribeLoggerUtils.scala | 43 - .../cr_mixer/logging/ScribeMetadata.scala | 45 - .../logging/TopLevelDdgMetricsMetadata.scala | 22 - .../logging/UtegTweetScribeLogger.scala | 147 --- .../scala/com/twitter/cr_mixer/model/BUILD | 16 - .../twitter/cr_mixer/model/Candidate.scala | 200 ---- .../model/CandidateGenerationInfo.scala | 67 -- .../model/CandidateGeneratorQuery.scala | 96 -- .../model/EarlybirdSimilarityEngineType.scala | 6 - .../cr_mixer/model/HealthThreshold.scala | 11 - .../twitter/cr_mixer/model/ModelConfig.scala | 77 -- .../twitter/cr_mixer/model/ModuleNames.scala | 122 --- .../cr_mixer/model/TopicTweetWithScore.scala | 13 - .../cr_mixer/model/TweetWithAuthor.scala | 6 - .../cr_mixer/model/TweetWithScore.scala | 8 - .../model/TweetWithScoreAndSocialProof.scala | 12 - .../ActivePromotedTweetStoreModule.scala | 135 --- .../com/twitter/cr_mixer/module/BUILD.bazel | 130 --- .../BlueVerifiedAnnotationStoreModule.scala | 52 - .../module/CertoStratoStoreModule.scala | 57 -- ...ConsumersBasedUserAdGraphStoreModule.scala | 30 - ...sumersBasedUserTweetGraphStoreModule.scala | 30 - ...sumersBasedUserVideoGraphStoreModule.scala | 30 - .../module/CrMixerParamConfigModule.scala | 16 - .../module/DiffusionStoreModule.scala | 54 - ...birdRecencyBasedCandidateStoreModule.scala | 189 ---- .../module/EmbeddingStoreModule.scala | 195 ---- .../cr_mixer/module/FrsStoreModule.scala | 29 - .../cr_mixer/module/MHMtlsParamsModule.scala | 17 - .../module/OfflineCandidateStoreModule.scala | 150 --- .../module/RealGraphOonStoreModule.scala | 39 - .../module/RealGraphStoreMhModule.scala | 67 -- .../module/RepresentationManagerModule.scala | 107 -- .../module/RepresentationScorerModule.scala | 56 - .../module/SampleSimilarityEngineModule.scala | 90 -- ...ClustersANNServiceNameToClientMapper.scala | 33 - .../module/SkitStratoStoreModule.scala | 65 -- .../StrongTiePredictionStoreModule.scala | 39 - .../module/TripCandidateStoreModule.scala | 34 - .../module/TweetInfoStoreModule.scala | 205 ---- .../TweetRecentEngagedUserStoreModule.scala | 42 - ...weetRecommendationResultsStoreModule.scala | 32 - .../TwhinCollabFilterStratoStoreModule.scala | 67 -- .../TwiceClustersMembersStoreModule.scala | 42 - .../cr_mixer/module/UnifiedCacheClient.scala | 83 -- .../UserSignalServiceColumnModule.scala | 30 - .../module/UserSignalServiceStoreModule.scala | 37 - .../module/UserStateStoreModule.scala | 113 -- .../module/core/ABDeciderModule.scala | 33 - .../module/core/CrMixerFlagModule.scala | 20 - .../core/CrMixerLoggingABDeciderModule.scala | 20 - .../core/FeatureContextBuilderModule.scala | 16 - .../module/core/FeatureSwitchesModule.scala | 74 -- .../module/core/KafkaProducerModule.scala | 70 -- .../module/core/LoggerFactoryModule.scala | 155 --- .../core/MemoizingStatsReceiverModule.scala | 12 - .../module/core/TimeoutConfigModule.scala | 104 -- .../grpc_client/NaviGRPCClientModule.scala | 90 -- ...ertoTopicTweetSimilarityEngineModule.scala | 57 -- ...sumerBasedWalsSimilarityEngineModule.scala | 54 - ...ddingBasedTripSimilarityEngineModule.scala | 60 -- ...dingBasedTwHINSimilarityEngineModule.scala | 58 -- ...gBasedTwoTowerSimilarityEngineModule.scala | 51 - ...sedUserAdGraphSimilarityEngineModule.scala | 61 -- ...UserVideoGraphSimilarityEngineModule.scala | 62 -- ...DiffusionBasedSimilarityEngineModule.scala | 52 - .../EarlybirdSimilarityEngineModule.scala | 120 --- ...erBasedUnifiedSimilarityEngineModule.scala | 68 -- ...sedUserAdGraphSimilarityEngineModule.scala | 67 -- ...UserTweetGraphSimilarityEngineModule.scala | 67 -- ...SimClustersANNSimilarityEngineModule.scala | 117 --- ...SkitTopicTweetSimilarityEngineModule.scala | 88 -- .../TweetBasedQigSimilarityEngineModule.scala | 66 -- ...TweetBasedTwHINSimlarityEngineModule.scala | 70 -- ...etBasedUnifiedSimilarityEngineModule.scala | 83 -- ...sedUserAdGraphSimilarityEngineModule.scala | 91 -- ...UserTweetGraphSimilarityEngineModule.scala | 92 -- ...UserVideoGraphSimilarityEngineModule.scala | 92 -- ...abFilterLookupSimilarityEngineModule.scala | 71 -- ...eetEntityGraphSimilarityEngineModule.scala | 55 - .../AnnQueryServiceClientModule.scala | 107 -- .../EarlybirdSearchClientModule.scala | 39 - .../thrift_client/FrsClientModule.scala | 41 - .../HydraPartitionClientModule.scala | 25 - .../thrift_client/HydraRootClientModule.scala | 25 - .../QigServiceClientModule.scala | 40 - .../SimClustersAnnServiceClientModule.scala | 147 --- .../thrift_client/TweetyPieClientModule.scala | 60 -- .../UserAdGraphClientModule.scala | 47 - .../UserTweetEntityGraphClientModule.scala | 44 - .../UserTweetGraphClientModule.scala | 43 - .../UserTweetGraphPlusClientModule.scala | 46 - .../UserVideoGraphClientModule.scala | 46 - .../twitter/cr_mixer/param/AdsParams.scala | 64 -- .../scala/com/twitter/cr_mixer/param/BUILD | 27 - .../cr_mixer/param/BlenderParams.scala | 152 --- .../param/BypassInterleaveAndRankParams.scala | 98 -- .../param/ConsumerBasedWalsParams.scala | 96 -- ...eddingBasedCandidateGenerationParams.scala | 55 - .../ConsumerEmbeddingBasedTripParams.scala | 46 - .../ConsumerEmbeddingBasedTwHINParams.scala | 33 - ...ConsumerEmbeddingBasedTwoTowerParams.scala | 32 - .../ConsumersBasedUserAdGraphParams.scala | 54 - .../ConsumersBasedUserTweetGraphParams.scala | 44 - .../ConsumersBasedUserVideoGraphParams.scala | 65 -- .../cr_mixer/param/CrMixerParamConfig.scala | 122 --- ...rievalBasedCandidateGenerationParams.scala | 81 -- ...valBasedFTROfflineInterestedInParams.scala | 31 - ...rievalBasedOfflineInterestedInParams.scala | 33 - .../CustomizedRetrievalBasedTwhinParams.scala | 60 -- ...irdFrsBasedCandidateGenerationParams.scala | 117 --- .../twitter/cr_mixer/param/FrsParams.scala | 131 --- .../twitter/cr_mixer/param/GlobalParams.scala | 106 -- .../param/GoodProfileClickParams.scala | 60 -- .../cr_mixer/param/GoodTweetClickParams.scala | 75 -- .../cr_mixer/param/InterestedInParams.scala | 213 ---- ...oducerBasedCandidateGenerationParams.scala | 143 --- .../ProducerBasedUserAdGraphParams.scala | 53 - .../ProducerBasedUserTweetGraphParams.scala | 53 - .../twitter/cr_mixer/param/RankerParams.scala | 59 -- .../cr_mixer/param/RealGraphInParams.scala | 25 - .../cr_mixer/param/RealGraphOonParams.scala | 51 - .../cr_mixer/param/RecentFollowsParams.scala | 27 - .../param/RecentNegativeSignalParams.scala | 39 - .../param/RecentNotificationsParams.scala | 28 - .../param/RecentOriginalTweetsParams.scala | 28 - .../param/RecentReplyTweetsParams.scala | 27 - .../cr_mixer/param/RecentRetweetsParams.scala | 30 - .../param/RecentTweetFavoritesParams.scala | 29 - .../param/RelatedTweetGlobalParams.scala | 32 - .../RelatedTweetProducerBasedParams.scala | 111 -- .../param/RelatedTweetTweetBasedParams.scala | 141 --- .../param/RelatedVideoTweetGlobalParams.scala | 32 - .../RelatedVideoTweetTweetBasedParams.scala | 134 --- .../param/RepeatedProfileVisitsParams.scala | 72 -- .../cr_mixer/param/SimClustersANNParams.scala | 76 -- .../cr_mixer/param/TopicTweetParams.scala | 115 --- .../TweetBasedCandidateGenerationParams.scala | 189 ---- .../param/TweetBasedTwHINParams.scala | 30 - .../param/TweetBasedUserAdGraphParams.scala | 58 -- .../TweetBasedUserTweetGraphParams.scala | 89 -- .../TweetBasedUserVideoGraphParams.scala | 81 -- .../cr_mixer/param/TweetSharesParams.scala | 29 - .../UnifiedSETweetCombinationMethod.scala | 15 - .../param/UnifiedUSSSignalParams.scala | 121 --- .../param/UtegTweetGlobalParams.scala | 94 -- .../param/VideoTweetFilterParams.scala | 31 - .../param/VideoViewTweetsParams.scala | 64 -- .../com/twitter/cr_mixer/param/decider/BUILD | 16 - .../param/decider/CrMixerDecider.scala | 39 - .../cr_mixer/param/decider/DeciderKey.scala | 67 -- .../param/decider/EndpointLoadShedder.scala | 57 -- .../scala/com/twitter/cr_mixer/ranker/BUILD | 30 - .../cr_mixer/ranker/DefaultRanker.scala | 23 - .../cr_mixer/ranker/SwitchRanker.scala | 46 - .../scala/com/twitter/cr_mixer/scribe/BUILD | 22 - .../cr_mixer/scribe/ScribeCategory.scala | 64 -- .../com/twitter/cr_mixer/service/BUILD.bazel | 15 - .../CrMixerAlertNotificationConfig.scala | 26 - .../twitter/cr_mixer/similarity_engine/BUILD | 74 -- .../CertoTopicTweetSimilarityEngine.scala | 94 -- .../ConsumerBasedWalsSimilarityEngine.scala | 246 ----- ...erEmbeddingBasedTripSimilarityEngine.scala | 118 --- ...rEmbeddingBasedTwHINSimilarityEngine.scala | 18 - ...beddingBasedTwoTowerSimilarityEngine.scala | 18 - ...mersBasedUserAdGraphSimilarityEngine.scala | 90 -- ...sBasedUserVideoGraphSimilarityEngine.scala | 91 -- .../DiffusionBasedSimilarityEngine.scala | 73 -- .../EarlybirdModelBasedSimilarityEngine.scala | 92 -- ...arlybirdRecencyBasedSimilarityEngine.scala | 86 -- .../EarlybirdSimilarityEngine.scala | 32 - .../EarlybirdSimilarityEngineBase.scala | 56 - .../EarlybirdSimilarityEngineRouter.scala | 136 --- ...ybirdTensorflowBasedSimilarityEngine.scala | 138 --- .../similarity_engine/FilterUtil.scala | 42 - .../HnswANNSimilarityEngine.scala | 187 ---- .../LookupSimilarityEngine.scala | 78 -- .../ModelBasedANNStore.scala | 136 --- ...ProducerBasedUnifiedSimilarityEngine.scala | 641 ------------ ...ucerBasedUserAdGraphSimilarityEngine.scala | 96 -- ...rBasedUserTweetGraphSimilarityEngine.scala | 96 -- .../SimClustersANNSimilarityEngine.scala | 113 -- .../similarity_engine/SimilarityEngine.scala | 169 --- .../SimilaritySourceOrderingUtil.scala | 32 - ...hPrecisionTopicTweetSimilarityEngine.scala | 123 --- .../SkitTopicTweetSimilarityEngine.scala | 143 --- .../StandardSimilarityEngine.scala | 65 -- .../TweetBasedQigSimilarityEngine.scala | 114 --- .../TweetBasedUnifiedSimilarityEngine.scala | 962 ------------------ ...weetBasedUserAdGraphSimilarityEngine.scala | 129 --- ...tBasedUserTweetGraphSimilarityEngine.scala | 184 ---- ...tBasedUserVideoGraphSimilarityEngine.scala | 184 ---- .../TwhinCollabFilterSimilarityEngine.scala | 72 -- ...UserTweetEntityGraphSimilarityEngine.scala | 110 -- .../com/twitter/cr_mixer/source_signal/BUILD | 32 - .../source_signal/FrsSourceGraphFetcher.scala | 54 - .../FrsSourceSignalFetcher.scala | 65 -- .../cr_mixer/source_signal/FrsStore.scala | 81 -- .../RealGraphInSourceGraphFetcher.scala | 55 - .../RealGraphOonSourceGraphFetcher.scala | 55 - .../source_signal/SourceFetcher.scala | 101 -- .../source_signal/SourceGraphFetcher.scala | 70 -- .../source_signal/SourceInfoRouter.scala | 68 -- .../source_signal/SourceSignalFetcher.scala | 45 - .../UssSourceSignalFetcher.scala | 160 --- .../cr_mixer/source_signal/UssStore.scala | 209 ---- .../scala/com/twitter/cr_mixer/util/BUILD | 29 - .../util/CandidateGenerationKeyUtil.scala | 39 - .../util/CountWeightedInterleaveUtil.scala | 180 ---- .../cr_mixer/util/EarlybirdSearchUtil.scala | 130 --- .../cr_mixer/util/InterleaveUtil.scala | 160 --- .../twitter/cr_mixer/util/MetricTagUtil.scala | 135 --- .../util/SignalTimestampStatsUtil.scala | 66 -- cr-mixer/thrift/src/main/thrift/BUILD | 48 - cr-mixer/thrift/src/main/thrift/ads.thrift | 33 - .../thrift/candidate_generation_key.thrift | 21 - .../thrift/src/main/thrift/cr_mixer.thrift | 104 -- .../src/main/thrift/frs_based_tweet.thrift | 35 - .../thrift/src/main/thrift/metric_tags.thrift | 44 - .../thrift/src/main/thrift/product.thrift | 19 - .../src/main/thrift/product_context.thrift | 21 - .../src/main/thrift/related_tweet.thrift | 24 - .../main/thrift/related_video_tweet.thrift | 23 - cr-mixer/thrift/src/main/thrift/scribe.thrift | 168 --- .../thrift/src/main/thrift/source_type.thrift | 123 --- .../thrift/src/main/thrift/topic_tweet.thrift | 28 - cr-mixer/thrift/src/main/thrift/uteg.thrift | 31 - .../thrift/src/main/thrift/validation.thrift | 19 - 285 files changed, 25871 deletions(-) delete mode 100644 cr-mixer/BUILD.bazel delete mode 100644 cr-mixer/README.md delete mode 100644 cr-mixer/server/src/main/resources/BUILD.bazel delete mode 100644 cr-mixer/server/src/main/resources/config/decider.yml delete mode 100644 cr-mixer/server/src/main/resources/logback.xml delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/BUILD.bazel delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerHttpServerWarmupHandler.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerServer.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerThriftServerWarmupHandler.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/InterleaveBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/BUILD.bazel delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/InvalidSANNConfigException.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/CrMixerLoggingABDecider.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/ParamsBuilder.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/SetImpressedBucketsLocalContextFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/FilterBase.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ImpressedTweetlistFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/InNetworkFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PostRankFilterRunner.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PreRankFilterRunner.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ReplyFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/RetweetFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetAgeFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetInfoHealthFilterBase.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegFilterRunner.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegHealthFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/VideoTweetFilter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/AdsRecommendationsScribeLogger.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/CrMixerScribeLogger.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/RelatedTweetScribeLogger.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeLoggerUtils.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeMetadata.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/TopLevelDdgMetricsMetadata.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/UtegTweetScribeLogger.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/Candidate.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGeneratorQuery.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/EarlybirdSimilarityEngineType.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/HealthThreshold.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModuleNames.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TopicTweetWithScore.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithAuthor.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScore.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScoreAndSocialProof.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ActivePromotedTweetStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BUILD.bazel delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BlueVerifiedAnnotationStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CertoStratoStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserAdGraphStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserTweetGraphStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserVideoGraphStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CrMixerParamConfigModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/DiffusionStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EarlybirdRecencyBasedCandidateStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EmbeddingStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/FrsStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/MHMtlsParamsModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/OfflineCandidateStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphOonStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphStoreMhModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationManagerModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationScorerModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SampleSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SimClustersANNServiceNameToClientMapper.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SkitStratoStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/StrongTiePredictionStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TripCandidateStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetInfoStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecentEngagedUserStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecommendationResultsStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwhinCollabFilterStratoStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwiceClustersMembersStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UnifiedCacheClient.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceColumnModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserStateStoreModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/ABDeciderModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerFlagModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerLoggingABDeciderModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureContextBuilderModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureSwitchesModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/KafkaProducerModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/LoggerFactoryModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/MemoizingStatsReceiverModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/TimeoutConfigModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/grpc_client/NaviGRPCClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/CertoTopicTweetSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerBasedWalsSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/DiffusionBasedSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/EarlybirdSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUnifiedSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserAdGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SimClustersANNSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SkitTopicTweetSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedQigSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedTwHINSimlarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUnifiedSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserAdGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserTweetGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserVideoGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TwhinCollabFilterLookupSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/UserTweetEntityGraphSimilarityEngineModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/AnnQueryServiceClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/EarlybirdSearchClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/FrsClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraPartitionClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraRootClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/QigServiceClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/SimClustersAnnServiceClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/TweetyPieClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserAdGraphClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetEntityGraphClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphPlusClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserVideoGraphClientModule.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/AdsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BlenderParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BypassInterleaveAndRankParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerBasedWalsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedCandidateGenerationParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTripParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwHINParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwoTowerParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserAdGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserTweetGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserVideoGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CrMixerParamConfig.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedCandidateGenerationParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedFTROfflineInterestedInParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedOfflineInterestedInParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedTwhinParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/EarlybirdFrsBasedCandidateGenerationParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/FrsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GlobalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodProfileClickParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodTweetClickParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/InterestedInParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedCandidateGenerationParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserAdGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserTweetGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RankerParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphInParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphOonParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentFollowsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNegativeSignalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNotificationsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentOriginalTweetsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentReplyTweetsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentRetweetsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentTweetFavoritesParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetGlobalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetProducerBasedParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetTweetBasedParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetGlobalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetTweetBasedParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RepeatedProfileVisitsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/SimClustersANNParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TopicTweetParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedCandidateGenerationParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedTwHINParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserAdGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserTweetGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserVideoGraphParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetSharesParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedSETweetCombinationMethod.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedUSSSignalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UtegTweetGlobalParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoTweetFilterParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoViewTweetsParams.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/CrMixerDecider.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/EndpointLoadShedder.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/DefaultRanker.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/SwitchRanker.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/ScribeCategory.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/BUILD.bazel delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/CrMixerAlertNotificationConfig.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/CertoTopicTweetSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/DiffusionBasedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdModelBasedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdRecencyBasedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineBase.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineRouter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/FilterUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/HnswANNSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/LookupSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ModelBasedANNStore.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUnifiedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserAdGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimClustersANNSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilaritySourceOrderingUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitHighPrecisionTopicTweetSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitTopicTweetSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/StandardSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedQigSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserAdGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserTweetGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserVideoGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TwhinCollabFilterSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/UserTweetEntityGraphSimilarityEngine.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceGraphFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceSignalFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsStore.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphInSourceGraphFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphOonSourceGraphFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceGraphFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceInfoRouter.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceSignalFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssSourceSignalFetcher.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssStore.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/BUILD delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CandidateGenerationKeyUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CountWeightedInterleaveUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/EarlybirdSearchUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/InterleaveUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/MetricTagUtil.scala delete mode 100644 cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/SignalTimestampStatsUtil.scala delete mode 100644 cr-mixer/thrift/src/main/thrift/BUILD delete mode 100644 cr-mixer/thrift/src/main/thrift/ads.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/candidate_generation_key.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/cr_mixer.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/frs_based_tweet.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/metric_tags.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/product.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/product_context.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/related_tweet.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/related_video_tweet.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/scribe.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/source_type.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/topic_tweet.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/uteg.thrift delete mode 100644 cr-mixer/thrift/src/main/thrift/validation.thrift diff --git a/cr-mixer/BUILD.bazel b/cr-mixer/BUILD.bazel deleted file mode 100644 index 75890d133..000000000 --- a/cr-mixer/BUILD.bazel +++ /dev/null @@ -1,24 +0,0 @@ -jvm_binary( - name = "bin", - basename = "cr-mixer", - main = "com.twitter.cr_mixer.CrMixerServerMain", - runtime_platform = "java11", - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/ch/qos/logback:logback-classic", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer", - "finagle/finagle-zipkin-scribe/src/main/scala", - "finatra/inject/inject-logback/src/main/scala", - "loglens/loglens-logback/src/main/scala/com/twitter/loglens/logback", - "twitter-server-internal/src/main/scala", - "twitter-server/logback-classic/src/main/scala", - ], -) - -# Aurora Workflows build phase convention requires a jvm_app named with ${project-name}-app -jvm_app( - name = "cr-mixer-app", - archive = "zip", - binary = ":bin", - tags = ["bazel-compatible"], -) diff --git a/cr-mixer/README.md b/cr-mixer/README.md deleted file mode 100644 index 0037f7e69..000000000 --- a/cr-mixer/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# CR-Mixer - -CR-Mixer is a candidate generation service proposed as part of the Personalization Strategy vision for Twitter. Its aim is to speed up the iteration and development of candidate generation and light ranking. The service acts as a lightweight coordinating layer that delegates candidate generation tasks to underlying compute services. It focuses on Twitter's candidate generation use cases and offers a centralized platform for fetching, mixing, and managing candidate sources and light rankers. The overarching goal is to increase the speed and ease of testing and developing candidate generation pipelines, ultimately delivering more value to Twitter users. - -CR-Mixer acts as a configurator and delegator, providing abstractions for the challenging parts of candidate generation and handling performance issues. It will offer a 1-stop-shop for fetching and mixing candidate sources, a managed and shared performant platform, a light ranking layer, a common filtering layer, a version control system, a co-owned feature switch set, and peripheral tooling. - -CR-Mixer's pipeline consists of 4 steps: source signal extraction, candidate generation, filtering, and ranking. It also provides peripheral tooling like scribing, debugging, and monitoring. The service fetches source signals externally from stores like UserProfileService and RealGraph, calls external candidate generation services, and caches results. Filters are applied for deduping and pre-ranking, and a light ranking step follows. diff --git a/cr-mixer/server/src/main/resources/BUILD.bazel b/cr-mixer/server/src/main/resources/BUILD.bazel deleted file mode 100644 index 8f96f402c..000000000 --- a/cr-mixer/server/src/main/resources/BUILD.bazel +++ /dev/null @@ -1,8 +0,0 @@ -resources( - sources = [ - "*.xml", - "*.yml", - "config/*.yml", - ], - tags = ["bazel-compatible"], -) diff --git a/cr-mixer/server/src/main/resources/config/decider.yml b/cr-mixer/server/src/main/resources/config/decider.yml deleted file mode 100644 index a0d55b9b4..000000000 --- a/cr-mixer/server/src/main/resources/config/decider.yml +++ /dev/null @@ -1,146 +0,0 @@ -# The keys in this file correspond to the DeciderValues defined in -# https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala - -dark_traffic_filter: - comment: Proportion of the requests that are forwarded as dark traffic to the proxy - default_availability: 0 - -enable_tweet_recommendations_home_product: - comment: Proportion of requests where we return an actual response for TweetRecommendations Home product - default_availability: 10000 - -enable_tweet_health_score: - comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute TweetHealthModelScore" - default_availability: 10000 - -enable_user_agatha_score: - comment: "Enable the calculation for health scores in tweetInfo. By enabling this decider, we will compute UserHealthModelScore" - default_availability: 10000 - -enable_user_tweet_entity_graph_traffic: - comment: "Enable the traffic to user entity tweet graph to fetch liked-by tweets candidates" - default_availability: 10000 - -enable_user_tweet_graph_traffic: - comment: "Enable the traffic to user tweet graph to fetch similar tweets candidates" - default_availability: 10000 - -enable_user_video_graph_traffic: - comment: "Enable the traffic to user video graph to fetch similar tweets candidates" - default_availability: 10000 - -enable_user_ad_graph_traffic: - comment: "Enable the traffic to user ad graph to fetch similar tweets candidates" - default_availability: 10000 - -enable_qig_similar_tweets_traffic: - comment: "Enable the traffic to QIG to fetch similar tweet candidates" - default_availability: 0 - -enable_frs_traffic: - comment: "Enable the traffic to FRS to fetch user follow recommendations" - default_availability: 0 - -enable_hydra_dark_traffic: - comment: "Enable dark traffic to hydra" - default_availability: 0 - -enable_real_graph_mh_store: - comment: "Enable traffic for the real graph manhattan based store" - default_availability: 0 - -enable_simclusters_ann_experimental_dark_traffic: - comment: "Enable dark traffic to simclusters-ann-experimental" - default_availability: 0 - -enable_simclusters_ann_2_dark_traffic: - comment: "Enable dark traffic to prod SimClustersANN2" - default_availability: 0 - -enable_user_state_store: - comment: "Enable traffic user state store to hydrate user state" - default_availability: 0 - -upper_funnel_per_step_scribe_rate: - comment: "Enable Upper Funnel Event Scribe Sampling (fetch, pre-rank, interleave etc.) for getTweetsRecommendations() endpoint" - default_availability: 0 - -kafka_message_scribe_sample_rate: - comment: "Gates the production of forked scribe messages to kafka for the async feature hydrator" - default_availability: 0 - -top_level_api_ddg_metrics_scribe_rate: - comment: "Enable Top Level API DDG Metrics Scribe Sampling for getTweetsRecommendations() endpoint" - default_availability: 0 - -ads_recommendations_per_experiment_scribe_rate: - comment: "Percentage of DDG traffic to Scribe for getAdsRecommendations() endpoint" - default_availability: 0 - -enable_loadshedding_getTweetRecommendations: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getTweetRecommendations_Home: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getTweetRecommendations_Notifications: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getTweetRecommendations_Email: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getRelatedTweetsForQueryTweet: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getRelatedTweetsForQueryTweet_Home: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getRelatedTweetsForQueryTweet_MoreTweetsModule: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getRelatedTweetsForQueryAuthor: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getRelatedTweetsForQueryAuthor_MoreTweetsModule: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getFrsBasedTweetRecommendations_Home: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_loadshedding_getFrsBasedTweetRecommendations_Notifications: - comment: "Enable loadshedding (from 0% to 100%). Requests that have been shed will return an empty response" - default_availability: 0 - -enable_user_media_representation_store: - comment: "Enable fetching user nudity rate signal from Media Understanding" - default_availability: 0 - -enable_magic_recs_real_time_aggregates_store: - comment: "Enable fetching real time aggregates features from Magic Recs memcache" - default_availability: 0 - -enable_utg_realtime_tweet_engagement_score: - comment: "Enable fetching real time tweet engagement score from utg-plus" - default_availability: 0 - -get_tweet_recommendations_cache_rate: - comment: "Proportion of users where getTweetRecommendations() request and responses will be cached" - default_availability: 1000 - -enable_earlybird_traffic: - comment: "Enable fetching tweet candidates from Earlybird" - default_availability: 0 - -enable_scribe_for_blue_verified_tweet_candidates: - comment: "Enable scribing for tweet candidates from Blue Verified users" - default_availability: 0 diff --git a/cr-mixer/server/src/main/resources/logback.xml b/cr-mixer/server/src/main/resources/logback.xml deleted file mode 100644 index 24e7fd57e..000000000 --- a/cr-mixer/server/src/main/resources/logback.xml +++ /dev/null @@ -1,168 +0,0 @@ - - - - - - - - - - - - - - - - - true - - - - - - - - - - - ${log.service.output} - - - ${log.service.output}.%d.gz - - 21 - true - - - %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n - - - - - - ${log.access.output} - - - ${log.access.output}.%d.gz - - 21 - true - - - ${DEFAULT_ACCESS_PATTERN}%n - - - - - - true - ${log.lens.category} - ${log.lens.index} - ${log.lens.tag}/service - - %msg - - - - - - true - ${log.lens.category} - ${log.lens.index} - ${log.lens.tag}/access - - %msg - - - - - - allow_listed_pipeline_executions.log - - - allow_listed_pipeline_executions.log.%d.gz - - 7 - true - - - %date %.-3level ${DEFAULT_SERVICE_PATTERN}%n - - - - - - - - - - - - ${async_queue_size} - ${async_max_flush_time} - - - - - ${async_queue_size} - ${async_max_flush_time} - - - - - ${async_queue_size} - ${async_max_flush_time} - - - - - ${async_queue_size} - ${async_max_flush_time} - - - - - ${async_queue_size} - ${async_max_flush_time} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/BUILD.bazel b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/BUILD.bazel deleted file mode 100644 index 533a86c1f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/BUILD.bazel +++ /dev/null @@ -1,48 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/google/inject:guice", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/jvm/net/codingwell:scala-guice", - "3rdparty/jvm/org/slf4j:slf4j-api", - "cr-mixer/server/src/main/resources", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "finagle/finagle-core/src/main", - "finagle/finagle-http/src/main/scala", - "finagle/finagle-thriftmux/src/main/scala", - "finatra-internal/mtls-http/src/main/scala", - "finatra-internal/mtls-thriftmux/src/main/scala", - "finatra/http-core/src/main/java/com/twitter/finatra/http", - "finatra/inject/inject-app/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "finatra/inject/inject-server/src/main/scala", - "finatra/inject/inject-utils/src/main/scala", - "finatra/utils/src/main/java/com/twitter/finatra/annotations", - "hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/controllers", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/module", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/filters", - "src/thrift/com/twitter/timelines/render:thrift-scala", - "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms", - "thrift-web-forms/src/main/scala/com/twitter/thriftwebforms/view", - "timelines/src/main/scala/com/twitter/timelines/features/app", - "twitter-server-internal", - "twitter-server/server/src/main/scala", - "util/util-app/src/main/scala", - "util/util-core:scala", - "util/util-slf4j-api/src/main/scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerHttpServerWarmupHandler.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerHttpServerWarmupHandler.scala deleted file mode 100644 index 85e302d2a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerHttpServerWarmupHandler.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.cr_mixer - -import com.twitter.finatra.http.routing.HttpWarmup -import com.twitter.finatra.httpclient.RequestBuilder._ -import com.twitter.inject.Logging -import com.twitter.inject.utils.Handler -import com.twitter.util.Try -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class CrMixerHttpServerWarmupHandler @Inject() (warmup: HttpWarmup) extends Handler with Logging { - - override def handle(): Unit = { - Try(warmup.send(get("/admin/cr-mixer/product-pipelines"), admin = true)()) - .onFailure(e => error(e.getMessage, e)) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerServer.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerServer.scala deleted file mode 100644 index 887aab83f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerServer.scala +++ /dev/null @@ -1,229 +0,0 @@ -package com.twitter.cr_mixer - -import com.google.inject.Module -import com.twitter.cr_mixer.controller.CrMixerThriftController -import com.twitter.cr_mixer.featureswitch.SetImpressedBucketsLocalContextFilter -import com.twitter.cr_mixer.module.ActivePromotedTweetStoreModule -import com.twitter.cr_mixer.module.CertoStratoStoreModule -import com.twitter.cr_mixer.module.CrMixerParamConfigModule -import com.twitter.cr_mixer.module.EmbeddingStoreModule -import com.twitter.cr_mixer.module.FrsStoreModule -import com.twitter.cr_mixer.module.MHMtlsParamsModule -import com.twitter.cr_mixer.module.OfflineCandidateStoreModule -import com.twitter.cr_mixer.module.RealGraphStoreMhModule -import com.twitter.cr_mixer.module.RealGraphOonStoreModule -import com.twitter.cr_mixer.module.RepresentationManagerModule -import com.twitter.cr_mixer.module.RepresentationScorerModule -import com.twitter.cr_mixer.module.TweetInfoStoreModule -import com.twitter.cr_mixer.module.TweetRecentEngagedUserStoreModule -import com.twitter.cr_mixer.module.TweetRecommendationResultsStoreModule -import com.twitter.cr_mixer.module.TripCandidateStoreModule -import com.twitter.cr_mixer.module.TwhinCollabFilterStratoStoreModule -import com.twitter.cr_mixer.module.UserSignalServiceColumnModule -import com.twitter.cr_mixer.module.UserSignalServiceStoreModule -import com.twitter.cr_mixer.module.UserStateStoreModule -import com.twitter.cr_mixer.module.core.ABDeciderModule -import com.twitter.cr_mixer.module.core.CrMixerFlagModule -import com.twitter.cr_mixer.module.core.CrMixerLoggingABDeciderModule -import com.twitter.cr_mixer.module.core.FeatureContextBuilderModule -import com.twitter.cr_mixer.module.core.FeatureSwitchesModule -import com.twitter.cr_mixer.module.core.KafkaProducerModule -import com.twitter.cr_mixer.module.core.LoggerFactoryModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserAdGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ProducerBasedUnifiedSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.SimClustersANNSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUnifiedSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedQigSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedTwHINSimlarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserAdGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserTweetGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TweetBasedUserVideoGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.TwhinCollabFilterLookupSimilarityEngineModule -import com.twitter.cr_mixer.module.ConsumersBasedUserAdGraphStoreModule -import com.twitter.cr_mixer.module.ConsumersBasedUserTweetGraphStoreModule -import com.twitter.cr_mixer.module.ConsumersBasedUserVideoGraphStoreModule -import com.twitter.cr_mixer.module.DiffusionStoreModule -import com.twitter.cr_mixer.module.EarlybirdRecencyBasedCandidateStoreModule -import com.twitter.cr_mixer.module.TwiceClustersMembersStoreModule -import com.twitter.cr_mixer.module.StrongTiePredictionStoreModule -import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule -import com.twitter.cr_mixer.module.thrift_client.EarlybirdSearchClientModule -import com.twitter.cr_mixer.module.thrift_client.FrsClientModule -import com.twitter.cr_mixer.module.thrift_client.QigServiceClientModule -import com.twitter.cr_mixer.module.thrift_client.SimClustersAnnServiceClientModule -import com.twitter.cr_mixer.module.thrift_client.TweetyPieClientModule -import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphClientModule -import com.twitter.cr_mixer.module.thrift_client.UserTweetGraphPlusClientModule -import com.twitter.cr_mixer.module.thrift_client.UserVideoGraphClientModule -import com.twitter.cr_mixer.{thriftscala => st} -import com.twitter.finagle.Filter -import com.twitter.finatra.annotations.DarkTrafficFilterType -import com.twitter.finatra.decider.modules.DeciderModule -import com.twitter.finatra.http.HttpServer -import com.twitter.finatra.http.routing.HttpRouter -import com.twitter.finatra.jackson.modules.ScalaObjectMapperModule -import com.twitter.finatra.mtls.http.{Mtls => HttpMtls} -import com.twitter.finatra.mtls.thriftmux.Mtls -import com.twitter.finatra.mtls.thriftmux.modules.MtlsThriftWebFormsModule -import com.twitter.finatra.thrift.ThriftServer -import com.twitter.finatra.thrift.filters._ -import com.twitter.finatra.thrift.routing.ThriftRouter -import com.twitter.hydra.common.model_config.{ConfigModule => HydraConfigModule} -import com.twitter.inject.thrift.modules.ThriftClientIdModule -import com.twitter.product_mixer.core.module.LoggingThrowableExceptionMapper -import com.twitter.product_mixer.core.module.StratoClientModule -import com.twitter.product_mixer.core.module.product_mixer_flags.ProductMixerFlagModule -import com.twitter.relevance_platform.common.filters.ClientStatsFilter -import com.twitter.relevance_platform.common.filters.DarkTrafficFilterModule -import com.twitter.cr_mixer.module.SimClustersANNServiceNameToClientMapper -import com.twitter.cr_mixer.module.SkitStratoStoreModule -import com.twitter.cr_mixer.module.BlueVerifiedAnnotationStoreModule -import com.twitter.cr_mixer.module.core.TimeoutConfigModule -import com.twitter.cr_mixer.module.grpc_client.NaviGRPCClientModule -import com.twitter.cr_mixer.module.similarity_engine.CertoTopicTweetSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.ConsumerBasedWalsSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.DiffusionBasedSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.EarlybirdSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.SkitTopicTweetSimilarityEngineModule -import com.twitter.cr_mixer.module.similarity_engine.UserTweetEntityGraphSimilarityEngineModule -import com.twitter.cr_mixer.module.thrift_client.HydraPartitionClientModule -import com.twitter.cr_mixer.module.thrift_client.HydraRootClientModule -import com.twitter.cr_mixer.module.thrift_client.UserAdGraphClientModule -import com.twitter.cr_mixer.module.thrift_client.UserTweetEntityGraphClientModule -import com.twitter.thriftwebforms.MethodOptions - -object CrMixerServerMain extends CrMixerServer - -class CrMixerServer extends ThriftServer with Mtls with HttpServer with HttpMtls { - override val name = "cr-mixer-server" - - private val coreModules = Seq( - ABDeciderModule, - CrMixerFlagModule, - CrMixerLoggingABDeciderModule, - CrMixerParamConfigModule, - new DarkTrafficFilterModule[st.CrMixer.ReqRepServicePerEndpoint](), - DeciderModule, - FeatureContextBuilderModule, - FeatureSwitchesModule, - KafkaProducerModule, - LoggerFactoryModule, - MHMtlsParamsModule, - ProductMixerFlagModule, - ScalaObjectMapperModule, - ThriftClientIdModule - ) - - private val thriftClientModules = Seq( - AnnQueryServiceClientModule, - EarlybirdSearchClientModule, - FrsClientModule, - HydraPartitionClientModule, - HydraRootClientModule, - QigServiceClientModule, - SimClustersAnnServiceClientModule, - TweetyPieClientModule, - UserAdGraphClientModule, - UserTweetEntityGraphClientModule, - UserTweetGraphClientModule, - UserTweetGraphPlusClientModule, - UserVideoGraphClientModule, - ) - - private val grpcClientModules = Seq( - NaviGRPCClientModule - ) - - // Modules sorted alphabetically, please keep the order when adding a new module - override val modules: Seq[Module] = - coreModules ++ thriftClientModules ++ grpcClientModules ++ - Seq( - ActivePromotedTweetStoreModule, - CertoStratoStoreModule, - CertoTopicTweetSimilarityEngineModule, - ConsumersBasedUserAdGraphSimilarityEngineModule, - ConsumersBasedUserTweetGraphStoreModule, - ConsumersBasedUserVideoGraphSimilarityEngineModule, - ConsumersBasedUserVideoGraphStoreModule, - ConsumerEmbeddingBasedTripSimilarityEngineModule, - ConsumerEmbeddingBasedTwHINSimilarityEngineModule, - ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule, - ConsumersBasedUserAdGraphStoreModule, - ConsumerBasedWalsSimilarityEngineModule, - DiffusionStoreModule, - EmbeddingStoreModule, - EarlybirdSimilarityEngineModule, - EarlybirdRecencyBasedCandidateStoreModule, - FrsStoreModule, - HydraConfigModule, - OfflineCandidateStoreModule, - ProducerBasedUnifiedSimilarityEngineModule, - ProducerBasedUserAdGraphSimilarityEngineModule, - ProducerBasedUserTweetGraphSimilarityEngineModule, - RealGraphOonStoreModule, - RealGraphStoreMhModule, - RepresentationManagerModule, - RepresentationScorerModule, - SimClustersANNServiceNameToClientMapper, - SimClustersANNSimilarityEngineModule, - SkitStratoStoreModule, - SkitTopicTweetSimilarityEngineModule, - StratoClientModule, - StrongTiePredictionStoreModule, - TimeoutConfigModule, - TripCandidateStoreModule, - TwiceClustersMembersStoreModule, - TweetBasedQigSimilarityEngineModule, - TweetBasedTwHINSimlarityEngineModule, - TweetBasedUnifiedSimilarityEngineModule, - TweetBasedUserAdGraphSimilarityEngineModule, - TweetBasedUserTweetGraphSimilarityEngineModule, - TweetBasedUserVideoGraphSimilarityEngineModule, - TweetInfoStoreModule, - TweetRecentEngagedUserStoreModule, - TweetRecommendationResultsStoreModule, - TwhinCollabFilterStratoStoreModule, - TwhinCollabFilterLookupSimilarityEngineModule, - UserSignalServiceColumnModule, - UserSignalServiceStoreModule, - UserStateStoreModule, - UserTweetEntityGraphSimilarityEngineModule, - DiffusionBasedSimilarityEngineModule, - BlueVerifiedAnnotationStoreModule, - new MtlsThriftWebFormsModule[st.CrMixer.MethodPerEndpoint](this) { - override protected def defaultMethodAccess: MethodOptions.Access = { - MethodOptions.Access.ByLdapGroup( - Seq( - "cr-mixer-admins", - "recosplat-sensitive-data-medium", - "recos-platform-admins", - )) - } - } - ) - - def configureThrift(router: ThriftRouter): Unit = { - router - .filter[LoggingMDCFilter] - .filter[TraceIdMDCFilter] - .filter[ThriftMDCFilter] - .filter[ClientStatsFilter] - .filter[AccessLoggingFilter] - .filter[SetImpressedBucketsLocalContextFilter] - .filter[ExceptionMappingFilter] - .filter[Filter.TypeAgnostic, DarkTrafficFilterType] - .exceptionMapper[LoggingThrowableExceptionMapper] - .add[CrMixerThriftController] - } - - override protected def warmup(): Unit = { - handle[CrMixerThriftServerWarmupHandler]() - handle[CrMixerHttpServerWarmupHandler]() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerThriftServerWarmupHandler.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerThriftServerWarmupHandler.scala deleted file mode 100644 index 46c46c92b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/CrMixerThriftServerWarmupHandler.scala +++ /dev/null @@ -1,75 +0,0 @@ -package com.twitter.cr_mixer - -import com.twitter.finagle.thrift.ClientId -import com.twitter.finatra.thrift.routing.ThriftWarmup -import com.twitter.inject.Logging -import com.twitter.inject.utils.Handler -import com.twitter.product_mixer.core.{thriftscala => pt} -import com.twitter.cr_mixer.{thriftscala => st} -import com.twitter.scrooge.Request -import com.twitter.scrooge.Response -import com.twitter.util.Return -import com.twitter.util.Throw -import com.twitter.util.Try -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class CrMixerThriftServerWarmupHandler @Inject() (warmup: ThriftWarmup) - extends Handler - with Logging { - - private val clientId = ClientId("thrift-warmup-client") - - def handle(): Unit = { - val testIds = Seq(1, 2, 3) - try { - clientId.asCurrent { - testIds.foreach { id => - val warmupReq = warmupQuery(id) - info(s"Sending warm-up request to service with query: $warmupReq") - warmup.sendRequest( - method = st.CrMixer.GetTweetRecommendations, - req = Request(st.CrMixer.GetTweetRecommendations.Args(warmupReq)))(assertWarmupResponse) - } - } - } catch { - case e: Throwable => - // we don't want a warmup failure to prevent start-up - error(e.getMessage, e) - } - info("Warm-up done.") - } - - private def warmupQuery(userId: Long): st.CrMixerTweetRequest = { - val clientContext = pt.ClientContext( - userId = Some(userId), - guestId = None, - appId = Some(258901L), - ipAddress = Some("0.0.0.0"), - userAgent = Some("FAKE_USER_AGENT_FOR_WARMUPS"), - countryCode = Some("US"), - languageCode = Some("en"), - isTwoffice = None, - userRoles = None, - deviceId = Some("FAKE_DEVICE_ID_FOR_WARMUPS") - ) - st.CrMixerTweetRequest( - clientContext = clientContext, - product = st.Product.Home, - productContext = Some(st.ProductContext.HomeContext(st.HomeContext())), - ) - } - - private def assertWarmupResponse( - result: Try[Response[st.CrMixer.GetTweetRecommendations.SuccessType]] - ): Unit = { - // we collect and log any exceptions from the result. - result match { - case Return(_) => // ok - case Throw(exception) => - warn("Error performing warm-up request.") - error(exception.getMessage, exception) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala deleted file mode 100644 index 4e8f0a41d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala +++ /dev/null @@ -1,77 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.model.BlendedAdsCandidate -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.InitialAdsCandidate -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton -import scala.collection.mutable - -@Singleton -case class AdsBlender @Inject() (globalStats: StatsReceiver) { - - private val name: String = this.getClass.getCanonicalName - private val stats: StatsReceiver = globalStats.scope(name) - - /** - * Interleaves candidates by iteratively choosing InterestedIn candidates and TWISTLY candidates - * in turn. InterestedIn candidates have no source signal, whereas TWISTLY candidates do. TWISTLY - * candidates themselves are interleaved by source before equal blending with InterestedIn - * candidates. - */ - def blend( - inputCandidates: Seq[Seq[InitialAdsCandidate]], - ): Future[Seq[BlendedAdsCandidate]] = { - - // Filter out empty candidate sequence - val candidates = inputCandidates.filter(_.nonEmpty) - val (interestedInCandidates, twistlyCandidates) = - candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty) - // First interleave twistly candidates - val interleavedTwistlyCandidates = InterleaveUtil.interleave(twistlyCandidates) - - val twistlyAndInterestedInCandidates = - Seq(interestedInCandidates.flatten, interleavedTwistlyCandidates) - - // then interleave twistly candidates with interested in to make them even - val interleavedCandidates = InterleaveUtil.interleave(twistlyAndInterestedInCandidates) - - stats.stat("candidates").add(interleavedCandidates.size) - - val blendedCandidates = buildBlendedAdsCandidate(inputCandidates, interleavedCandidates) - Future.value(blendedCandidates) - } - private def buildBlendedAdsCandidate( - inputCandidates: Seq[Seq[InitialAdsCandidate]], - interleavedCandidates: Seq[InitialAdsCandidate] - ): Seq[BlendedAdsCandidate] = { - val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates) - interleavedCandidates.map { interleavedCandidate => - interleavedCandidate.toBlendedAdsCandidate(cgInfoLookupMap(interleavedCandidate.tweetId)) - } - } - - private def buildCandidateToCGInfosMap( - candidateSeq: Seq[Seq[InitialAdsCandidate]], - ): Map[TweetId, Seq[CandidateGenerationInfo]] = { - val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]() - - candidateSeq.foreach { candidates => - candidates.foreach { candidate => - val candidateGenerationInfoSeq = { - tweetIdMap.getOrElse(candidate.tweetId, Seq.empty) - } - val candidateGenerationInfo = candidate.candidateGenerationInfo - tweetIdMap.put( - candidate.tweetId, - candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo)) - } - } - tweetIdMap.toMap - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BUILD deleted file mode 100644 index 604e35f99..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BUILD +++ /dev/null @@ -1,20 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala deleted file mode 100644 index 1a864a6c2..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala +++ /dev/null @@ -1,48 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.simclusters_v2.common.TweetId -import scala.collection.mutable - -object BlendedCandidatesBuilder { - - /** - * @param inputCandidates input candidate prior to interleaving - * @param interleavedCandidates after interleaving. These tweets are de-duplicated. - */ - def build( - inputCandidates: Seq[Seq[InitialCandidate]], - interleavedCandidates: Seq[InitialCandidate] - ): Seq[BlendedCandidate] = { - val cgInfoLookupMap = buildCandidateToCGInfosMap(inputCandidates) - interleavedCandidates.map { interleavedCandidate => - interleavedCandidate.toBlendedCandidate(cgInfoLookupMap(interleavedCandidate.tweetId)) - } - } - - /** - * The same tweet can be generated by different sources. - * This function tells you which CandidateGenerationInfo generated a given tweet - */ - private def buildCandidateToCGInfosMap( - candidateSeq: Seq[Seq[InitialCandidate]], - ): Map[TweetId, Seq[CandidateGenerationInfo]] = { - val tweetIdMap = mutable.HashMap[TweetId, Seq[CandidateGenerationInfo]]() - - candidateSeq.foreach { candidates => - candidates.foreach { candidate => - val candidateGenerationInfoSeq = { - tweetIdMap.getOrElse(candidate.tweetId, Seq.empty) - } - val candidateGenerationInfo = candidate.candidateGenerationInfo - tweetIdMap.put( - candidate.tweetId, - candidateGenerationInfoSeq ++ Seq(candidateGenerationInfo)) - } - } - tweetIdMap.toMap - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala deleted file mode 100644 index 9ef81009b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala +++ /dev/null @@ -1,121 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.BlenderParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject - -case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) { - - private val name: String = this.getClass.getCanonicalName - private val stats: StatsReceiver = globalStats.scope(name) - - /** - * Exposes multiple types of sorting relying only on Content Based signals - * Candidate Recency, Random, FavoriteCount and finally Standardized, which standardizes the scores - * that come from the active SimilarityEngine and then sort on the standardized scores. - */ - def blend( - params: Params, - inputCandidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[BlendedCandidate]] = { - // Filter out empty candidate sequence - val candidates = inputCandidates.filter(_.nonEmpty) - val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match { - case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency => - candidates.flatten.sortBy(c => getSnowflakeTimeStamp(c.tweetId)).reverse - case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting => - candidates.flatten.sortBy(_ => scala.util.Random.nextDouble()) - case BlenderParams.ContentBasedSortingAlgorithmEnum.FavoriteCount => - candidates.flatten.sortBy(-_.tweetInfo.favCount) - case BlenderParams.ContentBasedSortingAlgorithmEnum.SimilarityToSignalSorting => - standardizeAndSortByScore(flattenAndGroupByEngineTypeOrFirstContribEngine(candidates)) - case _ => - candidates.flatten.sortBy(-_.tweetInfo.favCount) - } - - stats.stat("candidates").add(sortedCandidates.size) - - val blendedCandidates = - BlendedCandidatesBuilder.build(inputCandidates, removeDuplicates(sortedCandidates)) - Future.value(blendedCandidates) - } - - private def removeDuplicates(candidates: Seq[InitialCandidate]): Seq[InitialCandidate] = { - val seen = collection.mutable.Set.empty[Long] - candidates.filter { c => - if (seen.contains(c.tweetId)) { - false - } else { - seen += c.tweetId - true - } - } - } - - private def groupByEngineTypeOrFirstContribEngine( - candidates: Seq[InitialCandidate] - ): Map[SimilarityEngineType, Seq[InitialCandidate]] = { - val grouped = candidates.groupBy { candidate => - val contrib = candidate.candidateGenerationInfo.contributingSimilarityEngines - if (contrib.nonEmpty) { - contrib.head.similarityEngineType - } else { - candidate.candidateGenerationInfo.similarityEngineInfo.similarityEngineType - } - } - grouped - } - - private def flattenAndGroupByEngineTypeOrFirstContribEngine( - candidates: Seq[Seq[InitialCandidate]] - ): Seq[Seq[InitialCandidate]] = { - val flat = candidates.flatten - val grouped = groupByEngineTypeOrFirstContribEngine(flat) - grouped.values.toSeq - } - - private def standardizeAndSortByScore( - candidates: Seq[Seq[InitialCandidate]] - ): Seq[InitialCandidate] = { - candidates - .map { innerSeq => - val meanScore = innerSeq - .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)) - .sum / innerSeq.length - val stdDev = scala.math - .sqrt( - innerSeq - .map(c => c.candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0)) - .map(a => a - meanScore) - .map(a => a * a) - .sum / innerSeq.length) - innerSeq - .map(c => - ( - c, - c.candidateGenerationInfo.similarityEngineInfo.score - .map { score => - if (stdDev != 0) (score - meanScore) / stdDev - else 0.0 - } - .getOrElse(0.0))) - }.flatten.sortBy { case (_, standardizedScore) => -standardizedScore } - .map { case (candidate, _) => candidate } - } - - private def getSnowflakeTimeStamp(tweetId: Long): Time = { - val isSnowflake = SnowflakeId.isSnowflakeId(tweetId) - if (isSnowflake) { - SnowflakeId(tweetId).time - } else { - Time.fromMilliseconds(0L) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala deleted file mode 100644 index 4c5dd07c3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.BlenderParams -import com.twitter.cr_mixer.util.CountWeightedInterleaveUtil -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton - -/** - * A weighted round robin interleaving algorithm. - * The weight of each blending group based on the count of candidates in each blending group. - * The more candidates under a blending group, the more candidates are selected from it during round - * robin, which in effect prioritizes this group. - * - * Weights sum up to 1. For example: - * total candidates = 8 - * Group Weight - * [A1, A2, A3, A4] 4/8 = 0.5 // select 50% of results from group A - * [B1, B2] 2/8 = 0.25 // 25% from group B - * [C1, C2] 2/8 = 0.25 // 25% from group C - * - * Blended results = [A1, A2, B1, C1, A3, A4, B2, C2] - * See @linht's go/weighted-interleave - */ -@Singleton -case class CountWeightedInterleaveBlender @Inject() (globalStats: StatsReceiver) { - import CountWeightedInterleaveBlender._ - - private val name: String = this.getClass.getCanonicalName - private val stats: StatsReceiver = globalStats.scope(name) - - def blend( - query: CrCandidateGeneratorQuery, - inputCandidates: Seq[Seq[InitialCandidate]] - ): Future[Seq[BlendedCandidate]] = { - val weightedBlenderQuery = CountWeightedInterleaveBlender.paramToQuery(query.params) - countWeightedInterleave(weightedBlenderQuery, inputCandidates) - } - - private[blender] def countWeightedInterleave( - query: WeightedBlenderQuery, - inputCandidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[BlendedCandidate]] = { - - val candidatesAndWeightKeyByIndexId: Seq[(Seq[InitialCandidate], Double)] = { - CountWeightedInterleaveUtil.buildInitialCandidatesWithWeightKeyByFeature( - inputCandidates, - query.rankerWeightShrinkage) - } - - val interleavedCandidates = - InterleaveUtil.weightedInterleave(candidatesAndWeightKeyByIndexId, query.maxWeightAdjustments) - - stats.stat("candidates").add(interleavedCandidates.size) - - val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates) - Future.value(blendedCandidates) - } -} - -object CountWeightedInterleaveBlender { - - /** - * We pass two parameters to the weighted interleaver: - * @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we - * stay to uniform sampling. The bigger the shrinkage the - * closer we are to uniform round robin - * @param maxWeightAdjustments max number of weighted sampling to do prior to defaulting to - * uniform. Set so that we avoid infinite loops (e.g. if weights are - * 0) - */ - case class WeightedBlenderQuery( - rankerWeightShrinkage: Double, - maxWeightAdjustments: Int) - - def paramToQuery(params: Params): WeightedBlenderQuery = { - val rankerWeightShrinkage: Double = - params(BlenderParams.RankingInterleaveWeightShrinkageParam) - val maxWeightAdjustments: Int = - params(BlenderParams.RankingInterleaveMaxWeightAdjustments) - - WeightedBlenderQuery(rankerWeightShrinkage, maxWeightAdjustments) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/InterleaveBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/InterleaveBlender.scala deleted file mode 100644 index 92cdfe092..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/InterleaveBlender.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class InterleaveBlender @Inject() (globalStats: StatsReceiver) { - - private val name: String = this.getClass.getCanonicalName - private val stats: StatsReceiver = globalStats.scope(name) - - /** - * Interleaves candidates, by taking 1 candidate from each Seq[Seq[InitialCandidate]] in sequence, - * until we run out of candidates. - */ - def blend( - inputCandidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[BlendedCandidate]] = { - - val interleavedCandidates = InterleaveUtil.interleave(inputCandidates) - - stats.stat("candidates").add(interleavedCandidates.size) - - val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates) - Future.value(blendedCandidates) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala deleted file mode 100644 index 14e93d53d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypes -import com.twitter.cr_mixer.blender.ImplicitSignalBackFillBlender.BackFillSourceTypesWithVideo -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.BlenderParams -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future -import javax.inject.Inject - -case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) { - - private val name: String = this.getClass.getCanonicalName - private val stats: StatsReceiver = globalStats.scope(name) - - /** - * Partition the candidates based on source type - * Interleave the two partitions of candidates separately - * Then append the back fill candidates to the end - */ - def blend( - params: Params, - inputCandidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[BlendedCandidate]] = { - - // Filter out empty candidate sequence - val candidates = inputCandidates.filter(_.nonEmpty) - - val backFillSourceTypes = - if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo - else BackFillSourceTypes - // partition candidates based on their source types - val (backFillCandidates, regularCandidates) = - candidates.partition( - _.head.candidateGenerationInfo.sourceInfoOpt - .exists(sourceInfo => backFillSourceTypes.contains(sourceInfo.sourceType))) - - val interleavedRegularCandidates = InterleaveUtil.interleave(regularCandidates) - val interleavedBackFillCandidates = - InterleaveUtil.interleave(backFillCandidates) - stats.stat("backFillCandidates").add(interleavedBackFillCandidates.size) - // Append interleaved backfill candidates to the end - val interleavedCandidates = interleavedRegularCandidates ++ interleavedBackFillCandidates - - stats.stat("candidates").add(interleavedCandidates.size) - - val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates) - Future.value(blendedCandidates) - } - -} - -object ImplicitSignalBackFillBlender { - final val BackFillSourceTypesWithVideo: Set[SourceType] = Set( - SourceType.UserRepeatedProfileVisit, - SourceType.VideoTweetPlayback50, - SourceType.VideoTweetQualityView) - - final val BackFillSourceTypes: Set[SourceType] = Set(SourceType.UserRepeatedProfileVisit) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala deleted file mode 100644 index 7052a71a5..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.cr_mixer.blender - -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.BlenderParams -import com.twitter.cr_mixer.param.BlenderParams.BlendingAlgorithmEnum -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class SwitchBlender @Inject() ( - defaultBlender: InterleaveBlender, - sourceTypeBackFillBlender: SourceTypeBackFillBlender, - adsBlender: AdsBlender, - contentSignalBlender: ContentSignalBlender, - globalStats: StatsReceiver) { - - private val stats = globalStats.scope(this.getClass.getCanonicalName) - - def blend( - params: Params, - userState: UserState, - inputCandidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[BlendedCandidate]] = { - // Take out empty seq - val nonEmptyCandidates = inputCandidates.collect { - case candidates if candidates.nonEmpty => - candidates - } - stats.stat("num_of_sequences").add(inputCandidates.size) - - // Sort the seqs in an order - val innerSignalSorting = params(BlenderParams.SignalTypeSortingAlgorithmParam) match { - case BlenderParams.ContentBasedSortingAlgorithmEnum.SourceSignalRecency => - SwitchBlender.TimestampOrder - case BlenderParams.ContentBasedSortingAlgorithmEnum.RandomSorting => SwitchBlender.RandomOrder - case _ => SwitchBlender.TimestampOrder - } - - val candidatesToBlend = nonEmptyCandidates.sortBy(_.head)(innerSignalSorting) - // Blend based on specified blender rules - params(BlenderParams.BlendingAlgorithmParam) match { - case BlendingAlgorithmEnum.RoundRobin => - defaultBlender.blend(candidatesToBlend) - case BlendingAlgorithmEnum.SourceTypeBackFill => - sourceTypeBackFillBlender.blend(params, candidatesToBlend) - case BlendingAlgorithmEnum.SourceSignalSorting => - contentSignalBlender.blend(params, candidatesToBlend) - case _ => defaultBlender.blend(candidatesToBlend) - } - } -} - -object SwitchBlender { - - /** - * Prefers candidates generated from sources with the latest timestamps. - * The newer the source signal, the higher a candidate ranks. - * This ordering biases against consumer-based candidates because their timestamp defaults to 0 - * - * Within a Seq[Seq[Candidate]], all candidates within a inner Seq - * are guaranteed to have the same sourceInfo because they are grouped by (sourceInfo, SE model). - * Hence, we can pick .headOption to represent the whole list when filtering by the internalId of the sourceInfoOpt. - * But of course the similarityEngine score in a CGInfo could be different. - */ - val TimestampOrder: Ordering[InitialCandidate] = - math.Ordering - .by[InitialCandidate, Time]( - _.candidateGenerationInfo.sourceInfoOpt - .flatMap(_.sourceEventTime) - .getOrElse(Time.fromMilliseconds(0L))) - .reverse - - private val RandomOrder: Ordering[InitialCandidate] = - Ordering.by[InitialCandidate, Double](_ => scala.util.Random.nextDouble()) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala deleted file mode 100644 index e240ebf2d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala +++ /dev/null @@ -1,140 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.cr_mixer.blender.AdsBlender -import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger -import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery -import com.twitter.cr_mixer.model.BlendedAdsCandidate -import com.twitter.cr_mixer.model.InitialAdsCandidate -import com.twitter.cr_mixer.model.RankedAdsCandidate -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.param.AdsParams -import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams -import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.source_signal.UssSourceSignalFetcher -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class AdsCandidateGenerator @Inject() ( - ussSourceSignalFetcher: UssSourceSignalFetcher, - realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher, - adsCandidateSourceRouter: AdsCandidateSourcesRouter, - adsBlender: AdsBlender, - scribeLogger: AdsRecommendationsScribeLogger, - globalStats: StatsReceiver) { - - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchSourcesStats = stats.scope("fetchSources") - private val fetchRealGraphSeedsStats = stats.scope("fetchRealGraphSeeds") - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val interleaveStats = stats.scope("interleave") - private val rankStats = stats.scope("rank") - - def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = { - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", query.product.toString) - - StatsUtil.trackItemsStats(allStats) { - StatsUtil.trackItemsStats(perProductStats) { - for { - // fetch source signals - sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) { - fetchSources(query) - } - realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) { - fetchSeeds(query) - } - // get initial candidates from similarity engines - // hydrate lineItemInfo and filter out non active ads - initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) { - fetchCandidates(query, sourceSignals, realGraphSeeds) - } - - // blend candidates - blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) { - interleave(initialCandidates) - } - - rankedCandidates <- StatsUtil.trackItemsStats(rankStats) { - rank( - blendedCandidates, - query.params(AdsParams.EnableScoreBoost), - query.params(AdsParams.AdsCandidateGenerationScoreBoostFactor), - rankStats) - } - } yield { - rankedCandidates.take(query.maxNumResults) - } - } - } - - } - - def fetchSources( - query: AdsCandidateGeneratorQuery - ): Future[Set[SourceInfo]] = { - val fetcherQuery = - FetcherQuery(query.userId, query.product, query.userState, query.params) - ussSourceSignalFetcher.get(fetcherQuery).map(_.getOrElse(Seq.empty).toSet) - } - - private def fetchCandidates( - query: AdsCandidateGeneratorQuery, - sourceSignals: Set[SourceInfo], - realGraphSeeds: Map[UserId, Double] - ): Future[Seq[Seq[InitialAdsCandidate]]] = { - scribeLogger.scribeInitialAdsCandidates( - query, - adsCandidateSourceRouter - .fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params), - query.params(AdsParams.EnableScribe) - ) - - } - - private def fetchSeeds( - query: AdsCandidateGeneratorQuery - ): Future[Map[UserId, Double]] = { - if (query.params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) { - realGraphInSourceGraphFetcher - .get(FetcherQuery(query.userId, query.product, query.userState, query.params)) - .map(_.map(_.seedWithScores).getOrElse(Map.empty)) - } else Future.value(Map.empty[UserId, Double]) - } - - private def interleave( - candidates: Seq[Seq[InitialAdsCandidate]] - ): Future[Seq[BlendedAdsCandidate]] = { - adsBlender - .blend(candidates) - } - - private def rank( - candidates: Seq[BlendedAdsCandidate], - enableScoreBoost: Boolean, - scoreBoostFactor: Double, - statsReceiver: StatsReceiver, - ): Future[Seq[RankedAdsCandidate]] = { - - val candidateSize = candidates.size - val rankedCandidates = candidates.zipWithIndex.map { - case (candidate, index) => - val score = 0.5 + 0.5 * ((candidateSize - index).toDouble / candidateSize) - val boostedScore = if (enableScoreBoost) { - statsReceiver.stat("boostedScore").add((100.0 * score * scoreBoostFactor).toFloat) - score * scoreBoostFactor - } else { - statsReceiver.stat("score").add((100.0 * score).toFloat) - score - } - candidate.toRankedAdsCandidate(boostedScore) - } - Future.value(rankedCandidates) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala deleted file mode 100644 index 69ef31b74..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala +++ /dev/null @@ -1,516 +0,0 @@ - package com.twitter.cr_mixer.candidate_generation - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.InitialAdsCandidate -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams -import com.twitter.cr_mixer.param.ConsumerBasedWalsParams -import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.InterestedInParams -import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.SimClustersANNParams -import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.FilterUtil -import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.LineItemInfo -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.ModelVersions -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -case class AdsCandidateSourcesRouter @Inject() ( - activePromotedTweetStore: ReadableStore[TweetId, Seq[LineItemInfo]], - decider: CrMixerDecider, - @Named(ModuleNames.SimClustersANNSimilarityEngine) simClustersANNSimilarityEngine: StandardSimilarityEngine[ - Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine) - tweetBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine) - consumersBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[ - ConsumersBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine) - producerBasedUserAdGraphSimilarityEngine: StandardSimilarityEngine[ - ProducerBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine) - tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine, - @Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) consumerTwHINANNSimilarityEngine: HnswANNSimilarityEngine, - @Named(ModuleNames.ConsumerBasedWalsSimilarityEngine) - consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[ - ConsumerBasedWalsSimilarityEngine.Query, - TweetWithScore - ], - globalStats: StatsReceiver, -) { - - import AdsCandidateSourcesRouter._ - - val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName) - - def fetchCandidates( - requestUserId: UserId, - sourceSignals: Set[SourceInfo], - realGraphSeeds: Map[UserId, Double], - params: configapi.Params - ): Future[Seq[Seq[InitialAdsCandidate]]] = { - - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - - val tweetBasedSANNMinScore = params( - TweetBasedCandidateGenerationParams.SimClustersMinScoreParam) - val tweetBasedSANN1Candidates = - if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param)) { - Future.collect( - CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getSimClustersANNCandidates( - requestUserId, - Some(sourceInfo), - params, - simClustersANN1ConfigId, - tweetBasedSANNMinScore) - }) - } else Future.value(Seq.empty) - - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - val tweetBasedSANN2Candidates = - if (params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param)) { - Future.collect( - CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getSimClustersANNCandidates( - requestUserId, - Some(sourceInfo), - params, - simClustersANN2ConfigId, - tweetBasedSANNMinScore) - }) - } else Future.value(Seq.empty) - - val tweetBasedUagCandidates = - if (params(TweetBasedCandidateGenerationParams.EnableUAGParam)) { - Future.collect( - CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getTweetBasedUserAdGraphCandidates(Some(sourceInfo), params) - }) - } else Future.value(Seq.empty) - - val realGraphInNetworkBasedUagCandidates = - if (params(ConsumersBasedUserAdGraphParams.EnableSourceParam)) { - getRealGraphConsumersBasedUserAdGraphCandidates(realGraphSeeds, params).map(Seq(_)) - } else Future.value(Seq.empty) - - val producerBasedUagCandidates = - if (params(ProducerBasedCandidateGenerationParams.EnableUAGParam)) { - Future.collect( - CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getProducerBasedUserAdGraphCandidates(Some(sourceInfo), params) - }) - } else Future.value(Seq.empty) - - val tweetBasedTwhinAdsCandidates = - if (params(TweetBasedCandidateGenerationParams.EnableTwHINParam)) { - Future.collect( - CandidateSourcesRouter.getTweetBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getTwHINAdsCandidates( - tweetBasedTwHINANNSimilarityEngine, - SimilarityEngineType.TweetBasedTwHINANN, - requestUserId, - Some(sourceInfo), - ModelConfig.DebuggerDemo) - }) - } else Future.value(Seq.empty) - - val producerBasedSANNMinScore = params( - ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam) - val producerBasedSANN1Candidates = - if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param)) { - Future.collect( - CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getSimClustersANNCandidates( - requestUserId, - Some(sourceInfo), - params, - simClustersANN1ConfigId, - producerBasedSANNMinScore) - }) - } else Future.value(Seq.empty) - val producerBasedSANN2Candidates = - if (params(ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param)) { - Future.collect( - CandidateSourcesRouter.getProducerBasedSourceInfo(sourceSignals).toSeq.map { sourceInfo => - getSimClustersANNCandidates( - requestUserId, - Some(sourceInfo), - params, - simClustersANN2ConfigId, - producerBasedSANNMinScore) - }) - } else Future.value(Seq.empty) - - val interestedInMinScore = params(InterestedInParams.MinScoreParam) - val interestedInSANN1Candidates = if (params(InterestedInParams.EnableSimClustersANN1Param)) { - getSimClustersANNCandidates( - requestUserId, - None, - params, - simClustersANN1ConfigId, - interestedInMinScore).map(Seq(_)) - } else Future.value(Seq.empty) - - val interestedInSANN2Candidates = if (params(InterestedInParams.EnableSimClustersANN2Param)) { - getSimClustersANNCandidates( - requestUserId, - None, - params, - simClustersANN2ConfigId, - interestedInMinScore).map(Seq(_)) - } else Future.value(Seq.empty) - - val consumerTwHINAdsCandidates = - if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) { - getTwHINAdsCandidates( - consumerTwHINANNSimilarityEngine, - SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN, - requestUserId, - None, - ModelConfig.DebuggerDemo).map(Seq(_)) - } else Future.value(Seq.empty) - - val consumerBasedWalsCandidates = - if (params( - ConsumerBasedWalsParams.EnableSourceParam - )) { - getConsumerBasedWalsCandidates(sourceSignals, params) - }.map { - Seq(_) - } - else Future.value(Seq.empty) - - Future - .collect(Seq( - tweetBasedSANN1Candidates, - tweetBasedSANN2Candidates, - tweetBasedUagCandidates, - tweetBasedTwhinAdsCandidates, - producerBasedUagCandidates, - producerBasedSANN1Candidates, - producerBasedSANN2Candidates, - realGraphInNetworkBasedUagCandidates, - interestedInSANN1Candidates, - interestedInSANN2Candidates, - consumerTwHINAdsCandidates, - consumerBasedWalsCandidates, - )).map(_.flatten).map { tweetsWithCGInfoSeq => - Future.collect( - tweetsWithCGInfoSeq.map(candidates => convertToInitialCandidates(candidates, stats))) - }.flatten.map { candidatesLists => - val result = candidatesLists.filter(_.nonEmpty) - stats.stat("numOfSequences").add(result.size) - stats.stat("flattenCandidatesWithDup").add(result.flatten.size) - result - } - } - - private[candidate_generation] def convertToInitialCandidates( - candidates: Seq[TweetWithCandidateGenerationInfo], - stats: StatsReceiver - ): Future[Seq[InitialAdsCandidate]] = { - val tweetIds = candidates.map(_.tweetId).toSet - stats.stat("initialCandidateSizeBeforeLineItemFilter").add(tweetIds.size) - Future.collect(activePromotedTweetStore.multiGet(tweetIds)).map { lineItemInfos => - /** * - * If lineItemInfo does not exist, we will filter out the promoted tweet as it cannot be targeted and ranked in admixer - */ - val filteredCandidates = candidates.collect { - case candidate if lineItemInfos.getOrElse(candidate.tweetId, None).isDefined => - val lineItemInfo = lineItemInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialAdsCandidate( - tweetId = candidate.tweetId, - lineItemInfo = lineItemInfo, - candidate.candidateGenerationInfo - ) - } - stats.stat("initialCandidateSizeAfterLineItemFilter").add(filteredCandidates.size) - filteredCandidates - } - } - - private[candidate_generation] def getSimClustersANNCandidates( - requestUserId: UserId, - sourceInfo: Option[SourceInfo], - params: configapi.Params, - configId: String, - minScore: Double - ) = { - - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - - val embeddingType = - if (sourceInfo.isEmpty) { - params(InterestedInParams.InterestedInEmbeddingIdParam).embeddingType - } else getSimClustersANNEmbeddingType(sourceInfo.get) - val query = SimClustersANNSimilarityEngine.fromParams( - if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId, - embeddingType, - simClustersModelVersion, - configId, - params - ) - - // dark traffic to simclusters-ann-2 - if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) { - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - val sann2Query = SimClustersANNSimilarityEngine.fromParams( - if (sourceInfo.isEmpty) InternalId.UserId(requestUserId) else sourceInfo.get.internalId, - embeddingType, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - simClustersANNSimilarityEngine - .getCandidates(sann2Query) - } - - simClustersANNSimilarityEngine - .getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.filter(_.score > minScore).map { - tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - sourceInfo, - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - }) - } - - private[candidate_generation] def getProducerBasedUserAdGraphCandidates( - sourceInfo: Option[SourceInfo], - params: configapi.Params - ) = { - - val query = ProducerBasedUserAdGraphSimilarityEngine.fromParams( - sourceInfo.get.internalId, - params - ) - producerBasedUserAdGraphSimilarityEngine - .getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore => - val similarityEngineInfo = ProducerBasedUserAdGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - sourceInfo, - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - }) - } - - private[candidate_generation] def getTweetBasedUserAdGraphCandidates( - sourceInfo: Option[SourceInfo], - params: configapi.Params - ) = { - - val query = TweetBasedUserAdGraphSimilarityEngine.fromParams( - sourceInfo.get.internalId, - params - ) - tweetBasedUserAdGraphSimilarityEngine - .getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore => - val similarityEngineInfo = TweetBasedUserAdGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - sourceInfo, - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - }) - } - - private[candidate_generation] def getRealGraphConsumersBasedUserAdGraphCandidates( - realGraphSeeds: Map[UserId, Double], - params: configapi.Params - ) = { - - val query = ConsumersBasedUserAdGraphSimilarityEngine - .fromParams(realGraphSeeds, params) - - // The internalId is a placeholder value. We do not plan to store the full seedUserId set. - val sourceInfo = SourceInfo( - sourceType = SourceType.RealGraphIn, - internalId = InternalId.UserId(0L), - sourceEventTime = None - ) - consumersBasedUserAdGraphSimilarityEngine - .getCandidates(query).map(_.getOrElse(Seq.empty)).map(_.map { tweetWithScore => - val similarityEngineInfo = ConsumersBasedUserAdGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(sourceInfo), - similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs - ) - ) - }) - } - - private[candidate_generation] def getTwHINAdsCandidates( - similarityEngine: HnswANNSimilarityEngine, - similarityEngineType: SimilarityEngineType, - requestUserId: UserId, - sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine - model: String - ): Future[Seq[TweetWithCandidateGenerationInfo]] = { - val internalId = - if (sourceInfo.nonEmpty) sourceInfo.get.internalId else InternalId.UserId(requestUserId) - similarityEngine - .getCandidates(buildHnswANNQuery(internalId, model)).map(_.getOrElse(Seq.empty)).map(_.map { - tweetWithScore => - val similarityEngineInfo = SimilarityEngineInfo( - similarityEngineType = similarityEngineType, - modelId = Some(model), - score = Some(tweetWithScore.score)) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - }) - } - - private[candidate_generation] def getConsumerBasedWalsCandidates( - sourceSignals: Set[SourceInfo], - params: configapi.Params - ): Future[Seq[TweetWithCandidateGenerationInfo]] = { - // Fetch source signals and filter them based on age. - val signals = FilterUtil.tweetSourceAgeFilter( - getConsumerBasedWalsSourceInfo(sourceSignals).toSeq, - params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam)) - - val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates( - ConsumerBasedWalsSimilarityEngine.fromParams(signals, params) - ) - val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map { - _.map { tweetsWithScores => - val sortedCandidates = tweetsWithScores.sortBy(-_.score) - val filteredCandidates = - FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam)) - consumerBasedWalsSimilarityEngine.getScopedStats - .stat("filteredCandidates_size").add(filteredCandidates.size) - - val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore => - { - val similarityEngineInfo = - ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs - ) - ) - } - } - val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - tweetsWithCandidateGenerationInfo.take(maxCandidateNum) - } - } - for { - tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut - } yield tweetsWithCandidateGenerationInfoOpt.toSeq.flatten - } -} - -object AdsCandidateSourcesRouter { - def getSimClustersANNEmbeddingType( - sourceInfo: SourceInfo - ): EmbeddingType = { - sourceInfo.sourceType match { - case SourceType.TweetFavorite | SourceType.Retweet | SourceType.OriginalTweet | - SourceType.Reply | SourceType.TweetShare | SourceType.NotificationClick | - SourceType.GoodTweetClick | SourceType.VideoTweetQualityView | - SourceType.VideoTweetPlayback50 => - EmbeddingType.LogFavLongestL2EmbeddingTweet - case SourceType.UserFollow | SourceType.UserRepeatedProfileVisit | SourceType.RealGraphOon | - SourceType.FollowRecommendation | SourceType.UserTrafficAttributionProfileVisit | - SourceType.GoodProfileClick | SourceType.TwiceUserId => - EmbeddingType.FavBasedProducer - case _ => throw new IllegalArgumentException("sourceInfo.sourceType not supported") - } - } - - def buildHnswANNQuery(internalId: InternalId, modelId: String): HnswANNEngineQuery = { - HnswANNEngineQuery( - sourceId = internalId, - modelId = modelId, - params = Params.Empty - ) - } - - def getConsumerBasedWalsSourceInfo( - sourceSignals: Set[SourceInfo] - ): Set[SourceInfo] = { - val AllowedSourceTypesForConsumerBasedWalsSE = Set( - SourceType.TweetFavorite.value, - SourceType.Retweet.value, - SourceType.TweetDontLike.value, //currently no-op - SourceType.TweetReport.value, //currently no-op - SourceType.AccountMute.value, //currently no-op - SourceType.AccountBlock.value //currently no-op - ) - sourceSignals.collect { - case sourceInfo - if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) => - sourceInfo - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/BUILD deleted file mode 100644 index f1b6e6980..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/BUILD +++ /dev/null @@ -1,51 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "ann/src/main/scala/com/twitter/ann/hnsw", - "ann/src/main/thrift/com/twitter/ann/common:ann-common-scala", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "cuad/projects/hashspace/thrift:thrift-scala", - "decider/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", - "frigate/frigate-common:base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util", - "hermit/hermit-core/src/main/scala/com/twitter/hermit/constants", - "hermit/hermit-core/src/main/scala/com/twitter/hermit/model", - "simclusters-ann/thrift/src/main/thrift:thrift-scala", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/scala/com/twitter/cortex/ml/embeddings/common:Helpers", - "src/scala/com/twitter/ml/featurestore/lib", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala", - "src/thrift/com/twitter/ml/api:embedding-scala", - "src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - "strato/config/columns/cuad/hashspace:hashspace-strato-client", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala deleted file mode 100644 index 49cc37bde..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala +++ /dev/null @@ -1,536 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TripTweetWithScore -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.param.ConsumerBasedWalsParams -import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwHINSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTwoTowerSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.EngineQuery -import com.twitter.cr_mixer.similarity_engine.FilterUtil -import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TripEngineQuery -import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/** - * Route the SourceInfo to the associated Candidate Engines. - */ -@Singleton -case class CandidateSourcesRouter @Inject() ( - customizedRetrievalCandidateGeneration: CustomizedRetrievalCandidateGeneration, - simClustersInterestedInCandidateGeneration: SimClustersInterestedInCandidateGeneration, - @Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) - tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ], - @Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) - producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[ - ProducerBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ], - @Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine) - consumerEmbeddingBasedTripSimilarityEngine: StandardSimilarityEngine[ - TripEngineQuery, - TripTweetWithScore - ], - @Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) - consumerBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine, - @Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine) - consumerBasedTwoTowerSimilarityEngine: HnswANNSimilarityEngine, - @Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine) - consumersBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[ - ConsumersBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[ - UserTweetEntityGraphSimilarityEngine.Query, - TweetWithScoreAndSocialProof - ], - @Named(ModuleNames.ConsumerBasedWalsSimilarityEngine) - consumerBasedWalsSimilarityEngine: StandardSimilarityEngine[ - ConsumerBasedWalsSimilarityEngine.Query, - TweetWithScore - ], - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - globalStats: StatsReceiver, -) { - - import CandidateSourcesRouter._ - val stats: StatsReceiver = globalStats.scope(this.getClass.getSimpleName) - - def fetchCandidates( - requestUserId: UserId, - sourceSignals: Set[SourceInfo], - sourceGraphs: Map[String, Option[GraphSourceInfo]], - params: configapi.Params, - ): Future[Seq[Seq[InitialCandidate]]] = { - - val tweetBasedCandidatesFuture = getCandidates( - getTweetBasedSourceInfo(sourceSignals), - params, - TweetBasedUnifiedSimilarityEngine.fromParams, - tweetBasedUnifiedSimilarityEngine.getCandidates) - - val producerBasedCandidatesFuture = - getCandidates( - getProducerBasedSourceInfo(sourceSignals), - params, - ProducerBasedUnifiedSimilarityEngine.fromParams(_, _), - producerBasedUnifiedSimilarityEngine.getCandidates - ) - - val simClustersInterestedInBasedCandidatesFuture = - getCandidatesPerSimilarityEngineModel( - requestUserId, - params, - SimClustersInterestedInCandidateGeneration.fromParams, - simClustersInterestedInCandidateGeneration.get) - - val consumerEmbeddingBasedLogFavBasedTripCandidatesFuture = - if (params( - ConsumerEmbeddingBasedCandidateGenerationParams.EnableLogFavBasedSimClustersTripParam)) { - getSimClustersTripCandidates( - params, - ConsumerEmbeddingBasedTripSimilarityEngine.fromParams( - ModelConfig.ConsumerLogFavBasedInterestedInEmbedding, - InternalId.UserId(requestUserId), - params - ), - consumerEmbeddingBasedTripSimilarityEngine - ).map { - Seq(_) - } - } else - Future.Nil - - val consumersBasedUvgRealGraphInCandidatesFuture = - if (params(ConsumersBasedUserVideoGraphParams.EnableSourceParam)) { - val realGraphInGraphSourceInfoOpt = - getGraphSourceInfoBySourceType(SourceType.RealGraphIn.name, sourceGraphs) - - getGraphBasedCandidates( - params, - ConsumersBasedUserVideoGraphSimilarityEngine - .fromParamsForRealGraphIn( - realGraphInGraphSourceInfoOpt - .map { graphSourceInfo => graphSourceInfo.seedWithScores }.getOrElse(Map.empty), - params), - consumersBasedUserVideoGraphSimilarityEngine, - ConsumersBasedUserVideoGraphSimilarityEngine.toSimilarityEngineInfo, - realGraphInGraphSourceInfoOpt - ).map { - Seq(_) - } - } else Future.Nil - - val consumerEmbeddingBasedFollowBasedTripCandidatesFuture = - if (params( - ConsumerEmbeddingBasedCandidateGenerationParams.EnableFollowBasedSimClustersTripParam)) { - getSimClustersTripCandidates( - params, - ConsumerEmbeddingBasedTripSimilarityEngine.fromParams( - ModelConfig.ConsumerFollowBasedInterestedInEmbedding, - InternalId.UserId(requestUserId), - params - ), - consumerEmbeddingBasedTripSimilarityEngine - ).map { - Seq(_) - } - } else - Future.Nil - - val consumerBasedWalsCandidatesFuture = - if (params( - ConsumerBasedWalsParams.EnableSourceParam - )) { - getConsumerBasedWalsCandidates(sourceSignals, params) - }.map { Seq(_) } - else Future.Nil - - val consumerEmbeddingBasedTwHINCandidatesFuture = - if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwHINParam)) { - getHnswCandidates( - params, - ConsumerEmbeddingBasedTwHINSimilarityEngine.fromParams( - InternalId.UserId(requestUserId), - params), - consumerBasedTwHINANNSimilarityEngine - ).map { Seq(_) } - } else Future.Nil - - val consumerEmbeddingBasedTwoTowerCandidatesFuture = - if (params(ConsumerEmbeddingBasedCandidateGenerationParams.EnableTwoTowerParam)) { - getHnswCandidates( - params, - ConsumerEmbeddingBasedTwoTowerSimilarityEngine.fromParams( - InternalId.UserId(requestUserId), - params), - consumerBasedTwoTowerSimilarityEngine - ).map { - Seq(_) - } - } else Future.Nil - - val customizedRetrievalBasedCandidatesFuture = - getCandidatesPerSimilarityEngineModel( - requestUserId, - params, - CustomizedRetrievalCandidateGeneration.fromParams, - customizedRetrievalCandidateGeneration.get) - - Future - .collect( - Seq( - tweetBasedCandidatesFuture, - producerBasedCandidatesFuture, - simClustersInterestedInBasedCandidatesFuture, - consumerBasedWalsCandidatesFuture, - consumerEmbeddingBasedLogFavBasedTripCandidatesFuture, - consumerEmbeddingBasedFollowBasedTripCandidatesFuture, - consumerEmbeddingBasedTwHINCandidatesFuture, - consumerEmbeddingBasedTwoTowerCandidatesFuture, - consumersBasedUvgRealGraphInCandidatesFuture, - customizedRetrievalBasedCandidatesFuture - )).map { candidatesList => - // remove empty innerSeq - val result = candidatesList.flatten.filter(_.nonEmpty) - stats.stat("numOfSequences").add(result.size) - stats.stat("flattenCandidatesWithDup").add(result.flatten.size) - - result - } - } - - private def getGraphBasedCandidates[QueryType]( - params: configapi.Params, - query: EngineQuery[QueryType], - engine: StandardSimilarityEngine[QueryType, TweetWithScore], - toSimilarityEngineInfo: Double => SimilarityEngineInfo, - graphSourceInfoOpt: Option[GraphSourceInfo] = None - ): Future[Seq[InitialCandidate]] = { - val candidatesOptFut = engine.getCandidates(query) - val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map { - _.map { tweetsWithScores => - val sortedCandidates = tweetsWithScores.sortBy(-_.score) - engine.getScopedStats.stat("sortedCandidates_size").add(sortedCandidates.size) - val tweetsWithCandidateGenerationInfo = sortedCandidates.map { tweetWithScore => - { - val similarityEngineInfo = toSimilarityEngineInfo(tweetWithScore.score) - val sourceInfo = graphSourceInfoOpt.map { graphSourceInfo => - // The internalId is a placeholder value. We do not plan to store the full seedUserId set. - SourceInfo( - sourceType = graphSourceInfo.sourceType, - internalId = InternalId.UserId(0L), - sourceEventTime = None - ) - } - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - sourceInfo, - similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs - ) - ) - } - } - val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - tweetsWithCandidateGenerationInfo.take(maxCandidateNum) - } - } - for { - tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut - initialCandidates <- convertToInitialCandidates( - tweetsWithCandidateGenerationInfoOpt.toSeq.flatten) - } yield initialCandidates - } - - private def getCandidates[QueryType]( - sourceSignals: Set[SourceInfo], - params: configapi.Params, - fromParams: (SourceInfo, configapi.Params) => QueryType, - getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - val queries = sourceSignals.map { sourceInfo => - fromParams(sourceInfo, params) - }.toSeq - - Future - .collect { - queries.map { query => - for { - candidates <- getFunc(query) - prefilterCandidates <- convertToInitialCandidates(candidates.toSeq.flatten) - } yield { - prefilterCandidates - } - } - } - } - - private def getConsumerBasedWalsCandidates( - sourceSignals: Set[SourceInfo], - params: configapi.Params - ): Future[Seq[InitialCandidate]] = { - // Fetch source signals and filter them based on age. - val signals = FilterUtil.tweetSourceAgeFilter( - getConsumerBasedWalsSourceInfo(sourceSignals).toSeq, - params(ConsumerBasedWalsParams.MaxTweetSignalAgeHoursParam)) - - val candidatesOptFut = consumerBasedWalsSimilarityEngine.getCandidates( - ConsumerBasedWalsSimilarityEngine.fromParams(signals, params) - ) - val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map { - _.map { tweetsWithScores => - val sortedCandidates = tweetsWithScores.sortBy(-_.score) - val filteredCandidates = - FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam)) - consumerBasedWalsSimilarityEngine.getScopedStats - .stat("filteredCandidates_size").add(filteredCandidates.size) - - val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore => - { - val similarityEngineInfo = - ConsumerBasedWalsSimilarityEngine.toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs - ) - ) - } - } - val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - tweetsWithCandidateGenerationInfo.take(maxCandidateNum) - } - } - for { - tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut - initialCandidates <- convertToInitialCandidates( - tweetsWithCandidateGenerationInfoOpt.toSeq.flatten) - } yield initialCandidates - } - - private def getSimClustersTripCandidates( - params: configapi.Params, - query: TripEngineQuery, - engine: StandardSimilarityEngine[ - TripEngineQuery, - TripTweetWithScore - ], - ): Future[Seq[InitialCandidate]] = { - val tweetsWithCandidatesGenerationInfoOptFut = - engine.getCandidates(EngineQuery(query, params)).map { - _.map { - _.map { tweetWithScore => - // define filters - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - SimilarityEngineInfo( - SimilarityEngineType.ExploreTripOfflineSimClustersTweets, - None, - Some(tweetWithScore.score)), - Seq.empty - ) - ) - } - } - } - for { - tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidatesGenerationInfoOptFut - initialCandidates <- convertToInitialCandidates( - tweetsWithCandidateGenerationInfoOpt.toSeq.flatten) - } yield initialCandidates - } - - private def getHnswCandidates( - params: configapi.Params, - query: HnswANNEngineQuery, - engine: HnswANNSimilarityEngine, - ): Future[Seq[InitialCandidate]] = { - val candidatesOptFut = engine.getCandidates(query) - val tweetsWithCandidateGenerationInfoOptFut = candidatesOptFut.map { - _.map { tweetsWithScores => - val sortedCandidates = tweetsWithScores.sortBy(-_.score) - val filteredCandidates = - FilterUtil.tweetAgeFilter(sortedCandidates, params(GlobalParams.MaxTweetAgeHoursParam)) - engine.getScopedStats.stat("filteredCandidates_size").add(filteredCandidates.size) - val tweetsWithCandidateGenerationInfo = filteredCandidates.map { tweetWithScore => - { - val similarityEngineInfo = - engine.toSimilarityEngineInfo(query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs - ) - ) - } - } - val maxCandidateNum = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - tweetsWithCandidateGenerationInfo.take(maxCandidateNum) - } - } - for { - tweetsWithCandidateGenerationInfoOpt <- tweetsWithCandidateGenerationInfoOptFut - initialCandidates <- convertToInitialCandidates( - tweetsWithCandidateGenerationInfoOpt.toSeq.flatten) - } yield initialCandidates - } - - /** - * Returns candidates from each similarity engine separately. - * For 1 requestUserId, it will fetch results from each similarity engine e_i, - * and returns Seq[Seq[TweetCandidate]]. - */ - private def getCandidatesPerSimilarityEngineModel[QueryType]( - requestUserId: UserId, - params: configapi.Params, - fromParams: (InternalId, configapi.Params) => QueryType, - getFunc: QueryType => Future[ - Option[Seq[Seq[TweetWithCandidateGenerationInfo]]] - ] - ): Future[Seq[Seq[InitialCandidate]]] = { - val query = fromParams(InternalId.UserId(requestUserId), params) - getFunc(query).flatMap { candidatesPerSimilarityEngineModelOpt => - val candidatesPerSimilarityEngineModel = candidatesPerSimilarityEngineModelOpt.toSeq.flatten - Future.collect { - candidatesPerSimilarityEngineModel.map(convertToInitialCandidates) - } - } - } - - private[candidate_generation] def convertToInitialCandidates( - candidates: Seq[TweetWithCandidateGenerationInfo], - ): Future[Seq[InitialCandidate]] = { - val tweetIds = candidates.map(_.tweetId).toSet - Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /*** - * If tweetInfo does not exist, we will filter out this tweet candidate. - */ - candidates.collect { - case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => - val tweetInfo = tweetInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialCandidate( - tweetId = candidate.tweetId, - tweetInfo = tweetInfo, - candidate.candidateGenerationInfo - ) - } - } - } -} - -object CandidateSourcesRouter { - def getGraphSourceInfoBySourceType( - sourceTypeStr: String, - sourceGraphs: Map[String, Option[GraphSourceInfo]] - ): Option[GraphSourceInfo] = { - sourceGraphs.getOrElse(sourceTypeStr, None) - } - - def getTweetBasedSourceInfo( - sourceSignals: Set[SourceInfo] - ): Set[SourceInfo] = { - sourceSignals.collect { - case sourceInfo - if AllowedSourceTypesForTweetBasedUnifiedSE.contains(sourceInfo.sourceType.value) => - sourceInfo - } - } - - def getProducerBasedSourceInfo( - sourceSignals: Set[SourceInfo] - ): Set[SourceInfo] = { - sourceSignals.collect { - case sourceInfo - if AllowedSourceTypesForProducerBasedUnifiedSE.contains(sourceInfo.sourceType.value) => - sourceInfo - } - } - - def getConsumerBasedWalsSourceInfo( - sourceSignals: Set[SourceInfo] - ): Set[SourceInfo] = { - sourceSignals.collect { - case sourceInfo - if AllowedSourceTypesForConsumerBasedWalsSE.contains(sourceInfo.sourceType.value) => - sourceInfo - } - } - - /*** - * Signal funneling should not exist in CG or even in any SimilarityEngine. - * They will be in Router, or eventually, in CrCandidateGenerator. - */ - val AllowedSourceTypesForConsumerBasedWalsSE = Set( - SourceType.TweetFavorite.value, - SourceType.Retweet.value, - SourceType.TweetDontLike.value, //currently no-op - SourceType.TweetReport.value, //currently no-op - SourceType.AccountMute.value, //currently no-op - SourceType.AccountBlock.value //currently no-op - ) - val AllowedSourceTypesForTweetBasedUnifiedSE = Set( - SourceType.TweetFavorite.value, - SourceType.Retweet.value, - SourceType.OriginalTweet.value, - SourceType.Reply.value, - SourceType.TweetShare.value, - SourceType.NotificationClick.value, - SourceType.GoodTweetClick.value, - SourceType.VideoTweetQualityView.value, - SourceType.VideoTweetPlayback50.value, - SourceType.TweetAggregation.value, - ) - val AllowedSourceTypesForProducerBasedUnifiedSE = Set( - SourceType.UserFollow.value, - SourceType.UserRepeatedProfileVisit.value, - SourceType.RealGraphOon.value, - SourceType.FollowRecommendation.value, - SourceType.UserTrafficAttributionProfileVisit.value, - SourceType.GoodProfileClick.value, - SourceType.ProducerAggregation.value, - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala deleted file mode 100644 index c69d0c4f2..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala +++ /dev/null @@ -1,350 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.cr_mixer.blender.SwitchBlender -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.filter.PostRankFilterRunner -import com.twitter.cr_mixer.filter.PreRankFilterRunner -import com.twitter.cr_mixer.logging.CrMixerScribeLogger -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.param.RankerParams -import com.twitter.cr_mixer.param.RecentNegativeSignalParams -import com.twitter.cr_mixer.ranker.SwitchRanker -import com.twitter.cr_mixer.source_signal.SourceInfoRouter -import com.twitter.cr_mixer.source_signal.UssStore.EnabledNegativeSourceTypes -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.util.Future -import com.twitter.util.JavaTimer -import com.twitter.util.Timer - -import javax.inject.Inject -import javax.inject.Singleton - -/** - * For now it performs the main steps as follows: - * 1. Source signal (via USS, FRS) fetch - * 2. Candidate generation - * 3. Filtering - * 4. Interleave blender - * 5. Ranker - * 6. Post-ranker filter - * 7. Truncation - */ -@Singleton -class CrCandidateGenerator @Inject() ( - sourceInfoRouter: SourceInfoRouter, - candidateSourceRouter: CandidateSourcesRouter, - switchBlender: SwitchBlender, - preRankFilterRunner: PreRankFilterRunner, - postRankFilterRunner: PostRankFilterRunner, - switchRanker: SwitchRanker, - crMixerScribeLogger: CrMixerScribeLogger, - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) { - private val timer: Timer = new JavaTimer(true) - - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - - private val fetchSourcesStats = stats.scope("fetchSources") - private val fetchPositiveSourcesStats = stats.scope("fetchPositiveSources") - private val fetchNegativeSourcesStats = stats.scope("fetchNegativeSources") - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val fetchCandidatesAfterFilterStats = stats.scope("fetchCandidatesAfterFilter") - private val preRankFilterStats = stats.scope("preRankFilter") - private val interleaveStats = stats.scope("interleave") - private val rankStats = stats.scope("rank") - private val postRankFilterStats = stats.scope("postRankFilter") - private val blueVerifiedTweetStats = stats.scope("blueVerifiedTweetStats") - private val blueVerifiedTweetStatsPerSimilarityEngine = - stats.scope("blueVerifiedTweetStatsPerSimilarityEngine") - - def get(query: CrCandidateGeneratorQuery): Future[Seq[RankedCandidate]] = { - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", query.product.toString) - val perProductBlueVerifiedStats = - blueVerifiedTweetStats.scope("perProduct", query.product.toString) - - StatsUtil.trackItemsStats(allStats) { - trackResultStats(perProductStats) { - StatsUtil.trackItemsStats(perProductStats) { - val result = for { - (sourceSignals, sourceGraphsMap) <- StatsUtil.trackBlockStats(fetchSourcesStats) { - fetchSources(query) - } - initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) { - // find the positive and negative signals - val (positiveSignals, negativeSignals) = sourceSignals.partition { signal => - !EnabledNegativeSourceTypes.contains(signal.sourceType) - } - fetchPositiveSourcesStats.stat("size").add(positiveSignals.size) - fetchNegativeSourcesStats.stat("size").add(negativeSignals.size) - - // find the positive signals to keep, removing block and muted users - val filteredSourceInfo = - if (negativeSignals.nonEmpty && query.params( - RecentNegativeSignalParams.EnableSourceParam)) { - filterSourceInfo(positiveSignals, negativeSignals) - } else { - positiveSignals - } - - // fetch candidates from the positive signals - StatsUtil.trackBlockStats(fetchCandidatesStats) { - fetchCandidates(query, filteredSourceInfo, sourceGraphsMap) - } - } - filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) { - preRankFilter(query, initialCandidates) - } - interleavedCandidates <- StatsUtil.trackItemsStats(interleaveStats) { - interleave(query, filteredCandidates) - } - rankedCandidates <- StatsUtil.trackItemsStats(rankStats) { - val candidatesToRank = - interleavedCandidates.take(query.params(RankerParams.MaxCandidatesToRank)) - rank(query, candidatesToRank) - } - postRankFilterCandidates <- StatsUtil.trackItemsStats(postRankFilterStats) { - postRankFilter(query, rankedCandidates) - } - } yield { - trackTopKStats( - 800, - postRankFilterCandidates, - isQueryK = false, - perProductBlueVerifiedStats) - trackTopKStats( - 400, - postRankFilterCandidates, - isQueryK = false, - perProductBlueVerifiedStats) - trackTopKStats( - query.maxNumResults, - postRankFilterCandidates, - isQueryK = true, - perProductBlueVerifiedStats) - - val (blueVerifiedTweets, remainingTweets) = - postRankFilterCandidates.partition( - _.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) - val topKBlueVerified = blueVerifiedTweets.take(query.maxNumResults) - val topKRemaining = remainingTweets.take(query.maxNumResults - topKBlueVerified.size) - - trackBlueVerifiedTweetStats(topKBlueVerified, perProductBlueVerifiedStats) - - if (topKBlueVerified.nonEmpty && query.params(RankerParams.EnableBlueVerifiedTopK)) { - topKBlueVerified ++ topKRemaining - } else { - postRankFilterCandidates - } - } - result.raiseWithin(timeoutConfig.serviceTimeout)(timer) - } - } - } - } - - private def fetchSources( - query: CrCandidateGeneratorQuery - ): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = { - crMixerScribeLogger.scribeSignalSources( - query, - sourceInfoRouter - .get(query.userId, query.product, query.userState, query.params)) - } - - private def filterSourceInfo( - positiveSignals: Set[SourceInfo], - negativeSignals: Set[SourceInfo] - ): Set[SourceInfo] = { - val filterUsers: Set[Long] = negativeSignals.flatMap { - case SourceInfo(_, InternalId.UserId(userId), _) => Some(userId) - case _ => None - } - - positiveSignals.filter { - case SourceInfo(_, InternalId.UserId(userId), _) => !filterUsers.contains(userId) - case _ => true - } - } - - def fetchCandidates( - query: CrCandidateGeneratorQuery, - sourceSignals: Set[SourceInfo], - sourceGraphs: Map[String, Option[GraphSourceInfo]] - ): Future[Seq[Seq[InitialCandidate]]] = { - val initialCandidates = candidateSourceRouter - .fetchCandidates( - query.userId, - sourceSignals, - sourceGraphs, - query.params - ) - - initialCandidates.map(_.flatten.map { candidate => - if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) { - blueVerifiedTweetStatsPerSimilarityEngine - .scope(query.product.toString).scope( - candidate.candidateGenerationInfo.contributingSimilarityEngines.head.similarityEngineType.toString).counter( - candidate.tweetInfo.authorId.toString).incr() - } - }) - - crMixerScribeLogger.scribeInitialCandidates( - query, - initialCandidates - ) - } - - private def preRankFilter( - query: CrCandidateGeneratorQuery, - candidates: Seq[Seq[InitialCandidate]] - ): Future[Seq[Seq[InitialCandidate]]] = { - crMixerScribeLogger.scribePreRankFilterCandidates( - query, - preRankFilterRunner - .runSequentialFilters(query, candidates)) - } - - private def postRankFilter( - query: CrCandidateGeneratorQuery, - candidates: Seq[RankedCandidate] - ): Future[Seq[RankedCandidate]] = { - postRankFilterRunner.run(query, candidates) - } - - private def interleave( - query: CrCandidateGeneratorQuery, - candidates: Seq[Seq[InitialCandidate]] - ): Future[Seq[BlendedCandidate]] = { - crMixerScribeLogger.scribeInterleaveCandidates( - query, - switchBlender - .blend(query.params, query.userState, candidates)) - } - - private def rank( - query: CrCandidateGeneratorQuery, - candidates: Seq[BlendedCandidate], - ): Future[Seq[RankedCandidate]] = { - crMixerScribeLogger.scribeRankedCandidates( - query, - switchRanker.rank(query, candidates) - ) - } - - private def trackResultStats( - stats: StatsReceiver - )( - fn: => Future[Seq[RankedCandidate]] - ): Future[Seq[RankedCandidate]] = { - fn.onSuccess { candidates => - trackReasonChosenSourceTypeStats(candidates, stats) - trackReasonChosenSimilarityEngineStats(candidates, stats) - trackPotentialReasonsSourceTypeStats(candidates, stats) - trackPotentialReasonsSimilarityEngineStats(candidates, stats) - } - } - - private def trackReasonChosenSourceTypeStats( - candidates: Seq[RankedCandidate], - stats: StatsReceiver - ): Unit = { - candidates - .groupBy(_.reasonChosen.sourceInfoOpt.map(_.sourceType)) - .foreach { - case (sourceTypeOpt, rankedCands) => - val sourceType = sourceTypeOpt.map(_.toString).getOrElse("RequesterId") // default - stats.stat("reasonChosen", "sourceType", sourceType, "size").add(rankedCands.size) - } - } - - private def trackReasonChosenSimilarityEngineStats( - candidates: Seq[RankedCandidate], - stats: StatsReceiver - ): Unit = { - candidates - .groupBy(_.reasonChosen.similarityEngineInfo.similarityEngineType) - .foreach { - case (seInfoType, rankedCands) => - stats - .stat("reasonChosen", "similarityEngine", seInfoType.toString, "size").add( - rankedCands.size) - } - } - - private def trackPotentialReasonsSourceTypeStats( - candidates: Seq[RankedCandidate], - stats: StatsReceiver - ): Unit = { - candidates - .flatMap(_.potentialReasons.map(_.sourceInfoOpt.map(_.sourceType))) - .groupBy(source => source) - .foreach { - case (sourceInfoOpt, seq) => - val sourceType = sourceInfoOpt.map(_.toString).getOrElse("RequesterId") // default - stats.stat("potentialReasons", "sourceType", sourceType, "size").add(seq.size) - } - } - - private def trackPotentialReasonsSimilarityEngineStats( - candidates: Seq[RankedCandidate], - stats: StatsReceiver - ): Unit = { - candidates - .flatMap(_.potentialReasons.map(_.similarityEngineInfo.similarityEngineType)) - .groupBy(se => se) - .foreach { - case (seType, seq) => - stats.stat("potentialReasons", "similarityEngine", seType.toString, "size").add(seq.size) - } - } - - private def trackBlueVerifiedTweetStats( - candidates: Seq[RankedCandidate], - statsReceiver: StatsReceiver - ): Unit = { - candidates.foreach { candidate => - if (candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true)) { - statsReceiver.counter(candidate.tweetInfo.authorId.toString).incr() - statsReceiver - .scope(candidate.tweetInfo.authorId.toString).counter(candidate.tweetId.toString).incr() - } - } - } - - private def trackTopKStats( - k: Int, - tweetCandidates: Seq[RankedCandidate], - isQueryK: Boolean, - statsReceiver: StatsReceiver - ): Unit = { - val (topK, beyondK) = tweetCandidates.splitAt(k) - - val blueVerifiedIds = tweetCandidates.collect { - case candidate if candidate.tweetInfo.hasBlueVerifiedAnnotation.contains(true) => - candidate.tweetInfo.authorId - }.toSet - - blueVerifiedIds.foreach { blueVerifiedId => - val numTweetsTopK = topK.count(_.tweetInfo.authorId == blueVerifiedId) - val numTweetsBeyondK = beyondK.count(_.tweetInfo.authorId == blueVerifiedId) - - if (isQueryK) { - statsReceiver.scope(blueVerifiedId.toString).stat(s"topK").add(numTweetsTopK) - statsReceiver - .scope(blueVerifiedId.toString).stat(s"beyondK").add(numTweetsBeyondK) - } else { - statsReceiver.scope(blueVerifiedId.toString).stat(s"top$k").add(numTweetsTopK) - statsReceiver - .scope(blueVerifiedId.toString).stat(s"beyond$k").add(numTweetsBeyondK) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala deleted file mode 100644 index 427dd9b74..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala +++ /dev/null @@ -1,345 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.cr_mixer.candidate_generation.CustomizedRetrievalCandidateGeneration.Query -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.CustomizedRetrievalBasedCandidateGenerationParams._ -import com.twitter.cr_mixer.param.CustomizedRetrievalBasedTwhinParams._ -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.LookupEngineQuery -import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.base.CandidateSource -import com.twitter.frigate.common.base.Stats -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton -import scala.collection.mutable.ArrayBuffer - -/** - * A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources - * - * Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different - * similarity engines without blending. In other words, this class shall not be thought of as a - * Unified Similarity Engine. It is a CG that calls multiple singular Similarity Engines. - */ -@Singleton -case class CustomizedRetrievalCandidateGeneration @Inject() ( - @Named(ModuleNames.TwhinCollabFilterSimilarityEngine) - twhinCollabFilterSimilarityEngine: LookupSimilarityEngine[ - TwhinCollabFilterSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.DiffusionBasedSimilarityEngine) - diffusionBasedSimilarityEngine: LookupSimilarityEngine[ - DiffusionBasedSimilarityEngine.Query, - TweetWithScore - ], - statsReceiver: StatsReceiver) - extends CandidateSource[ - Query, - Seq[TweetWithCandidateGenerationInfo] - ] { - - override def name: String = this.getClass.getSimpleName - - private val stats = statsReceiver.scope(name) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - /** - * For each Similarity Engine Model, return a list of tweet candidates - */ - override def get( - query: Query - ): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = { - query.internalId match { - case InternalId.UserId(_) => - Stats.trackOption(fetchCandidatesStat) { - val twhinCollabFilterForFollowCandidatesFut = if (query.enableTwhinCollabFilter) { - twhinCollabFilterSimilarityEngine.getCandidates(query.twhinCollabFilterFollowQuery) - } else Future.None - - val twhinCollabFilterForEngagementCandidatesFut = - if (query.enableTwhinCollabFilter) { - twhinCollabFilterSimilarityEngine.getCandidates( - query.twhinCollabFilterEngagementQuery) - } else Future.None - - val twhinMultiClusterForFollowCandidatesFut = if (query.enableTwhinMultiCluster) { - twhinCollabFilterSimilarityEngine.getCandidates(query.twhinMultiClusterFollowQuery) - } else Future.None - - val twhinMultiClusterForEngagementCandidatesFut = - if (query.enableTwhinMultiCluster) { - twhinCollabFilterSimilarityEngine.getCandidates( - query.twhinMultiClusterEngagementQuery) - } else Future.None - - val diffusionBasedSimilarityEngineCandidatesFut = if (query.enableRetweetBasedDiffusion) { - diffusionBasedSimilarityEngine.getCandidates(query.diffusionBasedSimilarityEngineQuery) - } else Future.None - - Future - .join( - twhinCollabFilterForFollowCandidatesFut, - twhinCollabFilterForEngagementCandidatesFut, - twhinMultiClusterForFollowCandidatesFut, - twhinMultiClusterForEngagementCandidatesFut, - diffusionBasedSimilarityEngineCandidatesFut - ).map { - case ( - twhinCollabFilterForFollowCandidates, - twhinCollabFilterForEngagementCandidates, - twhinMultiClusterForFollowCandidates, - twhinMultiClusterForEngagementCandidates, - diffusionBasedSimilarityEngineCandidates) => - val maxCandidateNumPerSourceKey = 200 - val twhinCollabFilterForFollowWithCGInfo = - getTwhinCollabCandidatesWithCGInfo( - twhinCollabFilterForFollowCandidates, - maxCandidateNumPerSourceKey, - query.twhinCollabFilterFollowQuery, - ) - val twhinCollabFilterForEngagementWithCGInfo = - getTwhinCollabCandidatesWithCGInfo( - twhinCollabFilterForEngagementCandidates, - maxCandidateNumPerSourceKey, - query.twhinCollabFilterEngagementQuery, - ) - val twhinMultiClusterForFollowWithCGInfo = - getTwhinCollabCandidatesWithCGInfo( - twhinMultiClusterForFollowCandidates, - maxCandidateNumPerSourceKey, - query.twhinMultiClusterFollowQuery, - ) - val twhinMultiClusterForEngagementWithCGInfo = - getTwhinCollabCandidatesWithCGInfo( - twhinMultiClusterForEngagementCandidates, - maxCandidateNumPerSourceKey, - query.twhinMultiClusterEngagementQuery, - ) - val retweetBasedDiffusionWithCGInfo = - getDiffusionBasedCandidatesWithCGInfo( - diffusionBasedSimilarityEngineCandidates, - maxCandidateNumPerSourceKey, - query.diffusionBasedSimilarityEngineQuery, - ) - - val twhinCollabCandidateSourcesToBeInterleaved = - ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]]( - twhinCollabFilterForFollowWithCGInfo, - twhinCollabFilterForEngagementWithCGInfo, - ) - - val twhinMultiClusterCandidateSourcesToBeInterleaved = - ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]]( - twhinMultiClusterForFollowWithCGInfo, - twhinMultiClusterForEngagementWithCGInfo, - ) - - val interleavedTwhinCollabCandidates = - InterleaveUtil.interleave(twhinCollabCandidateSourcesToBeInterleaved) - - val interleavedTwhinMultiClusterCandidates = - InterleaveUtil.interleave(twhinMultiClusterCandidateSourcesToBeInterleaved) - - val twhinCollabFilterResults = - if (interleavedTwhinCollabCandidates.nonEmpty) { - Some(interleavedTwhinCollabCandidates.take(maxCandidateNumPerSourceKey)) - } else None - - val twhinMultiClusterResults = - if (interleavedTwhinMultiClusterCandidates.nonEmpty) { - Some(interleavedTwhinMultiClusterCandidates.take(maxCandidateNumPerSourceKey)) - } else None - - val diffusionResults = - if (retweetBasedDiffusionWithCGInfo.nonEmpty) { - Some(retweetBasedDiffusionWithCGInfo.take(maxCandidateNumPerSourceKey)) - } else None - - Some( - Seq( - twhinCollabFilterResults, - twhinMultiClusterResults, - diffusionResults - ).flatten) - } - } - case _ => - throw new IllegalArgumentException("sourceId_is_not_userId_cnt") - } - } - - /** Returns a list of tweets that are generated less than `maxTweetAgeHours` hours ago */ - private def tweetAgeFilter( - candidates: Seq[TweetWithScore], - maxTweetAgeHours: Duration - ): Seq[TweetWithScore] = { - // Tweet IDs are approximately chronological (see http://go/snowflake), - // so we are building the earliest tweet id once - // The per-candidate logic here then be candidate.tweetId > earliestPermittedTweetId, which is far cheaper. - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAgeHours) - candidates.filter { candidate => candidate.tweetId >= earliestTweetId } - } - - /** - * AgeFilters tweetCandidates with stats - * Only age filter logic is effective here (through tweetAgeFilter). This function acts mostly for metric logging. - */ - private def ageFilterWithStats( - offlineInterestedInCandidates: Seq[TweetWithScore], - maxTweetAgeHours: Duration, - scopedStatsReceiver: StatsReceiver - ): Seq[TweetWithScore] = { - scopedStatsReceiver.stat("size").add(offlineInterestedInCandidates.size) - val candidates = offlineInterestedInCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - val filteredCandidates = tweetAgeFilter(candidates, maxTweetAgeHours) - scopedStatsReceiver.stat(f"filtered_size").add(filteredCandidates.size) - if (filteredCandidates.isEmpty) scopedStatsReceiver.counter(f"empty").incr() - - filteredCandidates - } - - private def getTwhinCollabCandidatesWithCGInfo( - tweetCandidates: Option[Seq[TweetWithScore]], - maxCandidateNumPerSourceKey: Int, - twhinCollabFilterQuery: LookupEngineQuery[ - TwhinCollabFilterSimilarityEngine.Query - ], - ): Seq[TweetWithCandidateGenerationInfo] = { - val twhinTweets = tweetCandidates match { - case Some(tweetsWithScores) => - tweetsWithScores.map { tweetWithScore => - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - TwhinCollabFilterSimilarityEngine - .toSimilarityEngineInfo(twhinCollabFilterQuery, tweetWithScore.score), - Seq.empty - ) - ) - } - case _ => Seq.empty - } - twhinTweets.take(maxCandidateNumPerSourceKey) - } - - private def getDiffusionBasedCandidatesWithCGInfo( - tweetCandidates: Option[Seq[TweetWithScore]], - maxCandidateNumPerSourceKey: Int, - diffusionBasedSimilarityEngineQuery: LookupEngineQuery[ - DiffusionBasedSimilarityEngine.Query - ], - ): Seq[TweetWithCandidateGenerationInfo] = { - val diffusionTweets = tweetCandidates match { - case Some(tweetsWithScores) => - tweetsWithScores.map { tweetWithScore => - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - DiffusionBasedSimilarityEngine - .toSimilarityEngineInfo(diffusionBasedSimilarityEngineQuery, tweetWithScore.score), - Seq.empty - ) - ) - } - case _ => Seq.empty - } - diffusionTweets.take(maxCandidateNumPerSourceKey) - } -} - -object CustomizedRetrievalCandidateGeneration { - - case class Query( - internalId: InternalId, - maxCandidateNumPerSourceKey: Int, - maxTweetAgeHours: Duration, - // twhinCollabFilter - enableTwhinCollabFilter: Boolean, - twhinCollabFilterFollowQuery: LookupEngineQuery[ - TwhinCollabFilterSimilarityEngine.Query - ], - twhinCollabFilterEngagementQuery: LookupEngineQuery[ - TwhinCollabFilterSimilarityEngine.Query - ], - // twhinMultiCluster - enableTwhinMultiCluster: Boolean, - twhinMultiClusterFollowQuery: LookupEngineQuery[ - TwhinCollabFilterSimilarityEngine.Query - ], - twhinMultiClusterEngagementQuery: LookupEngineQuery[ - TwhinCollabFilterSimilarityEngine.Query - ], - enableRetweetBasedDiffusion: Boolean, - diffusionBasedSimilarityEngineQuery: LookupEngineQuery[ - DiffusionBasedSimilarityEngine.Query - ], - ) - - def fromParams( - internalId: InternalId, - params: configapi.Params - ): Query = { - val twhinCollabFilterFollowQuery = - TwhinCollabFilterSimilarityEngine.fromParams( - internalId, - params(CustomizedRetrievalBasedTwhinCollabFilterFollowSource), - params) - - val twhinCollabFilterEngagementQuery = - TwhinCollabFilterSimilarityEngine.fromParams( - internalId, - params(CustomizedRetrievalBasedTwhinCollabFilterEngagementSource), - params) - - val twhinMultiClusterFollowQuery = - TwhinCollabFilterSimilarityEngine.fromParams( - internalId, - params(CustomizedRetrievalBasedTwhinMultiClusterFollowSource), - params) - - val twhinMultiClusterEngagementQuery = - TwhinCollabFilterSimilarityEngine.fromParams( - internalId, - params(CustomizedRetrievalBasedTwhinMultiClusterEngagementSource), - params) - - val diffusionBasedSimilarityEngineQuery = - DiffusionBasedSimilarityEngine.fromParams( - internalId, - params(CustomizedRetrievalBasedRetweetDiffusionSource), - params) - - Query( - internalId = internalId, - maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam), - // twhinCollabFilter - enableTwhinCollabFilter = params(EnableTwhinCollabFilterClusterParam), - twhinCollabFilterFollowQuery = twhinCollabFilterFollowQuery, - twhinCollabFilterEngagementQuery = twhinCollabFilterEngagementQuery, - enableTwhinMultiCluster = params(EnableTwhinMultiClusterParam), - twhinMultiClusterFollowQuery = twhinMultiClusterFollowQuery, - twhinMultiClusterEngagementQuery = twhinMultiClusterEngagementQuery, - enableRetweetBasedDiffusion = params(EnableRetweetBasedDiffusionParam), - diffusionBasedSimilarityEngineQuery = diffusionBasedSimilarityEngineQuery - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala deleted file mode 100644 index 0c5334c28..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala +++ /dev/null @@ -1,220 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithAuthor -import com.twitter.cr_mixer.param.FrsParams -import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineRouter -import com.twitter.cr_mixer.source_signal.FrsStore -import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult -import com.twitter.cr_mixer.thriftscala.FrsTweet -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.util.DefaultTimer -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.hermit.constants.AlgorithmFeedbackTokens -import com.twitter.hermit.constants.AlgorithmFeedbackTokens.AlgorithmToFeedbackTokenMap -import com.twitter.hermit.model.Algorithm -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/** - * TweetCandidateGenerator based on FRS seed users. For now this candidate generator fetches seed - * users from FRS, and retrieves the seed users' past tweets from Earlybird with Earlybird light - * ranking models. - */ -@Singleton -class FrsTweetCandidateGenerator @Inject() ( - @Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]], - frsBasedSimilarityEngine: EarlybirdSimilarityEngineRouter, - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) { - import FrsTweetCandidateGenerator._ - - private val timer = DefaultTimer - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchSeedsStats = stats.scope("fetchSeeds") - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val filterCandidatesStats = stats.scope("filterCandidates") - private val hydrateCandidatesStats = stats.scope("hydrateCandidates") - private val getCandidatesStats = stats.scope("getCandidates") - - /** - * The function retrieves the candidate for the given user as follows: - * 1. Seed user fetch from FRS. - * 2. Candidate fetch from Earlybird. - * 3. Filtering. - * 4. Candidate hydration. - * 5. Truncation. - */ - def get( - frsTweetCandidateGeneratorQuery: FrsTweetCandidateGeneratorQuery - ): Future[Seq[FrsTweet]] = { - val userId = frsTweetCandidateGeneratorQuery.userId - val product = frsTweetCandidateGeneratorQuery.product - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", product.name) - StatsUtil.trackItemsStats(allStats) { - StatsUtil.trackItemsStats(perProductStats) { - val result = for { - seedAuthorWithScores <- StatsUtil.trackOptionItemMapStats(fetchSeedsStats) { - fetchSeeds( - userId, - frsTweetCandidateGeneratorQuery.impressedUserList, - frsTweetCandidateGeneratorQuery.languageCodeOpt, - frsTweetCandidateGeneratorQuery.countryCodeOpt, - frsTweetCandidateGeneratorQuery.params, - ) - } - tweetCandidates <- StatsUtil.trackOptionItemsStats(fetchCandidatesStats) { - fetchCandidates( - userId, - seedAuthorWithScores.map(_.keys.toSeq).getOrElse(Seq.empty), - frsTweetCandidateGeneratorQuery.impressedTweetList, - seedAuthorWithScores.map(_.mapValues(_.score)).getOrElse(Map.empty), - frsTweetCandidateGeneratorQuery.params - ) - } - filteredTweetCandidates <- StatsUtil.trackOptionItemsStats(filterCandidatesStats) { - filterCandidates( - tweetCandidates, - frsTweetCandidateGeneratorQuery.params - ) - } - hydratedTweetCandidates <- StatsUtil.trackOptionItemsStats(hydrateCandidatesStats) { - hydrateCandidates( - seedAuthorWithScores, - filteredTweetCandidates - ) - } - } yield { - hydratedTweetCandidates - .map(_.take(frsTweetCandidateGeneratorQuery.maxNumResults)).getOrElse(Seq.empty) - } - result.raiseWithin(timeoutConfig.frsBasedTweetEndpointTimeout)(timer) - } - } - } - - /** - * Fetch recommended seed users from FRS - */ - private def fetchSeeds( - userId: UserId, - userDenyList: Set[UserId], - languageCodeOpt: Option[String], - countryCodeOpt: Option[String], - params: Params - ): Future[Option[Map[UserId, FrsQueryResult]]] = { - frsStore - .get( - FrsStore.Query( - userId, - params(FrsParams.FrsBasedCandidateGenerationMaxSeedsNumParam), - params(FrsParams.FrsBasedCandidateGenerationDisplayLocationParam).displayLocation, - userDenyList.toSeq, - languageCodeOpt, - countryCodeOpt - )).map { - _.map { seedAuthors => - seedAuthors.map(user => user.userId -> user).toMap - } - } - } - - /** - * Fetch tweet candidates from Earlybird - */ - private def fetchCandidates( - searcherUserId: UserId, - seedAuthors: Seq[UserId], - impressedTweetList: Set[TweetId], - frsUserToScores: Map[UserId, Double], - params: Params - ): Future[Option[Seq[TweetWithAuthor]]] = { - if (seedAuthors.nonEmpty) { - // call earlybird - val query = EarlybirdSimilarityEngineRouter.queryFromParams( - Some(searcherUserId), - seedAuthors, - impressedTweetList, - frsUserToScoresForScoreAdjustment = Some(frsUserToScores), - params - ) - frsBasedSimilarityEngine.get(query) - } else Future.None - } - - /** - * Filter candidates that do not pass visibility filter policy - */ - private def filterCandidates( - candidates: Option[Seq[TweetWithAuthor]], - params: Params - ): Future[Option[Seq[TweetWithAuthor]]] = { - val tweetIds = candidates.map(_.map(_.tweetId).toSet).getOrElse(Set.empty) - if (params(FrsParams.FrsBasedCandidateGenerationEnableVisibilityFilteringParam)) - Future - .collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - candidates.map { - // If tweetInfo does not exist, we will filter out this tweet candidate. - _.filter(candidate => tweetInfos.getOrElse(candidate.tweetId, None).isDefined) - } - } - else { - Future.value(candidates) - } - } - - /** - * Hydrate the candidates with the FRS candidate sources and scores - */ - private def hydrateCandidates( - frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]], - candidates: Option[Seq[TweetWithAuthor]] - ): Future[Option[Seq[FrsTweet]]] = { - Future.value { - candidates.map { - _.map { tweetWithAuthor => - val frsQueryResult = frsAuthorWithScores.flatMap(_.get(tweetWithAuthor.authorId)) - FrsTweet( - tweetId = tweetWithAuthor.tweetId, - authorId = tweetWithAuthor.authorId, - frsPrimarySource = frsQueryResult.flatMap(_.primarySource), - frsAuthorScore = frsQueryResult.map(_.score), - frsCandidateSourceScores = frsQueryResult.flatMap { result => - result.sourceWithScores.map { - _.collect { - // see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala - // see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala - case (candidateSourceAlgoStr, score) - if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains( - candidateSourceAlgoStr) => - AlgorithmToFeedbackTokenMap.getOrElse( - AlgorithmFeedbackTokens.TokenStrToAlgorithmMap - .getOrElse(candidateSourceAlgoStr, DefaultAlgo), - DefaultAlgoToken) -> score - } - } - } - ) - } - } - } - } - -} - -object FrsTweetCandidateGenerator { - val DefaultAlgo: Algorithm.Value = Algorithm.Other - // 9999 is the token for Algorithm.Other - val DefaultAlgoToken: Int = AlgorithmToFeedbackTokenMap.getOrElse(DefaultAlgo, 9999) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala deleted file mode 100644 index 45a919a57..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala +++ /dev/null @@ -1,156 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.filter.PreRankFilterRunner -import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -class RelatedTweetCandidateGenerator @Inject() ( - @Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ], - @Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) producerBasedUnifiedSimilarityEngine: StandardSimilarityEngine[ - ProducerBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ], - preRankFilterRunner: PreRankFilterRunner, - relatedTweetScribeLogger: RelatedTweetScribeLogger, - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - globalStats: StatsReceiver) { - - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val preRankFilterStats = stats.scope("preRankFilter") - - def get( - query: RelatedTweetCandidateGeneratorQuery - ): Future[Seq[InitialCandidate]] = { - - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", query.product.toString) - StatsUtil.trackItemsStats(allStats) { - StatsUtil.trackItemsStats(perProductStats) { - for { - initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) { - fetchCandidates(query) - } - filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) { - preRankFilter(query, initialCandidates) - } - } yield { - filteredCandidates.headOption - .getOrElse( - throw new UnsupportedOperationException( - "RelatedTweetCandidateGenerator results invalid") - ).take(query.maxNumResults) - } - } - } - } - - def fetchCandidates( - query: RelatedTweetCandidateGeneratorQuery - ): Future[Seq[Seq[InitialCandidate]]] = { - relatedTweetScribeLogger.scribeInitialCandidates( - query, - query.internalId match { - case InternalId.TweetId(_) => - getCandidatesFromSimilarityEngine( - query, - TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet, - tweetBasedUnifiedSimilarityEngine.getCandidates) - case InternalId.UserId(_) => - getCandidatesFromSimilarityEngine( - query, - ProducerBasedUnifiedSimilarityEngine.fromParamsForRelatedTweet, - producerBasedUnifiedSimilarityEngine.getCandidates) - case _ => - throw new UnsupportedOperationException( - "RelatedTweetCandidateGenerator gets invalid InternalId") - } - ) - } - - /*** - * fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, - * and apply VF filter based on TweetInfoStore - * To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]] - * instead of a Seq[Candidate] even though we only have a Seq in it. - */ - private def getCandidatesFromSimilarityEngine[QueryType]( - query: RelatedTweetCandidateGeneratorQuery, - fromParamsForRelatedTweet: (InternalId, configapi.Params) => QueryType, - getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - - /*** - * We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation - * and as a result, it will return Seq[Seq[InitialCandidate]] - */ - val engineQueries = - Seq(fromParamsForRelatedTweet(query.internalId, query.params)) - - Future - .collect { - engineQueries.map { query => - for { - candidates <- getFunc(query) - prefilterCandidates <- convertToInitialCandidates( - candidates.toSeq.flatten - ) - } yield prefilterCandidates - } - } - } - - private def preRankFilter( - query: RelatedTweetCandidateGeneratorQuery, - candidates: Seq[Seq[InitialCandidate]] - ): Future[Seq[Seq[InitialCandidate]]] = { - relatedTweetScribeLogger.scribePreRankFilterCandidates( - query, - preRankFilterRunner - .runSequentialFilters(query, candidates)) - } - - private[candidate_generation] def convertToInitialCandidates( - candidates: Seq[TweetWithCandidateGenerationInfo], - ): Future[Seq[InitialCandidate]] = { - val tweetIds = candidates.map(_.tweetId).toSet - Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /*** - * If tweetInfo does not exist, we will filter out this tweet candidate. - * This tweetInfo filter also acts as the VF filter - */ - candidates.collect { - case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => - val tweetInfo = tweetInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialCandidate( - tweetId = candidate.tweetId, - tweetInfo = tweetInfo, - candidate.candidateGenerationInfo - ) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala deleted file mode 100644 index cc7f55859..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala +++ /dev/null @@ -1,139 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.filter.PreRankFilterRunner -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -class RelatedVideoTweetCandidateGenerator @Inject() ( - @Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) tweetBasedUnifiedSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ], - preRankFilterRunner: PreRankFilterRunner, - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - globalStats: StatsReceiver) { - - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val preRankFilterStats = stats.scope("preRankFilter") - - def get( - query: RelatedVideoTweetCandidateGeneratorQuery - ): Future[Seq[InitialCandidate]] = { - - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", query.product.toString) - StatsUtil.trackItemsStats(allStats) { - StatsUtil.trackItemsStats(perProductStats) { - for { - initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) { - fetchCandidates(query) - } - filteredCandidates <- StatsUtil.trackBlockStats(preRankFilterStats) { - preRankFilter(query, initialCandidates) - } - } yield { - filteredCandidates.headOption - .getOrElse( - throw new UnsupportedOperationException( - "RelatedVideoTweetCandidateGenerator results invalid") - ).take(query.maxNumResults) - } - } - } - } - - def fetchCandidates( - query: RelatedVideoTweetCandidateGeneratorQuery - ): Future[Seq[Seq[InitialCandidate]]] = { - query.internalId match { - case InternalId.TweetId(_) => - getCandidatesFromSimilarityEngine( - query, - TweetBasedUnifiedSimilarityEngine.fromParamsForRelatedVideoTweet, - tweetBasedUnifiedSimilarityEngine.getCandidates) - case _ => - throw new UnsupportedOperationException( - "RelatedVideoTweetCandidateGenerator gets invalid InternalId") - } - } - - /*** - * fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, - * and apply VF filter based on TweetInfoStore - * To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]] - * instead of a Seq[Candidate] even though we only have a Seq in it. - */ - private def getCandidatesFromSimilarityEngine[QueryType]( - query: RelatedVideoTweetCandidateGeneratorQuery, - fromParamsForRelatedVideoTweet: (InternalId, configapi.Params) => QueryType, - getFunc: QueryType => Future[Option[Seq[TweetWithCandidateGenerationInfo]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - - /*** - * We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation - * and as a result, it will return Seq[Seq[InitialCandidate]] - */ - val engineQueries = - Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params)) - - Future - .collect { - engineQueries.map { query => - for { - candidates <- getFunc(query) - prefilterCandidates <- convertToInitialCandidates( - candidates.toSeq.flatten - ) - } yield prefilterCandidates - } - } - } - - private def preRankFilter( - query: RelatedVideoTweetCandidateGeneratorQuery, - candidates: Seq[Seq[InitialCandidate]] - ): Future[Seq[Seq[InitialCandidate]]] = { - preRankFilterRunner - .runSequentialFilters(query, candidates) - } - - private[candidate_generation] def convertToInitialCandidates( - candidates: Seq[TweetWithCandidateGenerationInfo], - ): Future[Seq[InitialCandidate]] = { - val tweetIds = candidates.map(_.tweetId).toSet - Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /*** - * If tweetInfo does not exist, we will filter out this tweet candidate. - * This tweetInfo filter also acts as the VF filter - */ - candidates.collect { - case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => - val tweetInfo = tweetInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialCandidate( - tweetId = candidate.tweetId, - tweetInfo = tweetInfo, - candidate.candidateGenerationInfo - ) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala deleted file mode 100644 index a40901a58..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala +++ /dev/null @@ -1,640 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.InterestedInParams -import com.twitter.cr_mixer.param.SimClustersANNParams -import com.twitter.cr_mixer.similarity_engine.EngineQuery -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.base.CandidateSource -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.ModelVersions -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton -import javax.inject.Named -import com.twitter.cr_mixer.model.ModuleNames - -/** - * This store looks for similar tweets for a given UserId that generates UserInterestedIn - * from SimClustersANN. It will be a standalone CandidateGeneration class moving forward. - * - * After the abstraction improvement (apply SimilarityEngine trait) - * these CG will be subjected to change. - */ -@Singleton -case class SimClustersInterestedInCandidateGeneration @Inject() ( - @Named(ModuleNames.SimClustersANNSimilarityEngine) - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - statsReceiver: StatsReceiver) - extends CandidateSource[ - SimClustersInterestedInCandidateGeneration.Query, - Seq[TweetWithCandidateGenerationInfo] - ] { - - override def name: String = this.getClass.getSimpleName - private val stats = statsReceiver.scope(name) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: SimClustersInterestedInCandidateGeneration.Query - ): Future[Option[Seq[Seq[TweetWithCandidateGenerationInfo]]]] = { - - query.internalId match { - case _: InternalId.UserId => - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - // UserInterestedIn Queries - val userInterestedInCandidateResultFut = - if (query.enableUserInterestedIn && query.enableProdSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInExperimentalSANNCandidateResultFut = - if (query.enableUserInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInExperimentalSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInSANN1CandidateResultFut = - if (query.enableUserInterestedIn && query.enableSimClustersANN1SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANN1Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInSANN2CandidateResultFut = - if (query.enableUserInterestedIn && query.enableSimClustersANN2SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANN2Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInSANN3CandidateResultFut = - if (query.enableUserInterestedIn && query.enableSimClustersANN3SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANN3Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInSANN5CandidateResultFut = - if (query.enableUserInterestedIn && query.enableSimClustersANN5SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANN5Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userInterestedInSANN4CandidateResultFut = - if (query.enableUserInterestedIn && query.enableSimClustersANN4SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.interestedInSimClustersANN4Query, - query.simClustersInterestedInMinScore) - else - Future.None - // UserNextInterestedIn Queries - val userNextInterestedInCandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInExperimentalSANNCandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInExperimentalSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInSANN1CandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableSimClustersANN1SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANN1Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInSANN2CandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableSimClustersANN2SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANN2Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInSANN3CandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableSimClustersANN3SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANN3Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInSANN5CandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableSimClustersANN5SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANN5Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userNextInterestedInSANN4CandidateResultFut = - if (query.enableUserNextInterestedIn && query.enableSimClustersANN4SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.nextInterestedInSimClustersANN4Query, - query.simClustersInterestedInMinScore) - else - Future.None - - // AddressBookInterestedIn Queries - val userAddressBookInterestedInCandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookExperimentalSANNCandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableExperimentalSimClustersANNSimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInExperimentalSimClustersANNQuery, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookSANN1CandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN1SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANN1Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookSANN2CandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN2SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANN2Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookSANN3CandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN3SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANN3Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookSANN5CandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN5SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANN5Query, - query.simClustersInterestedInMinScore) - else - Future.None - - val userAddressBookSANN4CandidateResultFut = - if (query.enableAddressBookNextInterestedIn && query.enableSimClustersANN4SimilarityEngine) - getInterestedInCandidateResult( - simClustersANNSimilarityEngine, - query.addressbookInterestedInSimClustersANN4Query, - query.simClustersInterestedInMinScore) - else - Future.None - - Future - .collect( - Seq( - userInterestedInCandidateResultFut, - userNextInterestedInCandidateResultFut, - userAddressBookInterestedInCandidateResultFut, - userInterestedInExperimentalSANNCandidateResultFut, - userNextInterestedInExperimentalSANNCandidateResultFut, - userAddressBookExperimentalSANNCandidateResultFut, - userInterestedInSANN1CandidateResultFut, - userNextInterestedInSANN1CandidateResultFut, - userAddressBookSANN1CandidateResultFut, - userInterestedInSANN2CandidateResultFut, - userNextInterestedInSANN2CandidateResultFut, - userAddressBookSANN2CandidateResultFut, - userInterestedInSANN3CandidateResultFut, - userNextInterestedInSANN3CandidateResultFut, - userAddressBookSANN3CandidateResultFut, - userInterestedInSANN5CandidateResultFut, - userNextInterestedInSANN5CandidateResultFut, - userAddressBookSANN5CandidateResultFut, - userInterestedInSANN4CandidateResultFut, - userNextInterestedInSANN4CandidateResultFut, - userAddressBookSANN4CandidateResultFut - ) - ).map { candidateResults => - Some( - candidateResults.map(candidateResult => candidateResult.getOrElse(Seq.empty)) - ) - } - } - case _ => - stats.counter("sourceId_is_not_userId_cnt").incr() - Future.None - } - } - - private def simClustersCandidateMinScoreFilter( - simClustersAnnCandidates: Seq[TweetWithScore], - simClustersInterestedInMinScore: Double, - simClustersANNConfigId: String - ): Seq[TweetWithScore] = { - val filteredCandidates = simClustersAnnCandidates - .filter { candidate => - candidate.score > simClustersInterestedInMinScore - } - - stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size) - stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr() - if (filteredCandidates.isEmpty) - stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - - private def getInterestedInCandidateResult( - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - simClustersInterestedInMinScore: Double, - ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = { - val interestedInCandidatesFut = - simClustersANNSimilarityEngine.getCandidates(simClustersANNQuery) - - val interestedInCandidateResultFut = interestedInCandidatesFut.map { interestedInCandidates => - stats.stat("candidateSize").add(interestedInCandidates.size) - - val embeddingCandidatesStat = stats.scope( - simClustersANNQuery.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.name) - - embeddingCandidatesStat.stat("candidateSize").add(interestedInCandidates.size) - if (interestedInCandidates.isEmpty) { - embeddingCandidatesStat.counter("empty_results").incr() - } - embeddingCandidatesStat.counter("requests").incr() - - val filteredTweets = simClustersCandidateMinScoreFilter( - interestedInCandidates.toSeq.flatten, - simClustersInterestedInMinScore, - simClustersANNQuery.storeQuery.simClustersANNConfigId) - - val interestedInTweetsWithCGInfo = filteredTweets.map { tweetWithScore => - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - None, - SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(simClustersANNQuery, tweetWithScore.score), - Seq.empty // SANN is an atomic SE, and hence it has no contributing SEs - ) - ) - } - - val interestedInResults = if (interestedInTweetsWithCGInfo.nonEmpty) { - Some(interestedInTweetsWithCGInfo) - } else None - interestedInResults - } - interestedInCandidateResultFut - } -} - -object SimClustersInterestedInCandidateGeneration { - - case class Query( - internalId: InternalId, - enableUserInterestedIn: Boolean, - enableUserNextInterestedIn: Boolean, - enableAddressBookNextInterestedIn: Boolean, - enableProdSimClustersANNSimilarityEngine: Boolean, - enableExperimentalSimClustersANNSimilarityEngine: Boolean, - enableSimClustersANN1SimilarityEngine: Boolean, - enableSimClustersANN2SimilarityEngine: Boolean, - enableSimClustersANN3SimilarityEngine: Boolean, - enableSimClustersANN5SimilarityEngine: Boolean, - enableSimClustersANN4SimilarityEngine: Boolean, - simClustersInterestedInMinScore: Double, - simClustersNextInterestedInMinScore: Double, - simClustersAddressBookInterestedInMinScore: Double, - interestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - interestedInExperimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInExperimentalSimClustersANNQuery: EngineQuery[ - SimClustersANNSimilarityEngine.Query - ], - addressbookInterestedInExperimentalSimClustersANNQuery: EngineQuery[ - SimClustersANNSimilarityEngine.Query - ], - interestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - interestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - interestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - interestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - interestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - nextInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - addressbookInterestedInSimClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - ) - - def fromParams( - internalId: InternalId, - params: configapi.Params, - ): Query = { - // SimClusters common configs - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - - val simClustersInterestedInMinScore = params(InterestedInParams.MinScoreParam) - val simClustersNextInterestedInMinScore = params( - InterestedInParams.MinScoreSequentialModelParam) - val simClustersAddressBookInterestedInMinScore = params( - InterestedInParams.MinScoreAddressBookParam) - - // InterestedIn embeddings parameters - val interestedInEmbedding = params(InterestedInParams.InterestedInEmbeddingIdParam) - val nextInterestedInEmbedding = params(InterestedInParams.NextInterestedInEmbeddingIdParam) - val addressbookInterestedInEmbedding = params( - InterestedInParams.AddressBookInterestedInEmbeddingIdParam) - - // Prod SimClustersANN Query - val interestedInSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANNConfigId, - params) - - val nextInterestedInSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANNConfigId, - params) - - val addressbookInterestedInSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANNConfigId, - params) - - // Experimental SANN cluster Query - val interestedInExperimentalSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params) - - val nextInterestedInExperimentalSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params) - - val addressbookInterestedInExperimentalSimClustersANNQuery = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params) - - // SimClusters ANN cluster 1 Query - val interestedInSimClustersANN1Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN1ConfigId, - params) - - val nextInterestedInSimClustersANN1Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN1ConfigId, - params) - - val addressbookInterestedInSimClustersANN1Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN1ConfigId, - params) - - // SimClusters ANN cluster 2 Query - val interestedInSimClustersANN2Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN2ConfigId, - params) - - val nextInterestedInSimClustersANN2Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN2ConfigId, - params) - - val addressbookInterestedInSimClustersANN2Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN2ConfigId, - params) - - // SimClusters ANN cluster 3 Query - val interestedInSimClustersANN3Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN3ConfigId, - params) - - val nextInterestedInSimClustersANN3Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN3ConfigId, - params) - - val addressbookInterestedInSimClustersANN3Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN3ConfigId, - params) - - // SimClusters ANN cluster 5 Query - val interestedInSimClustersANN5Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN5ConfigId, - params) - // SimClusters ANN cluster 4 Query - val interestedInSimClustersANN4Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - interestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN4ConfigId, - params) - - val nextInterestedInSimClustersANN5Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN5ConfigId, - params) - - val nextInterestedInSimClustersANN4Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - nextInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN4ConfigId, - params) - - val addressbookInterestedInSimClustersANN5Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN5ConfigId, - params) - - val addressbookInterestedInSimClustersANN4Query = - SimClustersANNSimilarityEngine.fromParams( - internalId, - addressbookInterestedInEmbedding.embeddingType, - simClustersModelVersion, - simClustersANN4ConfigId, - params) - - Query( - internalId = internalId, - enableUserInterestedIn = params(InterestedInParams.EnableSourceParam), - enableUserNextInterestedIn = params(InterestedInParams.EnableSourceSequentialModelParam), - enableAddressBookNextInterestedIn = params(InterestedInParams.EnableSourceAddressBookParam), - enableProdSimClustersANNSimilarityEngine = - params(InterestedInParams.EnableProdSimClustersANNParam), - enableExperimentalSimClustersANNSimilarityEngine = - params(InterestedInParams.EnableExperimentalSimClustersANNParam), - enableSimClustersANN1SimilarityEngine = params(InterestedInParams.EnableSimClustersANN1Param), - enableSimClustersANN2SimilarityEngine = params(InterestedInParams.EnableSimClustersANN2Param), - enableSimClustersANN3SimilarityEngine = params(InterestedInParams.EnableSimClustersANN3Param), - enableSimClustersANN5SimilarityEngine = params(InterestedInParams.EnableSimClustersANN5Param), - enableSimClustersANN4SimilarityEngine = params(InterestedInParams.EnableSimClustersANN4Param), - simClustersInterestedInMinScore = simClustersInterestedInMinScore, - simClustersNextInterestedInMinScore = simClustersNextInterestedInMinScore, - simClustersAddressBookInterestedInMinScore = simClustersAddressBookInterestedInMinScore, - interestedInSimClustersANNQuery = interestedInSimClustersANNQuery, - nextInterestedInSimClustersANNQuery = nextInterestedInSimClustersANNQuery, - addressbookInterestedInSimClustersANNQuery = addressbookInterestedInSimClustersANNQuery, - interestedInExperimentalSimClustersANNQuery = interestedInExperimentalSimClustersANNQuery, - nextInterestedInExperimentalSimClustersANNQuery = - nextInterestedInExperimentalSimClustersANNQuery, - addressbookInterestedInExperimentalSimClustersANNQuery = - addressbookInterestedInExperimentalSimClustersANNQuery, - interestedInSimClustersANN1Query = interestedInSimClustersANN1Query, - nextInterestedInSimClustersANN1Query = nextInterestedInSimClustersANN1Query, - addressbookInterestedInSimClustersANN1Query = addressbookInterestedInSimClustersANN1Query, - interestedInSimClustersANN2Query = interestedInSimClustersANN2Query, - nextInterestedInSimClustersANN2Query = nextInterestedInSimClustersANN2Query, - addressbookInterestedInSimClustersANN2Query = addressbookInterestedInSimClustersANN2Query, - interestedInSimClustersANN3Query = interestedInSimClustersANN3Query, - nextInterestedInSimClustersANN3Query = nextInterestedInSimClustersANN3Query, - addressbookInterestedInSimClustersANN3Query = addressbookInterestedInSimClustersANN3Query, - interestedInSimClustersANN5Query = interestedInSimClustersANN5Query, - nextInterestedInSimClustersANN5Query = nextInterestedInSimClustersANN5Query, - addressbookInterestedInSimClustersANN5Query = addressbookInterestedInSimClustersANN5Query, - interestedInSimClustersANN4Query = interestedInSimClustersANN4Query, - nextInterestedInSimClustersANN4Query = nextInterestedInSimClustersANN4Query, - addressbookInterestedInSimClustersANN4Query = addressbookInterestedInSimClustersANN4Query, - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala deleted file mode 100644 index 690fda482..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala +++ /dev/null @@ -1,232 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.TopicTweetParams -import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.TopicTweet -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.util.DefaultTimer -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.servo.util.MemoizingStatsReceiver -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject -import javax.inject.Singleton - -/** - * Formerly CrTopic in legacy Content Recommender. This generator finds top Tweets per Topic. - */ -@Singleton -class TopicTweetCandidateGenerator @Inject() ( - certoTopicTweetSimilarityEngine: CertoTopicTweetSimilarityEngine, - skitTopicTweetSimilarityEngine: SkitTopicTweetSimilarityEngine, - skitHighPrecisionTopicTweetSimilarityEngine: SkitHighPrecisionTopicTweetSimilarityEngine, - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) { - private val timer = DefaultTimer - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val filterCandidatesStats = stats.scope("filterCandidates") - private val tweetyPieFilteredStats = filterCandidatesStats.stat("tweetypie_filtered") - private val memoizedStatsReceiver = new MemoizingStatsReceiver(stats) - - def get( - query: TopicTweetCandidateGeneratorQuery - ): Future[Map[Long, Seq[TopicTweet]]] = { - val maxTweetAge = query.params(TopicTweetParams.MaxTweetAge) - val product = query.product - val allStats = memoizedStatsReceiver.scope("all") - val perProductStats = memoizedStatsReceiver.scope("perProduct", product.name) - StatsUtil.trackMapValueStats(allStats) { - StatsUtil.trackMapValueStats(perProductStats) { - val result = for { - retrievedTweets <- fetchCandidates(query) - initialTweetCandidates <- convertToInitialCandidates(retrievedTweets) - filteredTweetCandidates <- filterCandidates( - initialTweetCandidates, - maxTweetAge, - query.isVideoOnly, - query.impressedTweetList) - rankedTweetCandidates = rankCandidates(filteredTweetCandidates) - hydratedTweetCandidates = hydrateCandidates(rankedTweetCandidates) - } yield { - hydratedTweetCandidates.map { - case (topicId, topicTweets) => - val topKTweets = topicTweets.take(query.maxNumResults) - topicId -> topKTweets - } - } - result.raiseWithin(timeoutConfig.topicTweetEndpointTimeout)(timer) - } - } - } - - private def fetchCandidates( - query: TopicTweetCandidateGeneratorQuery - ): Future[Map[TopicId, Option[Seq[TopicTweetWithScore]]]] = { - Future.collect { - query.topicIds.map { topicId => - topicId -> StatsUtil.trackOptionStats(fetchCandidatesStats) { - Future - .join( - certoTopicTweetSimilarityEngine.get(CertoTopicTweetSimilarityEngine - .fromParams(topicId, query.isVideoOnly, query.params)), - skitTopicTweetSimilarityEngine - .get(SkitTopicTweetSimilarityEngine - .fromParams(topicId, query.isVideoOnly, query.params)), - skitHighPrecisionTopicTweetSimilarityEngine - .get(SkitHighPrecisionTopicTweetSimilarityEngine - .fromParams(topicId, query.isVideoOnly, query.params)) - ).map { - case (certoTopicTweets, skitTfgTopicTweets, skitHighPrecisionTopicTweets) => - val uniqueCandidates = (certoTopicTweets.getOrElse(Nil) ++ - skitTfgTopicTweets.getOrElse(Nil) ++ - skitHighPrecisionTopicTweets.getOrElse(Nil)) - .groupBy(_.tweetId).map { - case (_, dupCandidates) => dupCandidates.head - }.toSeq - Some(uniqueCandidates) - } - } - }.toMap - } - } - - private def convertToInitialCandidates( - candidatesMap: Map[TopicId, Option[Seq[TopicTweetWithScore]]] - ): Future[Map[TopicId, Seq[InitialCandidate]]] = { - val initialCandidates = candidatesMap.map { - case (topicId, candidatesOpt) => - val candidates = candidatesOpt.getOrElse(Nil) - val tweetIds = candidates.map(_.tweetId).toSet - val numTweetsPreFilter = tweetIds.size - Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /** * - * If tweetInfo does not exist, we will filter out this tweet candidate. - */ - val tweetyPieFilteredInitialCandidates = candidates.collect { - case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => - val tweetInfo = tweetInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialCandidate( - tweetId = candidate.tweetId, - tweetInfo = tweetInfo, - CandidateGenerationInfo( - None, - SimilarityEngineInfo( - similarityEngineType = candidate.similarityEngineType, - modelId = None, - score = Some(candidate.score)), - Seq.empty - ) - ) - } - val numTweetsPostFilter = tweetyPieFilteredInitialCandidates.size - tweetyPieFilteredStats.add(numTweetsPreFilter - numTweetsPostFilter) - topicId -> tweetyPieFilteredInitialCandidates - } - } - - Future.collect(initialCandidates.toSeq).map(_.toMap) - } - - private def filterCandidates( - topicTweetMap: Map[TopicId, Seq[InitialCandidate]], - maxTweetAge: Duration, - isVideoOnly: Boolean, - excludeTweetIds: Set[TweetId] - ): Future[Map[TopicId, Seq[InitialCandidate]]] = { - - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge) - - val filteredResults = topicTweetMap.map { - case (topicId, tweetsWithScore) => - topicId -> StatsUtil.trackItemsStats(filterCandidatesStats) { - - val timeFilteredTweets = - tweetsWithScore.filter { tweetWithScore => - tweetWithScore.tweetId >= earliestTweetId && !excludeTweetIds.contains( - tweetWithScore.tweetId) - } - - filterCandidatesStats - .stat("exclude_and_time_filtered").add(tweetsWithScore.size - timeFilteredTweets.size) - - val tweetNudityFilteredTweets = - timeFilteredTweets.collect { - case tweet if tweet.tweetInfo.isPassTweetMediaNudityTag.contains(true) => tweet - } - - filterCandidatesStats - .stat("tweet_nudity_filtered").add( - timeFilteredTweets.size - tweetNudityFilteredTweets.size) - - val userNudityFilteredTweets = - tweetNudityFilteredTweets.collect { - case tweet if tweet.tweetInfo.isPassUserNudityRateStrict.contains(true) => tweet - } - - filterCandidatesStats - .stat("user_nudity_filtered").add( - tweetNudityFilteredTweets.size - userNudityFilteredTweets.size) - - val videoFilteredTweets = { - if (isVideoOnly) { - userNudityFilteredTweets.collect { - case tweet if tweet.tweetInfo.hasVideo.contains(true) => tweet - } - } else { - userNudityFilteredTweets - } - } - - Future.value(videoFilteredTweets) - } - } - Future.collect(filteredResults) - } - - private def rankCandidates( - tweetCandidatesMap: Map[TopicId, Seq[InitialCandidate]] - ): Map[TopicId, Seq[InitialCandidate]] = { - tweetCandidatesMap.mapValues { tweetCandidates => - tweetCandidates.sortBy { candidate => - -candidate.tweetInfo.favCount - } - } - } - - private def hydrateCandidates( - topicCandidatesMap: Map[TopicId, Seq[InitialCandidate]] - ): Map[Long, Seq[TopicTweet]] = { - topicCandidatesMap.map { - case (topicId, tweetsWithScore) => - topicId.entityId -> - tweetsWithScore.map { tweetWithScore => - val similarityEngineType: SimilarityEngineType = - tweetWithScore.candidateGenerationInfo.similarityEngineInfo.similarityEngineType - TopicTweet( - tweetId = tweetWithScore.tweetId, - score = tweetWithScore.getSimilarityScore, - similarityEngineType = similarityEngineType - ) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala deleted file mode 100644 index ecf0bb98e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala +++ /dev/null @@ -1,179 +0,0 @@ -package com.twitter.cr_mixer.candidate_generation - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.logging.UtegTweetScribeLogger -import com.twitter.cr_mixer.filter.UtegFilterRunner -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.source_signal.RealGraphInSourceGraphFetcher -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -class UtegTweetCandidateGenerator @Inject() ( - @Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) userTweetEntityGraphSimilarityEngine: StandardSimilarityEngine[ - UserTweetEntityGraphSimilarityEngine.Query, - TweetWithScoreAndSocialProof - ], - utegTweetScribeLogger: UtegTweetScribeLogger, - tweetInfoStore: ReadableStore[TweetId, TweetInfo], - realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher, - utegFilterRunner: UtegFilterRunner, - globalStats: StatsReceiver) { - - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val fetchSeedsStats = stats.scope("fetchSeeds") - private val fetchCandidatesStats = stats.scope("fetchCandidates") - private val utegFilterStats = stats.scope("utegFilter") - private val rankStats = stats.scope("rank") - - def get( - query: UtegTweetCandidateGeneratorQuery - ): Future[Seq[TweetWithScoreAndSocialProof]] = { - - val allStats = stats.scope("all") - val perProductStats = stats.scope("perProduct", query.product.toString) - StatsUtil.trackItemsStats(allStats) { - StatsUtil.trackItemsStats(perProductStats) { - - /** - * The candidate we return in the end needs a social proof field, which isn't - * supported by the any existing Candidate type, so we created TweetWithScoreAndSocialProof - * instead. - * - * However, filters and light ranker expect Candidate-typed param to work. In order to minimise the - * changes to them, we are doing conversions from/to TweetWithScoreAndSocialProof to/from Candidate - * in this method. - */ - for { - realGraphSeeds <- StatsUtil.trackItemMapStats(fetchSeedsStats) { - fetchSeeds(query) - } - initialTweets <- StatsUtil.trackItemsStats(fetchCandidatesStats) { - fetchCandidates(query, realGraphSeeds) - } - initialCandidates <- convertToInitialCandidates(initialTweets) - filteredCandidates <- StatsUtil.trackItemsStats(utegFilterStats) { - utegFilter(query, initialCandidates) - } - rankedCandidates <- StatsUtil.trackItemsStats(rankStats) { - rankCandidates(query, filteredCandidates) - } - } yield { - val topTweets = rankedCandidates.take(query.maxNumResults) - convertToTweets(topTweets, initialTweets.map(tweet => tweet.tweetId -> tweet).toMap) - } - } - } - } - - private def utegFilter( - query: UtegTweetCandidateGeneratorQuery, - candidates: Seq[InitialCandidate] - ): Future[Seq[InitialCandidate]] = { - utegFilterRunner.runSequentialFilters(query, Seq(candidates)).map(_.flatten) - } - - private def fetchSeeds( - query: UtegTweetCandidateGeneratorQuery - ): Future[Map[UserId, Double]] = { - realGraphInSourceGraphFetcher - .get(FetcherQuery(query.userId, query.product, query.userState, query.params)) - .map(_.map(_.seedWithScores).getOrElse(Map.empty)) - } - - private[candidate_generation] def rankCandidates( - query: UtegTweetCandidateGeneratorQuery, - filteredCandidates: Seq[InitialCandidate], - ): Future[Seq[RankedCandidate]] = { - val blendedCandidates = filteredCandidates.map(candidate => - candidate.toBlendedCandidate(Seq(candidate.candidateGenerationInfo))) - - Future( - blendedCandidates.map { candidate => - val score = candidate.getSimilarityScore - candidate.toRankedCandidate(score) - } - ) - - } - - def fetchCandidates( - query: UtegTweetCandidateGeneratorQuery, - realGraphSeeds: Map[UserId, Double], - ): Future[Seq[TweetWithScoreAndSocialProof]] = { - val engineQuery = UserTweetEntityGraphSimilarityEngine.fromParams( - query.userId, - realGraphSeeds, - Some(query.impressedTweetList.toSeq), - query.params - ) - - utegTweetScribeLogger.scribeInitialCandidates( - query, - userTweetEntityGraphSimilarityEngine.getCandidates(engineQuery).map(_.toSeq.flatten) - ) - } - - private[candidate_generation] def convertToInitialCandidates( - candidates: Seq[TweetWithScoreAndSocialProof], - ): Future[Seq[InitialCandidate]] = { - val tweetIds = candidates.map(_.tweetId).toSet - Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /** * - * If tweetInfo does not exist, we will filter out this tweet candidate. - */ - candidates.collect { - case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => - val tweetInfo = tweetInfos(candidate.tweetId) - .getOrElse(throw new IllegalStateException("Check previous line's condition")) - - InitialCandidate( - tweetId = candidate.tweetId, - tweetInfo = tweetInfo, - CandidateGenerationInfo( - None, - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.Uteg, - modelId = None, - score = Some(candidate.score)), - Seq.empty - ) - ) - } - } - } - - private[candidate_generation] def convertToTweets( - candidates: Seq[RankedCandidate], - tweetMap: Map[TweetId, TweetWithScoreAndSocialProof] - ): Seq[TweetWithScoreAndSocialProof] = { - candidates.map { candidate => - tweetMap - .get(candidate.tweetId).map { tweet => - TweetWithScoreAndSocialProof( - tweet.tweetId, - candidate.predictionScore, - tweet.socialProofByType - ) - // The exception should never be thrown - }.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets")) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/BUILD deleted file mode 100644 index 11b558321..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/BUILD +++ /dev/null @@ -1,13 +0,0 @@ -scala_library( - sources = ["*.scala"], - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "finatra/inject/inject-core/src/main/scala", - "simclusters-ann/thrift/src/main/thrift:thrift-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala deleted file mode 100644 index dbf3ad6fd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala +++ /dev/null @@ -1,473 +0,0 @@ -package com.twitter.cr_mixer.config - -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.exception.InvalidSANNConfigException -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclustersann.thriftscala.ScoringAlgorithm -import com.twitter.simclustersann.thriftscala.{SimClustersANNConfig => ThriftSimClustersANNConfig} -import com.twitter.util.Duration - -case class SimClustersANNConfig( - maxNumResults: Int, - minScore: Double, - candidateEmbeddingType: EmbeddingType, - maxTopTweetsPerCluster: Int, - maxScanClusters: Int, - maxTweetCandidateAge: Duration, - minTweetCandidateAge: Duration, - annAlgorithm: ScoringAlgorithm) { - val toSANNConfigThrift: ThriftSimClustersANNConfig = ThriftSimClustersANNConfig( - maxNumResults = maxNumResults, - minScore = minScore, - candidateEmbeddingType = candidateEmbeddingType, - maxTopTweetsPerCluster = maxTopTweetsPerCluster, - maxScanClusters = maxScanClusters, - maxTweetCandidateAgeHours = maxTweetCandidateAge.inHours, - minTweetCandidateAgeHours = minTweetCandidateAge.inHours, - annAlgorithm = annAlgorithm, - ) -} - -object SimClustersANNConfig { - - final val DefaultConfig = SimClustersANNConfig( - maxNumResults = 200, - minScore = 0.0, - candidateEmbeddingType = EmbeddingType.LogFavBasedTweet, - maxTopTweetsPerCluster = 800, - maxScanClusters = 50, - maxTweetCandidateAge = 24.hours, - minTweetCandidateAge = 0.hours, - annAlgorithm = ScoringAlgorithm.CosineSimilarity, - ) - - /* - SimClustersANNConfigId: String - Format: Prod - “EmbeddingType_ModelVersion_Default” - Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. Date : YYYYMMDD - */ - - private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val FavBasedProducer_Model20m145k2020_20220617_06 = - FavBasedProducer_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val FavBasedProducer_Model20m145k2020_20220801 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val FavBasedProducer_Model20m145k2020_20220810 = - FavBasedProducer_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val FavBasedProducer_Model20m145k2020_20220818 = - FavBasedProducer_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val FavBasedProducer_Model20m145k2020_20220819 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val FavBasedProducer_Model20m145k2020_20221221 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val FavBasedProducer_Model20m145k2020_20221220 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default = DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - // SANN-4 config - private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 = - LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val UnfilteredUserInterestedIn_Model20m145k2020_20220617_06 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val UnfilteredUserInterestedIn_Model20m145k2020_20220801 = - UnfilteredUserInterestedIn_Model20m145k2020_20220617_06.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val UnfilteredUserInterestedIn_Model20m145k2020_20220810 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val UnfilteredUserInterestedIn_Model20m145k2020_20220818 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val UnfilteredUserInterestedIn_Model20m145k2020_20220819 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val UnfilteredUserInterestedIn_Model20m145k2020_20221221 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val UnfilteredUserInterestedIn_Model20m145k2020_20221220 = - UnfilteredUserInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220 = - LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default = - DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220 = - LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy() - - // Chunnan's exp on maxTweetCandidateAgeDays 2 - private val UserNextInterestedIn_Model20m145k2020_20220617_06 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - maxTweetCandidateAge = 48.hours, - ) - - // Experimental SANN config - private val UserNextInterestedIn_Model20m145k2020_20220801 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val UserNextInterestedIn_Model20m145k2020_20220810 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val UserNextInterestedIn_Model20m145k2020_20220818 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val UserNextInterestedIn_Model20m145k2020_20220819 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val UserNextInterestedIn_Model20m145k2020_20221221 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val UserNextInterestedIn_Model20m145k2020_20221220 = - UserNextInterestedIn_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - // Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow - private val FollowBasedProducer_Model20m145k2020_Default = - FavBasedProducer_Model20m145k2020_Default.copy() - - // Experimental SANN config - private val FollowBasedProducer_Model20m145k2020_20220801 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.VideoPlayBack50LogFavBasedTweet, - ) - - // SANN-1 config - private val FollowBasedProducer_Model20m145k2020_20220810 = - FavBasedProducer_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-2 config - private val FollowBasedProducer_Model20m145k2020_20220818 = - FavBasedProducer_Model20m145k2020_Default.copy( - maxNumResults = 100, - candidateEmbeddingType = EmbeddingType.LogFavClickBasedAdsTweet, - maxTweetCandidateAge = 175200.hours, - maxTopTweetsPerCluster = 1600 - ) - - // SANN-3 config - private val FollowBasedProducer_Model20m145k2020_20220819 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.PushOpenLogFavBasedTweet, - ) - - // SANN-5 config - private val FollowBasedProducer_Model20m145k2020_20221221 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, - maxTweetCandidateAge = 1.hours - ) - - // SANN-4 config - private val FollowBasedProducer_Model20m145k2020_20221220 = - FavBasedProducer_Model20m145k2020_Default.copy( - candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, - maxTweetCandidateAge = 48.hours - ) - val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map( - "FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default, - "FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06, - "FavBasedProducer_Model20m145k2020_20220801" -> FavBasedProducer_Model20m145k2020_20220801, - "FavBasedProducer_Model20m145k2020_20220810" -> FavBasedProducer_Model20m145k2020_20220810, - "FavBasedProducer_Model20m145k2020_20220818" -> FavBasedProducer_Model20m145k2020_20220818, - "FavBasedProducer_Model20m145k2020_20220819" -> FavBasedProducer_Model20m145k2020_20220819, - "FavBasedProducer_Model20m145k2020_20221221" -> FavBasedProducer_Model20m145k2020_20221221, - "FavBasedProducer_Model20m145k2020_20221220" -> FavBasedProducer_Model20m145k2020_20221220, - "FollowBasedProducer_Model20m145k2020_Default" -> FollowBasedProducer_Model20m145k2020_Default, - "FollowBasedProducer_Model20m145k2020_20220801" -> FollowBasedProducer_Model20m145k2020_20220801, - "FollowBasedProducer_Model20m145k2020_20220810" -> FollowBasedProducer_Model20m145k2020_20220810, - "FollowBasedProducer_Model20m145k2020_20220818" -> FollowBasedProducer_Model20m145k2020_20220818, - "FollowBasedProducer_Model20m145k2020_20220819" -> FollowBasedProducer_Model20m145k2020_20220819, - "FollowBasedProducer_Model20m145k2020_20221221" -> FollowBasedProducer_Model20m145k2020_20221221, - "FollowBasedProducer_Model20m145k2020_20221220" -> FollowBasedProducer_Model20m145k2020_20221220, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220617_06, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220801, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220810, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220818, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20220819, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221221, - "LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220" -> LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220, - "UnfilteredUserInterestedIn_Model20m145k2020_Default" -> UnfilteredUserInterestedIn_Model20m145k2020_Default, - "UnfilteredUserInterestedIn_Model20m145k2020_20220617_06" -> UnfilteredUserInterestedIn_Model20m145k2020_20220617_06, - "UnfilteredUserInterestedIn_Model20m145k2020_20220801" -> UnfilteredUserInterestedIn_Model20m145k2020_20220801, - "UnfilteredUserInterestedIn_Model20m145k2020_20220810" -> UnfilteredUserInterestedIn_Model20m145k2020_20220810, - "UnfilteredUserInterestedIn_Model20m145k2020_20220818" -> UnfilteredUserInterestedIn_Model20m145k2020_20220818, - "UnfilteredUserInterestedIn_Model20m145k2020_20220819" -> UnfilteredUserInterestedIn_Model20m145k2020_20220819, - "UnfilteredUserInterestedIn_Model20m145k2020_20221221" -> UnfilteredUserInterestedIn_Model20m145k2020_20221221, - "UnfilteredUserInterestedIn_Model20m145k2020_20221220" -> UnfilteredUserInterestedIn_Model20m145k2020_20221220, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220617_06, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220801, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220810, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220818, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20220819, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221221, - "LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedInFromAPE_Model20m145k2020_20221220, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_Default, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220617_06, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220801, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220810, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220818, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20220819, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221221, - "LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220" -> LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE_Model20m145k2020_20221220, - "UserNextInterestedIn_Model20m145k2020_Default" -> UserNextInterestedIn_Model20m145k2020_Default, - "UserNextInterestedIn_Model20m145k2020_20220617_06" -> UserNextInterestedIn_Model20m145k2020_20220617_06, - "UserNextInterestedIn_Model20m145k2020_20220801" -> UserNextInterestedIn_Model20m145k2020_20220801, - "UserNextInterestedIn_Model20m145k2020_20220810" -> UserNextInterestedIn_Model20m145k2020_20220810, - "UserNextInterestedIn_Model20m145k2020_20220818" -> UserNextInterestedIn_Model20m145k2020_20220818, - "UserNextInterestedIn_Model20m145k2020_20220819" -> UserNextInterestedIn_Model20m145k2020_20220819, - "UserNextInterestedIn_Model20m145k2020_20221221" -> UserNextInterestedIn_Model20m145k2020_20221221, - "UserNextInterestedIn_Model20m145k2020_20221220" -> UserNextInterestedIn_Model20m145k2020_20221220, - ) - - def getConfig( - embeddingType: String, - modelVersion: String, - id: String - ): SimClustersANNConfig = { - val configName = embeddingType + "_" + modelVersion + "_" + id - DefaultConfigMappings.get(configName) match { - case Some(config) => config - case None => - throw InvalidSANNConfigException(s"Incorrect config id passed in for SANN $configName") - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala deleted file mode 100644 index 46e32990b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala +++ /dev/null @@ -1,24 +0,0 @@ -package com.twitter.cr_mixer.config - -import com.twitter.util.Duration - -case class TimeoutConfig( - /* Default timeouts for candidate generator */ - serviceTimeout: Duration, - signalFetchTimeout: Duration, - similarityEngineTimeout: Duration, - annServiceClientTimeout: Duration, - /* For Uteg Candidate Generator */ - utegSimilarityEngineTimeout: Duration, - /* For User State Store */ - userStateUnderlyingStoreTimeout: Duration, - userStateStoreTimeout: Duration, - /* For FRS based tweets */ - // Timeout passed to EarlyBird server - earlybirdServerTimeout: Duration, - // Timeout set on CrMixer side - earlybirdSimilarityEngineTimeout: Duration, - frsBasedTweetEndpointTimeout: Duration, - topicTweetEndpointTimeout: Duration, - // Timeout Settings for Navi gRPC Client - naviRequestTimeout: Duration) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/BUILD.bazel b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/BUILD.bazel deleted file mode 100644 index b2f7d2f7d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/BUILD.bazel +++ /dev/null @@ -1,48 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "content-recommender/thrift/src/main/thrift:content-recommender-common-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/debug", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "finagle/finagle-base-http/src/main", - "finagle/finagle-core/src/main", - "finagle/finagle-http/src/main/scala", - "finatra/http-server/src/main/scala/com/twitter/finatra/http:controller", - "finatra/thrift/src/main/scala/com/twitter/finatra/thrift:controller", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/functional_component/configapi", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "simclusters-ann/thrift/src/main/thrift:thrift-scala", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/ads/schema:common-scala", - "src/thrift/com/twitter/context:twitter-context-scala", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala", - "src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala", - "src/thrift/com/twitter/recos:recos-common-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/timelines/render:thrift-scala", - "src/thrift/com/twitter/timelines/timeline_logging:thrift-scala", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - "stringcenter/client", - "timelines/src/main/scala/com/twitter/timelines/tracing/lensview", - "timelines/src/main/scala/com/twitter/timelines/tracing/lensview/funnelseries", - "twitter-context/src/main/scala", - "user-signal-service/thrift/src/main/thrift:thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala deleted file mode 100644 index c16d76de8..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala +++ /dev/null @@ -1,757 +0,0 @@ -package com.twitter.cr_mixer.controller - -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.cr_mixer.candidate_generation.AdsCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.CrCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.FrsTweetCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.RelatedTweetCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.RelatedVideoTweetCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.TopicTweetCandidateGenerator -import com.twitter.cr_mixer.candidate_generation.UtegTweetCandidateGenerator -import com.twitter.cr_mixer.featureswitch.ParamsBuilder -import com.twitter.cr_mixer.logging.CrMixerScribeLogger -import com.twitter.cr_mixer.logging.RelatedTweetScribeLogger -import com.twitter.cr_mixer.logging.AdsRecommendationsScribeLogger -import com.twitter.cr_mixer.logging.RelatedTweetScribeMetadata -import com.twitter.cr_mixer.logging.ScribeMetadata -import com.twitter.cr_mixer.logging.UtegTweetScribeLogger -import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.FrsTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RankedAdsCandidate -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.TopicTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.param.AdsParams -import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.RelatedTweetGlobalParams -import com.twitter.cr_mixer.param.RelatedVideoTweetGlobalParams -import com.twitter.cr_mixer.param.TopicTweetParams -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.param.decider.EndpointLoadShedder -import com.twitter.cr_mixer.thriftscala.AdTweetRecommendation -import com.twitter.cr_mixer.thriftscala.AdsRequest -import com.twitter.cr_mixer.thriftscala.AdsResponse -import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest -import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse -import com.twitter.cr_mixer.thriftscala.FrsTweetRequest -import com.twitter.cr_mixer.thriftscala.FrsTweetResponse -import com.twitter.cr_mixer.thriftscala.RelatedTweet -import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest -import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse -import com.twitter.cr_mixer.thriftscala.RelatedVideoTweet -import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetRequest -import com.twitter.cr_mixer.thriftscala.RelatedVideoTweetResponse -import com.twitter.cr_mixer.thriftscala.TopicTweet -import com.twitter.cr_mixer.thriftscala.TopicTweetRequest -import com.twitter.cr_mixer.thriftscala.TopicTweetResponse -import com.twitter.cr_mixer.thriftscala.TweetRecommendation -import com.twitter.cr_mixer.thriftscala.UtegTweet -import com.twitter.cr_mixer.thriftscala.UtegTweetRequest -import com.twitter.cr_mixer.thriftscala.UtegTweetResponse -import com.twitter.cr_mixer.util.MetricTagUtil -import com.twitter.cr_mixer.util.SignalTimestampStatsUtil -import com.twitter.cr_mixer.{thriftscala => t} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.thrift.Controller -import com.twitter.hermit.store.common.ReadableWritableStore -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.timeline_logging.{thriftscala => thriftlog} -import com.twitter.timelines.tracing.lensview.funnelseries.TweetScoreFunnelSeries -import com.twitter.util.Future -import com.twitter.util.Time -import java.util.UUID -import javax.inject.Inject -import org.apache.commons.lang.exception.ExceptionUtils - -class CrMixerThriftController @Inject() ( - crCandidateGenerator: CrCandidateGenerator, - relatedTweetCandidateGenerator: RelatedTweetCandidateGenerator, - relatedVideoTweetCandidateGenerator: RelatedVideoTweetCandidateGenerator, - utegTweetCandidateGenerator: UtegTweetCandidateGenerator, - frsTweetCandidateGenerator: FrsTweetCandidateGenerator, - topicTweetCandidateGenerator: TopicTweetCandidateGenerator, - crMixerScribeLogger: CrMixerScribeLogger, - relatedTweetScribeLogger: RelatedTweetScribeLogger, - utegTweetScribeLogger: UtegTweetScribeLogger, - adsRecommendationsScribeLogger: AdsRecommendationsScribeLogger, - adsCandidateGenerator: AdsCandidateGenerator, - decider: CrMixerDecider, - paramsBuilder: ParamsBuilder, - endpointLoadShedder: EndpointLoadShedder, - signalTimestampStatsUtil: SignalTimestampStatsUtil, - tweetRecommendationResultsStore: ReadableWritableStore[UserId, CrMixerTweetResponse], - userStateStore: ReadableStore[UserId, UserState], - statsReceiver: StatsReceiver) - extends Controller(t.CrMixer) { - - lazy private val tweetScoreFunnelSeries = new TweetScoreFunnelSeries(statsReceiver) - - private def logErrMessage(endpoint: String, e: Throwable): Unit = { - val msg = Seq( - s"Failed endpoint $endpoint: ${e.getLocalizedMessage}", - ExceptionUtils.getStackTrace(e) - ).mkString("\n") - - /** * - * We chose logger.info() here to print message instead of logger.error since that - * logger.error sometimes suppresses detailed stacktrace. - */ - logger.info(msg) - } - - private def generateRequestUUID(): Long = { - - /** * - * We generate unique UUID via bitwise operations. See the below link for more: - * https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid - */ - UUID.randomUUID().getMostSignificantBits & Long.MaxValue - } - - handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args => - val endpointName = "getTweetRecommendations" - - val requestUUID = generateRequestUUID() - val startTime = Time.now.inMilliseconds - val userId = args.request.clientContext.userId.getOrElse( - throw new IllegalArgumentException("userId must be present in the Thrift clientContext") - ) - val queryFut = buildCrCandidateGeneratorQuery(args.request, requestUUID, userId) - queryFut.flatMap { query => - val scribeMetadata = ScribeMetadata.from(query) - endpointLoadShedder(endpointName, query.product.originalName) { - - val response = crCandidateGenerator.get(query) - - val blueVerifiedScribedResponse = response.flatMap { rankedCandidates => - val hasBlueVerifiedCandidate = rankedCandidates.exists { tweet => - tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true) - } - - if (hasBlueVerifiedCandidate) { - crMixerScribeLogger.scribeGetTweetRecommendationsForBlueVerified( - scribeMetadata, - response) - } else { - response - } - } - - val thriftResponse = blueVerifiedScribedResponse.map { candidates => - if (query.product == t.Product.Home) { - scribeTweetScoreFunnelSeries(candidates) - } - buildThriftResponse(candidates) - } - - cacheTweetRecommendationResults(args.request, thriftResponse) - - crMixerScribeLogger.scribeGetTweetRecommendations( - args.request, - startTime, - scribeMetadata, - thriftResponse) - }.rescue { - case EndpointLoadShedder.LoadSheddingException => - Future(CrMixerTweetResponse(Seq.empty)) - case e => - logErrMessage(endpointName, e) - Future(CrMixerTweetResponse(Seq.empty)) - } - } - - } - - /** * - * GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor are essentially - * doing very similar things, except that one passes in TweetId which calls TweetBased engine, - * and the other passes in AuthorId which calls ProducerBased engine. - */ - handle(t.CrMixer.GetRelatedTweetsForQueryTweet) { - args: t.CrMixer.GetRelatedTweetsForQueryTweet.Args => - val endpointName = "getRelatedTweetsForQueryTweet" - getRelatedTweets(endpointName, args.request) - } - - handle(t.CrMixer.GetRelatedVideoTweetsForQueryTweet) { - args: t.CrMixer.GetRelatedVideoTweetsForQueryTweet.Args => - val endpointName = "getRelatedVideoTweetsForQueryVideoTweet" - getRelatedVideoTweets(endpointName, args.request) - - } - - handle(t.CrMixer.GetRelatedTweetsForQueryAuthor) { - args: t.CrMixer.GetRelatedTweetsForQueryAuthor.Args => - val endpointName = "getRelatedTweetsForQueryAuthor" - getRelatedTweets(endpointName, args.request) - } - - private def getRelatedTweets( - endpointName: String, - request: RelatedTweetRequest - ): Future[RelatedTweetResponse] = { - val requestUUID = generateRequestUUID() - val startTime = Time.now.inMilliseconds - val queryFut = buildRelatedTweetQuery(request, requestUUID) - - queryFut.flatMap { query => - val relatedTweetScribeMetadata = RelatedTweetScribeMetadata.from(query) - endpointLoadShedder(endpointName, query.product.originalName) { - relatedTweetScribeLogger.scribeGetRelatedTweets( - request, - startTime, - relatedTweetScribeMetadata, - relatedTweetCandidateGenerator - .get(query) - .map(buildRelatedTweetResponse)) - }.rescue { - case EndpointLoadShedder.LoadSheddingException => - Future(RelatedTweetResponse(Seq.empty)) - case e => - logErrMessage(endpointName, e) - Future(RelatedTweetResponse(Seq.empty)) - } - } - - } - - private def getRelatedVideoTweets( - endpointName: String, - request: RelatedVideoTweetRequest - ): Future[RelatedVideoTweetResponse] = { - val requestUUID = generateRequestUUID() - val queryFut = buildRelatedVideoTweetQuery(request, requestUUID) - - queryFut.flatMap { query => - endpointLoadShedder(endpointName, query.product.originalName) { - relatedVideoTweetCandidateGenerator.get(query).map { initialCandidateSeq => - buildRelatedVideoTweetResponse(initialCandidateSeq) - } - }.rescue { - case EndpointLoadShedder.LoadSheddingException => - Future(RelatedVideoTweetResponse(Seq.empty)) - case e => - logErrMessage(endpointName, e) - Future(RelatedVideoTweetResponse(Seq.empty)) - } - } - } - - handle(t.CrMixer.GetFrsBasedTweetRecommendations) { - args: t.CrMixer.GetFrsBasedTweetRecommendations.Args => - val endpointName = "getFrsBasedTweetRecommendations" - - val requestUUID = generateRequestUUID() - val queryFut = buildFrsBasedTweetQuery(args.request, requestUUID) - queryFut.flatMap { query => - endpointLoadShedder(endpointName, query.product.originalName) { - frsTweetCandidateGenerator.get(query).map(FrsTweetResponse(_)) - }.rescue { - case e => - logErrMessage(endpointName, e) - Future(FrsTweetResponse(Seq.empty)) - } - } - } - - handle(t.CrMixer.GetTopicTweetRecommendations) { - args: t.CrMixer.GetTopicTweetRecommendations.Args => - val endpointName = "getTopicTweetRecommendations" - - val requestUUID = generateRequestUUID() - val query = buildTopicTweetQuery(args.request, requestUUID) - - endpointLoadShedder(endpointName, query.product.originalName) { - topicTweetCandidateGenerator.get(query).map(TopicTweetResponse(_)) - }.rescue { - case e => - logErrMessage(endpointName, e) - Future(TopicTweetResponse(Map.empty[Long, Seq[TopicTweet]])) - } - } - - handle(t.CrMixer.GetUtegTweetRecommendations) { - args: t.CrMixer.GetUtegTweetRecommendations.Args => - val endpointName = "getUtegTweetRecommendations" - - val requestUUID = generateRequestUUID() - val startTime = Time.now.inMilliseconds - val queryFut = buildUtegTweetQuery(args.request, requestUUID) - queryFut - .flatMap { query => - val scribeMetadata = ScribeMetadata.from(query) - endpointLoadShedder(endpointName, query.product.originalName) { - utegTweetScribeLogger.scribeGetUtegTweetRecommendations( - args.request, - startTime, - scribeMetadata, - utegTweetCandidateGenerator - .get(query) - .map(buildUtegTweetResponse) - ) - }.rescue { - case e => - logErrMessage(endpointName, e) - Future(UtegTweetResponse(Seq.empty)) - } - } - } - - handle(t.CrMixer.GetAdsRecommendations) { args: t.CrMixer.GetAdsRecommendations.Args => - val endpointName = "getAdsRecommendations" - val queryFut = buildAdsCandidateGeneratorQuery(args.request) - val startTime = Time.now.inMilliseconds - queryFut.flatMap { query => - { - val scribeMetadata = ScribeMetadata.from(query) - val response = adsCandidateGenerator - .get(query).map { candidates => - buildAdsResponse(candidates) - } - adsRecommendationsScribeLogger.scribeGetAdsRecommendations( - args.request, - startTime, - scribeMetadata, - response, - query.params(AdsParams.EnableScribe) - ) - }.rescue { - case e => - logErrMessage(endpointName, e) - Future(AdsResponse(Seq.empty)) - } - } - - } - - private def buildCrCandidateGeneratorQuery( - thriftRequest: CrMixerTweetRequest, - requestUUID: Long, - userId: Long - ): Future[CrCandidateGeneratorQuery] = { - - val product = thriftRequest.product - val productContext = thriftRequest.productContext - val scopedStats = statsReceiver - .scope(product.toString).scope("CrMixerTweetRequest") - - userStateStore - .get(userId).map { userStateOpt => - val userState = userStateOpt - .getOrElse(UserState.EnumUnknownUserState(100)) - scopedStats.scope("UserState").counter(userState.toString).incr() - - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState - ) - - // Specify product-specific behavior mapping here - val maxNumResults = (product, productContext) match { - case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) => - homeContext.maxResults.getOrElse(9999) - case (t.Product.Notifications, Some(t.ProductContext.NotificationsContext(cxt))) => - params(GlobalParams.MaxCandidatesPerRequestParam) - case (t.Product.Email, None) => - params(GlobalParams.MaxCandidatesPerRequestParam) - case (t.Product.ImmersiveMediaViewer, None) => - params(GlobalParams.MaxCandidatesPerRequestParam) - case (t.Product.VideoCarousel, None) => - params(GlobalParams.MaxCandidatesPerRequestParam) - case _ => - throw new IllegalArgumentException( - s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer" - ) - } - - CrCandidateGeneratorQuery( - userId = userId, - product = product, - userState = userState, - maxNumResults = maxNumResults, - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - params = params, - requestUUID = requestUUID, - languageCode = thriftRequest.clientContext.languageCode - ) - } - } - - private def buildRelatedTweetQuery( - thriftRequest: RelatedTweetRequest, - requestUUID: Long - ): Future[RelatedTweetCandidateGeneratorQuery] = { - - val product = thriftRequest.product - val scopedStats = statsReceiver - .scope(product.toString).scope("RelatedTweetRequest") - val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match { - case Some(userId) => userStateStore.get(userId) - case None => Future.value(Some(UserState.EnumUnknownUserState(100))) - }).map(_.getOrElse(UserState.EnumUnknownUserState(100))) - - userStateFut.map { userState => - scopedStats.scope("UserState").counter(userState.toString).incr() - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState) - - // Specify product-specific behavior mapping here - // Currently, Home takes 10, and RUX takes 100 - val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam) - - RelatedTweetCandidateGeneratorQuery( - internalId = thriftRequest.internalId, - clientContext = thriftRequest.clientContext, - product = product, - maxNumResults = maxNumResults, - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - params = params, - requestUUID = requestUUID - ) - } - } - - private def buildAdsCandidateGeneratorQuery( - thriftRequest: AdsRequest - ): Future[AdsCandidateGeneratorQuery] = { - val userId = thriftRequest.clientContext.userId.getOrElse( - throw new IllegalArgumentException("userId must be present in the Thrift clientContext") - ) - val product = thriftRequest.product - val requestUUID = generateRequestUUID() - userStateStore - .get(userId).map { userStateOpt => - val userState = userStateOpt - .getOrElse(UserState.EnumUnknownUserState(100)) - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState) - val maxNumResults = params(AdsParams.AdsCandidateGenerationMaxCandidatesNumParam) - AdsCandidateGeneratorQuery( - userId = userId, - product = product, - userState = userState, - params = params, - maxNumResults = maxNumResults, - requestUUID = requestUUID - ) - } - } - - private def buildRelatedVideoTweetQuery( - thriftRequest: RelatedVideoTweetRequest, - requestUUID: Long - ): Future[RelatedVideoTweetCandidateGeneratorQuery] = { - - val product = thriftRequest.product - val scopedStats = statsReceiver - .scope(product.toString).scope("RelatedVideoTweetRequest") - val userStateFut: Future[UserState] = (thriftRequest.clientContext.userId match { - case Some(userId) => userStateStore.get(userId) - case None => Future.value(Some(UserState.EnumUnknownUserState(100))) - }).map(_.getOrElse(UserState.EnumUnknownUserState(100))) - - userStateFut.map { userState => - scopedStats.scope("UserState").counter(userState.toString).incr() - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState) - - val maxNumResults = params(RelatedVideoTweetGlobalParams.MaxCandidatesPerRequestParam) - - RelatedVideoTweetCandidateGeneratorQuery( - internalId = thriftRequest.internalId, - clientContext = thriftRequest.clientContext, - product = product, - maxNumResults = maxNumResults, - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - params = params, - requestUUID = requestUUID - ) - } - - } - - private def buildUtegTweetQuery( - thriftRequest: UtegTweetRequest, - requestUUID: Long - ): Future[UtegTweetCandidateGeneratorQuery] = { - - val userId = thriftRequest.clientContext.userId.getOrElse( - throw new IllegalArgumentException("userId must be present in the Thrift clientContext") - ) - val product = thriftRequest.product - val productContext = thriftRequest.productContext - val scopedStats = statsReceiver - .scope(product.toString).scope("UtegTweetRequest") - - userStateStore - .get(userId).map { userStateOpt => - val userState = userStateOpt - .getOrElse(UserState.EnumUnknownUserState(100)) - scopedStats.scope("UserState").counter(userState.toString).incr() - - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState - ) - - // Specify product-specific behavior mapping here - val maxNumResults = (product, productContext) match { - case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) => - homeContext.maxResults.getOrElse(9999) - case _ => - throw new IllegalArgumentException( - s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer" - ) - } - - UtegTweetCandidateGeneratorQuery( - userId = userId, - product = product, - userState = userState, - maxNumResults = maxNumResults, - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - params = params, - requestUUID = requestUUID - ) - } - - } - - private def buildTopicTweetQuery( - thriftRequest: TopicTweetRequest, - requestUUID: Long - ): TopicTweetCandidateGeneratorQuery = { - val userId = thriftRequest.clientContext.userId.getOrElse( - throw new IllegalArgumentException( - "userId must be present in the TopicTweetRequest clientContext") - ) - val product = thriftRequest.product - val productContext = thriftRequest.productContext - - // Specify product-specific behavior mapping here - val isVideoOnly = (product, productContext) match { - case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) => - context.isVideoOnly - case (t.Product.TopicLandingPage, None) => - false - case (t.Product.HomeTopicsBackfill, None) => - false - case (t.Product.TopicTweetsStrato, None) => - false - case _ => - throw new IllegalArgumentException( - s"Product ${product} and ProductContext ${productContext} are not allowed in CrMixer" - ) - } - - statsReceiver.scope(product.toString).counter(TopicTweetRequest.toString).incr() - - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - product, - UserState.EnumUnknownUserState(100) - ) - - val topicIds = thriftRequest.topicIds.map { topicId => - TopicId( - entityId = topicId, - language = thriftRequest.clientContext.languageCode, - country = None - ) - }.toSet - - TopicTweetCandidateGeneratorQuery( - userId = userId, - topicIds = topicIds, - product = product, - maxNumResults = params(TopicTweetParams.MaxTopicTweetCandidatesParam), - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - params = params, - requestUUID = requestUUID, - isVideoOnly = isVideoOnly - ) - } - - private def buildFrsBasedTweetQuery( - thriftRequest: FrsTweetRequest, - requestUUID: Long - ): Future[FrsTweetCandidateGeneratorQuery] = { - val userId = thriftRequest.clientContext.userId.getOrElse( - throw new IllegalArgumentException( - "userId must be present in the FrsTweetRequest clientContext") - ) - val product = thriftRequest.product - val productContext = thriftRequest.productContext - - val scopedStats = statsReceiver - .scope(product.toString).scope("FrsTweetRequest") - - userStateStore - .get(userId).map { userStateOpt => - val userState = userStateOpt - .getOrElse(UserState.EnumUnknownUserState(100)) - scopedStats.scope("UserState").counter(userState.toString).incr() - - val params = - paramsBuilder.buildFromClientContext( - thriftRequest.clientContext, - thriftRequest.product, - userState - ) - val maxNumResults = (product, productContext) match { - case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) => - homeContext.maxResults.getOrElse( - params(FrsBasedCandidateGenerationMaxCandidatesNumParam)) - case _ => - params(FrsBasedCandidateGenerationMaxCandidatesNumParam) - } - - FrsTweetCandidateGeneratorQuery( - userId = userId, - product = product, - maxNumResults = maxNumResults, - impressedTweetList = thriftRequest.excludedTweetIds.getOrElse(Nil).toSet, - impressedUserList = thriftRequest.excludedUserIds.getOrElse(Nil).toSet, - params = params, - languageCodeOpt = thriftRequest.clientContext.languageCode, - countryCodeOpt = thriftRequest.clientContext.countryCode, - requestUUID = requestUUID - ) - } - } - - private def buildThriftResponse( - candidates: Seq[RankedCandidate] - ): CrMixerTweetResponse = { - - val tweets = candidates.map { candidate => - TweetRecommendation( - tweetId = candidate.tweetId, - score = candidate.predictionScore, - metricTags = Some(MetricTagUtil.buildMetricTags(candidate)), - latestSourceSignalTimestampInMillis = - SignalTimestampStatsUtil.buildLatestSourceSignalTimestamp(candidate) - ) - } - signalTimestampStatsUtil.statsSignalTimestamp(tweets) - CrMixerTweetResponse(tweets) - } - - private def scribeTweetScoreFunnelSeries( - candidates: Seq[RankedCandidate] - ): Seq[RankedCandidate] = { - // 202210210901 is a random number for code search of Lensview - tweetScoreFunnelSeries.startNewSpan( - name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType", - codePtr = 202210210901L) { - ( - candidates, - candidates.map { candidate => - thriftlog.TweetDimensionMeasure( - dimension = Some( - thriftlog - .RequestTweetDimension( - candidate.tweetId, - candidate.reasonChosen.similarityEngineInfo.similarityEngineType.value)), - measure = Some(thriftlog.RequestTweetMeasure(candidate.predictionScore)) - ) - } - ) - } - } - - private def buildRelatedTweetResponse(candidates: Seq[InitialCandidate]): RelatedTweetResponse = { - val tweets = candidates.map { candidate => - RelatedTweet( - tweetId = candidate.tweetId, - score = Some(candidate.getSimilarityScore), - authorId = Some(candidate.tweetInfo.authorId) - ) - } - RelatedTweetResponse(tweets) - } - - private def buildRelatedVideoTweetResponse( - candidates: Seq[InitialCandidate] - ): RelatedVideoTweetResponse = { - val tweets = candidates.map { candidate => - RelatedVideoTweet( - tweetId = candidate.tweetId, - score = Some(candidate.getSimilarityScore) - ) - } - RelatedVideoTweetResponse(tweets) - } - - private def buildUtegTweetResponse( - candidates: Seq[TweetWithScoreAndSocialProof] - ): UtegTweetResponse = { - val tweets = candidates.map { candidate => - UtegTweet( - tweetId = candidate.tweetId, - score = candidate.score, - socialProofByType = candidate.socialProofByType - ) - } - UtegTweetResponse(tweets) - } - - private def buildAdsResponse( - candidates: Seq[RankedAdsCandidate] - ): AdsResponse = { - AdsResponse(ads = candidates.map { candidate => - AdTweetRecommendation( - tweetId = candidate.tweetId, - score = candidate.predictionScore, - lineItems = Some(candidate.lineItemInfo)) - }) - } - - private def cacheTweetRecommendationResults( - request: CrMixerTweetRequest, - response: Future[CrMixerTweetResponse] - ): Unit = { - - val userId = request.clientContext.userId.getOrElse( - throw new IllegalArgumentException( - "userId must be present in getTweetRecommendations() Thrift clientContext")) - - if (decider.isAvailableForId(userId, DeciderConstants.getTweetRecommendationsCacheRate)) { - response.map { crMixerTweetResponse => - { - ( - request.product, - request.clientContext.userId, - crMixerTweetResponse.tweets.nonEmpty) match { - case (t.Product.Home, Some(userId), true) => - tweetRecommendationResultsStore.put((userId, crMixerTweetResponse)) - case _ => Future.value(Unit) - } - } - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/BUILD deleted file mode 100644 index 60521ad51..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/BUILD +++ /dev/null @@ -1,7 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/InvalidSANNConfigException.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/InvalidSANNConfigException.scala deleted file mode 100644 index a8ada7abf..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception/InvalidSANNConfigException.scala +++ /dev/null @@ -1,4 +0,0 @@ -package com.twitter.cr_mixer -package exception - -case class InvalidSANNConfigException(msg: String) extends Exception(msg) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/BUILD deleted file mode 100644 index d728980f6..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/BUILD +++ /dev/null @@ -1,35 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "abdecider/src/main/scala", - "configapi/configapi-abdecider", - "configapi/configapi-core", - "configapi/configapi-featureswitches:v2", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "discovery-common/src/main/scala/com/twitter/discovery/common/configapi", - "featureswitches/featureswitches-core", - "featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "frigate/frigate-common:util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/frigate:frigate-common-thrift-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/CrMixerLoggingABDecider.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/CrMixerLoggingABDecider.scala deleted file mode 100644 index 20195921e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/CrMixerLoggingABDecider.scala +++ /dev/null @@ -1,79 +0,0 @@ -package com.twitter.cr_mixer -package featureswitch - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.abdecider.LoggingABDecider -import com.twitter.abdecider.Recipient -import com.twitter.abdecider.Bucket -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.util.Local -import scala.collection.concurrent.{Map => ConcurrentMap} - -/** - * Wraps a LoggingABDecider, so all impressed buckets are recorded to a 'LocalContext' on a given request. - * - * Contexts (https://twitter.github.io/finagle/guide/Contexts.html) are Finagle's mechanism for - * storing state/variables without having to pass these variables all around the request. - * - * In order for this class to be used the [[SetImpressedBucketsLocalContextFilter]] must be applied - * at the beginning of the request, to initialize a concurrent map used to store impressed buckets. - * - * Whenever we get an a/b impression, the bucket information is logged to the concurrent hashmap. - */ -case class CrMixerLoggingABDecider( - loggingAbDecider: LoggingABDecider, - statsReceiver: StatsReceiver) - extends LoggingABDecider { - - private val scopedStatsReceiver = statsReceiver.scope("cr_logging_ab_decider") - - override def impression( - experimentName: String, - recipient: Recipient - ): Option[Bucket] = { - - StatsUtil.trackNonFutureBlockStats(scopedStatsReceiver.scope("log_impression")) { - val maybeBuckets = loggingAbDecider.impression(experimentName, recipient) - maybeBuckets.foreach { b => - scopedStatsReceiver.counter("impressions").incr() - CrMixerImpressedBuckets.recordImpressedBucket(b) - } - maybeBuckets - } - } - - override def track( - experimentName: String, - eventName: String, - recipient: Recipient - ): Unit = { - loggingAbDecider.track(experimentName, eventName, recipient) - } - - override def bucket( - experimentName: String, - recipient: Recipient - ): Option[Bucket] = { - loggingAbDecider.bucket(experimentName, recipient) - } - - override def experiments: Seq[String] = loggingAbDecider.experiments - - override def experiment(experimentName: String) = - loggingAbDecider.experiment(experimentName) -} - -object CrMixerImpressedBuckets { - private[featureswitch] val localImpressedBucketsMap = new Local[ConcurrentMap[Bucket, Boolean]] - - /** - * Gets all impressed buckets for this request. - **/ - def getAllImpressedBuckets: Option[List[Bucket]] = { - localImpressedBucketsMap.apply().map(_.map { case (k, _) => k }.toList) - } - - private[featureswitch] def recordImpressedBucket(bucket: Bucket) = { - localImpressedBucketsMap().foreach { m => m += bucket -> true } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/ParamsBuilder.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/ParamsBuilder.scala deleted file mode 100644 index c322c456e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/ParamsBuilder.scala +++ /dev/null @@ -1,151 +0,0 @@ -package com.twitter.cr_mixer.featureswitch - -import com.twitter.abdecider.LoggingABDecider -import com.twitter.abdecider.UserRecipient -import com.twitter.cr_mixer.{thriftscala => t} -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.discovery.common.configapi.FeatureContextBuilder -import com.twitter.featureswitches.FSRecipient -import com.twitter.featureswitches.UserAgent -import com.twitter.featureswitches.{Recipient => FeatureSwitchRecipient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.product_mixer.core.thriftscala.ClientContext -import com.twitter.timelines.configapi.Config -import com.twitter.timelines.configapi.FeatureValue -import com.twitter.timelines.configapi.ForcedFeatureContext -import com.twitter.timelines.configapi.OrElseFeatureContext -import com.twitter.timelines.configapi.Params -import com.twitter.timelines.configapi.RequestContext -import com.twitter.timelines.configapi.abdecider.LoggingABDeciderExperimentContext -import javax.inject.Inject -import javax.inject.Singleton - -/** Singleton object for building [[Params]] to override */ -@Singleton -class ParamsBuilder @Inject() ( - globalStats: StatsReceiver, - abDecider: LoggingABDecider, - featureContextBuilder: FeatureContextBuilder, - config: Config) { - - private val stats = globalStats.scope("params") - - def buildFromClientContext( - clientContext: ClientContext, - product: t.Product, - userState: UserState, - userRoleOverride: Option[Set[String]] = None, - featureOverrides: Map[String, FeatureValue] = Map.empty, - ): Params = { - clientContext.userId match { - case Some(userId) => - val userRecipient = buildFeatureSwitchRecipient( - userId, - userRoleOverride, - clientContext, - product, - userState - ) - - val featureContext = OrElseFeatureContext( - ForcedFeatureContext(featureOverrides), - featureContextBuilder( - Some(userId), - Some(userRecipient) - )) - - config( - requestContext = RequestContext( - userId = Some(userId), - experimentContext = LoggingABDeciderExperimentContext( - abDecider, - Some(UserRecipient(userId, Some(userId)))), - featureContext = featureContext - ), - stats - ) - case None => - val guestRecipient = - buildFeatureSwitchRecipientWithGuestId(clientContext: ClientContext, product, userState) - - val featureContext = OrElseFeatureContext( - ForcedFeatureContext(featureOverrides), - featureContextBuilder( - clientContext.userId, - Some(guestRecipient) - ) - ) //ExperimentContext with GuestRecipient is not supported as there is no active use-cases yet in CrMixer - - config( - requestContext = RequestContext( - userId = clientContext.userId, - featureContext = featureContext - ), - stats - ) - } - } - - private def buildFeatureSwitchRecipientWithGuestId( - clientContext: ClientContext, - product: t.Product, - userState: UserState - ): FeatureSwitchRecipient = { - - val recipient = FSRecipient( - userId = None, - userRoles = None, - deviceId = clientContext.deviceId, - guestId = clientContext.guestId, - languageCode = clientContext.languageCode, - countryCode = clientContext.countryCode, - userAgent = clientContext.userAgent.flatMap(UserAgent(_)), - isVerified = None, - isTwoffice = None, - tooClient = None, - highWaterMark = None - ) - - recipient.withCustomFields( - (ParamsBuilder.ProductCustomField, product.toString), - (ParamsBuilder.UserStateCustomField, userState.toString) - ) - } - - private def buildFeatureSwitchRecipient( - userId: Long, - userRolesOverride: Option[Set[String]], - clientContext: ClientContext, - product: t.Product, - userState: UserState - ): FeatureSwitchRecipient = { - val userRoles = userRolesOverride match { - case Some(overrides) => Some(overrides) - case _ => clientContext.userRoles.map(_.toSet) - } - - val recipient = FSRecipient( - userId = Some(userId), - userRoles = userRoles, - deviceId = clientContext.deviceId, - guestId = clientContext.guestId, - languageCode = clientContext.languageCode, - countryCode = clientContext.countryCode, - userAgent = clientContext.userAgent.flatMap(UserAgent(_)), - isVerified = None, - isTwoffice = None, - tooClient = None, - highWaterMark = None - ) - - recipient.withCustomFields( - (ParamsBuilder.ProductCustomField, product.toString), - (ParamsBuilder.UserStateCustomField, userState.toString) - ) - } -} - -object ParamsBuilder { - private val ProductCustomField = "product_id" - private val UserStateCustomField = "user_state" -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/SetImpressedBucketsLocalContextFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/SetImpressedBucketsLocalContextFilter.scala deleted file mode 100644 index 905c99bea..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch/SetImpressedBucketsLocalContextFilter.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.cr_mixer.featureswitch - -import com.twitter.finagle.Filter -import javax.inject.Inject -import javax.inject.Singleton -import scala.collection.concurrent.TrieMap -import com.twitter.abdecider.Bucket -import com.twitter.finagle.Service - -@Singleton -class SetImpressedBucketsLocalContextFilter @Inject() () extends Filter.TypeAgnostic { - override def toFilter[Req, Rep]: Filter[Req, Rep, Req, Rep] = - (request: Req, service: Service[Req, Rep]) => { - - val concurrentTrieMap = TrieMap - .empty[Bucket, Boolean] // Trie map has no locks and O(1) inserts - CrMixerImpressedBuckets.localImpressedBucketsMap.let(concurrentTrieMap) { - service(request) - } - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/BUILD deleted file mode 100644 index e9db59798..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "finagle/finagle-core/src/main", - "frigate/frigate-common:util", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/FilterBase.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/FilterBase.scala deleted file mode 100644 index 1be4ebbaa..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/FilterBase.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.util.Future - -trait FilterBase { - def name: String - - type ConfigType - - def filter( - candidates: Seq[Seq[InitialCandidate]], - config: ConfigType - ): Future[Seq[Seq[InitialCandidate]]] - - /** - * Build the config params here. passing in param() into the filter is strongly discouraged - * because param() can be slow when called many times - */ - def requestToConfig[CGQueryType <: CandidateGeneratorQuery](request: CGQueryType): ConfigType -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ImpressedTweetlistFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ImpressedTweetlistFilter.scala deleted file mode 100644 index 41c9b7742..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ImpressedTweetlistFilter.scala +++ /dev/null @@ -1,63 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -case class ImpressedTweetlistFilter() extends FilterBase { - import ImpressedTweetlistFilter._ - - override val name: String = this.getClass.getCanonicalName - - override type ConfigType = FilterConfig - - /* - Filtering removes some candidates based on configurable criteria. - */ - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: FilterConfig - ): Future[Seq[Seq[InitialCandidate]]] = { - // Remove candidates which match a source tweet, or which are passed in impressedTweetList - val sourceTweetsMatch = candidates - .flatMap { - - /*** - * Within a Seq[Seq[InitialCandidate]], all candidates within a inner Seq - * are guaranteed to have the same sourceInfo. Hence, we can pick .headOption - * to represent the whole list when filtering by the internalId of the sourceInfoOpt. - * But of course the similarityEngineInfo could be different. - */ - _.headOption.flatMap { candidate => - candidate.candidateGenerationInfo.sourceInfoOpt.map(_.internalId) - } - }.collect { - case InternalId.TweetId(id) => id - } - - val impressedTweetList: Set[TweetId] = - config.impressedTweetList ++ sourceTweetsMatch - - val filteredCandidateMap: Seq[Seq[InitialCandidate]] = - candidates.map { - _.filterNot { candidate => - impressedTweetList.contains(candidate.tweetId) - } - } - Future.value(filteredCandidateMap) - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType - ): FilterConfig = { - FilterConfig(request.impressedTweetList) - } -} - -object ImpressedTweetlistFilter { - case class FilterConfig(impressedTweetList: Set[TweetId]) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/InNetworkFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/InNetworkFilter.scala deleted file mode 100644 index 62f4ddba5..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/InNetworkFilter.scala +++ /dev/null @@ -1,80 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.param.UtegTweetGlobalParams -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import com.twitter.wtf.candidate.thriftscala.CandidateSeq - -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/*** - * Filters in-network tweets - */ -@Singleton -case class InNetworkFilter @Inject() ( - @Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq], - globalStats: StatsReceiver) - extends FilterBase { - override val name: String = this.getClass.getCanonicalName - import InNetworkFilter._ - - override type ConfigType = FilterConfig - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - private val filterCandidatesStats = stats.scope("filter_candidates") - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - filterConfig: FilterConfig, - ): Future[Seq[Seq[InitialCandidate]]] = { - StatsUtil.trackItemsStats(filterCandidatesStats) { - filterCandidates(candidates, filterConfig) - } - } - - private def filterCandidates( - candidates: Seq[Seq[InitialCandidate]], - filterConfig: FilterConfig, - ): Future[Seq[Seq[InitialCandidate]]] = { - - if (!filterConfig.enableInNetworkFilter) { - Future.value(candidates) - } else { - filterConfig.userIdOpt match { - case Some(userId) => - realGraphStoreMh - .get(userId).map(_.map(_.candidates.map(_.userId)).getOrElse(Seq.empty).toSet).map { - realGraphInNetworkAuthorsSet => - candidates.map(_.filterNot { candidate => - realGraphInNetworkAuthorsSet.contains(candidate.tweetInfo.authorId) - }) - } - case None => Future.value(candidates) - } - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType - ): FilterConfig = { - request match { - case UtegTweetCandidateGeneratorQuery(userId, _, _, _, _, params, _) => - FilterConfig(Some(userId), params(UtegTweetGlobalParams.EnableInNetworkFilterParam)) - case _ => FilterConfig(None, false) - } - } -} - -object InNetworkFilter { - case class FilterConfig( - userIdOpt: Option[UserId], - enableInNetworkFilter: Boolean) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PostRankFilterRunner.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PostRankFilterRunner.scala deleted file mode 100644 index 483f3d956..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PostRankFilterRunner.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.cr_mixer.filter -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class PostRankFilterRunner @Inject() ( - globalStats: StatsReceiver) { - - private val scopedStats = globalStats.scope(this.getClass.getCanonicalName) - - private val beforeCount = scopedStats.stat("candidate_count", "before") - private val afterCount = scopedStats.stat("candidate_count", "after") - - def run( - query: CrCandidateGeneratorQuery, - candidates: Seq[RankedCandidate] - ): Future[Seq[RankedCandidate]] = { - - beforeCount.add(candidates.size) - - Future( - removeBadRecentNotificationCandidates(candidates) - ).map { results => - afterCount.add(results.size) - results - } - } - - /** - * Remove "bad" quality candidates generated by recent notifications - * A candidate is bad when it is generated by a single RecentNotification - * SourceKey. - * e.x: - * tweetA {recent notification1} -> bad - * tweetB {recent notification1 recent notification2} -> good - *tweetC {recent notification1 recent follow1} -> bad - * SD-19397 - */ - private[filter] def removeBadRecentNotificationCandidates( - candidates: Seq[RankedCandidate] - ): Seq[RankedCandidate] = { - candidates.filterNot { - isBadQualityRecentNotificationCandidate - } - } - - private def isBadQualityRecentNotificationCandidate(candidate: RankedCandidate): Boolean = { - candidate.potentialReasons.size == 1 && - candidate.potentialReasons.head.sourceInfoOpt.nonEmpty && - candidate.potentialReasons.head.sourceInfoOpt.get.sourceType == SourceType.NotificationClick - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PreRankFilterRunner.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PreRankFilterRunner.scala deleted file mode 100644 index 7626acc7c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/PreRankFilterRunner.scala +++ /dev/null @@ -1,99 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -class PreRankFilterRunner @Inject() ( - impressedTweetListFilter: ImpressedTweetlistFilter, - tweetAgeFilter: TweetAgeFilter, - videoTweetFilter: VideoTweetFilter, - tweetReplyFilter: ReplyFilter, - globalStats: StatsReceiver) { - - private val scopedStats = globalStats.scope(this.getClass.getCanonicalName) - - /*** - * The order of the filters does not matter as long as we do not apply .take(N) truncation - * across all filters. In other words, it is fine that we first do tweetAgeFilter, and then - * we do impressedTweetListFilter, or the other way around. - * Same idea applies to the signal based filter - it is ok that we apply signal based filters - * before impressedTweetListFilter. - * - * We move all signal based filters before tweetAgeFilter and impressedTweetListFilter - * as a set of early filters. - */ - val orderedFilters = Seq( - tweetAgeFilter, - impressedTweetListFilter, - videoTweetFilter, - tweetReplyFilter - ) - - def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType, - candidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[Seq[InitialCandidate]]] = { - PreRankFilterRunner.runSequentialFilters( - request, - candidates, - orderedFilters, - scopedStats - ) - } - -} - -object PreRankFilterRunner { - private def recordCandidateStatsBeforeFilter( - candidates: Seq[Seq[InitialCandidate]], - statsReceiver: StatsReceiver - ): Unit = { - statsReceiver - .counter("empty_sources", "before").incr( - candidates.count { _.isEmpty } - ) - candidates.foreach { candidate => - statsReceiver.counter("candidates", "before").incr(candidate.size) - } - } - - private def recordCandidateStatsAfterFilter( - candidates: Seq[Seq[InitialCandidate]], - statsReceiver: StatsReceiver - ): Unit = { - statsReceiver - .counter("empty_sources", "after").incr( - candidates.count { _.isEmpty } - ) - candidates.foreach { candidate => - statsReceiver.counter("candidates", "after").incr(candidate.size) - } - } - - /* - Helper function for running some candidates through a sequence of filters - */ - private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType, - candidates: Seq[Seq[InitialCandidate]], - filters: Seq[FilterBase], - statsReceiver: StatsReceiver - ): Future[Seq[Seq[InitialCandidate]]] = - filters.foldLeft(Future.value(candidates)) { - case (candsFut, filter) => - candsFut.flatMap { cands => - recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name)) - filter - .filter(cands, filter.requestToConfig(request)) - .map { filteredCands => - recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name)) - filteredCands - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ReplyFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ReplyFilter.scala deleted file mode 100644 index d4d37a7da..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/ReplyFilter.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Singleton - -/*** - * Filters candidates that are replies - */ -@Singleton -case class ReplyFilter @Inject() () extends FilterBase { - override def name: String = this.getClass.getCanonicalName - override type ConfigType = Boolean - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: ConfigType - ): Future[Seq[Seq[InitialCandidate]]] = { - if (config) { - Future.value( - candidates.map { candidateSeq => - candidateSeq.filterNot { candidate => - candidate.tweetInfo.isReply.getOrElse(false) - } - } - ) - } else { - Future.value(candidates) - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): ConfigType = { - true - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/RetweetFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/RetweetFilter.scala deleted file mode 100644 index 38eefadd9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/RetweetFilter.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.UtegTweetGlobalParams -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Singleton - -/*** - * Filters candidates that are retweets - */ -@Singleton -case class RetweetFilter @Inject() () extends FilterBase { - override def name: String = this.getClass.getCanonicalName - override type ConfigType = Boolean - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: ConfigType - ): Future[Seq[Seq[InitialCandidate]]] = { - if (config) { - Future.value( - candidates.map { candidateSeq => - candidateSeq.filterNot { candidate => - candidate.tweetInfo.isRetweet.getOrElse(false) - } - } - ) - } else { - Future.value(candidates) - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): ConfigType = { - query.params(UtegTweetGlobalParams.EnableRetweetFilterParam) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetAgeFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetAgeFilter.scala deleted file mode 100644 index d7c8889e1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetAgeFilter.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Singleton -import com.twitter.conversions.DurationOps._ - -@Singleton -case class TweetAgeFilter() extends FilterBase { - override val name: String = this.getClass.getCanonicalName - - override type ConfigType = Duration - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - maxTweetAge: Duration - ): Future[Seq[Seq[InitialCandidate]]] = { - if (maxTweetAge >= 720.hours) { - Future.value(candidates) - } else { - // Tweet IDs are approximately chronological (see http://go/snowflake), - // so we are building the earliest tweet id once, - // and pass that as the value to filter candidates for each CandidateGenerationModel. - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge) - Future.value(candidates.map(_.filter(_.tweetId >= earliestTweetId))) - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): Duration = { - query.params(GlobalParams.MaxTweetAgeHoursParam) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetInfoHealthFilterBase.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetInfoHealthFilterBase.scala deleted file mode 100644 index 5ea248424..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/TweetInfoHealthFilterBase.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.HealthThreshold -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -trait TweetInfoHealthFilterBase extends FilterBase { - override def name: String = this.getClass.getCanonicalName - override type ConfigType = HealthThreshold.Enum.Value - def thresholdToPropertyMap: Map[HealthThreshold.Enum.Value, TweetInfo => Option[Boolean]] - def getFilterParamFn: CandidateGeneratorQuery => HealthThreshold.Enum.Value - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: HealthThreshold.Enum.Value - ): Future[Seq[Seq[InitialCandidate]]] = { - Future.value(candidates.map { seq => - seq.filter(p => thresholdToPropertyMap(config)(p.tweetInfo).getOrElse(true)) - }) - } - - /** - * Build the config params here. passing in param() into the filter is strongly discouraged - * because param() can be slow when called many times - */ - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): HealthThreshold.Enum.Value = { - query match { - case q: CrCandidateGeneratorQuery => getFilterParamFn(q) - case _ => HealthThreshold.Enum.Off - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegFilterRunner.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegFilterRunner.scala deleted file mode 100644 index 463e026b9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegFilterRunner.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Singleton - -/*** - * - * Run filters sequentially for UTEG candidate generator. The structure is copied from PreRankFilterRunner. - */ -@Singleton -class UtegFilterRunner @Inject() ( - inNetworkFilter: InNetworkFilter, - utegHealthFilter: UtegHealthFilter, - retweetFilter: RetweetFilter, - globalStats: StatsReceiver) { - - private val scopedStats = globalStats.scope(this.getClass.getCanonicalName) - - val orderedFilters: Seq[FilterBase] = Seq( - inNetworkFilter, - utegHealthFilter, - retweetFilter - ) - - def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType, - candidates: Seq[Seq[InitialCandidate]], - ): Future[Seq[Seq[InitialCandidate]]] = { - UtegFilterRunner.runSequentialFilters( - request, - candidates, - orderedFilters, - scopedStats - ) - } - -} - -object UtegFilterRunner { - private def recordCandidateStatsBeforeFilter( - candidates: Seq[Seq[InitialCandidate]], - statsReceiver: StatsReceiver - ): Unit = { - statsReceiver - .counter("empty_sources", "before").incr( - candidates.count { - _.isEmpty - } - ) - candidates.foreach { candidate => - statsReceiver.counter("candidates", "before").incr(candidate.size) - } - } - - private def recordCandidateStatsAfterFilter( - candidates: Seq[Seq[InitialCandidate]], - statsReceiver: StatsReceiver - ): Unit = { - statsReceiver - .counter("empty_sources", "after").incr( - candidates.count { - _.isEmpty - } - ) - candidates.foreach { candidate => - statsReceiver.counter("candidates", "after").incr(candidate.size) - } - } - - /* - Helper function for running some candidates through a sequence of filters - */ - private[filter] def runSequentialFilters[CGQueryType <: CandidateGeneratorQuery]( - request: CGQueryType, - candidates: Seq[Seq[InitialCandidate]], - filters: Seq[FilterBase], - statsReceiver: StatsReceiver - ): Future[Seq[Seq[InitialCandidate]]] = - filters.foldLeft(Future.value(candidates)) { - case (candsFut, filter) => - candsFut.flatMap { cands => - recordCandidateStatsBeforeFilter(cands, statsReceiver.scope(filter.name)) - filter - .filter(cands, filter.requestToConfig(request)) - .map { filteredCands => - recordCandidateStatsAfterFilter(filteredCands, statsReceiver.scope(filter.name)) - filteredCands - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegHealthFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegHealthFilter.scala deleted file mode 100644 index 4a327b161..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/UtegHealthFilter.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.param.UtegTweetGlobalParams -import com.twitter.util.Future - -import javax.inject.Inject -import javax.inject.Singleton - -/** - * Remove unhealthy candidates - * Currently Timeline Ranker applies a check on the following three scores: - * - toxicityScore - * - pBlockScore - * - pReportedTweetScore - * - * Where isPassTweetHealthFilterStrict checks two additions scores with the same threshold: - * - pSpammyTweetScore - * - spammyTweetContentScore - * - * We've verified that both filters behave very similarly. - */ -@Singleton -case class UtegHealthFilter @Inject() () extends FilterBase { - override def name: String = this.getClass.getCanonicalName - override type ConfigType = Boolean - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: ConfigType - ): Future[Seq[Seq[InitialCandidate]]] = { - if (config) { - Future.value( - candidates.map { candidateSeq => - candidateSeq.filter { candidate => - candidate.tweetInfo.isPassTweetHealthFilterStrict.getOrElse(false) - } - } - ) - } else { - Future.value(candidates) - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): ConfigType = { - query.params(UtegTweetGlobalParams.EnableTLRHealthFilterParam) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/VideoTweetFilter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/VideoTweetFilter.scala deleted file mode 100644 index 755ba8ac7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/filter/VideoTweetFilter.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.cr_mixer.filter - -import com.twitter.cr_mixer.filter.VideoTweetFilter.FilterConfig -import com.twitter.cr_mixer.model.CandidateGeneratorQuery -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.RelatedVideoTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.param.VideoTweetFilterParams -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -case class VideoTweetFilter() extends FilterBase { - override val name: String = this.getClass.getCanonicalName - - override type ConfigType = FilterConfig - - override def filter( - candidates: Seq[Seq[InitialCandidate]], - config: ConfigType - ): Future[Seq[Seq[InitialCandidate]]] = { - Future.value(candidates.map { - _.flatMap { - candidate => - if (!config.enableVideoTweetFilter) { - Some(candidate) - } else { - // if hasVideo is true, hasImage, hasGif should be false - val hasVideo = checkTweetInfoAttribute(candidate.tweetInfo.hasVideo) - val isHighMediaResolution = - checkTweetInfoAttribute(candidate.tweetInfo.isHighMediaResolution) - val isQuoteTweet = checkTweetInfoAttribute(candidate.tweetInfo.isQuoteTweet) - val isReply = checkTweetInfoAttribute(candidate.tweetInfo.isReply) - val hasMultipleMedia = checkTweetInfoAttribute(candidate.tweetInfo.hasMultipleMedia) - val hasUrl = checkTweetInfoAttribute(candidate.tweetInfo.hasUrl) - - if (hasVideo && isHighMediaResolution && !isQuoteTweet && - !isReply && !hasMultipleMedia && !hasUrl) { - Some(candidate) - } else { - None - } - } - } - }) - } - - def checkTweetInfoAttribute(attributeOpt: => Option[Boolean]): Boolean = { - if (attributeOpt.isDefined) - attributeOpt.get - else { - // takes Quoted Tweet (TweetInfo.isQuoteTweet) as an example, - // if the attributeOpt is None, we by default say it is not a quoted tweet - // similarly, if TweetInfo.hasVideo is a None, - // we say it does not have video. - false - } - } - - override def requestToConfig[CGQueryType <: CandidateGeneratorQuery]( - query: CGQueryType - ): FilterConfig = { - val enableVideoTweetFilter = query match { - case _: CrCandidateGeneratorQuery | _: RelatedTweetCandidateGeneratorQuery | - _: RelatedVideoTweetCandidateGeneratorQuery => - query.params(VideoTweetFilterParams.EnableVideoTweetFilterParam) - case _ => false // e.g., GetRelatedTweets() - } - FilterConfig( - enableVideoTweetFilter = enableVideoTweetFilter - ) - } -} - -object VideoTweetFilter { - // extend the filterConfig to add more flags if needed. - // now they are hardcoded according to the prod setting - case class FilterConfig( - enableVideoTweetFilter: Boolean) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/AdsRecommendationsScribeLogger.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/AdsRecommendationsScribeLogger.scala deleted file mode 100644 index f786bd586..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/AdsRecommendationsScribeLogger.scala +++ /dev/null @@ -1,139 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialAdsCandidate -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.logging.ScribeLoggerUtils._ -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.thriftscala.AdsRecommendationTopLevelApiResult -import com.twitter.cr_mixer.thriftscala.AdsRecommendationsResult -import com.twitter.cr_mixer.thriftscala.AdsRequest -import com.twitter.cr_mixer.thriftscala.AdsResponse -import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult -import com.twitter.cr_mixer.thriftscala.GetAdsRecommendationsScribe -import com.twitter.cr_mixer.thriftscala.PerformanceMetrics -import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata -import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future -import com.twitter.util.Stopwatch - -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -case class AdsRecommendationsScribeLogger @Inject() ( - @Named(ModuleNames.AdsRecommendationsLogger) adsRecommendationsScribeLogger: Logger, - decider: CrMixerDecider, - statsReceiver: StatsReceiver) { - - private val scopedStats = statsReceiver.scope(this.getClass.getCanonicalName) - private val upperFunnelsStats = scopedStats.scope("UpperFunnels") - private val topLevelApiStats = scopedStats.scope("TopLevelApi") - - /* - * Scribe first step results after fetching initial ads candidate - * */ - def scribeInitialAdsCandidates( - query: AdsCandidateGeneratorQuery, - getResultFn: => Future[Seq[Seq[InitialAdsCandidate]]], - enableScribe: Boolean // controlled by feature switch so that we can scribe for certain DDG - ): Future[Seq[Seq[InitialAdsCandidate]]] = { - val scribeMetadata = ScribeMetadata.from(query) - val timer = Stopwatch.start() - getResultFn.onSuccess { input => - val latencyMs = timer().inMilliseconds - val result = convertFetchCandidatesResult(input, scribeMetadata.userId) - val traceId = Trace.id.traceId.toLong - val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - - if (enableScribe && decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.adsRecommendationsPerExperimentScribeRate)) { - upperFunnelsStats.counter(scribeMetadata.product.originalName).incr() - scribeResult(scribeMsg) - } - } - } - - /* - * Scribe top level API results - * */ - def scribeGetAdsRecommendations( - request: AdsRequest, - startTime: Long, - scribeMetadata: ScribeMetadata, - getResultFn: => Future[AdsResponse], - enableScribe: Boolean - ): Future[AdsResponse] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { response => - val latencyMs = timer().inMilliseconds - val result = AdsRecommendationsResult.AdsRecommendationTopLevelApiResult( - AdsRecommendationTopLevelApiResult( - timestamp = startTime, - request = request, - response = response - )) - val traceId = Trace.id.traceId.toLong - val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - - if (enableScribe && decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.adsRecommendationsPerExperimentScribeRate)) { - topLevelApiStats.counter(scribeMetadata.product.originalName).incr() - scribeResult(scribeMsg) - } - } - } - - private def convertFetchCandidatesResult( - candidatesSeq: Seq[Seq[InitialAdsCandidate]], - requestUserId: UserId - ): AdsRecommendationsResult = { - val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates => - candidates.map { candidate => - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = Some( - CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)), - score = Some(candidate.getSimilarityScore), - numCandidateGenerationKeys = None // not populated yet - ) - } - } - AdsRecommendationsResult.FetchCandidatesResult( - FetchCandidatesResult(Some(tweetCandidatesWithMetadata))) - } - - private def buildScribeMessage( - result: AdsRecommendationsResult, - scribeMetadata: ScribeMetadata, - latencyMs: Long, - traceId: Long - ): GetAdsRecommendationsScribe = { - GetAdsRecommendationsScribe( - uuid = scribeMetadata.requestUUID, - userId = scribeMetadata.userId, - result = result, - traceId = Some(traceId), - performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))), - impressedBuckets = getImpressedBuckets(scopedStats) - ) - } - - private def scribeResult( - scribeMsg: GetAdsRecommendationsScribe - ): Unit = { - publish( - logger = adsRecommendationsScribeLogger, - codec = GetAdsRecommendationsScribe, - message = scribeMsg) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/BUILD deleted file mode 100644 index edf0b77f0..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/BUILD +++ /dev/null @@ -1,34 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "abdecider/src/main/scala", - "content-recommender/thrift/src/main/thrift:content-recommender-common-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "featureswitches/featureswitches-core/src/main/scala:experimentation-settings", - "finagle/finagle-core/src/main", - "frigate/frigate-common:base", - "frigate/frigate-common:util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "kafka/finagle-kafka/finatra-kafka/src/main/scala", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers", - "scribelib/validators/src/main/scala/com/twitter/scribelib/validators", - "scrooge/scrooge-serializer/src/main/scala", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/ml/api:data-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "timelines/src/main/scala/com/twitter/timelines/clientevent", - "util-internal/scribe/src/main/scala/com/twitter/logging", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/CrMixerScribeLogger.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/CrMixerScribeLogger.scala deleted file mode 100644 index 024dcf55b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/CrMixerScribeLogger.scala +++ /dev/null @@ -1,489 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.google.common.base.CaseFormat -import com.twitter.abdecider.ScribingABDeciderUtil -import com.twitter.scribelib.marshallers.ClientDataProvider -import com.twitter.scribelib.marshallers.ScribeSerialization -import com.twitter.timelines.clientevent.MinimalClientDataProvider -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.logging.ScribeLoggerUtils._ -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.scribe.ScribeCategories -import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest -import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse -import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult -import com.twitter.cr_mixer.thriftscala.FetchSignalSourcesResult -import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe -import com.twitter.cr_mixer.thriftscala.InterleaveResult -import com.twitter.cr_mixer.thriftscala.PerformanceMetrics -import com.twitter.cr_mixer.thriftscala.PreRankFilterResult -import com.twitter.cr_mixer.thriftscala.Product -import com.twitter.cr_mixer.thriftscala.RankResult -import com.twitter.cr_mixer.thriftscala.Result -import com.twitter.cr_mixer.thriftscala.SourceSignal -import com.twitter.cr_mixer.thriftscala.TopLevelApiResult -import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata -import com.twitter.cr_mixer.thriftscala.VITTweetCandidateScribe -import com.twitter.cr_mixer.thriftscala.VITTweetCandidatesScribe -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil -import com.twitter.cr_mixer.util.MetricTagUtil -import com.twitter.decider.SimpleRecipient -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.finatra.kafka.producers.KafkaProducerBase -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.util.Future -import com.twitter.util.Stopwatch -import com.twitter.util.Time - -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton -import scala.util.Random - -@Singleton -case class CrMixerScribeLogger @Inject() ( - decider: CrMixerDecider, - statsReceiver: StatsReceiver, - @Named(ModuleNames.TweetRecsLogger) tweetRecsScribeLogger: Logger, - @Named(ModuleNames.BlueVerifiedTweetRecsLogger) blueVerifiedTweetRecsScribeLogger: Logger, - @Named(ModuleNames.TopLevelApiDdgMetricsLogger) ddgMetricsLogger: Logger, - kafkaProducer: KafkaProducerBase[String, GetTweetsRecommendationsScribe]) { - - import CrMixerScribeLogger._ - - private val scopedStats = statsReceiver.scope("CrMixerScribeLogger") - private val topLevelApiStats = scopedStats.scope("TopLevelApi") - private val upperFunnelsStats = scopedStats.scope("UpperFunnels") - private val kafkaMessagesStats = scopedStats.scope("KafkaMessages") - private val topLevelApiDdgMetricsStats = scopedStats.scope("TopLevelApiDdgMetrics") - private val blueVerifiedTweetCandidatesStats = scopedStats.scope("BlueVerifiedTweetCandidates") - - private val serialization = new ScribeSerialization {} - - def scribeSignalSources( - query: CrCandidateGeneratorQuery, - getResultFn: => Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] - ): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertFetchSignalSourcesResult - ) - } - - def scribeInitialCandidates( - query: CrCandidateGeneratorQuery, - getResultFn: => Future[Seq[Seq[InitialCandidate]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertFetchCandidatesResult - ) - } - - def scribePreRankFilterCandidates( - query: CrCandidateGeneratorQuery, - getResultFn: => Future[Seq[Seq[InitialCandidate]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertPreRankFilterResult - ) - } - - def scribeInterleaveCandidates( - query: CrCandidateGeneratorQuery, - getResultFn: => Future[Seq[BlendedCandidate]] - ): Future[Seq[BlendedCandidate]] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertInterleaveResult, - enableKafkaScribe = true - ) - } - - def scribeRankedCandidates( - query: CrCandidateGeneratorQuery, - getResultFn: => Future[Seq[RankedCandidate]] - ): Future[Seq[RankedCandidate]] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertRankResult - ) - } - - /** - * Scribe Top Level API Request / Response and performance metrics - * for the getTweetRecommendations() endpoint. - */ - def scribeGetTweetRecommendations( - request: CrMixerTweetRequest, - startTime: Long, - scribeMetadata: ScribeMetadata, - getResultFn: => Future[CrMixerTweetResponse] - ): Future[CrMixerTweetResponse] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { response => - val latencyMs = timer().inMilliseconds - val result = convertTopLevelAPIResult(request, response, startTime) - val traceId = Trace.id.traceId.toLong - val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - - // We use upperFunnelPerStepScribeRate to cover TopLevelApi scribe logs - if (decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.upperFunnelPerStepScribeRate)) { - topLevelApiStats.counter(scribeMetadata.product.originalName).incr() - scribeResult(scribeMsg) - } - if (decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.topLevelApiDdgMetricsScribeRate)) { - topLevelApiDdgMetricsStats.counter(scribeMetadata.product.originalName).incr() - val topLevelDdgMetricsMetadata = TopLevelDdgMetricsMetadata.from(request) - publishTopLevelDdgMetrics( - logger = ddgMetricsLogger, - topLevelDdgMetricsMetadata = topLevelDdgMetricsMetadata, - latencyMs = latencyMs, - candidateSize = response.tweets.length) - } - } - } - - /** - * Scribe all of the Blue Verified tweets that are candidates from cr-mixer - * from the getTweetRecommendations() endpoint for stats tracking/debugging purposes. - */ - def scribeGetTweetRecommendationsForBlueVerified( - scribeMetadata: ScribeMetadata, - getResultFn: => Future[Seq[RankedCandidate]] - ): Future[Seq[RankedCandidate]] = { - getResultFn.onSuccess { rankedCandidates => - if (decider.isAvailable(DeciderConstants.enableScribeForBlueVerifiedTweetCandidates)) { - blueVerifiedTweetCandidatesStats.counter("process_request").incr() - - val blueVerifiedTweetCandidates = rankedCandidates.filter { tweet => - tweet.tweetInfo.hasBlueVerifiedAnnotation.contains(true) - } - - val impressedBuckets = getImpressedBuckets(blueVerifiedTweetCandidatesStats).getOrElse(Nil) - - val blueVerifiedCandidateScribes = blueVerifiedTweetCandidates.map { candidate => - blueVerifiedTweetCandidatesStats - .scope(scribeMetadata.product.name).counter( - candidate.tweetInfo.authorId.toString).incr() - VITTweetCandidateScribe( - tweetId = candidate.tweetId, - authorId = candidate.tweetInfo.authorId, - score = candidate.predictionScore, - metricTags = MetricTagUtil.buildMetricTags(candidate) - ) - } - - val blueVerifiedScribe = - VITTweetCandidatesScribe( - uuid = scribeMetadata.requestUUID, - userId = scribeMetadata.userId, - candidates = blueVerifiedCandidateScribes, - product = scribeMetadata.product, - impressedBuckets = impressedBuckets - ) - - publish( - logger = blueVerifiedTweetRecsScribeLogger, - codec = VITTweetCandidatesScribe, - message = blueVerifiedScribe) - } - } - } - - /** - * Scribe Per-step intermediate results and performance metrics - * for each step: fetch signals, fetch candidates, filters, ranker, etc - */ - private[logging] def scribeResultsAndPerformanceMetrics[T]( - scribeMetadata: ScribeMetadata, - getResultFn: => Future[T], - convertToResultFn: (T, UserId) => Result, - enableKafkaScribe: Boolean = false - ): Future[T] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { input => - val latencyMs = timer().inMilliseconds - val result = convertToResultFn(input, scribeMetadata.userId) - val traceId = Trace.id.traceId.toLong - val scribeMsg = buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - - if (decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.upperFunnelPerStepScribeRate)) { - upperFunnelsStats.counter(scribeMetadata.product.originalName).incr() - scribeResult(scribeMsg) - } - - // forks the scribe as a Kafka message for async feature hydration - if (enableKafkaScribe && shouldScribeKafkaMessage( - scribeMetadata.userId, - scribeMetadata.product)) { - kafkaMessagesStats.counter(scribeMetadata.product.originalName).incr() - - val batchedKafkaMessages = downsampleKafkaMessage(scribeMsg) - batchedKafkaMessages.foreach { kafkaMessage => - kafkaProducer.send( - topic = ScribeCategories.TweetsRecs.scribeCategory, - key = traceId.toString, - value = kafkaMessage, - timestamp = Time.now.inMilliseconds - ) - } - } - } - } - - private def convertTopLevelAPIResult( - request: CrMixerTweetRequest, - response: CrMixerTweetResponse, - startTime: Long - ): Result = { - Result.TopLevelApiResult( - TopLevelApiResult( - timestamp = startTime, - request = request, - response = response - )) - } - - private def convertFetchSignalSourcesResult( - sourceInfoSetTuple: (Set[SourceInfo], Map[String, Option[GraphSourceInfo]]), - requestUserId: UserId - ): Result = { - val sourceSignals = sourceInfoSetTuple._1.map { sourceInfo => - SourceSignal(id = Some(sourceInfo.internalId)) - } - // For source graphs, we pass in requestUserId as a placeholder - val sourceGraphs = sourceInfoSetTuple._2.map { - case (_, _) => - SourceSignal(id = Some(InternalId.UserId(requestUserId))) - } - Result.FetchSignalSourcesResult( - FetchSignalSourcesResult( - signals = Some(sourceSignals ++ sourceGraphs) - )) - } - - private def convertFetchCandidatesResult( - candidatesSeq: Seq[Seq[InitialCandidate]], - requestUserId: UserId - ): Result = { - val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates => - candidates.map { candidate => - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = Some( - CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)), - score = Some(candidate.getSimilarityScore), - numCandidateGenerationKeys = None // not populated yet - ) - } - } - Result.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata))) - } - - private def convertPreRankFilterResult( - candidatesSeq: Seq[Seq[InitialCandidate]], - requestUserId: UserId - ): Result = { - val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates => - candidates.map { candidate => - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = Some( - CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId)), - score = Some(candidate.getSimilarityScore), - numCandidateGenerationKeys = None // not populated yet - ) - } - } - Result.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata))) - } - - // We take InterleaveResult for Unconstrained dataset ML ranker training - private def convertInterleaveResult( - blendedCandidates: Seq[BlendedCandidate], - requestUserId: UserId - ): Result = { - val tweetCandidatesWithMetadata = blendedCandidates.map { blendedCandidate => - val candidateGenerationKey = - CandidateGenerationKeyUtil.toThrift(blendedCandidate.reasonChosen, requestUserId) - TweetCandidateWithMetadata( - tweetId = blendedCandidate.tweetId, - candidateGenerationKey = Some(candidateGenerationKey), - authorId = Some(blendedCandidate.tweetInfo.authorId), // for ML pipeline training - score = Some(blendedCandidate.getSimilarityScore), - numCandidateGenerationKeys = Some(blendedCandidate.potentialReasons.size) - ) // hydrate fields for light ranking training data - } - Result.InterleaveResult(InterleaveResult(Some(tweetCandidatesWithMetadata))) - } - - private def convertRankResult( - rankedCandidates: Seq[RankedCandidate], - requestUserId: UserId - ): Result = { - val tweetCandidatesWithMetadata = rankedCandidates.map { rankedCandidate => - val candidateGenerationKey = - CandidateGenerationKeyUtil.toThrift(rankedCandidate.reasonChosen, requestUserId) - TweetCandidateWithMetadata( - tweetId = rankedCandidate.tweetId, - candidateGenerationKey = Some(candidateGenerationKey), - score = Some(rankedCandidate.getSimilarityScore), - numCandidateGenerationKeys = Some(rankedCandidate.potentialReasons.size) - ) - } - Result.RankResult(RankResult(Some(tweetCandidatesWithMetadata))) - } - - private def buildScribeMessage( - result: Result, - scribeMetadata: ScribeMetadata, - latencyMs: Long, - traceId: Long - ): GetTweetsRecommendationsScribe = { - GetTweetsRecommendationsScribe( - uuid = scribeMetadata.requestUUID, - userId = scribeMetadata.userId, - result = result, - traceId = Some(traceId), - performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))), - impressedBuckets = getImpressedBuckets(scopedStats) - ) - } - - private def scribeResult( - scribeMsg: GetTweetsRecommendationsScribe - ): Unit = { - publish( - logger = tweetRecsScribeLogger, - codec = GetTweetsRecommendationsScribe, - message = scribeMsg) - } - - /** - * Gate for producing messages to Kafka for async feature hydration - */ - private def shouldScribeKafkaMessage( - userId: UserId, - product: Product - ): Boolean = { - val isEligibleUser = decider.isAvailable( - DeciderConstants.kafkaMessageScribeSampleRate, - Some(SimpleRecipient(userId))) - val isHomeProduct = (product == Product.Home) - isEligibleUser && isHomeProduct - } - - /** - * Due to size limits of Strato (see SD-19028), each Kafka message must be downsampled - */ - private[logging] def downsampleKafkaMessage( - scribeMsg: GetTweetsRecommendationsScribe - ): Seq[GetTweetsRecommendationsScribe] = { - val sampledResultSeq: Seq[Result] = scribeMsg.result match { - case Result.InterleaveResult(interleaveResult) => - val sampledTweetsSeq = interleaveResult.tweets - .map { tweets => - Random - .shuffle(tweets).take(KafkaMaxTweetsPerMessage) - .grouped(BatchSize).toSeq - }.getOrElse(Seq.empty) - - sampledTweetsSeq.map { sampledTweets => - Result.InterleaveResult(InterleaveResult(Some(sampledTweets))) - } - - // if it's an unrecognized type, err on the side of sending no candidates - case _ => - kafkaMessagesStats.counter("InvalidKafkaMessageResultType").incr() - Seq(Result.InterleaveResult(InterleaveResult(None))) - } - - sampledResultSeq.map { sampledResult => - GetTweetsRecommendationsScribe( - uuid = scribeMsg.uuid, - userId = scribeMsg.userId, - result = sampledResult, - traceId = scribeMsg.traceId, - performanceMetrics = None, - impressedBuckets = None - ) - } - } - - /** - * Handles client_event serialization to log data into DDG metrics - */ - private[logging] def publishTopLevelDdgMetrics( - logger: Logger, - topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata, - candidateSize: Long, - latencyMs: Long, - ): Unit = { - val data = Map[Any, Any]( - "latency_ms" -> latencyMs, - "event_value" -> candidateSize - ) - val label: (String, String) = ("tweetrec", "") - val namespace = getNamespace(topLevelDdgMetricsMetadata, label) + ("action" -> "candidates") - val message = - serialization - .serializeClientEvent(namespace, getClientData(topLevelDdgMetricsMetadata), data) - logger.info(message) - } - - private def getClientData( - topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata - ): ClientDataProvider = - MinimalClientDataProvider( - userId = topLevelDdgMetricsMetadata.userId, - guestId = None, - clientApplicationId = topLevelDdgMetricsMetadata.clientApplicationId, - countryCode = topLevelDdgMetricsMetadata.countryCode - ) - - private def getNamespace( - topLevelDdgMetricsMetadata: TopLevelDdgMetricsMetadata, - label: (String, String) - ): Map[String, String] = { - val productName = - CaseFormat.UPPER_CAMEL - .to(CaseFormat.LOWER_UNDERSCORE, topLevelDdgMetricsMetadata.product.originalName) - - Map( - "client" -> ScribingABDeciderUtil.clientForAppId( - topLevelDdgMetricsMetadata.clientApplicationId), - "page" -> "cr-mixer", - "section" -> productName, - "component" -> label._1, - "element" -> label._2 - ) - } -} - -object CrMixerScribeLogger { - val KafkaMaxTweetsPerMessage: Int = 200 - val BatchSize: Int = 20 -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/RelatedTweetScribeLogger.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/RelatedTweetScribeLogger.scala deleted file mode 100644 index b2b36f43c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/RelatedTweetScribeLogger.scala +++ /dev/null @@ -1,193 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.logging.ScribeLoggerUtils._ -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult -import com.twitter.cr_mixer.thriftscala.GetRelatedTweetsScribe -import com.twitter.cr_mixer.thriftscala.PerformanceMetrics -import com.twitter.cr_mixer.thriftscala.PreRankFilterResult -import com.twitter.cr_mixer.thriftscala.RelatedTweetRequest -import com.twitter.cr_mixer.thriftscala.RelatedTweetResponse -import com.twitter.cr_mixer.thriftscala.RelatedTweetResult -import com.twitter.cr_mixer.thriftscala.RelatedTweetTopLevelApiResult -import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata -import com.twitter.cr_mixer.util.CandidateGenerationKeyUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future -import com.twitter.util.Stopwatch -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -case class RelatedTweetScribeLogger @Inject() ( - decider: CrMixerDecider, - statsReceiver: StatsReceiver, - @Named(ModuleNames.RelatedTweetsLogger) relatedTweetsScribeLogger: Logger) { - - private val scopedStats = statsReceiver.scope("RelatedTweetsScribeLogger") - private val topLevelApiStats = scopedStats.scope("TopLevelApi") - private val topLevelApiNoUserIdStats = scopedStats.scope("TopLevelApiNoUserId") - private val upperFunnelsStats = scopedStats.scope("UpperFunnels") - private val upperFunnelsNoUserIdStats = scopedStats.scope("UpperFunnelsNoUserId") - - def scribeInitialCandidates( - query: RelatedTweetCandidateGeneratorQuery, - getResultFn: => Future[Seq[Seq[InitialCandidate]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - scribeResultsAndPerformanceMetrics( - RelatedTweetScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertFetchCandidatesResult - ) - } - - def scribePreRankFilterCandidates( - query: RelatedTweetCandidateGeneratorQuery, - getResultFn: => Future[Seq[Seq[InitialCandidate]]] - ): Future[Seq[Seq[InitialCandidate]]] = { - scribeResultsAndPerformanceMetrics( - RelatedTweetScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertPreRankFilterResult - ) - } - - /** - * Scribe Top Level API Request / Response and performance metrics - * for the getRelatedTweets endpoint. - */ - def scribeGetRelatedTweets( - request: RelatedTweetRequest, - startTime: Long, - relatedTweetScribeMetadata: RelatedTweetScribeMetadata, - getResultFn: => Future[RelatedTweetResponse] - ): Future[RelatedTweetResponse] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { response => - relatedTweetScribeMetadata.clientContext.userId match { - case Some(userId) => - if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) { - topLevelApiStats.counter(relatedTweetScribeMetadata.product.originalName).incr() - val latencyMs = timer().inMilliseconds - val result = convertTopLevelAPIResult(request, response, startTime) - val traceId = Trace.id.traceId.toLong - val scribeMsg = - buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId) - - scribeResult(scribeMsg) - } - case _ => - topLevelApiNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr() - } - } - } - - /** - * Scribe Per-step intermediate results and performance metrics - * for each step: fetch candidates, filters. - */ - private def scribeResultsAndPerformanceMetrics[T]( - relatedTweetScribeMetadata: RelatedTweetScribeMetadata, - getResultFn: => Future[T], - convertToResultFn: (T, UserId) => RelatedTweetResult - ): Future[T] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { input => - relatedTweetScribeMetadata.clientContext.userId match { - case Some(userId) => - if (decider.isAvailableForId(userId, DeciderConstants.upperFunnelPerStepScribeRate)) { - upperFunnelsStats.counter(relatedTweetScribeMetadata.product.originalName).incr() - val latencyMs = timer().inMilliseconds - val result = convertToResultFn(input, userId) - val traceId = Trace.id.traceId.toLong - val scribeMsg = - buildScribeMessage(result, relatedTweetScribeMetadata, latencyMs, traceId) - scribeResult(scribeMsg) - } - case _ => - upperFunnelsNoUserIdStats.counter(relatedTweetScribeMetadata.product.originalName).incr() - } - } - } - - private def convertTopLevelAPIResult( - request: RelatedTweetRequest, - response: RelatedTweetResponse, - startTime: Long - ): RelatedTweetResult = { - RelatedTweetResult.RelatedTweetTopLevelApiResult( - RelatedTweetTopLevelApiResult( - timestamp = startTime, - request = request, - response = response - )) - } - - private def convertFetchCandidatesResult( - candidatesSeq: Seq[Seq[InitialCandidate]], - requestUserId: UserId - ): RelatedTweetResult = { - val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates => - candidates.map { candidate => - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = None - ) // do not hydrate candidateGenerationKey to save cost - } - } - RelatedTweetResult.FetchCandidatesResult( - FetchCandidatesResult(Some(tweetCandidatesWithMetadata))) - } - - private def convertPreRankFilterResult( - candidatesSeq: Seq[Seq[InitialCandidate]], - requestUserId: UserId - ): RelatedTweetResult = { - val tweetCandidatesWithMetadata = candidatesSeq.flatMap { candidates => - candidates.map { candidate => - val candidateGenerationKey = - CandidateGenerationKeyUtil.toThrift(candidate.candidateGenerationInfo, requestUserId) - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = Some(candidateGenerationKey), - authorId = Some(candidate.tweetInfo.authorId), - score = Some(candidate.getSimilarityScore), - numCandidateGenerationKeys = None - ) - } - } - RelatedTweetResult.PreRankFilterResult(PreRankFilterResult(Some(tweetCandidatesWithMetadata))) - } - - private def buildScribeMessage( - relatedTweetResult: RelatedTweetResult, - relatedTweetScribeMetadata: RelatedTweetScribeMetadata, - latencyMs: Long, - traceId: Long - ): GetRelatedTweetsScribe = { - GetRelatedTweetsScribe( - uuid = relatedTweetScribeMetadata.requestUUID, - internalId = relatedTweetScribeMetadata.internalId, - relatedTweetResult = relatedTweetResult, - requesterId = relatedTweetScribeMetadata.clientContext.userId, - guestId = relatedTweetScribeMetadata.clientContext.guestId, - traceId = Some(traceId), - performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))), - impressedBuckets = getImpressedBuckets(scopedStats) - ) - } - - private def scribeResult( - scribeMsg: GetRelatedTweetsScribe - ): Unit = { - publish(logger = relatedTweetsScribeLogger, codec = GetRelatedTweetsScribe, message = scribeMsg) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeLoggerUtils.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeLoggerUtils.scala deleted file mode 100644 index 3b30c3f10..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeLoggerUtils.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.twitter.cr_mixer.featureswitch.CrMixerImpressedBuckets -import com.twitter.cr_mixer.thriftscala.ImpressesedBucketInfo -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.logging.Logger -import com.twitter.scrooge.BinaryThriftStructSerializer -import com.twitter.scrooge.ThriftStruct -import com.twitter.scrooge.ThriftStructCodec - -object ScribeLoggerUtils { - - /** - * Handles base64-encoding, serialization, and publish. - */ - private[logging] def publish[T <: ThriftStruct]( - logger: Logger, - codec: ThriftStructCodec[T], - message: T - ): Unit = { - logger.info(BinaryThriftStructSerializer(codec).toString(message)) - } - - private[logging] def getImpressedBuckets( - scopedStats: StatsReceiver - ): Option[List[ImpressesedBucketInfo]] = { - StatsUtil.trackNonFutureBlockStats(scopedStats.scope("getImpressedBuckets")) { - CrMixerImpressedBuckets.getAllImpressedBuckets.map { listBuckets => - val listBucketsSet = listBuckets.toSet - scopedStats.stat("impressed_buckets").add(listBucketsSet.size) - listBucketsSet.map { bucket => - ImpressesedBucketInfo( - experimentId = bucket.experiment.settings.experimentId.getOrElse(-1L), - bucketName = bucket.name, - version = bucket.experiment.settings.version, - ) - }.toList - } - } - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeMetadata.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeMetadata.scala deleted file mode 100644 index 8c0444e38..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/ScribeMetadata.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.twitter.cr_mixer.model.AdsCandidateGeneratorQuery -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.RelatedTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.thriftscala.Product -import com.twitter.product_mixer.core.thriftscala.ClientContext -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId - -case class ScribeMetadata( - requestUUID: Long, - userId: UserId, - product: Product) - -object ScribeMetadata { - def from(query: CrCandidateGeneratorQuery): ScribeMetadata = { - ScribeMetadata(query.requestUUID, query.userId, query.product) - } - - def from(query: UtegTweetCandidateGeneratorQuery): ScribeMetadata = { - ScribeMetadata(query.requestUUID, query.userId, query.product) - } - - def from(query: AdsCandidateGeneratorQuery): ScribeMetadata = { - ScribeMetadata(query.requestUUID, query.userId, query.product) - } -} - -case class RelatedTweetScribeMetadata( - requestUUID: Long, - internalId: InternalId, - clientContext: ClientContext, - product: Product) - -object RelatedTweetScribeMetadata { - def from(query: RelatedTweetCandidateGeneratorQuery): RelatedTweetScribeMetadata = { - RelatedTweetScribeMetadata( - query.requestUUID, - query.internalId, - query.clientContext, - query.product) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/TopLevelDdgMetricsMetadata.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/TopLevelDdgMetricsMetadata.scala deleted file mode 100644 index 3dd07e58e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/TopLevelDdgMetricsMetadata.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.twitter.cr_mixer -package logging - -import com.twitter.cr_mixer.thriftscala.CrMixerTweetRequest -import com.twitter.cr_mixer.thriftscala.Product - -case class TopLevelDdgMetricsMetadata( - userId: Option[Long], - product: Product, - clientApplicationId: Option[Long], - countryCode: Option[String]) - -object TopLevelDdgMetricsMetadata { - def from(request: CrMixerTweetRequest): TopLevelDdgMetricsMetadata = { - TopLevelDdgMetricsMetadata( - userId = request.clientContext.userId, - product = request.product, - clientApplicationId = request.clientContext.appId, - countryCode = request.clientContext.countryCode - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/UtegTweetScribeLogger.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/UtegTweetScribeLogger.scala deleted file mode 100644 index fb01a419b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/logging/UtegTweetScribeLogger.scala +++ /dev/null @@ -1,147 +0,0 @@ -package com.twitter.cr_mixer.logging - -import com.twitter.cr_mixer.logging.ScribeLoggerUtils._ -import com.twitter.cr_mixer.model.UtegTweetCandidateGeneratorQuery -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.thriftscala.UtegTweetRequest -import com.twitter.cr_mixer.thriftscala.UtegTweetResponse -import com.twitter.cr_mixer.thriftscala.FetchCandidatesResult -import com.twitter.cr_mixer.thriftscala.GetUtegTweetsScribe -import com.twitter.cr_mixer.thriftscala.PerformanceMetrics -import com.twitter.cr_mixer.thriftscala.UtegTweetResult -import com.twitter.cr_mixer.thriftscala.UtegTweetTopLevelApiResult -import com.twitter.cr_mixer.thriftscala.TweetCandidateWithMetadata -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future -import com.twitter.util.Stopwatch -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -case class UtegTweetScribeLogger @Inject() ( - decider: CrMixerDecider, - statsReceiver: StatsReceiver, - @Named(ModuleNames.UtegTweetsLogger) utegTweetScribeLogger: Logger) { - - private val scopedStats = statsReceiver.scope("UtegTweetScribeLogger") - private val topLevelApiStats = scopedStats.scope("TopLevelApi") - private val upperFunnelsStats = scopedStats.scope("UpperFunnels") - - def scribeInitialCandidates( - query: UtegTweetCandidateGeneratorQuery, - getResultFn: => Future[Seq[TweetWithScoreAndSocialProof]] - ): Future[Seq[TweetWithScoreAndSocialProof]] = { - scribeResultsAndPerformanceMetrics( - ScribeMetadata.from(query), - getResultFn, - convertToResultFn = convertFetchCandidatesResult - ) - } - - /** - * Scribe Top Level API Request / Response and performance metrics - * for the GetUtegTweetRecommendations() endpoint. - */ - def scribeGetUtegTweetRecommendations( - request: UtegTweetRequest, - startTime: Long, - scribeMetadata: ScribeMetadata, - getResultFn: => Future[UtegTweetResponse] - ): Future[UtegTweetResponse] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { response => - if (decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.upperFunnelPerStepScribeRate)) { - topLevelApiStats.counter(scribeMetadata.product.originalName).incr() - val latencyMs = timer().inMilliseconds - val result = convertTopLevelAPIResult(request, response, startTime) - val traceId = Trace.id.traceId.toLong - val scribeMsg = - buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - - scribeResult(scribeMsg) - } - } - } - - private def convertTopLevelAPIResult( - request: UtegTweetRequest, - response: UtegTweetResponse, - startTime: Long - ): UtegTweetResult = { - UtegTweetResult.UtegTweetTopLevelApiResult( - UtegTweetTopLevelApiResult( - timestamp = startTime, - request = request, - response = response - )) - } - - private def buildScribeMessage( - utegTweetResult: UtegTweetResult, - scribeMetadata: ScribeMetadata, - latencyMs: Long, - traceId: Long - ): GetUtegTweetsScribe = { - GetUtegTweetsScribe( - uuid = scribeMetadata.requestUUID, - userId = scribeMetadata.userId, - utegTweetResult = utegTweetResult, - traceId = Some(traceId), - performanceMetrics = Some(PerformanceMetrics(Some(latencyMs))), - impressedBuckets = getImpressedBuckets(scopedStats) - ) - } - - private def scribeResult( - scribeMsg: GetUtegTweetsScribe - ): Unit = { - publish(logger = utegTweetScribeLogger, codec = GetUtegTweetsScribe, message = scribeMsg) - } - - private def convertFetchCandidatesResult( - candidates: Seq[TweetWithScoreAndSocialProof], - requestUserId: UserId - ): UtegTweetResult = { - val tweetCandidatesWithMetadata = candidates.map { candidate => - TweetCandidateWithMetadata( - tweetId = candidate.tweetId, - candidateGenerationKey = None - ) // do not hydrate candidateGenerationKey to save cost - } - UtegTweetResult.FetchCandidatesResult(FetchCandidatesResult(Some(tweetCandidatesWithMetadata))) - } - - /** - * Scribe Per-step intermediate results and performance metrics - * for each step: fetch candidates, filters. - */ - private def scribeResultsAndPerformanceMetrics[T]( - scribeMetadata: ScribeMetadata, - getResultFn: => Future[T], - convertToResultFn: (T, UserId) => UtegTweetResult - ): Future[T] = { - val timer = Stopwatch.start() - getResultFn.onSuccess { input => - if (decider.isAvailableForId( - scribeMetadata.userId, - DeciderConstants.upperFunnelPerStepScribeRate)) { - upperFunnelsStats.counter(scribeMetadata.product.originalName).incr() - val latencyMs = timer().inMilliseconds - val result = convertToResultFn(input, scribeMetadata.userId) - val traceId = Trace.id.traceId.toLong - val scribeMsg = - buildScribeMessage(result, scribeMetadata, latencyMs, traceId) - scribeResult(scribeMsg) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/BUILD deleted file mode 100644 index 87c714254..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/recos:recos-common-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/Candidate.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/Candidate.scala deleted file mode 100644 index c357c9472..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/Candidate.scala +++ /dev/null @@ -1,200 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.cr_mixer.thriftscala.LineItemInfo -import com.twitter.simclusters_v2.common.TweetId - -sealed trait Candidate { - val tweetId: TweetId - - override def hashCode: Int = tweetId.toInt -} - -case class TweetWithCandidateGenerationInfo( - tweetId: TweetId, - candidateGenerationInfo: CandidateGenerationInfo) - extends Candidate { - - def getSimilarityScore: Double = - candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0) -} - -case class InitialCandidate( - tweetId: TweetId, - tweetInfo: TweetInfo, - candidateGenerationInfo: CandidateGenerationInfo) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0) - - /** - * The same candidate can be generated by multiple algorithms. - * During blending, candidate deduping happens. In order to retain the candidateGenerationInfo - * from different algorithms, we attach them to a list of potentialReasons. - */ - def toBlendedCandidate( - potentialReasons: Seq[CandidateGenerationInfo], - ): BlendedCandidate = { - BlendedCandidate( - tweetId, - tweetInfo, - candidateGenerationInfo, - potentialReasons, - ) - } - - // for experimental purposes only when bypassing interleave / ranking - def toRankedCandidate(): RankedCandidate = { - RankedCandidate( - tweetId, - tweetInfo, - 0.0, // prediction score is default to 0.0 to help differentiate that it is a no-op - candidateGenerationInfo, - Seq(candidateGenerationInfo) - ) - } -} - -case class InitialAdsCandidate( - tweetId: TweetId, - lineItemInfo: Seq[LineItemInfo], - candidateGenerationInfo: CandidateGenerationInfo) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - candidateGenerationInfo.similarityEngineInfo.score.getOrElse(0.0) - - /** - * The same candidate can be generated by multiple algorithms. - * During blending, candidate deduping happens. In order to retain the candidateGenerationInfo - * from different algorithms, we attach them to a list of potentialReasons. - */ - def toBlendedAdsCandidate( - potentialReasons: Seq[CandidateGenerationInfo], - ): BlendedAdsCandidate = { - BlendedAdsCandidate( - tweetId, - lineItemInfo, - candidateGenerationInfo, - potentialReasons, - ) - } - - // for experimental purposes only when bypassing interleave / ranking - def toRankedAdsCandidate(): RankedAdsCandidate = { - RankedAdsCandidate( - tweetId, - lineItemInfo, - 0.0, // prediction score is default to 0.0 to help differentiate that it is a no-op - candidateGenerationInfo, - Seq(candidateGenerationInfo) - ) - } -} - -case class BlendedCandidate( - tweetId: TweetId, - tweetInfo: TweetInfo, - reasonChosen: CandidateGenerationInfo, - potentialReasons: Seq[CandidateGenerationInfo]) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - reasonChosen.similarityEngineInfo.score.getOrElse(0.0) - - assert(potentialReasons.contains(reasonChosen)) - - def toRankedCandidate(predictionScore: Double): RankedCandidate = { - RankedCandidate( - tweetId, - tweetInfo, - predictionScore, - reasonChosen, - potentialReasons - ) - } -} - -case class BlendedAdsCandidate( - tweetId: TweetId, - lineItemInfo: Seq[LineItemInfo], - reasonChosen: CandidateGenerationInfo, - potentialReasons: Seq[CandidateGenerationInfo]) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - reasonChosen.similarityEngineInfo.score.getOrElse(0.0) - - assert(potentialReasons.contains(reasonChosen)) - - def toRankedAdsCandidate(predictionScore: Double): RankedAdsCandidate = { - RankedAdsCandidate( - tweetId, - lineItemInfo, - predictionScore, - reasonChosen, - potentialReasons - ) - } -} - -case class RankedCandidate( - tweetId: TweetId, - tweetInfo: TweetInfo, - predictionScore: Double, - reasonChosen: CandidateGenerationInfo, - potentialReasons: Seq[CandidateGenerationInfo]) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - reasonChosen.similarityEngineInfo.score.getOrElse(0.0) - - assert(potentialReasons.contains(reasonChosen)) -} - -case class RankedAdsCandidate( - tweetId: TweetId, - lineItemInfo: Seq[LineItemInfo], - predictionScore: Double, - reasonChosen: CandidateGenerationInfo, - potentialReasons: Seq[CandidateGenerationInfo]) - extends Candidate { - - /** * - * Get the Similarity Score of a Tweet from its CG Info. For instance, - * If it is from a UnifiedTweetBasedSimilarityEngine, the score will be the weighted combined score - * And if it is from a SimClustersANNSimilarityEngine, the score will be the SANN score - */ - def getSimilarityScore: Double = - reasonChosen.similarityEngineInfo.score.getOrElse(0.0) - - assert(potentialReasons.contains(reasonChosen)) -} - -case class TripTweetWithScore(tweetId: TweetId, score: Double) extends Candidate diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala deleted file mode 100644 index 879c96b66..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGenerationInfo.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.util.Time - -/*** - * Tweet-level attributes. Represents the source used in candidate generation - * Due to legacy reason, SourceType used to represent both SourceType and SimilarityEngineType - * Moving forward, SourceType will be used for SourceType ONLY. eg., TweetFavorite, UserFollow, TwiceUserId - * At the same time, We create a new SimilarityEngineType to separate them. eg., SimClustersANN - * - * Currently, one special case is that we have TwiceUserId as a source, which is not necessarily a "signal" - * @param sourceType, e.g., SourceType.TweetFavorite, SourceType.UserFollow, SourceType.TwiceUserId - * @param internalId, e.g., UserId(0L), TweetId(0L) - */ -case class SourceInfo( - sourceType: SourceType, - internalId: InternalId, - sourceEventTime: Option[Time]) - -/*** - * Tweet-level attributes. Represents the source User Graph used in candidate generation - * It is an intermediate product, and will not be stored, unlike SourceInfo. - * Essentially, CrMixer queries a graph, and the graph returns a list of users to be used as sources. - * For instance, RealGraph, EarlyBird, FRS, Stp, etc. The underlying similarity engines such as - * UTG or UTEG will leverage these sources to build candidates. - * - * We extended the definition of SourceType to cover both "Source Signal" and "Source Graph" - * See [CrMixer] Graph Based Source Fetcher Abstraction Proposal: - * - * consider making both SourceInfo and GraphSourceInfo extends the same trait to - * have a unified interface. - */ -case class GraphSourceInfo( - sourceType: SourceType, - seedWithScores: Map[UserId, Double]) - -/*** - * Tweet-level attributes. Represents the similarity engine (the algorithm) used for - * candidate generation along with their metadata. - * @param similarityEngineType, e.g., SimClustersANN, UserTweetGraph - * @param modelId. e.g., UserTweetGraphConsumerEmbedding_ALL_20210708 - * @param score - a score generated by this sim engine - */ -case class SimilarityEngineInfo( - similarityEngineType: SimilarityEngineType, - modelId: Option[String], // ModelId can be a None. e.g., UTEG, UnifiedTweetBasedSE. etc - score: Option[Double]) - -/**** - * Tweet-level attributes. A combination for both SourceInfo and SimilarityEngineInfo - * SimilarityEngine is a composition, and it can be composed by many leaf Similarity Engines. - * For instance, the TweetBasedUnified SE could be a composition of both UserTweetGraph SE, SimClustersANN SE. - * Note that a SimilarityEngine (Composite) may call other SimilarityEngines (Atomic, Contributing) - * to contribute to its final candidate list. We track these Contributing SEs in the contributingSimilarityEngines list - * - * @param sourceInfoOpt - this is optional as many consumerBased CG does not have a source - * @param similarityEngineInfo - the similarity engine used in Candidate Generation (eg., TweetBasedUnifiedSE). It can be an atomic SE or an composite SE - * @param contributingSimilarityEngines - only composite SE will have it (e.g., SANNN, UTG). Otherwise it is an empty Seq. All contributing SEs mst be atomic - */ -case class CandidateGenerationInfo( - sourceInfoOpt: Option[SourceInfo], - similarityEngineInfo: SimilarityEngineInfo, - contributingSimilarityEngines: Seq[SimilarityEngineInfo]) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGeneratorQuery.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGeneratorQuery.scala deleted file mode 100644 index 084cbb042..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/CandidateGeneratorQuery.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.cr_mixer.thriftscala.Product -import com.twitter.product_mixer.core.thriftscala.ClientContext -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.timelines.configapi.Params - -sealed trait CandidateGeneratorQuery { - val product: Product - val maxNumResults: Int - val impressedTweetList: Set[TweetId] - val params: Params - val requestUUID: Long -} - -sealed trait HasUserId { - val userId: UserId -} - -case class CrCandidateGeneratorQuery( - userId: UserId, - product: Product, - userState: UserState, - maxNumResults: Int, - impressedTweetList: Set[TweetId], - params: Params, - requestUUID: Long, - languageCode: Option[String] = None) - extends CandidateGeneratorQuery - with HasUserId - -case class UtegTweetCandidateGeneratorQuery( - userId: UserId, - product: Product, - userState: UserState, - maxNumResults: Int, - impressedTweetList: Set[TweetId], - params: Params, - requestUUID: Long) - extends CandidateGeneratorQuery - with HasUserId - -case class RelatedTweetCandidateGeneratorQuery( - internalId: InternalId, - clientContext: ClientContext, // To scribe LogIn/LogOut requests - product: Product, - maxNumResults: Int, - impressedTweetList: Set[TweetId], - params: Params, - requestUUID: Long) - extends CandidateGeneratorQuery - -case class RelatedVideoTweetCandidateGeneratorQuery( - internalId: InternalId, - clientContext: ClientContext, // To scribe LogIn/LogOut requests - product: Product, - maxNumResults: Int, - impressedTweetList: Set[TweetId], - params: Params, - requestUUID: Long) - extends CandidateGeneratorQuery - -case class FrsTweetCandidateGeneratorQuery( - userId: UserId, - product: Product, - maxNumResults: Int, - impressedUserList: Set[UserId], - impressedTweetList: Set[TweetId], - params: Params, - languageCodeOpt: Option[String] = None, - countryCodeOpt: Option[String] = None, - requestUUID: Long) - extends CandidateGeneratorQuery - -case class AdsCandidateGeneratorQuery( - userId: UserId, - product: Product, - userState: UserState, - maxNumResults: Int, - params: Params, - requestUUID: Long) - -case class TopicTweetCandidateGeneratorQuery( - userId: UserId, - topicIds: Set[TopicId], - product: Product, - maxNumResults: Int, - impressedTweetList: Set[TweetId], - params: Params, - requestUUID: Long, - isVideoOnly: Boolean) - extends CandidateGeneratorQuery diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/EarlybirdSimilarityEngineType.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/EarlybirdSimilarityEngineType.scala deleted file mode 100644 index aa3040373..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/EarlybirdSimilarityEngineType.scala +++ /dev/null @@ -1,6 +0,0 @@ -package com.twitter.cr_mixer.model - -sealed trait EarlybirdSimilarityEngineType -object EarlybirdSimilarityEngineType_RecencyBased extends EarlybirdSimilarityEngineType -object EarlybirdSimilarityEngineType_ModelBased extends EarlybirdSimilarityEngineType -object EarlybirdSimilarityEngineType_TensorflowBased extends EarlybirdSimilarityEngineType diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/HealthThreshold.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/HealthThreshold.scala deleted file mode 100644 index 0249798bd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/HealthThreshold.scala +++ /dev/null @@ -1,11 +0,0 @@ -package com.twitter.cr_mixer.model - -object HealthThreshold { - object Enum extends Enumeration { - val Off: Value = Value(1) - val Moderate: Value = Value(2) - val Strict: Value = Value(3) - val Stricter: Value = Value(4) - val StricterPlus: Value = Value(5) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala deleted file mode 100644 index 26db7898b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModelConfig.scala +++ /dev/null @@ -1,77 +0,0 @@ -package com.twitter.cr_mixer.model - -/** - * A Configuration class for all Model Based Candidate Sources. - * - * The Model Name Guideline. Please your modelId as "Algorithm_Product_Date" - * If your model is used for multiple product surfaces, name it as all - * Don't name your algorithm as MBCG. All the algorithms here are MBCG =.= - * - * Don't forgot to add your new models into allHnswANNSimilarityEngineModelIds list. - */ -object ModelConfig { - // Offline SimClusters CG Experiment related Model Ids - val OfflineInterestedInFromKnownFor2020: String = "OfflineIIKF_ALL_20220414" - val OfflineInterestedInFromKnownFor2020Hl0El15: String = "OfflineIIKF_ALL_20220414_Hl0_El15" - val OfflineInterestedInFromKnownFor2020Hl2El15: String = "OfflineIIKF_ALL_20220414_Hl2_El15" - val OfflineInterestedInFromKnownFor2020Hl2El50: String = "OfflineIIKF_ALL_20220414_Hl2_El50" - val OfflineInterestedInFromKnownFor2020Hl8El50: String = "OfflineIIKF_ALL_20220414_Hl8_El50" - val OfflineMTSConsumerEmbeddingsFav90P20M: String = - "OfflineMTSConsumerEmbeddingsFav90P20M_ALL_20220414" - - // Twhin Model Ids - val ConsumerBasedTwHINRegularUpdateAll20221024: String = - "ConsumerBasedTwHINRegularUpdate_All_20221024" - - // Averaged Twhin Model Ids - val TweetBasedTwHINRegularUpdateAll20221024: String = - "TweetBasedTwHINRegularUpdate_All_20221024" - - // Collaborative Filtering Twhin Model Ids - val TwhinCollabFilterForFollow: String = - "TwhinCollabFilterForFollow" - val TwhinCollabFilterForEngagement: String = - "TwhinCollabFilterForEngagement" - val TwhinMultiClusterForFollow: String = - "TwhinMultiClusterForFollow" - val TwhinMultiClusterForEngagement: String = - "TwhinMultiClusterForEngagement" - - // Two Tower model Ids - val TwoTowerFavALL20220808: String = - "TwoTowerFav_ALL_20220808" - - // Debugger Demo-Only Model Ids - val DebuggerDemo: String = "DebuggerDemo" - - // ColdStartLookalike - this is not really a model name, it is as a placeholder to - // indicate ColdStartLookalike candidate source, which is currently being pluged into - // CustomizedRetrievalCandidateGeneration temporarily. - val ColdStartLookalikeModelName: String = "ConsumersBasedUtgColdStartLookalike20220707" - - // consumersBasedUTG-RealGraphOon Model Id - val ConsumersBasedUtgRealGraphOon20220705: String = "ConsumersBasedUtgRealGraphOon_All_20220705" - // consumersBasedUAG-RealGraphOon Model Id - val ConsumersBasedUagRealGraphOon20221205: String = "ConsumersBasedUagRealGraphOon_All_20221205" - - // FTR - val OfflineFavDecayedSum: String = "OfflineFavDecayedSum" - val OfflineFtrAt5Pop1000RnkDcy11: String = "OfflineFtrAt5Pop1000RnkDcy11" - val OfflineFtrAt5Pop10000RnkDcy11: String = "OfflineFtrAt5Pop10000RnkDcy11" - - // All Model Ids of HnswANNSimilarityEngines - val allHnswANNSimilarityEngineModelIds = Seq( - ConsumerBasedTwHINRegularUpdateAll20221024, - TwoTowerFavALL20220808, - DebuggerDemo - ) - - val ConsumerLogFavBasedInterestedInEmbedding: String = - "ConsumerLogFavBasedInterestedIn_ALL_20221228" - val ConsumerFollowBasedInterestedInEmbedding: String = - "ConsumerFollowBasedInterestedIn_ALL_20221228" - - val RetweetBasedDiffusion: String = - "RetweetBasedDiffusion" - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModuleNames.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModuleNames.scala deleted file mode 100644 index 6aec7b052..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/ModuleNames.scala +++ /dev/null @@ -1,122 +0,0 @@ -package com.twitter.cr_mixer.model - -/** - * Define name annotated module names here - */ -object ModuleNames { - - final val FrsStore = "FrsStore" - final val UssStore = "UssStore" - final val UssStratoColumn = "UssStratoColumn" - final val RsxStore = "RsxStore" - final val RmsTweetLogFavLongestL2EmbeddingStore = "RmsTweetLogFavLongestL2EmbeddingStore" - final val RmsUserFavBasedProducerEmbeddingStore = "RmsUserFavBasedProducerEmbeddingStore" - final val RmsUserLogFavInterestedInEmbeddingStore = "RmsUserLogFavInterestedInEmbeddingStore" - final val RmsUserFollowInterestedInEmbeddingStore = "RmsUserFollowInterestedInEmbeddingStore" - final val StpStore = "StpStore" - final val TwiceClustersMembersStore = "TwiceClustersMembersStore" - final val TripCandidateStore = "TripCandidateStore" - - final val ConsumerEmbeddingBasedTripSimilarityEngine = - "ConsumerEmbeddingBasedTripSimilarityEngine" - final val ConsumerEmbeddingBasedTwHINANNSimilarityEngine = - "ConsumerEmbeddingBasedTwHINANNSimilarityEngine" - final val ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine = - "ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine" - final val ConsumersBasedUserAdGraphSimilarityEngine = - "ConsumersBasedUserAdGraphSimilarityEngine" - final val ConsumersBasedUserVideoGraphSimilarityEngine = - "ConsumersBasedUserVideoGraphSimilarityEngine" - - final val ConsumerBasedWalsSimilarityEngine = "ConsumerBasedWalsSimilarityEngine" - - final val TweetBasedTwHINANNSimilarityEngine = "TweetBasedTwHINANNSimilarityEngine" - - final val SimClustersANNSimilarityEngine = "SimClustersANNSimilarityEngine" - - final val ProdSimClustersANNServiceClientName = "ProdSimClustersANNServiceClient" - final val ExperimentalSimClustersANNServiceClientName = "ExperimentalSimClustersANNServiceClient" - final val SimClustersANNServiceClientName1 = "SimClustersANNServiceClient1" - final val SimClustersANNServiceClientName2 = "SimClustersANNServiceClient2" - final val SimClustersANNServiceClientName3 = "SimClustersANNServiceClient3" - final val SimClustersANNServiceClientName5 = "SimClustersANNServiceClient5" - final val SimClustersANNServiceClientName4 = "SimClustersANNServiceClient4" - final val UnifiedCache = "unifiedCache" - final val MLScoreCache = "mlScoreCache" - final val TweetRecommendationResultsCache = "tweetRecommendationResultsCache" - final val EarlybirdTweetsCache = "earlybirdTweetsCache" - final val EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache = - "earlybirdTweetsWithoutRetweetsRepliesCacheStore" - final val EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache = - "earlybirdTweetsWithRetweetsRepliesCacheStore" - - final val AbDeciderLogger = "abDeciderLogger" - final val TopLevelApiDdgMetricsLogger = "topLevelApiDdgMetricsLogger" - final val TweetRecsLogger = "tweetRecsLogger" - final val BlueVerifiedTweetRecsLogger = "blueVerifiedTweetRecsLogger" - final val RelatedTweetsLogger = "relatedTweetsLogger" - final val UtegTweetsLogger = "utegTweetsLogger" - final val AdsRecommendationsLogger = "adsRecommendationLogger" - - final val OfflineSimClustersANNInterestedInSimilarityEngine = - "OfflineSimClustersANNInterestedInSimilarityEngine" - - final val RealGraphOonStore = "RealGraphOonStore" - final val RealGraphInStore = "RealGraphInStore" - - final val OfflineTweet2020CandidateStore = "OfflineTweet2020CandidateStore" - final val OfflineTweet2020Hl0El15CandidateStore = "OfflineTweet2020Hl0El15CandidateStore" - final val OfflineTweet2020Hl2El15CandidateStore = "OfflineTweet2020Hl2El15CandidateStore" - final val OfflineTweet2020Hl2El50CandidateStore = "OfflineTweet2020Hl2El50CandidateStore" - final val OfflineTweet2020Hl8El50CandidateStore = "OfflineTweet2020Hl8El50CandidateStore" - final val OfflineTweetMTSCandidateStore = "OfflineTweetMTSCandidateStore" - - final val OfflineFavDecayedSumCandidateStore = "OfflineFavDecayedSumCandidateStore" - final val OfflineFtrAt5Pop1000RankDecay11CandidateStore = - "OfflineFtrAt5Pop1000RankDecay11CandidateStore" - final val OfflineFtrAt5Pop10000RankDecay11CandidateStore = - "OfflineFtrAt5Pop10000RankDecay11CandidateStore" - - final val TwhinCollabFilterStratoStoreForFollow = "TwhinCollabFilterStratoStoreForFollow" - final val TwhinCollabFilterStratoStoreForEngagement = "TwhinCollabFilterStratoStoreForEngagement" - final val TwhinMultiClusterStratoStoreForFollow = "TwhinMultiClusterStratoStoreForFollow" - final val TwhinMultiClusterStratoStoreForEngagement = "TwhinMultiClusterStratoStoreForEngagement" - - final val ProducerBasedUserAdGraphSimilarityEngine = - "ProducerBasedUserAdGraphSimilarityEngine" - final val ProducerBasedUserTweetGraphSimilarityEngine = - "ProducerBasedUserTweetGraphSimilarityEngine" - final val ProducerBasedUnifiedSimilarityEngine = "ProducerBasedUnifiedSimilarityEngine" - - final val TweetBasedUserAdGraphSimilarityEngine = "TweetBasedUserAdGraphSimilarityEngine" - final val TweetBasedUserTweetGraphSimilarityEngine = "TweetBasedUserTweetGraphSimilarityEngine" - final val TweetBasedUserVideoGraphSimilarityEngine = "TweetBasedUserVideoGraphSimilarityEngine" - final val TweetBasedQigSimilarityEngine = "TweetBasedQigSimilarityEngine" - final val TweetBasedUnifiedSimilarityEngine = "TweetBasedUnifiedSimilarityEngine" - - final val TwhinCollabFilterSimilarityEngine = "TwhinCollabFilterSimilarityEngine" - - final val ConsumerBasedUserTweetGraphStore = "ConsumerBasedUserTweetGraphStore" - final val ConsumerBasedUserVideoGraphStore = "ConsumerBasedUserVideoGraphStore" - final val ConsumerBasedUserAdGraphStore = "ConsumerBasedUserAdGraphStore" - - final val UserTweetEntityGraphSimilarityEngine = - "UserTweetEntityGraphSimilarityEngine" - - final val CertoTopicTweetSimilarityEngine = "CertoTopicTweetSimilarityEngine" - final val CertoStratoStoreName = "CertoStratoStore" - - final val SkitTopicTweetSimilarityEngine = "SkitTopicTweetSimilarityEngine" - final val SkitHighPrecisionTopicTweetSimilarityEngine = - "SkitHighPrecisionTopicTweetSimilarityEngine" - final val SkitStratoStoreName = "SkitStratoStore" - - final val HomeNaviGRPCClient = "HomeNaviGRPCClient" - final val AdsFavedNaviGRPCClient = "AdsFavedNaviGRPCClient" - final val AdsMonetizableNaviGRPCClient = "AdsMonetizableNaviGRPCClient" - - final val RetweetBasedDiffusionRecsMhStore = "RetweetBasedDiffusionRecsMhStore" - final val DiffusionBasedSimilarityEngine = "DiffusionBasedSimilarityEngine" - - final val BlueVerifiedAnnotationStore = "BlueVerifiedAnnotationStore" -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TopicTweetWithScore.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TopicTweetWithScore.scala deleted file mode 100644 index e9a0cf173..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TopicTweetWithScore.scala +++ /dev/null @@ -1,13 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.simclusters_v2.common.TweetId - -/*** - * Bind a tweetId with a raw score generated from one single Similarity Engine - * @param similarityEngineType, which underlying topic source the topic tweet is from - */ -case class TopicTweetWithScore( - tweetId: TweetId, - score: Double, - similarityEngineType: SimilarityEngineType) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithAuthor.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithAuthor.scala deleted file mode 100644 index 16a506a4c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithAuthor.scala +++ /dev/null @@ -1,6 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId - -case class TweetWithAuthor(tweetId: TweetId, authorId: UserId) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScore.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScore.scala deleted file mode 100644 index ad8866912..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScore.scala +++ /dev/null @@ -1,8 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.simclusters_v2.common.TweetId - -/*** - * Bind a tweetId with a raw score generated from one single Similarity Engine - */ -case class TweetWithScore(tweetId: TweetId, score: Double) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScoreAndSocialProof.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScoreAndSocialProof.scala deleted file mode 100644 index 94e430d8e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model/TweetWithScoreAndSocialProof.scala +++ /dev/null @@ -1,12 +0,0 @@ -package com.twitter.cr_mixer.model - -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.recos.recos_common.thriftscala.SocialProofType - -/*** - * Bind a tweetId with a raw score and social proofs by type - */ -case class TweetWithScoreAndSocialProof( - tweetId: TweetId, - score: Double, - socialProofByType: Map[SocialProofType, Seq[Long]]) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ActivePromotedTweetStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ActivePromotedTweetStoreModule.scala deleted file mode 100644 index d6529531a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ActivePromotedTweetStoreModule.scala +++ /dev/null @@ -1,135 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.bijection.thrift.CompactThriftCodec -import com.twitter.ads.entities.db.thriftscala.LineItemObjective -import com.twitter.bijection.Injection -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.thriftscala.LineItemInfo -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hermit.store.common.ObservedCachedReadableStore -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.ml.api.DataRecord -import com.twitter.ml.api.DataType -import com.twitter.ml.api.Feature -import com.twitter.ml.api.GeneralTensor -import com.twitter.ml.api.RichDataRecord -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.manhattan.Revenue -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import com.twitter.util.Future -import javax.inject.Named -import scala.collection.JavaConverters._ - -object ActivePromotedTweetStoreModule extends TwitterModule { - - case class ActivePromotedTweetStore( - activePromotedTweetMHStore: ReadableStore[String, DataRecord], - statsReceiver: StatsReceiver) - extends ReadableStore[TweetId, Seq[LineItemInfo]] { - override def get(tweetId: TweetId): Future[Option[Seq[LineItemInfo]]] = { - activePromotedTweetMHStore.get(tweetId.toString).map { - _.map { dataRecord => - val richDataRecord = new RichDataRecord(dataRecord) - val lineItemIdsFeature: Feature[GeneralTensor] = - new Feature.Tensor("active_promoted_tweets.line_item_ids", DataType.INT64) - - val lineItemObjectivesFeature: Feature[GeneralTensor] = - new Feature.Tensor("active_promoted_tweets.line_item_objectives", DataType.INT64) - - val lineItemIdsTensor: GeneralTensor = richDataRecord.getFeatureValue(lineItemIdsFeature) - val lineItemObjectivesTensor: GeneralTensor = - richDataRecord.getFeatureValue(lineItemObjectivesFeature) - - val lineItemIds: Seq[Long] = - if (lineItemIdsTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemIdsTensor.getInt64Tensor.isSetLongs) { - lineItemIdsTensor.getInt64Tensor.getLongs.asScala.map(_.toLong) - } else Seq.empty - - val lineItemObjectives: Seq[LineItemObjective] = - if (lineItemObjectivesTensor.getSetField == GeneralTensor._Fields.INT64_TENSOR && lineItemObjectivesTensor.getInt64Tensor.isSetLongs) { - lineItemObjectivesTensor.getInt64Tensor.getLongs.asScala.map(objective => - LineItemObjective(objective.toInt)) - } else Seq.empty - - val lineItemInfo = - if (lineItemIds.size == lineItemObjectives.size) { - lineItemIds.zipWithIndex.map { - case (lineItemId, index) => - LineItemInfo( - lineItemId = lineItemId, - lineItemObjective = lineItemObjectives(index) - ) - } - } else Seq.empty - - lineItemInfo - } - } - } - } - - @Provides - @Singleton - def providesActivePromotedTweetStore( - manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - crMixerStatsReceiver: StatsReceiver - ): ReadableStore[TweetId, Seq[LineItemInfo]] = { - - val mhConfig = new ManhattanROConfig { - val hdfsPath = HDFSPath("") - val applicationID = ApplicationID("ads_bigquery_features") - val datasetName = DatasetName("active_promoted_tweets") - val cluster = Revenue - - override def statsReceiver: StatsReceiver = - crMixerStatsReceiver.scope("active_promoted_tweets_mh") - } - val mhStore: ReadableStore[String, DataRecord] = - ManhattanRO - .getReadableStoreWithMtls[String, DataRecord]( - mhConfig, - manhattanKVClientMtlsParams - )( - implicitly[Injection[String, Array[Byte]]], - CompactThriftCodec[DataRecord] - ) - - val underlyingStore = - ActivePromotedTweetStore(mhStore, crMixerStatsReceiver.scope("ActivePromotedTweetStore")) - val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 60.minutes, - asyncUpdate = false - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[LineItemInfo]()), - statsReceiver = crMixerStatsReceiver.scope("memCachedActivePromotedTweetStore"), - keyToString = { k: TweetId => s"apt/$k" } - ) - - ObservedCachedReadableStore.from( - memcachedStore, - ttl = 30.minutes, - maxKeys = 250000, // size of promoted tweet is around 200,000 - windowSize = 10000L, - cacheName = "active_promoted_tweet_cache", - maxMultiGetSize = 20 - )(crMixerStatsReceiver.scope("inMemoryCachedActivePromotedTweetStore")) - - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BUILD.bazel b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BUILD.bazel deleted file mode 100644 index 6773b526c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BUILD.bazel +++ /dev/null @@ -1,130 +0,0 @@ -scala_library( - sources = [ - "*.scala", - "core/*.scala", - "grpc_client/*.scala", - "similarity_engine/*.scala", - "source_signal/*.scala", - "thrift_client/*.scala", - ], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/bijection:core", - "3rdparty/jvm/com/twitter/bijection:scrooge", - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/com/twitter/storehaus:memcache", - "3rdparty/jvm/io/grpc:grpc-api", - "3rdparty/jvm/io/grpc:grpc-auth", - "3rdparty/jvm/io/grpc:grpc-core", - "3rdparty/jvm/io/grpc:grpc-netty", - "3rdparty/jvm/io/grpc:grpc-protobuf", - "3rdparty/jvm/io/grpc:grpc-stub", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/jvm/org/scalanlp:breeze", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "abdecider/src/main/scala", - "ann/src/main/thrift/com/twitter/ann/common:ann-common-scala", - "configapi/configapi-abdecider", - "configapi/configapi-core", - "configapi/configapi-featureswitches:v2", - "content-recommender/server/src/main/scala/com/twitter/contentrecommender:cr-mixer-deps", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/featureswitch", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "discovery-common/src/main/scala/com/twitter/discovery/common/configapi", - "featureswitches/featureswitches-core", - "featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder", - "finagle-internal/finagle-grpc/src/main/scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finatra-internal/kafka/src/main/scala/com/twitter/finatra/kafka/consumers", - "finatra-internal/mtls-thriftmux/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "finatra/inject/inject-modules/src/main/scala", - "finatra/inject/inject-thrift-client", - "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", - "frigate/frigate-common:util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/health", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/interests", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", - "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", - "hydra/partition/thrift/src/main/thrift:thrift-scala", - "hydra/root/thrift/src/main/thrift:thrift-scala", - "mediaservices/commons/src/main/scala:futuretracker", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "qig-ranker/thrift/src/main/thrift:thrift-scala", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/health_store", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection", - "relevance-platform/thrift/src/main/thrift:thrift-scala", - "representation-manager/client/src/main/scala/com/twitter/representation_manager", - "representation-manager/client/src/main/scala/com/twitter/representation_manager/config", - "representation-manager/server/src/main/scala/com/twitter/representation_manager/migration", - "representation-manager/server/src/main/thrift:thrift-scala", - "representation-scorer/server/src/main/thrift:thrift-scala", - "servo/decider", - "servo/util/src/main/scala", - "simclusters-ann/thrift/src/main/thrift:thrift-scala", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/java/com/twitter/ml/api:api-base", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/scala/com/twitter/algebird_internal/injection", - "src/scala/com/twitter/cortex/ml/embeddings/common:Helpers", - "src/scala/com/twitter/ml/api/embedding", - "src/scala/com/twitter/ml/featurestore/lib", - "src/scala/com/twitter/scalding_internal/multiformat/format", - "src/scala/com/twitter/simclusters_v2/candidate_source", - "src/scala/com/twitter/simclusters_v2/common", - "src/scala/com/twitter/storehaus_internal/manhattan", - "src/scala/com/twitter/storehaus_internal/manhattan/config", - "src/scala/com/twitter/storehaus_internal/memcache", - "src/scala/com/twitter/storehaus_internal/memcache/config", - "src/scala/com/twitter/storehaus_internal/offline", - "src/scala/com/twitter/storehaus_internal/util", - "src/scala/com/twitter/topic_recos/stores", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/frigate:frigate-common-thrift-scala", - "src/thrift/com/twitter/frigate:frigate-thrift-scala", - "src/thrift/com/twitter/frigate/data_pipeline/scalding:blue_verified_annotations-scala", - "src/thrift/com/twitter/hermit/stp:hermit-stp-scala", - "src/thrift/com/twitter/ml/api:data-java", - "src/thrift/com/twitter/ml/api:embedding-scala", - "src/thrift/com/twitter/ml/featurestore:ml-feature-store-embedding-scala", - "src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala", - "src/thrift/com/twitter/recos:recos-common-scala", - "src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_graph_plus:user_tweet_graph_plus-scala", - "src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala", - "src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala", - "src/thrift/com/twitter/tweetypie:service-scala", - "src/thrift/com/twitter/twistly:twistly-scala", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - "stitch/stitch-storehaus", - "stitch/stitch-tweetypie/src/main/scala", - "strato/src/main/scala/com/twitter/strato/client", - "user-signal-service/thrift/src/main/thrift:thrift-scala", - "util-internal/scribe/src/main/scala/com/twitter/logging", - "util/util-hashing", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BlueVerifiedAnnotationStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BlueVerifiedAnnotationStoreModule.scala deleted file mode 100644 index 21769d3fa..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/BlueVerifiedAnnotationStoreModule.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.inject.TwitterModule -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2 -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.Athena -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.hermit.store.common.ObservedCachedReadableStore - -object BlueVerifiedAnnotationStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.BlueVerifiedAnnotationStore) - def providesBlueVerifiedAnnotationStore( - statsReceiver: StatsReceiver, - manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams, - ): ReadableStore[String, BlueVerifiedAnnotationsV2] = { - - implicit val valueCodec = new BinaryScalaCodec(BlueVerifiedAnnotationsV2) - - val underlyingStore = ManhattanRO - .getReadableStoreWithMtls[String, BlueVerifiedAnnotationsV2]( - ManhattanROConfig( - HDFSPath(""), - ApplicationID("content_recommender_athena"), - DatasetName("blue_verified_annotations"), - Athena), - manhattanKVClientMtlsParams - ) - - ObservedCachedReadableStore.from( - underlyingStore, - ttl = 24.hours, - maxKeys = 100000, - windowSize = 10000L, - cacheName = "blue_verified_annotation_cache" - )(statsReceiver.scope("inMemoryCachedBlueVerifiedAnnotationStore")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CertoStratoStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CertoStratoStoreModule.scala deleted file mode 100644 index 9908aa702..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CertoStratoStoreModule.scala +++ /dev/null @@ -1,57 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hermit.store.common.ObservedCachedReadableStore -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.Client -import com.twitter.topic_recos.stores.CertoTopicTopKTweetsStore -import com.twitter.topic_recos.thriftscala.TweetWithScores - -object CertoStratoStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.CertoStratoStoreName) - def providesCertoStratoStore( - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - stratoClient: Client, - statsReceiver: StatsReceiver - ): ReadableStore[TopicId, Seq[TweetWithScores]] = { - val certoStore = ObservedReadableStore(CertoTopicTopKTweetsStore.prodStore(stratoClient))( - statsReceiver.scope(ModuleNames.CertoStratoStoreName)).mapValues { topKTweetsWithScores => - topKTweetsWithScores.topTweetsByFollowerL2NormalizedCosineSimilarityScore - } - - val memCachedStore = ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = certoStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScores]()), - statsReceiver = statsReceiver.scope("memcached_certo_store"), - keyToString = { k => s"certo:${keyHasher.hashKey(k.toString.getBytes)}" } - ) - - ObservedCachedReadableStore.from[TopicId, Seq[TweetWithScores]]( - memCachedStore, - ttl = 5.minutes, - maxKeys = 100000, // ~150MB max - cacheName = "certo_in_memory_cache", - windowSize = 10000L - )(statsReceiver.scope("certo_in_memory_cache")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserAdGraphStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserAdGraphStoreModule.scala deleted file mode 100644 index 33a0d33fc..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserAdGraphStoreModule.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest -import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Named -import javax.inject.Singleton - -object ConsumersBasedUserAdGraphStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ConsumerBasedUserAdGraphStore) - def providesConsumerBasedUserAdGraphStore( - userAdGraphService: UserAdGraph.MethodPerEndpoint - ): ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] = { - new ReadableStore[ConsumersBasedRelatedAdRequest, RelatedAdResponse] { - override def get( - k: ConsumersBasedRelatedAdRequest - ): Future[Option[RelatedAdResponse]] = { - userAdGraphService.consumersBasedRelatedAds(k).map(Some(_)) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserTweetGraphStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserTweetGraphStoreModule.scala deleted file mode 100644 index ab027744a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserTweetGraphStoreModule.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Named -import javax.inject.Singleton - -object ConsumersBasedUserTweetGraphStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ConsumerBasedUserTweetGraphStore) - def providesConsumerBasedUserTweetGraphStore( - userTweetGraphService: UserTweetGraph.MethodPerEndpoint - ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = { - new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] { - override def get( - k: ConsumersBasedRelatedTweetRequest - ): Future[Option[RelatedTweetResponse]] = { - userTweetGraphService.consumersBasedRelatedTweets(k).map(Some(_)) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserVideoGraphStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserVideoGraphStoreModule.scala deleted file mode 100644 index 05cf496d8..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/ConsumersBasedUserVideoGraphStoreModule.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse -import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Named -import javax.inject.Singleton - -object ConsumersBasedUserVideoGraphStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ConsumerBasedUserVideoGraphStore) - def providesConsumerBasedUserVideoGraphStore( - userVideoGraphService: UserVideoGraph.MethodPerEndpoint - ): ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] = { - new ReadableStore[ConsumersBasedRelatedTweetRequest, RelatedTweetResponse] { - override def get( - k: ConsumersBasedRelatedTweetRequest - ): Future[Option[RelatedTweetResponse]] = { - userVideoGraphService.consumersBasedRelatedTweets(k).map(Some(_)) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CrMixerParamConfigModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CrMixerParamConfigModule.scala deleted file mode 100644 index baece7947..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/CrMixerParamConfigModule.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.timelines.configapi.Config -import com.twitter.cr_mixer.param.CrMixerParamConfig -import com.twitter.inject.TwitterModule -import javax.inject.Singleton - -object CrMixerParamConfigModule extends TwitterModule { - - @Provides - @Singleton - def provideConfig(): Config = { - CrMixerParamConfig.config - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/DiffusionStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/DiffusionStoreModule.scala deleted file mode 100644 index dc95f07f5..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/DiffusionStoreModule.scala +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.bijection.Injection -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.TweetsWithScore -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.Apollo -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import javax.inject.Named -import javax.inject.Singleton - -object DiffusionStoreModule extends TwitterModule { - type UserId = Long - implicit val longCodec = implicitly[Injection[Long, Array[Byte]]] - implicit val tweetRecsInjection: Injection[TweetsWithScore, Array[Byte]] = - BinaryScalaCodec(TweetsWithScore) - - @Provides - @Singleton - @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore) - def retweetBasedDiffusionRecsMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[Long, TweetsWithScore] = { - val manhattanROConfig = ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("diffusion_retweet_tweet_recs"), - Apollo - ) - - buildTweetRecsStore(serviceIdentifier, manhattanROConfig) - } - - private def buildTweetRecsStore( - serviceIdentifier: ServiceIdentifier, - manhattanROConfig: ManhattanROConfig - ): ReadableStore[Long, TweetsWithScore] = { - - ManhattanRO - .getReadableStoreWithMtls[Long, TweetsWithScore]( - manhattanROConfig, - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, tweetRecsInjection) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EarlybirdRecencyBasedCandidateStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EarlybirdRecencyBasedCandidateStoreModule.scala deleted file mode 100644 index c0fe025f0..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EarlybirdRecencyBasedCandidateStoreModule.scala +++ /dev/null @@ -1,189 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.SeqLongInjection -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams -import com.twitter.search.earlybird.thriftscala.EarlybirdRequest -import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode -import com.twitter.search.earlybird.thriftscala.EarlybirdService -import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery -import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Duration -import com.twitter.util.Future -import javax.inject.Named - -object EarlybirdRecencyBasedCandidateStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache) - def providesEarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore( - statsReceiver: StatsReceiver, - earlybirdSearchClient: EarlybirdService.MethodPerEndpoint, - @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient, - timeoutConfig: TimeoutConfig - ): ReadableStore[UserId, Seq[TweetId]] = { - val stats = statsReceiver.scope("EarlybirdRecencyBasedWithoutRetweetsRepliesCandidateStore") - val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] { - override def get(userId: UserId): Future[Option[Seq[TweetId]]] = { - // Home based EB filters out retweets and replies - val earlybirdRequest = - buildEarlybirdRequest( - userId, - FilterOutRetweetsAndReplies, - DefaultMaxNumTweetPerUser, - timeoutConfig.earlybirdServerTimeout) - getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats) - } - } - ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = earlybirdRecencyBasedTweetsCache, - ttl = MemcacheKeyTimeToLiveDuration, - asyncUpdate = true - )( - valueInjection = SeqLongInjection, - statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_home_memcache"), - keyToString = { k => - f"uEBRBHM:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedHoMe - } - ) - } - - @Provides - @Singleton - @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache) - def providesEarlybirdRecencyBasedWithRetweetsRepliesCandidateStore( - statsReceiver: StatsReceiver, - earlybirdSearchClient: EarlybirdService.MethodPerEndpoint, - @Named(ModuleNames.EarlybirdTweetsCache) earlybirdRecencyBasedTweetsCache: MemcachedClient, - timeoutConfig: TimeoutConfig - ): ReadableStore[UserId, Seq[TweetId]] = { - val stats = statsReceiver.scope("EarlybirdRecencyBasedWithRetweetsRepliesCandidateStore") - val underlyingStore = new ReadableStore[UserId, Seq[TweetId]] { - override def get(userId: UserId): Future[Option[Seq[TweetId]]] = { - val earlybirdRequest = buildEarlybirdRequest( - userId, - // Notifications based EB keeps retweets and replies - NotFilterOutRetweetsAndReplies, - DefaultMaxNumTweetPerUser, - processingTimeout = timeoutConfig.earlybirdServerTimeout - ) - getEarlybirdSearchResult(earlybirdSearchClient, earlybirdRequest, stats) - } - } - ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = earlybirdRecencyBasedTweetsCache, - ttl = MemcacheKeyTimeToLiveDuration, - asyncUpdate = true - )( - valueInjection = SeqLongInjection, - statsReceiver = statsReceiver.scope("earlybird_recency_based_tweets_notifications_memcache"), - keyToString = { k => - f"uEBRBN:${keyHasher.hashKey(k.toString.getBytes)}%X" // prefix = EarlyBirdRecencyBasedNotifications - } - ) - } - - private val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - /** - * Note the DefaultMaxNumTweetPerUser is used to adjust the result size per cache entry. - * If the value changes, it will increase the size of the memcache. - */ - private val DefaultMaxNumTweetPerUser: Int = 100 - private val FilterOutRetweetsAndReplies = true - private val NotFilterOutRetweetsAndReplies = false - private val MemcacheKeyTimeToLiveDuration: Duration = Duration.fromMinutes(15) - - private def buildEarlybirdRequest( - seedUserId: UserId, - filterOutRetweetsAndReplies: Boolean, - maxNumTweetsPerSeedUser: Int, - processingTimeout: Duration - ): EarlybirdRequest = - EarlybirdRequest( - searchQuery = getThriftSearchQuery( - seedUserId = seedUserId, - filterOutRetweetsAndReplies = filterOutRetweetsAndReplies, - maxNumTweetsPerSeedUser = maxNumTweetsPerSeedUser, - processingTimeout = processingTimeout - ), - clientId = Some(EarlybirdClientId), - timeoutMs = processingTimeout.inMilliseconds.intValue(), - getOlderResults = Some(false), - adjustedProtectedRequestParams = None, - adjustedFullArchiveRequestParams = None, - getProtectedTweetsOnly = Some(false), - skipVeryRecentTweets = true, - ) - - private def getThriftSearchQuery( - seedUserId: UserId, - filterOutRetweetsAndReplies: Boolean, - maxNumTweetsPerSeedUser: Int, - processingTimeout: Duration - ): ThriftSearchQuery = ThriftSearchQuery( - serializedQuery = GetEarlybirdQuery( - None, - None, - Set.empty, - filterOutRetweetsAndReplies - ).map(_.serialize), - fromUserIDFilter64 = Some(Seq(seedUserId)), - numResults = maxNumTweetsPerSeedUser, - rankingMode = ThriftSearchRankingMode.Recency, - collectorParams = Some( - CollectorParams( - // numResultsToReturn defines how many results each EB shard will return to search root - numResultsToReturn = maxNumTweetsPerSeedUser, - // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching. - terminationParams = - GetCollectorTerminationParams(maxNumTweetsPerSeedUser, processingTimeout) - )), - facetFieldNames = Some(FacetsToFetch), - resultMetadataOptions = Some(MetadataOptions), - searchStatusIds = None - ) - - private def getEarlybirdSearchResult( - earlybirdSearchClient: EarlybirdService.MethodPerEndpoint, - request: EarlybirdRequest, - statsReceiver: StatsReceiver - ): Future[Option[Seq[TweetId]]] = earlybirdSearchClient - .search(request) - .map { response => - response.responseCode match { - case EarlybirdResponseCode.Success => - val earlybirdSearchResult = - response.searchResults - .map { - _.results - .map(searchResult => searchResult.id) - } - statsReceiver.scope("result").stat("size").add(earlybirdSearchResult.size) - earlybirdSearchResult - case e => - statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr() - Some(Seq.empty) - } - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EmbeddingStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EmbeddingStoreModule.scala deleted file mode 100644 index 26d9f8ad1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/EmbeddingStoreModule.scala +++ /dev/null @@ -1,195 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.bijection.Injection -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.bijection.scrooge.CompactScalaCodec -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.inject.TwitterModule -import com.twitter.ml.api.{thriftscala => api} -import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.Apollo -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import javax.inject.Named -import javax.inject.Singleton - -object EmbeddingStoreModule extends TwitterModule { - type UserId = Long - implicit val mbcgUserEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - CompactScalaCodec(api.Embedding) - implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] = - CompactScalaCodec(CandidateTweetsList) - - final val TwHINEmbeddingRegularUpdateMhStoreName = "TwHINEmbeddingRegularUpdateMhStore" - @Provides - @Singleton - @Named(TwHINEmbeddingRegularUpdateMhStoreName) - def twHINEmbeddingRegularUpdateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[InternalId, api.Embedding] = { - val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - BinaryScalaCodec(api.Embedding) - - val longCodec = implicitly[Injection[Long, Array[Byte]]] - - ManhattanRO - .getReadableStoreWithMtls[TweetId, api.Embedding]( - ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("twhin_regular_update_tweet_embedding_apollo"), - Apollo - ), - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] { - case InternalId.TweetId(tweetId) => - tweetId - case _ => - throw new UnsupportedOperationException("Invalid Internal Id") - } - } - - final val ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName = - "ConsumerBasedTwHINEmbeddingRegularUpdateMhStore" - @Provides - @Singleton - @Named(ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName) - def consumerBasedTwHINEmbeddingRegularUpdateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[InternalId, api.Embedding] = { - val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - BinaryScalaCodec(api.Embedding) - - val longCodec = implicitly[Injection[Long, Array[Byte]]] - - ManhattanRO - .getReadableStoreWithMtls[UserId, api.Embedding]( - ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("twhin_user_embedding_regular_update_apollo"), - Apollo - ), - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] { - case InternalId.UserId(userId) => - userId - case _ => - throw new UnsupportedOperationException("Invalid Internal Id") - } - } - - final val TwoTowerFavConsumerEmbeddingMhStoreName = "TwoTowerFavConsumerEmbeddingMhStore" - @Provides - @Singleton - @Named(TwoTowerFavConsumerEmbeddingMhStoreName) - def twoTowerFavConsumerEmbeddingMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[InternalId, api.Embedding] = { - val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - BinaryScalaCodec(api.Embedding) - - val longCodec = implicitly[Injection[Long, Array[Byte]]] - - ManhattanRO - .getReadableStoreWithMtls[UserId, api.Embedding]( - ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("two_tower_fav_user_embedding_apollo"), - Apollo - ), - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] { - case InternalId.UserId(userId) => - userId - case _ => - throw new UnsupportedOperationException("Invalid Internal Id") - } - } - - final val DebuggerDemoUserEmbeddingMhStoreName = "DebuggerDemoUserEmbeddingMhStoreName" - @Provides - @Singleton - @Named(DebuggerDemoUserEmbeddingMhStoreName) - def debuggerDemoUserEmbeddingStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[InternalId, api.Embedding] = { - // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalUserEmbeddingScalaDataset.sql - // Change the above sql if you want to use a diff embedding - val manhattanROConfig = ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("experimental_user_embedding"), - Apollo - ) - buildUserEmbeddingStore(serviceIdentifier, manhattanROConfig) - } - - final val DebuggerDemoTweetEmbeddingMhStoreName = "DebuggerDemoTweetEmbeddingMhStore" - @Provides - @Singleton - @Named(DebuggerDemoTweetEmbeddingMhStoreName) - def debuggerDemoTweetEmbeddingStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[InternalId, api.Embedding] = { - // This dataset is from src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql - // Change the above sql if you want to use a diff embedding - val manhattanROConfig = ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("cr_mixer_apollo"), - DatasetName("experimental_tweet_embedding"), - Apollo - ) - buildTweetEmbeddingStore(serviceIdentifier, manhattanROConfig) - } - - private def buildUserEmbeddingStore( - serviceIdentifier: ServiceIdentifier, - manhattanROConfig: ManhattanROConfig - ): ReadableStore[InternalId, api.Embedding] = { - val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - BinaryScalaCodec(api.Embedding) - - val longCodec = implicitly[Injection[Long, Array[Byte]]] - ManhattanRO - .getReadableStoreWithMtls[UserId, api.Embedding]( - manhattanROConfig, - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] { - case InternalId.UserId(userId) => - userId - case _ => - throw new UnsupportedOperationException("Invalid Internal Id") - } - } - - private def buildTweetEmbeddingStore( - serviceIdentifier: ServiceIdentifier, - manhattanROConfig: ManhattanROConfig - ): ReadableStore[InternalId, api.Embedding] = { - val binaryEmbeddingInjection: Injection[api.Embedding, Array[Byte]] = - BinaryScalaCodec(api.Embedding) - - val longCodec = implicitly[Injection[Long, Array[Byte]]] - - ManhattanRO - .getReadableStoreWithMtls[TweetId, api.Embedding]( - manhattanROConfig, - ManhattanKVClientMtlsParams(serviceIdentifier) - )(longCodec, binaryEmbeddingInjection).composeKeyMapping[InternalId] { - case InternalId.TweetId(tweetId) => - tweetId - case _ => - throw new UnsupportedOperationException("Invalid Internal Id") - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/FrsStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/FrsStoreModule.scala deleted file mode 100644 index cfe044afd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/FrsStoreModule.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.source_signal.FrsStore -import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.storehaus.ReadableStore -import javax.inject.Named - -object FrsStoreModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.FrsStore) - def providesFrsStore( - frsClient: FollowRecommendationsThriftService.MethodPerEndpoint, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): ReadableStore[FrsStore.Query, Seq[FrsQueryResult]] = { - ObservedReadableStore(FrsStore(frsClient, statsReceiver, decider))( - statsReceiver.scope("follow_recommendations_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/MHMtlsParamsModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/MHMtlsParamsModule.scala deleted file mode 100644 index 339d0330a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/MHMtlsParamsModule.scala +++ /dev/null @@ -1,17 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.inject.TwitterModule -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import javax.inject.Singleton - -object MHMtlsParamsModule extends TwitterModule { - @Singleton - @Provides - def providesManhattanMtlsParams( - serviceIdentifier: ServiceIdentifier - ): ManhattanKVClientMtlsParams = { - ManhattanKVClientMtlsParams(serviceIdentifier) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/OfflineCandidateStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/OfflineCandidateStoreModule.scala deleted file mode 100644 index db4a3fa5e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/OfflineCandidateStoreModule.scala +++ /dev/null @@ -1,150 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.bijection.Injection -import com.twitter.bijection.scrooge.CompactScalaCodec -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.CandidateTweetsList -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.Apollo -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import javax.inject.Named -import javax.inject.Singleton - -object OfflineCandidateStoreModule extends TwitterModule { - type UserId = Long - implicit val tweetCandidatesInjection: Injection[CandidateTweetsList, Array[Byte]] = - CompactScalaCodec(CandidateTweetsList) - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweet2020CandidateStore) - def offlineTweet2020CandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_interestedin_2020" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweet2020Hl0El15CandidateStore) - def offlineTweet2020Hl0El15CandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_0_el_15" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweet2020Hl2El15CandidateStore) - def offlineTweet2020Hl2El15CandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_15" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweet2020Hl2El50CandidateStore) - def offlineTweet2020Hl2El50CandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_2_el_50" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweet2020Hl8El50CandidateStore) - def offlineTweet2020Hl8El50CandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_interestedin_2020_hl_8_el_50" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineTweetMTSCandidateStore) - def offlineTweetMTSCandidateMhStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_mts_consumer_embeddings" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineFavDecayedSumCandidateStore) - def offlineFavDecayedSumCandidateStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_decayed_sum" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineFtrAt5Pop1000RankDecay11CandidateStore) - def offlineFtrAt5Pop1000RankDecay11CandidateStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_ftrat5_pop1000_rank_decay_1_1" - ) - } - - @Provides - @Singleton - @Named(ModuleNames.OfflineFtrAt5Pop10000RankDecay11CandidateStore) - def offlineFtrAt5Pop10000RankDecay11CandidateStore( - serviceIdentifier: ServiceIdentifier - ): ReadableStore[UserId, CandidateTweetsList] = { - buildOfflineCandidateStore( - serviceIdentifier, - datasetName = "offline_tweet_recommendations_from_ftrat5_pop10000_rank_decay_1_1" - ) - } - - private def buildOfflineCandidateStore( - serviceIdentifier: ServiceIdentifier, - datasetName: String - ): ReadableStore[UserId, CandidateTweetsList] = { - ManhattanRO - .getReadableStoreWithMtls[Long, CandidateTweetsList]( - ManhattanROConfig( - HDFSPath(""), // not needed - ApplicationID("multi_type_simclusters"), - DatasetName(datasetName), - Apollo - ), - ManhattanKVClientMtlsParams(serviceIdentifier) - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphOonStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphOonStoreModule.scala deleted file mode 100644 index 3d9a71a1c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphOonStoreModule.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.app.Flag -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.wtf.candidate.thriftscala.CandidateSeq - -object RealGraphOonStoreModule extends TwitterModule { - - private val userRealGraphOonColumnPath: Flag[String] = flag[String]( - name = "crMixer.userRealGraphOonColumnPath", - default = "recommendations/twistly/userRealgraphOon", - help = "Strato column path for user real graph OON Store" - ) - - @Provides - @Singleton - @Named(ModuleNames.RealGraphOonStore) - def providesRealGraphOonStore( - stratoClient: StratoClient, - statsReceiver: StatsReceiver - ): ReadableStore[UserId, CandidateSeq] = { - val realGraphOonStratoFetchableStore = StratoFetchableStore - .withUnitView[UserId, CandidateSeq](stratoClient, userRealGraphOonColumnPath()) - - ObservedReadableStore( - realGraphOonStratoFetchableStore - )(statsReceiver.scope("user_real_graph_oon_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphStoreMhModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphStoreMhModule.scala deleted file mode 100644 index 0cd1a3ad7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RealGraphStoreMhModule.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.UserId -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.Apollo -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.storehaus_internal.util.HDFSPath -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.cr_mixer.param.decider.DeciderKey -import com.twitter.hermit.store.common.DeciderableReadableStore -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.wtf.candidate.thriftscala.CandidateSeq - -object RealGraphStoreMhModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.RealGraphInStore) - def providesRealGraphStoreMh( - decider: CrMixerDecider, - statsReceiver: StatsReceiver, - manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - ): ReadableStore[UserId, CandidateSeq] = { - - implicit val valueCodec = new BinaryScalaCodec(CandidateSeq) - val underlyingStore = ManhattanRO - .getReadableStoreWithMtls[UserId, CandidateSeq]( - ManhattanROConfig( - HDFSPath(""), - ApplicationID("cr_mixer_apollo"), - DatasetName("real_graph_scores_apollo"), - Apollo), - manhattanKVClientMtlsParams - ) - - val memCachedStore = ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 24.hours, - )( - valueInjection = valueCodec, - statsReceiver = statsReceiver.scope("memCachedUserRealGraphMh"), - keyToString = { k: UserId => s"uRGraph/$k" } - ) - - DeciderableReadableStore( - memCachedStore, - decider.deciderGateBuilder.idGate(DeciderKey.enableRealGraphMhStoreDeciderKey), - statsReceiver.scope("RealGraphMh") - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationManagerModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationManagerModule.scala deleted file mode 100644 index 227e5fff3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationManagerModule.scala +++ /dev/null @@ -1,107 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.SimClustersEmbedding -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.representation_manager.thriftscala.SimClustersEmbeddingView -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.ModelVersion -import com.google.inject.Provides -import com.google.inject.Singleton -import javax.inject.Named -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.simclusters_v2.thriftscala.{SimClustersEmbedding => ThriftSimClustersEmbedding} - -object RepresentationManagerModule extends TwitterModule { - private val ColPathPrefix = "recommendations/representation_manager/" - private val SimclustersTweetColPath = ColPathPrefix + "simClustersEmbedding.Tweet" - private val SimclustersUserColPath = ColPathPrefix + "simClustersEmbedding.User" - - @Provides - @Singleton - @Named(ModuleNames.RmsTweetLogFavLongestL2EmbeddingStore) - def providesRepresentationManagerTweetStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[TweetId, SimClustersEmbedding] = { - ObservedReadableStore( - StratoFetchableStore - .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( - stratoClient, - SimclustersTweetColPath, - SimClustersEmbeddingView( - EmbeddingType.LogFavLongestL2EmbeddingTweet, - ModelVersion.Model20m145k2020)) - .mapValues(SimClustersEmbedding(_)))( - statsReceiver.scope("rms_tweet_log_fav_longest_l2_store")) - } - - @Provides - @Singleton - @Named(ModuleNames.RmsUserFavBasedProducerEmbeddingStore) - def providesRepresentationManagerUserFavBasedProducerEmbeddingStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[UserId, SimClustersEmbedding] = { - ObservedReadableStore( - StratoFetchableStore - .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( - stratoClient, - SimclustersUserColPath, - SimClustersEmbeddingView( - EmbeddingType.FavBasedProducer, - ModelVersion.Model20m145k2020 - ) - ) - .mapValues(SimClustersEmbedding(_)))( - statsReceiver.scope("rms_user_fav_based_producer_store")) - } - - @Provides - @Singleton - @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore) - def providesRepresentationManagerUserLogFavConsumerEmbeddingStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[UserId, SimClustersEmbedding] = { - ObservedReadableStore( - StratoFetchableStore - .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( - stratoClient, - SimclustersUserColPath, - SimClustersEmbeddingView( - EmbeddingType.LogFavBasedUserInterestedIn, - ModelVersion.Model20m145k2020 - ) - ) - .mapValues(SimClustersEmbedding(_)))( - statsReceiver.scope("rms_user_log_fav_interestedin_store")) - } - - @Provides - @Singleton - @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore) - def providesRepresentationManagerUserFollowInterestedInEmbeddingStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[UserId, SimClustersEmbedding] = { - ObservedReadableStore( - StratoFetchableStore - .withView[Long, SimClustersEmbeddingView, ThriftSimClustersEmbedding]( - stratoClient, - SimclustersUserColPath, - SimClustersEmbeddingView( - EmbeddingType.FollowBasedUserInterestedIn, - ModelVersion.Model20m145k2020 - ) - ) - .mapValues(SimClustersEmbedding(_)))( - statsReceiver.scope("rms_user_follow_interestedin_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationScorerModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationScorerModule.scala deleted file mode 100644 index 7db6474cc..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/RepresentationScorerModule.scala +++ /dev/null @@ -1,56 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.simclusters_v2.thriftscala.ModelVersion -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.storehaus.ReadableStore -import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.hermit.store.common.ObservedReadableStore -import javax.inject.Named -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.representationscorer.thriftscala.ListScoreId - -object RepresentationScorerModule extends TwitterModule { - - private val rsxColumnPath = "recommendations/representation_scorer/listScore" - - private final val SimClusterModelVersion = ModelVersion.Model20m145k2020 - private final val TweetEmbeddingType = EmbeddingType.LogFavBasedTweet - - @Provides - @Singleton - @Named(ModuleNames.RsxStore) - def providesRepresentationScorerStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[(UserId, TweetId), Double] = { - ObservedReadableStore( - StratoFetchableStore - .withUnitView[ListScoreId, Double](stratoClient, rsxColumnPath).composeKeyMapping[( - UserId, - TweetId - )] { key => - representationScorerStoreKeyMapping(key._1, key._2) - } - )(statsReceiver.scope("rsx_store")) - } - - private def representationScorerStoreKeyMapping(t1: TweetId, t2: TweetId): ListScoreId = { - ListScoreId( - algorithm = ScoringAlgorithm.PairEmbeddingLogCosineSimilarity, - modelVersion = SimClusterModelVersion, - targetEmbeddingType = TweetEmbeddingType, - targetId = InternalId.TweetId(t1), - candidateEmbeddingType = TweetEmbeddingType, - candidateIds = Seq(InternalId.TweetId(t2)) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SampleSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SampleSimilarityEngineModule.scala deleted file mode 100644 index 98c3f4af6..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SampleSimilarityEngineModule.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import javax.inject.Singleton - -/** - * In this example we build a [[StandardSimilarityEngine]] to wrap a dummy store - */ -object SimpleSimilarityEngineModule extends TwitterModule { - @Provides - @Singleton - def providesSimpleSimilarityEngine( - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver - ): StandardSimilarityEngine[UserId, (TweetId, Double)] = { - // Inject your readableStore implementation here - val dummyStore = ReadableStore.fromMap( - Map( - 1L -> Seq((100L, 1.0), (101L, 1.0)), - 2L -> Seq((200L, 2.0), (201L, 2.0)), - 3L -> Seq((300L, 3.0), (301L, 3.0)) - )) - - new StandardSimilarityEngine[UserId, (TweetId, Double)]( - implementingStore = dummyStore, - identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9997), - globalStats = globalStats, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} - -/** - * In this example we build a [[LookupSimilarityEngine]] to wrap a dummy store with 2 versions - */ -object LookupSimilarityEngineModule extends TwitterModule { - @Provides - @Singleton - def providesLookupSimilarityEngine( - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver - ): LookupSimilarityEngine[UserId, (TweetId, Double)] = { - // Inject your readableStore implementation here - val dummyStoreV1 = ReadableStore.fromMap( - Map( - 1L -> Seq((100L, 1.0), (101L, 1.0)), - 2L -> Seq((200L, 2.0), (201L, 2.0)), - )) - - val dummyStoreV2 = ReadableStore.fromMap( - Map( - 1L -> Seq((100L, 1.0), (101L, 1.0)), - 2L -> Seq((200L, 2.0), (201L, 2.0)), - )) - - new LookupSimilarityEngine[UserId, (TweetId, Double)]( - versionedStoreMap = Map( - "V1" -> dummyStoreV1, - "V2" -> dummyStoreV2 - ), - identifier = SimilarityEngineType.EnumUnknownSimilarityEngineType(9998), - globalStats = globalStats, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SimClustersANNServiceNameToClientMapper.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SimClustersANNServiceNameToClientMapper.scala deleted file mode 100644 index 305839816..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SimClustersANNServiceNameToClientMapper.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.inject.TwitterModule -import com.twitter.simclustersann.thriftscala.SimClustersANNService -import javax.inject.Named - -object SimClustersANNServiceNameToClientMapper extends TwitterModule { - - @Provides - @Singleton - def providesSimClustersANNServiceNameToClientMapping( - @Named(ModuleNames.ProdSimClustersANNServiceClientName) simClustersANNServiceProd: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) simClustersANNServiceExperimental: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.SimClustersANNServiceClientName1) simClustersANNService1: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.SimClustersANNServiceClientName2) simClustersANNService2: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.SimClustersANNServiceClientName3) simClustersANNService3: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.SimClustersANNServiceClientName5) simClustersANNService5: SimClustersANNService.MethodPerEndpoint, - @Named(ModuleNames.SimClustersANNServiceClientName4) simClustersANNService4: SimClustersANNService.MethodPerEndpoint - ): Map[String, SimClustersANNService.MethodPerEndpoint] = { - Map[String, SimClustersANNService.MethodPerEndpoint]( - "simclusters-ann" -> simClustersANNServiceProd, - "simclusters-ann-experimental" -> simClustersANNServiceExperimental, - "simclusters-ann-1" -> simClustersANNService1, - "simclusters-ann-2" -> simClustersANNService2, - "simclusters-ann-3" -> simClustersANNService3, - "simclusters-ann-5" -> simClustersANNService5, - "simclusters-ann-4" -> simClustersANNService4 - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SkitStratoStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SkitStratoStoreModule.scala deleted file mode 100644 index 318c2ed00..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/SkitStratoStoreModule.scala +++ /dev/null @@ -1,65 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedCachedReadableStore -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.Client -import com.twitter.topic_recos.thriftscala.TopicTopTweets -import com.twitter.topic_recos.thriftscala.TopicTweet -import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey - -/** - * Strato store that wraps the topic top tweets pipeline indexed from a Summingbird job - */ -object SkitStratoStoreModule extends TwitterModule { - - val column = "recommendations/topic_recos/topicTopTweets" - - @Provides - @Singleton - @Named(ModuleNames.SkitStratoStoreName) - def providesSkitStratoStore( - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - stratoClient: Client, - statsReceiver: StatsReceiver - ): ReadableStore[TopicTweetPartitionFlatKey, Seq[TopicTweet]] = { - val skitStore = ObservedReadableStore( - StratoFetchableStore - .withUnitView[TopicTweetPartitionFlatKey, TopicTopTweets](stratoClient, column))( - statsReceiver.scope(ModuleNames.SkitStratoStoreName)).mapValues { topicTopTweets => - topicTopTweets.topTweets - } - - val memCachedStore = ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = skitStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TopicTweet]()), - statsReceiver = statsReceiver.scope("memcached_skit_store"), - keyToString = { k => s"skit:${keyHasher.hashKey(k.toString.getBytes)}" } - ) - - ObservedCachedReadableStore.from[TopicTweetPartitionFlatKey, Seq[TopicTweet]]( - memCachedStore, - ttl = 5.minutes, - maxKeys = 100000, // ~150MB max - cacheName = "skit_in_memory_cache", - windowSize = 10000L - )(statsReceiver.scope("skit_in_memory_cache")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/StrongTiePredictionStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/StrongTiePredictionStoreModule.scala deleted file mode 100644 index 51d556077..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/StrongTiePredictionStoreModule.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.app.Flag -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.UserId -import com.twitter.hermit.stp.thriftscala.STPResult -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import javax.inject.Named - -object StrongTiePredictionStoreModule extends TwitterModule { - - private val strongTiePredictionColumnPath: Flag[String] = flag[String]( - name = "crMixer.strongTiePredictionColumnPath", - default = "onboarding/userrecs/strong_tie_prediction_big", - help = "Strato column path for StrongTiePredictionStore" - ) - - @Provides - @Singleton - @Named(ModuleNames.StpStore) - def providesStrongTiePredictionStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[UserId, STPResult] = { - val strongTiePredictionStratoFetchableStore = StratoFetchableStore - .withUnitView[UserId, STPResult](stratoClient, strongTiePredictionColumnPath()) - - ObservedReadableStore( - strongTiePredictionStratoFetchableStore - )(statsReceiver.scope("strong_tie_prediction_big_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TripCandidateStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TripCandidateStoreModule.scala deleted file mode 100644 index 802d5c986..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TripCandidateStoreModule.scala +++ /dev/null @@ -1,34 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweets -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain -import javax.inject.Named - -object TripCandidateStoreModule extends TwitterModule { - private val stratoColumn = "trends/trip/tripTweetsDataflowProd" - - @Provides - @Named(ModuleNames.TripCandidateStore) - def providesSimClustersTripCandidateStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient - ): ReadableStore[TripDomain, Seq[TripTweet]] = { - val tripCandidateStratoFetchableStore = - StratoFetchableStore - .withUnitView[TripDomain, TripTweets](stratoClient, stratoColumn) - .mapValues(_.tweets) - - ObservedReadableStore( - tripCandidateStratoFetchableStore - )(statsReceiver.scope("simclusters_trip_candidate_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetInfoStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetInfoStoreModule.scala deleted file mode 100644 index a3a794e8e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetInfoStoreModule.scala +++ /dev/null @@ -1,205 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Module -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.contentrecommender.thriftscala.TweetInfo -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.frigate.common.store.health.TweetHealthModelStore -import com.twitter.frigate.common.store.health.TweetHealthModelStore.TweetHealthModelStoreConfig -import com.twitter.frigate.common.store.health.UserHealthModelStore -import com.twitter.frigate.thriftscala.TweetHealthScores -import com.twitter.frigate.thriftscala.UserAgathaScores -import com.twitter.hermit.store.common.DeciderableReadableStore -import com.twitter.hermit.store.common.ObservedCachedReadableStore -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.contentrecommender.store.TweetInfoStore -import com.twitter.contentrecommender.store.TweetyPieFieldsStore -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderKey -import com.twitter.frigate.data_pipeline.scalding.thriftscala.BlueVerifiedAnnotationsV2 -import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus -import com.twitter.recos.user_tweet_graph_plus.thriftscala.TweetEngagementScores -import com.twitter.relevance_platform.common.health_store.UserMediaRepresentationHealthStore -import com.twitter.relevance_platform.common.health_store.MagicRecsRealTimeAggregatesStore -import com.twitter.relevance_platform.thriftscala.MagicRecsRealTimeAggregatesScores -import com.twitter.relevance_platform.thriftscala.UserMediaRepresentationScores -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.tweetypie.thriftscala.TweetService -import com.twitter.util.Future -import com.twitter.util.JavaTimer -import com.twitter.util.Timer - -import javax.inject.Named - -object TweetInfoStoreModule extends TwitterModule { - implicit val timer: Timer = new JavaTimer(true) - override def modules: Seq[Module] = Seq(UnifiedCacheClient) - - @Provides - @Singleton - def providesTweetInfoStore( - statsReceiver: StatsReceiver, - serviceIdentifier: ServiceIdentifier, - stratoClient: StratoClient, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams, - tweetyPieService: TweetService.MethodPerEndpoint, - userTweetGraphPlusService: UserTweetGraphPlus.MethodPerEndpoint, - @Named(ModuleNames.BlueVerifiedAnnotationStore) blueVerifiedAnnotationStore: ReadableStore[ - String, - BlueVerifiedAnnotationsV2 - ], - decider: CrMixerDecider - ): ReadableStore[TweetId, TweetInfo] = { - - val tweetEngagementScoreStore: ReadableStore[TweetId, TweetEngagementScores] = { - val underlyingStore = - ObservedReadableStore(new ReadableStore[TweetId, TweetEngagementScores] { - override def get( - k: TweetId - ): Future[Option[TweetEngagementScores]] = { - userTweetGraphPlusService.tweetEngagementScore(k).map { - Some(_) - } - } - })(statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore")) - - DeciderableReadableStore( - underlyingStore, - decider.deciderGateBuilder.idGate( - DeciderKey.enableUtgRealTimeTweetEngagementScoreDeciderKey), - statsReceiver.scope("UserTweetGraphTweetEngagementScoreStore") - ) - - } - - val tweetHealthModelStore: ReadableStore[TweetId, TweetHealthScores] = { - val underlyingStore = TweetHealthModelStore.buildReadableStore( - stratoClient, - Some( - TweetHealthModelStoreConfig( - enablePBlock = true, - enableToxicity = true, - enablePSpammy = true, - enablePReported = true, - enableSpammyTweetContent = true, - enablePNegMultimodal = true, - )) - )(statsReceiver.scope("UnderlyingTweetHealthModelStore")) - - DeciderableReadableStore( - ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 2.hours - )( - valueInjection = BinaryScalaCodec(TweetHealthScores), - statsReceiver = statsReceiver.scope("memCachedTweetHealthModelStore"), - keyToString = { k: TweetId => s"tHMS/$k" } - ), - decider.deciderGateBuilder.idGate(DeciderKey.enableHealthSignalsScoreDeciderKey), - statsReceiver.scope("TweetHealthModelStore") - ) // use s"tHMS/$k" instead of s"tweetHealthModelStore/$k" to differentiate from CR cache - } - - val userHealthModelStore: ReadableStore[UserId, UserAgathaScores] = { - val underlyingStore = UserHealthModelStore.buildReadableStore(stratoClient)( - statsReceiver.scope("UnderlyingUserHealthModelStore")) - DeciderableReadableStore( - ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 18.hours - )( - valueInjection = BinaryScalaCodec(UserAgathaScores), - statsReceiver = statsReceiver.scope("memCachedUserHealthModelStore"), - keyToString = { k: UserId => s"uHMS/$k" } - ), - decider.deciderGateBuilder.idGate(DeciderKey.enableUserAgathaScoreDeciderKey), - statsReceiver.scope("UserHealthModelStore") - ) - } - - val userMediaRepresentationHealthStore: ReadableStore[UserId, UserMediaRepresentationScores] = { - val underlyingStore = - UserMediaRepresentationHealthStore.buildReadableStore( - manhattanKVClientMtlsParams, - statsReceiver.scope("UnderlyingUserMediaRepresentationHealthStore") - ) - DeciderableReadableStore( - ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 12.hours - )( - valueInjection = BinaryScalaCodec(UserMediaRepresentationScores), - statsReceiver = statsReceiver.scope("memCacheUserMediaRepresentationHealthStore"), - keyToString = { k: UserId => s"uMRHS/$k" } - ), - decider.deciderGateBuilder.idGate(DeciderKey.enableUserMediaRepresentationStoreDeciderKey), - statsReceiver.scope("UserMediaRepresentationHealthStore") - ) - } - - val magicRecsRealTimeAggregatesStore: ReadableStore[ - TweetId, - MagicRecsRealTimeAggregatesScores - ] = { - val underlyingStore = - MagicRecsRealTimeAggregatesStore.buildReadableStore( - serviceIdentifier, - statsReceiver.scope("UnderlyingMagicRecsRealTimeAggregatesScores") - ) - DeciderableReadableStore( - underlyingStore, - decider.deciderGateBuilder.idGate(DeciderKey.enableMagicRecsRealTimeAggregatesStore), - statsReceiver.scope("MagicRecsRealTimeAggregatesStore") - ) - } - - val tweetInfoStore: ReadableStore[TweetId, TweetInfo] = { - val underlyingStore = TweetInfoStore( - TweetyPieFieldsStore.getStoreFromTweetyPie(tweetyPieService), - userMediaRepresentationHealthStore, - magicRecsRealTimeAggregatesStore, - tweetEngagementScoreStore, - blueVerifiedAnnotationStore - )(statsReceiver.scope("tweetInfoStore")) - - val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 15.minutes, - // Hydrating tweetInfo is now a required step for all candidates, - // hence we needed to tune these thresholds. - asyncUpdate = serviceIdentifier.environment == "prod" - )( - valueInjection = BinaryScalaCodec(TweetInfo), - statsReceiver = statsReceiver.scope("memCachedTweetInfoStore"), - keyToString = { k: TweetId => s"tIS/$k" } - ) - - ObservedCachedReadableStore.from( - memcachedStore, - ttl = 15.minutes, - maxKeys = 8388607, // Check TweetInfo definition. size~92b. Around 736 MB - windowSize = 10000L, - cacheName = "tweet_info_cache", - maxMultiGetSize = 20 - )(statsReceiver.scope("inMemoryCachedTweetInfoStore")) - } - tweetInfoStore - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecentEngagedUserStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecentEngagedUserStoreModule.scala deleted file mode 100644 index 2e379e545..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecentEngagedUserStoreModule.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.app.Flag -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers - -object TweetRecentEngagedUserStoreModule extends TwitterModule { - - private val tweetRecentEngagedUsersStoreDefaultVersion = - 0 // DefaultVersion for tweetEngagedUsersStore, whose key = (tweetId, DefaultVersion) - private val tweetRecentEngagedUsersColumnPath: Flag[String] = flag[String]( - name = "crMixer.tweetRecentEngagedUsersColumnPath", - default = "recommendations/twistly/tweetRecentEngagedUsers", - help = "Strato column path for TweetRecentEngagedUsersStore" - ) - private type Version = Long - - @Provides - @Singleton - def providesTweetRecentEngagedUserStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[TweetId, TweetRecentEngagedUsers] = { - val tweetRecentEngagedUsersStratoFetchableStore = StratoFetchableStore - .withUnitView[(TweetId, Version), TweetRecentEngagedUsers]( - stratoClient, - tweetRecentEngagedUsersColumnPath()).composeKeyMapping[TweetId](tweetId => - (tweetId, tweetRecentEngagedUsersStoreDefaultVersion)) - - ObservedReadableStore( - tweetRecentEngagedUsersStratoFetchableStore - )(statsReceiver.scope("tweet_recent_engaged_users_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecommendationResultsStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecommendationResultsStoreModule.scala deleted file mode 100644 index 04c03eda6..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TweetRecommendationResultsStoreModule.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.thriftscala.CrMixerTweetResponse -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.hermit.store.common.ReadableWritableStore -import com.twitter.hermit.store.common.ObservedReadableWritableMemcacheStore -import com.twitter.simclusters_v2.common.UserId -import javax.inject.Named - -object TweetRecommendationResultsStoreModule extends TwitterModule { - @Provides - @Singleton - def providesTweetRecommendationResultsStore( - @Named(ModuleNames.TweetRecommendationResultsCache) tweetRecommendationResultsCacheClient: MemcachedClient, - statsReceiver: StatsReceiver - ): ReadableWritableStore[UserId, CrMixerTweetResponse] = { - ObservedReadableWritableMemcacheStore.fromCacheClient( - cacheClient = tweetRecommendationResultsCacheClient, - ttl = 24.hours)( - valueInjection = BinaryScalaCodec(CrMixerTweetResponse), - statsReceiver = statsReceiver.scope("TweetRecommendationResultsMemcacheStore"), - keyToString = { k: UserId => k.toString } - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwhinCollabFilterStratoStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwhinCollabFilterStratoStoreModule.scala deleted file mode 100644 index 4275ad2a8..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwhinCollabFilterStratoStoreModule.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.inject.TwitterModule -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.TwhinCollabFilterView -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import javax.inject.Named - -object TwhinCollabFilterStratoStoreModule extends TwitterModule { - - val stratoColumnPath: String = "cuad/twhin/getCollabFilterTweetCandidatesProd.User" - - @Provides - @Singleton - @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow) - def providesTwhinCollabFilterStratoStoreForFollow( - stratoClient: StratoClient - ): ReadableStore[Long, Seq[TweetId]] = { - StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]]( - stratoClient, - column = stratoColumnPath, - view = TwhinCollabFilterView("follow_2022_03_10_c_500K") - ) - } - - @Provides - @Singleton - @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement) - def providesTwhinCollabFilterStratoStoreForEngagement( - stratoClient: StratoClient - ): ReadableStore[Long, Seq[TweetId]] = { - StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]]( - stratoClient, - column = stratoColumnPath, - view = TwhinCollabFilterView("engagement_2022_04_10_c_500K")) - } - - @Provides - @Singleton - @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow) - def providesTwhinMultiClusterStratoStoreForFollow( - stratoClient: StratoClient - ): ReadableStore[Long, Seq[TweetId]] = { - StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]]( - stratoClient, - column = stratoColumnPath, - view = TwhinCollabFilterView("multiclusterFollow20220921") - ) - } - - @Provides - @Singleton - @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement) - def providesTwhinMultiClusterStratoStoreForEngagement( - stratoClient: StratoClient - ): ReadableStore[Long, Seq[TweetId]] = { - StratoFetchableStore.withView[Long, TwhinCollabFilterView, Seq[TweetId]]( - stratoClient, - column = stratoColumnPath, - view = TwhinCollabFilterView("multiclusterEng20220921")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwiceClustersMembersStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwiceClustersMembersStoreModule.scala deleted file mode 100644 index a15e2549a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/TwiceClustersMembersStoreModule.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.app.Flag -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.simclusters_v2.thriftscala.OrderedClustersAndMembers -import javax.inject.Named - -object TwiceClustersMembersStoreModule extends TwitterModule { - - private val twiceClustersMembersColumnPath: Flag[String] = flag[String]( - name = "crMixer.twiceClustersMembersColumnPath", - default = - "recommendations/simclusters_v2/embeddings/TwiceClustersMembersLargestDimApeSimilarity", - help = "Strato column path for TweetRecentEngagedUsersStore" - ) - - @Provides - @Singleton - @Named(ModuleNames.TwiceClustersMembersStore) - def providesTweetRecentEngagedUserStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[UserId, OrderedClustersAndMembers] = { - val twiceClustersMembersStratoFetchableStore = StratoFetchableStore - .withUnitView[UserId, OrderedClustersAndMembers]( - stratoClient, - twiceClustersMembersColumnPath()) - - ObservedReadableStore( - twiceClustersMembersStratoFetchableStore - )(statsReceiver.scope("twice_clusters_members_largestDimApe_similarity_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UnifiedCacheClient.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UnifiedCacheClient.scala deleted file mode 100644 index 3b48f4c02..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UnifiedCacheClient.scala +++ /dev/null @@ -1,83 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.app.Flag -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.memcached.Client -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus_internal.memcache.MemcacheStore -import com.twitter.storehaus_internal.util.ClientName -import com.twitter.storehaus_internal.util.ZkEndPoint -import javax.inject.Named - -object UnifiedCacheClient extends TwitterModule { - - private val TIME_OUT = 20.milliseconds - - val crMixerUnifiedCacheDest: Flag[String] = flag[String]( - name = "crMixer.unifiedCacheDest", - default = "/s/cache/content_recommender_unified_v2", - help = "Wily path to Content Recommender unified cache" - ) - - val tweetRecommendationResultsCacheDest: Flag[String] = flag[String]( - name = "tweetRecommendationResults.CacheDest", - default = "/s/cache/tweet_recommendation_results", - help = "Wily path to CrMixer getTweetRecommendations() results cache" - ) - - val earlybirdTweetsCacheDest: Flag[String] = flag[String]( - name = "earlybirdTweets.CacheDest", - default = "/s/cache/crmixer_earlybird_tweets", - help = "Wily path to CrMixer Earlybird Recency Based Similarity Engine result cache" - ) - - @Provides - @Singleton - @Named(ModuleNames.UnifiedCache) - def provideUnifiedCacheClient( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver, - ): Client = - MemcacheStore.memcachedClient( - name = ClientName("memcache-content-recommender-unified"), - dest = ZkEndPoint(crMixerUnifiedCacheDest()), - statsReceiver = statsReceiver.scope("cache_client"), - serviceIdentifier = serviceIdentifier, - timeout = TIME_OUT - ) - - @Provides - @Singleton - @Named(ModuleNames.TweetRecommendationResultsCache) - def providesTweetRecommendationResultsCache( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver, - ): Client = - MemcacheStore.memcachedClient( - name = ClientName("memcache-tweet-recommendation-results"), - dest = ZkEndPoint(tweetRecommendationResultsCacheDest()), - statsReceiver = statsReceiver.scope("cache_client"), - serviceIdentifier = serviceIdentifier, - timeout = TIME_OUT - ) - - @Provides - @Singleton - @Named(ModuleNames.EarlybirdTweetsCache) - def providesEarlybirdTweetsCache( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver, - ): Client = - MemcacheStore.memcachedClient( - name = ClientName("memcache-crmixer-earlybird-tweets"), - dest = ZkEndPoint(earlybirdTweetsCacheDest()), - statsReceiver = statsReceiver.scope("cache_client"), - serviceIdentifier = serviceIdentifier, - timeout = TIME_OUT - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceColumnModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceColumnModule.scala deleted file mode 100644 index b15ebe0fe..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceColumnModule.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.module -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.usersignalservice.thriftscala.BatchSignalRequest -import com.twitter.usersignalservice.thriftscala.BatchSignalResponse -import javax.inject.Named - -object UserSignalServiceColumnModule extends TwitterModule { - private val UssColumnPath = "recommendations/user-signal-service/signals" - - @Provides - @Singleton - @Named(ModuleNames.UssStratoColumn) - def providesUserSignalServiceStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[BatchSignalRequest, BatchSignalResponse] = { - ObservedReadableStore( - StratoFetchableStore - .withUnitView[BatchSignalRequest, BatchSignalResponse](stratoClient, UssColumnPath))( - statsReceiver.scope("user_signal_service_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceStoreModule.scala deleted file mode 100644 index cc55f0e9a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserSignalServiceStoreModule.scala +++ /dev/null @@ -1,37 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import com.twitter.strato.client.{Client => StratoClient} -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.source_signal.UssStore -import com.twitter.cr_mixer.source_signal.UssStore.Query -import com.twitter.frigate.common.store.strato.StratoFetchableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.usersignalservice.thriftscala.BatchSignalRequest -import com.twitter.usersignalservice.thriftscala.BatchSignalResponse -import com.twitter.usersignalservice.thriftscala.SignalType -import com.twitter.usersignalservice.thriftscala.{Signal => UssSignal} -import javax.inject.Named - -object UserSignalServiceStoreModule extends TwitterModule { - - private val UssColumnPath = "recommendations/user-signal-service/signals" - - @Provides - @Singleton - @Named(ModuleNames.UssStore) - def providesUserSignalServiceStore( - statsReceiver: StatsReceiver, - stratoClient: StratoClient, - ): ReadableStore[Query, Seq[(SignalType, Seq[UssSignal])]] = { - ObservedReadableStore( - UssStore( - StratoFetchableStore - .withUnitView[BatchSignalRequest, BatchSignalResponse](stratoClient, UssColumnPath), - statsReceiver))(statsReceiver.scope("user_signal_service_store")) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserStateStoreModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserStateStoreModule.scala deleted file mode 100644 index 6db2c38fd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/UserStateStoreModule.scala +++ /dev/null @@ -1,113 +0,0 @@ -package com.twitter.cr_mixer.module - -import com.google.inject.Provides -import com.google.inject.Singleton -import com.twitter.bijection.Bufferable -import com.twitter.bijection.Injection -import com.twitter.bijection.scrooge.BinaryScalaCodec -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.UserId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storage.client.manhattan.kv.ManhattanKVClientMtlsParams -import com.twitter.storehaus.ReadableStore -import com.twitter.storehaus_internal.manhattan.ManhattanRO -import com.twitter.storehaus_internal.manhattan.ManhattanROConfig -import com.twitter.storehaus_internal.util.HDFSPath -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.core_workflows.user_model.thriftscala.CondensedUserState -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderKey -import com.twitter.hermit.store.common.DeciderableReadableStore -import com.twitter.storehaus_internal.manhattan.Apollo -import com.twitter.storehaus_internal.util.ApplicationID -import com.twitter.storehaus_internal.util.DatasetName -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.JavaTimer -import com.twitter.util.Time -import com.twitter.util.TimeoutException -import com.twitter.util.Timer -import javax.inject.Named - -object UserStateStoreModule extends TwitterModule { - implicit val timer: Timer = new JavaTimer(true) - final val NewUserCreateDaysThreshold = 7 - final val DefaultUnknownUserStateValue = 100 - - // Convert CondensedUserState to UserState Enum - // If CondensedUserState is None, back fill by checking whether the user is new user - class UserStateStore( - userStateStore: ReadableStore[UserId, CondensedUserState], - timeout: Duration, - statsReceiver: StatsReceiver) - extends ReadableStore[UserId, UserState] { - override def get(userId: UserId): Future[Option[UserState]] = { - userStateStore - .get(userId).map(_.flatMap(_.userState)).map { - case Some(userState) => Some(userState) - case None => - val isNewUser = SnowflakeId.timeFromIdOpt(userId).exists { userCreateTime => - Time.now - userCreateTime < Duration.fromDays(NewUserCreateDaysThreshold) - } - if (isNewUser) Some(UserState.New) - else Some(UserState.EnumUnknownUserState(DefaultUnknownUserStateValue)) - - }.raiseWithin(timeout)(timer).rescue { - case _: TimeoutException => - statsReceiver.counter("TimeoutException").incr() - Future.None - } - } - } - - @Provides - @Singleton - def providesUserStateStore( - crMixerDecider: CrMixerDecider, - statsReceiver: StatsReceiver, - manhattanKVClientMtlsParams: ManhattanKVClientMtlsParams, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig - ): ReadableStore[UserId, UserState] = { - - val underlyingStore = new UserStateStore( - ManhattanRO - .getReadableStoreWithMtls[UserId, CondensedUserState]( - ManhattanROConfig( - HDFSPath(""), - ApplicationID("cr_mixer_apollo"), - DatasetName("condensed_user_state"), - Apollo), - manhattanKVClientMtlsParams - )( - implicitly[Injection[Long, Array[Byte]]], - BinaryScalaCodec(CondensedUserState) - ), - timeoutConfig.userStateStoreTimeout, - statsReceiver.scope("UserStateStore") - ).mapValues(_.value) // Read the value of Enum so that we only caches the Int - - val memCachedStore = ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 24.hours, - )( - valueInjection = Bufferable.injectionOf[Int], // Cache Value is Enum Value for UserState - statsReceiver = statsReceiver.scope("memCachedUserStateStore"), - keyToString = { k: UserId => s"uState/$k" } - ).mapValues(value => UserState.getOrUnknown(value)) - - DeciderableReadableStore( - memCachedStore, - crMixerDecider.deciderGateBuilder.idGate(DeciderKey.enableUserStateStoreDeciderKey), - statsReceiver.scope("UserStateStore") - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/ABDeciderModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/ABDeciderModule.scala deleted file mode 100644 index 9d981f4f3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/ABDeciderModule.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.google.inject.name.Named -import com.twitter.abdecider.ABDeciderFactory -import com.twitter.abdecider.LoggingABDecider -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.inject.TwitterModule -import com.twitter.inject.annotations.Flag -import com.twitter.logging.Logger -import javax.inject.Singleton - -object ABDeciderModule extends TwitterModule { - - flag( - name = "abdecider.path", - default = "/usr/local/config/abdecider/abdecider.yml", - help = "path to the abdecider Yml file location" - ) - - @Provides - @Singleton - def provideABDecider( - @Flag("abdecider.path") abDeciderYmlPath: String, - @Named(ModuleNames.AbDeciderLogger) scribeLogger: Logger - ): LoggingABDecider = { - ABDeciderFactory( - abDeciderYmlPath = abDeciderYmlPath, - scribeLogger = Some(scribeLogger), - environment = Some("production") - ).buildWithLogging() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerFlagModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerFlagModule.scala deleted file mode 100644 index 9e7b9938a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerFlagModule.scala +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.twitter.inject.TwitterModule - -object CrMixerFlagName { - val SERVICE_FLAG = "cr_mixer.flag" - val DarkTrafficFilterDeciderKey = "thrift.dark.traffic.filter.decider_key" -} - -object CrMixerFlagModule extends TwitterModule { - import CrMixerFlagName._ - - flag[Boolean](name = SERVICE_FLAG, default = false, help = "This is a CR Mixer flag") - - flag[String]( - name = DarkTrafficFilterDeciderKey, - default = "dark_traffic_filter", - help = "Dark traffic filter decider key" - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerLoggingABDeciderModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerLoggingABDeciderModule.scala deleted file mode 100644 index 6b674495f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/CrMixerLoggingABDeciderModule.scala +++ /dev/null @@ -1,20 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.twitter.abdecider.LoggingABDecider -import com.twitter.cr_mixer.featureswitch.CrMixerLoggingABDecider -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import javax.inject.Singleton - -object CrMixerLoggingABDeciderModule extends TwitterModule { - - @Provides - @Singleton - def provideABDecider( - loggingABDecider: LoggingABDecider, - statsReceiver: StatsReceiver - ): CrMixerLoggingABDecider = { - CrMixerLoggingABDecider(loggingABDecider, statsReceiver) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureContextBuilderModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureContextBuilderModule.scala deleted file mode 100644 index 18d262c54..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureContextBuilderModule.scala +++ /dev/null @@ -1,16 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.twitter.discovery.common.configapi.FeatureContextBuilder -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.inject.TwitterModule -import javax.inject.Singleton - -object FeatureContextBuilderModule extends TwitterModule { - - @Provides - @Singleton - def providesFeatureContextBuilder(featureSwitches: FeatureSwitches): FeatureContextBuilder = { - FeatureContextBuilder(featureSwitches) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureSwitchesModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureSwitchesModule.scala deleted file mode 100644 index a87d1f54b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/FeatureSwitchesModule.scala +++ /dev/null @@ -1,74 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.twitter.cr_mixer.featureswitch.CrMixerLoggingABDecider -import com.twitter.featureswitches.v2.FeatureSwitches -import com.twitter.featureswitches.v2.builder.FeatureSwitchesBuilder -import com.twitter.featureswitches.v2.experimentation.NullBucketImpressor -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.inject.annotations.Flag -import com.twitter.util.Duration -import javax.inject.Singleton - -object FeatureSwitchesModule extends TwitterModule { - - flag( - name = "featureswitches.path", - default = "/features/cr-mixer/main", - help = "path to the featureswitch configuration directory" - ) - flag( - "use_config_repo_mirror.bool", - false, - "If true, read config from a different directory, to facilitate testing.") - - val DefaultFastRefresh: Boolean = false - val AddServiceDetailsFromAurora: Boolean = true - val ImpressExperiments: Boolean = true - - @Provides - @Singleton - def providesFeatureSwitches( - @Flag("featureswitches.path") featureSwitchDirectory: String, - @Flag("use_config_repo_mirror.bool") useConfigRepoMirrorFlag: Boolean, - abDecider: CrMixerLoggingABDecider, - statsReceiver: StatsReceiver - ): FeatureSwitches = { - val configRepoAbsPath = - getConfigRepoAbsPath(useConfigRepoMirrorFlag) - val fastRefresh = - shouldFastRefresh(useConfigRepoMirrorFlag) - - val featureSwitches = FeatureSwitchesBuilder() - .abDecider(abDecider) - .statsReceiver(statsReceiver.scope("featureswitches-v2")) - .configRepoAbsPath(configRepoAbsPath) - .featuresDirectory(featureSwitchDirectory) - .limitToReferencedExperiments(shouldLimit = true) - .experimentImpressionStatsEnabled(true) - - if (!ImpressExperiments) featureSwitches.experimentBucketImpressor(NullBucketImpressor) - if (AddServiceDetailsFromAurora) featureSwitches.serviceDetailsFromAurora() - if (fastRefresh) featureSwitches.refreshPeriod(Duration.fromSeconds(10)) - - featureSwitches.build() - } - - private def getConfigRepoAbsPath( - useConfigRepoMirrorFlag: Boolean - ): String = { - if (useConfigRepoMirrorFlag) - "config_repo_mirror/" - else "/usr/local/config" - } - - private def shouldFastRefresh( - useConfigRepoMirrorFlag: Boolean - ): Boolean = { - if (useConfigRepoMirrorFlag) - true - else DefaultFastRefresh - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/KafkaProducerModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/KafkaProducerModule.scala deleted file mode 100644 index 770ad1e7e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/KafkaProducerModule.scala +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.twitter.cr_mixer.thriftscala.GetTweetsRecommendationsScribe -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finatra.kafka.producers.FinagleKafkaProducerBuilder -import com.twitter.finatra.kafka.producers.KafkaProducerBase -import com.twitter.finatra.kafka.producers.NullKafkaProducer -import com.twitter.finatra.kafka.serde.ScalaSerdes -import com.twitter.inject.TwitterModule -import javax.inject.Singleton -import org.apache.kafka.clients.CommonClientConfigs -import org.apache.kafka.common.config.SaslConfigs -import org.apache.kafka.common.config.SslConfigs -import org.apache.kafka.common.record.CompressionType -import org.apache.kafka.common.security.auth.SecurityProtocol -import org.apache.kafka.common.serialization.Serdes - -object KafkaProducerModule extends TwitterModule { - - @Provides - @Singleton - def provideTweetRecsLoggerFactory( - serviceIdentifier: ServiceIdentifier, - ): KafkaProducerBase[String, GetTweetsRecommendationsScribe] = { - KafkaProducerFactory.getKafkaProducer(serviceIdentifier.environment) - } -} - -object KafkaProducerFactory { - private val jaasConfig = - """com.sun.security.auth.module.Krb5LoginModule - |required - |principal="cr-mixer@TWITTER.BIZ" - |debug=true - |useKeyTab=true - |storeKey=true - |keyTab="/var/lib/tss/keys/fluffy/keytabs/client/cr-mixer.keytab" - |doNotPrompt=true; - """.stripMargin.replaceAll("\n", " ") - - private val trustStoreLocation = "/etc/tw_truststore/messaging/kafka/client.truststore.jks" - - def getKafkaProducer( - environment: String - ): KafkaProducerBase[String, GetTweetsRecommendationsScribe] = { - if (environment == "prod") { - FinagleKafkaProducerBuilder() - .dest("/s/kafka/recommendations:kafka-tls") - // kerberos params - .withConfig(SaslConfigs.SASL_JAAS_CONFIG, jaasConfig) - .withConfig( - CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, - SecurityProtocol.SASL_SSL.toString) - .withConfig(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, trustStoreLocation) - .withConfig(SaslConfigs.SASL_MECHANISM, SaslConfigs.GSSAPI_MECHANISM) - .withConfig(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, "kafka") - .withConfig(SaslConfigs.SASL_KERBEROS_SERVER_NAME, "kafka") - // Kafka params - .keySerializer(Serdes.String.serializer) - .valueSerializer(ScalaSerdes.CompactThrift[GetTweetsRecommendationsScribe].serializer()) - .clientId("cr-mixer") - .enableIdempotence(true) - .compressionType(CompressionType.LZ4) - .build() - } else { - new NullKafkaProducer[String, GetTweetsRecommendationsScribe] - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/LoggerFactoryModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/LoggerFactoryModule.scala deleted file mode 100644 index 877ed4bb2..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/LoggerFactoryModule.scala +++ /dev/null @@ -1,155 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.scribe.ScribeCategories -import com.twitter.cr_mixer.scribe.ScribeCategory -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.logging.BareFormatter -import com.twitter.logging.Level -import com.twitter.logging.Logger -import com.twitter.logging.NullHandler -import com.twitter.logging.QueueingHandler -import com.twitter.logging.ScribeHandler -import com.twitter.logging.{LoggerFactory => TwitterLoggerFactory} -import javax.inject.Named -import javax.inject.Singleton - -object LoggerFactoryModule extends TwitterModule { - - private val DefaultQueueSize = 10000 - - @Provides - @Singleton - @Named(ModuleNames.AbDeciderLogger) - def provideAbDeciderLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.AbDecider, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.TopLevelApiDdgMetricsLogger) - def provideTopLevelApiDdgMetricsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.TopLevelApiDdgMetrics, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.TweetRecsLogger) - def provideTweetRecsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.TweetsRecs, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.BlueVerifiedTweetRecsLogger) - def provideVITTweetRecsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.VITTweetsRecs, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.RelatedTweetsLogger) - def provideRelatedTweetsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.RelatedTweets, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.UtegTweetsLogger) - def provideUtegTweetsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.UtegTweets, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - @Provides - @Singleton - @Named(ModuleNames.AdsRecommendationsLogger) - def provideAdsRecommendationsLogger( - serviceIdentifier: ServiceIdentifier, - statsReceiver: StatsReceiver - ): Logger = { - buildLoggerFactory( - ScribeCategories.AdsRecommendations, - serviceIdentifier.environment, - statsReceiver.scope("ScribeLogger")) - .apply() - } - - private def buildLoggerFactory( - category: ScribeCategory, - environment: String, - statsReceiver: StatsReceiver - ): TwitterLoggerFactory = { - environment match { - case "prod" => - TwitterLoggerFactory( - node = category.getProdLoggerFactoryNode, - level = Some(Level.INFO), - useParents = false, - handlers = List( - QueueingHandler( - maxQueueSize = DefaultQueueSize, - handler = ScribeHandler( - category = category.scribeCategory, - formatter = BareFormatter, - statsReceiver = statsReceiver.scope(category.getProdLoggerFactoryNode) - ) - ) - ) - ) - case _ => - TwitterLoggerFactory( - node = category.getStagingLoggerFactoryNode, - level = Some(Level.DEBUG), - useParents = false, - handlers = List( - { () => NullHandler } - ) - ) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/MemoizingStatsReceiverModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/MemoizingStatsReceiverModule.scala deleted file mode 100644 index ee94cf166..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/MemoizingStatsReceiverModule.scala +++ /dev/null @@ -1,12 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.twitter.finagle.stats.LoadedStatsReceiver -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.servo.util.MemoizingStatsReceiver - -object MemoizingStatsReceiverModule extends TwitterModule { - override def configure(): Unit = { - bind[StatsReceiver].toInstance(new MemoizingStatsReceiver(LoadedStatsReceiver)) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/TimeoutConfigModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/TimeoutConfigModule.scala deleted file mode 100644 index 1b6200812..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/core/TimeoutConfigModule.scala +++ /dev/null @@ -1,104 +0,0 @@ -package com.twitter.cr_mixer.module.core - -import com.twitter.inject.TwitterModule -import com.google.inject.Provides -import javax.inject.Singleton -import com.twitter.util.Duration -import com.twitter.app.Flag -import com.twitter.cr_mixer.config.TimeoutConfig - -/** - * All timeout settings in CrMixer. - * Timeout numbers are defined in source/cr-mixer/server/config/deploy.aurora - */ -object TimeoutConfigModule extends TwitterModule { - - /** - * Flag names for client timeout - * These are used in modules extending ThriftMethodBuilderClientModule - * which cannot accept injection of TimeoutConfig - */ - val EarlybirdClientTimeoutFlagName = "earlybird.client.timeout" - val FrsClientTimeoutFlagName = "frsSignalFetch.client.timeout" - val QigRankerClientTimeoutFlagName = "qigRanker.client.timeout" - val TweetypieClientTimeoutFlagName = "tweetypie.client.timeout" - val UserTweetGraphClientTimeoutFlagName = "userTweetGraph.client.timeout" - val UserTweetGraphPlusClientTimeoutFlagName = "userTweetGraphPlus.client.timeout" - val UserAdGraphClientTimeoutFlagName = "userAdGraph.client.timeout" - val UserVideoGraphClientTimeoutFlagName = "userVideoGraph.client.timeout" - val UtegClientTimeoutFlagName = "uteg.client.timeout" - val NaviRequestTimeoutFlagName = "navi.client.request.timeout" - - /** - * Flags for timeouts - * These are defined and initialized only in this file - */ - // timeout for the service - private val serviceTimeout: Flag[Duration] = - flag("service.timeout", "service total timeout") - - // timeout for signal fetch - private val signalFetchTimeout: Flag[Duration] = - flag[Duration]("signalFetch.timeout", "signal fetch timeout") - - // timeout for similarity engine - private val similarityEngineTimeout: Flag[Duration] = - flag[Duration]("similarityEngine.timeout", "similarity engine timeout") - private val annServiceClientTimeout: Flag[Duration] = - flag[Duration]("annService.client.timeout", "annQueryService client timeout") - - // timeout for user affinities fetcher - private val userStateUnderlyingStoreTimeout: Flag[Duration] = - flag[Duration]("userStateUnderlyingStore.timeout", "user state underlying store timeout") - - private val userStateStoreTimeout: Flag[Duration] = - flag[Duration]("userStateStore.timeout", "user state store timeout") - - private val utegSimilarityEngineTimeout: Flag[Duration] = - flag[Duration]("uteg.similarityEngine.timeout", "uteg similarity engine timeout") - - private val earlybirdServerTimeout: Flag[Duration] = - flag[Duration]("earlybird.server.timeout", "earlybird server timeout") - - private val earlybirdSimilarityEngineTimeout: Flag[Duration] = - flag[Duration]("earlybird.similarityEngine.timeout", "Earlybird similarity engine timeout") - - private val frsBasedTweetEndpointTimeout: Flag[Duration] = - flag[Duration]( - "frsBasedTweet.endpoint.timeout", - "frsBasedTweet endpoint timeout" - ) - - private val topicTweetEndpointTimeout: Flag[Duration] = - flag[Duration]( - "topicTweet.endpoint.timeout", - "topicTweet endpoint timeout" - ) - - // timeout for Navi client - private val naviRequestTimeout: Flag[Duration] = - flag[Duration]( - NaviRequestTimeoutFlagName, - Duration.fromMilliseconds(2000), - "Request timeout for a single RPC Call", - ) - - @Provides - @Singleton - def provideTimeoutBudget(): TimeoutConfig = - TimeoutConfig( - serviceTimeout = serviceTimeout(), - signalFetchTimeout = signalFetchTimeout(), - similarityEngineTimeout = similarityEngineTimeout(), - annServiceClientTimeout = annServiceClientTimeout(), - utegSimilarityEngineTimeout = utegSimilarityEngineTimeout(), - userStateUnderlyingStoreTimeout = userStateUnderlyingStoreTimeout(), - userStateStoreTimeout = userStateStoreTimeout(), - earlybirdServerTimeout = earlybirdServerTimeout(), - earlybirdSimilarityEngineTimeout = earlybirdSimilarityEngineTimeout(), - frsBasedTweetEndpointTimeout = frsBasedTweetEndpointTimeout(), - topicTweetEndpointTimeout = topicTweetEndpointTimeout(), - naviRequestTimeout = naviRequestTimeout() - ) - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/grpc_client/NaviGRPCClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/grpc_client/NaviGRPCClientModule.scala deleted file mode 100644 index 418f44747..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/grpc_client/NaviGRPCClientModule.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.cr_mixer.module.grpc_client - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.finagle.Http -import com.twitter.finagle.grpc.FinagleChannelBuilder -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsStackClientSyntax -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.util.Duration -import io.grpc.ManagedChannel -import javax.inject.Named -import javax.inject.Singleton - -object NaviGRPCClientModule extends TwitterModule { - - val maxRetryAttempts = 3 - - @Provides - @Singleton - @Named(ModuleNames.HomeNaviGRPCClient) - def providesHomeNaviGRPCClient( - serviceIdentifier: ServiceIdentifier, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): ManagedChannel = { - val label = "navi-wals-recommended-tweets-home-client" - val dest = "/s/ads-prediction/navi-wals-recommended-tweets-home" - buildClient(serviceIdentifier, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.AdsFavedNaviGRPCClient) - def providesAdsFavedNaviGRPCClient( - serviceIdentifier: ServiceIdentifier, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): ManagedChannel = { - val label = "navi-wals-ads-faved-tweets" - val dest = "/s/ads-prediction/navi-wals-ads-faved-tweets" - buildClient(serviceIdentifier, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.AdsMonetizableNaviGRPCClient) - def providesAdsMonetizableNaviGRPCClient( - serviceIdentifier: ServiceIdentifier, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): ManagedChannel = { - val label = "navi-wals-ads-monetizable-tweets" - val dest = "/s/ads-prediction/navi-wals-ads-monetizable-tweets" - buildClient(serviceIdentifier, timeoutConfig, statsReceiver, dest, label) - } - - private def buildClient( - serviceIdentifier: ServiceIdentifier, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - dest: String, - label: String - ): ManagedChannel = { - - val stats = statsReceiver.scope("clnt").scope(label) - - val client = Http.client - .withLabel(label) - .withMutualTls(serviceIdentifier) - .withRequestTimeout(timeoutConfig.naviRequestTimeout) - .withTransport.connectTimeout(Duration.fromMilliseconds(10000)) - .withSession.acquisitionTimeout(Duration.fromMilliseconds(20000)) - .withStatsReceiver(stats) - .withHttpStats - - FinagleChannelBuilder - .forTarget(dest) - .overrideAuthority("rustserving") - .maxRetryAttempts(maxRetryAttempts) - .enableRetryForStatus(io.grpc.Status.RESOURCE_EXHAUSTED) - .enableRetryForStatus(io.grpc.Status.UNKNOWN) - .enableUnsafeFullyBufferingMode() - .httpClient(client) - .build() - - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/CertoTopicTweetSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/CertoTopicTweetSimilarityEngineModule.scala deleted file mode 100644 index 6c82329b0..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/CertoTopicTweetSimilarityEngineModule.scala +++ /dev/null @@ -1,57 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.EngineQuery -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.topic_recos.thriftscala.TweetWithScores -import javax.inject.Named -import javax.inject.Singleton - -object CertoTopicTweetSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.CertoTopicTweetSimilarityEngine) - def providesCertoTopicTweetSimilarityEngine( - @Named(ModuleNames.CertoStratoStoreName) certoStratoStore: ReadableStore[ - TopicId, - Seq[TweetWithScores] - ], - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): StandardSimilarityEngine[ - EngineQuery[Query], - TopicTweetWithScore - ] = { - new StandardSimilarityEngine[EngineQuery[Query], TopicTweetWithScore]( - implementingStore = CertoTopicTweetSimilarityEngine(certoStratoStore, statsReceiver), - identifier = SimilarityEngineType.CertoTopicTweet, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.topicTweetEndpointTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableTopicTweetTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerBasedWalsSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerBasedWalsSimilarityEngineModule.scala deleted file mode 100644 index e09f8b639..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerBasedWalsSimilarityEngineModule.scala +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import io.grpc.ManagedChannel -import javax.inject.Named - -object ConsumerBasedWalsSimilarityEngineModule extends TwitterModule { - @Provides - @Named(ModuleNames.ConsumerBasedWalsSimilarityEngine) - def providesConsumerBasedWalsSimilarityEngine( - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - @Named(ModuleNames.HomeNaviGRPCClient) homeNaviGRPCClient: ManagedChannel, - @Named(ModuleNames.AdsFavedNaviGRPCClient) adsFavedNaviGRPCClient: ManagedChannel, - @Named(ModuleNames.AdsMonetizableNaviGRPCClient) adsMonetizableNaviGRPCClient: ManagedChannel, - ): StandardSimilarityEngine[ - ConsumerBasedWalsSimilarityEngine.Query, - TweetWithScore - ] = { - - val underlyingStore = new ConsumerBasedWalsSimilarityEngine( - homeNaviGRPCClient, - adsFavedNaviGRPCClient, - adsMonetizableNaviGRPCClient, - statsReceiver - ) - - new StandardSimilarityEngine[ - ConsumerBasedWalsSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = underlyingStore, - identifier = SimilarityEngineType.ConsumerBasedWalsANN, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngineModule.scala deleted file mode 100644 index 8d209798b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngineModule.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TripTweetWithScore -import com.twitter.cr_mixer.similarity_engine.ConsumerEmbeddingBasedTripSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TripEngineQuery -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.common.SimClustersEmbedding -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain -import javax.inject.Named - -object ConsumerEmbeddingBasedTripSimilarityEngineModule extends TwitterModule { - @Provides - @Named(ModuleNames.ConsumerEmbeddingBasedTripSimilarityEngine) - def providesConsumerEmbeddingBasedTripSimilarityEngineModule( - @Named(ModuleNames.RmsUserLogFavInterestedInEmbeddingStore) - userLogFavInterestedInEmbeddingStore: ReadableStore[UserId, SimClustersEmbedding], - @Named(ModuleNames.RmsUserFollowInterestedInEmbeddingStore) - userFollowInterestedInEmbeddingStore: ReadableStore[UserId, SimClustersEmbedding], - @Named(ModuleNames.TripCandidateStore) - tripCandidateStore: ReadableStore[TripDomain, Seq[TripTweet]], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): StandardSimilarityEngine[TripEngineQuery, TripTweetWithScore] = { - val underlyingStore = ObservedReadableStore( - ConsumerEmbeddingBasedTripSimilarityEngine( - embeddingStoreLookUpMap = Map( - ModelConfig.ConsumerLogFavBasedInterestedInEmbedding -> userLogFavInterestedInEmbeddingStore, - ModelConfig.ConsumerFollowBasedInterestedInEmbedding -> userFollowInterestedInEmbeddingStore, - ), - tripCandidateSource = tripCandidateStore, - statsReceiver - ))(statsReceiver.scope("TripSimilarityEngine")) - - new StandardSimilarityEngine[TripEngineQuery, TripTweetWithScore]( - implementingStore = underlyingStore, - identifier = SimilarityEngineType.ExploreTripOfflineSimClustersTweets, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngineModule.scala deleted file mode 100644 index 289d052b4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngineModule.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.module.EmbeddingStoreModule -import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import com.twitter.ml.api.{thriftscala => api} -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType - -object ConsumerEmbeddingBasedTwHINSimilarityEngineModule extends TwitterModule { - @Provides - @Named(ModuleNames.ConsumerEmbeddingBasedTwHINANNSimilarityEngine) - def providesConsumerEmbeddingBasedTwHINANNSimilarityEngine( - // MH stores - @Named(EmbeddingStoreModule.ConsumerBasedTwHINEmbeddingRegularUpdateMhStoreName) - consumerBasedTwHINEmbeddingRegularUpdateMhStore: ReadableStore[InternalId, api.Embedding], - @Named(EmbeddingStoreModule.DebuggerDemoUserEmbeddingMhStoreName) - debuggerDemoUserEmbeddingMhStore: ReadableStore[InternalId, api.Embedding], - @Named(AnnQueryServiceClientModule.TwHINRegularUpdateAnnServiceClientName) - twHINRegularUpdateAnnService: AnnQueryService.MethodPerEndpoint, - @Named(AnnQueryServiceClientModule.DebuggerDemoAnnServiceClientName) - debuggerDemoAnnService: AnnQueryService.MethodPerEndpoint, - // Other configs - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver - ): HnswANNSimilarityEngine = { - new HnswANNSimilarityEngine( - embeddingStoreLookUpMap = Map( - ModelConfig.ConsumerBasedTwHINRegularUpdateAll20221024 -> consumerBasedTwHINEmbeddingRegularUpdateMhStore, - ModelConfig.DebuggerDemo -> debuggerDemoUserEmbeddingMhStore, - ), - annServiceLookUpMap = Map( - ModelConfig.ConsumerBasedTwHINRegularUpdateAll20221024 -> twHINRegularUpdateAnnService, - ModelConfig.DebuggerDemo -> debuggerDemoAnnService, - ), - globalStats = statsReceiver, - identifier = SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule.scala deleted file mode 100644 index 704093e36..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.cr_mixer.module -package similarity_engine - -import com.google.inject.Provides -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.module.EmbeddingStoreModule -import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import com.twitter.ml.api.{thriftscala => api} -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType - -object ConsumerEmbeddingBasedTwoTowerSimilarityEngineModule extends TwitterModule { - @Provides - @Named(ModuleNames.ConsumerEmbeddingBasedTwoTowerANNSimilarityEngine) - def providesConsumerEmbeddingBasedTwoTowerANNSimilarityEngine( - @Named(EmbeddingStoreModule.TwoTowerFavConsumerEmbeddingMhStoreName) - twoTowerFavConsumerEmbeddingMhStore: ReadableStore[InternalId, api.Embedding], - @Named(AnnQueryServiceClientModule.TwoTowerFavAnnServiceClientName) - twoTowerFavAnnService: AnnQueryService.MethodPerEndpoint, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver - ): HnswANNSimilarityEngine = { - new HnswANNSimilarityEngine( - embeddingStoreLookUpMap = Map( - ModelConfig.TwoTowerFavALL20220808 -> twoTowerFavConsumerEmbeddingMhStore, - ), - annServiceLookUpMap = Map( - ModelConfig.TwoTowerFavALL20220808 -> twoTowerFavAnnService, - ), - globalStats = statsReceiver, - identifier = SimilarityEngineType.ConsumerEmbeddingBasedTwoTowerANN, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngineModule.scala deleted file mode 100644 index e66a48a87..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,61 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest -import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -object ConsumersBasedUserAdGraphSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ConsumersBasedUserAdGraphSimilarityEngine) - def providesConsumersBasedUserAdGraphSimilarityEngine( - @Named(ModuleNames.ConsumerBasedUserAdGraphStore) - consumersBasedUserAdGraphStore: ReadableStore[ - ConsumersBasedRelatedAdRequest, - RelatedAdResponse - ], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - ConsumersBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ] = { - - new StandardSimilarityEngine[ - ConsumersBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = - ConsumersBasedUserAdGraphSimilarityEngine(consumersBasedUserAdGraphStore, statsReceiver), - identifier = SimilarityEngineType.ConsumersBasedUserTweetGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserTweetGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - memCacheConfig = None - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngineModule.scala deleted file mode 100644 index 977a90f25..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,62 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.ConsumersBasedUserVideoGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -object ConsumersBasedUserVideoGraphSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ConsumersBasedUserVideoGraphSimilarityEngine) - def providesConsumersBasedUserVideoGraphSimilarityEngine( - @Named(ModuleNames.ConsumerBasedUserVideoGraphStore) - consumersBasedUserVideoGraphStore: ReadableStore[ - ConsumersBasedRelatedTweetRequest, - RelatedTweetResponse - ], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - ConsumersBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ] = { - - new StandardSimilarityEngine[ - ConsumersBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = ConsumersBasedUserVideoGraphSimilarityEngine( - consumersBasedUserVideoGraphStore, - statsReceiver), - identifier = SimilarityEngineType.ConsumersBasedUserVideoGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserVideoGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - memCacheConfig = None - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/DiffusionBasedSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/DiffusionBasedSimilarityEngineModule.scala deleted file mode 100644 index f48521085..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/DiffusionBasedSimilarityEngineModule.scala +++ /dev/null @@ -1,52 +0,0 @@ -package com.twitter.cr_mixer.module -package similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.simclusters_v2.thriftscala.TweetsWithScore -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.DiffusionBasedSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -object DiffusionBasedSimilarityEngineModule extends TwitterModule { - @Provides - @Singleton - @Named(ModuleNames.DiffusionBasedSimilarityEngine) - def providesDiffusionBasedSimilarityEngineModule( - @Named(ModuleNames.RetweetBasedDiffusionRecsMhStore) - retweetBasedDiffusionRecsMhStore: ReadableStore[Long, TweetsWithScore], - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver - ): LookupSimilarityEngine[Query, TweetWithScore] = { - - val versionedStoreMap = Map( - ModelConfig.RetweetBasedDiffusion -> DiffusionBasedSimilarityEngine( - retweetBasedDiffusionRecsMhStore, - globalStats), - ) - - new LookupSimilarityEngine[Query, TweetWithScore]( - versionedStoreMap = versionedStoreMap, - identifier = SimilarityEngineType.DiffusionBasedTweet, - globalStats = globalStats, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/EarlybirdSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/EarlybirdSimilarityEngineModule.scala deleted file mode 100644 index 6cdabfce4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/EarlybirdSimilarityEngineModule.scala +++ /dev/null @@ -1,120 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.EarlybirdModelBasedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.EarlybirdRecencyBasedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.EarlybirdTensorflowBasedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import javax.inject.Singleton - -object EarlybirdSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - def providesRecencyBasedEarlybirdSimilarityEngine( - earlybirdRecencyBasedSimilarityEngine: EarlybirdRecencyBasedSimilarityEngine, - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): EarlybirdSimilarityEngine[ - EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery, - EarlybirdRecencyBasedSimilarityEngine - ] = { - new EarlybirdSimilarityEngine[ - EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery, - EarlybirdRecencyBasedSimilarityEngine - ]( - implementingStore = earlybirdRecencyBasedSimilarityEngine, - identifier = SimilarityEngineType.EarlybirdRecencyBasedSimilarityEngine, - globalStats = - statsReceiver.scope(SimilarityEngineType.EarlybirdRecencyBasedSimilarityEngine.name), - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.earlybirdSimilarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = Some( - DeciderConfig( - decider = decider, - deciderString = DeciderConstants.enableEarlybirdTrafficDeciderKey - )), - enableFeatureSwitch = None - ) - ) - ) - } - - @Provides - @Singleton - def providesModelBasedEarlybirdSimilarityEngine( - earlybirdModelBasedSimilarityEngine: EarlybirdModelBasedSimilarityEngine, - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): EarlybirdSimilarityEngine[ - EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery, - EarlybirdModelBasedSimilarityEngine - ] = { - new EarlybirdSimilarityEngine[ - EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery, - EarlybirdModelBasedSimilarityEngine - ]( - implementingStore = earlybirdModelBasedSimilarityEngine, - identifier = SimilarityEngineType.EarlybirdModelBasedSimilarityEngine, - globalStats = - statsReceiver.scope(SimilarityEngineType.EarlybirdModelBasedSimilarityEngine.name), - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.earlybirdSimilarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = Some( - DeciderConfig( - decider = decider, - deciderString = DeciderConstants.enableEarlybirdTrafficDeciderKey - )), - enableFeatureSwitch = None - ) - ) - ) - } - - @Provides - @Singleton - def providesTensorflowBasedEarlybirdSimilarityEngine( - earlybirdTensorflowBasedSimilarityEngine: EarlybirdTensorflowBasedSimilarityEngine, - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): EarlybirdSimilarityEngine[ - EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery, - EarlybirdTensorflowBasedSimilarityEngine - ] = { - new EarlybirdSimilarityEngine[ - EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery, - EarlybirdTensorflowBasedSimilarityEngine - ]( - implementingStore = earlybirdTensorflowBasedSimilarityEngine, - identifier = SimilarityEngineType.EarlybirdTensorflowBasedSimilarityEngine, - globalStats = - statsReceiver.scope(SimilarityEngineType.EarlybirdTensorflowBasedSimilarityEngine.name), - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.earlybirdSimilarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = Some( - DeciderConfig( - decider = decider, - deciderString = DeciderConstants.enableEarlybirdTrafficDeciderKey - )), - enableFeatureSwitch = None - ) - ) - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUnifiedSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUnifiedSimilarityEngineModule.scala deleted file mode 100644 index b16d59924..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUnifiedSimilarityEngineModule.scala +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUnifiedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -object ProducerBasedUnifiedSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ProducerBasedUnifiedSimilarityEngine) - def providesProducerBasedUnifiedSimilarityEngine( - @Named(ModuleNames.ProducerBasedUserTweetGraphSimilarityEngine) - producerBasedUserTweetGraphSimilarityEngine: StandardSimilarityEngine[ - ProducerBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.SimClustersANNSimilarityEngine) - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): StandardSimilarityEngine[ - ProducerBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ] = { - - val underlyingStore: ReadableStore[ProducerBasedUnifiedSimilarityEngine.Query, Seq[ - TweetWithCandidateGenerationInfo - ]] = ProducerBasedUnifiedSimilarityEngine( - producerBasedUserTweetGraphSimilarityEngine, - simClustersANNSimilarityEngine, - statsReceiver - ) - - new StandardSimilarityEngine[ - ProducerBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ]( - implementingStore = underlyingStore, - identifier = SimilarityEngineType.ProducerBasedUnifiedSimilarityEngine, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserAdGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserAdGraphSimilarityEngineModule.scala deleted file mode 100644 index d221a58a9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserAdGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine._ -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import javax.inject.Named -import javax.inject.Singleton - -object ProducerBasedUserAdGraphSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ProducerBasedUserAdGraphSimilarityEngine) - def providesProducerBasedUserAdGraphSimilarityEngine( - userAdGraphService: UserAdGraph.MethodPerEndpoint, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - ProducerBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ] = { - new StandardSimilarityEngine[ - ProducerBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = - ProducerBasedUserAdGraphSimilarityEngine(userAdGraphService, statsReceiver), - identifier = SimilarityEngineType.ProducerBasedUserAdGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserAdGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - memCacheConfig = Some( - MemCacheConfig( - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes, - keyToString = { k => - //Example Query CRMixer:ProducerBasedUTG:1234567890ABCDEF - f"ProducerBasedUTG:${keyHasher.hashKey(k.toString.getBytes)}%X" - } - )) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngineModule.scala deleted file mode 100644 index a5821d01c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.ProducerBasedUserTweetGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine._ -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph -import javax.inject.Named -import javax.inject.Singleton - -object ProducerBasedUserTweetGraphSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ProducerBasedUserTweetGraphSimilarityEngine) - def providesProducerBasedUserTweetGraphSimilarityEngine( - userTweetGraphService: UserTweetGraph.MethodPerEndpoint, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - ProducerBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ] = { - new StandardSimilarityEngine[ - ProducerBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = - ProducerBasedUserTweetGraphSimilarityEngine(userTweetGraphService, statsReceiver), - identifier = SimilarityEngineType.ProducerBasedUserTweetGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserTweetGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - memCacheConfig = Some( - MemCacheConfig( - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes, - keyToString = { k => - //Example Query CRMixer:ProducerBasedUTG:1234567890ABCDEF - f"ProducerBasedUTG:${keyHasher.hashKey(k.toString.getBytes)}%X" - } - )) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SimClustersANNSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SimClustersANNSimilarityEngineModule.scala deleted file mode 100644 index 7af68327d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SimClustersANNSimilarityEngineModule.scala +++ /dev/null @@ -1,117 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.hermit.store.common.ObservedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.candidate_source.SimClustersANNCandidateSource.CacheableShortTTLEmbeddingTypes -import com.twitter.simclustersann.thriftscala.SimClustersANNService -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Named -import javax.inject.Singleton - -object SimClustersANNSimilarityEngineModule extends TwitterModule { - - private val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNSimilarityEngine) - def providesProdSimClustersANNSimilarityEngine( - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - simClustersANNServiceNameToClientMapper: Map[String, SimClustersANNService.MethodPerEndpoint], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver - ): StandardSimilarityEngine[Query, TweetWithScore] = { - - val underlyingStore = - SimClustersANNSimilarityEngine(simClustersANNServiceNameToClientMapper, statsReceiver) - - val observedReadableStore = - ObservedReadableStore(underlyingStore)(statsReceiver.scope("SimClustersANNServiceStore")) - - val memCachedStore: ReadableStore[Query, Seq[TweetWithScore]] = - ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = observedReadableStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScore]()), - statsReceiver = statsReceiver.scope("simclusters_ann_store_memcache"), - keyToString = { k => - //Example Query CRMixer:SCANN:1:2:1234567890ABCDEF:1234567890ABCDEF - f"CRMixer:SCANN:${k.simClustersANNQuery.sourceEmbeddingId.embeddingType.getValue()}%X" + - f":${k.simClustersANNQuery.sourceEmbeddingId.modelVersion.getValue()}%X" + - f":${keyHasher.hashKey(k.simClustersANNQuery.sourceEmbeddingId.internalId.toString.getBytes)}%X" + - f":${keyHasher.hashKey(k.simClustersANNQuery.config.toString.getBytes)}%X" - } - ) - - // Only cache the candidates if it's not Consumer-source. For example, TweetSource, - // ProducerSource, TopicSource - val wrapperStats = statsReceiver.scope("SimClustersANNWrapperStore") - - val wrapperStore: ReadableStore[Query, Seq[TweetWithScore]] = - buildWrapperStore(memCachedStore, observedReadableStore, wrapperStats) - - new StandardSimilarityEngine[ - Query, - TweetWithScore - ]( - implementingStore = wrapperStore, - identifier = SimilarityEngineType.SimClustersANN, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } - - def buildWrapperStore( - memCachedStore: ReadableStore[Query, Seq[TweetWithScore]], - underlyingStore: ReadableStore[Query, Seq[TweetWithScore]], - wrapperStats: StatsReceiver - ): ReadableStore[Query, Seq[TweetWithScore]] = { - - // Only cache the candidates if it's not Consumer-source. For example, TweetSource, - // ProducerSource, TopicSource - val wrapperStore: ReadableStore[Query, Seq[TweetWithScore]] = - new ReadableStore[Query, Seq[TweetWithScore]] { - - override def multiGet[K1 <: Query]( - queries: Set[K1] - ): Map[K1, Future[Option[Seq[TweetWithScore]]]] = { - val (cacheableQueries, nonCacheableQueries) = - queries.partition { query => - CacheableShortTTLEmbeddingTypes.contains( - query.simClustersANNQuery.sourceEmbeddingId.embeddingType) - } - memCachedStore.multiGet(cacheableQueries) ++ - underlyingStore.multiGet(nonCacheableQueries) - } - } - wrapperStore - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SkitTopicTweetSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SkitTopicTweetSimilarityEngineModule.scala deleted file mode 100644 index 4de20fcfe..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/SkitTopicTweetSimilarityEngineModule.scala +++ /dev/null @@ -1,88 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.EngineQuery -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.SkitHighPrecisionTopicTweetSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import com.twitter.topic_recos.thriftscala.TopicTweet -import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey -import javax.inject.Named -import javax.inject.Singleton - -object SkitTopicTweetSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.SkitHighPrecisionTopicTweetSimilarityEngine) - def providesSkitHighPrecisionTopicTweetSimilarityEngine( - @Named(ModuleNames.SkitStratoStoreName) skitStratoStore: ReadableStore[ - TopicTweetPartitionFlatKey, - Seq[TopicTweet] - ], - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): StandardSimilarityEngine[ - EngineQuery[Query], - TopicTweetWithScore - ] = { - new StandardSimilarityEngine[EngineQuery[Query], TopicTweetWithScore]( - implementingStore = - SkitHighPrecisionTopicTweetSimilarityEngine(skitStratoStore, statsReceiver), - identifier = SimilarityEngineType.SkitHighPrecisionTopicTweet, - globalStats = statsReceiver.scope(SimilarityEngineType.SkitHighPrecisionTopicTweet.name), - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.topicTweetEndpointTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableTopicTweetTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } - @Provides - @Singleton - @Named(ModuleNames.SkitTopicTweetSimilarityEngine) - def providesSkitTfgTopicTweetSimilarityEngine( - @Named(ModuleNames.SkitStratoStoreName) skitStratoStore: ReadableStore[ - TopicTweetPartitionFlatKey, - Seq[TopicTweet] - ], - timeoutConfig: TimeoutConfig, - decider: CrMixerDecider, - statsReceiver: StatsReceiver - ): StandardSimilarityEngine[ - EngineQuery[Query], - TopicTweetWithScore - ] = { - new StandardSimilarityEngine[EngineQuery[Query], TopicTweetWithScore]( - implementingStore = SkitTopicTweetSimilarityEngine(skitStratoStore, statsReceiver), - identifier = SimilarityEngineType.SkitTfgTopicTweet, - globalStats = statsReceiver.scope(SimilarityEngineType.SkitTfgTopicTweet.name), - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.topicTweetEndpointTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableTopicTweetTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedQigSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedQigSimilarityEngineModule.scala deleted file mode 100644 index 06d9a2186..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedQigSimilarityEngineModule.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine._ -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.keyHasher -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedQigSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.qig_ranker.thriftscala.QigRanker -import javax.inject.Named -import javax.inject.Singleton - -object TweetBasedQigSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.TweetBasedQigSimilarityEngine) - def providesTweetBasedQigSimilarTweetsCandidateSource( - qigRanker: QigRanker.MethodPerEndpoint, - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - TweetBasedQigSimilarityEngine.Query, - TweetWithScore - ] = { - new StandardSimilarityEngine[ - TweetBasedQigSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = TweetBasedQigSimilarityEngine(qigRanker, statsReceiver), - identifier = SimilarityEngineType.Qig, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableQigSimilarTweetsTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - memCacheConfig = Some( - MemCacheConfig( - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes, - keyToString = { k => - f"TweetBasedQIGRanker:${keyHasher.hashKey(k.sourceId.toString.getBytes)}%X" - } - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedTwHINSimlarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedTwHINSimlarityEngineModule.scala deleted file mode 100644 index cc9da4772..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedTwHINSimlarityEngineModule.scala +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine -import com.google.inject.Provides -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.module.EmbeddingStoreModule -import com.twitter.cr_mixer.module.thrift_client.AnnQueryServiceClientModule -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import com.twitter.ml.api.{thriftscala => api} -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.HnswANNEngineQuery -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} - -object TweetBasedTwHINSimlarityEngineModule extends TwitterModule { - @Provides - @Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine) - def providesTweetBasedTwHINANNSimilarityEngine( - // MH stores - @Named(EmbeddingStoreModule.TwHINEmbeddingRegularUpdateMhStoreName) - twHINEmbeddingRegularUpdateMhStore: ReadableStore[InternalId, api.Embedding], - @Named(EmbeddingStoreModule.DebuggerDemoTweetEmbeddingMhStoreName) - debuggerDemoTweetEmbeddingMhStore: ReadableStore[InternalId, api.Embedding], - // ANN clients - @Named(AnnQueryServiceClientModule.TwHINRegularUpdateAnnServiceClientName) - twHINRegularUpdateAnnService: AnnQueryService.MethodPerEndpoint, - @Named(AnnQueryServiceClientModule.DebuggerDemoAnnServiceClientName) - debuggerDemoAnnService: AnnQueryService.MethodPerEndpoint, - // Other configs - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver - ): HnswANNSimilarityEngine = { - new HnswANNSimilarityEngine( - embeddingStoreLookUpMap = Map( - ModelConfig.TweetBasedTwHINRegularUpdateAll20221024 -> twHINEmbeddingRegularUpdateMhStore, - ModelConfig.DebuggerDemo -> debuggerDemoTweetEmbeddingMhStore, - ), - annServiceLookUpMap = Map( - ModelConfig.TweetBasedTwHINRegularUpdateAll20221024 -> twHINRegularUpdateAnnService, - ModelConfig.DebuggerDemo -> debuggerDemoAnnService, - ), - globalStats = statsReceiver, - identifier = SimilarityEngineType.TweetBasedTwHINANN, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ), - memCacheConfigOpt = Some( - SimilarityEngine.MemCacheConfig[HnswANNEngineQuery]( - cacheClient = crMixerUnifiedCacheClient, - ttl = 30.minutes, - keyToString = (query: HnswANNEngineQuery) => - SimilarityEngine.keyHasher.hashKey(query.cacheKey.getBytes).toString - )) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUnifiedSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUnifiedSimilarityEngineModule.scala deleted file mode 100644 index aa54bf071..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUnifiedSimilarityEngineModule.scala +++ /dev/null @@ -1,83 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.HnswANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimClustersANNSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedQigSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUnifiedSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserTweetGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserVideoGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -object TweetBasedUnifiedSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.TweetBasedUnifiedSimilarityEngine) - def providesTweetBasedUnifiedSimilarityEngine( - @Named(ModuleNames.TweetBasedUserTweetGraphSimilarityEngine) tweetBasedUserTweetGraphSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedUserVideoGraphSimilarityEngine) tweetBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine) - tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine, - @Named(ModuleNames.TweetBasedQigSimilarityEngine) tweetBasedQigSimilarityEngine: StandardSimilarityEngine[ - TweetBasedQigSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.SimClustersANNSimilarityEngine) - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): StandardSimilarityEngine[ - TweetBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ] = { - - val underlyingStore: ReadableStore[TweetBasedUnifiedSimilarityEngine.Query, Seq[ - TweetWithCandidateGenerationInfo - ]] = TweetBasedUnifiedSimilarityEngine( - tweetBasedUserTweetGraphSimilarityEngine, - tweetBasedUserVideoGraphSimilarityEngine, - simClustersANNSimilarityEngine, - tweetBasedQigSimilarityEngine, - tweetBasedTwHINANNSimilarityEngine, - statsReceiver - ) - - new StandardSimilarityEngine[ - TweetBasedUnifiedSimilarityEngine.Query, - TweetWithCandidateGenerationInfo - ]( - implementingStore = underlyingStore, - identifier = SimilarityEngineType.TweetBasedUnifiedSimilarityEngine, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserAdGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserAdGraphSimilarityEngineModule.scala deleted file mode 100644 index 7288e603f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserAdGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserAdGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import javax.inject.Named -import javax.inject.Singleton - -object TweetBasedUserAdGraphSimilarityEngineModule extends TwitterModule { - - private val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - @Provides - @Singleton - @Named(ModuleNames.TweetBasedUserAdGraphSimilarityEngine) - def providesTweetBasedUserAdGraphSimilarityEngine( - userAdGraphService: UserAdGraph.MethodPerEndpoint, - tweetRecentEngagedUserStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - TweetBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ] = { - - val underlyingStore = TweetBasedUserAdGraphSimilarityEngine( - userAdGraphService, - tweetRecentEngagedUserStore, - statsReceiver) - - val memCachedStore: ReadableStore[ - TweetBasedUserAdGraphSimilarityEngine.Query, - Seq[ - TweetWithScore - ] - ] = - ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScore]()), - statsReceiver = statsReceiver.scope("tweet_based_user_ad_graph_store_memcache"), - keyToString = { k => - //Example Query CRMixer:TweetBasedUTG:1234567890ABCDEF - f"CRMixer:TweetBasedUAG:${keyHasher.hashKey(k.toString.getBytes)}%X" - } - ) - - new StandardSimilarityEngine[ - TweetBasedUserAdGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = memCachedStore, - identifier = SimilarityEngineType.TweetBasedUserAdGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserAdGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserTweetGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserTweetGraphSimilarityEngineModule.scala deleted file mode 100644 index a7a388199..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserTweetGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.cr_mixer.module -package similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserTweetGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import javax.inject.Named -import javax.inject.Singleton - -object TweetBasedUserTweetGraphSimilarityEngineModule extends TwitterModule { - - private val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - @Provides - @Singleton - @Named(ModuleNames.TweetBasedUserTweetGraphSimilarityEngine) - def providesTweetBasedUserTweetGraphSimilarityEngine( - userTweetGraphService: UserTweetGraph.MethodPerEndpoint, - tweetRecentEngagedUserStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ] = { - - val underlyingStore = TweetBasedUserTweetGraphSimilarityEngine( - userTweetGraphService, - tweetRecentEngagedUserStore, - statsReceiver) - - val memCachedStore: ReadableStore[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - Seq[ - TweetWithScore - ] - ] = - ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScore]()), - statsReceiver = statsReceiver.scope("tweet_based_user_tweet_graph_store_memcache"), - keyToString = { k => - //Example Query CRMixer:TweetBasedUTG:1234567890ABCDEF - f"CRMixer:TweetBasedUTG:${keyHasher.hashKey(k.toString.getBytes)}%X" - } - ) - - new StandardSimilarityEngine[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = memCachedStore, - identifier = SimilarityEngineType.TweetBasedUserTweetGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserTweetGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserVideoGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserVideoGraphSimilarityEngineModule.scala deleted file mode 100644 index efc354d21..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TweetBasedUserVideoGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TweetBasedUserVideoGraphSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.memcached.{Client => MemcachedClient} -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import javax.inject.Named -import javax.inject.Singleton - -object TweetBasedUserVideoGraphSimilarityEngineModule extends TwitterModule { - - private val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - @Provides - @Singleton - @Named(ModuleNames.TweetBasedUserVideoGraphSimilarityEngine) - def providesTweetBasedUserVideoGraphSimilarityEngine( - userVideoGraphService: UserVideoGraph.MethodPerEndpoint, - tweetRecentEngagedUserStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - @Named(ModuleNames.UnifiedCache) crMixerUnifiedCacheClient: MemcachedClient, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ] = { - - val underlyingStore = - TweetBasedUserVideoGraphSimilarityEngine( - userVideoGraphService, - tweetRecentEngagedUserStore, - statsReceiver) - - val memCachedStore: ReadableStore[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - Seq[ - TweetWithScore - ] - ] = - ObservedMemcachedReadableStore - .fromCacheClient( - backingStore = underlyingStore, - cacheClient = crMixerUnifiedCacheClient, - ttl = 10.minutes - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[TweetWithScore]()), - statsReceiver = statsReceiver.scope("tweet_based_user_video_graph_store_memcache"), - keyToString = { k => - //Example Query CRMixer:TweetBasedUVG:1234567890ABCDEF - f"CRMixer:TweetBasedUVG:${keyHasher.hashKey(k.toString.getBytes)}%X" - } - ) - - new StandardSimilarityEngine[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ]( - implementingStore = memCachedStore, - identifier = SimilarityEngineType.TweetBasedUserVideoGraph, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = - Some(DeciderConfig(decider, DeciderConstants.enableUserVideoGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TwhinCollabFilterLookupSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TwhinCollabFilterLookupSimilarityEngineModule.scala deleted file mode 100644 index 4f7c909e3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/TwhinCollabFilterLookupSimilarityEngineModule.scala +++ /dev/null @@ -1,71 +0,0 @@ -package com.twitter.cr_mixer.module -package similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.similarity_engine.LookupSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine.Query -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.TwhinCollabFilterSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.storehaus.ReadableStore -import javax.inject.Named -import javax.inject.Singleton - -/** - * TwhinCandidatesLookupSimilarityEngineModule routes the request to the corresponding - * twhin based candidate store which follow the same pattern as TwHIN Collaborative Filtering. - */ - -object TwhinCollabFilterLookupSimilarityEngineModule extends TwitterModule { - @Provides - @Singleton - @Named(ModuleNames.TwhinCollabFilterSimilarityEngine) - def providesTwhinCollabFilterLookupSimilarityEngineModule( - @Named(ModuleNames.TwhinCollabFilterStratoStoreForFollow) - twhinCollabFilterStratoStoreForFollow: ReadableStore[Long, Seq[TweetId]], - @Named(ModuleNames.TwhinCollabFilterStratoStoreForEngagement) - twhinCollabFilterStratoStoreForEngagement: ReadableStore[Long, Seq[TweetId]], - @Named(ModuleNames.TwhinMultiClusterStratoStoreForFollow) - twhinMultiClusterStratoStoreForFollow: ReadableStore[Long, Seq[TweetId]], - @Named(ModuleNames.TwhinMultiClusterStratoStoreForEngagement) - twhinMultiClusterStratoStoreForEngagement: ReadableStore[Long, Seq[TweetId]], - timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver - ): LookupSimilarityEngine[Query, TweetWithScore] = { - val versionedStoreMap = Map( - ModelConfig.TwhinCollabFilterForFollow -> TwhinCollabFilterSimilarityEngine( - twhinCollabFilterStratoStoreForFollow, - globalStats), - ModelConfig.TwhinCollabFilterForEngagement -> TwhinCollabFilterSimilarityEngine( - twhinCollabFilterStratoStoreForEngagement, - globalStats), - ModelConfig.TwhinMultiClusterForFollow -> TwhinCollabFilterSimilarityEngine( - twhinMultiClusterStratoStoreForFollow, - globalStats), - ModelConfig.TwhinMultiClusterForEngagement -> TwhinCollabFilterSimilarityEngine( - twhinMultiClusterStratoStoreForEngagement, - globalStats), - ) - - new LookupSimilarityEngine[Query, TweetWithScore]( - versionedStoreMap = versionedStoreMap, - identifier = SimilarityEngineType.TwhinCollabFilter, - globalStats = globalStats, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.similarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = None, - enableFeatureSwitch = None - ) - ) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/UserTweetEntityGraphSimilarityEngineModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/UserTweetEntityGraphSimilarityEngineModule.scala deleted file mode 100644 index cf2093208..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/similarity_engine/UserTweetEntityGraphSimilarityEngineModule.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.cr_mixer.module.similarity_engine - -import com.google.inject.Provides -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.similarity_engine.UserTweetEntityGraphSimilarityEngine -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.DeciderConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.GatingConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.similarity_engine.StandardSimilarityEngine -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.inject.TwitterModule -import com.twitter.recos.user_tweet_entity_graph.thriftscala.UserTweetEntityGraph -import javax.inject.Named -import javax.inject.Singleton - -object UserTweetEntityGraphSimilarityEngineModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.UserTweetEntityGraphSimilarityEngine) - def providesUserTweetEntityGraphSimilarityEngine( - userTweetEntityGraphService: UserTweetEntityGraph.MethodPerEndpoint, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - decider: CrMixerDecider - ): StandardSimilarityEngine[ - UserTweetEntityGraphSimilarityEngine.Query, - TweetWithScoreAndSocialProof - ] = { - new StandardSimilarityEngine[ - UserTweetEntityGraphSimilarityEngine.Query, - TweetWithScoreAndSocialProof - ]( - implementingStore = - UserTweetEntityGraphSimilarityEngine(userTweetEntityGraphService, statsReceiver), - identifier = SimilarityEngineType.Uteg, - globalStats = statsReceiver, - engineConfig = SimilarityEngineConfig( - timeout = timeoutConfig.utegSimilarityEngineTimeout, - gatingConfig = GatingConfig( - deciderConfig = Some( - DeciderConfig(decider, DeciderConstants.enableUserTweetEntityGraphTrafficDeciderKey)), - enableFeatureSwitch = None - ) - ), - // We cannot use the key to cache anything in UTEG because the key contains a long list of userIds - memCacheConfig = None - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/AnnQueryServiceClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/AnnQueryServiceClientModule.scala deleted file mode 100644 index 17dbfcae5..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/AnnQueryServiceClientModule.scala +++ /dev/null @@ -1,107 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.google.inject.Provides -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.conversions.DurationOps._ -import com.twitter.conversions.PercentOps._ -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.client.MtlsStackClient._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.thrift.ClientId -import com.twitter.inject.TwitterModule -import javax.inject.Named -import javax.inject.Singleton - -object AnnQueryServiceClientModule extends TwitterModule { - final val DebuggerDemoAnnServiceClientName = "DebuggerDemoAnnServiceClient" - - @Provides - @Singleton - @Named(DebuggerDemoAnnServiceClientName) - def debuggerDemoAnnServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - statsReceiver: StatsReceiver, - timeoutConfig: TimeoutConfig, - ): AnnQueryService.MethodPerEndpoint = { - // This ANN is built from the embeddings in src/scala/com/twitter/wtf/beam/bq_embedding_export/sql/MlfExperimentalTweetEmbeddingScalaDataset.sql - // Change the above sql if you want to build the index from a diff embedding - val dest = "/s/cassowary/mlf-experimental-ann-service" - val label = "experimental-ann" - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - final val TwHINUuaAnnServiceClientName = "TwHINUuaAnnServiceClient" - @Provides - @Singleton - @Named(TwHINUuaAnnServiceClientName) - def twhinUuaAnnServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - statsReceiver: StatsReceiver, - timeoutConfig: TimeoutConfig, - ): AnnQueryService.MethodPerEndpoint = { - val dest = "/s/cassowary/twhin-uua-ann-service" - val label = "twhin_uua_ann" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - final val TwHINRegularUpdateAnnServiceClientName = "TwHINRegularUpdateAnnServiceClient" - @Provides - @Singleton - @Named(TwHINRegularUpdateAnnServiceClientName) - def twHINRegularUpdateAnnServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - statsReceiver: StatsReceiver, - timeoutConfig: TimeoutConfig, - ): AnnQueryService.MethodPerEndpoint = { - val dest = "/s/cassowary/twhin-regular-update-ann-service" - val label = "twhin_regular_update" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - final val TwoTowerFavAnnServiceClientName = "TwoTowerFavAnnServiceClient" - @Provides - @Singleton - @Named(TwoTowerFavAnnServiceClientName) - def twoTowerFavAnnServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - statsReceiver: StatsReceiver, - timeoutConfig: TimeoutConfig, - ): AnnQueryService.MethodPerEndpoint = { - val dest = "/s/cassowary/tweet-rec-two-tower-fav-ann" - val label = "tweet_rec_two_tower_fav_ann" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - private def buildClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - dest: String, - label: String - ): AnnQueryService.MethodPerEndpoint = { - val thriftClient = ThriftMux.client - .withMutualTls(serviceIdentifier) - .withClientId(clientId) - .withLabel(label) - .withStatsReceiver(statsReceiver) - .withTransport.connectTimeout(500.milliseconds) - .withSession.acquisitionTimeout(500.milliseconds) - .methodBuilder(dest) - .withTimeoutPerRequest(timeoutConfig.annServiceClientTimeout) - .withRetryDisabled - .idempotent(5.percent) - .servicePerEndpoint[AnnQueryService.ServicePerEndpoint] - - ThriftMux.Client.methodPerEndpoint(thriftClient) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/EarlybirdSearchClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/EarlybirdSearchClientModule.scala deleted file mode 100644 index c399a5a37..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/EarlybirdSearchClientModule.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client -import com.twitter.app.Flag -import com.twitter.finagle.ThriftMux -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.search.earlybird.thriftscala.EarlybirdService -import com.twitter.inject.Injector -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.EarlybirdClientTimeoutFlagName -import com.twitter.finagle.service.RetryBudget -import com.twitter.util.Duration -import org.apache.thrift.protocol.TCompactProtocol - -object EarlybirdSearchClientModule - extends ThriftMethodBuilderClientModule[ - EarlybirdService.ServicePerEndpoint, - EarlybirdService.MethodPerEndpoint - ] - with MtlsClient { - - override def label: String = "earlybird" - override def dest: String = "/s/earlybird-root-superroot/root-superroot" - private val requestTimeoutFlag: Flag[Duration] = - flag[Duration](EarlybirdClientTimeoutFlagName, "Earlybird client timeout") - override protected def requestTimeout: Duration = requestTimeoutFlag() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = { - super - .configureThriftMuxClient(injector, client) - .withProtocolFactory(new TCompactProtocol.Factory()) - .withSessionQualifier - .successRateFailureAccrual(successRate = 0.9, window = 30.seconds) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/FrsClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/FrsClientModule.scala deleted file mode 100644 index 1084f2c1a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/FrsClientModule.scala +++ /dev/null @@ -1,41 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.finagle.ThriftMux -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.FrsClientTimeoutFlagName -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.util.Duration - -object FrsClientModule - extends ThriftMethodBuilderClientModule[ - FollowRecommendationsThriftService.ServicePerEndpoint, - FollowRecommendationsThriftService.MethodPerEndpoint - ] - with MtlsClient { - - override def label: String = "follow-recommendations-service" - override def dest: String = "/s/follow-recommendations/follow-recos-service" - - private val frsSignalFetchTimeout: Flag[Duration] = - flag[Duration](FrsClientTimeoutFlagName, "FRS signal fetch client timeout") - override def requestTimeout: Duration = frsSignalFetchTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = { - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withSessionQualifier - .successRateFailureAccrual(successRate = 0.9, window = 30.seconds) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraPartitionClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraPartitionClientModule.scala deleted file mode 100644 index c208e111c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraPartitionClientModule.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.thriftmux.MethodBuilder -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.hydra.partition.{thriftscala => ht} - -object HydraPartitionClientModule - extends ThriftMethodBuilderClientModule[ - ht.HydraPartition.ServicePerEndpoint, - ht.HydraPartition.MethodPerEndpoint - ] - with MtlsClient { - override def label: String = "hydra-partition" - - override def dest: String = "/s/hydra/hydra-partition" - - override protected def configureMethodBuilder( - injector: Injector, - methodBuilder: MethodBuilder - ): MethodBuilder = methodBuilder.withTimeoutTotal(500.milliseconds) - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraRootClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraRootClientModule.scala deleted file mode 100644 index 28d5b1767..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/HydraRootClientModule.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.thriftmux.MethodBuilder -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.hydra.root.{thriftscala => ht} -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule - -object HydraRootClientModule - extends ThriftMethodBuilderClientModule[ - ht.HydraRoot.ServicePerEndpoint, - ht.HydraRoot.MethodPerEndpoint - ] - with MtlsClient { - override def label: String = "hydra-root" - - override def dest: String = "/s/hydra/hydra-root" - - override protected def configureMethodBuilder( - injector: Injector, - methodBuilder: MethodBuilder - ): MethodBuilder = methodBuilder.withTimeoutTotal(500.milliseconds) - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/QigServiceClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/QigServiceClientModule.scala deleted file mode 100644 index 86675e349..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/QigServiceClientModule.scala +++ /dev/null @@ -1,40 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.QigRankerClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.qig_ranker.thriftscala.QigRanker -import com.twitter.util.Duration -import com.twitter.util.Throw - -object QigServiceClientModule - extends ThriftMethodBuilderClientModule[ - QigRanker.ServicePerEndpoint, - QigRanker.MethodPerEndpoint - ] - with MtlsClient { - override val label: String = "qig-ranker" - override val dest: String = "/s/qig-shared/qig-ranker" - private val qigRankerClientTimeout: Flag[Duration] = - flag[Duration](QigRankerClientTimeoutFlagName, "ranking timeout") - - override def requestTimeout: Duration = qigRankerClientTimeout() - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/SimClustersAnnServiceClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/SimClustersAnnServiceClientModule.scala deleted file mode 100644 index 7504ab6c3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/SimClustersAnnServiceClientModule.scala +++ /dev/null @@ -1,147 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.google.inject.Provides -import com.twitter.conversions.PercentOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.client.MtlsStackClient._ -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.thrift.ClientId -import com.twitter.inject.TwitterModule -import com.twitter.simclustersann.{thriftscala => t} -import javax.inject.Named -import javax.inject.Singleton - -object SimClustersAnnServiceClientModule extends TwitterModule { - - @Provides - @Singleton - @Named(ModuleNames.ProdSimClustersANNServiceClientName) - def providesProdSimClustersANNServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server" - val dest = "/s/simclusters-ann/simclusters-ann" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.ExperimentalSimClustersANNServiceClientName) - def providesExperimentalSimClustersANNServiceClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-experimental-server" - val dest = "/s/simclusters-ann/simclusters-ann-experimental" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNServiceClientName1) - def providesSimClustersANNServiceClient1( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server-1" - val dest = "/s/simclusters-ann/simclusters-ann-1" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNServiceClientName2) - def providesSimClustersANNServiceClient2( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server-2" - val dest = "/s/simclusters-ann/simclusters-ann-2" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNServiceClientName3) - def providesSimClustersANNServiceClient3( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server-3" - val dest = "/s/simclusters-ann/simclusters-ann-3" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNServiceClientName5) - def providesSimClustersANNServiceClient5( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server-5" - val dest = "/s/simclusters-ann/simclusters-ann-5" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - - @Provides - @Singleton - @Named(ModuleNames.SimClustersANNServiceClientName4) - def providesSimClustersANNServiceClient4( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - ): t.SimClustersANNService.MethodPerEndpoint = { - val label = "simclusters-ann-server-4" - val dest = "/s/simclusters-ann/simclusters-ann-4" - - buildClient(serviceIdentifier, clientId, timeoutConfig, statsReceiver, dest, label) - } - private def buildClient( - serviceIdentifier: ServiceIdentifier, - clientId: ClientId, - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver, - dest: String, - label: String - ): t.SimClustersANNService.MethodPerEndpoint = { - val stats = statsReceiver.scope("clnt") - - val thriftClient = ThriftMux.client - .withMutualTls(serviceIdentifier) - .withClientId(clientId) - .withLabel(label) - .withStatsReceiver(stats) - .methodBuilder(dest) - .idempotent(5.percent) - .withTimeoutPerRequest(timeoutConfig.annServiceClientTimeout) - .withRetryDisabled - .servicePerEndpoint[t.SimClustersANNService.ServicePerEndpoint] - - ThriftMux.Client.methodPerEndpoint(thriftClient) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/TweetyPieClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/TweetyPieClientModule.scala deleted file mode 100644 index 610ccc95a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/TweetyPieClientModule.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.google.inject.Provides -import com.twitter.app.Flag -import com.twitter.conversions.DurationOps.richDurationFromInt -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.TweetypieClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.stitch.tweetypie.{TweetyPie => STweetyPie} -import com.twitter.tweetypie.thriftscala.TweetService -import com.twitter.util.Duration -import com.twitter.util.Throw -import javax.inject.Singleton - -object TweetyPieClientModule - extends ThriftMethodBuilderClientModule[ - TweetService.ServicePerEndpoint, - TweetService.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "tweetypie" - override val dest = "/s/tweetypie/tweetypie" - - private val tweetypieClientTimeout: Flag[Duration] = - flag[Duration](TweetypieClientTimeoutFlagName, "tweetypie client timeout") - override def requestTimeout: Duration = tweetypieClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - // We bump the success rate from the default of 0.8 to 0.9 since we're dropping the - // consecutive failures part of the default policy. - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withSessionQualifier - .successRateFailureAccrual(successRate = 0.9, window = 30.seconds) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } - - @Provides - @Singleton - def providesTweetyPie( - tweetyPieService: TweetService.MethodPerEndpoint - ): STweetyPie = { - STweetyPie(tweetyPieService) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserAdGraphClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserAdGraphClientModule.scala deleted file mode 100644 index 4c1f337ab..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserAdGraphClientModule.scala +++ /dev/null @@ -1,47 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.UserAdGraphClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mtls.authentication.ServiceIdentifier -import com.twitter.finagle.mtls.client.MtlsStackClient.MtlsThriftMuxClientSyntax -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import com.twitter.util.Duration -import com.twitter.util.Throw - -object UserAdGraphClientModule - extends ThriftMethodBuilderClientModule[ - UserAdGraph.ServicePerEndpoint, - UserAdGraph.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "user-ad-graph" - override val dest = "/s/user-tweet-graph/user-ad-graph" - private val userAdGraphClientTimeout: Flag[Duration] = - flag[Duration](UserAdGraphClientTimeoutFlagName, "userAdGraph client timeout") - override def requestTimeout: Duration = userAdGraphClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withMutualTls(injector.instance[ServiceIdentifier]) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetEntityGraphClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetEntityGraphClientModule.scala deleted file mode 100644 index 337f943f7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetEntityGraphClientModule.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.UtegClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.recos.user_tweet_entity_graph.thriftscala.UserTweetEntityGraph -import com.twitter.util.Duration -import com.twitter.util.Throw - -object UserTweetEntityGraphClientModule - extends ThriftMethodBuilderClientModule[ - UserTweetEntityGraph.ServicePerEndpoint, - UserTweetEntityGraph.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "user-tweet-entity-graph" - override val dest = "/s/cassowary/user_tweet_entity_graph" - private val userTweetEntityGraphClientTimeout: Flag[Duration] = - flag[Duration](UtegClientTimeoutFlagName, "user tweet entity graph client timeout") - override def requestTimeout: Duration = userTweetEntityGraphClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphClientModule.scala deleted file mode 100644 index 572786fd1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphClientModule.scala +++ /dev/null @@ -1,43 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph -import com.twitter.util.Duration -import com.twitter.util.Throw -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.UserTweetGraphClientTimeoutFlagName -import com.twitter.finagle.service.RetryBudget - -object UserTweetGraphClientModule - extends ThriftMethodBuilderClientModule[ - UserTweetGraph.ServicePerEndpoint, - UserTweetGraph.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "user-tweet-graph" - override val dest = "/s/user-tweet-graph/user-tweet-graph" - private val userTweetGraphClientTimeout: Flag[Duration] = - flag[Duration](UserTweetGraphClientTimeoutFlagName, "userTweetGraph client timeout") - override def requestTimeout: Duration = userTweetGraphClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphPlusClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphPlusClientModule.scala deleted file mode 100644 index 41ae96e53..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserTweetGraphPlusClientModule.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.UserTweetGraphPlusClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.recos.user_tweet_graph_plus.thriftscala.UserTweetGraphPlus -import com.twitter.util.Duration -import com.twitter.util.Throw - -object UserTweetGraphPlusClientModule - extends ThriftMethodBuilderClientModule[ - UserTweetGraphPlus.ServicePerEndpoint, - UserTweetGraphPlus.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "user-tweet-graph-plus" - override val dest = "/s/user-tweet-graph/user-tweet-graph-plus" - private val userTweetGraphPlusClientTimeout: Flag[Duration] = - flag[Duration]( - UserTweetGraphPlusClientTimeoutFlagName, - "userTweetGraphPlus client timeout" - ) - override def requestTimeout: Duration = userTweetGraphPlusClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserVideoGraphClientModule.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserVideoGraphClientModule.scala deleted file mode 100644 index 7c311cbfa..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/module/thrift_client/UserVideoGraphClientModule.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.cr_mixer.module.thrift_client - -import com.twitter.app.Flag -import com.twitter.cr_mixer.module.core.TimeoutConfigModule.UserVideoGraphClientTimeoutFlagName -import com.twitter.finagle.ThriftMux -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.service.ReqRep -import com.twitter.finagle.service.ResponseClass -import com.twitter.finagle.service.RetryBudget -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finatra.mtls.thriftmux.modules.MtlsClient -import com.twitter.inject.Injector -import com.twitter.inject.thrift.modules.ThriftMethodBuilderClientModule -import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph -import com.twitter.util.Duration -import com.twitter.util.Throw - -object UserVideoGraphClientModule - extends ThriftMethodBuilderClientModule[ - UserVideoGraph.ServicePerEndpoint, - UserVideoGraph.MethodPerEndpoint - ] - with MtlsClient { - - override val label = "user-video-graph" - override val dest = "/s/user-tweet-graph/user-video-graph" - private val userVideoGraphClientTimeout: Flag[Duration] = - flag[Duration]( - UserVideoGraphClientTimeoutFlagName, - "userVideoGraph client timeout" - ) - override def requestTimeout: Duration = userVideoGraphClientTimeout() - - override def retryBudget: RetryBudget = RetryBudget.Empty - - override def configureThriftMuxClient( - injector: Injector, - client: ThriftMux.Client - ): ThriftMux.Client = - super - .configureThriftMuxClient(injector, client) - .withStatsReceiver(injector.instance[StatsReceiver].scope("clnt")) - .withResponseClassifier { - case ReqRep(_, Throw(_: ClientDiscardedRequestException)) => ResponseClass.Ignorable - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/AdsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/AdsParams.scala deleted file mode 100644 index 880f1b27c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/AdsParams.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object AdsParams { - object AdsCandidateGenerationMaxCandidatesNumParam - extends FSBoundedParam[Int]( - name = "ads_candidate_generation_max_candidates_num", - default = 400, - min = 0, - max = 2000 - ) - - object EnableScoreBoost - extends FSParam[Boolean]( - name = "ads_candidate_generation_enable_score_boost", - default = false - ) - - object AdsCandidateGenerationScoreBoostFactor - extends FSBoundedParam[Double]( - name = "ads_candidate_generation_score_boost_factor", - default = 10000.0, - min = 1.0, - max = 100000.0 - ) - - object EnableScribe - extends FSParam[Boolean]( - name = "ads_candidate_generation_enable_scribe", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - AdsCandidateGenerationMaxCandidatesNumParam, - EnableScoreBoost, - AdsCandidateGenerationScoreBoostFactor - ) - - lazy val config: BaseConfig = { - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - AdsCandidateGenerationMaxCandidatesNumParam) - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableScoreBoost, - EnableScribe - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(AdsCandidateGenerationScoreBoostFactor) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BUILD deleted file mode 100644 index b24a21394..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BUILD +++ /dev/null @@ -1,27 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "abdecider/src/main/scala", - "configapi/configapi-abdecider", - "configapi/configapi-core", - "configapi/configapi-featureswitches:v2", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "discovery-common/src/main/scala/com/twitter/discovery/common/configapi", - "featureswitches/featureswitches-core", - "featureswitches/featureswitches-core/src/main/scala/com/twitter/featureswitches/v2/builder", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "scribelib/marshallers/src/main/scala/com/twitter/scribelib/marshallers", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "user-signal-service/thrift/src/main/thrift:thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BlenderParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BlenderParams.scala deleted file mode 100644 index 185fc4440..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BlenderParams.scala +++ /dev/null @@ -1,152 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object BlenderParams { - object BlendingAlgorithmEnum extends Enumeration { - val RoundRobin: Value = Value - val SourceTypeBackFill: Value = Value - val SourceSignalSorting: Value = Value - } - object ContentBasedSortingAlgorithmEnum extends Enumeration { - val FavoriteCount: Value = Value - val SourceSignalRecency: Value = Value - val RandomSorting: Value = Value - val SimilarityToSignalSorting: Value = Value - val CandidateRecency: Value = Value - } - - object BlendingAlgorithmParam - extends FSEnumParam[BlendingAlgorithmEnum.type]( - name = "blending_algorithm_id", - default = BlendingAlgorithmEnum.RoundRobin, - enum = BlendingAlgorithmEnum - ) - - object RankingInterleaveWeightShrinkageParam - extends FSBoundedParam[Double]( - name = "blending_enable_ml_ranking_interleave_weights_shrinkage", - default = 1.0, - min = 0.0, - max = 1.0 - ) - - object RankingInterleaveMaxWeightAdjustments - extends FSBoundedParam[Int]( - name = "blending_interleave_max_weighted_adjustments", - default = 3000, - min = 0, - max = 9999 - ) - - object SignalTypeSortingAlgorithmParam - extends FSEnumParam[ContentBasedSortingAlgorithmEnum.type]( - name = "blending_algorithm_inner_signal_sorting_id", - default = ContentBasedSortingAlgorithmEnum.SourceSignalRecency, - enum = ContentBasedSortingAlgorithmEnum - ) - - object ContentBlenderTypeSortingAlgorithmParam - extends FSEnumParam[ContentBasedSortingAlgorithmEnum.type]( - name = "blending_algorithm_content_blender_sorting_id", - default = ContentBasedSortingAlgorithmEnum.FavoriteCount, - enum = ContentBasedSortingAlgorithmEnum - ) - - //UserAffinities Algo Param: whether to distributed the source type weights - object EnableDistributedSourceTypeWeightsParam - extends FSParam[Boolean]( - name = "blending_algorithm_enable_distributed_source_type_weights", - default = false - ) - - object BlendGroupingMethodEnum extends Enumeration { - val SourceKeyDefault: Value = Value("SourceKey") - val SourceTypeSimilarityEngine: Value = Value("SourceTypeSimilarityEngine") - val AuthorId: Value = Value("AuthorId") - } - - object BlendGroupingMethodParam - extends FSEnumParam[BlendGroupingMethodEnum.type]( - name = "blending_grouping_method_id", - default = BlendGroupingMethodEnum.SourceKeyDefault, - enum = BlendGroupingMethodEnum - ) - - object RecencyBasedRandomSamplingHalfLifeInDays - extends FSBoundedParam[Int]( - name = "blending_interleave_random_sampling_recency_based_half_life_in_days", - default = 7, - min = 1, - max = 28 - ) - - object RecencyBasedRandomSamplingDefaultWeight - extends FSBoundedParam[Double]( - name = "blending_interleave_random_sampling_recency_based_default_weight", - default = 1.0, - min = 0.1, - max = 2.0 - ) - - object SourceTypeBackFillEnableVideoBackFill - extends FSParam[Boolean]( - name = "blending_enable_video_backfill", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - BlendingAlgorithmParam, - RankingInterleaveWeightShrinkageParam, - RankingInterleaveMaxWeightAdjustments, - EnableDistributedSourceTypeWeightsParam, - BlendGroupingMethodParam, - RecencyBasedRandomSamplingHalfLifeInDays, - RecencyBasedRandomSamplingDefaultWeight, - SourceTypeBackFillEnableVideoBackFill, - SignalTypeSortingAlgorithmParam, - ContentBlenderTypeSortingAlgorithmParam, - ) - - lazy val config: BaseConfig = { - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - BlendingAlgorithmParam, - BlendGroupingMethodParam, - SignalTypeSortingAlgorithmParam, - ContentBlenderTypeSortingAlgorithmParam - ) - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableDistributedSourceTypeWeightsParam, - SourceTypeBackFillEnableVideoBackFill - ) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - RankingInterleaveMaxWeightAdjustments, - RecencyBasedRandomSamplingHalfLifeInDays - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - RankingInterleaveWeightShrinkageParam, - RecencyBasedRandomSamplingDefaultWeight - ) - - BaseConfigBuilder() - .set(enumOverrides: _*) - .set(booleanOverrides: _*) - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BypassInterleaveAndRankParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BypassInterleaveAndRankParams.scala deleted file mode 100644 index 20cbc369a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/BypassInterleaveAndRankParams.scala +++ /dev/null @@ -1,98 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object BypassInterleaveAndRankParams { - object EnableTwhinCollabFilterBypassParam - extends FSParam[Boolean]( - name = "bypass_interleave_and_rank_twhin_collab_filter", - default = false - ) - - object EnableTwoTowerBypassParam - extends FSParam[Boolean]( - name = "bypass_interleave_and_rank_two_tower", - default = false - ) - - object EnableConsumerBasedTwhinBypassParam - extends FSParam[Boolean]( - name = "bypass_interleave_and_rank_consumer_based_twhin", - default = false - ) - - object EnableConsumerBasedWalsBypassParam - extends FSParam[Boolean]( - name = "bypass_interleave_and_rank_consumer_based_wals", - default = false - ) - - object TwhinCollabFilterBypassPercentageParam - extends FSBoundedParam[Double]( - name = "bypass_interleave_and_rank_twhin_collab_filter_percentage", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - object TwoTowerBypassPercentageParam - extends FSBoundedParam[Double]( - name = "bypass_interleave_and_rank_two_tower_percentage", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - object ConsumerBasedTwhinBypassPercentageParam - extends FSBoundedParam[Double]( - name = "bypass_interleave_and_rank_consumer_based_twhin_percentage", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - object ConsumerBasedWalsBypassPercentageParam - extends FSBoundedParam[Double]( - name = "bypass_interleave_and_rank_consumer_based_wals_percentage", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableTwhinCollabFilterBypassParam, - EnableTwoTowerBypassParam, - EnableConsumerBasedTwhinBypassParam, - EnableConsumerBasedWalsBypassParam, - TwhinCollabFilterBypassPercentageParam, - TwoTowerBypassPercentageParam, - ConsumerBasedTwhinBypassPercentageParam, - ConsumerBasedWalsBypassPercentageParam, - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTwhinCollabFilterBypassParam, - EnableTwoTowerBypassParam, - EnableConsumerBasedTwhinBypassParam, - EnableConsumerBasedWalsBypassParam, - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - TwhinCollabFilterBypassPercentageParam, - TwoTowerBypassPercentageParam, - ConsumerBasedTwhinBypassPercentageParam, - ConsumerBasedWalsBypassPercentageParam, - ) - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerBasedWalsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerBasedWalsParams.scala deleted file mode 100644 index 15f4d36ab..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerBasedWalsParams.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.conversions.DurationOps.richDurationFromInt -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.Param -import com.twitter.util.Duration - -object ConsumerBasedWalsParams { - - object EnableSourceParam - extends FSParam[Boolean]( - name = "consumer_based_wals_enable_source", - default = false - ) - - object ModelNameParam - extends FSParam[String]( - name = "consumer_based_wals_model_name", - default = "model_0" - ) - - object WilyNsNameParam - extends FSParam[String]( - name = "consumer_based_wals_wily_ns_name", - default = "" - ) - - object ModelInputNameParam - extends FSParam[String]( - name = "consumer_based_wals_model_input_name", - default = "examples" - ) - - object ModelOutputNameParam - extends FSParam[String]( - name = "consumer_based_wals_model_output_name", - default = "all_tweet_ids" - ) - - object ModelSignatureNameParam - extends FSParam[String]( - name = "consumer_based_wals_model_signature_name", - default = "serving_default" - ) - - object MaxTweetSignalAgeHoursParam - extends FSBoundedParam[Duration]( - name = "consumer_based_wals_max_tweet_signal_age_hours", - default = 72.hours, - min = 1.hours, - max = 720.hours - ) - with HasDurationConversion { - - override val durationConversion: DurationConversion = DurationConversion.FromHours - } - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - ModelNameParam, - ModelInputNameParam, - ModelOutputNameParam, - ModelSignatureNameParam, - MaxTweetSignalAgeHoursParam, - WilyNsNameParam, - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - ) - val stringOverrides = FeatureSwitchOverrideUtil.getStringFSOverrides( - ModelNameParam, - ModelInputNameParam, - ModelOutputNameParam, - ModelSignatureNameParam, - WilyNsNameParam - ) - - val boundedDurationFSOverrides = - FeatureSwitchOverrideUtil.getBoundedDurationFSOverrides(MaxTweetSignalAgeHoursParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(stringOverrides: _*) - .set(boundedDurationFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedCandidateGenerationParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedCandidateGenerationParams.scala deleted file mode 100644 index bedbaf0b9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedCandidateGenerationParams.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ConsumerEmbeddingBasedCandidateGenerationParams { - - object EnableTwHINParam - extends FSParam[Boolean]( - name = "consumer_embedding_based_candidate_generation_enable_twhin", - default = false - ) - - object EnableTwoTowerParam - extends FSParam[Boolean]( - name = "consumer_embedding_based_candidate_generation_enable_two_tower", - default = false - ) - - object EnableLogFavBasedSimClustersTripParam - extends FSParam[Boolean]( - name = "consumer_embedding_based_candidate_generation_enable_logfav_based_simclusters_trip", - default = false - ) - - object EnableFollowBasedSimClustersTripParam - extends FSParam[Boolean]( - name = "consumer_embedding_based_candidate_generation_enable_follow_based_simclusters_trip", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableTwHINParam, - EnableTwoTowerParam, - EnableFollowBasedSimClustersTripParam, - EnableLogFavBasedSimClustersTripParam - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTwHINParam, - EnableTwoTowerParam, - EnableFollowBasedSimClustersTripParam, - EnableLogFavBasedSimClustersTripParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTripParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTripParams.scala deleted file mode 100644 index 4b43d42ab..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTripParams.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ConsumerEmbeddingBasedTripParams { - object SourceIdParam - extends FSParam[String]( - name = "consumer_embedding_based_trip_source_id", - default = "EXPLR_TOPK_VID_48H_V3") - - object MaxNumCandidatesParam - extends FSBoundedParam[Int]( - name = "consumer_embedding_based_trip_max_num_candidates", - default = 80, - min = 0, - max = 200 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - SourceIdParam, - MaxNumCandidatesParam - ) - - lazy val config: BaseConfig = { - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - SourceIdParam - ) - - val intFSOverrides = - FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxNumCandidatesParam - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .set(intFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwHINParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwHINParams.scala deleted file mode 100644 index bda14d5d4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwHINParams.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.Param - -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil - -object ConsumerEmbeddingBasedTwHINParams { - object ModelIdParam - extends FSParam[String]( - name = "consumer_embedding_based_twhin_model_id", - default = ModelConfig.ConsumerBasedTwHINRegularUpdateAll20221024, - ) // Note: this default value does not match with ModelIds yet. This FS is a placeholder - - val AllParams: Seq[Param[_] with FSName] = Seq( - ModelIdParam - ) - - lazy val config: BaseConfig = { - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - ModelIdParam - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwoTowerParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwoTowerParams.scala deleted file mode 100644 index 2a6474adc..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumerEmbeddingBasedTwoTowerParams.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig.TwoTowerFavALL20220808 -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ConsumerEmbeddingBasedTwoTowerParams { - object ModelIdParam - extends FSParam[String]( - name = "consumer_embedding_based_two_tower_model_id", - default = TwoTowerFavALL20220808, - ) // Note: this default value does not match with ModelIds yet. This FS is a placeholder - - val AllParams: Seq[Param[_] with FSName] = Seq( - ModelIdParam - ) - - lazy val config: BaseConfig = { - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - ModelIdParam - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserAdGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserAdGraphParams.scala deleted file mode 100644 index a730e0994..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserAdGraphParams.scala +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ConsumersBasedUserAdGraphParams { - - object EnableSourceParam - extends FSParam[Boolean]( - name = "consumers_based_user_ad_graph_enable_source", - default = false - ) - - // UTG-Lookalike - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "consumers_based_user_ad_graph_min_co_occurrence", - default = 2, - min = 0, - max = 500 - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "consumers_based_user_ad_graph_min_score", - default = 0.0, - min = 0.0, - max = 10.0 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - MinCoOccurrenceParam, - MinScoreParam - ) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides(MinCoOccurrenceParam) - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(MinScoreParam) - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides(EnableSourceParam) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserTweetGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserTweetGraphParams.scala deleted file mode 100644 index 47c67887f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserTweetGraphParams.scala +++ /dev/null @@ -1,44 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -/** - * ConsumersBasedUserTweetGraph Params, there are multiple ways (e.g. FRS, RealGraphOon) to generate consumersSeedSet for ConsumersBasedUserTweetGraph - * for now we allow flexibility in tuning UTG params for different consumersSeedSet generation algo by giving the param name {consumerSeedSetAlgo}{ParamName} - */ - -object ConsumersBasedUserTweetGraphParams { - - object EnableSourceParam - extends FSParam[Boolean]( - name = "consumers_based_user_tweet_graph_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - ) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides() - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides() - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserVideoGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserVideoGraphParams.scala deleted file mode 100644 index ab0133632..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ConsumersBasedUserVideoGraphParams.scala +++ /dev/null @@ -1,65 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -/** - * ConsumersBasedUserVideoGraph Params: there are multiple ways (e.g. FRS, RealGraphIn) to generate consumersSeedSet for ConsumersBasedUserTweetGraph - * for now we allow flexibility in tuning UVG params for different consumersSeedSet generation algo by giving the param name {consumerSeedSetAlgo}{ParamName} - */ - -object ConsumersBasedUserVideoGraphParams { - - object EnableSourceParam - extends FSParam[Boolean]( - name = "consumers_based_user_video_graph_enable_source", - default = false - ) - - // UTG-RealGraphIN - object RealGraphInMinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "consumers_based_user_video_graph_real_graph_in_min_co_occurrence", - default = 3, - min = 0, - max = 500 - ) - - object RealGraphInMinScoreParam - extends FSBoundedParam[Double]( - name = "consumers_based_user_video_graph_real_graph_in_min_score", - default = 2.0, - min = 0.0, - max = 10.0 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - RealGraphInMinCoOccurrenceParam, - RealGraphInMinScoreParam - ) - - lazy val config: BaseConfig = { - - val intOverrides = - FeatureSwitchOverrideUtil.getBoundedIntFSOverrides(RealGraphInMinCoOccurrenceParam) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(RealGraphInMinScoreParam) - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CrMixerParamConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CrMixerParamConfig.scala deleted file mode 100644 index ada50d965..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CrMixerParamConfig.scala +++ /dev/null @@ -1,122 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.CompositeConfig -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.Param - -object CrMixerParamConfig { - - lazy val config: CompositeConfig = new CompositeConfig( - configs = Seq( - AdsParams.config, - BlenderParams.config, - BypassInterleaveAndRankParams.config, - RankerParams.config, - ConsumerBasedWalsParams.config, - ConsumerEmbeddingBasedCandidateGenerationParams.config, - ConsumerEmbeddingBasedTripParams.config, - ConsumerEmbeddingBasedTwHINParams.config, - ConsumerEmbeddingBasedTwoTowerParams.config, - ConsumersBasedUserAdGraphParams.config, - ConsumersBasedUserTweetGraphParams.config, - ConsumersBasedUserVideoGraphParams.config, - CustomizedRetrievalBasedCandidateGenerationParams.config, - CustomizedRetrievalBasedOfflineInterestedInParams.config, - CustomizedRetrievalBasedFTROfflineInterestedInParams.config, - CustomizedRetrievalBasedTwhinParams.config, - EarlybirdFrsBasedCandidateGenerationParams.config, - FrsParams.config, - GlobalParams.config, - InterestedInParams.config, - ProducerBasedCandidateGenerationParams.config, - ProducerBasedUserAdGraphParams.config, - ProducerBasedUserTweetGraphParams.config, - RecentFollowsParams.config, - RecentNegativeSignalParams.config, - RecentNotificationsParams.config, - RecentOriginalTweetsParams.config, - RecentReplyTweetsParams.config, - RecentRetweetsParams.config, - RecentTweetFavoritesParams.config, - RelatedTweetGlobalParams.config, - RelatedVideoTweetGlobalParams.config, - RelatedTweetProducerBasedParams.config, - RelatedTweetTweetBasedParams.config, - RelatedVideoTweetTweetBasedParams.config, - RealGraphInParams.config, - RealGraphOonParams.config, - RepeatedProfileVisitsParams.config, - SimClustersANNParams.config, - TopicTweetParams.config, - TweetBasedCandidateGenerationParams.config, - TweetBasedUserAdGraphParams.config, - TweetBasedUserTweetGraphParams.config, - TweetBasedUserVideoGraphParams.config, - TweetSharesParams.config, - TweetBasedTwHINParams.config, - RealGraphOonParams.config, - GoodTweetClickParams.config, - GoodProfileClickParams.config, - UtegTweetGlobalParams.config, - VideoTweetFilterParams.config, - VideoViewTweetsParams.config, - UnifiedUSSSignalParams.config, - ), - simpleName = "CrMixerConfig" - ) - - val allParams: Seq[Param[_] with FSName] = { - AdsParams.AllParams ++ - BlenderParams.AllParams ++ - BypassInterleaveAndRankParams.AllParams ++ - RankerParams.AllParams ++ - ConsumerBasedWalsParams.AllParams ++ - ConsumerEmbeddingBasedCandidateGenerationParams.AllParams ++ - ConsumerEmbeddingBasedTripParams.AllParams ++ - ConsumerEmbeddingBasedTwHINParams.AllParams ++ - ConsumerEmbeddingBasedTwoTowerParams.AllParams ++ - ConsumersBasedUserAdGraphParams.AllParams ++ - ConsumersBasedUserTweetGraphParams.AllParams ++ - ConsumersBasedUserVideoGraphParams.AllParams ++ - CustomizedRetrievalBasedCandidateGenerationParams.AllParams ++ - CustomizedRetrievalBasedOfflineInterestedInParams.AllParams ++ - CustomizedRetrievalBasedFTROfflineInterestedInParams.AllParams ++ - CustomizedRetrievalBasedTwhinParams.AllParams ++ - EarlybirdFrsBasedCandidateGenerationParams.AllParams ++ - FrsParams.AllParams ++ - GlobalParams.AllParams ++ - InterestedInParams.AllParams ++ - ProducerBasedCandidateGenerationParams.AllParams ++ - ProducerBasedUserAdGraphParams.AllParams ++ - ProducerBasedUserTweetGraphParams.AllParams ++ - RecentFollowsParams.AllParams ++ - RecentNegativeSignalParams.AllParams ++ - RecentNotificationsParams.AllParams ++ - RecentOriginalTweetsParams.AllParams ++ - RecentReplyTweetsParams.AllParams ++ - RecentRetweetsParams.AllParams ++ - RecentTweetFavoritesParams.AllParams ++ - RelatedTweetGlobalParams.AllParams ++ - RelatedVideoTweetGlobalParams.AllParams ++ - RelatedTweetProducerBasedParams.AllParams ++ - RelatedTweetTweetBasedParams.AllParams ++ - RelatedVideoTweetTweetBasedParams.AllParams ++ - RepeatedProfileVisitsParams.AllParams ++ - SimClustersANNParams.AllParams ++ - TopicTweetParams.AllParams ++ - TweetBasedCandidateGenerationParams.AllParams ++ - TweetBasedUserAdGraphParams.AllParams ++ - TweetBasedUserTweetGraphParams.AllParams ++ - TweetBasedUserVideoGraphParams.AllParams ++ - TweetSharesParams.AllParams ++ - TweetBasedTwHINParams.AllParams ++ - RealGraphOonParams.AllParams ++ - RealGraphInParams.AllParams ++ - GoodTweetClickParams.AllParams ++ - GoodProfileClickParams.AllParams ++ - UtegTweetGlobalParams.AllParams ++ - VideoTweetFilterParams.AllParams ++ - VideoViewTweetsParams.AllParams ++ - UnifiedUSSSignalParams.AllParams - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedCandidateGenerationParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedCandidateGenerationParams.scala deleted file mode 100644 index 966048b0f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedCandidateGenerationParams.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object CustomizedRetrievalBasedCandidateGenerationParams { - - // Offline SimClusters InterestedIn params - object EnableOfflineInterestedInParam - extends FSParam[Boolean]( - name = "customized_retrieval_based_candidate_generation_enable_offline_interestedin", - default = false - ) - - // Offline SimClusters FTR-based InterestedIn - object EnableOfflineFTRInterestedInParam - extends FSParam[Boolean]( - name = "customized_retrieval_based_candidate_generation_enable_ftr_offline_interestedin", - default = false - ) - - // TwHin Collab Filter Cluster params - object EnableTwhinCollabFilterClusterParam - extends FSParam[Boolean]( - name = "customized_retrieval_based_candidate_generation_enable_twhin_collab_filter_cluster", - default = false - ) - - // TwHin Multi Cluster params - object EnableTwhinMultiClusterParam - extends FSParam[Boolean]( - name = "customized_retrieval_based_candidate_generation_enable_twhin_multi_cluster", - default = false - ) - - object EnableRetweetBasedDiffusionParam - extends FSParam[Boolean]( - name = "customized_retrieval_based_candidate_generation_enable_retweet_based_diffusion", - default = false - ) - object CustomizedRetrievalBasedRetweetDiffusionSource - extends FSParam[String]( - name = - "customized_retrieval_based_candidate_generation_offline_retweet_based_diffusion_model_id", - default = ModelConfig.RetweetBasedDiffusion - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableOfflineInterestedInParam, - EnableOfflineFTRInterestedInParam, - EnableTwhinCollabFilterClusterParam, - EnableTwhinMultiClusterParam, - EnableRetweetBasedDiffusionParam, - CustomizedRetrievalBasedRetweetDiffusionSource - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableOfflineInterestedInParam, - EnableOfflineFTRInterestedInParam, - EnableTwhinCollabFilterClusterParam, - EnableTwhinMultiClusterParam, - EnableRetweetBasedDiffusionParam - ) - - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - CustomizedRetrievalBasedRetweetDiffusionSource - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedFTROfflineInterestedInParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedFTROfflineInterestedInParams.scala deleted file mode 100644 index d6d1b0430..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedFTROfflineInterestedInParams.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.cr_mixer.param -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object CustomizedRetrievalBasedFTROfflineInterestedInParams { - object CustomizedRetrievalBasedFTROfflineInterestedInSource - extends FSParam[String]( - name = "customized_retrieval_based_ftr_offline_interestedin_model_id", - default = ModelConfig.OfflineFavDecayedSum - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - CustomizedRetrievalBasedFTROfflineInterestedInSource) - - lazy val config: BaseConfig = { - - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - CustomizedRetrievalBasedFTROfflineInterestedInSource - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedOfflineInterestedInParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedOfflineInterestedInParams.scala deleted file mode 100644 index d5244e135..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedOfflineInterestedInParams.scala +++ /dev/null @@ -1,33 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object CustomizedRetrievalBasedOfflineInterestedInParams { - - // Model slots available for offline InterestedIn candidate generation - object CustomizedRetrievalBasedOfflineInterestedInSource - extends FSParam[String]( - name = "customized_retrieval_based_offline_interestedin_model_id", - default = ModelConfig.OfflineInterestedInFromKnownFor2020 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(CustomizedRetrievalBasedOfflineInterestedInSource) - - lazy val config: BaseConfig = { - - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - CustomizedRetrievalBasedOfflineInterestedInSource - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedTwhinParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedTwhinParams.scala deleted file mode 100644 index 646cdb163..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/CustomizedRetrievalBasedTwhinParams.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object CustomizedRetrievalBasedTwhinParams { - - // Model slots available for TwhinCollab and MultiCluster - object CustomizedRetrievalBasedTwhinCollabFilterFollowSource - extends FSParam[String]( - name = "customized_retrieval_based_offline_twhin_collab_filter_follow_model_id", - default = ModelConfig.TwhinCollabFilterForFollow - ) - - object CustomizedRetrievalBasedTwhinCollabFilterEngagementSource - extends FSParam[String]( - name = "customized_retrieval_based_offline_twhin_collab_filter_engagement_model_id", - default = ModelConfig.TwhinCollabFilterForEngagement - ) - - object CustomizedRetrievalBasedTwhinMultiClusterFollowSource - extends FSParam[String]( - name = "customized_retrieval_based_offline_twhin_multi_cluster_follow_model_id", - default = ModelConfig.TwhinMultiClusterForFollow - ) - - object CustomizedRetrievalBasedTwhinMultiClusterEngagementSource - extends FSParam[String]( - name = "customized_retrieval_based_offline_twhin_multi_cluster_engagement_model_id", - default = ModelConfig.TwhinMultiClusterForEngagement - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq( - CustomizedRetrievalBasedTwhinCollabFilterFollowSource, - CustomizedRetrievalBasedTwhinCollabFilterEngagementSource, - CustomizedRetrievalBasedTwhinMultiClusterFollowSource, - CustomizedRetrievalBasedTwhinMultiClusterEngagementSource, - ) - - lazy val config: BaseConfig = { - - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - CustomizedRetrievalBasedTwhinCollabFilterFollowSource, - CustomizedRetrievalBasedTwhinCollabFilterEngagementSource, - CustomizedRetrievalBasedTwhinMultiClusterFollowSource, - CustomizedRetrievalBasedTwhinMultiClusterEngagementSource, - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/EarlybirdFrsBasedCandidateGenerationParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/EarlybirdFrsBasedCandidateGenerationParams.scala deleted file mode 100644 index 2a9ffb424..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/EarlybirdFrsBasedCandidateGenerationParams.scala +++ /dev/null @@ -1,117 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_ModelBased -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_RecencyBased -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_TensorflowBased -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.Param -import com.twitter.util.Duration - -object EarlybirdFrsBasedCandidateGenerationParams { - object CandidateGenerationEarlybirdSimilarityEngineType extends Enumeration { - protected case class SimilarityEngineType(rankingMode: EarlybirdSimilarityEngineType) - extends super.Val - import scala.language.implicitConversions - implicit def valueToEarlybirdRankingMode(x: Value): SimilarityEngineType = - x.asInstanceOf[SimilarityEngineType] - - val EarlybirdRankingMode_RecencyBased: SimilarityEngineType = SimilarityEngineType( - EarlybirdSimilarityEngineType_RecencyBased) - val EarlybirdRankingMode_ModelBased: SimilarityEngineType = SimilarityEngineType( - EarlybirdSimilarityEngineType_ModelBased) - val EarlybirdRankingMode_TensorflowBased: SimilarityEngineType = SimilarityEngineType( - EarlybirdSimilarityEngineType_TensorflowBased) - } - - object FrsBasedCandidateGenerationEarlybirdSimilarityEngineTypeParam - extends FSEnumParam[CandidateGenerationEarlybirdSimilarityEngineType.type]( - name = "frs_based_candidate_generation_earlybird_ranking_mode_id", - default = - CandidateGenerationEarlybirdSimilarityEngineType.EarlybirdRankingMode_RecencyBased, - enum = CandidateGenerationEarlybirdSimilarityEngineType - ) - - object FrsBasedCandidateGenerationRecencyBasedEarlybirdMaxTweetsPerUser - extends FSBoundedParam[Int]( - name = "frs_based_candidate_generation_earlybird_max_tweets_per_user", - default = 100, - min = 0, - /** - * Note max should be equal to EarlybirdRecencyBasedCandidateStoreModule.DefaultMaxNumTweetPerUser. - * Which is the size of the memcached result list. - */ - max = 100 - ) - - object FrsBasedCandidateGenerationEarlybirdMaxTweetAge - extends FSBoundedParam[Duration]( - name = "frs_based_candidate_generation_earlybird_max_tweet_age_hours", - default = 24.hours, - min = 12.hours, - /** - * Note max could be related to EarlybirdRecencyBasedCandidateStoreModule.DefaultMaxNumTweetPerUser. - * Which is the size of the memcached result list for recency based earlybird candidate source. - * E.g. if max = 720.hours, we may want to increase the DefaultMaxNumTweetPerUser. - */ - max = 96.hours - ) - with HasDurationConversion { - override val durationConversion: DurationConversion = DurationConversion.FromHours - } - - object FrsBasedCandidateGenerationEarlybirdFilterOutRetweetsAndReplies - extends FSParam[Boolean]( - name = "frs_based_candidate_generation_earlybird_filter_out_retweets_and_replies", - default = true - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - FrsBasedCandidateGenerationEarlybirdSimilarityEngineTypeParam, - FrsBasedCandidateGenerationRecencyBasedEarlybirdMaxTweetsPerUser, - FrsBasedCandidateGenerationEarlybirdMaxTweetAge, - FrsBasedCandidateGenerationEarlybirdFilterOutRetweetsAndReplies, - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - FrsBasedCandidateGenerationEarlybirdFilterOutRetweetsAndReplies, - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides() - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - FrsBasedCandidateGenerationRecencyBasedEarlybirdMaxTweetsPerUser - ) - - val durationFSOverrides = - FeatureSwitchOverrideUtil.getDurationFSOverrides( - FrsBasedCandidateGenerationEarlybirdMaxTweetAge - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - FrsBasedCandidateGenerationEarlybirdSimilarityEngineTypeParam, - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(intOverrides: _*) - .set(enumOverrides: _*) - .set(durationFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/FrsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/FrsParams.scala deleted file mode 100644 index 18bf1d474..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/FrsParams.scala +++ /dev/null @@ -1,131 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param -import com.twitter.follow_recommendations.thriftscala.DisplayLocation -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.logging.Logger -import com.twitter.finagle.stats.NullStatsReceiver - -object FrsParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "signal_frs_enable_source", - default = false - ) - - object EnableSourceGraphParam - extends FSParam[Boolean]( - name = "graph_frs_enable_source", - default = false - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "signal_frs_min_score", - default = 0.4, - min = 0.0, - max = 1.0 - ) - - object MaxConsumerSeedsNumParam - extends FSBoundedParam[Int]( - name = "graph_frs_max_user_seeds_num", - default = 200, - min = 0, - max = 1000 - ) - - /** - * These params below are only used for FrsTweetCandidateGenerator and shouldn't be used in other endpoints - * * FrsBasedCandidateGenerationMaxSeedsNumParam - * * FrsCandidateGenerationDisplayLocationParam - * * FrsCandidateGenerationDisplayLocation - * * FrsBasedCandidateGenerationMaxCandidatesNumParam - */ - object FrsBasedCandidateGenerationEnableVisibilityFilteringParam - extends FSParam[Boolean]( - name = "frs_based_candidate_generation_enable_vf", - default = true - ) - - object FrsBasedCandidateGenerationMaxSeedsNumParam - extends FSBoundedParam[Int]( - name = "frs_based_candidate_generation_max_seeds_num", - default = 100, - min = 0, - max = 800 - ) - - object FrsBasedCandidateGenerationDisplayLocation extends Enumeration { - protected case class FrsDisplayLocationValue(displayLocation: DisplayLocation) extends super.Val - import scala.language.implicitConversions - implicit def valueToDisplayLocationValue(x: Value): FrsDisplayLocationValue = - x.asInstanceOf[FrsDisplayLocationValue] - - val DisplayLocation_ContentRecommender: FrsDisplayLocationValue = FrsDisplayLocationValue( - DisplayLocation.ContentRecommender) - val DisplayLocation_Home: FrsDisplayLocationValue = FrsDisplayLocationValue( - DisplayLocation.HomeTimelineTweetRecs) - val DisplayLocation_Notifications: FrsDisplayLocationValue = FrsDisplayLocationValue( - DisplayLocation.TweetNotificationRecs) - } - - object FrsBasedCandidateGenerationDisplayLocationParam - extends FSEnumParam[FrsBasedCandidateGenerationDisplayLocation.type]( - name = "frs_based_candidate_generation_display_location_id", - default = FrsBasedCandidateGenerationDisplayLocation.DisplayLocation_Home, - enum = FrsBasedCandidateGenerationDisplayLocation - ) - - object FrsBasedCandidateGenerationMaxCandidatesNumParam - extends FSBoundedParam[Int]( - name = "frs_based_candidate_generation_max_candidates_num", - default = 100, - min = 0, - max = 2000 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - EnableSourceGraphParam, - MinScoreParam, - MaxConsumerSeedsNumParam, - FrsBasedCandidateGenerationMaxSeedsNumParam, - FrsBasedCandidateGenerationDisplayLocationParam, - FrsBasedCandidateGenerationMaxCandidatesNumParam, - FrsBasedCandidateGenerationEnableVisibilityFilteringParam - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableSourceGraphParam, - FrsBasedCandidateGenerationEnableVisibilityFilteringParam - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(MinScoreParam) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxConsumerSeedsNumParam, - FrsBasedCandidateGenerationMaxSeedsNumParam, - FrsBasedCandidateGenerationMaxCandidatesNumParam) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - FrsBasedCandidateGenerationDisplayLocationParam, - ) - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(intOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GlobalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GlobalParams.scala deleted file mode 100644 index 77def9a2a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GlobalParams.scala +++ /dev/null @@ -1,106 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.common.ModelVersions -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.Param -import com.twitter.util.Duration - -/** - * Instantiate Params that do not relate to a specific product. - * The params in this file correspond to config repo file - * [[https://sourcegraph.twitter.biz/config-git.twitter.biz/config/-/blob/features/cr-mixer/main/twistly_core.yml]] - */ -object GlobalParams { - - object MaxCandidatesPerRequestParam - extends FSBoundedParam[Int]( - name = "twistly_core_max_candidates_per_request", - default = 100, - min = 0, - max = 9000 - ) - - object ModelVersionParam - extends FSEnumParam[ModelVersions.Enum.type]( - name = "twistly_core_simclusters_model_version_id", - default = ModelVersions.Enum.Model20M145K2020, - enum = ModelVersions.Enum - ) - - object UnifiedMaxSourceKeyNum - extends FSBoundedParam[Int]( - name = "twistly_core_unified_max_sourcekey_num", - default = 15, - min = 0, - max = 100 - ) - - object MaxCandidateNumPerSourceKeyParam - extends FSBoundedParam[Int]( - name = "twistly_core_candidate_per_sourcekey_max_num", - default = 200, - min = 0, - max = 1000 - ) - - // 1 hours to 30 days - object MaxTweetAgeHoursParam - extends FSBoundedParam[Duration]( - name = "twistly_core_max_tweet_age_hours", - default = 720.hours, - min = 1.hours, - max = 720.hours - ) - with HasDurationConversion { - - override val durationConversion: DurationConversion = DurationConversion.FromHours - } - - val AllParams: Seq[Param[_] with FSName] = Seq( - MaxCandidatesPerRequestParam, - UnifiedMaxSourceKeyNum, - MaxCandidateNumPerSourceKeyParam, - ModelVersionParam, - MaxTweetAgeHoursParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides() - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxCandidatesPerRequestParam, - UnifiedMaxSourceKeyNum, - MaxCandidateNumPerSourceKeyParam - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ModelVersionParam - ) - - val boundedDurationFSOverrides = - FeatureSwitchOverrideUtil.getBoundedDurationFSOverrides(MaxTweetAgeHoursParam) - - val seqOverrides = FeatureSwitchOverrideUtil.getLongSeqFSOverrides() - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(intOverrides: _*) - .set(boundedDurationFSOverrides: _*) - .set(enumOverrides: _*) - .set(seqOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodProfileClickParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodProfileClickParams.scala deleted file mode 100644 index 175dccfac..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodProfileClickParams.scala +++ /dev/null @@ -1,60 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param -import com.twitter.usersignalservice.thriftscala.SignalType - -object GoodProfileClickParams { - - object ClickMinDwellTimeParam extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - import scala.language.implicitConversions - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val TotalDwellTime10s = SignalTypeValue(SignalType.GoodProfileClick) - val TotalDwellTime20s = SignalTypeValue(SignalType.GoodProfileClick20s) - val TotalDwellTime30s = SignalTypeValue(SignalType.GoodProfileClick30s) - - } - - object EnableSourceParam - extends FSParam[Boolean]( - name = "signal_good_profile_clicks_enable_source", - default = false - ) - - object ClickMinDwellTimeType - extends FSEnumParam[ClickMinDwellTimeParam.type]( - name = "signal_good_profile_clicks_min_dwelltime_type_id", - default = ClickMinDwellTimeParam.TotalDwellTime10s, - enum = ClickMinDwellTimeParam - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq(EnableSourceParam, ClickMinDwellTimeType) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ClickMinDwellTimeType - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodTweetClickParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodTweetClickParams.scala deleted file mode 100644 index 949048821..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/GoodTweetClickParams.scala +++ /dev/null @@ -1,75 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param -import com.twitter.usersignalservice.thriftscala.SignalType - -object GoodTweetClickParams { - - object ClickMinDwellTimeParam extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - import scala.language.implicitConversions - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val TotalDwellTime2s = SignalTypeValue(SignalType.GoodTweetClick) - val TotalDwellTime5s = SignalTypeValue(SignalType.GoodTweetClick5s) - val TotalDwellTime10s = SignalTypeValue(SignalType.GoodTweetClick10s) - val TotalDwellTime30s = SignalTypeValue(SignalType.GoodTweetClick30s) - - } - - object EnableSourceParam - extends FSParam[Boolean]( - name = "signal_good_tweet_clicks_enable_source", - default = false - ) - - object ClickMinDwellTimeType - extends FSEnumParam[ClickMinDwellTimeParam.type]( - name = "signal_good_tweet_clicks_min_dwelltime_type_id", - default = ClickMinDwellTimeParam.TotalDwellTime2s, - enum = ClickMinDwellTimeParam - ) - - object MaxSignalNumParam - extends FSBoundedParam[Int]( - name = "signal_good_tweet_clicks_max_signal_num", - default = 15, - min = 0, - max = 15 - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq(EnableSourceParam, ClickMinDwellTimeType, MaxSignalNumParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ClickMinDwellTimeType - ) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxSignalNumParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(enumOverrides: _*) - .set(intOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/InterestedInParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/InterestedInParams.scala deleted file mode 100644 index 503469ac3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/InterestedInParams.scala +++ /dev/null @@ -1,213 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.simclusters_v2.thriftscala.{EmbeddingType => SimClustersEmbeddingType} -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object InterestedInParams { - - object SourceEmbedding extends Enumeration { - protected case class EmbeddingType(embeddingType: SimClustersEmbeddingType) extends super.Val - import scala.language.implicitConversions - implicit def valueToEmbeddingtype(x: Value): EmbeddingType = x.asInstanceOf[EmbeddingType] - - val UserInterestedIn: Value = EmbeddingType(SimClustersEmbeddingType.FilteredUserInterestedIn) - val UnfilteredUserInterestedIn: Value = EmbeddingType( - SimClustersEmbeddingType.UnfilteredUserInterestedIn) - val FromProducerEmbedding: Value = EmbeddingType( - SimClustersEmbeddingType.FilteredUserInterestedInFromPE) - val LogFavBasedUserInterestedInFromAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedInFromAPE) - val FollowBasedUserInterestedInFromAPE: Value = EmbeddingType( - SimClustersEmbeddingType.FollowBasedUserInterestedInFromAPE) - val UserNextInterestedIn: Value = EmbeddingType(SimClustersEmbeddingType.UserNextInterestedIn) - // AddressBook based InterestedIn - val LogFavBasedUserInterestedAverageAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedAverageAddressBookFromIIAPE) - val LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE) - val LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE) - val LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE) - val LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE) - val LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE: Value = EmbeddingType( - SimClustersEmbeddingType.LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE) - } - - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_source", - default = true - ) - - object InterestedInEmbeddingIdParam - extends FSEnumParam[SourceEmbedding.type]( - name = "twistly_interestedin_embedding_id", - default = SourceEmbedding.UnfilteredUserInterestedIn, - enum = SourceEmbedding - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "twistly_interestedin_min_score", - default = 0.072, - min = 0.0, - max = 1.0 - ) - - object EnableSourceSequentialModelParam - extends FSParam[Boolean]( - name = "twistly_interestedin_sequential_model_enable_source", - default = false - ) - - object NextInterestedInEmbeddingIdParam - extends FSEnumParam[SourceEmbedding.type]( - name = "twistly_interestedin_sequential_model_embedding_id", - default = SourceEmbedding.UserNextInterestedIn, - enum = SourceEmbedding - ) - - object MinScoreSequentialModelParam - extends FSBoundedParam[Double]( - name = "twistly_interestedin_sequential_model_min_score", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - object EnableSourceAddressBookParam - extends FSParam[Boolean]( - name = "twistly_interestedin_addressbook_enable_source", - default = false - ) - - object AddressBookInterestedInEmbeddingIdParam - extends FSEnumParam[SourceEmbedding.type]( - name = "twistly_interestedin_addressbook_embedding_id", - default = SourceEmbedding.LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE, - enum = SourceEmbedding - ) - - object MinScoreAddressBookParam - extends FSBoundedParam[Double]( - name = "twistly_interestedin_addressbook_min_score", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - // Prod SimClusters ANN param - // This is used to enable/disable querying of production SANN service. Useful when experimenting - // with replacements to it. - object EnableProdSimClustersANNParam - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_prod_simclusters_ann", - default = true - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN 1 cluster params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN 2 cluster params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN 3 cluster params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN 5 cluster params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_simclusters_ann_5", - default = false - ) - - // SimClusters ANN 4 cluster params - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "twistly_interestedin_enable_simclusters_ann_4", - default = false - ) - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - EnableSourceSequentialModelParam, - EnableSourceAddressBookParam, - EnableProdSimClustersANNParam, - EnableExperimentalSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - MinScoreParam, - MinScoreSequentialModelParam, - MinScoreAddressBookParam, - InterestedInEmbeddingIdParam, - NextInterestedInEmbeddingIdParam, - AddressBookInterestedInEmbeddingIdParam, - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableSourceSequentialModelParam, - EnableSourceAddressBookParam, - EnableProdSimClustersANNParam, - EnableExperimentalSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - MinScoreParam, - MinScoreSequentialModelParam, - MinScoreAddressBookParam) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - InterestedInEmbeddingIdParam, - NextInterestedInEmbeddingIdParam, - AddressBookInterestedInEmbeddingIdParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedCandidateGenerationParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedCandidateGenerationParams.scala deleted file mode 100644 index e9ae7feaa..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedCandidateGenerationParams.scala +++ /dev/null @@ -1,143 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ProducerBasedCandidateGenerationParams { - // Source params. Not being used. It is always set to true in prod - object EnableSourceParam - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_source", - default = false - ) - - object UtgCombinationMethodParam - extends FSEnumParam[UnifiedSETweetCombinationMethod.type]( - name = "producer_based_candidate_generation_utg_combination_method_id", - default = UnifiedSETweetCombinationMethod.Frontload, - enum = UnifiedSETweetCombinationMethod - ) - - // UTG params - object EnableUTGParam - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_utg", - default = false - ) - - object EnableUAGParam - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_uag", - default = false - ) - - // SimClusters params - object EnableSimClustersANNParam - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters", - default = true - ) - - // Filter params - object SimClustersMinScoreParam - extends FSBoundedParam[Double]( - name = "producer_based_candidate_generation_filter_simclusters_min_score", - default = 0.7, - min = 0.0, - max = 1.0 - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN cluster 1 params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN cluster 2 params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN cluster 5 params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters_ann_5", - default = false - ) - - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "producer_based_candidate_generation_enable_simclusters_ann_4", - default = false - ) - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - EnableUAGParam, - EnableUTGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - SimClustersMinScoreParam, - UtgCombinationMethodParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableUAGParam, - EnableUTGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - UtgCombinationMethodParam, - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(SimClustersMinScoreParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserAdGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserAdGraphParams.scala deleted file mode 100644 index 197db074a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserAdGraphParams.scala +++ /dev/null @@ -1,53 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ProducerBasedUserAdGraphParams { - - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "producer_based_user_ad_graph_min_co_occurrence", - default = 2, - min = 0, - max = 500 - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "producer_based_user_ad_graph_min_score", - default = 3.0, - min = 0.0, - max = 10.0 - ) - - object MaxNumFollowersParam - extends FSBoundedParam[Int]( - name = "producer_based_user_ad_graph_max_num_followers", - default = 500, - min = 100, - max = 1000 - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq(MinCoOccurrenceParam, MaxNumFollowersParam, MinScoreParam) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MinCoOccurrenceParam, - MaxNumFollowersParam, - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(MinScoreParam) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserTweetGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserTweetGraphParams.scala deleted file mode 100644 index 0747d0afd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/ProducerBasedUserTweetGraphParams.scala +++ /dev/null @@ -1,53 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object ProducerBasedUserTweetGraphParams { - - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "producer_based_user_tweet_graph_min_co_occurrence", - default = 4, - min = 0, - max = 500 - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "producer_based_user_tweet_graph_min_score", - default = 3.0, - min = 0.0, - max = 10.0 - ) - - object MaxNumFollowersParam - extends FSBoundedParam[Int]( - name = "producer_based_user_tweet_graph_max_num_followers", - default = 500, - min = 100, - max = 1000 - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq(MinCoOccurrenceParam, MaxNumFollowersParam, MinScoreParam) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MinCoOccurrenceParam, - MaxNumFollowersParam, - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(MinScoreParam) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RankerParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RankerParams.scala deleted file mode 100644 index e7785ffb8..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RankerParams.scala +++ /dev/null @@ -1,59 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RankerParams { - - object MaxCandidatesToRank - extends FSBoundedParam[Int]( - name = "twistly_core_max_candidates_to_rank", - default = 2000, - min = 0, - max = 9999 - ) - - object EnableBlueVerifiedTopK - extends FSParam[Boolean]( - name = "twistly_core_blue_verified_top_k", - default = true - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - MaxCandidatesToRank, - EnableBlueVerifiedTopK - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides(EnableBlueVerifiedTopK) - - val boundedDurationFSOverrides = - FeatureSwitchOverrideUtil.getBoundedDurationFSOverrides() - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxCandidatesToRank - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ) - val stringFSOverrides = FeatureSwitchOverrideUtil.getStringFSOverrides() - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(boundedDurationFSOverrides: _*) - .set(intOverrides: _*) - .set(enumOverrides: _*) - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphInParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphInParams.scala deleted file mode 100644 index 7614ca0eb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphInParams.scala +++ /dev/null @@ -1,25 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi._ - -object RealGraphInParams { - object EnableSourceGraphParam - extends FSParam[Boolean]( - name = "graph_realgraphin_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceGraphParam, - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceGraphParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphOonParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphOonParams.scala deleted file mode 100644 index 8b303c55b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RealGraphOonParams.scala +++ /dev/null @@ -1,51 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RealGraphOonParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "signal_realgraphoon_enable_source", - default = false - ) - - object EnableSourceGraphParam - extends FSParam[Boolean]( - name = "graph_realgraphoon_enable_source", - default = false - ) - - object MaxConsumerSeedsNumParam - extends FSBoundedParam[Int]( - name = "graph_realgraphoon_max_user_seeds_num", - default = 200, - min = 0, - max = 1000 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - EnableSourceGraphParam, - MaxConsumerSeedsNumParam - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableSourceGraphParam - ) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides(MaxConsumerSeedsNumParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(intOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentFollowsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentFollowsParams.scala deleted file mode 100644 index ecb75c82f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentFollowsParams.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentFollowsParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentfollows_enable_source", - default = true - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNegativeSignalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNegativeSignalParams.scala deleted file mode 100644 index 429d6daba..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNegativeSignalParams.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentNegativeSignalParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentnegativesignals_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam - ) - - lazy val config: BaseConfig = { - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ) - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides() - - BaseConfigBuilder() - .set(booleanOverrides: _*).set(doubleOverrides: _*).set(enumOverrides: _*).build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNotificationsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNotificationsParams.scala deleted file mode 100644 index 641118a05..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentNotificationsParams.scala +++ /dev/null @@ -1,28 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentNotificationsParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentnotifications_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentOriginalTweetsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentOriginalTweetsParams.scala deleted file mode 100644 index 5b485e61f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentOriginalTweetsParams.scala +++ /dev/null @@ -1,28 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentOriginalTweetsParams { - - // Source params - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentoriginaltweets_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides(EnableSourceParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentReplyTweetsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentReplyTweetsParams.scala deleted file mode 100644 index 7e6617c6d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentReplyTweetsParams.scala +++ /dev/null @@ -1,27 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentReplyTweetsParams { - // Source params - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentreplytweets_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides(EnableSourceParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentRetweetsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentRetweetsParams.scala deleted file mode 100644 index 93c1fe356..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentRetweetsParams.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentRetweetsParams { - - // Source params - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recentretweets_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentTweetFavoritesParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentTweetFavoritesParams.scala deleted file mode 100644 index 22d0d6a70..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RecentTweetFavoritesParams.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RecentTweetFavoritesParams { - // Source params - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_recenttweetfavorites_enable_source", - default = true - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetGlobalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetGlobalParams.scala deleted file mode 100644 index f5e5ee21e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetGlobalParams.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RelatedTweetGlobalParams { - - object MaxCandidatesPerRequestParam - extends FSBoundedParam[Int]( - name = "related_tweet_core_max_candidates_per_request", - default = 100, - min = 0, - max = 500 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(MaxCandidatesPerRequestParam) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxCandidatesPerRequestParam - ) - - BaseConfigBuilder() - .set(intOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetProducerBasedParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetProducerBasedParams.scala deleted file mode 100644 index 3851f6144..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetProducerBasedParams.scala +++ /dev/null @@ -1,111 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RelatedTweetProducerBasedParams { - - // UTG params - object EnableUTGParam - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_utg", - default = false - ) - - // SimClusters params - object EnableSimClustersANNParam - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters", - default = true - ) - - // Filter params - object SimClustersMinScoreParam - extends FSBoundedParam[Double]( - name = "related_tweet_producer_based_filter_simclusters_min_score", - default = 0.0, - min = 0.0, - max = 1.0 - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN cluster 1 params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN cluster 2 params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters_ann_5", - default = false - ) - - // SimClusters ANN cluster 4 params - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "related_tweet_producer_based_enable_simclusters_ann_4", - default = false - ) - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableUTGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - SimClustersMinScoreParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableUTGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam - ) - - val doubleOverrides = FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - SimClustersMinScoreParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetTweetBasedParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetTweetBasedParams.scala deleted file mode 100644 index 10d01a5d1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedTweetTweetBasedParams.scala +++ /dev/null @@ -1,141 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RelatedTweetTweetBasedParams { - - // UTG params - object EnableUTGParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_utg", - default = false - ) - - // UVG params - object EnableUVGParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_uvg", - default = false - ) - - // UAG params - object EnableUAGParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_uag", - default = false - ) - - // SimClusters params - object EnableSimClustersANNParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters", - default = true - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN cluster 1 params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN cluster 2 params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN cluster 5 params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters_ann_5", - default = false - ) - - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_simclusters_ann_4", - default = false - ) - // TwHIN params - object EnableTwHINParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_twhin", - default = false - ) - - // QIG params - object EnableQigSimilarTweetsParam - extends FSParam[Boolean]( - name = "related_tweet_tweet_based_enable_qig_similar_tweets", - default = false - ) - - // Filter params - object SimClustersMinScoreParam - extends FSBoundedParam[Double]( - name = "related_tweet_tweet_based_filter_simclusters_min_score", - default = 0.3, - min = 0.0, - max = 1.0 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableSimClustersANNParam, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - SimClustersMinScoreParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableSimClustersANNParam, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(SimClustersMinScoreParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetGlobalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetGlobalParams.scala deleted file mode 100644 index eeed18e6c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetGlobalParams.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RelatedVideoTweetGlobalParams { - - object MaxCandidatesPerRequestParam - extends FSBoundedParam[Int]( - name = "related_video_tweet_core_max_candidates_per_request", - default = 100, - min = 0, - max = 500 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(MaxCandidatesPerRequestParam) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxCandidatesPerRequestParam - ) - - BaseConfigBuilder() - .set(intOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetTweetBasedParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetTweetBasedParams.scala deleted file mode 100644 index 3b40653bc..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RelatedVideoTweetTweetBasedParams.scala +++ /dev/null @@ -1,134 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RelatedVideoTweetTweetBasedParams { - - // UTG params - object EnableUTGParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_utg", - default = false - ) - - // SimClusters params - object EnableSimClustersANNParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters", - default = true - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN cluster 1 params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN cluster 2 params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN cluster 5 params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters_ann_5", - default = false - ) - - // SimClusters ANN cluster 4 params - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_simclusters_ann_4", - default = false - ) - // TwHIN params - object EnableTwHINParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_twhin", - default = false - ) - - // QIG params - object EnableQigSimilarTweetsParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_qig_similar_tweets", - default = false - ) - - // Filter params - object SimClustersMinScoreParam - extends FSBoundedParam[Double]( - name = "related_video_tweet_tweet_based_filter_simclusters_min_score", - default = 0.3, - min = 0.0, - max = 1.0 - ) - - object EnableUVGParam - extends FSParam[Boolean]( - name = "related_video_tweet_tweet_based_enable_uvg", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableSimClustersANNParam, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - SimClustersMinScoreParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableSimClustersANNParam, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(SimClustersMinScoreParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RepeatedProfileVisitsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RepeatedProfileVisitsParams.scala deleted file mode 100644 index 4cb205de9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/RepeatedProfileVisitsParams.scala +++ /dev/null @@ -1,72 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.usersignalservice.thriftscala.SignalType -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object RepeatedProfileVisitsParams { - object ProfileMinVisitParam extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - import scala.language.implicitConversions - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val TotalVisitsInPast180Days = SignalTypeValue(SignalType.RepeatedProfileVisit180dMinVisit6V1) - val TotalVisitsInPast90Days = SignalTypeValue(SignalType.RepeatedProfileVisit90dMinVisit6V1) - val TotalVisitsInPast14Days = SignalTypeValue(SignalType.RepeatedProfileVisit14dMinVisit2V1) - val TotalVisitsInPast180DaysNoNegative = SignalTypeValue( - SignalType.RepeatedProfileVisit180dMinVisit6V1NoNegative) - val TotalVisitsInPast90DaysNoNegative = SignalTypeValue( - SignalType.RepeatedProfileVisit90dMinVisit6V1NoNegative) - val TotalVisitsInPast14DaysNoNegative = SignalTypeValue( - SignalType.RepeatedProfileVisit14dMinVisit2V1NoNegative) - } - - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_repeatedprofilevisits_enable_source", - default = true - ) - - object MinScoreParam - extends FSBoundedParam[Double]( - name = "twistly_repeatedprofilevisits_min_score", - default = 0.5, - min = 0.0, - max = 1.0 - ) - - object ProfileMinVisitType - extends FSEnumParam[ProfileMinVisitParam.type]( - name = "twistly_repeatedprofilevisits_min_visit_type_id", - default = ProfileMinVisitParam.TotalVisitsInPast14Days, - enum = ProfileMinVisitParam - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam, ProfileMinVisitType) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam - ) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ProfileMinVisitType - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/SimClustersANNParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/SimClustersANNParams.scala deleted file mode 100644 index b650d5123..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/SimClustersANNParams.scala +++ /dev/null @@ -1,76 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object SimClustersANNParams { - - // Different SimClusters ANN cluster has its own config id (model slot) - object SimClustersANNConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_config_id", - default = "Default" - ) - - object SimClustersANN1ConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_1_config_id", - default = "20220810" - ) - - object SimClustersANN2ConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_2_config_id", - default = "20220818" - ) - - object SimClustersANN3ConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_3_config_id", - default = "20220819" - ) - - object SimClustersANN5ConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_5_config_id", - default = "20221221" - ) - object SimClustersANN4ConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_simclusters_ann_4_config_id", - default = "20221220" - ) - object ExperimentalSimClustersANNConfigId - extends FSParam[String]( - name = "similarity_simclusters_ann_experimental_simclusters_ann_config_id", - default = "20220801" - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - SimClustersANNConfigId, - SimClustersANN1ConfigId, - SimClustersANN2ConfigId, - SimClustersANN3ConfigId, - SimClustersANN5ConfigId, - ExperimentalSimClustersANNConfigId - ) - - lazy val config: BaseConfig = { - val stringOverrides = FeatureSwitchOverrideUtil.getStringFSOverrides( - SimClustersANNConfigId, - SimClustersANN1ConfigId, - SimClustersANN2ConfigId, - SimClustersANN3ConfigId, - SimClustersANN5ConfigId, - ExperimentalSimClustersANNConfigId - ) - - BaseConfigBuilder() - .set(stringOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TopicTweetParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TopicTweetParams.scala deleted file mode 100644 index 3ef683f52..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TopicTweetParams.scala +++ /dev/null @@ -1,115 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.conversions.DurationOps._ -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.Param -import com.twitter.util.Duration - -object TopicTweetParams { - object MaxTweetAge - extends FSBoundedParam[Duration]( - name = "topic_tweet_candidate_generation_max_tweet_age_hours", - default = 24.hours, - min = 12.hours, - max = 48.hours - ) - with HasDurationConversion { - override val durationConversion: DurationConversion = DurationConversion.FromHours - } - - object MaxTopicTweetCandidatesParam - extends FSBoundedParam[Int]( - name = "topic_tweet_max_candidates_num", - default = 200, - min = 0, - max = 1000 - ) - - object MaxSkitTfgCandidatesParam - extends FSBoundedParam[Int]( - name = "topic_tweet_skit_tfg_max_candidates_num", - default = 100, - min = 0, - max = 1000 - ) - - object MaxSkitHighPrecisionCandidatesParam - extends FSBoundedParam[Int]( - name = "topic_tweet_skit_high_precision_max_candidates_num", - default = 100, - min = 0, - max = 1000 - ) - - object MaxCertoCandidatesParam - extends FSBoundedParam[Int]( - name = "topic_tweet_certo_max_candidates_num", - default = 100, - min = 0, - max = 1000 - ) - - // The min prod score for Certo L2-normalized cosine candidates - object CertoScoreThresholdParam - extends FSBoundedParam[Double]( - name = "topic_tweet_certo_score_threshold", - default = 0.015, - min = 0, - max = 1 - ) - - object SemanticCoreVersionIdParam - extends FSParam[Long]( - name = "semantic_core_version_id", - default = 1380520918896713735L - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - CertoScoreThresholdParam, - MaxTopicTweetCandidatesParam, - MaxTweetAge, - MaxCertoCandidatesParam, - MaxSkitTfgCandidatesParam, - MaxSkitHighPrecisionCandidatesParam, - SemanticCoreVersionIdParam - ) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides() - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(CertoScoreThresholdParam) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxCertoCandidatesParam, - MaxSkitTfgCandidatesParam, - MaxSkitHighPrecisionCandidatesParam, - MaxTopicTweetCandidatesParam - ) - - val longOverrides = FeatureSwitchOverrideUtil.getLongFSOverrides(SemanticCoreVersionIdParam) - - val durationFSOverrides = FeatureSwitchOverrideUtil.getDurationFSOverrides(MaxTweetAge) - - val enumOverrides = - FeatureSwitchOverrideUtil.getEnumFSOverrides(NullStatsReceiver, Logger(getClass)) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(intOverrides: _*) - .set(longOverrides: _*) - .set(enumOverrides: _*) - .set(durationFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedCandidateGenerationParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedCandidateGenerationParams.scala deleted file mode 100644 index 7f94d2e41..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedCandidateGenerationParams.scala +++ /dev/null @@ -1,189 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetBasedCandidateGenerationParams { - - // Source params. Not being used. It is always set to true in prod - object EnableSourceParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_source", - default = false - ) - - // UTG params - object EnableUTGParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_utg", - default = true - ) - - // SimClusters params - object EnableSimClustersANNParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters", - default = true - ) - - // Experimental SimClusters ANN params - object EnableExperimentalSimClustersANNParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_experimental_simclusters_ann", - default = false - ) - - // SimClusters ANN cluster 1 params - object EnableSimClustersANN1Param - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters_ann_1", - default = false - ) - - // SimClusters ANN cluster 2 params - object EnableSimClustersANN2Param - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters_ann_2", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN3Param - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters_ann_3", - default = false - ) - - // SimClusters ANN cluster 3 params - object EnableSimClustersANN5Param - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters_ann_5", - default = false - ) - - // SimClusters ANN cluster 4 params - object EnableSimClustersANN4Param - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_simclusters_ann_4", - default = false - ) - // TwHIN params - object EnableTwHINParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_twhin", - default = false - ) - - // QIG params - object EnableQigSimilarTweetsParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_qig_similar_tweets", - default = false - ) - - object QigMaxNumSimilarTweetsParam - extends FSBoundedParam[Int]( - name = "tweet_based_candidate_generation_qig_max_num_similar_tweets", - default = 100, - min = 10, - max = 100 - ) - - // UVG params - object EnableUVGParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_uvg", - default = false - ) - - // UAG params - object EnableUAGParam - extends FSParam[Boolean]( - name = "tweet_based_candidate_generation_enable_uag", - default = false - ) - - // Filter params - object SimClustersMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_candidate_generation_filter_simclusters_min_score", - default = 0.5, - min = 0.0, - max = 1.0 - ) - - // for learning DDG that has a higher threshold for video based SANN - object SimClustersVideoBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_candidate_generation_filter_simclusters_video_based_min_score", - default = 0.5, - min = 0.0, - max = 1.0 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableSourceParam, - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableUAGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - SimClustersMinScoreParam, - SimClustersVideoBasedMinScoreParam, - QigMaxNumSimilarTweetsParam, - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableTwHINParam, - EnableQigSimilarTweetsParam, - EnableUTGParam, - EnableUVGParam, - EnableUAGParam, - EnableSimClustersANNParam, - EnableSimClustersANN1Param, - EnableSimClustersANN2Param, - EnableSimClustersANN3Param, - EnableSimClustersANN5Param, - EnableSimClustersANN4Param, - EnableExperimentalSimClustersANNParam, - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - SimClustersMinScoreParam, - SimClustersVideoBasedMinScoreParam) - - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - ) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - QigMaxNumSimilarTweetsParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(doubleOverrides: _*) - .set(enumOverrides: _*) - .set(intOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedTwHINParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedTwHINParams.scala deleted file mode 100644 index c4ecfc6fb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedTwHINParams.scala +++ /dev/null @@ -1,30 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.cr_mixer.model.ModelConfig -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetBasedTwHINParams { - object ModelIdParam - extends FSParam[String]( - name = "tweet_based_twhin_model_id", - default = ModelConfig.TweetBasedTwHINRegularUpdateAll20221024, - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(ModelIdParam) - - lazy val config: BaseConfig = { - val stringFSOverrides = - FeatureSwitchOverrideUtil.getStringFSOverrides( - ModelIdParam - ) - - BaseConfigBuilder() - .set(stringFSOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserAdGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserAdGraphParams.scala deleted file mode 100644 index 9e994b16b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserAdGraphParams.scala +++ /dev/null @@ -1,58 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetBasedUserAdGraphParams { - - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_ad_graph_min_co_occurrence", - default = 1, - min = 0, - max = 500 - ) - - object ConsumersBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_user_ad_graph_consumers_based_min_score", - default = 0.0, - min = 0.0, - max = 10.0 - ) - - object MaxConsumerSeedsNumParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_ad_graph_max_user_seeds_num", - default = 100, - min = 0, - max = 300 - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam, - ConsumersBasedMinScoreParam - ) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(ConsumersBasedMinScoreParam) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserTweetGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserTweetGraphParams.scala deleted file mode 100644 index 8cc42f81f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserTweetGraphParams.scala +++ /dev/null @@ -1,89 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetBasedUserTweetGraphParams { - - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_tweet_graph_min_co_occurrence", - default = 3, - min = 0, - max = 500 - ) - - object TweetBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_user_tweet_graph_tweet_based_min_score", - default = 0.5, - min = 0.0, - max = 10.0 - ) - - object ConsumersBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_user_tweet_graph_consumers_based_min_score", - default = 4.0, - min = 0.0, - max = 10.0 - ) - object MaxConsumerSeedsNumParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_tweet_graph_max_user_seeds_num", - default = 100, - min = 0, - max = 300 - ) - - object EnableCoverageExpansionOldTweetParam - extends FSParam[Boolean]( - name = "tweet_based_user_tweet_graph_enable_coverage_expansion_old_tweet", - default = false - ) - - object EnableCoverageExpansionAllTweetParam - extends FSParam[Boolean]( - name = "tweet_based_user_tweet_graph_enable_coverage_expansion_all_tweet", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableCoverageExpansionAllTweetParam, - EnableCoverageExpansionOldTweetParam, - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam, - TweetBasedMinScoreParam, - ConsumersBasedMinScoreParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableCoverageExpansionAllTweetParam, - EnableCoverageExpansionOldTweetParam - ) - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides( - TweetBasedMinScoreParam, - ConsumersBasedMinScoreParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserVideoGraphParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserVideoGraphParams.scala deleted file mode 100644 index 0de5d2df7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetBasedUserVideoGraphParams.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetBasedUserVideoGraphParams { - - object MinCoOccurrenceParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_video_graph_min_co_occurrence", - default = 5, - min = 0, - max = 500 - ) - - object TweetBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_user_video_graph_tweet_based_min_score", - default = 0.0, - min = 0.0, - max = 100.0 - ) - - object ConsumersBasedMinScoreParam - extends FSBoundedParam[Double]( - name = "tweet_based_user_video_graph_consumers_based_min_score", - default = 4.0, - min = 0.0, - max = 10.0 - ) - - object MaxConsumerSeedsNumParam - extends FSBoundedParam[Int]( - name = "tweet_based_user_video_graph_max_user_seeds_num", - default = 200, - min = 0, - max = 500 - ) - - object EnableCoverageExpansionOldTweetParam - extends FSParam[Boolean]( - name = "tweet_based_user_video_graph_enable_coverage_expansion_old_tweet", - default = false - ) - - object EnableCoverageExpansionAllTweetParam - extends FSParam[Boolean]( - name = "tweet_based_user_video_graph_enable_coverage_expansion_all_tweet", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam, - TweetBasedMinScoreParam, - EnableCoverageExpansionOldTweetParam, - EnableCoverageExpansionAllTweetParam - ) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MinCoOccurrenceParam, - MaxConsumerSeedsNumParam - ) - - val doubleOverrides = - FeatureSwitchOverrideUtil.getBoundedDoubleFSOverrides(TweetBasedMinScoreParam) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(doubleOverrides: _*) - .build() - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetSharesParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetSharesParams.scala deleted file mode 100644 index 1602441b0..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/TweetSharesParams.scala +++ /dev/null @@ -1,29 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param - -object TweetSharesParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "twistly_tweetshares_enable_source", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq(EnableSourceParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedSETweetCombinationMethod.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedSETweetCombinationMethod.scala deleted file mode 100644 index f5b92f138..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedSETweetCombinationMethod.scala +++ /dev/null @@ -1,15 +0,0 @@ -package com.twitter.cr_mixer.param - -import scala.language.implicitConversions - -object UnifiedSETweetCombinationMethod extends Enumeration { - - protected case class CombinationType(s: String) extends super.Val - - implicit def valueToCombinationType(x: Value): CombinationType = x.asInstanceOf[CombinationType] - - val Default: Value = CombinationType("") - val Interleave: Value = CombinationType("Interleave") - val Frontload: Value = CombinationType("Frontload") - val Backfill: Value = CombinationType("Backfill") -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedUSSSignalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedUSSSignalParams.scala deleted file mode 100644 index 071cabc0c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UnifiedUSSSignalParams.scala +++ /dev/null @@ -1,121 +0,0 @@ -package com.twitter.cr_mixer.param -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param -import com.twitter.usersignalservice.thriftscala.SignalType -import scala.language.implicitConversions - -object UnifiedUSSSignalParams { - - object TweetAggregationTypeParam extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val UniformAggregation = SignalTypeValue(SignalType.TweetBasedUnifiedUniformSignal) - val EngagementAggregation = SignalTypeValue( - SignalType.TweetBasedUnifiedEngagementWeightedSignal) - } - - object ProducerAggregationTypeParam extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - - import scala.language.implicitConversions - - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val UniformAggregation = SignalTypeValue(SignalType.ProducerBasedUnifiedUniformSignal) - val EngagementAggregation = SignalTypeValue( - SignalType.ProducerBasedUnifiedEngagementWeightedSignal) - - } - - object ReplaceIndividualUSSSourcesParam - extends FSParam[Boolean]( - name = "twistly_agg_replace_enable_source", - default = false - ) - - object EnableTweetAggSourceParam - extends FSParam[Boolean]( - name = "twistly_agg_tweet_agg_enable_source", - default = false - ) - - object TweetAggTypeParam - extends FSEnumParam[TweetAggregationTypeParam.type]( - name = "twistly_agg_tweet_agg_type_id", - default = TweetAggregationTypeParam.EngagementAggregation, - enum = TweetAggregationTypeParam - ) - - object UnifiedTweetSourceNumberParam - extends FSBoundedParam[Int]( - name = "twistly_agg_tweet_agg_source_number", - default = 0, - min = 0, - max = 100, - ) - - object EnableProducerAggSourceParam - extends FSParam[Boolean]( - name = "twistly_agg_producer_agg_enable_source", - default = false - ) - - object ProducerAggTypeParam - extends FSEnumParam[ProducerAggregationTypeParam.type]( - name = "twistly_agg_producer_agg_type_id", - default = ProducerAggregationTypeParam.EngagementAggregation, - enum = ProducerAggregationTypeParam - ) - - object UnifiedProducerSourceNumberParam - extends FSBoundedParam[Int]( - name = "twistly_agg_producer_agg_source_number", - default = 0, - min = 0, - max = 100, - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableTweetAggSourceParam, - EnableProducerAggSourceParam, - TweetAggTypeParam, - ProducerAggTypeParam, - UnifiedTweetSourceNumberParam, - UnifiedProducerSourceNumberParam, - ReplaceIndividualUSSSourcesParam - ) - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTweetAggSourceParam, - EnableProducerAggSourceParam, - ReplaceIndividualUSSSourcesParam, - ) - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - UnifiedProducerSourceNumberParam, - UnifiedTweetSourceNumberParam) - val enumOverrides = FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - TweetAggTypeParam, - ProducerAggTypeParam - ) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(intOverrides: _*) - .set(enumOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UtegTweetGlobalParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UtegTweetGlobalParams.scala deleted file mode 100644 index 29f5a7818..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/UtegTweetGlobalParams.scala +++ /dev/null @@ -1,94 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.conversions.DurationOps._ -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.DurationConversion -import com.twitter.timelines.configapi.FSBoundedParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.HasDurationConversion -import com.twitter.timelines.configapi.Param -import com.twitter.util.Duration - -object UtegTweetGlobalParams { - - object MaxUtegCandidatesToRequestParam - extends FSBoundedParam[Int]( - name = "max_uteg_candidates_to_request", - default = 800, - min = 10, - max = 200 - ) - - object CandidateRefreshSinceTimeOffsetHoursParam - extends FSBoundedParam[Duration]( - name = "candidate_refresh_since_time_offset_hours", - default = 48.hours, - min = 1.hours, - max = 96.hours - ) - with HasDurationConversion { - override val durationConversion: DurationConversion = DurationConversion.FromHours - } - - object EnableTLRHealthFilterParam - extends FSParam[Boolean]( - name = "enable_uteg_tlr_health_filter", - default = true - ) - - object EnableRepliesToNonFollowedUsersFilterParam - extends FSParam[Boolean]( - name = "enable_uteg_replies_to_non_followed_users_filter", - default = false - ) - - object EnableRetweetFilterParam - extends FSParam[Boolean]( - name = "enable_uteg_retweet_filter", - default = true - ) - - object EnableInNetworkFilterParam - extends FSParam[Boolean]( - name = "enable_uteg_in_network_filter", - default = true - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq( - MaxUtegCandidatesToRequestParam, - CandidateRefreshSinceTimeOffsetHoursParam, - EnableTLRHealthFilterParam, - EnableRepliesToNonFollowedUsersFilterParam, - EnableRetweetFilterParam, - EnableInNetworkFilterParam - ) - - lazy val config: BaseConfig = { - - val intOverrides = FeatureSwitchOverrideUtil.getBoundedIntFSOverrides( - MaxUtegCandidatesToRequestParam - ) - - val durationFSOverrides = - FeatureSwitchOverrideUtil.getDurationFSOverrides( - CandidateRefreshSinceTimeOffsetHoursParam - ) - - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableTLRHealthFilterParam, - EnableRepliesToNonFollowedUsersFilterParam, - EnableRetweetFilterParam, - EnableInNetworkFilterParam - ) - - BaseConfigBuilder() - .set(intOverrides: _*) - .set(durationFSOverrides: _*) - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoTweetFilterParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoTweetFilterParams.scala deleted file mode 100644 index 3a93d0a1a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoTweetFilterParams.scala +++ /dev/null @@ -1,31 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.Param - -object VideoTweetFilterParams { - - object EnableVideoTweetFilterParam - extends FSParam[Boolean]( - name = "video_tweet_filter_enable_filter", - default = false - ) - - val AllParams: Seq[Param[_] with FSName] = Seq( - EnableVideoTweetFilterParam - ) - - lazy val config: BaseConfig = { - - val booleanOverrides = - FeatureSwitchOverrideUtil.getBooleanFSOverrides(EnableVideoTweetFilterParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .build() - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoViewTweetsParams.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoViewTweetsParams.scala deleted file mode 100644 index 44f508d89..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/VideoViewTweetsParams.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.cr_mixer.param - -import com.twitter.finagle.stats.NullStatsReceiver -import com.twitter.logging.Logger -import com.twitter.timelines.configapi.BaseConfig -import com.twitter.timelines.configapi.BaseConfigBuilder -import com.twitter.timelines.configapi.FSEnumParam -import com.twitter.timelines.configapi.FSName -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.FeatureSwitchOverrideUtil -import com.twitter.timelines.configapi.Param -import com.twitter.usersignalservice.thriftscala.SignalType - -object VideoViewTweetsParams { - object EnableSourceParam - extends FSParam[Boolean]( - name = "signal_videoviewtweets_enable_source", - default = false - ) - - object EnableSourceImpressionParam - extends FSParam[Boolean]( - name = "signal_videoviewtweets_enableimpression_source", - default = false - ) - - object VideoViewTweetType extends Enumeration { - protected case class SignalTypeValue(signalType: SignalType) extends super.Val - import scala.language.implicitConversions - implicit def valueToSignalTypeValue(x: Value): SignalTypeValue = - x.asInstanceOf[SignalTypeValue] - - val VideoTweetQualityView: SignalTypeValue = SignalTypeValue(SignalType.VideoView90dQualityV1) - val VideoTweetPlayback50: SignalTypeValue = SignalTypeValue(SignalType.VideoView90dPlayback50V1) - } - - object VideoViewTweetTypeParam - extends FSEnumParam[VideoViewTweetType.type]( - name = "signal_videoviewtweets_videoviewtype_id", - default = VideoViewTweetType.VideoTweetQualityView, - enum = VideoViewTweetType - ) - - val AllParams: Seq[Param[_] with FSName] = - Seq(EnableSourceParam, EnableSourceImpressionParam, VideoViewTweetTypeParam) - - lazy val config: BaseConfig = { - val booleanOverrides = FeatureSwitchOverrideUtil.getBooleanFSOverrides( - EnableSourceParam, - EnableSourceImpressionParam, - ) - val enumOverrides = - FeatureSwitchOverrideUtil.getEnumFSOverrides( - NullStatsReceiver, - Logger(getClass), - VideoViewTweetTypeParam) - - BaseConfigBuilder() - .set(booleanOverrides: _*) - .set(enumOverrides: _*) - .build() - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/BUILD deleted file mode 100644 index 730986d64..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/BUILD +++ /dev/null @@ -1,16 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - platform = "java8", - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "decider/src/main/scala", - "finagle/finagle-base-http/src/main", - "finagle/finagle-core/src/main", - "finagle/finagle-http/src/main/scala", - "servo/decider", - "src/scala/com/twitter/simclusters_v2/common", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/CrMixerDecider.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/CrMixerDecider.scala deleted file mode 100644 index 8c909ca05..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/CrMixerDecider.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.param.decider - -import com.twitter.decider.Decider -import com.twitter.decider.RandomRecipient -import com.twitter.decider.Recipient -import com.twitter.decider.SimpleRecipient -import com.twitter.simclusters_v2.common.DeciderGateBuilderWithIdHashing -import javax.inject.Inject - -case class CrMixerDecider @Inject() (decider: Decider) { - - def isAvailable(feature: String, recipient: Option[Recipient]): Boolean = { - decider.isAvailable(feature, recipient) - } - - lazy val deciderGateBuilder = new DeciderGateBuilderWithIdHashing(decider) - - /** - * When useRandomRecipient is set to false, the decider is either completely on or off. - * When useRandomRecipient is set to true, the decider is on for the specified % of traffic. - */ - def isAvailable(feature: String, useRandomRecipient: Boolean = true): Boolean = { - if (useRandomRecipient) isAvailable(feature, Some(RandomRecipient)) - else isAvailable(feature, None) - } - - /*** - * Decide whether the decider is available for a specific id using SimpleRecipient(id). - */ - def isAvailableForId( - id: Long, - deciderConstants: String - ): Boolean = { - // Note: SimpleRecipient does expose a `val isUser = true` field which is not correct if the Id is not a user Id. - // However this field does not appear to be used anywhere in source. - decider.isAvailable(deciderConstants, Some(SimpleRecipient(id))) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala deleted file mode 100644 index 518ea53db..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/DeciderKey.scala +++ /dev/null @@ -1,67 +0,0 @@ -package com.twitter.cr_mixer.param.decider - -import com.twitter.servo.decider.DeciderKeyEnum - -object DeciderConstants { - val enableHealthSignalsScoreDeciderKey = "enable_tweet_health_score" - val enableUTGRealTimeTweetEngagementScoreDeciderKey = "enable_utg_realtime_tweet_engagement_score" - val enableUserAgathaScoreDeciderKey = "enable_user_agatha_score" - val enableUserTweetEntityGraphTrafficDeciderKey = "enable_user_tweet_entity_graph_traffic" - val enableUserTweetGraphTrafficDeciderKey = "enable_user_tweet_graph_traffic" - val enableUserVideoGraphTrafficDeciderKey = "enable_user_video_graph_traffic" - val enableUserAdGraphTrafficDeciderKey = "enable_user_ad_graph_traffic" - val enableSimClustersANN2DarkTrafficDeciderKey = "enable_simclusters_ann_2_dark_traffic" - val enableQigSimilarTweetsTrafficDeciderKey = "enable_qig_similar_tweets_traffic" - val enableFRSTrafficDeciderKey = "enable_frs_traffic" - val upperFunnelPerStepScribeRate = "upper_funnel_per_step_scribe_rate" - val kafkaMessageScribeSampleRate = "kafka_message_scribe_sample_rate" - val enableRealGraphMhStoreDeciderKey = "enable_real_graph_mh_store" - val topLevelApiDdgMetricsScribeRate = "top_level_api_ddg_metrics_scribe_rate" - val adsRecommendationsPerExperimentScribeRate = "ads_recommendations_per_experiment_scribe_rate" - val enableScribeForBlueVerifiedTweetCandidates = - "enable_scribe_for_blue_verified_tweet_candidates" - - val enableUserStateStoreDeciderKey = "enable_user_state_store" - val enableUserMediaRepresentationStoreDeciderKey = - "enable_user_media_representation_store" - val enableMagicRecsRealTimeAggregatesStoreDeciderKey = - "enable_magic_recs_real_time_aggregates_store" - - val enableEarlybirdTrafficDeciderKey = "enable_earlybird_traffic" - - val enableTopicTweetTrafficDeciderKey = "enable_topic_tweet_traffic" - - val getTweetRecommendationsCacheRate = "get_tweet_recommendations_cache_rate" -} - -object DeciderKey extends DeciderKeyEnum { - - val enableHealthSignalsScoreDeciderKey: Value = Value( - DeciderConstants.enableHealthSignalsScoreDeciderKey - ) - - val enableUtgRealTimeTweetEngagementScoreDeciderKey: Value = Value( - DeciderConstants.enableUTGRealTimeTweetEngagementScoreDeciderKey - ) - val enableUserAgathaScoreDeciderKey: Value = Value( - DeciderConstants.enableUserAgathaScoreDeciderKey - ) - val enableUserMediaRepresentationStoreDeciderKey: Value = Value( - DeciderConstants.enableUserMediaRepresentationStoreDeciderKey - ) - - val enableMagicRecsRealTimeAggregatesStore: Value = Value( - DeciderConstants.enableMagicRecsRealTimeAggregatesStoreDeciderKey - ) - - val enableUserStateStoreDeciderKey: Value = Value( - DeciderConstants.enableUserStateStoreDeciderKey - ) - - val enableRealGraphMhStoreDeciderKey: Value = Value( - DeciderConstants.enableRealGraphMhStoreDeciderKey - ) - - val enableEarlybirdTrafficDeciderKey: Value = Value( - DeciderConstants.enableEarlybirdTrafficDeciderKey) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/EndpointLoadShedder.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/EndpointLoadShedder.scala deleted file mode 100644 index a53e629a9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider/EndpointLoadShedder.scala +++ /dev/null @@ -1,57 +0,0 @@ -package com.twitter.cr_mixer.param.decider - -import com.twitter.decider.Decider -import com.twitter.decider.RandomRecipient -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import javax.inject.Inject -import scala.util.control.NoStackTrace - -/* - Provides deciders-controlled load shedding for a given Product from a given endpoint. - The format of the decider keys is: - - enable_loadshedding__ - E.g.: - enable_loadshedding_getTweetRecommendations_Notifications - - Deciders are fractional, so a value of 50.00 will drop 50% of responses. If a decider key is not - defined for a particular endpoint/product combination, those requests will always be - served. - - We should therefore aim to define keys for the endpoints/product we care most about in decider.yml, - so that we can control them during incidents. - */ -case class EndpointLoadShedder @Inject() ( - decider: Decider, - statsReceiver: StatsReceiver) { - import EndpointLoadShedder._ - - // Fall back to False for any undefined key - private val deciderWithFalseFallback: Decider = decider.orElse(Decider.False) - private val keyPrefix = "enable_loadshedding" - private val scopedStats = statsReceiver.scope("EndpointLoadShedder") - - def apply[T](endpointName: String, product: String)(serve: => Future[T]): Future[T] = { - /* - Checks if either per-product or top-level load shedding is enabled - If both are enabled at different percentages, load shedding will not be perfectly calculable due - to salting of hash (i.e. 25% load shed for Product x + 25% load shed for overall does not - result in 50% load shed for x) - */ - val keyTyped = s"${keyPrefix}_${endpointName}_$product" - val keyTopLevel = s"${keyPrefix}_${endpointName}" - - if (deciderWithFalseFallback.isAvailable(keyTopLevel, recipient = Some(RandomRecipient))) { - scopedStats.counter(keyTopLevel).incr - Future.exception(LoadSheddingException) - } else if (deciderWithFalseFallback.isAvailable(keyTyped, recipient = Some(RandomRecipient))) { - scopedStats.counter(keyTyped).incr - Future.exception(LoadSheddingException) - } else serve - } -} - -object EndpointLoadShedder { - object LoadSheddingException extends Exception with NoStackTrace -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/BUILD deleted file mode 100644 index 139ecd4c7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/BUILD +++ /dev/null @@ -1,30 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:content-recommender-common-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "frigate/frigate-common:base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util", - "hydra/common/libraries/src/main/scala/com/twitter/hydra/common/model_config", - "hydra/partition/thrift/src/main/thrift:thrift-scala", - "hydra/root/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/ml/api:data-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/DefaultRanker.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/DefaultRanker.scala deleted file mode 100644 index 2ae91642b..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/DefaultRanker.scala +++ /dev/null @@ -1,23 +0,0 @@ -package com.twitter.cr_mixer.ranker - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * Keep the same order as the input. - */ -@Singleton -class DefaultRanker() { - def rank( - candidates: Seq[BlendedCandidate], - ): Future[Seq[RankedCandidate]] = { - val candidateSize = candidates.size - val rankedCandidates = candidates.zipWithIndex.map { - case (candidate, index) => - candidate.toRankedCandidate((candidateSize - index).toDouble) - } - Future.value(rankedCandidates) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/SwitchRanker.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/SwitchRanker.scala deleted file mode 100644 index da44f664e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/ranker/SwitchRanker.scala +++ /dev/null @@ -1,46 +0,0 @@ -package com.twitter.cr_mixer.ranker - -import com.twitter.cr_mixer.model.BlendedCandidate -import com.twitter.cr_mixer.model.CrCandidateGeneratorQuery -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.util.Future -import com.twitter.util.JavaTimer -import com.twitter.util.Time -import com.twitter.util.Timer -import javax.inject.Inject -import javax.inject.Singleton - -/** - * CR-Mixer internal ranker - */ -@Singleton -class SwitchRanker @Inject() ( - defaultRanker: DefaultRanker, - globalStats: StatsReceiver) { - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) - implicit val timer: Timer = new JavaTimer(true) - - def rank( - query: CrCandidateGeneratorQuery, - candidates: Seq[BlendedCandidate], - ): Future[Seq[RankedCandidate]] = { - defaultRanker.rank(candidates) - } - -} - -object SwitchRanker { - - /** Prefers candidates generated from sources with the latest timestamps. - * The newer the source signal, the higher a candidate ranks. - * This ordering biases against consumer-based candidates because their timestamp defaults to 0 - */ - val TimestampOrder: Ordering[RankedCandidate] = - math.Ordering - .by[RankedCandidate, Time]( - _.reasonChosen.sourceInfoOpt - .flatMap(_.sourceEventTime) - .getOrElse(Time.fromMilliseconds(0L))) - .reverse -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/BUILD deleted file mode 100644 index 8e6ae5049..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/BUILD +++ /dev/null @@ -1,22 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication", - "finagle/finagle-core/src/main", - "frigate/frigate-common:base", - "frigate/frigate-common:util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "scrooge/scrooge-serializer/src/main/scala", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "util-internal/scribe/src/main/scala/com/twitter/logging", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/ScribeCategory.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/ScribeCategory.scala deleted file mode 100644 index b86c9174f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/scribe/ScribeCategory.scala +++ /dev/null @@ -1,64 +0,0 @@ -package com.twitter.cr_mixer.scribe - -/** - * Categories define scribe categories used in cr-mixer service. - */ -object ScribeCategories { - lazy val AllCategories = - List(AbDecider, TopLevelApiDdgMetrics, TweetsRecs) - - /** - * AbDecider represents scribe logs for experiments - */ - lazy val AbDecider: ScribeCategory = ScribeCategory( - "abdecider_scribe", - "client_event" - ) - - /** - * Top-Level Client event scribe logs, to record changes in system metrics (e.g. latency, - * candidates returned, empty rate ) per experiment bucket, and store them in DDG metric group - */ - lazy val TopLevelApiDdgMetrics: ScribeCategory = ScribeCategory( - "top_level_api_ddg_metrics_scribe", - "client_event" - ) - - lazy val TweetsRecs: ScribeCategory = ScribeCategory( - "get_tweets_recommendations_scribe", - "cr_mixer_get_tweets_recommendations" - ) - - lazy val VITTweetsRecs: ScribeCategory = ScribeCategory( - "get_vit_tweets_recommendations_scribe", - "cr_mixer_get_vit_tweets_recommendations" - ) - - lazy val RelatedTweets: ScribeCategory = ScribeCategory( - "get_related_tweets_scribe", - "cr_mixer_get_related_tweets" - ) - - lazy val UtegTweets: ScribeCategory = ScribeCategory( - "get_uteg_tweets_scribe", - "cr_mixer_get_uteg_tweets" - ) - - lazy val AdsRecommendations: ScribeCategory = ScribeCategory( - "get_ads_recommendations_scribe", - "cr_mixer_get_ads_recommendations" - ) -} - -/** - * Category represents each scribe log data. - * - * @param loggerFactoryNode loggerFactory node name in cr-mixer associated with this scribe category - * @param scribeCategory scribe category name (globally unique at Twitter) - */ -case class ScribeCategory( - loggerFactoryNode: String, - scribeCategory: String) { - def getProdLoggerFactoryNode: String = loggerFactoryNode - def getStagingLoggerFactoryNode: String = "staging_" + loggerFactoryNode -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/BUILD.bazel b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/BUILD.bazel deleted file mode 100644 index 8fa46c772..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/BUILD.bazel +++ /dev/null @@ -1,15 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/javax/inject:javax.inject", - "configapi/configapi-core", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/pipeline/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product", - "product-mixer/core/src/main/scala/com/twitter/product_mixer/core/product/registry", - "stitch/stitch-core", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/CrMixerAlertNotificationConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/CrMixerAlertNotificationConfig.scala deleted file mode 100644 index df0572ef1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/service/CrMixerAlertNotificationConfig.scala +++ /dev/null @@ -1,26 +0,0 @@ -package com.twitter.cr_mixer.service - -import com.twitter.product_mixer.core.functional_component.common.alert.Destination -import com.twitter.product_mixer.core.functional_component.common.alert.NotificationGroup - -/** - * Notifications (email, pagerduty, etc) can be specific per-alert but it is common for multiple - * products to share notification configuration. - * - * Our configuration uses only email notifications because SampleMixer is a demonstration service - * with neither internal nor customer-facing users. You will likely want to use a PagerDuty - * destination instead. For example: - * {{{ - * critical = Destination(pagerDutyKey = Some("your-pagerduty-key")) - * }}} - * - * - * For more information about how to get a PagerDuty key, see: - * https://docbird.twitter.biz/mon/how-to-guides.html?highlight=notificationgroup#set-up-email-pagerduty-and-slack-notifications - */ -object CrMixerAlertNotificationConfig { - val DefaultNotificationGroup: NotificationGroup = NotificationGroup( - warn = Destination(emails = Seq("no-reply@twitter.com")), - critical = Destination(emails = Seq("no-reply@twitter.com")) - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/BUILD deleted file mode 100644 index c7ae7c752..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/BUILD +++ /dev/null @@ -1,74 +0,0 @@ -scala_library( - sources = [ - "*.scala", - ], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/com/twitter/storehaus:memcache", - "3rdparty/jvm/io/grpc:grpc-api", - "3rdparty/jvm/io/grpc:grpc-auth", - "3rdparty/jvm/io/grpc:grpc-core", - "3rdparty/jvm/io/grpc:grpc-netty", - "3rdparty/jvm/io/grpc:grpc-protobuf", - "3rdparty/jvm/io/grpc:grpc-stub", - "3rdparty/jvm/io/opil:tensorflow-serving-client", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "ann/src/main/scala/com/twitter/ann/hnsw", - "ann/src/main/thrift/com/twitter/ann/common:ann-common-scala", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/exception", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "finagle-internal/finagle-grpc/src/main/scala", - "finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/client", - "finatra-internal/mtls-thriftmux/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", - "frigate/frigate-common:base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store/strato", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util", - "hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common", - "mediaservices/commons/src/main/scala:futuretracker", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift-scala", - "qig-ranker/thrift/src/main/thrift:thrift-scala", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/injection", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/simclustersann/multicluster", - "simclusters-ann/thrift/src/main/thrift:thrift-scala", - "snowflake/src/main/scala/com/twitter/snowflake/id", - "src/java/com/twitter/search/common/schema/base", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/scala/com/twitter/cortex/ml/embeddings/common:Helpers", - "src/scala/com/twitter/ml/featurestore/lib", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/ml/api:embedding-scala", - "src/thrift/com/twitter/recos:recos-common-scala", - "src/thrift/com/twitter/recos/user_ad_graph:user_ad_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_entity_graph:user_tweet_entity_graph-scala", - "src/thrift/com/twitter/recos/user_tweet_graph:user_tweet_graph-scala", - "src/thrift/com/twitter/recos/user_video_graph:user_video_graph-scala", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/common:ranking-scala", - "src/thrift/com/twitter/search/query_interaction_graph/service:qig-service-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/topic_recos:topic_recos-thrift-scala", - "src/thrift/com/twitter/trends/trip_v1:trip-tweets-thrift-scala", - "src/thrift/com/twitter/twistly:twistly-scala", - "strato/src/main/scala/com/twitter/strato/client", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/CertoTopicTweetSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/CertoTopicTweetSimilarityEngine.scala deleted file mode 100644 index a57085d0f..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/CertoTopicTweetSimilarityEngine.scala +++ /dev/null @@ -1,94 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.google.inject.Inject -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.TopicTweetParams -import com.twitter.cr_mixer.similarity_engine.CertoTopicTweetSimilarityEngine._ -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.topic_recos.thriftscala._ -import com.twitter.util.Future - -@Singleton -case class CertoTopicTweetSimilarityEngine @Inject() ( - @Named(ModuleNames.CertoStratoStoreName) certoStratoStore: ReadableStore[ - TopicId, - Seq[TweetWithScores] - ], - statsReceiver: StatsReceiver) - extends ReadableStore[EngineQuery[Query], Seq[TopicTweetWithScore]] { - - private val name: String = this.getClass.getSimpleName - private val stats = statsReceiver.scope(name) - - override def get(query: EngineQuery[Query]): Future[Option[Seq[TopicTweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(stats) { - topTweetsByFollowerL2NormalizedScore.get(query).map { - _.map { topicTopTweets => - topicTopTweets.map { topicTweet => - TopicTweetWithScore( - tweetId = topicTweet.tweetId, - score = topicTweet.scores.followerL2NormalizedCosineSimilarity8HrHalfLife, - similarityEngineType = SimilarityEngineType.CertoTopicTweet - ) - } - } - } - } - } - - private val topTweetsByFollowerL2NormalizedScore: ReadableStore[EngineQuery[Query], Seq[ - TweetWithScores - ]] = { - ReadableStore.fromFnFuture { query: EngineQuery[Query] => - StatsUtil.trackOptionItemsStats(stats) { - for { - topKTweetsWithScores <- certoStratoStore.get(query.storeQuery.topicId) - } yield { - topKTweetsWithScores.map( - _.filter( - _.scores.followerL2NormalizedCosineSimilarity8HrHalfLife >= query.storeQuery.certoScoreTheshold) - .take(query.storeQuery.maxCandidates)) - } - } - } - } -} - -object CertoTopicTweetSimilarityEngine { - - // Query is used as a cache key. Do not add any user level information in this. - case class Query( - topicId: TopicId, - maxCandidates: Int, - certoScoreTheshold: Double) - - def fromParams( - topicId: TopicId, - isVideoOnly: Boolean, - params: configapi.Params, - ): EngineQuery[Query] = { - - val maxCandidates = if (isVideoOnly) { - params(TopicTweetParams.MaxCertoCandidatesParam) * 2 - } else { - params(TopicTweetParams.MaxCertoCandidatesParam) - } - - EngineQuery( - Query( - topicId = topicId, - maxCandidates = maxCandidates, - certoScoreTheshold = params(TopicTweetParams.CertoScoreThresholdParam) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala deleted file mode 100644 index 599704fa7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerBasedWalsSimilarityEngine.scala +++ /dev/null @@ -1,246 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.ConsumerBasedWalsParams -import com.twitter.cr_mixer.similarity_engine.ConsumerBasedWalsSimilarityEngine.Query -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import io.grpc.ManagedChannel -import tensorflow.serving.Predict.PredictRequest -import tensorflow.serving.Predict.PredictResponse -import tensorflow.serving.PredictionServiceGrpc -import org.tensorflow.example.Feature -import org.tensorflow.example.Int64List -import org.tensorflow.example.FloatList -import org.tensorflow.example.Features -import org.tensorflow.example.Example -import tensorflow.serving.Model -import org.tensorflow.framework.TensorProto -import org.tensorflow.framework.DataType -import org.tensorflow.framework.TensorShapeProto -import com.twitter.finagle.grpc.FutureConverters -import java.util.ArrayList -import java.lang -import com.twitter.util.Return -import com.twitter.util.Throw -import java.util.concurrent.ConcurrentHashMap -import scala.jdk.CollectionConverters._ - -// Stats object maintain a set of stats that are specific to the Wals Engine. -case class WalsStats(scope: String, scopedStats: StatsReceiver) { - - val requestStat = scopedStats.scope(scope) - val inputSignalSize = requestStat.stat("input_signal_size") - - val latency = requestStat.stat("latency_ms") - val latencyOnError = requestStat.stat("error_latency_ms") - val latencyOnSuccess = requestStat.stat("success_latency_ms") - - val requests = requestStat.counter("requests") - val success = requestStat.counter("success") - val failures = requestStat.scope("failures") - - def onFailure(t: Throwable, startTimeMs: Long) { - val duration = System.currentTimeMillis() - startTimeMs - latency.add(duration) - latencyOnError.add(duration) - failures.counter(t.getClass.getName).incr() - } - - def onSuccess(startTimeMs: Long) { - val duration = System.currentTimeMillis() - startTimeMs - latency.add(duration) - latencyOnSuccess.add(duration) - success.incr() - } -} - -// StatsMap maintains a mapping from Model's input signature to a stats receiver -// The Wals model suports multiple input signature which can run different graphs internally and -// can have a different performance profile. -// Invoking StatsReceiver.stat() on each request can create a new stat object and can be expensive -// in performance critical paths. -object WalsStatsMap { - val mapping = new ConcurrentHashMap[String, WalsStats]() - - def get(scope: String, scopedStats: StatsReceiver): WalsStats = { - mapping.computeIfAbsent(scope, (scope) => WalsStats(scope, scopedStats)) - } -} - -case class ConsumerBasedWalsSimilarityEngine( - homeNaviGRPCClient: ManagedChannel, - adsFavedNaviGRPCClient: ManagedChannel, - adsMonetizableNaviGRPCClient: ManagedChannel, - statsReceiver: StatsReceiver) - extends ReadableStore[ - Query, - Seq[TweetWithScore] - ] { - - override def get( - query: ConsumerBasedWalsSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - val startTimeMs = System.currentTimeMillis() - val stats = - WalsStatsMap.get( - query.wilyNsName + "/" + query.modelSignatureName, - statsReceiver.scope("NaviPredictionService") - ) - stats.requests.incr() - stats.inputSignalSize.add(query.sourceIds.size) - try { - // avoid inference calls is source signals are empty - if (query.sourceIds.isEmpty) { - Future.value(Some(Seq.empty)) - } else { - val grpcClient = query.wilyNsName match { - case "navi-wals-recommended-tweets-home-client" => homeNaviGRPCClient - case "navi-wals-ads-faved-tweets" => adsFavedNaviGRPCClient - case "navi-wals-ads-monetizable-tweets" => adsFavedNaviGRPCClient - // default to homeNaviGRPCClient - case _ => homeNaviGRPCClient - } - val stub = PredictionServiceGrpc.newFutureStub(grpcClient) - val inferRequest = getModelInput(query) - - FutureConverters - .RichListenableFuture(stub.predict(inferRequest)).toTwitter - .transform { - case Return(resp) => - stats.onSuccess(startTimeMs) - Future.value(Some(getModelOutput(query, resp))) - case Throw(e) => - stats.onFailure(e, startTimeMs) - Future.exception(e) - } - } - } catch { - case e: Throwable => Future.exception(e) - } - } - - def getFeaturesForRecommendations(query: ConsumerBasedWalsSimilarityEngine.Query): Example = { - val tweetIds = new ArrayList[lang.Long]() - val tweetFaveWeight = new ArrayList[lang.Float]() - - query.sourceIds.foreach { sourceInfo => - val weight = sourceInfo.sourceType match { - case SourceType.TweetFavorite | SourceType.Retweet => 1.0f - // currently no-op - as we do not get negative signals - case SourceType.TweetDontLike | SourceType.TweetReport | SourceType.AccountMute | - SourceType.AccountBlock => - 0.0f - case _ => 0.0f - } - sourceInfo.internalId match { - case InternalId.TweetId(tweetId) => - tweetIds.add(tweetId) - tweetFaveWeight.add(weight) - case _ => - throw new IllegalArgumentException( - s"Invalid InternalID - does not contain TweetId for Source Signal: ${sourceInfo}") - } - } - - val tweetIdsFeature = - Feature - .newBuilder().setInt64List( - Int64List - .newBuilder().addAllValue(tweetIds).build() - ).build() - - val tweetWeightsFeature = Feature - .newBuilder().setFloatList( - FloatList.newBuilder().addAllValue(tweetFaveWeight).build()).build() - - val features = Features - .newBuilder() - .putFeature("tweet_ids", tweetIdsFeature) - .putFeature("tweet_weights", tweetWeightsFeature) - .build() - Example.newBuilder().setFeatures(features).build() - } - - def getModelInput(query: ConsumerBasedWalsSimilarityEngine.Query): PredictRequest = { - val tfExample = getFeaturesForRecommendations(query) - - val inferenceRequest = PredictRequest - .newBuilder() - .setModelSpec( - Model.ModelSpec - .newBuilder() - .setName(query.modelName) - .setSignatureName(query.modelSignatureName)) - .putInputs( - query.modelInputName, - TensorProto - .newBuilder() - .setDtype(DataType.DT_STRING) - .setTensorShape(TensorShapeProto - .newBuilder() - .addDim(TensorShapeProto.Dim.newBuilder().setSize(1))) - .addStringVal(tfExample.toByteString) - .build() - ).build() - inferenceRequest - } - - def getModelOutput(query: Query, response: PredictResponse): Seq[TweetWithScore] = { - val outputName = query.modelOutputName - if (response.containsOutputs(outputName)) { - val tweetList = response.getOutputsMap - .get(outputName) - .getInt64ValList.asScala - tweetList.zip(tweetList.size to 1 by -1).map { (tweetWithScore) => - TweetWithScore(tweetWithScore._1, tweetWithScore._2.toLong) - } - } else { - Seq.empty - } - } -} - -object ConsumerBasedWalsSimilarityEngine { - case class Query( - sourceIds: Seq[SourceInfo], - modelName: String, - modelInputName: String, - modelOutputName: String, - modelSignatureName: String, - wilyNsName: String, - ) - - def fromParams( - sourceIds: Seq[SourceInfo], - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceIds, - params(ConsumerBasedWalsParams.ModelNameParam), - params(ConsumerBasedWalsParams.ModelInputNameParam), - params(ConsumerBasedWalsParams.ModelOutputNameParam), - params(ConsumerBasedWalsParams.ModelSignatureNameParam), - params(ConsumerBasedWalsParams.WilyNsNameParam), - ), - params - ) - } - - def toSimilarityEngineInfo( - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ConsumerBasedWalsANN, - modelId = None, - score = Some(score)) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngine.scala deleted file mode 100644 index 82a074208..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTripSimilarityEngine.scala +++ /dev/null @@ -1,118 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.TripTweetWithScore -import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedTripParams -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.ClusterId -import com.twitter.simclusters_v2.common.SimClustersEmbedding -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.timelines.configapi.Params -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.Cluster -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.ClusterDomain -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripTweet -import com.twitter.trends.trip_v1.trip_tweets.thriftscala.TripDomain -import com.twitter.util.Future - -case class TripEngineQuery( - modelId: String, - sourceId: InternalId, - tripSourceId: String, - maxResult: Int, - params: Params) - -case class ConsumerEmbeddingBasedTripSimilarityEngine( - embeddingStoreLookUpMap: Map[String, ReadableStore[UserId, SimClustersEmbedding]], - tripCandidateSource: ReadableStore[TripDomain, Seq[TripTweet]], - statsReceiver: StatsReceiver, -) extends ReadableStore[TripEngineQuery, Seq[TripTweetWithScore]] { - import ConsumerEmbeddingBasedTripSimilarityEngine._ - - private val scopedStats = statsReceiver.scope(name) - private def fetchTopClusters(query: TripEngineQuery): Future[Option[Seq[ClusterId]]] = { - query.sourceId match { - case InternalId.UserId(userId) => - val embeddingStore = embeddingStoreLookUpMap.getOrElse( - query.modelId, - throw new IllegalArgumentException( - s"${this.getClass.getSimpleName}: " + - s"ModelId ${query.modelId} does not exist for embeddingStore" - ) - ) - embeddingStore.get(userId).map(_.map(_.topClusterIds(MaxClusters))) - case _ => - Future.None - } - } - private def fetchCandidates( - topClusters: Seq[ClusterId], - tripSourceId: String - ): Future[Seq[Seq[TripTweetWithScore]]] = { - Future - .collect { - topClusters.map { clusterId => - tripCandidateSource - .get( - TripDomain( - sourceId = tripSourceId, - clusterDomain = Some( - ClusterDomain(simCluster = Some(Cluster(clusterIntId = Some(clusterId))))))).map { - _.map { - _.collect { - case TripTweet(tweetId, score) => - TripTweetWithScore(tweetId, score) - } - }.getOrElse(Seq.empty).take(MaxNumResultsPerCluster) - } - } - } - } - - override def get(engineQuery: TripEngineQuery): Future[Option[Seq[TripTweetWithScore]]] = { - val fetchTopClustersStat = scopedStats.scope(engineQuery.modelId).scope("fetchTopClusters") - val fetchCandidatesStat = scopedStats.scope(engineQuery.modelId).scope("fetchCandidates") - - for { - topClustersOpt <- StatsUtil.trackOptionStats(fetchTopClustersStat) { - fetchTopClusters(engineQuery) - } - candidates <- StatsUtil.trackItemsStats(fetchCandidatesStat) { - topClustersOpt match { - case Some(topClusters) => fetchCandidates(topClusters, engineQuery.tripSourceId) - case None => Future.Nil - } - } - } yield { - val interleavedTweets = InterleaveUtil.interleave(candidates) - val dedupCandidates = interleavedTweets - .groupBy(_.tweetId).flatMap { - case (_, tweetWithScoreSeq) => tweetWithScoreSeq.sortBy(-_.score).take(1) - }.toSeq.take(engineQuery.maxResult) - Some(dedupCandidates) - } - } -} - -object ConsumerEmbeddingBasedTripSimilarityEngine { - private val MaxClusters: Int = 8 - private val MaxNumResultsPerCluster: Int = 25 - private val name: String = this.getClass.getSimpleName - - def fromParams( - modelId: String, - sourceId: InternalId, - params: configapi.Params - ): TripEngineQuery = { - TripEngineQuery( - modelId = modelId, - sourceId = sourceId, - tripSourceId = params(ConsumerEmbeddingBasedTripParams.SourceIdParam), - maxResult = params(ConsumerEmbeddingBasedTripParams.MaxNumCandidatesParam), - params = params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngine.scala deleted file mode 100644 index ed722f3eb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwHINSimilarityEngine.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedTwHINParams -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.timelines.configapi - -object ConsumerEmbeddingBasedTwHINSimilarityEngine { - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): HnswANNEngineQuery = { - HnswANNEngineQuery( - sourceId = sourceId, - modelId = params(ConsumerEmbeddingBasedTwHINParams.ModelIdParam), - params = params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngine.scala deleted file mode 100644 index c63d517d6..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumerEmbeddingBasedTwoTowerSimilarityEngine.scala +++ /dev/null @@ -1,18 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.param.ConsumerEmbeddingBasedTwoTowerParams -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.timelines.configapi - -object ConsumerEmbeddingBasedTwoTowerSimilarityEngine { - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): HnswANNEngineQuery = { - HnswANNEngineQuery( - sourceId = sourceId, - modelId = params(ConsumerEmbeddingBasedTwoTowerParams.ModelIdParam), - params = params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngine.scala deleted file mode 100644 index 585edc584..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserAdGraphSimilarityEngine.scala +++ /dev/null @@ -1,90 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.ConsumersBasedUserAdGraphParams -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest -import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * This store uses the graph based input (a list of userIds) - * to query consumersBasedUserAdGraph and get their top engaged ad tweets - */ -@Singleton -case class ConsumersBasedUserAdGraphSimilarityEngine( - consumersBasedUserAdGraphStore: ReadableStore[ - ConsumersBasedRelatedAdRequest, - RelatedAdResponse - ], - statsReceiver: StatsReceiver) - extends ReadableStore[ - ConsumersBasedUserAdGraphSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - override def get( - query: ConsumersBasedUserAdGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - val consumersBasedRelatedAdRequest = - ConsumersBasedRelatedAdRequest( - query.seedWithScores.keySet.toSeq, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - minScore = Some(query.minScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - consumersBasedUserAdGraphStore - .get(consumersBasedRelatedAdRequest) - .map { relatedAdResponseOpt => - relatedAdResponseOpt.map { relatedAdResponse => - relatedAdResponse.adTweets.map { tweet => - TweetWithScore(tweet.adTweetId, tweet.score) - } - } - } - } -} - -object ConsumersBasedUserAdGraphSimilarityEngine { - - case class Query( - seedWithScores: Map[UserId, Double], - maxResults: Int, - minCooccurrence: Int, - minScore: Double, - maxTweetAgeInHours: Int) - - def toSimilarityEngineInfo( - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ConsumersBasedUserAdGraph, - modelId = None, - score = Some(score)) - } - - def fromParams( - seedWithScores: Map[UserId, Double], - params: configapi.Params, - ): EngineQuery[Query] = { - - EngineQuery( - Query( - seedWithScores = seedWithScores, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(ConsumersBasedUserAdGraphParams.MinCoOccurrenceParam), - minScore = params(ConsumersBasedUserAdGraphParams.MinScoreParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngine.scala deleted file mode 100644 index 633f5ee6d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ConsumersBasedUserVideoGraphSimilarityEngine.scala +++ /dev/null @@ -1,91 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.ConsumersBasedUserVideoGraphParams -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * This store uses the graph based input (a list of userIds) - * to query consumersBasedUserVideoGraph and get their top engaged tweets - */ -@Singleton -case class ConsumersBasedUserVideoGraphSimilarityEngine( - consumersBasedUserVideoGraphStore: ReadableStore[ - ConsumersBasedRelatedTweetRequest, - RelatedTweetResponse - ], - statsReceiver: StatsReceiver) - extends ReadableStore[ - ConsumersBasedUserVideoGraphSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - override def get( - query: ConsumersBasedUserVideoGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - val consumersBasedRelatedTweetRequest = - ConsumersBasedRelatedTweetRequest( - query.seedWithScores.keySet.toSeq, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - minScore = Some(query.minScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - consumersBasedUserVideoGraphStore - .get(consumersBasedRelatedTweetRequest) - .map { relatedTweetResponseOpt => - relatedTweetResponseOpt.map { relatedTweetResponse => - relatedTweetResponse.tweets.map { tweet => - TweetWithScore(tweet.tweetId, tweet.score) - } - } - } - } -} - -object ConsumersBasedUserVideoGraphSimilarityEngine { - - case class Query( - seedWithScores: Map[UserId, Double], - maxResults: Int, - minCooccurrence: Int, - minScore: Double, - maxTweetAgeInHours: Int) - - def toSimilarityEngineInfo( - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ConsumersBasedUserVideoGraph, - modelId = None, - score = Some(score)) - } - - def fromParamsForRealGraphIn( - seedWithScores: Map[UserId, Double], - params: configapi.Params, - ): EngineQuery[Query] = { - - EngineQuery( - Query( - seedWithScores = seedWithScores, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = - params(ConsumersBasedUserVideoGraphParams.RealGraphInMinCoOccurrenceParam), - minScore = params(ConsumersBasedUserVideoGraphParams.RealGraphInMinScoreParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/DiffusionBasedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/DiffusionBasedSimilarityEngine.scala deleted file mode 100644 index a1bc0e248..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/DiffusionBasedSimilarityEngine.scala +++ /dev/null @@ -1,73 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.simclusters_v2.thriftscala.TweetsWithScore -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -case class DiffusionBasedSimilarityEngine( - retweetBasedDiffusionRecsMhStore: ReadableStore[Long, TweetsWithScore], - statsReceiver: StatsReceiver) - extends ReadableStore[ - DiffusionBasedSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - override def get( - query: DiffusionBasedSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - - query.sourceId match { - case InternalId.UserId(userId) => - retweetBasedDiffusionRecsMhStore.get(userId).map { - _.map { tweetsWithScore => - { - tweetsWithScore.tweets - .map(tweet => TweetWithScore(tweet.tweetId, tweet.score)) - } - } - } - case _ => - Future.None - } - } -} - -object DiffusionBasedSimilarityEngine { - - val defaultScore: Double = 0.0 - - case class Query( - sourceId: InternalId, - ) - - def toSimilarityEngineInfo( - query: LookupEngineQuery[Query], - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.DiffusionBasedTweet, - modelId = Some(query.lookupKey), - score = Some(score)) - } - - def fromParams( - sourceId: InternalId, - modelId: String, - params: configapi.Params, - ): LookupEngineQuery[Query] = { - LookupEngineQuery( - Query(sourceId = sourceId), - modelId, - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdModelBasedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdModelBasedSimilarityEngine.scala deleted file mode 100644 index da82b4eb1..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdModelBasedSimilarityEngine.scala +++ /dev/null @@ -1,92 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery -import com.twitter.cr_mixer.similarity_engine.EarlybirdSimilarityEngineBase._ -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.finagle.tracing.Trace -import com.twitter.search.common.ranking.thriftscala.ThriftRankingParams -import com.twitter.search.common.ranking.thriftscala.ThriftScoringFunctionType -import com.twitter.search.earlybird.thriftscala.EarlybirdRequest -import com.twitter.search.earlybird.thriftscala.EarlybirdService -import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery -import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode -import com.twitter.search.earlybird.thriftscala.ThriftSearchRelevanceOptions -import com.twitter.simclusters_v2.common.UserId -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class EarlybirdModelBasedSimilarityEngine @Inject() ( - earlybirdSearchClient: EarlybirdService.MethodPerEndpoint, - timeoutConfig: TimeoutConfig, - stats: StatsReceiver) - extends EarlybirdSimilarityEngineBase[EarlybirdModelBasedSearchQuery] { - import EarlybirdModelBasedSimilarityEngine._ - override val statsReceiver: StatsReceiver = stats.scope(this.getClass.getSimpleName) - override def getEarlybirdRequest( - query: EarlybirdModelBasedSearchQuery - ): Option[EarlybirdRequest] = - if (query.seedUserIds.nonEmpty) - Some( - EarlybirdRequest( - searchQuery = getThriftSearchQuery(query), - clientId = Some(EarlybirdClientId), - timeoutMs = timeoutConfig.earlybirdServerTimeout.inMilliseconds.intValue(), - clientRequestID = Some(s"${Trace.id.traceId}"), - )) - else None -} - -object EarlybirdModelBasedSimilarityEngine { - case class EarlybirdModelBasedSearchQuery( - seedUserIds: Seq[UserId], - maxNumTweets: Int, - oldestTweetTimestampInSec: Option[UserId], - frsUserToScoresForScoreAdjustment: Option[Map[UserId, Double]]) - extends EarlybirdSearchQuery - - /** - * Used by Push Service - */ - val RealGraphScoringModel = "frigate_unified_engagement_rg" - val MaxHitsToProcess = 1000 - val MaxConsecutiveSameUser = 1 - - private def getModelBasedRankingParams( - authorSpecificScoreAdjustments: Map[Long, Double] - ): ThriftRankingParams = ThriftRankingParams( - `type` = Some(ThriftScoringFunctionType.ModelBased), - selectedModels = Some(Map(RealGraphScoringModel -> 1.0)), - applyBoosts = false, - authorSpecificScoreAdjustments = Some(authorSpecificScoreAdjustments) - ) - - private def getRelevanceOptions( - authorSpecificScoreAdjustments: Map[Long, Double], - ): ThriftSearchRelevanceOptions = { - ThriftSearchRelevanceOptions( - maxConsecutiveSameUser = Some(MaxConsecutiveSameUser), - rankingParams = Some(getModelBasedRankingParams(authorSpecificScoreAdjustments)), - maxHitsToProcess = Some(MaxHitsToProcess), - orderByRelevance = true - ) - } - - private def getThriftSearchQuery(query: EarlybirdModelBasedSearchQuery): ThriftSearchQuery = - ThriftSearchQuery( - serializedQuery = Some(f"(* [since_time ${query.oldestTweetTimestampInSec.getOrElse(0)}])"), - fromUserIDFilter64 = Some(query.seedUserIds), - numResults = query.maxNumTweets, - maxHitsToProcess = MaxHitsToProcess, - rankingMode = ThriftSearchRankingMode.Relevance, - relevanceOptions = - Some(getRelevanceOptions(query.frsUserToScoresForScoreAdjustment.getOrElse(Map.empty))), - facetFieldNames = Some(FacetsToFetch), - resultMetadataOptions = Some(MetadataOptions), - searcherId = None - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdRecencyBasedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdRecencyBasedSimilarityEngine.scala deleted file mode 100644 index 988d666a4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdRecencyBasedSimilarityEngine.scala +++ /dev/null @@ -1,86 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TweetWithAuthor -import com.twitter.cr_mixer.similarity_engine.EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -@Singleton -case class EarlybirdRecencyBasedSimilarityEngine @Inject() ( - @Named(ModuleNames.EarlybirdRecencyBasedWithoutRetweetsRepliesTweetsCache) - earlybirdRecencyBasedWithoutRetweetsRepliesTweetsCacheStore: ReadableStore[ - UserId, - Seq[TweetId] - ], - @Named(ModuleNames.EarlybirdRecencyBasedWithRetweetsRepliesTweetsCache) - earlybirdRecencyBasedWithRetweetsRepliesTweetsCacheStore: ReadableStore[ - UserId, - Seq[TweetId] - ], - timeoutConfig: TimeoutConfig, - stats: StatsReceiver) - extends ReadableStore[EarlybirdRecencyBasedSearchQuery, Seq[TweetWithAuthor]] { - import EarlybirdRecencyBasedSimilarityEngine._ - val statsReceiver: StatsReceiver = stats.scope(this.getClass.getSimpleName) - - override def get( - query: EarlybirdRecencyBasedSearchQuery - ): Future[Option[Seq[TweetWithAuthor]]] = { - Future - .collect { - if (query.filterOutRetweetsAndReplies) { - query.seedUserIds.map { seedUserId => - StatsUtil.trackOptionItemsStats(statsReceiver.scope("WithoutRetweetsAndReplies")) { - earlybirdRecencyBasedWithoutRetweetsRepliesTweetsCacheStore - .get(seedUserId).map(_.map(_.map(tweetId => - TweetWithAuthor(tweetId = tweetId, authorId = seedUserId)))) - } - } - } else { - query.seedUserIds.map { seedUserId => - StatsUtil.trackOptionItemsStats(statsReceiver.scope("WithRetweetsAndReplies")) { - earlybirdRecencyBasedWithRetweetsRepliesTweetsCacheStore - .get(seedUserId) - .map(_.map(_.map(tweetId => - TweetWithAuthor(tweetId = tweetId, authorId = seedUserId)))) - } - } - } - } - .map { tweetWithAuthorList => - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - query.maxTweetAge) - tweetWithAuthorList - .flatMap(_.getOrElse(Seq.empty)) - .filter(tweetWithAuthor => - tweetWithAuthor.tweetId >= earliestTweetId // tweet age filter - && !query.excludedTweetIds - .contains(tweetWithAuthor.tweetId)) // excluded tweet filter - .sortBy(tweetWithAuthor => - -SnowflakeId.unixTimeMillisFromId(tweetWithAuthor.tweetId)) // sort by recency - .take(query.maxNumTweets) // take most recent N tweets - } - .map(result => Some(result)) - } - -} - -object EarlybirdRecencyBasedSimilarityEngine { - case class EarlybirdRecencyBasedSearchQuery( - seedUserIds: Seq[UserId], - maxNumTweets: Int, - excludedTweetIds: Set[TweetId], - maxTweetAge: Duration, - filterOutRetweetsAndReplies: Boolean) - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngine.scala deleted file mode 100644 index be23134eb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngine.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.TweetWithAuthor -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future - -class EarlybirdSimilarityEngine[ - Query, - EarlybirdSimilarityEngineStore <: ReadableStore[Query, Seq[TweetWithAuthor]] -]( - implementingStore: EarlybirdSimilarityEngineStore, - override val identifier: SimilarityEngineType, - globalStats: StatsReceiver, - engineConfig: SimilarityEngineConfig, -) extends SimilarityEngine[EngineQuery[Query], TweetWithAuthor] { - private val scopedStats = globalStats.scope("similarityEngine", identifier.toString) - - def getScopedStats: StatsReceiver = scopedStats - - def getCandidates(query: EngineQuery[Query]): Future[Option[Seq[TweetWithAuthor]]] = { - SimilarityEngine.getFromFn( - implementingStore.get, - query.storeQuery, - engineConfig, - query.params, - scopedStats - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineBase.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineBase.scala deleted file mode 100644 index ab4eb408e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineBase.scala +++ /dev/null @@ -1,56 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.TweetWithAuthor -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.search.earlybird.thriftscala.EarlybirdRequest -import com.twitter.search.earlybird.thriftscala.EarlybirdResponseCode -import com.twitter.search.earlybird.thriftscala.EarlybirdService -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future - -/** - * This trait is a base trait for Earlybird similarity engines. All Earlybird similarity - * engines extend from it and override the construction method for EarlybirdRequest - */ -trait EarlybirdSimilarityEngineBase[EarlybirdSearchQuery] - extends ReadableStore[EarlybirdSearchQuery, Seq[TweetWithAuthor]] { - def earlybirdSearchClient: EarlybirdService.MethodPerEndpoint - - def statsReceiver: StatsReceiver - - def getEarlybirdRequest(query: EarlybirdSearchQuery): Option[EarlybirdRequest] - - override def get(query: EarlybirdSearchQuery): Future[Option[Seq[TweetWithAuthor]]] = { - getEarlybirdRequest(query) - .map { earlybirdRequest => - earlybirdSearchClient - .search(earlybirdRequest).map { response => - response.responseCode match { - case EarlybirdResponseCode.Success => - val earlybirdSearchResult = - response.searchResults - .map( - _.results - .map(searchResult => - TweetWithAuthor( - searchResult.id, - // fromUserId should be there since MetadataOptions.getFromUserId = true - searchResult.metadata.map(_.fromUserId).getOrElse(0))).toSeq) - statsReceiver.scope("result").stat("size").add(earlybirdSearchResult.size) - earlybirdSearchResult - case e => - statsReceiver.scope("failures").counter(e.getClass.getSimpleName).incr() - Some(Seq.empty) - } - } - }.getOrElse(Future.None) - } -} - -object EarlybirdSimilarityEngineBase { - trait EarlybirdSearchQuery { - def seedUserIds: Seq[UserId] - def maxNumTweets: Int - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineRouter.scala deleted file mode 100644 index 3237f13f8..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdSimilarityEngineRouter.scala +++ /dev/null @@ -1,136 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_ModelBased -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_RecencyBased -import com.twitter.cr_mixer.model.EarlybirdSimilarityEngineType_TensorflowBased -import com.twitter.cr_mixer.model.TweetWithAuthor -import com.twitter.cr_mixer.param.EarlybirdFrsBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.EarlybirdFrsBasedCandidateGenerationParams.FrsBasedCandidateGenerationEarlybirdSimilarityEngineTypeParam -import com.twitter.cr_mixer.param.FrsParams.FrsBasedCandidateGenerationMaxCandidatesNumParam -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class EarlybirdSimilarityEngineRouter @Inject() ( - earlybirdRecencyBasedSimilarityEngine: EarlybirdSimilarityEngine[ - EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery, - EarlybirdRecencyBasedSimilarityEngine - ], - earlybirdModelBasedSimilarityEngine: EarlybirdSimilarityEngine[ - EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery, - EarlybirdModelBasedSimilarityEngine - ], - earlybirdTensorflowBasedSimilarityEngine: EarlybirdSimilarityEngine[ - EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery, - EarlybirdTensorflowBasedSimilarityEngine - ], - timeoutConfig: TimeoutConfig, - statsReceiver: StatsReceiver) - extends ReadableStore[EarlybirdSimilarityEngineRouter.Query, Seq[TweetWithAuthor]] { - import EarlybirdSimilarityEngineRouter._ - - override def get( - k: EarlybirdSimilarityEngineRouter.Query - ): Future[Option[Seq[TweetWithAuthor]]] = { - k.rankingMode match { - case EarlybirdSimilarityEngineType_RecencyBased => - earlybirdRecencyBasedSimilarityEngine.getCandidates(recencyBasedQueryFromParams(k)) - case EarlybirdSimilarityEngineType_ModelBased => - earlybirdModelBasedSimilarityEngine.getCandidates(modelBasedQueryFromParams(k)) - case EarlybirdSimilarityEngineType_TensorflowBased => - earlybirdTensorflowBasedSimilarityEngine.getCandidates(tensorflowBasedQueryFromParams(k)) - } - } -} - -object EarlybirdSimilarityEngineRouter { - case class Query( - searcherUserId: Option[UserId], - seedUserIds: Seq[UserId], - maxNumTweets: Int, - excludedTweetIds: Set[TweetId], - rankingMode: EarlybirdSimilarityEngineType, - frsUserToScoresForScoreAdjustment: Option[Map[UserId, Double]], - maxTweetAge: Duration, - filterOutRetweetsAndReplies: Boolean, - params: configapi.Params) - - def queryFromParams( - searcherUserId: Option[UserId], - seedUserIds: Seq[UserId], - excludedTweetIds: Set[TweetId], - frsUserToScoresForScoreAdjustment: Option[Map[UserId, Double]], - params: configapi.Params - ): Query = - Query( - searcherUserId, - seedUserIds, - maxNumTweets = params(FrsBasedCandidateGenerationMaxCandidatesNumParam), - excludedTweetIds, - rankingMode = - params(FrsBasedCandidateGenerationEarlybirdSimilarityEngineTypeParam).rankingMode, - frsUserToScoresForScoreAdjustment, - maxTweetAge = params( - EarlybirdFrsBasedCandidateGenerationParams.FrsBasedCandidateGenerationEarlybirdMaxTweetAge), - filterOutRetweetsAndReplies = params( - EarlybirdFrsBasedCandidateGenerationParams.FrsBasedCandidateGenerationEarlybirdFilterOutRetweetsAndReplies), - params - ) - - private def recencyBasedQueryFromParams( - query: Query - ): EngineQuery[EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery] = - EngineQuery( - EarlybirdRecencyBasedSimilarityEngine.EarlybirdRecencyBasedSearchQuery( - seedUserIds = query.seedUserIds, - maxNumTweets = query.maxNumTweets, - excludedTweetIds = query.excludedTweetIds, - maxTweetAge = query.maxTweetAge, - filterOutRetweetsAndReplies = query.filterOutRetweetsAndReplies - ), - query.params - ) - - private def tensorflowBasedQueryFromParams( - query: Query, - ): EngineQuery[EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery] = - EngineQuery( - EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery( - searcherUserId = query.searcherUserId, - seedUserIds = query.seedUserIds, - maxNumTweets = query.maxNumTweets, - // hard code the params below for now. Will move to FS after shipping the ddg - beforeTweetIdExclusive = None, - afterTweetIdExclusive = - Some(SnowflakeId.firstIdFor((Time.now - query.maxTweetAge).inMilliseconds)), - filterOutRetweetsAndReplies = query.filterOutRetweetsAndReplies, - useTensorflowRanking = true, - excludedTweetIds = query.excludedTweetIds, - maxNumHitsPerShard = 1000 - ), - query.params - ) - private def modelBasedQueryFromParams( - query: Query, - ): EngineQuery[EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery] = - EngineQuery( - EarlybirdModelBasedSimilarityEngine.EarlybirdModelBasedSearchQuery( - seedUserIds = query.seedUserIds, - maxNumTweets = query.maxNumTweets, - oldestTweetTimestampInSec = Some(query.maxTweetAge.ago.inSeconds), - frsUserToScoresForScoreAdjustment = query.frsUserToScoresForScoreAdjustment - ), - query.params - ) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala deleted file mode 100644 index 8df6ec711..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/EarlybirdTensorflowBasedSimilarityEngine.scala +++ /dev/null @@ -1,138 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.search.earlybird.thriftscala.EarlybirdRequest -import com.twitter.search.earlybird.thriftscala.EarlybirdService -import com.twitter.search.earlybird.thriftscala.ThriftSearchQuery -import com.twitter.util.Time -import com.twitter.search.common.query.thriftjava.thriftscala.CollectorParams -import com.twitter.search.common.ranking.thriftscala.ThriftRankingParams -import com.twitter.search.common.ranking.thriftscala.ThriftScoringFunctionType -import com.twitter.search.earlybird.thriftscala.ThriftSearchRelevanceOptions -import javax.inject.Inject -import javax.inject.Singleton -import EarlybirdSimilarityEngineBase._ -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.similarity_engine.EarlybirdTensorflowBasedSimilarityEngine.EarlybirdTensorflowBasedSearchQuery -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.EarlybirdClientId -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.FacetsToFetch -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetCollectorTerminationParams -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetEarlybirdQuery -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.MetadataOptions -import com.twitter.cr_mixer.util.EarlybirdSearchUtil.GetNamedDisjunctions -import com.twitter.search.earlybird.thriftscala.ThriftSearchRankingMode -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Duration - -@Singleton -case class EarlybirdTensorflowBasedSimilarityEngine @Inject() ( - earlybirdSearchClient: EarlybirdService.MethodPerEndpoint, - timeoutConfig: TimeoutConfig, - stats: StatsReceiver) - extends EarlybirdSimilarityEngineBase[EarlybirdTensorflowBasedSearchQuery] { - import EarlybirdTensorflowBasedSimilarityEngine._ - override val statsReceiver: StatsReceiver = stats.scope(this.getClass.getSimpleName) - override def getEarlybirdRequest( - query: EarlybirdTensorflowBasedSearchQuery - ): Option[EarlybirdRequest] = { - if (query.seedUserIds.nonEmpty) - Some( - EarlybirdRequest( - searchQuery = getThriftSearchQuery(query, timeoutConfig.earlybirdServerTimeout), - clientHost = None, - clientRequestID = None, - clientId = Some(EarlybirdClientId), - clientRequestTimeMs = Some(Time.now.inMilliseconds), - cachingParams = None, - timeoutMs = timeoutConfig.earlybirdServerTimeout.inMilliseconds.intValue(), - facetRequest = None, - termStatisticsRequest = None, - debugMode = 0, - debugOptions = None, - searchSegmentId = None, - returnStatusType = None, - successfulResponseThreshold = None, - querySource = None, - getOlderResults = Some(false), - followedUserIds = Some(query.seedUserIds), - adjustedProtectedRequestParams = None, - adjustedFullArchiveRequestParams = None, - getProtectedTweetsOnly = Some(false), - retokenizeSerializedQuery = None, - skipVeryRecentTweets = true, - experimentClusterToUse = None - )) - else None - } -} - -object EarlybirdTensorflowBasedSimilarityEngine { - case class EarlybirdTensorflowBasedSearchQuery( - searcherUserId: Option[UserId], - seedUserIds: Seq[UserId], - maxNumTweets: Int, - beforeTweetIdExclusive: Option[TweetId], - afterTweetIdExclusive: Option[TweetId], - filterOutRetweetsAndReplies: Boolean, - useTensorflowRanking: Boolean, - excludedTweetIds: Set[TweetId], - maxNumHitsPerShard: Int) - extends EarlybirdSearchQuery - - private def getThriftSearchQuery( - query: EarlybirdTensorflowBasedSearchQuery, - processingTimeout: Duration - ): ThriftSearchQuery = - ThriftSearchQuery( - serializedQuery = GetEarlybirdQuery( - query.beforeTweetIdExclusive, - query.afterTweetIdExclusive, - query.excludedTweetIds, - query.filterOutRetweetsAndReplies).map(_.serialize), - fromUserIDFilter64 = Some(query.seedUserIds), - numResults = query.maxNumTweets, - // Whether to collect conversation IDs. Remove it for now. - // collectConversationId = Gate.True(), // true for Home - rankingMode = ThriftSearchRankingMode.Relevance, - relevanceOptions = Some(getRelevanceOptions), - collectorParams = Some( - CollectorParams( - // numResultsToReturn defines how many results each EB shard will return to search root - numResultsToReturn = 1000, - // terminationParams.maxHitsToProcess is used for early terminating per shard results fetching. - terminationParams = - GetCollectorTerminationParams(query.maxNumHitsPerShard, processingTimeout) - )), - facetFieldNames = Some(FacetsToFetch), - resultMetadataOptions = Some(MetadataOptions), - searcherId = query.searcherUserId, - searchStatusIds = None, - namedDisjunctionMap = GetNamedDisjunctions(query.excludedTweetIds) - ) - - // The specific values of recap relevance/reranking options correspond to - // experiment: enable_recap_reranking_2988,timeline_internal_disable_recap_filter - // bucket : enable_rerank,disable_filter - private def getRelevanceOptions: ThriftSearchRelevanceOptions = { - ThriftSearchRelevanceOptions( - proximityScoring = true, - maxConsecutiveSameUser = Some(2), - rankingParams = Some(getTensorflowBasedRankingParams), - maxHitsToProcess = Some(500), - maxUserBlendCount = Some(3), - proximityPhraseWeight = 9.0, - returnAllResults = Some(true) - ) - } - - private def getTensorflowBasedRankingParams: ThriftRankingParams = { - ThriftRankingParams( - `type` = Some(ThriftScoringFunctionType.TensorflowBased), - selectedTensorflowModel = Some("timelines_rectweet_replica"), - minScore = -1.0e100, - applyBoosts = false, - authorSpecificScoreAdjustments = None - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/FilterUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/FilterUtil.scala deleted file mode 100644 index 4cd94d2bb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/FilterUtil.scala +++ /dev/null @@ -1,42 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.util.Duration -import com.twitter.util.Time - -object FilterUtil { - - /** Returns a list of tweets that are generated less than `maxTweetAgeHours` hours ago */ - def tweetAgeFilter( - candidates: Seq[TweetWithScore], - maxTweetAgeHours: Duration - ): Seq[TweetWithScore] = { - // Tweet IDs are approximately chronological (see http://go/snowflake), - // so we are building the earliest tweet id once - // The per-candidate logic here then be candidate.tweetId > earliestPermittedTweetId, which is far cheaper. - // See @cyao's phab on CrMixer generic age filter for reference https://phabricator.twitter.biz/D903188 - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAgeHours) - candidates.filter { candidate => candidate.tweetId >= earliestTweetId } - } - - /** Returns a list of tweet sources that are generated less than `maxTweetAgeHours` hours ago */ - def tweetSourceAgeFilter( - candidates: Seq[SourceInfo], - maxTweetSignalAgeHoursParam: Duration - ): Seq[SourceInfo] = { - // Tweet IDs are approximately chronological (see http://go/snowflake), - // so we are building the earliest tweet id once - // This filter applies to source signals. Some candidate source calls can be avoided if source signals - // can be filtered. - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetSignalAgeHoursParam) - candidates.filter { candidate => - candidate.internalId match { - case InternalId.TweetId(tweetId) => tweetId >= earliestTweetId - case _ => false - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/HnswANNSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/HnswANNSimilarityEngine.scala deleted file mode 100644 index 4a1422ce9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/HnswANNSimilarityEngine.scala +++ /dev/null @@ -1,187 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.ann.common.thriftscala.Distance -import com.twitter.ann.common.thriftscala.NearestNeighborQuery -import com.twitter.ann.hnsw.HnswCommon -import com.twitter.ann.hnsw.HnswParams -import com.twitter.bijection.Injection -import com.twitter.cortex.ml.embeddings.common.TweetKind -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.MemCacheConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.mediaservices.commons.codec.ArrayByteBufferCodec -import com.twitter.ml.api.thriftscala.{Embedding => ThriftEmbedding} -import com.twitter.ml.featurestore.lib -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future - -case class HnswANNEngineQuery( - modelId: String, - sourceId: InternalId, - params: Params, -) { - val cacheKey: String = s"${modelId}_${sourceId.toString}" -} - -/** - * This Engine looks for tweets whose similarity is close to a Source Dense Embedding. - * Only support Long based embedding lookup. UserId or TweetId. - * - * It provides HNSW specific implementations - * - * @param memCacheConfigOpt If specified, it will wrap the underlying store with a MemCache layer - * You should only enable this for cacheable queries, e.x. TweetIds. - * consumer based UserIds are generally not possible to cache. - */ -class HnswANNSimilarityEngine( - embeddingStoreLookUpMap: Map[String, ReadableStore[InternalId, ThriftEmbedding]], - annServiceLookUpMap: Map[String, AnnQueryService.MethodPerEndpoint], - globalStats: StatsReceiver, - override val identifier: SimilarityEngineType, - engineConfig: SimilarityEngineConfig, - memCacheConfigOpt: Option[MemCacheConfig[HnswANNEngineQuery]] = None) - extends SimilarityEngine[HnswANNEngineQuery, TweetWithScore] { - - private val MaxNumResults: Int = 200 - private val ef: Int = 800 - private val TweetIdByteInjection: Injection[lib.TweetId, Array[Byte]] = TweetKind.byteInjection - - private val scopedStats = globalStats.scope("similarityEngine", identifier.toString) - - def getScopedStats: StatsReceiver = scopedStats - - private def fetchEmbedding( - query: HnswANNEngineQuery, - ): Future[Option[ThriftEmbedding]] = { - val embeddingStore = embeddingStoreLookUpMap.getOrElse( - query.modelId, - throw new IllegalArgumentException( - s"${this.getClass.getSimpleName} ${identifier.toString}: " + - s"ModelId ${query.modelId} does not exist for embeddingStore" - ) - ) - - embeddingStore.get(query.sourceId) - } - - private def fetchCandidates( - query: HnswANNEngineQuery, - embedding: ThriftEmbedding - ): Future[Seq[TweetWithScore]] = { - val annService = annServiceLookUpMap.getOrElse( - query.modelId, - throw new IllegalArgumentException( - s"${this.getClass.getSimpleName} ${identifier.toString}: " + - s"ModelId ${query.modelId} does not exist for annStore" - ) - ) - - val hnswParams = HnswCommon.RuntimeParamsInjection.apply(HnswParams(ef)) - - val annQuery = - NearestNeighborQuery(embedding, withDistance = true, hnswParams, MaxNumResults) - - annService - .query(annQuery) - .map( - _.nearestNeighbors - .map { nearestNeighbor => - val candidateId = TweetIdByteInjection - .invert(ArrayByteBufferCodec.decode(nearestNeighbor.id)) - .toOption - .map(_.tweetId) - (candidateId, nearestNeighbor.distance) - }.collect { - case (Some(candidateId), Some(distance)) => - TweetWithScore(candidateId, toScore(distance)) - }) - } - - // Convert Distance to a score such that higher scores mean more similar. - def toScore(distance: Distance): Double = { - distance match { - case Distance.EditDistance(editDistance) => - // (-Infinite, 0.0] - 0.0 - editDistance.distance - case Distance.L2Distance(l2Distance) => - // (-Infinite, 0.0] - 0.0 - l2Distance.distance - case Distance.CosineDistance(cosineDistance) => - // [0.0 - 1.0] - 1.0 - cosineDistance.distance - case Distance.InnerProductDistance(innerProductDistance) => - // (-Infinite, Infinite) - 1.0 - innerProductDistance.distance - case Distance.UnknownUnionField(_) => - throw new IllegalStateException( - s"${this.getClass.getSimpleName} does not recognize $distance.toString" - ) - } - } - - private[similarity_engine] def getEmbeddingAndCandidates( - query: HnswANNEngineQuery - ): Future[Option[Seq[TweetWithScore]]] = { - - val fetchEmbeddingStat = scopedStats.scope(query.modelId).scope("fetchEmbedding") - val fetchCandidatesStat = scopedStats.scope(query.modelId).scope("fetchCandidates") - - for { - embeddingOpt <- StatsUtil.trackOptionStats(fetchEmbeddingStat) { fetchEmbedding(query) } - candidates <- StatsUtil.trackItemsStats(fetchCandidatesStat) { - - embeddingOpt match { - case Some(embedding) => fetchCandidates(query, embedding) - case None => Future.Nil - } - } - } yield { - Some(candidates) - } - } - - // Add memcache wrapper, if specified - private val store = { - val uncachedStore = ReadableStore.fromFnFuture(getEmbeddingAndCandidates) - - memCacheConfigOpt match { - case Some(config) => - SimilarityEngine.addMemCache( - underlyingStore = uncachedStore, - memCacheConfig = config, - statsReceiver = scopedStats - ) - case _ => uncachedStore - } - } - - def toSimilarityEngineInfo( - query: HnswANNEngineQuery, - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = this.identifier, - modelId = Some(query.modelId), - score = Some(score)) - } - - override def getCandidates( - engineQuery: HnswANNEngineQuery - ): Future[Option[Seq[TweetWithScore]]] = { - val versionedStats = globalStats.scope(engineQuery.modelId) - SimilarityEngine.getFromFn( - store.get, - engineQuery, - engineConfig, - engineQuery.params, - versionedStats - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/LookupSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/LookupSimilarityEngine.scala deleted file mode 100644 index c4e469899..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/LookupSimilarityEngine.scala +++ /dev/null @@ -1,78 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.MemCacheConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future - -case class LookupEngineQuery[Query]( - storeQuery: Query, // the actual Query type of the underlying store - lookupKey: String, - params: Params, -) - -/** - * This Engine provides a map interface for looking up different model implementations. - * It provides modelId level monitoring for free. - * - * Example use cases include OfflineSimClusters lookup - * - * - * @param versionedStoreMap A mapping from a modelId to a corresponding implementation - * @param memCacheConfigOpt If specified, it will wrap the underlying store with a MemCache layer - * You should only enable this for cacheable queries, e.x. TweetIds. - * consumer based UserIds are generally not possible to cache. - */ -class LookupSimilarityEngine[Query, Candidate <: Serializable]( - versionedStoreMap: Map[String, ReadableStore[Query, Seq[Candidate]]], // key = modelId - override val identifier: SimilarityEngineType, - globalStats: StatsReceiver, - engineConfig: SimilarityEngineConfig, - memCacheConfigOpt: Option[MemCacheConfig[Query]] = None) - extends SimilarityEngine[LookupEngineQuery[Query], Candidate] { - - private val scopedStats = globalStats.scope("similarityEngine", identifier.toString) - - private val underlyingLookupMap = { - memCacheConfigOpt match { - case Some(config) => - versionedStoreMap.map { - case (modelId, store) => - ( - modelId, - SimilarityEngine.addMemCache( - underlyingStore = store, - memCacheConfig = config, - keyPrefix = Some(modelId), - statsReceiver = scopedStats - ) - ) - } - case _ => versionedStoreMap - } - } - - override def getCandidates( - engineQuery: LookupEngineQuery[Query] - ): Future[Option[Seq[Candidate]]] = { - val versionedStore = - underlyingLookupMap - .getOrElse( - engineQuery.lookupKey, - throw new IllegalArgumentException( - s"${this.getClass.getSimpleName} ${identifier.toString}: ModelId ${engineQuery.lookupKey} does not exist" - ) - ) - - SimilarityEngine.getFromFn( - fn = versionedStore.get, - storeQuery = engineQuery.storeQuery, - engineConfig = engineConfig, - params = engineQuery.params, - scopedStats = scopedStats.scope(engineQuery.lookupKey) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ModelBasedANNStore.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ModelBasedANNStore.scala deleted file mode 100644 index 064bb8b1a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ModelBasedANNStore.scala +++ /dev/null @@ -1,136 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.ann.common.thriftscala.AnnQueryService -import com.twitter.ann.common.thriftscala.Distance -import com.twitter.ann.common.thriftscala.NearestNeighborQuery -import com.twitter.ann.common.thriftscala.NearestNeighborResult -import com.twitter.ann.hnsw.HnswCommon -import com.twitter.ann.hnsw.HnswParams -import com.twitter.bijection.Injection -import com.twitter.conversions.DurationOps._ -import com.twitter.cortex.ml.embeddings.common.TweetKind -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.mediaservices.commons.codec.ArrayByteBufferCodec -import com.twitter.ml.api.thriftscala.{Embedding => ThriftEmbedding} -import com.twitter.ml.featurestore.lib -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Duration -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * This store looks for tweets whose similarity is close to a Source Dense Embedding. - * Only support Long based embedding lookup. UserId or TweetId - */ -@Singleton -class ModelBasedANNStore( - embeddingStoreLookUpMap: Map[String, ReadableStore[InternalId, ThriftEmbedding]], - annServiceLookUpMap: Map[String, AnnQueryService.MethodPerEndpoint], - globalStats: StatsReceiver) - extends ReadableStore[ - ModelBasedANNStore.Query, - Seq[TweetWithScore] - ] { - - import ModelBasedANNStore._ - - private val stats = globalStats.scope(this.getClass.getSimpleName) - private val fetchEmbeddingStat = stats.scope("fetchEmbedding") - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get(query: Query): Future[Option[Seq[TweetWithScore]]] = { - for { - maybeEmbedding <- StatsUtil.trackOptionStats(fetchEmbeddingStat.scope(query.modelId)) { - fetchEmbedding(query) - } - maybeCandidates <- StatsUtil.trackOptionStats(fetchCandidatesStat.scope(query.modelId)) { - maybeEmbedding match { - case Some(embedding) => - fetchCandidates(query, embedding) - case None => - Future.None - } - } - } yield { - maybeCandidates.map( - _.nearestNeighbors - .map { nearestNeighbor => - val candidateId = TweetIdByteInjection - .invert(ArrayByteBufferCodec.decode(nearestNeighbor.id)) - .toOption - .map(_.tweetId) - (candidateId, nearestNeighbor.distance) - }.collect { - case (Some(candidateId), Some(distance)) => - TweetWithScore(candidateId, toScore(distance)) - }) - } - } - - private def fetchEmbedding(query: Query): Future[Option[ThriftEmbedding]] = { - embeddingStoreLookUpMap.get(query.modelId) match { - case Some(embeddingStore) => - embeddingStore.get(query.sourceId) - case _ => - Future.None - } - } - - private def fetchCandidates( - query: Query, - embedding: ThriftEmbedding - ): Future[Option[NearestNeighborResult]] = { - val hnswParams = HnswCommon.RuntimeParamsInjection.apply(HnswParams(query.ef)) - - annServiceLookUpMap.get(query.modelId) match { - case Some(annService) => - val annQuery = - NearestNeighborQuery(embedding, withDistance = true, hnswParams, MaxNumResults) - annService.query(annQuery).map(v => Some(v)) - case _ => - Future.None - } - } -} - -object ModelBasedANNStore { - - val MaxNumResults: Int = 200 - val MaxTweetCandidateAge: Duration = 1.day - - val TweetIdByteInjection: Injection[lib.TweetId, Array[Byte]] = TweetKind.byteInjection - - // For more information about HNSW algorithm: https://docbird.twitter.biz/ann/hnsw.html - case class Query( - sourceId: InternalId, - modelId: String, - similarityEngineType: SimilarityEngineType, - ef: Int = 800) - - def toScore(distance: Distance): Double = { - distance match { - case Distance.L2Distance(l2Distance) => - // (-Infinite, 0.0] - 0.0 - l2Distance.distance - case Distance.CosineDistance(cosineDistance) => - // [0.0 - 1.0] - 1.0 - cosineDistance.distance - case Distance.InnerProductDistance(innerProductDistance) => - // (-Infinite, Infinite) - 1.0 - innerProductDistance.distance - case _ => - 0.0 - } - } - def toSimilarityEngineInfo(query: Query, score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = query.similarityEngineType, - modelId = Some(query.modelId), - score = Some(score)) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUnifiedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUnifiedSimilarityEngine.scala deleted file mode 100644 index f782ae037..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUnifiedSimilarityEngine.scala +++ /dev/null @@ -1,641 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.ProducerBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.UnifiedSETweetCombinationMethod -import com.twitter.cr_mixer.param.RelatedTweetProducerBasedParams -import com.twitter.cr_mixer.param.SimClustersANNParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.ModelVersions -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Future -import javax.inject.Named -import javax.inject.Singleton -import scala.collection.mutable.ArrayBuffer - -/** - * This store looks for similar tweets from UserTweetGraph for a Source ProducerId - * For a query producerId,User Tweet Graph (UTG), - * lets us find out which tweets the query producer's followers co-engaged - */ -@Singleton -case class ProducerBasedUnifiedSimilarityEngine( - @Named(ModuleNames.ProducerBasedUserTweetGraphSimilarityEngine) - producerBasedUserTweetGraphSimilarityEngine: StandardSimilarityEngine[ - ProducerBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ], - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - statsReceiver: StatsReceiver) - extends ReadableStore[ProducerBasedUnifiedSimilarityEngine.Query, Seq[ - TweetWithCandidateGenerationInfo - ]] { - - import ProducerBasedUnifiedSimilarityEngine._ - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: Query - ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = { - query.sourceInfo.internalId match { - case _: InternalId.UserId => - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val sannCandidatesFut = if (query.enableSimClustersANN) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANNQuery) - } else Future.None - - val sann1CandidatesFut = - if (query.enableSimClustersANN1) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN1Query) - } else Future.None - - val sann2CandidatesFut = - if (query.enableSimClustersANN2) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN2Query) - } else Future.None - - val sann3CandidatesFut = - if (query.enableSimClustersANN3) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN3Query) - } else Future.None - - val sann4CandidatesFut = - if (query.enableSimClustersANN4) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN4Query) - } else Future.None - - val sann5CandidatesFut = - if (query.enableSimClustersANN5) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN5Query) - } else Future.None - - val experimentalSANNCandidatesFut = - if (query.enableExperimentalSimClustersANN) { - simClustersANNSimilarityEngine.getCandidates(query.experimentalSimClustersANNQuery) - } else Future.None - - val utgCandidatesFut = if (query.enableUtg) { - producerBasedUserTweetGraphSimilarityEngine.getCandidates(query.utgQuery) - } else Future.None - - Future - .join( - sannCandidatesFut, - sann1CandidatesFut, - sann2CandidatesFut, - sann3CandidatesFut, - sann4CandidatesFut, - sann5CandidatesFut, - experimentalSANNCandidatesFut, - utgCandidatesFut - ).map { - case ( - simClustersAnnCandidates, - simClustersAnn1Candidates, - simClustersAnn2Candidates, - simClustersAnn3Candidates, - simClustersAnn4Candidates, - simClustersAnn5Candidates, - experimentalSANNCandidates, - userTweetGraphCandidates) => - val filteredSANNTweets = simClustersCandidateMinScoreFilter( - simClustersAnnCandidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANNQuery.storeQuery.simClustersANNConfigId) - - val filteredExperimentalSANNTweets = simClustersCandidateMinScoreFilter( - experimentalSANNCandidates.toSeq.flatten, - query.simClustersMinScore, - query.experimentalSimClustersANNQuery.storeQuery.simClustersANNConfigId) - - val filteredSANN1Tweets = simClustersCandidateMinScoreFilter( - simClustersAnn1Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN1Query.storeQuery.simClustersANNConfigId) - - val filteredSANN2Tweets = simClustersCandidateMinScoreFilter( - simClustersAnn2Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN2Query.storeQuery.simClustersANNConfigId) - - val filteredSANN3Tweets = simClustersCandidateMinScoreFilter( - simClustersAnn3Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN3Query.storeQuery.simClustersANNConfigId) - - val filteredSANN4Tweets = simClustersCandidateMinScoreFilter( - simClustersAnn4Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN4Query.storeQuery.simClustersANNConfigId) - - val filteredSANN5Tweets = simClustersCandidateMinScoreFilter( - simClustersAnn5Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN5Query.storeQuery.simClustersANNConfigId) - - val filteredUTGTweets = - userTweetGraphFilter(userTweetGraphCandidates.toSeq.flatten) - - val sannTweetsWithCGInfo = filteredSANNTweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANNQuery, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann1TweetsWithCGInfo = filteredSANN1Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN1Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann2TweetsWithCGInfo = filteredSANN2Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN2Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val sann3TweetsWithCGInfo = filteredSANN3Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN3Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val sann4TweetsWithCGInfo = filteredSANN4Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN4Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val sann5TweetsWithCGInfo = filteredSANN5Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN5Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val experimentalSANNTweetsWithCGInfo = filteredExperimentalSANNTweets.map { - tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo( - query.experimentalSimClustersANNQuery, - tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val utgTweetsWithCGInfo = filteredUTGTweets.map { tweetWithScore => - val similarityEngineInfo = - ProducerBasedUserTweetGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val candidateSourcesToBeInterleaved = - ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]]( - sannTweetsWithCGInfo, - sann1TweetsWithCGInfo, - sann2TweetsWithCGInfo, - sann3TweetsWithCGInfo, - sann4TweetsWithCGInfo, - sann5TweetsWithCGInfo, - experimentalSANNTweetsWithCGInfo, - ) - - if (query.utgCombinationMethod == UnifiedSETweetCombinationMethod.Interleave) { - candidateSourcesToBeInterleaved += utgTweetsWithCGInfo - } - - val interleavedCandidates = - InterleaveUtil.interleave(candidateSourcesToBeInterleaved) - - val candidateSourcesToBeOrdered = - ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]](interleavedCandidates) - - if (query.utgCombinationMethod == UnifiedSETweetCombinationMethod.Frontload) - candidateSourcesToBeOrdered.prepend(utgTweetsWithCGInfo) - - val candidatesFromGivenOrderCombination = - SimilaritySourceOrderingUtil.keepGivenOrder(candidateSourcesToBeOrdered) - - val unifiedCandidatesWithUnifiedCGInfo = candidatesFromGivenOrderCombination.map { - candidate => - /*** - * when a candidate was made by interleave/keepGivenOrder, - * then we apply getProducerBasedUnifiedCGInfo() to override with the unified CGInfo - * - * in contributingSE list for interleave. We only have the chosen SE available. - * This is hard to add for interleave, and we plan to add it later after abstraction improvement. - */ - TweetWithCandidateGenerationInfo( - tweetId = candidate.tweetId, - candidateGenerationInfo = getProducerBasedUnifiedCGInfo( - candidate.candidateGenerationInfo.sourceInfoOpt, - candidate.getSimilarityScore, - candidate.candidateGenerationInfo.contributingSimilarityEngines - ) // getSimilarityScore comes from either unifiedScore or single score - ) - } - stats.stat("unified_candidate_size").add(unifiedCandidatesWithUnifiedCGInfo.size) - val truncatedCandidates = - unifiedCandidatesWithUnifiedCGInfo.take(query.maxCandidateNumPerSourceKey) - stats.stat("truncatedCandidates_size").add(truncatedCandidates.size) - - Some(truncatedCandidates) - - } - } - - case _ => - stats.counter("sourceId_is_not_userId_cnt").incr() - Future.None - } - } - - private def simClustersCandidateMinScoreFilter( - simClustersAnnCandidates: Seq[TweetWithScore], - simClustersMinScore: Double, - simClustersANNConfigId: String - ): Seq[TweetWithScore] = { - val filteredCandidates = simClustersAnnCandidates - .filter { candidate => - candidate.score > simClustersMinScore - } - - stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size) - stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr() - if (filteredCandidates.isEmpty) - stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - - /** A no-op filter as UTG filter already happened at UTG service side */ - private def userTweetGraphFilter( - userTweetGraphCandidates: Seq[TweetWithScore] - ): Seq[TweetWithScore] = { - val filteredCandidates = userTweetGraphCandidates - - stats.stat("userTweetGraphCandidates_size").add(userTweetGraphCandidates.size) - if (filteredCandidates.isEmpty) stats.counter("emptyFilteredUserTweetGraphCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - -} -object ProducerBasedUnifiedSimilarityEngine { - - /*** - * Every candidate will have the CG Info with ProducerBasedUnifiedSimilarityEngine - * as they are generated by a composite of Similarity Engines. - * Additionally, we store the contributing SEs (eg., SANN, UTG). - */ - private def getProducerBasedUnifiedCGInfo( - sourceInfoOpt: Option[SourceInfo], - unifiedScore: Double, - contributingSimilarityEngines: Seq[SimilarityEngineInfo] - ): CandidateGenerationInfo = { - CandidateGenerationInfo( - sourceInfoOpt, - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ProducerBasedUnifiedSimilarityEngine, - modelId = None, // We do not assign modelId for a unified similarity engine - score = Some(unifiedScore) - ), - contributingSimilarityEngines - ) - } - - case class Query( - sourceInfo: SourceInfo, - maxCandidateNumPerSourceKey: Int, - maxTweetAgeHours: Duration, - // SimClusters - enableSimClustersANN: Boolean, - simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableExperimentalSimClustersANN: Boolean, - experimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN1: Boolean, - simClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN2: Boolean, - simClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN4: Boolean, - simClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN3: Boolean, - simClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN5: Boolean, - simClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - simClustersMinScore: Double, - // UTG - enableUtg: Boolean, - utgCombinationMethod: UnifiedSETweetCombinationMethod.Value, - utgQuery: EngineQuery[ProducerBasedUserTweetGraphSimilarityEngine.Query]) - - def fromParams( - sourceInfo: SourceInfo, - params: configapi.Params, - ): EngineQuery[Query] = { - val maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - val maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - // SimClusters - val enableSimClustersANN = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANNParam) - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - // SimClusters - Experimental SANN Similarity Engine - val enableExperimentalSimClustersANN = params( - ProducerBasedCandidateGenerationParams.EnableExperimentalSimClustersANNParam) - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - // SimClusters - SANN cluster 1 Similarity Engine - val enableSimClustersANN1 = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANN1Param) - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - // SimClusters - SANN cluster 2 Similarity Engine - val enableSimClustersANN2 = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANN2Param) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - // SimClusters - SANN cluster 3 Similarity Engine - val enableSimClustersANN3 = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANN3Param) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - // SimClusters - SANN cluster 5 Similarity Engine - val enableSimClustersANN5 = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANN5Param) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - val enableSimClustersANN4 = params( - ProducerBasedCandidateGenerationParams.EnableSimClustersANN4Param) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - - val simClustersMinScore = params( - ProducerBasedCandidateGenerationParams.SimClustersMinScoreParam) - - // SimClusters ANN Query - val simClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANNConfigId, - params - ) - val experimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params - ) - val simClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN1ConfigId, - params - ) - val simClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - val simClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN3ConfigId, - params - ) - val simClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN5ConfigId, - params - ) - val simClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN4ConfigId, - params - ) - // UTG - val enableUtg = params(ProducerBasedCandidateGenerationParams.EnableUTGParam) - val utgCombinationMethod = params( - ProducerBasedCandidateGenerationParams.UtgCombinationMethodParam) - - EngineQuery( - Query( - sourceInfo = sourceInfo, - maxCandidateNumPerSourceKey = maxCandidateNumPerSourceKey, - maxTweetAgeHours = maxTweetAgeHours, - enableSimClustersANN = enableSimClustersANN, - simClustersANNQuery = simClustersANNQuery, - enableExperimentalSimClustersANN = enableExperimentalSimClustersANN, - experimentalSimClustersANNQuery = experimentalSimClustersANNQuery, - enableSimClustersANN1 = enableSimClustersANN1, - simClustersANN1Query = simClustersANN1Query, - enableSimClustersANN2 = enableSimClustersANN2, - simClustersANN2Query = simClustersANN2Query, - enableSimClustersANN3 = enableSimClustersANN3, - simClustersANN3Query = simClustersANN3Query, - enableSimClustersANN5 = enableSimClustersANN5, - simClustersANN5Query = simClustersANN5Query, - enableSimClustersANN4 = enableSimClustersANN4, - simClustersANN4Query = simClustersANN4Query, - simClustersMinScore = simClustersMinScore, - enableUtg = enableUtg, - utgCombinationMethod = utgCombinationMethod, - utgQuery = ProducerBasedUserTweetGraphSimilarityEngine - .fromParams(sourceInfo.internalId, params) - ), - params - ) - } - - def fromParamsForRelatedTweet( - internalId: InternalId, - params: configapi.Params - ): EngineQuery[Query] = { - val maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - val maxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - // SimClusters - val enableSimClustersANN = params(RelatedTweetProducerBasedParams.EnableSimClustersANNParam) - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - val simClustersMinScore = - params(RelatedTweetProducerBasedParams.SimClustersMinScoreParam) - // SimClusters - Experimental SANN Similarity Engine - val enableExperimentalSimClustersANN = params( - RelatedTweetProducerBasedParams.EnableExperimentalSimClustersANNParam) - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - // SimClusters - SANN cluster 1 Similarity Engine - val enableSimClustersANN1 = params(RelatedTweetProducerBasedParams.EnableSimClustersANN1Param) - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - // SimClusters - SANN cluster 2 Similarity Engine - val enableSimClustersANN2 = params(RelatedTweetProducerBasedParams.EnableSimClustersANN2Param) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - // SimClusters - SANN cluster 3 Similarity Engine - val enableSimClustersANN3 = params(RelatedTweetProducerBasedParams.EnableSimClustersANN3Param) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - // SimClusters - SANN cluster 5 Similarity Engine - val enableSimClustersANN5 = params(RelatedTweetProducerBasedParams.EnableSimClustersANN5Param) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - - val enableSimClustersANN4 = params(RelatedTweetProducerBasedParams.EnableSimClustersANN4Param) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - // Build SANN Query - val simClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANNConfigId, - params - ) - val experimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params - ) - val simClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN1ConfigId, - params - ) - val simClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - val simClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN3ConfigId, - params - ) - val simClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN5ConfigId, - params - ) - val simClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.FavBasedProducer, - simClustersModelVersion, - simClustersANN4ConfigId, - params - ) - // UTG - val enableUtg = params(RelatedTweetProducerBasedParams.EnableUTGParam) - val utgCombinationMethod = params( - ProducerBasedCandidateGenerationParams.UtgCombinationMethodParam) - - // SourceType.RequestUserId is a placeholder. - val sourceInfo = SourceInfo(SourceType.RequestUserId, internalId, None) - - EngineQuery( - Query( - sourceInfo = sourceInfo, - maxCandidateNumPerSourceKey = maxCandidateNumPerSourceKey, - maxTweetAgeHours = maxTweetAgeHours, - enableSimClustersANN = enableSimClustersANN, - simClustersANNQuery = simClustersANNQuery, - enableExperimentalSimClustersANN = enableExperimentalSimClustersANN, - experimentalSimClustersANNQuery = experimentalSimClustersANNQuery, - enableSimClustersANN1 = enableSimClustersANN1, - simClustersANN1Query = simClustersANN1Query, - enableSimClustersANN2 = enableSimClustersANN2, - simClustersANN2Query = simClustersANN2Query, - enableSimClustersANN3 = enableSimClustersANN3, - simClustersANN3Query = simClustersANN3Query, - enableSimClustersANN5 = enableSimClustersANN5, - simClustersANN5Query = simClustersANN5Query, - enableSimClustersANN4 = enableSimClustersANN4, - simClustersANN4Query = simClustersANN4Query, - simClustersMinScore = simClustersMinScore, - enableUtg = enableUtg, - utgQuery = ProducerBasedUserTweetGraphSimilarityEngine.fromParams(internalId, params), - utgCombinationMethod = utgCombinationMethod - ), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserAdGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserAdGraphSimilarityEngine.scala deleted file mode 100644 index c9ebc91e7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserAdGraphSimilarityEngine.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.ProducerBasedUserAdGraphParams -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.recos.user_ad_graph.thriftscala.ProducerBasedRelatedAdRequest -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Singleton -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.timelines.configapi - -/** - * This store looks for similar tweets from UserAdGraph for a Source ProducerId - * For a query producerId,User Tweet Graph (UAG), - * lets us find out which ad tweets the query producer's followers co-engaged - */ -@Singleton -case class ProducerBasedUserAdGraphSimilarityEngine( - userAdGraphService: UserAdGraph.MethodPerEndpoint, - statsReceiver: StatsReceiver) - extends ReadableStore[ProducerBasedUserAdGraphSimilarityEngine.Query, Seq[ - TweetWithScore - ]] { - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: ProducerBasedUserAdGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - query.sourceId match { - case InternalId.UserId(producerId) => - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val relatedAdRequest = - ProducerBasedRelatedAdRequest( - producerId, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - minScore = Some(query.minScore), - maxNumFollowers = Some(query.maxNumFollowers), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours), - ) - - userAdGraphService.producerBasedRelatedAds(relatedAdRequest).map { relatedAdResponse => - val candidates = - relatedAdResponse.adTweets.map(tweet => TweetWithScore(tweet.adTweetId, tweet.score)) - Some(candidates) - } - } - case _ => - Future.value(None) - } - } -} - -object ProducerBasedUserAdGraphSimilarityEngine { - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ProducerBasedUserAdGraph, - modelId = None, - score = Some(score)) - } - - case class Query( - sourceId: InternalId, - maxResults: Int, - minCooccurrence: Int, // require at least {minCooccurrence} lhs user engaged with returned tweet - minScore: Double, - maxNumFollowers: Int, // max number of lhs users - maxTweetAgeInHours: Int) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceId = sourceId, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(ProducerBasedUserAdGraphParams.MinCoOccurrenceParam), - maxNumFollowers = params(ProducerBasedUserAdGraphParams.MaxNumFollowersParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - minScore = params(ProducerBasedUserAdGraphParams.MinScoreParam) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngine.scala deleted file mode 100644 index 6e7ca95bd..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/ProducerBasedUserTweetGraphSimilarityEngine.scala +++ /dev/null @@ -1,96 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.ProducerBasedUserTweetGraphParams -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.recos.user_tweet_graph.thriftscala.ProducerBasedRelatedTweetRequest -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Singleton -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.timelines.configapi -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph - -/** - * This store looks for similar tweets from UserTweetGraph for a Source ProducerId - * For a query producerId,User Tweet Graph (UTG), - * lets us find out which tweets the query producer's followers co-engaged - */ -@Singleton -case class ProducerBasedUserTweetGraphSimilarityEngine( - userTweetGraphService: UserTweetGraph.MethodPerEndpoint, - statsReceiver: StatsReceiver) - extends ReadableStore[ProducerBasedUserTweetGraphSimilarityEngine.Query, Seq[ - TweetWithScore - ]] { - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: ProducerBasedUserTweetGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - query.sourceId match { - case InternalId.UserId(producerId) => - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val relatedTweetRequest = - ProducerBasedRelatedTweetRequest( - producerId, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - minScore = Some(query.minScore), - maxNumFollowers = Some(query.maxNumFollowers), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours), - ) - - userTweetGraphService.producerBasedRelatedTweets(relatedTweetRequest).map { - relatedTweetResponse => - val candidates = - relatedTweetResponse.tweets.map(tweet => TweetWithScore(tweet.tweetId, tweet.score)) - Some(candidates) - } - } - case _ => - Future.value(None) - } - } -} - -object ProducerBasedUserTweetGraphSimilarityEngine { - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.ProducerBasedUserTweetGraph, - modelId = None, - score = Some(score)) - } - - case class Query( - sourceId: InternalId, - maxResults: Int, - minCooccurrence: Int, // require at least {minCooccurrence} lhs user engaged with returned tweet - minScore: Double, - maxNumFollowers: Int, // max number of lhs users - maxTweetAgeInHours: Int) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceId = sourceId, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(ProducerBasedUserTweetGraphParams.MinCoOccurrenceParam), - maxNumFollowers = params(ProducerBasedUserTweetGraphParams.MaxNumFollowersParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - minScore = params(ProducerBasedUserTweetGraphParams.MinScoreParam) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimClustersANNSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimClustersANNSimilarityEngine.scala deleted file mode 100644 index 228627c87..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimClustersANNSimilarityEngine.scala +++ /dev/null @@ -1,113 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.config.SimClustersANNConfig -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.simclusters_v2.thriftscala.ModelVersion -import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId -import com.twitter.simclustersann.thriftscala.SimClustersANNService -import com.twitter.simclustersann.thriftscala.{Query => SimClustersANNQuery} -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton -import com.twitter.cr_mixer.exception.InvalidSANNConfigException -import com.twitter.relevance_platform.simclustersann.multicluster.ServiceNameMapper - -@Singleton -case class SimClustersANNSimilarityEngine( - simClustersANNServiceNameToClientMapper: Map[String, SimClustersANNService.MethodPerEndpoint], - statsReceiver: StatsReceiver) - extends ReadableStore[ - SimClustersANNSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - private val name: String = this.getClass.getSimpleName - private val stats = statsReceiver.scope(name) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - private def getSimClustersANNService( - query: SimClustersANNQuery - ): Option[SimClustersANNService.MethodPerEndpoint] = { - ServiceNameMapper - .getServiceName( - query.sourceEmbeddingId.modelVersion, - query.config.candidateEmbeddingType).flatMap(serviceName => - simClustersANNServiceNameToClientMapper.get(serviceName)) - } - - override def get( - query: SimClustersANNSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - - getSimClustersANNService(query.simClustersANNQuery) match { - case Some(simClustersANNService) => - simClustersANNService.getTweetCandidates(query.simClustersANNQuery).map { - simClustersANNTweetCandidates => - val tweetWithScores = simClustersANNTweetCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - Some(tweetWithScores) - } - case None => - throw InvalidSANNConfigException( - "No SANN Cluster configured to serve this query, check CandidateEmbeddingType and ModelVersion") - } - } - } -} - -object SimClustersANNSimilarityEngine { - case class Query( - simClustersANNQuery: SimClustersANNQuery, - simClustersANNConfigId: String) - - def toSimilarityEngineInfo( - query: EngineQuery[Query], - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.SimClustersANN, - modelId = Some( - s"SimClustersANN_${query.storeQuery.simClustersANNQuery.sourceEmbeddingId.embeddingType.toString}_" + - s"${query.storeQuery.simClustersANNQuery.sourceEmbeddingId.modelVersion.toString}_" + - s"${query.storeQuery.simClustersANNConfigId}"), - score = Some(score) - ) - } - - def fromParams( - internalId: InternalId, - embeddingType: EmbeddingType, - modelVersion: ModelVersion, - simClustersANNConfigId: String, - params: configapi.Params, - ): EngineQuery[Query] = { - - // SimClusters EmbeddingId and ANNConfig - val simClustersEmbeddingId = - SimClustersEmbeddingId(embeddingType, modelVersion, internalId) - val simClustersANNConfig = - SimClustersANNConfig - .getConfig(embeddingType.toString, modelVersion.toString, simClustersANNConfigId) - - EngineQuery( - Query( - SimClustersANNQuery( - sourceEmbeddingId = simClustersEmbeddingId, - config = simClustersANNConfig.toSANNConfigThrift - ), - simClustersANNConfigId - ), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilarityEngine.scala deleted file mode 100644 index 6bc332f75..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilarityEngine.scala +++ /dev/null @@ -1,169 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.GlobalRequestTimeoutException -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.memcached.Client -import com.twitter.finagle.mux.ServerApplicationError -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.hashing.KeyHasher -import com.twitter.hermit.store.common.ObservedMemcachedReadableStore -import com.twitter.relevance_platform.common.injection.LZ4Injection -import com.twitter.relevance_platform.common.injection.SeqObjectInjection -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi.FSParam -import com.twitter.timelines.configapi.Params -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.TimeoutException -import com.twitter.util.logging.Logging -import org.apache.thrift.TApplicationException - -/** - * A SimilarityEngine is a wrapper which, given a [[Query]], returns a list of [[Candidate]] - * The main purposes of a SimilarityEngine is to provide a consistent interface for candidate - * generation logic, and provides default functions, including: - * - Identification - * - Observability - * - Timeout settings - * - Exception Handling - * - Gating by Deciders & FeatureSwitch settings - * - (coming soon): Dark traffic - * - * Note: - * A SimilarityEngine by itself is NOT meant to be cacheable. - * Caching should be implemented in the underlying ReadableStore that provides the [[Candidate]]s - * - * Please keep extension of this class local this directory only - * - */ -trait SimilarityEngine[Query, Candidate] { - - /** - * Uniquely identifies a similarity engine. - * Avoid using the same engine type for more than one engine, it will cause stats to double count - */ - private[similarity_engine] def identifier: SimilarityEngineType - - def getCandidates(query: Query): Future[Option[Seq[Candidate]]] - -} - -object SimilarityEngine extends Logging { - case class SimilarityEngineConfig( - timeout: Duration, - gatingConfig: GatingConfig) - - /** - * Controls for whether or not this Engine is enabled. - * In our previous design, we were expecting a Sim Engine will only take one set of Params, - * and that’s why we decided to have GatingConfig and the EnableFeatureSwitch in the trait. - * However, we now have two candidate generation pipelines: Tweet Rec, Related Tweets - * and they are now having their own set of Params, but EnableFeatureSwitch can only put in 1 fixed value. - * We need some further refactor work to make it more flexible. - * - * @param deciderConfig Gate the Engine by a decider. If specified, - * @param enableFeatureSwitch. DO NOT USE IT FOR NOW. It needs some refactorting. Please set it to None (SD-20268) - */ - case class GatingConfig( - deciderConfig: Option[DeciderConfig], - enableFeatureSwitch: Option[ - FSParam[Boolean] - ]) // Do NOT use the enableFeatureSwitch. It needs some refactoring. - - case class DeciderConfig( - decider: CrMixerDecider, - deciderString: String) - - case class MemCacheConfig[K]( - cacheClient: Client, - ttl: Duration, - asyncUpdate: Boolean = false, - keyToString: K => String) - - private[similarity_engine] def isEnabled( - params: Params, - gatingConfig: GatingConfig - ): Boolean = { - val enabledByDecider = - gatingConfig.deciderConfig.forall { config => - config.decider.isAvailable(config.deciderString) - } - - val enabledByFS = gatingConfig.enableFeatureSwitch.forall(params.apply) - - enabledByDecider && enabledByFS - } - - // Default key hasher for memcache keys - val keyHasher: KeyHasher = KeyHasher.FNV1A_64 - - /** - * Add a MemCache wrapper to a ReadableStore with a preset key and value injection functions - * Note: The [[Query]] object needs to be cacheable, - * i.e. it cannot be a runtime objects or complex objects, for example, configapi.Params - * - * @param underlyingStore un-cached store implementation - * @param keyPrefix a prefix differentiates 2 stores if they share the same key space. - * e.x. 2 implementations of ReadableStore[UserId, Seq[Candidiate] ] - * can use prefix "store_v1", "store_v2" - * @return A ReadableStore with a MemCache wrapper - */ - private[similarity_engine] def addMemCache[Query, Candidate <: Serializable]( - underlyingStore: ReadableStore[Query, Seq[Candidate]], - memCacheConfig: MemCacheConfig[Query], - keyPrefix: Option[String] = None, - statsReceiver: StatsReceiver - ): ReadableStore[Query, Seq[Candidate]] = { - val prefix = keyPrefix.getOrElse("") - - ObservedMemcachedReadableStore.fromCacheClient[Query, Seq[Candidate]]( - backingStore = underlyingStore, - cacheClient = memCacheConfig.cacheClient, - ttl = memCacheConfig.ttl, - asyncUpdate = memCacheConfig.asyncUpdate, - )( - valueInjection = LZ4Injection.compose(SeqObjectInjection[Candidate]()), - keyToString = { k: Query => s"CRMixer:$prefix${memCacheConfig.keyToString(k)}" }, - statsReceiver = statsReceiver - ) - } - - private val timer = com.twitter.finagle.util.DefaultTimer - - /** - * Applies runtime configs, like stats, timeouts, exception handling, onto fn - */ - private[similarity_engine] def getFromFn[Query, Candidate]( - fn: Query => Future[Option[Seq[Candidate]]], - storeQuery: Query, - engineConfig: SimilarityEngineConfig, - params: Params, - scopedStats: StatsReceiver - ): Future[Option[Seq[Candidate]]] = { - if (isEnabled(params, engineConfig.gatingConfig)) { - scopedStats.counter("gate_enabled").incr() - - StatsUtil - .trackOptionItemsStats(scopedStats) { - fn.apply(storeQuery).raiseWithin(engineConfig.timeout)(timer) - } - .rescue { - case _: TimeoutException | _: GlobalRequestTimeoutException | _: TApplicationException | - _: ClientDiscardedRequestException | - _: ServerApplicationError // TApplicationException inside - => - debug("Failed to fetch. request aborted or timed out") - Future.None - case e => - error("Failed to fetch. request aborted or timed out", e) - Future.None - } - } else { - scopedStats.counter("gate_disabled").incr() - Future.None - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilaritySourceOrderingUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilaritySourceOrderingUtil.scala deleted file mode 100644 index b3da2b631..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SimilaritySourceOrderingUtil.scala +++ /dev/null @@ -1,32 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.simclusters_v2.common.TweetId -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -object SimilaritySourceOrderingUtil { - /** - * This function flatten and dedup input candidates according to the order in the input Seq - * [[candidate10, candidate11], [candidate20, candidate21]] => [candidate10, candidate11, candidate20, candidate21] - */ - def keepGivenOrder( - candidates: Seq[Seq[TweetWithCandidateGenerationInfo]], - ): Seq[TweetWithCandidateGenerationInfo] = { - - val seen = mutable.Set[TweetId]() - val combinedCandidates = candidates.flatten - val result = ArrayBuffer[TweetWithCandidateGenerationInfo]() - - combinedCandidates.foreach { candidate => - val candidateTweetId = candidate.tweetId - val seenCandidate = seen.contains(candidateTweetId) // de-dup - if (!seenCandidate) { - result += candidate - seen.add(candidate.tweetId) - } - } - //convert result to immutable seq - result.toList - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitHighPrecisionTopicTweetSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitHighPrecisionTopicTweetSimilarityEngine.scala deleted file mode 100644 index 37701e79e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitHighPrecisionTopicTweetSimilarityEngine.scala +++ /dev/null @@ -1,123 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.google.inject.Inject -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.contentrecommender.thriftscala.AlgorithmType -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.TopicTweetParams -import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine._ -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.topic_recos.thriftscala.TopicTweet -import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey -import com.twitter.util.Future - -@Singleton -case class SkitHighPrecisionTopicTweetSimilarityEngine @Inject() ( - @Named(ModuleNames.SkitStratoStoreName) skitStratoStore: ReadableStore[ - TopicTweetPartitionFlatKey, - Seq[TopicTweet] - ], - statsReceiver: StatsReceiver) - extends ReadableStore[EngineQuery[Query], Seq[TopicTweetWithScore]] { - - private val name: String = this.getClass.getSimpleName - private val stats = statsReceiver.scope(name) - - override def get(query: EngineQuery[Query]): Future[Option[Seq[TopicTweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(stats) { - fetch(query).map { tweets => - val topTweets = - tweets - .sortBy(-_.favCount) - .take(query.storeQuery.maxCandidates) - .map { tweet => - TopicTweetWithScore( - tweetId = tweet.tweetId, - score = tweet.favCount, - similarityEngineType = SimilarityEngineType.SkitHighPrecisionTopicTweet - ) - } - Some(topTweets) - } - } - } - - private def fetch(query: EngineQuery[Query]): Future[Seq[SkitTopicTweet]] = { - val latestTweetTimeInHour = System.currentTimeMillis() / 1000 / 60 / 60 - - val earliestTweetTimeInHour = latestTweetTimeInHour - - math.min(MaxTweetAgeInHours, query.storeQuery.maxTweetAge.inHours) - val timedKeys = for (timePartition <- earliestTweetTimeInHour to latestTweetTimeInHour) yield { - - TopicTweetPartitionFlatKey( - entityId = query.storeQuery.topicId.entityId, - timePartition = timePartition, - algorithmType = Some(AlgorithmType.SemanticCoreTweet), - tweetEmbeddingType = Some(EmbeddingType.LogFavBasedTweet), - language = query.storeQuery.topicId.language.getOrElse("").toLowerCase, - country = None, // Disable country. It is not used. - semanticCoreAnnotationVersionId = Some(query.storeQuery.semanticCoreVersionId) - ) - } - - getTweetsForKeys( - timedKeys, - query.storeQuery.topicId - ) - } - - /** - * Given a set of keys, multiget the underlying Strato store, combine and flatten the results. - */ - private def getTweetsForKeys( - keys: Seq[TopicTweetPartitionFlatKey], - sourceTopic: TopicId - ): Future[Seq[SkitTopicTweet]] = { - Future - .collect { skitStratoStore.multiGet(keys.toSet).values.toSeq } - .map { combinedResults => - val topTweets = combinedResults.flatten.flatten - topTweets.map { tweet => - SkitTopicTweet( - tweetId = tweet.tweetId, - favCount = tweet.scores.favCount.getOrElse(0L), - cosineSimilarityScore = tweet.scores.cosineSimilarity.getOrElse(0.0), - sourceTopic = sourceTopic - ) - } - } - } -} - -object SkitHighPrecisionTopicTweetSimilarityEngine { - - def fromParams( - topicId: TopicId, - isVideoOnly: Boolean, - params: configapi.Params, - ): EngineQuery[Query] = { - val maxCandidates = if (isVideoOnly) { - params(TopicTweetParams.MaxSkitHighPrecisionCandidatesParam) * 2 - } else { - params(TopicTweetParams.MaxSkitHighPrecisionCandidatesParam) - } - - EngineQuery( - Query( - topicId = topicId, - maxCandidates = maxCandidates, - maxTweetAge = params(TopicTweetParams.MaxTweetAge), - semanticCoreVersionId = params(TopicTweetParams.SemanticCoreVersionIdParam) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitTopicTweetSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitTopicTweetSimilarityEngine.scala deleted file mode 100644 index 44bb4b319..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/SkitTopicTweetSimilarityEngine.scala +++ /dev/null @@ -1,143 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.google.inject.Inject -import com.google.inject.Singleton -import com.google.inject.name.Named -import com.twitter.contentrecommender.thriftscala.AlgorithmType -import com.twitter.conversions.DurationOps._ -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.TopicTweetWithScore -import com.twitter.cr_mixer.param.TopicTweetParams -import com.twitter.cr_mixer.similarity_engine.SkitTopicTweetSimilarityEngine._ -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.ModelVersion -import com.twitter.simclusters_v2.thriftscala.TopicId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.topic_recos.thriftscala.TopicTweet -import com.twitter.topic_recos.thriftscala.TopicTweetPartitionFlatKey -import com.twitter.util.Duration -import com.twitter.util.Future - -@Singleton -case class SkitTopicTweetSimilarityEngine @Inject() ( - @Named(ModuleNames.SkitStratoStoreName) skitStratoStore: ReadableStore[ - TopicTweetPartitionFlatKey, - Seq[TopicTweet] - ], - statsReceiver: StatsReceiver) - extends ReadableStore[EngineQuery[Query], Seq[TopicTweetWithScore]] { - - private val name: String = this.getClass.getSimpleName - private val stats = statsReceiver.scope(name) - - override def get(query: EngineQuery[Query]): Future[Option[Seq[TopicTweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(stats) { - fetch(query).map { tweets => - val topTweets = - tweets - .sortBy(-_.cosineSimilarityScore) - .take(query.storeQuery.maxCandidates) - .map { tweet => - TopicTweetWithScore( - tweetId = tweet.tweetId, - score = tweet.cosineSimilarityScore, - similarityEngineType = SimilarityEngineType.SkitTfgTopicTweet - ) - } - Some(topTweets) - } - } - } - - private def fetch(query: EngineQuery[Query]): Future[Seq[SkitTopicTweet]] = { - val latestTweetTimeInHour = System.currentTimeMillis() / 1000 / 60 / 60 - - val earliestTweetTimeInHour = latestTweetTimeInHour - - math.min(MaxTweetAgeInHours, query.storeQuery.maxTweetAge.inHours) - val timedKeys = for (timePartition <- earliestTweetTimeInHour to latestTweetTimeInHour) yield { - - TopicTweetPartitionFlatKey( - entityId = query.storeQuery.topicId.entityId, - timePartition = timePartition, - algorithmType = Some(AlgorithmType.TfgTweet), - tweetEmbeddingType = Some(EmbeddingType.LogFavBasedTweet), - language = query.storeQuery.topicId.language.getOrElse("").toLowerCase, - country = None, // Disable country. It is not used. - semanticCoreAnnotationVersionId = Some(query.storeQuery.semanticCoreVersionId), - simclustersModelVersion = Some(ModelVersion.Model20m145k2020) - ) - } - - getTweetsForKeys( - timedKeys, - query.storeQuery.topicId - ) - } - - /** - * Given a set of keys, multiget the underlying Strato store, combine and flatten the results. - */ - private def getTweetsForKeys( - keys: Seq[TopicTweetPartitionFlatKey], - sourceTopic: TopicId - ): Future[Seq[SkitTopicTweet]] = { - Future - .collect { skitStratoStore.multiGet(keys.toSet).values.toSeq } - .map { combinedResults => - val topTweets = combinedResults.flatten.flatten - topTweets.map { tweet => - SkitTopicTweet( - tweetId = tweet.tweetId, - favCount = tweet.scores.favCount.getOrElse(0L), - cosineSimilarityScore = tweet.scores.cosineSimilarity.getOrElse(0.0), - sourceTopic = sourceTopic - ) - } - } - } -} - -object SkitTopicTweetSimilarityEngine { - - val MaxTweetAgeInHours: Int = 7.days.inHours // Simple guard to prevent overloading - - // Query is used as a cache key. Do not add any user level information in this. - case class Query( - topicId: TopicId, - maxCandidates: Int, - maxTweetAge: Duration, - semanticCoreVersionId: Long) - - case class SkitTopicTweet( - sourceTopic: TopicId, - tweetId: TweetId, - favCount: Long, - cosineSimilarityScore: Double) - - def fromParams( - topicId: TopicId, - isVideoOnly: Boolean, - params: configapi.Params, - ): EngineQuery[Query] = { - val maxCandidates = if (isVideoOnly) { - params(TopicTweetParams.MaxSkitTfgCandidatesParam) * 2 - } else { - params(TopicTweetParams.MaxSkitTfgCandidatesParam) - } - - EngineQuery( - Query( - topicId = topicId, - maxCandidates = maxCandidates, - maxTweetAge = params(TopicTweetParams.MaxTweetAge), - semanticCoreVersionId = params(TopicTweetParams.SemanticCoreVersionIdParam) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/StandardSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/StandardSimilarityEngine.scala deleted file mode 100644 index ae71c3736..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/StandardSimilarityEngine.scala +++ /dev/null @@ -1,65 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.MemCacheConfig -import com.twitter.cr_mixer.similarity_engine.SimilarityEngine.SimilarityEngineConfig -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi.Params -import com.twitter.util.Future - -/** - * @tparam Query ReadableStore's input type. - */ -case class EngineQuery[Query]( - storeQuery: Query, - params: Params, -) - -/** - * A straight forward SimilarityEngine implementation that wraps a ReadableStore - * - * @param implementingStore Provides the candidate retrieval's implementations - * @param memCacheConfig If specified, it will wrap the underlying store with a MemCache layer - * You should only enable this for cacheable queries, e.x. TweetIds. - * consumer based UserIds are generally not possible to cache. - * @tparam Query ReadableStore's input type - * @tparam Candidate ReadableStore's return type is Seq[[[Candidate]]] - */ -class StandardSimilarityEngine[Query, Candidate <: Serializable]( - implementingStore: ReadableStore[Query, Seq[Candidate]], - override val identifier: SimilarityEngineType, - globalStats: StatsReceiver, - engineConfig: SimilarityEngineConfig, - memCacheConfig: Option[MemCacheConfig[Query]] = None) - extends SimilarityEngine[EngineQuery[Query], Candidate] { - - private val scopedStats = globalStats.scope("similarityEngine", identifier.toString) - - def getScopedStats: StatsReceiver = scopedStats - - // Add memcache wrapper, if specified - private val store = { - memCacheConfig match { - case Some(config) => - SimilarityEngine.addMemCache( - underlyingStore = implementingStore, - memCacheConfig = config, - statsReceiver = scopedStats - ) - case _ => implementingStore - } - } - - override def getCandidates( - engineQuery: EngineQuery[Query] - ): Future[Option[Seq[Candidate]]] = { - SimilarityEngine.getFromFn( - store.get, - engineQuery.storeQuery, - engineConfig, - engineQuery.params, - scopedStats - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedQigSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedQigSimilarityEngine.scala deleted file mode 100644 index 317f09727..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedQigSimilarityEngine.scala +++ /dev/null @@ -1,114 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.base.Stats -import com.twitter.product_mixer.core.thriftscala.ClientContext -import com.twitter.qig_ranker.thriftscala.Product -import com.twitter.qig_ranker.thriftscala.ProductContext -import com.twitter.qig_ranker.thriftscala.QigRanker -import com.twitter.qig_ranker.thriftscala.QigRankerProductResponse -import com.twitter.qig_ranker.thriftscala.QigRankerRequest -import com.twitter.qig_ranker.thriftscala.QigRankerResponse -import com.twitter.qig_ranker.thriftscala.TwistlySimilarTweetsProductContext -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * This store looks for similar tweets from QueryInteractionGraph (QIG) for a source tweet id. - * For a given query tweet, QIG returns us the similar tweets that have an overlap of engagements - * (with the query tweet) on different search queries - */ -@Singleton -case class TweetBasedQigSimilarityEngine( - qigRanker: QigRanker.MethodPerEndpoint, - statsReceiver: StatsReceiver) - extends ReadableStore[ - TweetBasedQigSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: TweetBasedQigSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - query.sourceId match { - case InternalId.TweetId(tweetId) => - val qigSimilarTweetsRequest = getQigSimilarTweetsRequest(tweetId) - - Stats.trackOption(fetchCandidatesStat) { - qigRanker - .getSimilarCandidates(qigSimilarTweetsRequest) - .map { qigSimilarTweetsResponse => - getCandidatesFromQigResponse(qigSimilarTweetsResponse) - } - } - case _ => - Future.value(None) - } - } - - private def getQigSimilarTweetsRequest( - tweetId: Long - ): QigRankerRequest = { - // Note: QigRanker needs a non-empty userId to be passed to return results. - // We are passing in a dummy userId until we fix this on QigRanker side - val clientContext = ClientContext(userId = Some(0L)) - val productContext = ProductContext.TwistlySimilarTweetsProductContext( - TwistlySimilarTweetsProductContext(tweetId = tweetId)) - - QigRankerRequest( - clientContext = clientContext, - product = Product.TwistlySimilarTweets, - productContext = Some(productContext), - ) - } - - private def getCandidatesFromQigResponse( - qigSimilarTweetsResponse: QigRankerResponse - ): Option[Seq[TweetWithScore]] = { - qigSimilarTweetsResponse.productResponse match { - case QigRankerProductResponse - .TwistlySimilarTweetCandidatesResponse(response) => - val tweetsWithScore = response.similarTweets - .map { similarTweetResult => - TweetWithScore( - similarTweetResult.tweetResult.tweetId, - similarTweetResult.tweetResult.score.getOrElse(0L)) - } - Some(tweetsWithScore) - - case _ => None - } - } -} - -object TweetBasedQigSimilarityEngine { - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.Qig, - modelId = None, - score = Some(score)) - } - - case class Query(sourceId: InternalId) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query(sourceId = sourceId), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala deleted file mode 100644 index 6b84e2f67..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUnifiedSimilarityEngine.scala +++ /dev/null @@ -1,962 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.model.TweetWithCandidateGenerationInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.RelatedTweetTweetBasedParams -import com.twitter.cr_mixer.param.RelatedVideoTweetTweetBasedParams -import com.twitter.cr_mixer.param.SimClustersANNParams -import com.twitter.cr_mixer.param.TweetBasedCandidateGenerationParams -import com.twitter.cr_mixer.param.TweetBasedTwHINParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.util.InterleaveUtil -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.ModelVersions -import com.twitter.simclusters_v2.thriftscala.EmbeddingType -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Named -import javax.inject.Singleton -import scala.collection.mutable.ArrayBuffer - -/** - * This store fetches similar tweets from multiple tweet based candidate sources - * and combines them using different methods obtained from query params - */ -@Singleton -case class TweetBasedUnifiedSimilarityEngine( - @Named(ModuleNames.TweetBasedUserTweetGraphSimilarityEngine) - tweetBasedUserTweetGraphSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedUserVideoGraphSimilarityEngine) - tweetBasedUserVideoGraphSimilarityEngine: StandardSimilarityEngine[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - TweetWithScore - ], - simClustersANNSimilarityEngine: StandardSimilarityEngine[ - SimClustersANNSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedQigSimilarityEngine) - tweetBasedQigSimilarTweetsSimilarityEngine: StandardSimilarityEngine[ - TweetBasedQigSimilarityEngine.Query, - TweetWithScore - ], - @Named(ModuleNames.TweetBasedTwHINANNSimilarityEngine) - tweetBasedTwHINANNSimilarityEngine: HnswANNSimilarityEngine, - statsReceiver: StatsReceiver) - extends ReadableStore[ - TweetBasedUnifiedSimilarityEngine.Query, - Seq[TweetWithCandidateGenerationInfo] - ] { - - import TweetBasedUnifiedSimilarityEngine._ - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - - override def get( - query: Query - ): Future[Option[Seq[TweetWithCandidateGenerationInfo]]] = { - - query.sourceInfo.internalId match { - case _: InternalId.TweetId => - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val twhinQuery = - HnswANNEngineQuery( - sourceId = query.sourceInfo.internalId, - modelId = query.twhinModelId, - params = query.params) - val utgCandidatesFut = - if (query.enableUtg) - tweetBasedUserTweetGraphSimilarityEngine.getCandidates(query.utgQuery) - else Future.None - - val uvgCandidatesFut = - if (query.enableUvg) - tweetBasedUserVideoGraphSimilarityEngine.getCandidates(query.uvgQuery) - else Future.None - - val sannCandidatesFut = if (query.enableSimClustersANN) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANNQuery) - } else Future.None - - val sann1CandidatesFut = - if (query.enableSimClustersANN1) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN1Query) - } else Future.None - - val sann2CandidatesFut = - if (query.enableSimClustersANN2) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN2Query) - } else Future.None - - val sann3CandidatesFut = - if (query.enableSimClustersANN3) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN3Query) - } else Future.None - - val sann5CandidatesFut = - if (query.enableSimClustersANN5) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN5Query) - } else Future.None - - val sann4CandidatesFut = - if (query.enableSimClustersANN4) { - simClustersANNSimilarityEngine.getCandidates(query.simClustersANN4Query) - } else Future.None - - val experimentalSANNCandidatesFut = - if (query.enableExperimentalSimClustersANN) { - simClustersANNSimilarityEngine.getCandidates(query.experimentalSimClustersANNQuery) - } else Future.None - - val qigCandidatesFut = - if (query.enableQig) - tweetBasedQigSimilarTweetsSimilarityEngine.getCandidates(query.qigQuery) - else Future.None - - val twHINCandidateFut = if (query.enableTwHIN) { - tweetBasedTwHINANNSimilarityEngine.getCandidates(twhinQuery) - } else Future.None - - Future - .join( - utgCandidatesFut, - sannCandidatesFut, - sann1CandidatesFut, - sann2CandidatesFut, - sann3CandidatesFut, - sann5CandidatesFut, - sann4CandidatesFut, - experimentalSANNCandidatesFut, - qigCandidatesFut, - twHINCandidateFut, - uvgCandidatesFut - ).map { - case ( - userTweetGraphCandidates, - simClustersANNCandidates, - simClustersANN1Candidates, - simClustersANN2Candidates, - simClustersANN3Candidates, - simClustersANN5Candidates, - simClustersANN4Candidates, - experimentalSANNCandidates, - qigSimilarTweetsCandidates, - twhinCandidates, - userVideoGraphCandidates) => - val filteredUTGTweets = - userTweetGraphFilter(userTweetGraphCandidates.toSeq.flatten) - val filteredUVGTweets = - userVideoGraphFilter(userVideoGraphCandidates.toSeq.flatten) - val filteredSANNTweets = simClustersCandidateMinScoreFilter( - simClustersANNCandidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANNQuery.storeQuery.simClustersANNConfigId) - - val filteredSANN1Tweets = simClustersCandidateMinScoreFilter( - simClustersANN1Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN1Query.storeQuery.simClustersANNConfigId) - - val filteredSANN2Tweets = simClustersCandidateMinScoreFilter( - simClustersANN2Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN2Query.storeQuery.simClustersANNConfigId) - - val filteredSANN3Tweets = simClustersCandidateMinScoreFilter( - simClustersANN3Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN3Query.storeQuery.simClustersANNConfigId) - - val filteredSANN4Tweets = simClustersCandidateMinScoreFilter( - simClustersANN4Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN4Query.storeQuery.simClustersANNConfigId) - - val filteredSANN5Tweets = simClustersCandidateMinScoreFilter( - simClustersANN5Candidates.toSeq.flatten, - query.simClustersMinScore, - query.simClustersANN5Query.storeQuery.simClustersANNConfigId) - - val filteredExperimentalSANNTweets = simClustersCandidateMinScoreFilter( - experimentalSANNCandidates.toSeq.flatten, - query.simClustersVideoBasedMinScore, - query.experimentalSimClustersANNQuery.storeQuery.simClustersANNConfigId) - - val filteredQigTweets = qigSimilarTweetsFilter( - qigSimilarTweetsCandidates.toSeq.flatten, - query.qigMaxTweetAgeHours, - query.qigMaxNumSimilarTweets - ) - - val filteredTwHINTweets = twhinFilter( - twhinCandidates.toSeq.flatten.sortBy(-_.score), - query.twhinMaxTweetAgeHours, - tweetBasedTwHINANNSimilarityEngine.getScopedStats - ) - val utgTweetsWithCGInfo = filteredUTGTweets.map { tweetWithScore => - val similarityEngineInfo = TweetBasedUserTweetGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val uvgTweetsWithCGInfo = filteredUVGTweets.map { tweetWithScore => - val similarityEngineInfo = TweetBasedUserVideoGraphSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sannTweetsWithCGInfo = filteredSANNTweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANNQuery, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann1TweetsWithCGInfo = filteredSANN1Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN1Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann2TweetsWithCGInfo = filteredSANN2Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN2Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann3TweetsWithCGInfo = filteredSANN3Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN3Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann4TweetsWithCGInfo = filteredSANN4Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN4Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val sann5TweetsWithCGInfo = filteredSANN5Tweets.map { tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo(query.simClustersANN5Query, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val experimentalSANNTweetsWithCGInfo = filteredExperimentalSANNTweets.map { - tweetWithScore => - val similarityEngineInfo = SimClustersANNSimilarityEngine - .toSimilarityEngineInfo( - query.experimentalSimClustersANNQuery, - tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - val qigTweetsWithCGInfo = filteredQigTweets.map { tweetWithScore => - val similarityEngineInfo = TweetBasedQigSimilarityEngine - .toSimilarityEngineInfo(tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val twHINTweetsWithCGInfo = filteredTwHINTweets.map { tweetWithScore => - val similarityEngineInfo = tweetBasedTwHINANNSimilarityEngine - .toSimilarityEngineInfo(twhinQuery, tweetWithScore.score) - TweetWithCandidateGenerationInfo( - tweetWithScore.tweetId, - CandidateGenerationInfo( - Some(query.sourceInfo), - similarityEngineInfo, - Seq(similarityEngineInfo) - )) - } - - val candidateSourcesToBeInterleaved = - ArrayBuffer[Seq[TweetWithCandidateGenerationInfo]]( - sannTweetsWithCGInfo, - experimentalSANNTweetsWithCGInfo, - sann1TweetsWithCGInfo, - sann2TweetsWithCGInfo, - sann3TweetsWithCGInfo, - sann5TweetsWithCGInfo, - sann4TweetsWithCGInfo, - qigTweetsWithCGInfo, - uvgTweetsWithCGInfo, - utgTweetsWithCGInfo, - twHINTweetsWithCGInfo - ) - - val interleavedCandidates = - InterleaveUtil.interleave(candidateSourcesToBeInterleaved) - - val unifiedCandidatesWithUnifiedCGInfo = - interleavedCandidates.map { candidate => - /*** - * when a candidate was made by interleave/keepGivenOrder, - * then we apply getTweetBasedUnifiedCGInfo() to override with the unified CGInfo - * - * we'll not have ALL SEs that generated the tweet - * in contributingSE list for interleave. We only have the chosen SE available. - */ - TweetWithCandidateGenerationInfo( - tweetId = candidate.tweetId, - candidateGenerationInfo = getTweetBasedUnifiedCGInfo( - candidate.candidateGenerationInfo.sourceInfoOpt, - candidate.getSimilarityScore, - candidate.candidateGenerationInfo.contributingSimilarityEngines - ) // getSimilarityScore comes from either unifiedScore or single score - ) - } - stats - .stat("unified_candidate_size").add(unifiedCandidatesWithUnifiedCGInfo.size) - - val truncatedCandidates = - unifiedCandidatesWithUnifiedCGInfo.take(query.maxCandidateNumPerSourceKey) - stats.stat("truncatedCandidates_size").add(truncatedCandidates.size) - - Some(truncatedCandidates) - } - } - - case _ => - stats.counter("sourceId_is_not_tweetId_cnt").incr() - Future.None - } - } - - private def simClustersCandidateMinScoreFilter( - simClustersAnnCandidates: Seq[TweetWithScore], - simClustersMinScore: Double, - simClustersANNConfigId: String - ): Seq[TweetWithScore] = { - val filteredCandidates = simClustersAnnCandidates - .filter { candidate => - candidate.score > simClustersMinScore - } - - stats.stat(simClustersANNConfigId, "simClustersAnnCandidates_size").add(filteredCandidates.size) - stats.counter(simClustersANNConfigId, "simClustersAnnRequests").incr() - if (filteredCandidates.isEmpty) - stats.counter(simClustersANNConfigId, "emptyFilteredSimClustersAnnCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - - /** Returns a list of tweets that are generated less than `maxTweetAgeHours` hours ago */ - private def tweetAgeFilter( - candidates: Seq[TweetWithScore], - maxTweetAgeHours: Duration - ): Seq[TweetWithScore] = { - // Tweet IDs are approximately chronological (see http://go/snowflake), - // so we are building the earliest tweet id once - // The per-candidate logic here then be candidate.tweetId > earliestPermittedTweetId, which is far cheaper. - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAgeHours) - candidates.filter { candidate => candidate.tweetId >= earliestTweetId } - } - - private def twhinFilter( - twhinCandidates: Seq[TweetWithScore], - twhinMaxTweetAgeHours: Duration, - simEngineStats: StatsReceiver - ): Seq[TweetWithScore] = { - simEngineStats.stat("twhinCandidates_size").add(twhinCandidates.size) - val candidates = twhinCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - - val filteredCandidates = tweetAgeFilter(candidates, twhinMaxTweetAgeHours) - simEngineStats.stat("filteredTwhinCandidates_size").add(filteredCandidates.size) - if (filteredCandidates.isEmpty) simEngineStats.counter("emptyFilteredTwhinCandidates").incr() - - filteredCandidates - } - - /** A no-op filter as UTG filtering already happens on UTG service side */ - private def userTweetGraphFilter( - userTweetGraphCandidates: Seq[TweetWithScore] - ): Seq[TweetWithScore] = { - val filteredCandidates = userTweetGraphCandidates - - stats.stat("userTweetGraphCandidates_size").add(userTweetGraphCandidates.size) - if (filteredCandidates.isEmpty) stats.counter("emptyFilteredUserTweetGraphCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - - /** A no-op filter as UVG filtering already happens on UVG service side */ - private def userVideoGraphFilter( - userVideoGraphCandidates: Seq[TweetWithScore] - ): Seq[TweetWithScore] = { - val filteredCandidates = userVideoGraphCandidates - - stats.stat("userVideoGraphCandidates_size").add(userVideoGraphCandidates.size) - if (filteredCandidates.isEmpty) stats.counter("emptyFilteredUserVideoGraphCandidates").incr() - - filteredCandidates.map { candidate => - TweetWithScore(candidate.tweetId, candidate.score) - } - } - private def qigSimilarTweetsFilter( - qigSimilarTweetsCandidates: Seq[TweetWithScore], - qigMaxTweetAgeHours: Duration, - qigMaxNumSimilarTweets: Int - ): Seq[TweetWithScore] = { - val ageFilteredCandidates = tweetAgeFilter(qigSimilarTweetsCandidates, qigMaxTweetAgeHours) - stats.stat("ageFilteredQigSimilarTweetsCandidates_size").add(ageFilteredCandidates.size) - - val filteredCandidates = ageFilteredCandidates.take(qigMaxNumSimilarTweets) - if (filteredCandidates.isEmpty) stats.counter("emptyFilteredQigSimilarTweetsCandidates").incr() - - filteredCandidates - } - - /*** - * Every candidate will have the CG Info with TweetBasedUnifiedSimilarityEngine - * as they are generated by a composite of Similarity Engines. - * Additionally, we store the contributing SEs (eg., SANN, UTG). - */ - private def getTweetBasedUnifiedCGInfo( - sourceInfoOpt: Option[SourceInfo], - unifiedScore: Double, - contributingSimilarityEngines: Seq[SimilarityEngineInfo] - ): CandidateGenerationInfo = { - CandidateGenerationInfo( - sourceInfoOpt, - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.TweetBasedUnifiedSimilarityEngine, - modelId = None, // We do not assign modelId for a unified similarity engine - score = Some(unifiedScore) - ), - contributingSimilarityEngines - ) - } -} - -object TweetBasedUnifiedSimilarityEngine { - - case class Query( - sourceInfo: SourceInfo, - maxCandidateNumPerSourceKey: Int, - enableSimClustersANN: Boolean, - simClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableExperimentalSimClustersANN: Boolean, - experimentalSimClustersANNQuery: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN1: Boolean, - simClustersANN1Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN2: Boolean, - simClustersANN2Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN3: Boolean, - simClustersANN3Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN5: Boolean, - simClustersANN5Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - enableSimClustersANN4: Boolean, - simClustersANN4Query: EngineQuery[SimClustersANNSimilarityEngine.Query], - simClustersMinScore: Double, - simClustersVideoBasedMinScore: Double, - twhinModelId: String, - enableTwHIN: Boolean, - twhinMaxTweetAgeHours: Duration, - qigMaxTweetAgeHours: Duration, - qigMaxNumSimilarTweets: Int, - enableUtg: Boolean, - utgQuery: EngineQuery[TweetBasedUserTweetGraphSimilarityEngine.Query], - enableUvg: Boolean, - uvgQuery: EngineQuery[TweetBasedUserVideoGraphSimilarityEngine.Query], - enableQig: Boolean, - qigQuery: EngineQuery[TweetBasedQigSimilarityEngine.Query], - params: configapi.Params) - - def fromParams( - sourceInfo: SourceInfo, - params: configapi.Params, - ): EngineQuery[Query] = { - // SimClusters - val enableSimClustersANN = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANNParam) - - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersMinScore = params(TweetBasedCandidateGenerationParams.SimClustersMinScoreParam) - val simClustersVideoBasedMinScore = params( - TweetBasedCandidateGenerationParams.SimClustersVideoBasedMinScoreParam) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - // SimClusters - Experimental SANN Similarity Engine (Video based SE) - val enableExperimentalSimClustersANN = - params(TweetBasedCandidateGenerationParams.EnableExperimentalSimClustersANNParam) - - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - // SimClusters - SANN cluster 1 Similarity Engine - val enableSimClustersANN1 = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANN1Param) - - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - // SimClusters - SANN cluster 2 Similarity Engine - val enableSimClustersANN2 = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANN2Param) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - // SimClusters - SANN cluster 3 Similarity Engine - val enableSimClustersANN3 = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANN3Param) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - // SimClusters - SANN cluster 5 Similarity Engine - val enableSimClustersANN5 = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANN5Param) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - // SimClusters - SANN cluster 4 Similarity Engine - val enableSimClustersANN4 = - params(TweetBasedCandidateGenerationParams.EnableSimClustersANN4Param) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - // SimClusters ANN Queries for different SANN clusters - val simClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANNConfigId, - params - ) - val experimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params - ) - val simClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN1ConfigId, - params - ) - val simClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - val simClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN3ConfigId, - params - ) - val simClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN5ConfigId, - params - ) - val simClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( - sourceInfo.internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN4ConfigId, - params - ) - // TweetBasedCandidateGeneration - val maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - // TwHIN - val twhinModelId = params(TweetBasedTwHINParams.ModelIdParam) - val enableTwHIN = - params(TweetBasedCandidateGenerationParams.EnableTwHINParam) - - val twhinMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - - // QIG - val enableQig = - params(TweetBasedCandidateGenerationParams.EnableQigSimilarTweetsParam) - val qigMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - val qigMaxNumSimilarTweets = params( - TweetBasedCandidateGenerationParams.QigMaxNumSimilarTweetsParam) - - // UTG - val enableUtg = - params(TweetBasedCandidateGenerationParams.EnableUTGParam) - // UVG - val enableUvg = - params(TweetBasedCandidateGenerationParams.EnableUVGParam) - EngineQuery( - Query( - sourceInfo = sourceInfo, - maxCandidateNumPerSourceKey = maxCandidateNumPerSourceKey, - enableSimClustersANN = enableSimClustersANN, - simClustersANNQuery = simClustersANNQuery, - enableExperimentalSimClustersANN = enableExperimentalSimClustersANN, - experimentalSimClustersANNQuery = experimentalSimClustersANNQuery, - enableSimClustersANN1 = enableSimClustersANN1, - simClustersANN1Query = simClustersANN1Query, - enableSimClustersANN2 = enableSimClustersANN2, - simClustersANN2Query = simClustersANN2Query, - enableSimClustersANN3 = enableSimClustersANN3, - simClustersANN3Query = simClustersANN3Query, - enableSimClustersANN5 = enableSimClustersANN5, - simClustersANN5Query = simClustersANN5Query, - enableSimClustersANN4 = enableSimClustersANN4, - simClustersANN4Query = simClustersANN4Query, - simClustersMinScore = simClustersMinScore, - simClustersVideoBasedMinScore = simClustersVideoBasedMinScore, - twhinModelId = twhinModelId, - enableTwHIN = enableTwHIN, - twhinMaxTweetAgeHours = twhinMaxTweetAgeHours, - qigMaxTweetAgeHours = qigMaxTweetAgeHours, - qigMaxNumSimilarTweets = qigMaxNumSimilarTweets, - enableUtg = enableUtg, - utgQuery = TweetBasedUserTweetGraphSimilarityEngine - .fromParams(sourceInfo.internalId, params), - enableQig = enableQig, - qigQuery = TweetBasedQigSimilarityEngine.fromParams(sourceInfo.internalId, params), - enableUvg = enableUvg, - uvgQuery = - TweetBasedUserVideoGraphSimilarityEngine.fromParams(sourceInfo.internalId, params), - params = params - ), - params - ) - } - - def fromParamsForRelatedTweet( - internalId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - // SimClusters - val enableSimClustersANN = params(RelatedTweetTweetBasedParams.EnableSimClustersANNParam) - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersMinScore = params(RelatedTweetTweetBasedParams.SimClustersMinScoreParam) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - val enableExperimentalSimClustersANN = - params(RelatedTweetTweetBasedParams.EnableExperimentalSimClustersANNParam) - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - // SimClusters - SANN cluster 1 Similarity Engine - val enableSimClustersANN1 = params(RelatedTweetTweetBasedParams.EnableSimClustersANN1Param) - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - // SimClusters - SANN cluster 2 Similarity Engine - val enableSimClustersANN2 = params(RelatedTweetTweetBasedParams.EnableSimClustersANN2Param) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - // SimClusters - SANN cluster 3 Similarity Engine - val enableSimClustersANN3 = params(RelatedTweetTweetBasedParams.EnableSimClustersANN3Param) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - // SimClusters - SANN cluster 5 Similarity Engine - val enableSimClustersANN5 = params(RelatedTweetTweetBasedParams.EnableSimClustersANN5Param) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - // SimClusters - SANN cluster 4 Similarity Engine - val enableSimClustersANN4 = params(RelatedTweetTweetBasedParams.EnableSimClustersANN4Param) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - // SimClusters ANN Queries for different SANN clusters - val simClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANNConfigId, - params - ) - val experimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params - ) - val simClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN1ConfigId, - params - ) - val simClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - val simClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN3ConfigId, - params - ) - val simClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN5ConfigId, - params - ) - val simClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN4ConfigId, - params - ) - // TweetBasedCandidateGeneration - val maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - // TwHIN - val twhinModelId = params(TweetBasedTwHINParams.ModelIdParam) - val enableTwHIN = params(RelatedTweetTweetBasedParams.EnableTwHINParam) - val twhinMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - // QIG - val enableQig = params(RelatedTweetTweetBasedParams.EnableQigSimilarTweetsParam) - val qigMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - val qigMaxNumSimilarTweets = params( - TweetBasedCandidateGenerationParams.QigMaxNumSimilarTweetsParam) - // UTG - val enableUtg = params(RelatedTweetTweetBasedParams.EnableUTGParam) - // UVG - val enableUvg = params(RelatedTweetTweetBasedParams.EnableUVGParam) - // SourceType.RequestTweetId is a placeholder. - val sourceInfo = SourceInfo(SourceType.RequestTweetId, internalId, None) - - EngineQuery( - Query( - sourceInfo = sourceInfo, - maxCandidateNumPerSourceKey = maxCandidateNumPerSourceKey, - enableSimClustersANN = enableSimClustersANN, - simClustersMinScore = simClustersMinScore, - simClustersVideoBasedMinScore = simClustersMinScore, - simClustersANNQuery = simClustersANNQuery, - enableExperimentalSimClustersANN = enableExperimentalSimClustersANN, - experimentalSimClustersANNQuery = experimentalSimClustersANNQuery, - enableSimClustersANN1 = enableSimClustersANN1, - simClustersANN1Query = simClustersANN1Query, - enableSimClustersANN2 = enableSimClustersANN2, - simClustersANN2Query = simClustersANN2Query, - enableSimClustersANN3 = enableSimClustersANN3, - simClustersANN3Query = simClustersANN3Query, - enableSimClustersANN5 = enableSimClustersANN5, - simClustersANN5Query = simClustersANN5Query, - enableSimClustersANN4 = enableSimClustersANN4, - simClustersANN4Query = simClustersANN4Query, - twhinModelId = twhinModelId, - enableTwHIN = enableTwHIN, - twhinMaxTweetAgeHours = twhinMaxTweetAgeHours, - qigMaxTweetAgeHours = qigMaxTweetAgeHours, - qigMaxNumSimilarTweets = qigMaxNumSimilarTweets, - enableUtg = enableUtg, - utgQuery = TweetBasedUserTweetGraphSimilarityEngine - .fromParams(sourceInfo.internalId, params), - enableQig = enableQig, - qigQuery = TweetBasedQigSimilarityEngine.fromParams(sourceInfo.internalId, params), - enableUvg = enableUvg, - uvgQuery = - TweetBasedUserVideoGraphSimilarityEngine.fromParams(sourceInfo.internalId, params), - params = params, - ), - params - ) - } - def fromParamsForRelatedVideoTweet( - internalId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - // SimClusters - val enableSimClustersANN = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANNParam) - val simClustersModelVersion = - ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) - val simClustersMinScore = params(RelatedVideoTweetTweetBasedParams.SimClustersMinScoreParam) - val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) - val enableExperimentalSimClustersANN = params( - RelatedVideoTweetTweetBasedParams.EnableExperimentalSimClustersANNParam) - val experimentalSimClustersANNConfigId = params( - SimClustersANNParams.ExperimentalSimClustersANNConfigId) - // SimClusters - SANN cluster 1 Similarity Engine - val enableSimClustersANN1 = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANN1Param) - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) - // SimClusters - SANN cluster 2 Similarity Engine - val enableSimClustersANN2 = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANN2Param) - val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) - // SimClusters - SANN cluster 3 Similarity Engine - val enableSimClustersANN3 = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANN3Param) - val simClustersANN3ConfigId = params(SimClustersANNParams.SimClustersANN3ConfigId) - // SimClusters - SANN cluster 5 Similarity Engine - val enableSimClustersANN5 = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANN5Param) - val simClustersANN5ConfigId = params(SimClustersANNParams.SimClustersANN5ConfigId) - - // SimClusters - SANN cluster 4 Similarity Engine - val enableSimClustersANN4 = params(RelatedVideoTweetTweetBasedParams.EnableSimClustersANN4Param) - val simClustersANN4ConfigId = params(SimClustersANNParams.SimClustersANN4ConfigId) - // SimClusters ANN Queries for different SANN clusters - val simClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANNConfigId, - params - ) - val experimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - experimentalSimClustersANNConfigId, - params - ) - val simClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN1ConfigId, - params - ) - val simClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN2ConfigId, - params - ) - val simClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN3ConfigId, - params - ) - val simClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN5ConfigId, - params - ) - - val simClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( - internalId, - EmbeddingType.LogFavLongestL2EmbeddingTweet, - simClustersModelVersion, - simClustersANN4ConfigId, - params - ) - // TweetBasedCandidateGeneration - val maxCandidateNumPerSourceKey = params(GlobalParams.MaxCandidateNumPerSourceKeyParam) - // TwHIN - val twhinModelId = params(TweetBasedTwHINParams.ModelIdParam) - val enableTwHIN = params(RelatedVideoTweetTweetBasedParams.EnableTwHINParam) - val twhinMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - // QIG - val enableQig = params(RelatedVideoTweetTweetBasedParams.EnableQigSimilarTweetsParam) - val qigMaxTweetAgeHours = params(GlobalParams.MaxTweetAgeHoursParam) - val qigMaxNumSimilarTweets = params( - TweetBasedCandidateGenerationParams.QigMaxNumSimilarTweetsParam) - // UTG - val enableUtg = params(RelatedVideoTweetTweetBasedParams.EnableUTGParam) - - // SourceType.RequestTweetId is a placeholder. - val sourceInfo = SourceInfo(SourceType.RequestTweetId, internalId, None) - - val enableUvg = params(RelatedVideoTweetTweetBasedParams.EnableUVGParam) - EngineQuery( - Query( - sourceInfo = sourceInfo, - maxCandidateNumPerSourceKey = maxCandidateNumPerSourceKey, - enableSimClustersANN = enableSimClustersANN, - simClustersMinScore = simClustersMinScore, - simClustersVideoBasedMinScore = simClustersMinScore, - simClustersANNQuery = simClustersANNQuery, - enableExperimentalSimClustersANN = enableExperimentalSimClustersANN, - experimentalSimClustersANNQuery = experimentalSimClustersANNQuery, - enableSimClustersANN1 = enableSimClustersANN1, - simClustersANN1Query = simClustersANN1Query, - enableSimClustersANN2 = enableSimClustersANN2, - simClustersANN2Query = simClustersANN2Query, - enableSimClustersANN3 = enableSimClustersANN3, - simClustersANN3Query = simClustersANN3Query, - enableSimClustersANN5 = enableSimClustersANN5, - simClustersANN5Query = simClustersANN5Query, - enableSimClustersANN4 = enableSimClustersANN4, - simClustersANN4Query = simClustersANN4Query, - twhinModelId = twhinModelId, - enableTwHIN = enableTwHIN, - twhinMaxTweetAgeHours = twhinMaxTweetAgeHours, - qigMaxTweetAgeHours = qigMaxTweetAgeHours, - qigMaxNumSimilarTweets = qigMaxNumSimilarTweets, - enableUtg = enableUtg, - utgQuery = TweetBasedUserTweetGraphSimilarityEngine - .fromParams(sourceInfo.internalId, params), - enableUvg = enableUvg, - uvgQuery = - TweetBasedUserVideoGraphSimilarityEngine.fromParams(sourceInfo.internalId, params), - enableQig = enableQig, - qigQuery = TweetBasedQigSimilarityEngine.fromParams(sourceInfo.internalId, params), - params = params - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserAdGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserAdGraphSimilarityEngine.scala deleted file mode 100644 index 365bead35..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserAdGraphSimilarityEngine.scala +++ /dev/null @@ -1,129 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.TweetBasedUserAdGraphParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.recos.user_ad_graph.thriftscala.ConsumersBasedRelatedAdRequest -import com.twitter.recos.user_ad_graph.thriftscala.RelatedAdResponse -import com.twitter.recos.user_ad_graph.thriftscala.UserAdGraph -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import com.twitter.util.Future -import javax.inject.Singleton - -/** - * This store looks for similar tweets from UserAdGraph for a Source TweetId - * For a query tweet,User Ad Graph (UAG) - * lets us find out which other tweets share a lot of the same engagers with the query tweet - */ -@Singleton -case class TweetBasedUserAdGraphSimilarityEngine( - userAdGraphService: UserAdGraph.MethodPerEndpoint, - tweetEngagedUsersStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - statsReceiver: StatsReceiver) - extends ReadableStore[ - TweetBasedUserAdGraphSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - import TweetBasedUserAdGraphSimilarityEngine._ - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCoverageExpansionCandidatesStat = stats.scope("fetchCoverageExpansionCandidates") - override def get( - query: TweetBasedUserAdGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - query.sourceId match { - case InternalId.TweetId(tweetId) => getCandidates(tweetId, query) - case _ => - Future.value(None) - } - } - - // We first fetch tweet's recent engaged users as consumeSeedSet from MH store, - // then query consumersBasedUTG using the consumerSeedSet - private def getCandidates( - tweetId: TweetId, - query: TweetBasedUserAdGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil - .trackOptionItemsStats(fetchCoverageExpansionCandidatesStat) { - tweetEngagedUsersStore - .get(tweetId).flatMap { - _.map { tweetRecentEngagedUsers => - val consumerSeedSet = - tweetRecentEngagedUsers.recentEngagedUsers - .map { _.userId }.take(query.maxConsumerSeedsNum) - val consumersBasedRelatedAdRequest = - ConsumersBasedRelatedAdRequest( - consumerSeedSet = consumerSeedSet, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - excludeTweetIds = Some(Seq(tweetId)), - minScore = Some(query.consumersBasedMinScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - toTweetWithScore(userAdGraphService - .consumersBasedRelatedAds(consumersBasedRelatedAdRequest).map { Some(_) }) - }.getOrElse(Future.value(None)) - } - } - } - -} - -object TweetBasedUserAdGraphSimilarityEngine { - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.TweetBasedUserAdGraph, - modelId = None, - score = Some(score)) - } - private def toTweetWithScore( - relatedAdResponseFut: Future[Option[RelatedAdResponse]] - ): Future[Option[Seq[TweetWithScore]]] = { - relatedAdResponseFut.map { relatedAdResponseOpt => - relatedAdResponseOpt.map { relatedAdResponse => - val candidates = - relatedAdResponse.adTweets.map(tweet => TweetWithScore(tweet.adTweetId, tweet.score)) - - candidates - } - } - } - - case class Query( - sourceId: InternalId, - maxResults: Int, - minCooccurrence: Int, - consumersBasedMinScore: Double, - maxTweetAgeInHours: Int, - maxConsumerSeedsNum: Int, - ) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceId = sourceId, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(TweetBasedUserAdGraphParams.MinCoOccurrenceParam), - consumersBasedMinScore = params(TweetBasedUserAdGraphParams.ConsumersBasedMinScoreParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - maxConsumerSeedsNum = params(TweetBasedUserAdGraphParams.MaxConsumerSeedsNumParam), - ), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserTweetGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserTweetGraphSimilarityEngine.scala deleted file mode 100644 index 316f980a7..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserTweetGraphSimilarityEngine.scala +++ /dev/null @@ -1,184 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.TweetBasedUserTweetGraphParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.recos.user_tweet_graph.thriftscala.RelatedTweetResponse -import com.twitter.recos.user_tweet_graph.thriftscala.TweetBasedRelatedTweetRequest -import com.twitter.recos.user_tweet_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_tweet_graph.thriftscala.UserTweetGraph -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import com.twitter.util.Future -import javax.inject.Singleton -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Time -import scala.concurrent.duration.HOURS - -/** - * This store looks for similar tweets from UserTweetGraph for a Source TweetId - * For a query tweet,User Tweet Graph (UTG), - * lets us find out which other tweets share a lot of the same engagers with the query tweet - * one-pager: go/UTG - */ -@Singleton -case class TweetBasedUserTweetGraphSimilarityEngine( - userTweetGraphService: UserTweetGraph.MethodPerEndpoint, - tweetEngagedUsersStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - statsReceiver: StatsReceiver) - extends ReadableStore[ - TweetBasedUserTweetGraphSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - import TweetBasedUserTweetGraphSimilarityEngine._ - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - private val fetchCoverageExpansionCandidatesStat = stats.scope("fetchCoverageExpansionCandidates") - - override def get( - query: TweetBasedUserTweetGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - query.sourceId match { - case InternalId.TweetId(tweetId) if query.enableCoverageExpansionAllTweet => - getCoverageExpansionCandidates(tweetId, query) - - case InternalId.TweetId(tweetId) if query.enableCoverageExpansionOldTweet => // For Home - if (isOldTweet(tweetId)) getCoverageExpansionCandidates(tweetId, query) - else getCandidates(tweetId, query) - - case InternalId.TweetId(tweetId) => getCandidates(tweetId, query) - case _ => - Future.value(None) - } - } - - // This is the main candidate source - private def getCandidates( - tweetId: TweetId, - query: TweetBasedUserTweetGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val tweetBasedRelatedTweetRequest = { - TweetBasedRelatedTweetRequest( - tweetId, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - excludeTweetIds = Some(Seq(tweetId)), - minScore = Some(query.tweetBasedMinScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - } - toTweetWithScore( - userTweetGraphService.tweetBasedRelatedTweets(tweetBasedRelatedTweetRequest).map { - Some(_) - }) - } - } - - // function for DDGs, for coverage expansion algo, we first fetch tweet's recent engaged users as consumeSeedSet from MH store, - // and query consumersBasedUTG using the consumeSeedSet - private def getCoverageExpansionCandidates( - tweetId: TweetId, - query: TweetBasedUserTweetGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil - .trackOptionItemsStats(fetchCoverageExpansionCandidatesStat) { - tweetEngagedUsersStore - .get(tweetId).flatMap { - _.map { tweetRecentEngagedUsers => - val consumerSeedSet = - tweetRecentEngagedUsers.recentEngagedUsers - .map { _.userId }.take(query.maxConsumerSeedsNum) - val consumersBasedRelatedTweetRequest = - ConsumersBasedRelatedTweetRequest( - consumerSeedSet = consumerSeedSet, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - excludeTweetIds = Some(Seq(tweetId)), - minScore = Some(query.consumersBasedMinScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - - toTweetWithScore(userTweetGraphService - .consumersBasedRelatedTweets(consumersBasedRelatedTweetRequest).map { Some(_) }) - }.getOrElse(Future.value(None)) - } - } - } - -} - -object TweetBasedUserTweetGraphSimilarityEngine { - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.TweetBasedUserTweetGraph, - modelId = None, - score = Some(score)) - } - - private val oldTweetCap: Duration = Duration(48, HOURS) - - private def toTweetWithScore( - relatedTweetResponseFut: Future[Option[RelatedTweetResponse]] - ): Future[Option[Seq[TweetWithScore]]] = { - relatedTweetResponseFut.map { relatedTweetResponseOpt => - relatedTweetResponseOpt.map { relatedTweetResponse => - val candidates = - relatedTweetResponse.tweets.map(tweet => TweetWithScore(tweet.tweetId, tweet.score)) - candidates - } - } - } - - private def isOldTweet(tweetId: TweetId): Boolean = { - SnowflakeId - .timeFromIdOpt(tweetId).forall { tweetTime => tweetTime < Time.now - oldTweetCap } - // If there's no snowflake timestamp, we have no idea when this tweet happened. - } - - case class Query( - sourceId: InternalId, - maxResults: Int, - minCooccurrence: Int, - tweetBasedMinScore: Double, - consumersBasedMinScore: Double, - maxTweetAgeInHours: Int, - maxConsumerSeedsNum: Int, - enableCoverageExpansionOldTweet: Boolean, - enableCoverageExpansionAllTweet: Boolean, - ) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceId = sourceId, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(TweetBasedUserTweetGraphParams.MinCoOccurrenceParam), - tweetBasedMinScore = params(TweetBasedUserTweetGraphParams.TweetBasedMinScoreParam), - consumersBasedMinScore = params(TweetBasedUserTweetGraphParams.ConsumersBasedMinScoreParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - maxConsumerSeedsNum = params(TweetBasedUserTweetGraphParams.MaxConsumerSeedsNumParam), - enableCoverageExpansionOldTweet = - params(TweetBasedUserTweetGraphParams.EnableCoverageExpansionOldTweetParam), - enableCoverageExpansionAllTweet = - params(TweetBasedUserTweetGraphParams.EnableCoverageExpansionAllTweetParam), - ), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserVideoGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserVideoGraphSimilarityEngine.scala deleted file mode 100644 index 6190cd4fb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TweetBasedUserVideoGraphSimilarityEngine.scala +++ /dev/null @@ -1,184 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.TweetBasedUserVideoGraphParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.recos.user_video_graph.thriftscala.RelatedTweetResponse -import com.twitter.recos.user_video_graph.thriftscala.ConsumersBasedRelatedTweetRequest -import com.twitter.recos.user_video_graph.thriftscala.TweetBasedRelatedTweetRequest -import com.twitter.recos.user_video_graph.thriftscala.UserVideoGraph -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.snowflake.id.SnowflakeId -import com.twitter.timelines.configapi -import com.twitter.twistly.thriftscala.TweetRecentEngagedUsers -import com.twitter.util.Duration -import javax.inject.Singleton -import com.twitter.util.Future -import com.twitter.util.Time -import scala.concurrent.duration.HOURS - -/** - * This store looks for similar tweets from UserVideoGraph for a Source TweetId - * For a query tweet,User Video Graph (UVG), - * lets us find out which other video tweets share a lot of the same engagers with the query tweet - */ -@Singleton -case class TweetBasedUserVideoGraphSimilarityEngine( - userVideoGraphService: UserVideoGraph.MethodPerEndpoint, - tweetEngagedUsersStore: ReadableStore[TweetId, TweetRecentEngagedUsers], - statsReceiver: StatsReceiver) - extends ReadableStore[ - TweetBasedUserVideoGraphSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - import TweetBasedUserVideoGraphSimilarityEngine._ - - private val stats = statsReceiver.scope(this.getClass.getSimpleName) - private val fetchCandidatesStat = stats.scope("fetchCandidates") - private val fetchCoverageExpansionCandidatesStat = stats.scope("fetchCoverageExpansionCandidates") - - override def get( - query: TweetBasedUserVideoGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - - query.sourceId match { - case InternalId.TweetId(tweetId) if query.enableCoverageExpansionAllTweet => - getCoverageExpansionCandidates(tweetId, query) - - case InternalId.TweetId(tweetId) if query.enableCoverageExpansionOldTweet => // For Home - if (isOldTweet(tweetId)) getCoverageExpansionCandidates(tweetId, query) - else getCandidates(tweetId, query) - - case InternalId.TweetId(tweetId) => getCandidates(tweetId, query) - case _ => - Future.value(None) - } - } - - private def getCandidates( - tweetId: TweetId, - query: TweetBasedUserVideoGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil.trackOptionItemsStats(fetchCandidatesStat) { - val tweetBasedRelatedTweetRequest = { - TweetBasedRelatedTweetRequest( - tweetId, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - excludeTweetIds = Some(Seq(tweetId)), - minScore = Some(query.tweetBasedMinScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - } - toTweetWithScore( - userVideoGraphService.tweetBasedRelatedTweets(tweetBasedRelatedTweetRequest).map { - Some(_) - }) - } - } - - private def getCoverageExpansionCandidates( - tweetId: TweetId, - query: TweetBasedUserVideoGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - StatsUtil - .trackOptionItemsStats(fetchCoverageExpansionCandidatesStat) { - tweetEngagedUsersStore - .get(tweetId).flatMap { - _.map { tweetRecentEngagedUsers => - val consumerSeedSet = - tweetRecentEngagedUsers.recentEngagedUsers - .map { - _.userId - }.take(query.maxConsumerSeedsNum) - val consumersBasedRelatedTweetRequest = - ConsumersBasedRelatedTweetRequest( - consumerSeedSet = consumerSeedSet, - maxResults = Some(query.maxResults), - minCooccurrence = Some(query.minCooccurrence), - excludeTweetIds = Some(Seq(tweetId)), - minScore = Some(query.consumersBasedMinScore), - maxTweetAgeInHours = Some(query.maxTweetAgeInHours) - ) - - toTweetWithScore(userVideoGraphService - .consumersBasedRelatedTweets(consumersBasedRelatedTweetRequest).map { - Some(_) - }) - }.getOrElse(Future.value(None)) - } - } - } - -} - -object TweetBasedUserVideoGraphSimilarityEngine { - - private val oldTweetCap: Duration = Duration(24, HOURS) - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.TweetBasedUserVideoGraph, - modelId = None, - score = Some(score)) - } - - private def toTweetWithScore( - relatedTweetResponseFut: Future[Option[RelatedTweetResponse]] - ): Future[Option[Seq[TweetWithScore]]] = { - relatedTweetResponseFut.map { relatedTweetResponseOpt => - relatedTweetResponseOpt.map { relatedTweetResponse => - val candidates = - relatedTweetResponse.tweets.map(tweet => TweetWithScore(tweet.tweetId, tweet.score)) - candidates - } - } - } - - private def isOldTweet(tweetId: TweetId): Boolean = { - SnowflakeId - .timeFromIdOpt(tweetId).forall { tweetTime => tweetTime < Time.now - oldTweetCap } - // If there's no snowflake timestamp, we have no idea when this tweet happened. - } - - case class Query( - sourceId: InternalId, - maxResults: Int, - minCooccurrence: Int, - tweetBasedMinScore: Double, - consumersBasedMinScore: Double, - maxTweetAgeInHours: Int, - maxConsumerSeedsNum: Int, - enableCoverageExpansionOldTweet: Boolean, - enableCoverageExpansionAllTweet: Boolean) - - def fromParams( - sourceId: InternalId, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - sourceId = sourceId, - maxResults = params(GlobalParams.MaxCandidateNumPerSourceKeyParam), - minCooccurrence = params(TweetBasedUserVideoGraphParams.MinCoOccurrenceParam), - tweetBasedMinScore = params(TweetBasedUserVideoGraphParams.TweetBasedMinScoreParam), - consumersBasedMinScore = params(TweetBasedUserVideoGraphParams.ConsumersBasedMinScoreParam), - maxTweetAgeInHours = params(GlobalParams.MaxTweetAgeHoursParam).inHours, - maxConsumerSeedsNum = params(TweetBasedUserVideoGraphParams.MaxConsumerSeedsNumParam), - enableCoverageExpansionOldTweet = - params(TweetBasedUserVideoGraphParams.EnableCoverageExpansionOldTweetParam), - enableCoverageExpansionAllTweet = - params(TweetBasedUserVideoGraphParams.EnableCoverageExpansionAllTweetParam) - ), - params - ) - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TwhinCollabFilterSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TwhinCollabFilterSimilarityEngine.scala deleted file mode 100644 index eccab6aa3..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/TwhinCollabFilterSimilarityEngine.scala +++ /dev/null @@ -1,72 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.cr_mixer.model.TweetWithScore -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -case class TwhinCollabFilterSimilarityEngine( - twhinCandidatesStratoStore: ReadableStore[Long, Seq[TweetId]], - statsReceiver: StatsReceiver) - extends ReadableStore[ - TwhinCollabFilterSimilarityEngine.Query, - Seq[TweetWithScore] - ] { - - import TwhinCollabFilterSimilarityEngine._ - override def get( - query: TwhinCollabFilterSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScore]]] = { - - query.sourceId match { - case InternalId.UserId(userId) => - twhinCandidatesStratoStore.get(userId).map { - _.map { - _.map { tweetId => TweetWithScore(tweetId, defaultScore) } - } - } - case _ => - Future.None - } - } -} - -object TwhinCollabFilterSimilarityEngine { - - val defaultScore: Double = 1.0 - - case class TwhinCollabFilterView(clusterVersion: String) - - case class Query( - sourceId: InternalId, - ) - - def toSimilarityEngineInfo( - query: LookupEngineQuery[Query], - score: Double - ): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.TwhinCollabFilter, - modelId = Some(query.lookupKey), - score = Some(score)) - } - - def fromParams( - sourceId: InternalId, - modelId: String, - params: configapi.Params, - ): LookupEngineQuery[Query] = { - LookupEngineQuery( - Query(sourceId = sourceId), - modelId, - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/UserTweetEntityGraphSimilarityEngine.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/UserTweetEntityGraphSimilarityEngine.scala deleted file mode 100644 index 9c61b3d1c..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/similarity_engine/UserTweetEntityGraphSimilarityEngine.scala +++ /dev/null @@ -1,110 +0,0 @@ -package com.twitter.cr_mixer.similarity_engine - -import com.twitter.recos.recos_common.thriftscala.SocialProofType -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.TweetWithScoreAndSocialProof -import com.twitter.cr_mixer.param.UtegTweetGlobalParams -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.recos.user_tweet_entity_graph.thriftscala.TweetEntityDisplayLocation -import com.twitter.recos.user_tweet_entity_graph.thriftscala.UserTweetEntityGraph -import com.twitter.recos.user_tweet_entity_graph.thriftscala.RecommendTweetEntityRequest -import com.twitter.recos.user_tweet_entity_graph.thriftscala.RecommendationType -import com.twitter.recos.user_tweet_entity_graph.thriftscala.UserTweetEntityRecommendationUnion.TweetRec -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.storehaus.ReadableStore -import com.twitter.timelines.configapi -import com.twitter.util.Duration -import com.twitter.util.Future -import javax.inject.Singleton - -@Singleton -case class UserTweetEntityGraphSimilarityEngine( - userTweetEntityGraph: UserTweetEntityGraph.MethodPerEndpoint, - statsReceiver: StatsReceiver) - extends ReadableStore[ - UserTweetEntityGraphSimilarityEngine.Query, - Seq[TweetWithScoreAndSocialProof] - ] { - - override def get( - query: UserTweetEntityGraphSimilarityEngine.Query - ): Future[Option[Seq[TweetWithScoreAndSocialProof]]] = { - val recommendTweetEntityRequest = - RecommendTweetEntityRequest( - requesterId = query.userId, - displayLocation = TweetEntityDisplayLocation.HomeTimeline, - recommendationTypes = Seq(RecommendationType.Tweet), - seedsWithWeights = query.seedsWithWeights, - maxResultsByType = Some(Map(RecommendationType.Tweet -> query.maxUtegCandidates)), - maxTweetAgeInMillis = Some(query.maxTweetAge.inMilliseconds), - excludedTweetIds = query.excludedTweetIds, - maxUserSocialProofSize = Some(UserTweetEntityGraphSimilarityEngine.MaxUserSocialProofSize), - maxTweetSocialProofSize = - Some(UserTweetEntityGraphSimilarityEngine.MaxTweetSocialProofSize), - minUserSocialProofSizes = Some(Map(RecommendationType.Tweet -> 1)), - tweetTypes = None, - socialProofTypes = query.socialProofTypes, - socialProofTypeUnions = None, - tweetAuthors = None, - maxEngagementAgeInMillis = None, - excludedTweetAuthors = None, - ) - - userTweetEntityGraph - .recommendTweets(recommendTweetEntityRequest) - .map { recommendTweetsResponse => - val candidates = recommendTweetsResponse.recommendations.flatMap { - case TweetRec(recommendation) => - Some( - TweetWithScoreAndSocialProof( - recommendation.tweetId, - recommendation.score, - recommendation.socialProofByType.toMap)) - case _ => None - } - Some(candidates) - } - } -} - -object UserTweetEntityGraphSimilarityEngine { - - private val MaxUserSocialProofSize = 10 - private val MaxTweetSocialProofSize = 10 - - def toSimilarityEngineInfo(score: Double): SimilarityEngineInfo = { - SimilarityEngineInfo( - similarityEngineType = SimilarityEngineType.Uteg, - modelId = None, - score = Some(score)) - } - - case class Query( - userId: UserId, - seedsWithWeights: Map[UserId, Double], - excludedTweetIds: Option[Seq[Long]] = None, - maxUtegCandidates: Int, - maxTweetAge: Duration, - socialProofTypes: Option[Seq[SocialProofType]]) - - def fromParams( - userId: UserId, - seedsWithWeights: Map[UserId, Double], - excludedTweetIds: Option[Seq[TweetId]] = None, - params: configapi.Params, - ): EngineQuery[Query] = { - EngineQuery( - Query( - userId = userId, - seedsWithWeights = seedsWithWeights, - excludedTweetIds = excludedTweetIds, - maxUtegCandidates = params(UtegTweetGlobalParams.MaxUtegCandidatesToRequestParam), - maxTweetAge = params(UtegTweetGlobalParams.CandidateRefreshSinceTimeOffsetHoursParam), - socialProofTypes = Some(Seq(SocialProofType.Favorite)) - ), - params - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/BUILD deleted file mode 100644 index 37b0a7585..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/BUILD +++ /dev/null @@ -1,32 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/javax/inject:javax.inject", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "configapi/configapi-core", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param/decider", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "decider/src/main/scala", - "finatra-internal/mtls-thriftmux/src/main/scala", - "finatra/inject/inject-core/src/main/scala", - "follow-recommendations-service/thrift/src/main/thrift:thrift-scala", - "frigate/frigate-common:base", - "frigate/frigate-common:util", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/candidate", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/hermit/stp:hermit-stp-scala", - "src/thrift/com/twitter/onboarding/relevance/coldstart_lookalike:coldstartlookalike-thrift-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - "src/thrift/com/twitter/wtf/candidate:wtf-candidate-scala", - "user-signal-service/thrift/src/main/thrift:thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceGraphFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceGraphFetcher.scala deleted file mode 100644 index 16162c67d..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceGraphFetcher.scala +++ /dev/null @@ -1,54 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.FrsParams -import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/*** - * This store fetches user recommendations from FRS (go/frs) for a given userId - */ -@Singleton -case class FrsSourceGraphFetcher @Inject() ( - @Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]], - override val timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) - extends SourceGraphFetcher { - - override protected val stats: StatsReceiver = globalStats.scope(identifier) - override protected val graphSourceType: SourceType = SourceType.FollowRecommendation - - override def isEnabled(query: FetcherQuery): Boolean = { - query.params(FrsParams.EnableSourceGraphParam) - } - - override def fetchAndProcess( - query: FetcherQuery, - ): Future[Option[GraphSourceInfo]] = { - - val rawSignals = trackPerItemStats(query)( - frsStore - .get( - FrsStore - .Query(query.userId, query.params(FrsParams.MaxConsumerSeedsNumParam))).map { - _.map { - _.map { v => (v.userId, v.score) } - } - } - ) - rawSignals.map { - _.map { userWithScores => - convertGraphSourceInfo(userWithScores) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceSignalFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceSignalFetcher.scala deleted file mode 100644 index 4e9069376..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsSourceSignalFetcher.scala +++ /dev/null @@ -1,65 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.param.FrsParams -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import javax.inject.Singleton -import javax.inject.Inject -import javax.inject.Named - -@Singleton -case class FrsSourceSignalFetcher @Inject() ( - @Named(ModuleNames.FrsStore) frsStore: ReadableStore[FrsStore.Query, Seq[FrsQueryResult]], - override val timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) - extends SourceSignalFetcher { - - override protected val stats: StatsReceiver = globalStats.scope(identifier) - override type SignalConvertType = UserId - - override def isEnabled(query: FetcherQuery): Boolean = { - query.params(FrsParams.EnableSourceParam) - } - - override def fetchAndProcess(query: FetcherQuery): Future[Option[Seq[SourceInfo]]] = { - // Fetch raw signals - val rawSignals = frsStore - .get(FrsStore.Query(query.userId, query.params(GlobalParams.UnifiedMaxSourceKeyNum))) - .map { - _.map { - _.map { - _.userId - } - } - } - // Process signals - rawSignals.map { - _.map { frsUsers => - convertSourceInfo(SourceType.FollowRecommendation, frsUsers) - } - } - } - - override def convertSourceInfo( - sourceType: SourceType, - signals: Seq[SignalConvertType] - ): Seq[SourceInfo] = { - signals.map { signal => - SourceInfo( - sourceType = sourceType, - internalId = InternalId.UserId(signal), - sourceEventTime = None - ) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsStore.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsStore.scala deleted file mode 100644 index 0221bc318..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/FrsStore.scala +++ /dev/null @@ -1,81 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.param.decider.CrMixerDecider -import com.twitter.cr_mixer.param.decider.DeciderConstants -import com.twitter.cr_mixer.source_signal.FrsStore.Query -import com.twitter.cr_mixer.source_signal.FrsStore.FrsQueryResult -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.follow_recommendations.thriftscala.ClientContext -import com.twitter.follow_recommendations.thriftscala.DisplayLocation -import com.twitter.follow_recommendations.thriftscala.FollowRecommendationsThriftService -import com.twitter.follow_recommendations.thriftscala.Recommendation -import com.twitter.follow_recommendations.thriftscala.RecommendationRequest -import com.twitter.storehaus.ReadableStore -import javax.inject.Singleton -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future - -@Singleton -case class FrsStore( - frsClient: FollowRecommendationsThriftService.MethodPerEndpoint, - statsReceiver: StatsReceiver, - decider: CrMixerDecider) - extends ReadableStore[Query, Seq[FrsQueryResult]] { - - override def get( - query: Query - ): Future[Option[Seq[FrsQueryResult]]] = { - if (decider.isAvailable(DeciderConstants.enableFRSTrafficDeciderKey)) { - val recommendationRequest = - buildFollowRecommendationRequest(query) - - frsClient - .getRecommendations(recommendationRequest).map { recommendationResponse => - Some(recommendationResponse.recommendations.collect { - case recommendation: Recommendation.User => - FrsQueryResult( - recommendation.user.userId, - recommendation.user.scoringDetails - .flatMap(_.score).getOrElse(0.0), - recommendation.user.scoringDetails - .flatMap(_.candidateSourceDetails.flatMap(_.primarySource)), - recommendation.user.scoringDetails - .flatMap(_.candidateSourceDetails.flatMap(_.candidateSourceScores)).map(_.toMap) - ) - }) - } - } else { - Future.None - } - } - - private def buildFollowRecommendationRequest( - query: Query - ): RecommendationRequest = { - RecommendationRequest( - clientContext = ClientContext( - userId = Some(query.userId), - countryCode = query.countryCodeOpt, - languageCode = query.languageCodeOpt), - displayLocation = query.displayLocation, - maxResults = Some(query.maxConsumerSeedsNum), - excludedIds = Some(query.excludedUserIds) - ) - } -} - -object FrsStore { - case class Query( - userId: UserId, - maxConsumerSeedsNum: Int, - displayLocation: DisplayLocation = DisplayLocation.ContentRecommender, - excludedUserIds: Seq[UserId] = Seq.empty, - languageCodeOpt: Option[String] = None, - countryCodeOpt: Option[String] = None) - - case class FrsQueryResult( - userId: UserId, - score: Double, - primarySource: Option[Int], - sourceWithScores: Option[Map[String, Double]]) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphInSourceGraphFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphInSourceGraphFetcher.scala deleted file mode 100644 index ac708d0bb..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphInSourceGraphFetcher.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.RealGraphInParams -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import com.twitter.wtf.candidate.thriftscala.CandidateSeq -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/** - * This store fetch user recommendations from In-Network RealGraph (go/realgraph) for a given userId - */ -@Singleton -case class RealGraphInSourceGraphFetcher @Inject() ( - @Named(ModuleNames.RealGraphInStore) realGraphStoreMh: ReadableStore[UserId, CandidateSeq], - override val timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) - extends SourceGraphFetcher { - - override protected val stats: StatsReceiver = globalStats.scope(identifier) - override protected val graphSourceType: SourceType = SourceType.RealGraphIn - - override def isEnabled(query: FetcherQuery): Boolean = { - query.params(RealGraphInParams.EnableSourceGraphParam) - } - - override def fetchAndProcess( - query: FetcherQuery, - ): Future[Option[GraphSourceInfo]] = { - val rawSignals = trackPerItemStats(query)( - realGraphStoreMh.get(query.userId).map { - _.map { candidateSeq => - candidateSeq.candidates - .map { candidate => - // Bundle the userId with its score - (candidate.userId, candidate.score) - } - } - } - ) - rawSignals.map { - _.map { userWithScores => - convertGraphSourceInfo(userWithScores) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphOonSourceGraphFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphOonSourceGraphFetcher.scala deleted file mode 100644 index e03d140a4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/RealGraphOonSourceGraphFetcher.scala +++ /dev/null @@ -1,55 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.param.RealGraphOonParams -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import com.twitter.wtf.candidate.thriftscala.CandidateSeq -import javax.inject.Inject -import javax.inject.Named -import javax.inject.Singleton - -/** - * This store fetch user recommendations from RealGraphOON (go/realgraph) for a given userId - */ -@Singleton -case class RealGraphOonSourceGraphFetcher @Inject() ( - @Named(ModuleNames.RealGraphOonStore) realGraphOonStore: ReadableStore[UserId, CandidateSeq], - override val timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) - extends SourceGraphFetcher { - - override protected val stats: StatsReceiver = globalStats.scope(identifier) - override protected val graphSourceType: SourceType = SourceType.RealGraphOon - - override def isEnabled(query: FetcherQuery): Boolean = { - query.params(RealGraphOonParams.EnableSourceGraphParam) - } - - override def fetchAndProcess( - query: FetcherQuery, - ): Future[Option[GraphSourceInfo]] = { - val rawSignals = trackPerItemStats(query)( - realGraphOonStore.get(query.userId).map { - _.map { candidateSeq => - candidateSeq.candidates - .map { candidate => - // Bundle the userId with its score - (candidate.userId, candidate.score) - }.take(query.params(RealGraphOonParams.MaxConsumerSeedsNumParam)) - } - } - ) - rawSignals.map { - _.map { userWithScores => - convertGraphSourceInfo(userWithScores) - } - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceFetcher.scala deleted file mode 100644 index 4fa4dfb93..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceFetcher.scala +++ /dev/null @@ -1,101 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.simclusters_v2.common.UserId -import com.twitter.timelines.configapi.Params -import com.twitter.cr_mixer.thriftscala.{Product => TProduct} -import com.twitter.finagle.GlobalRequestTimeoutException -import com.twitter.finagle.mux.ClientDiscardedRequestException -import com.twitter.finagle.mux.ServerApplicationError -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.util.Future -import com.twitter.util.TimeoutException -import org.apache.thrift.TApplicationException -import com.twitter.util.logging.Logging - -/** - * A SourceFetcher is a trait which, given a [[FetcherQuery]], returns [[ResultType]] - * The main purposes of a SourceFetcher is to provide a consistent interface for source fetch - * logic, and provides default functions, including: - * - Identification - * - Observability - * - Timeout settings - * - Exception Handling - */ -trait SourceFetcher[ResultType] extends ReadableStore[FetcherQuery, ResultType] with Logging { - - protected final val timer = com.twitter.finagle.util.DefaultTimer - protected final def identifier: String = this.getClass.getSimpleName - protected def stats: StatsReceiver - protected def timeoutConfig: TimeoutConfig - - /*** - * Use FeatureSwitch to decide if a specific source is enabled. - */ - def isEnabled(query: FetcherQuery): Boolean - - /*** - * This function fetches the raw sources and process them. - * Custom stats tracking can be added depending on the type of ResultType - */ - def fetchAndProcess( - query: FetcherQuery, - ): Future[Option[ResultType]] - - /*** - * Side-effect function to track stats for signal fetching and processing. - */ - def trackStats( - query: FetcherQuery - )( - func: => Future[Option[ResultType]] - ): Future[Option[ResultType]] - - /*** - * This function is called by the top level class to fetch sources. It executes the pipeline to - * fetch raw data, process and transform the sources. Exceptions, Stats, and timeout control are - * handled here. - */ - override def get( - query: FetcherQuery - ): Future[Option[ResultType]] = { - val scopedStats = stats.scope(query.product.originalName) - if (isEnabled(query)) { - scopedStats.counter("gate_enabled").incr() - trackStats(query)(fetchAndProcess(query)) - .raiseWithin(timeoutConfig.signalFetchTimeout)(timer) - .onFailure { e => - scopedStats.scope("exceptions").counter(e.getClass.getSimpleName).incr() - } - .rescue { - case _: TimeoutException | _: GlobalRequestTimeoutException | _: TApplicationException | - _: ClientDiscardedRequestException | - _: ServerApplicationError // TApplicationException inside - => - Future.None - case e => - logger.info(e) - Future.None - } - } else { - scopedStats.counter("gate_disabled").incr() - Future.None - } - } -} - -object SourceFetcher { - - /*** - * Every SourceFetcher all share the same input: FetcherQuery - */ - case class FetcherQuery( - userId: UserId, - product: TProduct, - userState: UserState, - params: Params) - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceGraphFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceGraphFetcher.scala deleted file mode 100644 index ac33d91e9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceGraphFetcher.scala +++ /dev/null @@ -1,70 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.simclusters_v2.common.UserId -import com.twitter.util.Future - -/*** - * A SourceGraphFetcher is a trait that extends from `SourceFetcher` - * and is specialized in tackling User Graph (eg., RealGraphOon, FRS) fetch. - * - * The [[ResultType]] of a SourceGraphFetcher is a `GraphSourceInfo` which contains a userSeedSet. - * When we pass in userId, the underlying store returns one GraphSourceInfo. - */ -trait SourceGraphFetcher extends SourceFetcher[GraphSourceInfo] { - protected final val DefaultSeedScore = 1.0 - protected def graphSourceType: SourceType - - /*** - * RawDataType contains a consumers seed UserId and a score (weight) - */ - protected type RawDataType = (UserId, Double) - - def trackStats( - query: FetcherQuery - )( - func: => Future[Option[GraphSourceInfo]] - ): Future[Option[GraphSourceInfo]] = { - val productScopedStats = stats.scope(query.product.originalName) - val productUserStateScopedStats = productScopedStats.scope(query.userState.toString) - StatsUtil - .trackOptionStats(productScopedStats) { - StatsUtil - .trackOptionStats(productUserStateScopedStats) { - func - } - } - } - - // Track per item stats on the fetched graph results - def trackPerItemStats( - query: FetcherQuery - )( - func: => Future[Option[Seq[RawDataType]]] - ): Future[Option[Seq[RawDataType]]] = { - val productScopedStats = stats.scope(query.product.originalName) - val productUserStateScopedStats = productScopedStats.scope(query.userState.toString) - StatsUtil.trackOptionItemsStats(productScopedStats) { - StatsUtil.trackOptionItemsStats(productUserStateScopedStats) { - func - } - } - } - - /*** - * Convert Seq[RawDataType] into GraphSourceInfo - */ - protected final def convertGraphSourceInfo( - userWithScores: Seq[RawDataType] - ): GraphSourceInfo = { - GraphSourceInfo( - sourceType = graphSourceType, - seedWithScores = userWithScores.map { userWithScore => - userWithScore._1 -> userWithScore._2 - }.toMap - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceInfoRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceInfoRouter.scala deleted file mode 100644 index 5942fb8e4..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceInfoRouter.scala +++ /dev/null @@ -1,68 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.core_workflows.user_model.thriftscala.UserState -import com.twitter.cr_mixer.model.GraphSourceInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.thriftscala.{Product => TProduct} -import com.twitter.simclusters_v2.common.UserId -import com.twitter.timelines.configapi -import com.twitter.util.Future -import javax.inject.Inject -import javax.inject.Singleton - -@Singleton -case class SourceInfoRouter @Inject() ( - ussSourceSignalFetcher: UssSourceSignalFetcher, - frsSourceSignalFetcher: FrsSourceSignalFetcher, - frsSourceGraphFetcher: FrsSourceGraphFetcher, - realGraphOonSourceGraphFetcher: RealGraphOonSourceGraphFetcher, - realGraphInSourceGraphFetcher: RealGraphInSourceGraphFetcher, -) { - - def get( - userId: UserId, - product: TProduct, - userState: UserState, - params: configapi.Params - ): Future[(Set[SourceInfo], Map[String, Option[GraphSourceInfo]])] = { - - val fetcherQuery = FetcherQuery(userId, product, userState, params) - Future.join( - getSourceSignals(fetcherQuery), - getSourceGraphs(fetcherQuery) - ) - } - - private def getSourceSignals( - fetcherQuery: FetcherQuery - ): Future[Set[SourceInfo]] = { - Future - .join( - ussSourceSignalFetcher.get(fetcherQuery), - frsSourceSignalFetcher.get(fetcherQuery)).map { - case (ussSignalsOpt, frsSignalsOpt) => - (ussSignalsOpt.getOrElse(Seq.empty) ++ frsSignalsOpt.getOrElse(Seq.empty)).toSet - } - } - - private def getSourceGraphs( - fetcherQuery: FetcherQuery - ): Future[Map[String, Option[GraphSourceInfo]]] = { - - Future - .join( - frsSourceGraphFetcher.get(fetcherQuery), - realGraphOonSourceGraphFetcher.get(fetcherQuery), - realGraphInSourceGraphFetcher.get(fetcherQuery) - ).map { - case (frsGraphOpt, realGraphOonGraphOpt, realGraphInGraphOpt) => - Map( - SourceType.FollowRecommendation.name -> frsGraphOpt, - SourceType.RealGraphOon.name -> realGraphOonGraphOpt, - SourceType.RealGraphIn.name -> realGraphInGraphOpt, - ) - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceSignalFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceSignalFetcher.scala deleted file mode 100644 index 01d302661..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/SourceSignalFetcher.scala +++ /dev/null @@ -1,45 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.frigate.common.util.StatsUtil -import com.twitter.util.Future - -/*** - * A SourceSignalFetcher is a trait that extends from `SourceFetcher` - * and is specialized in tackling Signals (eg., USS, FRS) fetch. - * Currently, we define Signals as (but not limited to) a set of past engagements that - * the user makes, such as RecentFav, RecentFollow, etc. - * - * The [[ResultType]] of a SourceSignalFetcher is `Seq[SourceInfo]`. When we pass in userId, - * the underlying store returns a list of signals. - */ -trait SourceSignalFetcher extends SourceFetcher[Seq[SourceInfo]] { - - protected type SignalConvertType - - def trackStats( - query: FetcherQuery - )( - func: => Future[Option[Seq[SourceInfo]]] - ): Future[Option[Seq[SourceInfo]]] = { - val productScopedStats = stats.scope(query.product.originalName) - val productUserStateScopedStats = productScopedStats.scope(query.userState.toString) - StatsUtil - .trackOptionItemsStats(productScopedStats) { - StatsUtil - .trackOptionItemsStats(productUserStateScopedStats) { - func - } - } - } - - /*** - * Convert a list of Signals of type [[SignalConvertType]] into SourceInfo - */ - def convertSourceInfo( - sourceType: SourceType, - signals: Seq[SignalConvertType] - ): Seq[SourceInfo] -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssSourceSignalFetcher.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssSourceSignalFetcher.scala deleted file mode 100644 index dcce3e94e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssSourceSignalFetcher.scala +++ /dev/null @@ -1,160 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.config.TimeoutConfig -import com.twitter.cr_mixer.model.ModuleNames -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.source_signal.SourceFetcher.FetcherQuery -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.storehaus.ReadableStore -import com.twitter.usersignalservice.thriftscala.{Signal => UssSignal} -import com.twitter.usersignalservice.thriftscala.SignalType -import com.twitter.frigate.common.util.StatsUtil.Size -import com.twitter.frigate.common.util.StatsUtil.Success -import com.twitter.frigate.common.util.StatsUtil.Empty -import com.twitter.util.Future -import com.twitter.util.Time -import javax.inject.Singleton -import javax.inject.Inject -import javax.inject.Named - -@Singleton -case class UssSourceSignalFetcher @Inject() ( - @Named(ModuleNames.UssStore) ussStore: ReadableStore[UssStore.Query, Seq[ - (SignalType, Seq[UssSignal]) - ]], - override val timeoutConfig: TimeoutConfig, - globalStats: StatsReceiver) - extends SourceSignalFetcher { - - override protected val stats: StatsReceiver = globalStats.scope(identifier) - override type SignalConvertType = UssSignal - - // always enable USS call. We have fine-grained FS to decider which signal to fetch - override def isEnabled(query: FetcherQuery): Boolean = true - - override def fetchAndProcess( - query: FetcherQuery, - ): Future[Option[Seq[SourceInfo]]] = { - // Fetch raw signals - val rawSignals = ussStore.get(UssStore.Query(query.userId, query.params, query.product)).map { - _.map { - _.map { - case (signalType, signals) => - trackUssSignalStatsPerSignalType(query, signalType, signals) - (signalType, signals) - } - } - } - - /** - * Process signals: - * Transform a Seq of USS Signals with signalType specified to a Seq of SourceInfo - * We do case match to make sure the SignalType can correctly map to a SourceType defined in CrMixer - * and it should be simplified. - */ - rawSignals.map { - _.map { nestedSignal => - val sourceInfoList = nestedSignal.flatMap { - case (signalType, ussSignals) => - signalType match { - case SignalType.TweetFavorite => - convertSourceInfo(sourceType = SourceType.TweetFavorite, signals = ussSignals) - case SignalType.Retweet => - convertSourceInfo(sourceType = SourceType.Retweet, signals = ussSignals) - case SignalType.Reply => - convertSourceInfo(sourceType = SourceType.Reply, signals = ussSignals) - case SignalType.OriginalTweet => - convertSourceInfo(sourceType = SourceType.OriginalTweet, signals = ussSignals) - case SignalType.AccountFollow => - convertSourceInfo(sourceType = SourceType.UserFollow, signals = ussSignals) - case SignalType.RepeatedProfileVisit180dMinVisit6V1 | - SignalType.RepeatedProfileVisit90dMinVisit6V1 | - SignalType.RepeatedProfileVisit14dMinVisit2V1 => - convertSourceInfo( - sourceType = SourceType.UserRepeatedProfileVisit, - signals = ussSignals) - case SignalType.NotificationOpenAndClickV1 => - convertSourceInfo(sourceType = SourceType.NotificationClick, signals = ussSignals) - case SignalType.TweetShareV1 => - convertSourceInfo(sourceType = SourceType.TweetShare, signals = ussSignals) - case SignalType.RealGraphOon => - convertSourceInfo(sourceType = SourceType.RealGraphOon, signals = ussSignals) - case SignalType.GoodTweetClick | SignalType.GoodTweetClick5s | - SignalType.GoodTweetClick10s | SignalType.GoodTweetClick30s => - convertSourceInfo(sourceType = SourceType.GoodTweetClick, signals = ussSignals) - case SignalType.VideoView90dPlayback50V1 => - convertSourceInfo( - sourceType = SourceType.VideoTweetPlayback50, - signals = ussSignals) - case SignalType.VideoView90dQualityV1 => - convertSourceInfo( - sourceType = SourceType.VideoTweetQualityView, - signals = ussSignals) - case SignalType.GoodProfileClick | SignalType.GoodProfileClick20s | - SignalType.GoodProfileClick30s => - convertSourceInfo(sourceType = SourceType.GoodProfileClick, signals = ussSignals) - // negative signals - case SignalType.AccountBlock => - convertSourceInfo(sourceType = SourceType.AccountBlock, signals = ussSignals) - case SignalType.AccountMute => - convertSourceInfo(sourceType = SourceType.AccountMute, signals = ussSignals) - case SignalType.TweetReport => - convertSourceInfo(sourceType = SourceType.TweetReport, signals = ussSignals) - case SignalType.TweetDontLike => - convertSourceInfo(sourceType = SourceType.TweetDontLike, signals = ussSignals) - // Aggregated Signals - case SignalType.TweetBasedUnifiedEngagementWeightedSignal | - SignalType.TweetBasedUnifiedUniformSignal => - convertSourceInfo(sourceType = SourceType.TweetAggregation, signals = ussSignals) - case SignalType.ProducerBasedUnifiedEngagementWeightedSignal | - SignalType.ProducerBasedUnifiedUniformSignal => - convertSourceInfo(sourceType = SourceType.ProducerAggregation, signals = ussSignals) - - // Default - case _ => - Seq.empty[SourceInfo] - } - } - sourceInfoList - } - } - } - - override def convertSourceInfo( - sourceType: SourceType, - signals: Seq[SignalConvertType] - ): Seq[SourceInfo] = { - signals.map { signal => - SourceInfo( - sourceType = sourceType, - internalId = signal.targetInternalId.getOrElse( - throw new IllegalArgumentException( - s"${sourceType.toString} Signal does not have internalId")), - sourceEventTime = - if (signal.timestamp == 0L) None else Some(Time.fromMilliseconds(signal.timestamp)) - ) - } - } - - private def trackUssSignalStatsPerSignalType( - query: FetcherQuery, - signalType: SignalType, - ussSignals: Seq[UssSignal] - ): Unit = { - val productScopedStats = stats.scope(query.product.originalName) - val productUserStateScopedStats = productScopedStats.scope(query.userState.toString) - val productStats = productScopedStats.scope(signalType.toString) - val productUserStateStats = productUserStateScopedStats.scope(signalType.toString) - - productStats.counter(Success).incr() - productUserStateStats.counter(Success).incr() - val size = ussSignals.size - productStats.stat(Size).add(size) - productUserStateStats.stat(Size).add(size) - if (size == 0) { - productStats.counter(Empty).incr() - productUserStateStats.counter(Empty).incr() - } - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssStore.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssStore.scala deleted file mode 100644 index 02f0287f5..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/source_signal/UssStore.scala +++ /dev/null @@ -1,209 +0,0 @@ -package com.twitter.cr_mixer.source_signal - -import com.twitter.cr_mixer.param.GlobalParams -import com.twitter.cr_mixer.param.GoodProfileClickParams -import com.twitter.cr_mixer.param.GoodTweetClickParams -import com.twitter.cr_mixer.param.RealGraphOonParams -import com.twitter.cr_mixer.param.RecentFollowsParams -import com.twitter.cr_mixer.param.RecentNegativeSignalParams -import com.twitter.cr_mixer.param.RecentNotificationsParams -import com.twitter.cr_mixer.param.RecentOriginalTweetsParams -import com.twitter.cr_mixer.param.RecentReplyTweetsParams -import com.twitter.cr_mixer.param.RecentRetweetsParams -import com.twitter.cr_mixer.param.RecentTweetFavoritesParams -import com.twitter.cr_mixer.param.RepeatedProfileVisitsParams -import com.twitter.cr_mixer.param.TweetSharesParams -import com.twitter.cr_mixer.param.UnifiedUSSSignalParams -import com.twitter.cr_mixer.param.VideoViewTweetsParams -import com.twitter.cr_mixer.source_signal.UssStore.Query -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.finagle.stats.StatsReceiver -import com.twitter.simclusters_v2.common.UserId -import com.twitter.storehaus.ReadableStore -import com.twitter.usersignalservice.thriftscala.{Signal => UssSignal} -import com.twitter.usersignalservice.thriftscala.SignalType -import javax.inject.Singleton -import com.twitter.timelines.configapi -import com.twitter.timelines.configapi.Params -import com.twitter.usersignalservice.thriftscala.BatchSignalRequest -import com.twitter.usersignalservice.thriftscala.BatchSignalResponse -import com.twitter.usersignalservice.thriftscala.SignalRequest -import com.twitter.util.Future -import com.twitter.cr_mixer.thriftscala.Product -import com.twitter.usersignalservice.thriftscala.ClientIdentifier - -@Singleton -case class UssStore( - stratoStore: ReadableStore[BatchSignalRequest, BatchSignalResponse], - statsReceiver: StatsReceiver) - extends ReadableStore[Query, Seq[(SignalType, Seq[UssSignal])]] { - - import com.twitter.cr_mixer.source_signal.UssStore._ - - override def get(query: Query): Future[Option[Seq[(SignalType, Seq[UssSignal])]]] = { - val ussClientIdentifier = query.product match { - case Product.Home => - ClientIdentifier.CrMixerHome - case Product.Notifications => - ClientIdentifier.CrMixerNotifications - case Product.Email => - ClientIdentifier.CrMixerEmail - case _ => - ClientIdentifier.Unknown - } - val batchSignalRequest = - BatchSignalRequest( - query.userId, - buildUserSignalServiceRequests(query.params), - Some(ussClientIdentifier)) - - stratoStore - .get(batchSignalRequest) - .map { - _.map { batchSignalResponse => - batchSignalResponse.signalResponse.toSeq.map { - case (signalType, ussSignals) => - (signalType, ussSignals) - } - } - } - } - - private def buildUserSignalServiceRequests( - param: Params, - ): Seq[SignalRequest] = { - val unifiedMaxSourceKeyNum = param(GlobalParams.UnifiedMaxSourceKeyNum) - val goodTweetClickMaxSignalNum = param(GoodTweetClickParams.MaxSignalNumParam) - val aggrTweetMaxSourceKeyNum = param(UnifiedUSSSignalParams.UnifiedTweetSourceNumberParam) - val aggrProducerMaxSourceKeyNum = param(UnifiedUSSSignalParams.UnifiedProducerSourceNumberParam) - - val maybeRecentTweetFavorite = - if (param(RecentTweetFavoritesParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.TweetFavorite)) - else None - val maybeRecentRetweet = - if (param(RecentRetweetsParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.Retweet)) - else None - val maybeRecentReply = - if (param(RecentReplyTweetsParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.Reply)) - else None - val maybeRecentOriginalTweet = - if (param(RecentOriginalTweetsParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.OriginalTweet)) - else None - val maybeRecentFollow = - if (param(RecentFollowsParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.AccountFollow)) - else None - val maybeRepeatedProfileVisits = - if (param(RepeatedProfileVisitsParams.EnableSourceParam)) - Some( - SignalRequest( - Some(unifiedMaxSourceKeyNum), - param(RepeatedProfileVisitsParams.ProfileMinVisitType).signalType)) - else None - val maybeRecentNotifications = - if (param(RecentNotificationsParams.EnableSourceParam)) - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.NotificationOpenAndClickV1)) - else None - val maybeTweetShares = - if (param(TweetSharesParams.EnableSourceParam)) { - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.TweetShareV1)) - } else None - val maybeRealGraphOon = - if (param(RealGraphOonParams.EnableSourceParam)) { - Some(SignalRequest(Some(unifiedMaxSourceKeyNum), SignalType.RealGraphOon)) - } else None - - val maybeGoodTweetClick = - if (param(GoodTweetClickParams.EnableSourceParam)) - Some( - SignalRequest( - Some(goodTweetClickMaxSignalNum), - param(GoodTweetClickParams.ClickMinDwellTimeType).signalType)) - else None - val maybeVideoViewTweets = - if (param(VideoViewTweetsParams.EnableSourceParam)) { - Some( - SignalRequest( - Some(unifiedMaxSourceKeyNum), - param(VideoViewTweetsParams.VideoViewTweetTypeParam).signalType)) - } else None - val maybeGoodProfileClick = - if (param(GoodProfileClickParams.EnableSourceParam)) - Some( - SignalRequest( - Some(unifiedMaxSourceKeyNum), - param(GoodProfileClickParams.ClickMinDwellTimeType).signalType)) - else None - val maybeAggTweetSignal = - if (param(UnifiedUSSSignalParams.EnableTweetAggSourceParam)) - Some( - SignalRequest( - Some(aggrTweetMaxSourceKeyNum), - param(UnifiedUSSSignalParams.TweetAggTypeParam).signalType - ) - ) - else None - val maybeAggProducerSignal = - if (param(UnifiedUSSSignalParams.EnableProducerAggSourceParam)) - Some( - SignalRequest( - Some(aggrProducerMaxSourceKeyNum), - param(UnifiedUSSSignalParams.ProducerAggTypeParam).signalType - ) - ) - else None - - // negative signals - val maybeNegativeSignals = if (param(RecentNegativeSignalParams.EnableSourceParam)) { - EnabledNegativeSignalTypes - .map(negativeSignal => SignalRequest(Some(unifiedMaxSourceKeyNum), negativeSignal)).toSeq - } else Seq.empty - - val allPositiveSignals = - if (param(UnifiedUSSSignalParams.ReplaceIndividualUSSSourcesParam)) - Seq( - maybeRecentOriginalTweet, - maybeRecentNotifications, - maybeRealGraphOon, - maybeGoodTweetClick, - maybeGoodProfileClick, - maybeAggProducerSignal, - maybeAggTweetSignal, - ) - else - Seq( - maybeRecentTweetFavorite, - maybeRecentRetweet, - maybeRecentReply, - maybeRecentOriginalTweet, - maybeRecentFollow, - maybeRepeatedProfileVisits, - maybeRecentNotifications, - maybeTweetShares, - maybeRealGraphOon, - maybeGoodTweetClick, - maybeVideoViewTweets, - maybeGoodProfileClick, - maybeAggProducerSignal, - maybeAggTweetSignal, - ) - allPositiveSignals.flatten ++ maybeNegativeSignals - } - -} - -object UssStore { - case class Query( - userId: UserId, - params: configapi.Params, - product: Product) - - val EnabledNegativeSourceTypes: Set[SourceType] = - Set(SourceType.AccountBlock, SourceType.AccountMute) - private val EnabledNegativeSignalTypes: Set[SignalType] = - Set(SignalType.AccountBlock, SignalType.AccountMute) -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/BUILD b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/BUILD deleted file mode 100644 index 46c71420a..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/BUILD +++ /dev/null @@ -1,29 +0,0 @@ -scala_library( - sources = ["*.scala"], - compiler_option_sets = ["fatal_warnings"], - strict_deps = True, - tags = ["bazel-compatible"], - dependencies = [ - "3rdparty/jvm/com/twitter/storehaus:core", - "3rdparty/jvm/org/lz4:lz4-java", - "3rdparty/src/jvm/com/twitter/storehaus:core", - "configapi/configapi-core", - "content-recommender/thrift/src/main/thrift:thrift-scala", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/model", - "cr-mixer/server/src/main/scala/com/twitter/cr_mixer/param", - "cr-mixer/thrift/src/main/thrift:thrift-scala", - "finatra/inject/inject-core/src/main/scala", - "frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util:stats_util", - "relevance-platform/src/main/scala/com/twitter/relevance_platform/common/stats", - "src/java/com/twitter/search/common/schema/base", - "src/java/com/twitter/search/common/schema/earlybird", - "src/java/com/twitter/search/queryparser/query:core-query-nodes", - "src/java/com/twitter/search/queryparser/query/search:search-query-nodes", - "src/scala/com/twitter/simclusters_v2/common", - "src/thrift/com/twitter/core_workflows/user_model:user_model-scala", - "src/thrift/com/twitter/search:earlybird-scala", - "src/thrift/com/twitter/search/common:ranking-scala", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala", - ], -) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CandidateGenerationKeyUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CandidateGenerationKeyUtil.scala deleted file mode 100644 index fd698f6d9..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CandidateGenerationKeyUtil.scala +++ /dev/null @@ -1,39 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.thriftscala.CandidateGenerationKey -import com.twitter.cr_mixer.thriftscala.SimilarityEngine -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.simclusters_v2.common.UserId -import com.twitter.simclusters_v2.thriftscala.InternalId -import com.twitter.util.Time - -object CandidateGenerationKeyUtil { - private val PlaceholderUserId = 0L // this default value will not be used - - private val DefaultSourceInfo: SourceInfo = SourceInfo( - sourceType = SourceType.RequestUserId, - sourceEventTime = None, - internalId = InternalId.UserId(PlaceholderUserId) - ) - - def toThrift( - candidateGenerationInfo: CandidateGenerationInfo, - requestUserId: UserId - ): CandidateGenerationKey = { - CandidateGenerationKey( - sourceType = candidateGenerationInfo.sourceInfoOpt.getOrElse(DefaultSourceInfo).sourceType, - sourceEventTime = candidateGenerationInfo.sourceInfoOpt - .getOrElse(DefaultSourceInfo).sourceEventTime.getOrElse(Time.fromMilliseconds(0L)).inMillis, - id = candidateGenerationInfo.sourceInfoOpt - .map(_.internalId).getOrElse(InternalId.UserId(requestUserId)), - modelId = candidateGenerationInfo.similarityEngineInfo.modelId.getOrElse(""), - similarityEngineType = - Some(candidateGenerationInfo.similarityEngineInfo.similarityEngineType), - contributingSimilarityEngine = - Some(candidateGenerationInfo.contributingSimilarityEngines.map(se => - SimilarityEngine(se.similarityEngineType, se.modelId, se.score))) - ) - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CountWeightedInterleaveUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CountWeightedInterleaveUtil.scala deleted file mode 100644 index bfae90057..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/CountWeightedInterleaveUtil.scala +++ /dev/null @@ -1,180 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.cr_mixer.model.Candidate -import com.twitter.cr_mixer.model.InitialCandidate -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.param.BlenderParams.BlendGroupingMethodEnum -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.simclusters_v2.thriftscala.InternalId - -object CountWeightedInterleaveUtil { - - /** - * Grouping key for interleaving candidates - * - * @param sourceInfoOpt optional SourceInfo, containing the source information - * @param similarityEngineTypeOpt optional SimilarityEngineType, containing similarity engine - * information - * @param modelIdOpt optional modelId, containing the model ID - * @param authorIdOpt optional authorId, containing the tweet author ID - * @param groupIdOpt optional groupId, containing the ID corresponding to the blending group - */ - case class GroupingKey( - sourceInfoOpt: Option[SourceInfo], - similarityEngineTypeOpt: Option[SimilarityEngineType], - modelIdOpt: Option[String], - authorIdOpt: Option[Long], - groupIdOpt: Option[Int]) - - /** - * Converts candidates to grouping key based upon the feature that we interleave with. - */ - def toGroupingKey[CandidateType <: Candidate]( - candidate: CandidateType, - interleaveFeature: Option[BlendGroupingMethodEnum.Value], - groupId: Option[Int], - ): GroupingKey = { - val grouping: GroupingKey = candidate match { - case c: RankedCandidate => - interleaveFeature.getOrElse(BlendGroupingMethodEnum.SourceKeyDefault) match { - case BlendGroupingMethodEnum.SourceKeyDefault => - GroupingKey( - sourceInfoOpt = c.reasonChosen.sourceInfoOpt, - similarityEngineTypeOpt = - Some(c.reasonChosen.similarityEngineInfo.similarityEngineType), - modelIdOpt = c.reasonChosen.similarityEngineInfo.modelId, - authorIdOpt = None, - groupIdOpt = groupId - ) - // Some candidate sources don't have a sourceType, so it defaults to similarityEngine - case BlendGroupingMethodEnum.SourceTypeSimilarityEngine => - val sourceInfoOpt = c.reasonChosen.sourceInfoOpt.map(_.sourceType).map { sourceType => - SourceInfo( - sourceType = sourceType, - internalId = InternalId.UserId(0), - sourceEventTime = None) - } - GroupingKey( - sourceInfoOpt = sourceInfoOpt, - similarityEngineTypeOpt = - Some(c.reasonChosen.similarityEngineInfo.similarityEngineType), - modelIdOpt = c.reasonChosen.similarityEngineInfo.modelId, - authorIdOpt = None, - groupIdOpt = groupId - ) - case BlendGroupingMethodEnum.AuthorId => - GroupingKey( - sourceInfoOpt = None, - similarityEngineTypeOpt = None, - modelIdOpt = None, - authorIdOpt = Some(c.tweetInfo.authorId), - groupIdOpt = groupId - ) - case _ => - throw new UnsupportedOperationException( - s"Unsupported interleave feature: $interleaveFeature") - } - case _ => - GroupingKey( - sourceInfoOpt = None, - similarityEngineTypeOpt = None, - modelIdOpt = None, - authorIdOpt = None, - groupIdOpt = groupId - ) - } - grouping - } - - /** - * Rather than manually calculating and maintaining the weights to rank with, we instead - * calculate the weights on the fly, based upon the frequencies of the candidates within each - * group. To ensure that diversity of the feature is maintained, we additionally employ a - * 'shrinkage' parameter which enforces more diversity by moving the weights closer to uniformity. - * More details are available at go/weighted-interleave. - * - * @param candidateSeqKeyByFeature candidate to key. - * @param rankerWeightShrinkage value between [0, 1] with 1 being complete uniformity. - * @return Interleaving weights keyed by feature. - */ - private def calculateWeightsKeyByFeature[CandidateType <: Candidate]( - candidateSeqKeyByFeature: Map[GroupingKey, Seq[CandidateType]], - rankerWeightShrinkage: Double - ): Map[GroupingKey, Double] = { - val maxNumberCandidates: Double = candidateSeqKeyByFeature.values - .map { candidates => - candidates.size - }.max.toDouble - candidateSeqKeyByFeature.map { - case (featureKey: GroupingKey, candidateSeq: Seq[CandidateType]) => - val observedWeight: Double = candidateSeq.size.toDouble / maxNumberCandidates - // How much to shrink empirical estimates to 1 (Default is to make all weights 1). - val finalWeight = - (1.0 - rankerWeightShrinkage) * observedWeight + rankerWeightShrinkage * 1.0 - featureKey -> finalWeight - } - } - - /** - * Builds out the groups and weights for weighted interleaving of the candidates. - * More details are available at go/weighted-interleave. - * - * @param rankedCandidateSeq candidates to interleave. - * @param rankerWeightShrinkage value between [0, 1] with 1 being complete uniformity. - * @return Candidates grouped by feature key and with calculated interleaving weights. - */ - def buildRankedCandidatesWithWeightKeyByFeature( - rankedCandidateSeq: Seq[RankedCandidate], - rankerWeightShrinkage: Double, - interleaveFeature: BlendGroupingMethodEnum.Value - ): Seq[(Seq[RankedCandidate], Double)] = { - // To accommodate the re-grouping in InterleaveRanker - // In InterleaveBlender, we have already abandoned the grouping keys, and use Seq[Seq[]] to do interleave - // Since that we build the candidateSeq with groupingKey, we can guarantee there is no empty candidateSeq - val candidateSeqKeyByFeature: Map[GroupingKey, Seq[RankedCandidate]] = - rankedCandidateSeq.groupBy { candidate: RankedCandidate => - toGroupingKey(candidate, Some(interleaveFeature), None) - } - - // These weights [0, 1] are used to do weighted interleaving - // The default value of 1.0 ensures the group is always sampled. - val candidateWeightsKeyByFeature: Map[GroupingKey, Double] = - calculateWeightsKeyByFeature(candidateSeqKeyByFeature, rankerWeightShrinkage) - - candidateSeqKeyByFeature.map { - case (groupingKey: GroupingKey, candidateSeq: Seq[RankedCandidate]) => - Tuple2( - candidateSeq.sortBy(-_.predictionScore), - candidateWeightsKeyByFeature.getOrElse(groupingKey, 1.0)) - }.toSeq - } - - /** - * Takes current grouping (as implied by the outer Seq) and computes blending weights. - * - * @param initialCandidatesSeqSeq grouped candidates to interleave. - * @param rankerWeightShrinkage value between [0, 1] with 1 being complete uniformity. - * @return Grouped candidates with calculated interleaving weights. - */ - def buildInitialCandidatesWithWeightKeyByFeature( - initialCandidatesSeqSeq: Seq[Seq[InitialCandidate]], - rankerWeightShrinkage: Double, - ): Seq[(Seq[InitialCandidate], Double)] = { - val candidateSeqKeyByFeature: Map[GroupingKey, Seq[InitialCandidate]] = - initialCandidatesSeqSeq.zipWithIndex.map(_.swap).toMap.map { - case (groupId: Int, initialCandidatesSeq: Seq[InitialCandidate]) => - toGroupingKey(initialCandidatesSeq.head, None, Some(groupId)) -> initialCandidatesSeq - } - - // These weights [0, 1] are used to do weighted interleaving - // The default value of 1.0 ensures the group is always sampled. - val candidateWeightsKeyByFeature = - calculateWeightsKeyByFeature(candidateSeqKeyByFeature, rankerWeightShrinkage) - - candidateSeqKeyByFeature.map { - case (groupingKey: GroupingKey, candidateSeq: Seq[InitialCandidate]) => - Tuple2(candidateSeq, candidateWeightsKeyByFeature.getOrElse(groupingKey, 1.0)) - }.toSeq - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/EarlybirdSearchUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/EarlybirdSearchUtil.scala deleted file mode 100644 index 6ddd358dc..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/EarlybirdSearchUtil.scala +++ /dev/null @@ -1,130 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.search.common.schema.earlybird.EarlybirdFieldConstants.EarlybirdFieldConstant -import com.twitter.search.queryparser.query.search.SearchOperator -import com.twitter.search.queryparser.query.search.SearchOperatorConstants -import com.twitter.search.queryparser.query.{Query => EbQuery} -import com.twitter.search.queryparser.query.Conjunction -import scala.collection.JavaConverters._ -import com.twitter.search.earlybird.thriftscala.ThriftSearchResultMetadataOptions -import com.twitter.simclusters_v2.common.TweetId -import com.twitter.search.queryparser.query.Query -import com.twitter.util.Duration -import com.twitter.search.common.query.thriftjava.thriftscala.CollectorTerminationParams - -object EarlybirdSearchUtil { - val EarlybirdClientId: String = "cr-mixer.prod" - - val Mentions: String = EarlybirdFieldConstant.MENTIONS_FACET - val Hashtags: String = EarlybirdFieldConstant.HASHTAGS_FACET - val FacetsToFetch: Seq[String] = Seq(Mentions, Hashtags) - - val MetadataOptions: ThriftSearchResultMetadataOptions = ThriftSearchResultMetadataOptions( - getTweetUrls = true, - getResultLocation = false, - getLuceneScore = false, - getInReplyToStatusId = true, - getReferencedTweetAuthorId = true, - getMediaBits = true, - getAllFeatures = true, - getFromUserId = true, - returnSearchResultFeatures = true, - // Set getExclusiveConversationAuthorId in order to retrieve Exclusive / SuperFollow tweets. - getExclusiveConversationAuthorId = true - ) - - // Filter out retweets and replies - val TweetTypesToExclude: Seq[String] = - Seq( - SearchOperatorConstants.NATIVE_RETWEETS, - SearchOperatorConstants.REPLIES) - - def GetCollectorTerminationParams( - maxNumHitsPerShard: Int, - processingTimeout: Duration - ): Option[CollectorTerminationParams] = { - Some( - CollectorTerminationParams( - // maxHitsToProcess is used for early termination on each EB shard - maxHitsToProcess = Some(maxNumHitsPerShard), - timeoutMs = processingTimeout.inMilliseconds.toInt - )) - } - - /** - * Get EarlybirdQuery - * This function creates a EBQuery based on the search input - */ - def GetEarlybirdQuery( - beforeTweetIdExclusive: Option[TweetId], - afterTweetIdExclusive: Option[TweetId], - excludedTweetIds: Set[TweetId], - filterOutRetweetsAndReplies: Boolean - ): Option[EbQuery] = - CreateConjunction( - Seq( - CreateRangeQuery(beforeTweetIdExclusive, afterTweetIdExclusive), - CreateExcludedTweetIdsQuery(excludedTweetIds), - CreateTweetTypesFilters(filterOutRetweetsAndReplies) - ).flatten) - - def CreateRangeQuery( - beforeTweetIdExclusive: Option[TweetId], - afterTweetIdExclusive: Option[TweetId] - ): Option[EbQuery] = { - val beforeIdClause = beforeTweetIdExclusive.map { beforeId => - // MAX_ID is an inclusive value therefore we subtract 1 from beforeId. - new SearchOperator(SearchOperator.Type.MAX_ID, (beforeId - 1).toString) - } - val afterIdClause = afterTweetIdExclusive.map { afterId => - new SearchOperator(SearchOperator.Type.SINCE_ID, afterId.toString) - } - CreateConjunction(Seq(beforeIdClause, afterIdClause).flatten) - } - - def CreateTweetTypesFilters(filterOutRetweetsAndReplies: Boolean): Option[EbQuery] = { - if (filterOutRetweetsAndReplies) { - val tweetTypeFilters = TweetTypesToExclude.map { searchOperator => - new SearchOperator(SearchOperator.Type.EXCLUDE, searchOperator) - } - CreateConjunction(tweetTypeFilters) - } else None - } - - def CreateConjunction(clauses: Seq[EbQuery]): Option[EbQuery] = { - clauses.size match { - case 0 => None - case 1 => Some(clauses.head) - case _ => Some(new Conjunction(clauses.asJava)) - } - } - - def CreateExcludedTweetIdsQuery(tweetIds: Set[TweetId]): Option[EbQuery] = { - if (tweetIds.nonEmpty) { - Some( - new SearchOperator.Builder() - .setType(SearchOperator.Type.NAMED_MULTI_TERM_DISJUNCTION) - .addOperand(EarlybirdFieldConstant.ID_FIELD.getFieldName) - .addOperand(EXCLUDE_TWEET_IDS) - .setOccur(Query.Occur.MUST_NOT) - .build()) - } else None - } - - /** - * Get NamedDisjunctions with excludedTweetIds - */ - def GetNamedDisjunctions(excludedTweetIds: Set[TweetId]): Option[Map[String, Seq[Long]]] = - if (excludedTweetIds.nonEmpty) - createNamedDisjunctionsExcludedTweetIds(excludedTweetIds) - else None - - val EXCLUDE_TWEET_IDS = "exclude_tweet_ids" - private def createNamedDisjunctionsExcludedTweetIds( - tweetIds: Set[TweetId] - ): Option[Map[String, Seq[Long]]] = { - if (tweetIds.nonEmpty) { - Some(Map(EXCLUDE_TWEET_IDS -> tweetIds.toSeq)) - } else None - } -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/InterleaveUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/InterleaveUtil.scala deleted file mode 100644 index c75abde2e..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/InterleaveUtil.scala +++ /dev/null @@ -1,160 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.cr_mixer.model.Candidate -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.simclusters_v2.common.TweetId -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -object InterleaveUtil { - - /** - * Interleaves candidates by iteratively taking one candidate from the 1st Seq and adding it to the result. - * Once we take a candidate from a Seq, we move this Seq to the end of the queue to process, - * and remove the candidate from that Seq. - * - * We keep a mutable.Set[TweetId] buffer to ensure there are no duplicates. - * - * @param candidates candidates assumed to be sorted by eventTime (latest event comes first) - * @return interleaved candidates - */ - def interleave[CandidateType <: Candidate]( - candidates: Seq[Seq[CandidateType]] - ): Seq[CandidateType] = { - - // copy candidates into a mutable map so this method is thread-safe - val candidatesPerSequence = candidates.map { tweetCandidates => - mutable.Queue() ++= tweetCandidates - } - - val seen = mutable.Set[TweetId]() - - val candidateSeqQueue = mutable.Queue() ++= candidatesPerSequence - - val result = ArrayBuffer[CandidateType]() - - while (candidateSeqQueue.nonEmpty) { - val candidatesQueue = candidateSeqQueue.head - - if (candidatesQueue.nonEmpty) { - val candidate = candidatesQueue.dequeue() - val candidateTweetId = candidate.tweetId - val seenCandidate = seen.contains(candidateTweetId) - if (!seenCandidate) { - result += candidate - seen.add(candidate.tweetId) - candidateSeqQueue.enqueue( - candidateSeqQueue.dequeue() - ) // move this Seq to end - } - } else { - candidateSeqQueue.dequeue() //finished processing this Seq - } - } - //convert result to immutable seq - result.toList - } - - /** - * Interleaves candidates by iteratively - * 1. Checking weight to see if enough accumulation has occurred to sample from - * 2. If yes, taking one candidate from the the Seq and adding it to the result. - * 3. Move this Seq to the end of the queue to process (and remove the candidate from that Seq if - * we sampled it from step 2). - * - * We keep count of the iterations to prevent infinite loops. - * We keep a mutable.Set[TweetId] buffer to ensure there are no duplicates. - * - * @param candidatesAndWeight candidates assumed to be sorted by eventTime (latest event comes first), - * along with sampling weights to help prioritize important groups. - * @param maxWeightAdjustments Maximum number of iterations to account for weighting before - * defaulting to uniform interleaving. - * @return interleaved candidates - */ - def weightedInterleave[CandidateType <: Candidate]( - candidatesAndWeight: Seq[(Seq[CandidateType], Double)], - maxWeightAdjustments: Int = 0 - ): Seq[CandidateType] = { - - // Set to avoid numerical issues around 1.0 - val min_weight = 1 - 1e-30 - - // copy candidates into a mutable map so this method is thread-safe - // adds a counter to use towards sampling - val candidatesAndWeightsPerSequence: Seq[ - (mutable.Queue[CandidateType], InterleaveWeights) - ] = - candidatesAndWeight.map { candidatesAndWeight => - (mutable.Queue() ++= candidatesAndWeight._1, InterleaveWeights(candidatesAndWeight._2, 0.0)) - } - - val seen: mutable.Set[TweetId] = mutable.Set[TweetId]() - - val candidateSeqQueue: mutable.Queue[(mutable.Queue[CandidateType], InterleaveWeights)] = - mutable.Queue() ++= candidatesAndWeightsPerSequence - - val result: ArrayBuffer[CandidateType] = ArrayBuffer[CandidateType]() - var number_iterations: Int = 0 - - while (candidateSeqQueue.nonEmpty) { - val (candidatesQueue, currentWeights) = candidateSeqQueue.head - if (candidatesQueue.nonEmpty) { - // Confirm weighting scheme - currentWeights.summed_weight += currentWeights.weight - number_iterations += 1 - if (currentWeights.summed_weight >= min_weight || number_iterations >= maxWeightAdjustments) { - // If we sample, then adjust the counter - currentWeights.summed_weight -= 1.0 - val candidate = candidatesQueue.dequeue() - val candidateTweetId = candidate.tweetId - val seenCandidate = seen.contains(candidateTweetId) - if (!seenCandidate) { - result += candidate - seen.add(candidate.tweetId) - candidateSeqQueue.enqueue(candidateSeqQueue.dequeue()) // move this Seq to end - } - } else { - candidateSeqQueue.enqueue(candidateSeqQueue.dequeue()) // move this Seq to end - } - } else { - candidateSeqQueue.dequeue() //finished processing this Seq - } - } - //convert result to immutable seq - result.toList - } - - def buildCandidatesKeyByCGInfo( - candidates: Seq[RankedCandidate], - ): Seq[Seq[RankedCandidate]] = { - // To accommodate the re-grouping in InterleaveRanker - // In InterleaveBlender, we have already abandoned the grouping keys, and use Seq[Seq[]] to do interleave - // Since that we build the candidateSeq with groupingKey, we can guarantee there is no empty candidateSeq - val candidateSeqKeyByCG = - candidates.groupBy(candidate => GroupingKey.toGroupingKey(candidate.reasonChosen)) - candidateSeqKeyByCG.map { - case (groupingKey, candidateSeq) => - candidateSeq.sortBy(-_.predictionScore) - }.toSeq - } -} - -case class GroupingKey( - sourceInfoOpt: Option[SourceInfo], - similarityEngineType: SimilarityEngineType, - modelId: Option[String]) {} - -object GroupingKey { - def toGroupingKey(candidateGenerationInfo: CandidateGenerationInfo): GroupingKey = { - GroupingKey( - candidateGenerationInfo.sourceInfoOpt, - candidateGenerationInfo.similarityEngineInfo.similarityEngineType, - candidateGenerationInfo.similarityEngineInfo.modelId - ) - } -} - -case class InterleaveWeights(weight: Double, var summed_weight: Double) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/MetricTagUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/MetricTagUtil.scala deleted file mode 100644 index caa6d9f07..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/MetricTagUtil.scala +++ /dev/null @@ -1,135 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SimilarityEngineInfo -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.thriftscala.MetricTag -import com.twitter.cr_mixer.thriftscala.SimilarityEngineType -import com.twitter.cr_mixer.thriftscala.SourceType - -object MetricTagUtil { - - def buildMetricTags(candidate: RankedCandidate): Seq[MetricTag] = { - val interestedInMetricTag = isFromInterestedIn(candidate) - - val cgInfoMetricTags = candidate.potentialReasons - .flatMap { cgInfo => - val sourceMetricTag = cgInfo.sourceInfoOpt.flatMap { sourceInfo => - toMetricTagFromSource(sourceInfo.sourceType) - } - val similarityEngineTags = toMetricTagFromSimilarityEngine( - cgInfo.similarityEngineInfo, - cgInfo.contributingSimilarityEngines) - - val combinedMetricTag = cgInfo.sourceInfoOpt.flatMap { sourceInfo => - toMetricTagFromSourceAndSimilarityEngine(sourceInfo, cgInfo.similarityEngineInfo) - } - - Seq(sourceMetricTag) ++ similarityEngineTags ++ Seq(combinedMetricTag) - }.flatten.toSet - (interestedInMetricTag ++ cgInfoMetricTags).toSeq - } - - /*** - * match a sourceType to a metricTag - */ - private def toMetricTagFromSource(sourceType: SourceType): Option[MetricTag] = { - sourceType match { - case SourceType.TweetFavorite => Some(MetricTag.TweetFavorite) // Personalized Topics in Home - case SourceType.Retweet => Some(MetricTag.Retweet) // Personalized Topics in Home - case SourceType.NotificationClick => - Some(MetricTag.PushOpenOrNtabClick) // Health Filter in MR - case SourceType.OriginalTweet => - Some(MetricTag.OriginalTweet) - case SourceType.Reply => - Some(MetricTag.Reply) - case SourceType.TweetShare => - Some(MetricTag.TweetShare) - case SourceType.UserFollow => - Some(MetricTag.UserFollow) - case SourceType.UserRepeatedProfileVisit => - Some(MetricTag.UserRepeatedProfileVisit) - case SourceType.TwiceUserId => - Some(MetricTag.TwiceUserId) - case _ => None - } - } - - /*** - * If the SEInfo is built by a unified sim engine, we un-wrap the contributing sim engines. - * If not, we log the sim engine as usual. - * @param seInfo (CandidateGenerationInfo.similarityEngineInfo): SimilarityEngineInfo, - * @param cseInfo (CandidateGenerationInfo.contributingSimilarityEngines): Seq[SimilarityEngineInfo] - */ - private def toMetricTagFromSimilarityEngine( - seInfo: SimilarityEngineInfo, - cseInfo: Seq[SimilarityEngineInfo] - ): Seq[Option[MetricTag]] = { - seInfo.similarityEngineType match { - case SimilarityEngineType.TweetBasedUnifiedSimilarityEngine => // un-wrap the unified sim engine - cseInfo.map { contributingSimEngine => - toMetricTagFromSimilarityEngine(contributingSimEngine, Seq.empty) - }.flatten - case SimilarityEngineType.ProducerBasedUnifiedSimilarityEngine => // un-wrap the unified sim engine - cseInfo.map { contributingSimEngine => - toMetricTagFromSimilarityEngine(contributingSimEngine, Seq.empty) - }.flatten - // SimClustersANN can either be called on its own, or be called under unified sim engine - case SimilarityEngineType.SimClustersANN => // the old "UserInterestedIn" will be replaced by this. Also, OfflineTwice - Seq(Some(MetricTag.SimClustersANN), seInfo.modelId.flatMap(toMetricTagFromModelId(_))) - case SimilarityEngineType.ConsumerEmbeddingBasedTwHINANN => - Seq(Some(MetricTag.ConsumerEmbeddingBasedTwHINANN)) - case SimilarityEngineType.TwhinCollabFilter => Seq(Some(MetricTag.TwhinCollabFilter)) - // In the current implementation, TweetBasedUserTweetGraph/TweetBasedTwHINANN has a tag when - // it's either a base SE or a contributing SE. But for now they only show up in contributing SE. - case SimilarityEngineType.TweetBasedUserTweetGraph => - Seq(Some(MetricTag.TweetBasedUserTweetGraph)) - case SimilarityEngineType.TweetBasedTwHINANN => - Seq(Some(MetricTag.TweetBasedTwHINANN)) - case _ => Seq.empty - } - } - - /*** - * pass in a model id, and match it with the metric tag type. - */ - private def toMetricTagFromModelId( - modelId: String - ): Option[MetricTag] = { - - val pushOpenBasedModelRegex = "(.*_Model20m145k2020_20220819)".r - - modelId match { - case pushOpenBasedModelRegex(_*) => - Some(MetricTag.RequestHealthFilterPushOpenBasedTweetEmbedding) - case _ => None - } - } - - private def toMetricTagFromSourceAndSimilarityEngine( - sourceInfo: SourceInfo, - seInfo: SimilarityEngineInfo - ): Option[MetricTag] = { - sourceInfo.sourceType match { - case SourceType.Lookalike - if seInfo.similarityEngineType == SimilarityEngineType.ConsumersBasedUserTweetGraph => - Some(MetricTag.LookalikeUTG) - case _ => None - } - } - - /** - * Special use case: used by Notifications team to generate the UserInterestedIn CRT push copy. - * - * if we have different types of InterestedIn (eg. UserInterestedIn, NextInterestedIn), - * this if statement will have to be refactored to contain the real UserInterestedIn. - * @return - */ - private def isFromInterestedIn(candidate: RankedCandidate): Set[MetricTag] = { - if (candidate.reasonChosen.sourceInfoOpt.isEmpty - && candidate.reasonChosen.similarityEngineInfo.similarityEngineType == SimilarityEngineType.SimClustersANN) { - Set(MetricTag.UserInterestedIn) - } else Set.empty - } - -} diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/SignalTimestampStatsUtil.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/SignalTimestampStatsUtil.scala deleted file mode 100644 index ae2789432..000000000 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/util/SignalTimestampStatsUtil.scala +++ /dev/null @@ -1,66 +0,0 @@ -package com.twitter.cr_mixer.util - -import com.twitter.cr_mixer.model.CandidateGenerationInfo -import com.twitter.cr_mixer.model.RankedCandidate -import com.twitter.cr_mixer.model.SourceInfo -import com.twitter.cr_mixer.thriftscala.SourceType -import com.twitter.cr_mixer.thriftscala.TweetRecommendation -import javax.inject.Inject -import com.twitter.finagle.stats.StatsReceiver -import javax.inject.Singleton -import com.twitter.relevance_platform.common.stats.BucketTimestampStats - -@Singleton -class SignalTimestampStatsUtil @Inject() (statsReceiver: StatsReceiver) { - import SignalTimestampStatsUtil._ - - private val signalDelayAgePerDayStats = - new BucketTimestampStats[TweetRecommendation]( - BucketTimestampStats.MillisecondsPerDay, - _.latestSourceSignalTimestampInMillis.getOrElse(0), - Some(SignalTimestampMaxDays))( - statsReceiver.scope("signal_timestamp_per_day") - ) // only stats past 90 days - private val signalDelayAgePerHourStats = - new BucketTimestampStats[TweetRecommendation]( - BucketTimestampStats.MillisecondsPerHour, - _.latestSourceSignalTimestampInMillis.getOrElse(0), - Some(SignalTimestampMaxHours))( - statsReceiver.scope("signal_timestamp_per_hour") - ) // only stats past 24 hours - private val signalDelayAgePerMinStats = - new BucketTimestampStats[TweetRecommendation]( - BucketTimestampStats.MillisecondsPerMinute, - _.latestSourceSignalTimestampInMillis.getOrElse(0), - Some(SignalTimestampMaxMins))( - statsReceiver.scope("signal_timestamp_per_min") - ) // only stats past 60 minutes - - def statsSignalTimestamp( - tweets: Seq[TweetRecommendation], - ): Seq[TweetRecommendation] = { - signalDelayAgePerMinStats.count(tweets) - signalDelayAgePerHourStats.count(tweets) - signalDelayAgePerDayStats.count(tweets) - } -} - -object SignalTimestampStatsUtil { - val SignalTimestampMaxMins = 60 // stats at most 60 mins - val SignalTimestampMaxHours = 24 // stats at most 24 hours - val SignalTimestampMaxDays = 90 // stats at most 90 days - - def buildLatestSourceSignalTimestamp(candidate: RankedCandidate): Option[Long] = { - val timestampSeq = candidate.potentialReasons - .collect { - case CandidateGenerationInfo(Some(SourceInfo(sourceType, _, Some(sourceEventTime))), _, _) - if sourceType == SourceType.TweetFavorite => - sourceEventTime.inMilliseconds - } - if (timestampSeq.nonEmpty) { - Some(timestampSeq.max(Ordering.Long)) - } else { - None - } - } -} diff --git a/cr-mixer/thrift/src/main/thrift/BUILD b/cr-mixer/thrift/src/main/thrift/BUILD deleted file mode 100644 index 3ccb85681..000000000 --- a/cr-mixer/thrift/src/main/thrift/BUILD +++ /dev/null @@ -1,48 +0,0 @@ -create_thrift_libraries( - base_name = "thrift", - sources = ["*.thrift"], - platform = "java8", - tags = ["bazel-compatible"], - dependency_roots = [ - "finatra-internal/thrift/src/main/thrift", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift", - "src/thrift/com/twitter/ads/schema:common", - "src/thrift/com/twitter/ml/api:data", - "src/thrift/com/twitter/recos:recos-common", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift", - "src/thrift/com/twitter/timelines/render:thrift", - "strato/config/src/thrift/com/twitter/strato/graphql", - "strato/config/src/thrift/com/twitter/strato/graphql:api-media-graphql", - "strato/config/src/thrift/com/twitter/strato/graphql:topics-graphql", - ], - generate_languages = [ - "java", - "scala", - "strato", - ], - provides_java_name = "cr-mixer-thrift-java", - provides_scala_name = "cr-mixer-thrift-scala", -) - -create_thrift_libraries( - base_name = "cr-mixer-scribe", - sources = ["*.thrift"], - tags = ["bazel-compatible"], - dependency_roots = [ - "finatra-internal/thrift/src/main/thrift", - "product-mixer/core/src/main/thrift/com/twitter/product_mixer/core:thrift", - "src/thrift/com/twitter/ads/schema:common", - "src/thrift/com/twitter/ml/api:data", - "src/thrift/com/twitter/recos:recos-common", - "src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift", - "src/thrift/com/twitter/timelines/render:thrift", - "strato/config/src/thrift/com/twitter/strato/graphql", - ], - generate_languages = [ - "java", - "scala", - "strato", - ], - provides_java_name = "cr-mixer-scribe-java", - provides_scala_name = "cr-mixer-scribe-scala", -) diff --git a/cr-mixer/thrift/src/main/thrift/ads.thrift b/cr-mixer/thrift/src/main/thrift/ads.thrift deleted file mode 100644 index 70d4ad562..000000000 --- a/cr-mixer/thrift/src/main/thrift/ads.thrift +++ /dev/null @@ -1,33 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "product.thrift" -include "product_context.thrift" - -include "com/twitter/product_mixer/core/client_context.thrift" -include "com/twitter/ads/schema/shared.thrift" - -struct AdsRequest { - 1: required client_context.ClientContext clientContext - 2: required product.Product product - # Product-specific parameters should be placed in the Product Context - 3: optional product_context.ProductContext productContext - 4: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct AdsResponse { - 1: required list ads -} (persisted='true') - -struct AdTweetRecommendation { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: required double score - 3: optional list lineItems - -} (persisted='true') - -struct LineItemInfo { - 1: required i64 lineItemId (personalDataType = 'LineItemId') - 2: required shared.LineItemObjective lineItemObjective -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/candidate_generation_key.thrift b/cr-mixer/thrift/src/main/thrift/candidate_generation_key.thrift deleted file mode 100644 index 4f2a4a9ee..000000000 --- a/cr-mixer/thrift/src/main/thrift/candidate_generation_key.thrift +++ /dev/null @@ -1,21 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "source_type.thrift" -include "com/twitter/simclusters_v2/identifier.thrift" - -struct SimilarityEngine { - 1: required source_type.SimilarityEngineType similarityEngineType - 2: optional string modelId - 3: optional double score -} (persisted='true') - -struct CandidateGenerationKey { - 1: required source_type.SourceType sourceType - 2: required i64 sourceEventTime (personalDataType = 'PrivateTimestamp') - 3: required identifier.InternalId id - 4: required string modelId - 5: optional source_type.SimilarityEngineType similarityEngineType - 6: optional list contributingSimilarityEngine -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/cr_mixer.thrift b/cr-mixer/thrift/src/main/thrift/cr_mixer.thrift deleted file mode 100644 index 2fddf1cf8..000000000 --- a/cr-mixer/thrift/src/main/thrift/cr_mixer.thrift +++ /dev/null @@ -1,104 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "ads.thrift" -include "candidate_generation_key.thrift" -include "product.thrift" -include "product_context.thrift" -include "validation.thrift" -include "metric_tags.thrift" -include "related_tweet.thrift" -include "uteg.thrift" -include "frs_based_tweet.thrift" -include "related_video_tweet.thrift" -include "topic_tweet.thrift" - -include "com/twitter/product_mixer/core/client_context.thrift" -include "com/twitter/timelines/render/response.thrift" -include "finatra-thrift/finatra_thrift_exceptions.thrift" -include "com/twitter/strato/graphql/slice.thrift" - -struct CrMixerTweetRequest { - 1: required client_context.ClientContext clientContext - 2: required product.Product product - # Product-specific parameters should be placed in the Product Context - 3: optional product_context.ProductContext productContext - 4: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct TweetRecommendation { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: required double score - 3: optional list metricTags - # 4: the author of the tweet candidate. To be used by Content-Mixer to unblock the Hydra experiment. - 4: optional i64 authorId (personalDataType = 'UserId') - # 5: extra info about candidate generation. To be used by Content-Mixer to unblock the Hydra experiment. - 5: optional candidate_generation_key.CandidateGenerationKey candidateGenerationKey - # 1001: the latest timestamp of fav signals. If null, the candidate is not generated from fav signals - 1001: optional i64 latestSourceSignalTimestampInMillis(personalDataType = 'PublicTimestamp') -} (persisted='true', hasPersonalData = 'true') - -struct CrMixerTweetResponse { - 1: required list tweets -} (persisted='true') - -service CrMixer { - CrMixerTweetResponse getTweetRecommendations(1: CrMixerTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - # getRelatedTweetsForQueryTweet and getRelatedTweetsForQueryAuthor do very similar things - # We can merge these two endpoints into one unified endpoint - related_tweet.RelatedTweetResponse getRelatedTweetsForQueryTweet(1: related_tweet.RelatedTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - related_tweet.RelatedTweetResponse getRelatedTweetsForQueryAuthor(1: related_tweet.RelatedTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - uteg.UtegTweetResponse getUtegTweetRecommendations(1: uteg.UtegTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - frs_based_tweet.FrsTweetResponse getFrsBasedTweetRecommendations(1: frs_based_tweet.FrsTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - related_video_tweet.RelatedVideoTweetResponse getRelatedVideoTweetsForQueryTweet(1: related_video_tweet.RelatedVideoTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - ads.AdsResponse getAdsRecommendations(1: ads.AdsRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) - - topic_tweet.TopicTweetResponse getTopicTweetRecommendations(1: topic_tweet.TopicTweetRequest request) throws ( - # Validation errors - the details of which will be reported to clients on failure - 1: validation.ValidationExceptionList validationErrors; - # Server errors - the details of which will not be reported to clients - 2: finatra_thrift_exceptions.ServerError serverError - ) -} diff --git a/cr-mixer/thrift/src/main/thrift/frs_based_tweet.thrift b/cr-mixer/thrift/src/main/thrift/frs_based_tweet.thrift deleted file mode 100644 index bb83397b6..000000000 --- a/cr-mixer/thrift/src/main/thrift/frs_based_tweet.thrift +++ /dev/null @@ -1,35 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "product.thrift" -include "product_context.thrift" -include "com/twitter/product_mixer/core/client_context.thrift" - -struct FrsTweetRequest { -1: required client_context.ClientContext clientContext -2: required product.Product product -3: optional product_context.ProductContext productContext -# excludedUserIds - user ids to be excluded from FRS candidate generation -4: optional list excludedUserIds (personalDataType = 'UserId') -# excludedTweetIds - tweet ids to be excluded from Earlybird candidate generation -5: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct FrsTweet { -1: required i64 tweetId (personalDataType = 'TweetId') -2: required i64 authorId (personalDataType = 'UserId') -# skip 3 in case we need tweet score in the future -# frsPrimarySource - which FRS candidate source is the primary one to generate this author -4: optional i32 frsPrimarySource -# frsCandidateSourceScores - FRS candidate sources and the scores for this author -# for i32 to algorithm mapping, see https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala?L12 -5: optional map frsCandidateSourceScores -# frsPrimaryScore - the score of the FRS primary candidate source -6: optional double frsAuthorScore -} (persisted='true', hasPersonalData = 'true') - -struct FrsTweetResponse { - 1: required list tweets -} (persisted='true') - diff --git a/cr-mixer/thrift/src/main/thrift/metric_tags.thrift b/cr-mixer/thrift/src/main/thrift/metric_tags.thrift deleted file mode 100644 index dd4fb5012..000000000 --- a/cr-mixer/thrift/src/main/thrift/metric_tags.thrift +++ /dev/null @@ -1,44 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - - -// NOTE: DO NOT depend on MetricTags for important ML Features or business logic. -// MetricTags are meant for stats tracking & debugging purposes ONLY. -// cr-mixer may change its definitions & how each candidate is tagged without public notice. -// NOTE: TSPS needs the caller (Home) to specify which signal it uses to make Personalized Topics -enum MetricTag { - // Source Signal Tags - TweetFavorite = 0 - Retweet = 1 - TrafficAttribution = 2 - OriginalTweet = 3 - Reply = 4 - TweetShare = 5 - - UserFollow = 101 - UserRepeatedProfileVisit = 102 - - PushOpenOrNtabClick = 201 - - HomeTweetClick = 301 - HomeVideoView = 302 - - // sim engine types - SimClustersANN = 401 - TweetBasedUserTweetGraph = 402 - TweetBasedTwHINANN = 403 - ConsumerEmbeddingBasedTwHINANN = 404 - - - // combined engine types - UserInterestedIn = 501 // Will deprecate soon - LookalikeUTG = 502 - TwhinCollabFilter = 503 - - // Offline Twice - TwiceUserId = 601 - - // Other Metric Tags - RequestHealthFilterPushOpenBasedTweetEmbedding = 701 -} (persisted='true', hasPersonalData='true') diff --git a/cr-mixer/thrift/src/main/thrift/product.thrift b/cr-mixer/thrift/src/main/thrift/product.thrift deleted file mode 100644 index 6e23a1092..000000000 --- a/cr-mixer/thrift/src/main/thrift/product.thrift +++ /dev/null @@ -1,19 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -# In CrMixer, one org should only have one Product -enum Product { - Home = 1 - Notifications = 2 - Email = 3 - MoreTweetsModule = 4 # aka RUX - ImmersiveMediaViewer = 5 - VideoCarousel = 6 - ExploreTopics = 7 - Ads = 8 - HomeRealTime = 9 // Home Real-Time Tab is considered as a different Product surface to Home Tab. It's in early experiment phase. - TopicLandingPage = 10 - HomeTopicsBackfill = 11 - TopicTweetsStrato = 12 -} diff --git a/cr-mixer/thrift/src/main/thrift/product_context.thrift b/cr-mixer/thrift/src/main/thrift/product_context.thrift deleted file mode 100644 index 29e2d9687..000000000 --- a/cr-mixer/thrift/src/main/thrift/product_context.thrift +++ /dev/null @@ -1,21 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -struct HomeContext { - 2: optional i32 maxResults // enabled for QuaityFactor related DDGs only -} (persisted='true', hasPersonalData='false') - -struct NotificationsContext { - 1: optional i32 devNull // not being used. it's a placeholder -} (persisted='true', hasPersonalData='false') - -struct ExploreContext { - 1: required bool isVideoOnly -} (persisted='true', hasPersonalData='false') - -union ProductContext { - 1: HomeContext homeContext - 2: NotificationsContext notificationsContext - 3: ExploreContext exploreContext -} (persisted='true', hasPersonalData='false') diff --git a/cr-mixer/thrift/src/main/thrift/related_tweet.thrift b/cr-mixer/thrift/src/main/thrift/related_tweet.thrift deleted file mode 100644 index 04e797b1b..000000000 --- a/cr-mixer/thrift/src/main/thrift/related_tweet.thrift +++ /dev/null @@ -1,24 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "product.thrift" -include "com/twitter/product_mixer/core/client_context.thrift" -include "com/twitter/simclusters_v2/identifier.thrift" - -struct RelatedTweetRequest { - 1: required identifier.InternalId internalId - 2: required product.Product product - 3: required client_context.ClientContext clientContext # RUX LogOut will have clientContext.userId = None - 4: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct RelatedTweet { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: optional double score - 3: optional i64 authorId (personalDataType = 'UserId') -} (persisted='true', hasPersonalData='true') - -struct RelatedTweetResponse { - 1: required list tweets -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/related_video_tweet.thrift b/cr-mixer/thrift/src/main/thrift/related_video_tweet.thrift deleted file mode 100644 index 18a987e7d..000000000 --- a/cr-mixer/thrift/src/main/thrift/related_video_tweet.thrift +++ /dev/null @@ -1,23 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "product.thrift" -include "com/twitter/product_mixer/core/client_context.thrift" -include "com/twitter/simclusters_v2/identifier.thrift" - -struct RelatedVideoTweetRequest { - 1: required identifier.InternalId internalId - 2: required product.Product product - 3: required client_context.ClientContext clientContext # RUX LogOut will have clientContext.userId = None - 4: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct RelatedVideoTweet { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: optional double score -} (persisted='true', hasPersonalData='true') - -struct RelatedVideoTweetResponse { - 1: required list tweets -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/scribe.thrift b/cr-mixer/thrift/src/main/thrift/scribe.thrift deleted file mode 100644 index 61fdb5eb9..000000000 --- a/cr-mixer/thrift/src/main/thrift/scribe.thrift +++ /dev/null @@ -1,168 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "ads.thrift" -include "candidate_generation_key.thrift" -include "cr_mixer.thrift" -include "metric_tags.thrift" -include "product.thrift" -include "related_tweet.thrift" -include "source_type.thrift" -include "uteg.thrift" -include "com/twitter/ml/api/data.thrift" -include "com/twitter/simclusters_v2/identifier.thrift" - -struct VITTweetCandidatesScribe { - 1: required i64 uuid (personalDataType = 'UniversallyUniqueIdentifierUuid') # RequestUUID - unique scribe id for every request that comes in. Same request but different stages of scribe log (FetchCandidate, Filter, etc) share the same uuid - 2: required i64 userId (personalDataType = 'UserId') - 3: required list candidates - 7: required product.Product product - 8: required list impressedBuckets -} (persisted='true', hasPersonalData = 'true') - -struct VITTweetCandidateScribe { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: required i64 authorId (personalDataType = 'UserId') - 3: required double score - 4: required list metricTags -} (persisted='true', hasPersonalData = 'true') - -struct GetTweetsRecommendationsScribe { - 1: required i64 uuid (personalDataType = 'UniversallyUniqueIdentifierUuid') # RequestUUID - unique scribe id for every request that comes in. Same request but different stages of scribe log (FetchCandidate, Filter, etc) share the same uuid - 2: required i64 userId (personalDataType = 'UserId') - 3: required Result result - 4: optional i64 traceId - 5: optional PerformanceMetrics performanceMetrics - 6: optional list impressedBuckets -} (persisted='true', hasPersonalData = 'true') - -struct SourceSignal { - # optional, since that the next step covers all info here - 1: optional identifier.InternalId id -} (persisted='true') - -struct PerformanceMetrics { - 1: optional i64 latencyMs -} (persisted='true') - -struct TweetCandidateWithMetadata { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: optional candidate_generation_key.CandidateGenerationKey candidateGenerationKey - 3: optional i64 authorId (personalDataType = 'UserId') # only for InterleaveResult for hydrating training data - 4: optional double score # score with respect to candidateGenerationKey - 5: optional data.DataRecord dataRecord # attach any features to this candidate - 6: optional i32 numCandidateGenerationKeys # num CandidateGenerationKeys generating this tweetId -} (persisted='true') - -struct FetchSignalSourcesResult { - 1: optional set signals -} (persisted='true') - -struct FetchCandidatesResult { - 1: optional list tweets -} (persisted='true') - -struct PreRankFilterResult { - 1: optional list tweets -} (persisted='true') - -struct InterleaveResult { - 1: optional list tweets -} (persisted='true') - -struct RankResult { - 1: optional list tweets -} (persisted='true') - -struct TopLevelApiResult { - 1: required i64 timestamp (personalDataType = 'PrivateTimestamp') - 2: required cr_mixer.CrMixerTweetRequest request - 3: required cr_mixer.CrMixerTweetResponse response -} (persisted='true') - -union Result { - 1: FetchSignalSourcesResult fetchSignalSourcesResult - 2: FetchCandidatesResult fetchCandidatesResult - 3: PreRankFilterResult preRankFilterResult - 4: InterleaveResult interleaveResult - 5: RankResult rankResult - 6: TopLevelApiResult topLevelApiResult -} (persisted='true', hasPersonalData = 'true') - -struct ImpressesedBucketInfo { - 1: required i64 experimentId (personalDataType = 'ExperimentId') - 2: required string bucketName - 3: required i32 version -} (persisted='true') - -############# RelatedTweets Scribe ############# - -struct GetRelatedTweetsScribe { - 1: required i64 uuid (personalDataType = 'UniversallyUniqueIdentifierUuid') # RequestUUID - unique scribe id for every request that comes in. Same request but different stages of scribe log (FetchCandidate, Filter, etc) share the same uuid - 2: required identifier.InternalId internalId - 3: required RelatedTweetResult relatedTweetResult - 4: optional i64 requesterId (personalDataType = 'UserId') - 5: optional i64 guestId (personalDataType = 'GuestId') - 6: optional i64 traceId - 7: optional PerformanceMetrics performanceMetrics - 8: optional list impressedBuckets -} (persisted='true', hasPersonalData = 'true') - -struct RelatedTweetTopLevelApiResult { - 1: required i64 timestamp (personalDataType = 'PrivateTimestamp') - 2: required related_tweet.RelatedTweetRequest request - 3: required related_tweet.RelatedTweetResponse response -} (persisted='true') - -union RelatedTweetResult { - 1: RelatedTweetTopLevelApiResult relatedTweetTopLevelApiResult - 2: FetchCandidatesResult fetchCandidatesResult - 3: PreRankFilterResult preRankFilterResult # results after seqential filters - # if later we need rankResult, we can add it here -} (persisted='true', hasPersonalData = 'true') - -############# UtegTweets Scribe ############# - -struct GetUtegTweetsScribe { - 1: required i64 uuid (personalDataType = 'UniversallyUniqueIdentifierUuid') # RequestUUID - unique scribe id for every request that comes in. Same request but different stages of scribe log (FetchCandidate, Filter, etc) share the same uuid - 2: required i64 userId (personalDataType = 'UserId') - 3: required UtegTweetResult utegTweetResult - 4: optional i64 traceId - 5: optional PerformanceMetrics performanceMetrics - 6: optional list impressedBuckets -} (persisted='true', hasPersonalData = 'true') - -struct UtegTweetTopLevelApiResult { - 1: required i64 timestamp (personalDataType = 'PrivateTimestamp') - 2: required uteg.UtegTweetRequest request - 3: required uteg.UtegTweetResponse response -} (persisted='true') - -union UtegTweetResult { - 1: UtegTweetTopLevelApiResult utegTweetTopLevelApiResult - 2: FetchCandidatesResult fetchCandidatesResult - # if later we need rankResult, we can add it here -} (persisted='true', hasPersonalData = 'true') - -############# getAdsRecommendations() Scribe ############# - -struct GetAdsRecommendationsScribe { - 1: required i64 uuid (personalDataType = 'UniversallyUniqueIdentifierUuid') # RequestUUID - unique scribe id for every request that comes in. Same request but different stages of scribe log (FetchCandidate, Filter, etc) share the same uuid - 2: required i64 userId (personalDataType = 'UserId') - 3: required AdsRecommendationsResult result - 4: optional i64 traceId - 5: optional PerformanceMetrics performanceMetrics - 6: optional list impressedBuckets -} (persisted='true', hasPersonalData = 'true') - -struct AdsRecommendationTopLevelApiResult { - 1: required i64 timestamp (personalDataType = 'PrivateTimestamp') - 2: required ads.AdsRequest request - 3: required ads.AdsResponse response -} (persisted='true') - -union AdsRecommendationsResult{ - 1: AdsRecommendationTopLevelApiResult adsRecommendationTopLevelApiResult - 2: FetchCandidatesResult fetchCandidatesResult -}(persisted='true', hasPersonalData = 'true') diff --git a/cr-mixer/thrift/src/main/thrift/source_type.thrift b/cr-mixer/thrift/src/main/thrift/source_type.thrift deleted file mode 100644 index 913739fa3..000000000 --- a/cr-mixer/thrift/src/main/thrift/source_type.thrift +++ /dev/null @@ -1,123 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -// Due to legacy reason, SourceType used to represent both SourceSignalType and SimilarityEngineType -// Hence, you can see several SourceType such as UserInterestedIn, HashSpace, etc. -// Moving forward, SourceType will be used for SourceSignalType ONLY. eg., TweetFavorite, UserFollow -// We will create a new SimilarityEngineType to separate them. eg., SimClustersANN -enum SourceType { - // Tweet based Source Signal - TweetFavorite = 0 - Retweet = 1 - TrafficAttribution = 2 // Traffic Attribution will be migrated over in Q3 - OriginalTweet = 3 - Reply = 4 - TweetShare = 5 - GoodTweetClick = 6 // total dwell time > N seconds after click on the tweet - VideoTweetQualityView = 7 - VideoTweetPlayback50 = 8 - - // UserId based Source Signal (includes both Producer/Consumer) - UserFollow = 101 - UserRepeatedProfileVisit = 102 - - CurrentUser_DEPRECATED = 103 - - RealGraphOon = 104 - FollowRecommendation = 105 - - TwiceUserId = 106 - UserTrafficAttributionProfileVisit = 107 - GoodProfileClick = 108 // total dwell time > N seconds after click into the profile page - - // (Notification) Tweet based Source Signal - NotificationClick = 201 - - // (Home) Tweet based Source Signal - HomeTweetClick = 301 - HomeVideoView = 302 - HomeSongbirdShowMore = 303 - - // Topic based Source Signal - TopicFollow = 401 // Deprecated - PopularTopic = 402 // Deprecated - - // Old CR code - UserInterestedIn = 501 // Deprecated - TwiceInterestedIn = 502 // Deprecated - MBCG = 503 // Deprecated - HashSpace = 504 // Deprecated - - // Old CR code - Cluster = 601 // Deprecated - - // Search based Source Signal - SearchProfileClick = 701 // Deprecated - SearchTweetClick = 702 // Deprecated - - // Graph based Source - StrongTiePrediction = 801 // STP - TwiceClustersMembers = 802 - Lookalike = 803 // Deprecated - RealGraphIn = 804 - - // Current requester User Id. It is only used for scribing. Placeholder value - RequestUserId = 1001 - // Current request Tweet Id used in RelatedTweet. Placeholder value - RequestTweetId = 1002 - - // Negative Signals - TweetReport = 1101 - TweetDontLike = 1102 - TweetSeeFewer = 1103 - AccountBlock = 1104 - AccountMute = 1105 - - // Aggregated Signals - TweetAggregation = 1201 - ProducerAggregation = 1202 -} (persisted='true', hasPersonalData='true') - -enum SimilarityEngineType { - SimClustersANN = 1 - TweetBasedUserTweetGraph = 2 - TweetBasedTwHINANN = 3 - Follow2VecANN = 4 // ConsumerEmbeddingBasedFollow2Vec - QIG = 5 - OfflineSimClustersANN = 6 - LookalikeUTG_DEPRECATED = 7 - ProducerBasedUserTweetGraph = 8 - FrsUTG_DEPRECATED = 9 - RealGraphOonUTG_DEPRECATED = 10 - ConsumerEmbeddingBasedTwHINANN = 11 - TwhinCollabFilter = 12 - TwiceUTG_DEPRECATED = 13 - ConsumerEmbeddingBasedTwoTowerANN = 14 - TweetBasedBeTANN = 15 - StpUTG_DEPRECATED = 16 - UTEG = 17 - ROMR = 18 - ConsumersBasedUserTweetGraph = 19 - TweetBasedUserVideoGraph = 20 - CertoTopicTweet = 24 - ConsumersBasedUserAdGraph = 25 - TweetBasedUserAdGraph = 26 - SkitTfgTopicTweet = 27 - ConsumerBasedWalsANN = 28 - ProducerBasedUserAdGraph = 29 - SkitHighPrecisionTopicTweet = 30 - SkitInterestBrowserTopicTweet = 31 - SkitProducerBasedTopicTweet = 32 - ExploreTripOfflineSimClustersTweets = 33 - DiffusionBasedTweet = 34 - ConsumersBasedUserVideoGraph = 35 - - // In network - EarlybirdRecencyBasedSimilarityEngine = 21 - EarlybirdModelBasedSimilarityEngine = 22 - EarlybirdTensorflowBasedSimilarityEngine = 23 - // Composite - TweetBasedUnifiedSimilarityEngine = 1001 - ProducerBasedUnifiedSimilarityEngine = 1002 -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/topic_tweet.thrift b/cr-mixer/thrift/src/main/thrift/topic_tweet.thrift deleted file mode 100644 index 46552d454..000000000 --- a/cr-mixer/thrift/src/main/thrift/topic_tweet.thrift +++ /dev/null @@ -1,28 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "com/twitter/product_mixer/core/client_context.thrift" -include "product.thrift" -include "product_context.thrift" -include "source_type.thrift" - - -struct TopicTweetRequest { - 1: required client_context.ClientContext clientContext - 2: required product.Product product - 3: required list topicIds - 5: optional product_context.ProductContext productContext - 6: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct TopicTweet { - 1: required i64 tweetId (personalDataType = 'TweetId') - 2: required double score - 3: required source_type.SimilarityEngineType similarityEngineType -} (persisted='true', hasPersonalData = 'true') - -struct TopicTweetResponse { - 1: required map> tweets -} (persisted='true') - diff --git a/cr-mixer/thrift/src/main/thrift/uteg.thrift b/cr-mixer/thrift/src/main/thrift/uteg.thrift deleted file mode 100644 index 2f5c4198d..000000000 --- a/cr-mixer/thrift/src/main/thrift/uteg.thrift +++ /dev/null @@ -1,31 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -include "product.thrift" -include "product_context.thrift" - -include "com/twitter/product_mixer/core/client_context.thrift" -include "com/twitter/recos/recos_common.thrift" - -struct UtegTweetRequest { - 1: required client_context.ClientContext clientContext - 2: required product.Product product - # Product-specific parameters should be placed in the Product Context - 3: optional product_context.ProductContext productContext - 4: optional list excludedTweetIds (personalDataType = 'TweetId') -} (persisted='true', hasPersonalData='true') - -struct UtegTweet { - // tweet id - 1: required i64 tweetId(personalDataType = 'TweetId') - // sum of weights of seed users who engaged with the tweet. - // If a user engaged with the same tweet twice, liked it and retweeted it, then his/her weight was counted twice. - 2: required double score - // user social proofs per engagement type - 3: required map> socialProofByType(personalDataTypeKey='EngagementTypePrivate', personalDataTypeValue='UserId') -} (persisted='true', hasPersonalData = 'true') - -struct UtegTweetResponse { - 1: required list tweets -} (persisted='true') diff --git a/cr-mixer/thrift/src/main/thrift/validation.thrift b/cr-mixer/thrift/src/main/thrift/validation.thrift deleted file mode 100644 index 96a04be3c..000000000 --- a/cr-mixer/thrift/src/main/thrift/validation.thrift +++ /dev/null @@ -1,19 +0,0 @@ -namespace java com.twitter.cr_mixer.thriftjava -#@namespace scala com.twitter.cr_mixer.thriftscala -#@namespace strato com.twitter.cr_mixer - -// ValidationErrorCode is used to identify classes of client errors returned from a Product Mixer -// service. Use [[PipelineFailureExceptionMapper]] to adapt pipeline failures into thrift errors. -enum ValidationErrorCode { - PRODUCT_DISABLED = 1 - PLACEHOLDER_2 = 2 -} (hasPersonalData='false') - -exception ValidationException { - 1: ValidationErrorCode errorCode - 2: string msg -} (hasPersonalData='false') - -exception ValidationExceptionList { - 1: list errors -} (hasPersonalData='false')