diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala index 4e8f0a41d..f8250aa83 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/AdsBlender.scala @@ -26,18 +26,17 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) { def blend( inputCandidates: Seq[Seq[InitialAdsCandidate]], ): Future[Seq[BlendedAdsCandidate]] = { - - // Filter out empty candidate sequence + // Filter out empty candidate sequence. val candidates = inputCandidates.filter(_.nonEmpty) val (interestedInCandidates, twistlyCandidates) = candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty) - // First interleave twistly candidates + // Interleave twistly candidates. val interleavedTwistlyCandidates = InterleaveUtil.interleave(twistlyCandidates) val twistlyAndInterestedInCandidates = Seq(interestedInCandidates.flatten, interleavedTwistlyCandidates) - // then interleave twistly candidates with interested in to make them even + // Interleave twistly candidates with interested in to make them even. 
val interleavedCandidates = InterleaveUtil.interleave(twistlyAndInterestedInCandidates) stats.stat("candidates").add(interleavedCandidates.size) @@ -45,6 +44,7 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) { val blendedCandidates = buildBlendedAdsCandidate(inputCandidates, interleavedCandidates) Future.value(blendedCandidates) } + private def buildBlendedAdsCandidate( inputCandidates: Seq[Seq[InitialAdsCandidate]], interleavedCandidates: Seq[InitialAdsCandidate] @@ -73,5 +73,4 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) { } tweetIdMap.toMap } - } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala index 1a864a6c2..5f06019af 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/BlendedCandidatesBuilder.scala @@ -9,7 +9,7 @@ import scala.collection.mutable object BlendedCandidatesBuilder { /** - * @param inputCandidates input candidate prior to interleaving + * @param inputCandidates input candidate prior to interleaving. * @param interleavedCandidates after interleaving. These tweets are de-duplicated. */ def build( @@ -23,8 +23,8 @@ object BlendedCandidatesBuilder { } /** + * This function tells you which CandidateGenerationInfo generated a given tweet. * The same tweet can be generated by different sources. 
- * This function tells you which CandidateGenerationInfo generated a given tweet */ private def buildCandidateToCGInfosMap( candidateSeq: Seq[Seq[InitialCandidate]], @@ -44,5 +44,4 @@ object BlendedCandidatesBuilder { } tweetIdMap.toMap } - } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala index 9ef81009b..e27a6358e 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/ContentSignalBlender.scala @@ -17,7 +17,7 @@ case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) { private val stats: StatsReceiver = globalStats.scope(name) /** - * Exposes multiple types of sorting relying only on Content Based signals + * Exposes multiple types of sorting relying only on Content Based signals. * Candidate Recency, Random, FavoriteCount and finally Standardized, which standardizes the scores * that come from the active SimilarityEngine and then sort on the standardized scores. */ @@ -25,7 +25,7 @@ case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) { params: Params, inputCandidates: Seq[Seq[InitialCandidate]], ): Future[Seq[BlendedCandidate]] = { - // Filter out empty candidate sequence + // Filter out empty candidate sequence. 
val candidates = inputCandidates.filter(_.nonEmpty) val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match { case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency => diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala index 4c5dd07c3..32a040a37 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/CountWeightedInterleaveBlender.scala @@ -70,9 +70,9 @@ object CountWeightedInterleaveBlender { * We pass two parameters to the weighted interleaver: * @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we * stay to uniform sampling. The bigger the shrinkage the - * closer we are to uniform round robin + * closer we are to uniform round robin. * @param maxWeightAdjustments max number of weighted sampling to do prior to defaulting to - * uniform. Set so that we avoid infinite loops (e.g. if weights are + * uniform. Set so that we avoid infinite loops (e.g. if weights are * 0) */ case class WeightedBlenderQuery( diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala index 14e93d53d..c10fae4d7 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SourceTypeBackFillBlender.scala @@ -18,22 +18,22 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) { private val stats: StatsReceiver = globalStats.scope(name) /** - * Partition the candidates based on source type + * Partition the candidates based on source type. 
* Interleave the two partitions of candidates separately - * Then append the back fill candidates to the end + * and then append the back fill candidates to the end. */ def blend( params: Params, inputCandidates: Seq[Seq[InitialCandidate]], ): Future[Seq[BlendedCandidate]] = { - // Filter out empty candidate sequence + // Filter out empty candidate sequence. val candidates = inputCandidates.filter(_.nonEmpty) val backFillSourceTypes = if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo else BackFillSourceTypes - // partition candidates based on their source types + // Partition candidates based on their source types. val (backFillCandidates, regularCandidates) = candidates.partition( _.head.candidateGenerationInfo.sourceInfoOpt @@ -43,7 +43,7 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) { val interleavedBackFillCandidates = InterleaveUtil.interleave(backFillCandidates) stats.stat("backFillCandidates").add(interleavedBackFillCandidates.size) - // Append interleaved backfill candidates to the end + // Append interleaved backfill candidates to the end. 
val interleavedCandidates = interleavedRegularCandidates ++ interleavedBackFillCandidates stats.stat("candidates").add(interleavedCandidates.size) @@ -51,7 +51,6 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) { val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates) Future.value(blendedCandidates) } - } object ImplicitSignalBackFillBlender { diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala index 7052a71a5..e769b08a2 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/blender/SwitchBlender.scala @@ -27,7 +27,7 @@ case class SwitchBlender @Inject() ( userState: UserState, inputCandidates: Seq[Seq[InitialCandidate]], ): Future[Seq[BlendedCandidate]] = { - // Take out empty seq + // Take out the empty seq. val nonEmptyCandidates = inputCandidates.collect { case candidates if candidates.nonEmpty => candidates @@ -43,7 +43,7 @@ case class SwitchBlender @Inject() ( } val candidatesToBlend = nonEmptyCandidates.sortBy(_.head)(innerSignalSorting) - // Blend based on specified blender rules + // Blend based on specified blender rules. 
params(BlenderParams.BlendingAlgorithmParam) match { case BlendingAlgorithmEnum.RoundRobin => defaultBlender.blend(candidatesToBlend) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala index e240ebf2d..a0ea94679 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateGenerator.scala @@ -39,24 +39,25 @@ class AdsCandidateGenerator @Inject() ( def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = { val allStats = stats.scope("all") val perProductStats = stats.scope("perProduct", query.product.toString) - StatsUtil.trackItemsStats(allStats) { StatsUtil.trackItemsStats(perProductStats) { for { - // fetch source signals + // Fetch source signals. sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) { fetchSources(query) } + realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) { fetchSeeds(query) } - // get initial candidates from similarity engines - // hydrate lineItemInfo and filter out non active ads + + // Get initial candidates from similarity engines. + // Hydrate lineItemInfo and filter out non active ads. initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) { fetchCandidates(query, sourceSignals, realGraphSeeds) } - // blend candidates + // Blend candidates. 
blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) { interleave(initialCandidates) } @@ -73,7 +74,6 @@ class AdsCandidateGenerator @Inject() ( } } } - } def fetchSources( @@ -95,7 +95,6 @@ class AdsCandidateGenerator @Inject() ( .fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params), query.params(AdsParams.EnableScribe) ) - } private def fetchSeeds( @@ -121,7 +120,6 @@ class AdsCandidateGenerator @Inject() ( scoreBoostFactor: Double, statsReceiver: StatsReceiver, ): Future[Seq[RankedAdsCandidate]] = { - val candidateSize = candidates.size val rankedCandidates = candidates.zipWithIndex.map { case (candidate, index) => diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala index 69ef31b74..44b1fe3d1 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/AdsCandidateSourcesRouter.scala @@ -90,7 +90,6 @@ case class AdsCandidateSourcesRouter @Inject() ( realGraphSeeds: Map[UserId, Double], params: configapi.Params ): Future[Seq[Seq[InitialAdsCandidate]]] = { - val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId) val tweetBasedSANNMinScore = params( @@ -296,7 +295,7 @@ case class AdsCandidateSourcesRouter @Inject() ( params ) - // dark traffic to simclusters-ann-2 + // Dark traffic to simclusters-ann-2 if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) { val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId) val sann2Query = SimClustersANNSimilarityEngine.fromParams( @@ -329,7 +328,6 @@ case class AdsCandidateSourcesRouter @Inject() ( sourceInfo: Option[SourceInfo], params: configapi.Params ) = { - val query = 
ProducerBasedUserAdGraphSimilarityEngine.fromParams( sourceInfo.get.internalId, params @@ -352,7 +350,6 @@ case class AdsCandidateSourcesRouter @Inject() ( sourceInfo: Option[SourceInfo], params: configapi.Params ) = { - val query = TweetBasedUserAdGraphSimilarityEngine.fromParams( sourceInfo.get.internalId, params @@ -375,7 +372,6 @@ case class AdsCandidateSourcesRouter @Inject() ( realGraphSeeds: Map[UserId, Double], params: configapi.Params ) = { - val query = ConsumersBasedUserAdGraphSimilarityEngine .fromParams(realGraphSeeds, params) @@ -394,7 +390,7 @@ case class AdsCandidateSourcesRouter @Inject() ( CandidateGenerationInfo( Some(sourceInfo), similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs + Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs. ) ) }) @@ -404,7 +400,7 @@ case class AdsCandidateSourcesRouter @Inject() ( similarityEngine: HnswANNSimilarityEngine, similarityEngineType: SimilarityEngineType, requestUserId: UserId, - sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine + sourceInfo: Option[SourceInfo], // If none, then it's consumer-based similarity engine. model: String ): Future[Seq[TweetWithCandidateGenerationInfo]] = { val internalId = @@ -455,7 +451,7 @@ case class AdsCandidateSourcesRouter @Inject() ( CandidateGenerationInfo( None, similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs + Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs. 
) ) } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala index 49cc37bde..08434d11a 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CandidateSourcesRouter.scala @@ -96,7 +96,6 @@ case class CandidateSourcesRouter @Inject() ( sourceGraphs: Map[String, Option[GraphSourceInfo]], params: configapi.Params, ): Future[Seq[Seq[InitialCandidate]]] = { - val tweetBasedCandidatesFuture = getCandidates( getTweetBasedSourceInfo(sourceSignals), params, @@ -225,7 +224,7 @@ case class CandidateSourcesRouter @Inject() ( consumersBasedUvgRealGraphInCandidatesFuture, customizedRetrievalBasedCandidatesFuture )).map { candidatesList => - // remove empty innerSeq + // Remove empty innerSeq. val result = candidatesList.flatten.filter(_.nonEmpty) stats.stat("numOfSequences").add(result.size) stats.stat("flattenCandidatesWithDup").add(result.flatten.size) @@ -262,7 +261,7 @@ case class CandidateSourcesRouter @Inject() ( CandidateGenerationInfo( sourceInfo, similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs + Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs. ) ) } @@ -330,7 +329,7 @@ case class CandidateSourcesRouter @Inject() ( CandidateGenerationInfo( None, similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs + Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs. ) ) } @@ -358,7 +357,7 @@ case class CandidateSourcesRouter @Inject() ( engine.getCandidates(EngineQuery(query, params)).map { _.map { _.map { tweetWithScore => - // define filters + // Define filters. 
TweetWithCandidateGenerationInfo( tweetWithScore.tweetId, CandidateGenerationInfo( @@ -401,7 +400,7 @@ case class CandidateSourcesRouter @Inject() ( CandidateGenerationInfo( None, similarityEngineInfo, - Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs + Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs. ) ) } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala index c69d0c4f2..9a395fd0b 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CrCandidateGenerator.scala @@ -48,7 +48,6 @@ class CrCandidateGenerator @Inject() ( timeoutConfig: TimeoutConfig, globalStats: StatsReceiver) { private val timer: Timer = new JavaTimer(true) - private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName) private val fetchSourcesStats = stats.scope("fetchSources") @@ -78,14 +77,14 @@ class CrCandidateGenerator @Inject() ( fetchSources(query) } initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) { - // find the positive and negative signals + // Find the positive and negative signals. val (positiveSignals, negativeSignals) = sourceSignals.partition { signal => !EnabledNegativeSourceTypes.contains(signal.sourceType) } fetchPositiveSourcesStats.stat("size").add(positiveSignals.size) fetchNegativeSourcesStats.stat("size").add(negativeSignals.size) - // find the positive signals to keep, removing block and muted users + // Find the positive signals to keep, removing block and muted users. 
val filteredSourceInfo = if (negativeSignals.nonEmpty && query.params( RecentNegativeSignalParams.EnableSourceParam)) { @@ -94,7 +93,7 @@ class CrCandidateGenerator @Inject() ( positiveSignals } - // fetch candidates from the positive signals + // Fetch candidates from the positive signals. StatsUtil.trackBlockStats(fetchCandidatesStats) { fetchCandidates(query, filteredSourceInfo, sourceGraphsMap) } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala index 427dd9b74..11c59e5ee 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/CustomizedRetrievalCandidateGeneration.scala @@ -28,7 +28,7 @@ import javax.inject.Singleton import scala.collection.mutable.ArrayBuffer /** - * A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources + * A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources. * * Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different * similarity engines without blending. 
In other words, this class shall not be thought of as a diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala index 0c5334c28..7e3de2b3a 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/FrsTweetCandidateGenerator.scala @@ -105,7 +105,7 @@ class FrsTweetCandidateGenerator @Inject() ( } /** - * Fetch recommended seed users from FRS + * Fetch recommended seed users from FRS. */ private def fetchSeeds( userId: UserId, @@ -131,7 +131,7 @@ class FrsTweetCandidateGenerator @Inject() ( } /** - * Fetch tweet candidates from Earlybird + * Fetch tweet candidates from Earlybird. */ private def fetchCandidates( searcherUserId: UserId, @@ -141,7 +141,7 @@ class FrsTweetCandidateGenerator @Inject() ( params: Params ): Future[Option[Seq[TweetWithAuthor]]] = { if (seedAuthors.nonEmpty) { - // call earlybird + // Call Earlybird. val query = EarlybirdSimilarityEngineRouter.queryFromParams( Some(searcherUserId), seedAuthors, @@ -154,7 +154,7 @@ class FrsTweetCandidateGenerator @Inject() ( } /** - * Filter candidates that do not pass visibility filter policy + * Filter candidates that do not pass visibility filter policy. */ private def filterCandidates( candidates: Option[Seq[TweetWithAuthor]], @@ -175,7 +175,7 @@ class FrsTweetCandidateGenerator @Inject() ( } /** - * Hydrate the candidates with the FRS candidate sources and scores + * Hydrate the candidates with the FRS candidate sources and scores. 
*/ private def hydrateCandidates( frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]], @@ -193,8 +193,8 @@ class FrsTweetCandidateGenerator @Inject() ( frsCandidateSourceScores = frsQueryResult.flatMap { result => result.sourceWithScores.map { _.collect { - // see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala - // see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala + // See TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala + // See Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala case (candidateSourceAlgoStr, score) if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains( candidateSourceAlgoStr) => @@ -210,7 +210,6 @@ class FrsTweetCandidateGenerator @Inject() ( } } } - } object FrsTweetCandidateGenerator { diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala index 45a919a57..a45ca6c5e 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedTweetCandidateGenerator.scala @@ -43,7 +43,6 @@ class RelatedTweetCandidateGenerator @Inject() ( def get( query: RelatedTweetCandidateGeneratorQuery ): Future[Seq[InitialCandidate]] = { - val allStats = stats.scope("all") val perProductStats = stats.scope("perProduct", query.product.toString) StatsUtil.trackItemsStats(allStats) { @@ 
-90,9 +89,9 @@ class RelatedTweetCandidateGenerator @Inject() ( } /*** - * fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, - * and apply VF filter based on TweetInfoStore - * To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]] + * Fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, + * and apply VF filter based on TweetInfoStore. + * To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]] * instead of a Seq[Candidate] even though we only have a Seq in it. */ private def getCandidatesFromSimilarityEngine[QueryType]( @@ -103,7 +102,7 @@ class RelatedTweetCandidateGenerator @Inject() ( /*** * We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation - * and as a result, it will return Seq[Seq[InitialCandidate]] + * and as a result, it will return Seq[Seq[InitialCandidate]]. */ val engineQueries = Seq(fromParamsForRelatedTweet(query.internalId, query.params)) @@ -138,7 +137,7 @@ class RelatedTweetCandidateGenerator @Inject() ( Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => /*** * If tweetInfo does not exist, we will filter out this tweet candidate. - * This tweetInfo filter also acts as the VF filter + * This tweetInfo filter also acts as the VF filter. 
*/ candidates.collect { case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala index cc7f55859..dc2d104e9 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/RelatedVideoTweetCandidateGenerator.scala @@ -36,7 +36,6 @@ class RelatedVideoTweetCandidateGenerator @Inject() ( def get( query: RelatedVideoTweetCandidateGeneratorQuery ): Future[Seq[InitialCandidate]] = { - val allStats = stats.scope("all") val perProductStats = stats.scope("perProduct", query.product.toString) StatsUtil.trackItemsStats(allStats) { @@ -75,8 +74,8 @@ class RelatedVideoTweetCandidateGenerator @Inject() ( } /*** - * fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, - * and apply VF filter based on TweetInfoStore + * Fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine, + * and apply VF filter based on TweetInfoStore. * To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]] * instead of a Seq[Candidate] even though we only have a Seq in it. */ @@ -88,7 +87,7 @@ class RelatedVideoTweetCandidateGenerator @Inject() ( /*** * We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation - * and as a result, it will return Seq[Seq[InitialCandidate]] + * and as a result, it will return Seq[Seq[InitialCandidate]]. 
*/ val engineQueries = Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params)) @@ -121,7 +120,7 @@ class RelatedVideoTweetCandidateGenerator @Inject() ( Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => /*** * If tweetInfo does not exist, we will filter out this tweet candidate. - * This tweetInfo filter also acts as the VF filter + * This tweetInfo filter also acts as the VF filter. */ candidates.collect { case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined => diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala index a40901a58..8834a42c9 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/SimClustersInterestedInCandidateGeneration.scala @@ -179,7 +179,7 @@ case class SimClustersInterestedInCandidateGeneration @Inject() ( else Future.None - // AddressBookInterestedIn Queries + // AddressBookInterestedIn Queries. val userAddressBookInterestedInCandidateResultFut = if (query.enableAddressBookNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine) getInterestedInCandidateResult( @@ -397,7 +397,7 @@ object SimClustersInterestedInCandidateGeneration { internalId: InternalId, params: configapi.Params, ): Query = { - // SimClusters common configs + // SimClusters common configs. 
val simClustersModelVersion = ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam)) val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId) @@ -415,13 +415,13 @@ object SimClustersInterestedInCandidateGeneration { val simClustersAddressBookInterestedInMinScore = params( InterestedInParams.MinScoreAddressBookParam) - // InterestedIn embeddings parameters + // InterestedIn embeddings parameters. val interestedInEmbedding = params(InterestedInParams.InterestedInEmbeddingIdParam) val nextInterestedInEmbedding = params(InterestedInParams.NextInterestedInEmbeddingIdParam) val addressbookInterestedInEmbedding = params( InterestedInParams.AddressBookInterestedInEmbeddingIdParam) - // Prod SimClustersANN Query + // Prod SimClustersANN Query. val interestedInSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -446,7 +446,7 @@ object SimClustersInterestedInCandidateGeneration { simClustersANNConfigId, params) - // Experimental SANN cluster Query + // Experimental SANN cluster Query. val interestedInExperimentalSimClustersANNQuery = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -471,7 +471,7 @@ object SimClustersInterestedInCandidateGeneration { experimentalSimClustersANNConfigId, params) - // SimClusters ANN cluster 1 Query + // SimClusters ANN cluster 1 Query. val interestedInSimClustersANN1Query = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -496,7 +496,7 @@ object SimClustersInterestedInCandidateGeneration { simClustersANN1ConfigId, params) - // SimClusters ANN cluster 2 Query + // SimClusters ANN cluster 2 Query. val interestedInSimClustersANN2Query = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -521,7 +521,7 @@ object SimClustersInterestedInCandidateGeneration { simClustersANN2ConfigId, params) - // SimClusters ANN cluster 3 Query + // SimClusters ANN cluster 3 Query. 
val interestedInSimClustersANN3Query = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -546,7 +546,7 @@ object SimClustersInterestedInCandidateGeneration { simClustersANN3ConfigId, params) - // SimClusters ANN cluster 5 Query + // SimClusters ANN cluster 5 Query. val interestedInSimClustersANN5Query = SimClustersANNSimilarityEngine.fromParams( internalId, @@ -554,7 +554,8 @@ object SimClustersInterestedInCandidateGeneration { simClustersModelVersion, simClustersANN5ConfigId, params) - // SimClusters ANN cluster 4 Query + + // SimClusters ANN cluster 4 Query. val interestedInSimClustersANN4Query = SimClustersANNSimilarityEngine.fromParams( internalId, diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala index 690fda482..66c9a23ff 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/TopicTweetCandidateGenerator.scala @@ -116,7 +116,7 @@ class TopicTweetCandidateGenerator @Inject() ( val tweetIds = candidates.map(_.tweetId).toSet val numTweetsPreFilter = tweetIds.size Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /** * + /** * If tweetInfo does not exist, we will filter out this tweet candidate. 
*/ val tweetyPieFilteredInitialCandidates = candidates.collect { @@ -142,7 +142,6 @@ class TopicTweetCandidateGenerator @Inject() ( topicId -> tweetyPieFilteredInitialCandidates } } - Future.collect(initialCandidates.toSeq).map(_.toMap) } @@ -152,7 +151,6 @@ class TopicTweetCandidateGenerator @Inject() ( isVideoOnly: Boolean, excludeTweetIds: Set[TweetId] ): Future[Map[TopicId, Seq[InitialCandidate]]] = { - val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge) val filteredResults = topicTweetMap.map { diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala index ecf0bb98e..550a99ea6 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/candidate_generation/UtegTweetCandidateGenerator.scala @@ -46,18 +46,16 @@ class UtegTweetCandidateGenerator @Inject() ( def get( query: UtegTweetCandidateGeneratorQuery ): Future[Seq[TweetWithScoreAndSocialProof]] = { - val allStats = stats.scope("all") val perProductStats = stats.scope("perProduct", query.product.toString) StatsUtil.trackItemsStats(allStats) { StatsUtil.trackItemsStats(perProductStats) { - /** * The candidate we return in the end needs a social proof field, which isn't * supported by the any existing Candidate type, so we created TweetWithScoreAndSocialProof * instead. * - * However, filters and light ranker expect Candidate-typed param to work. In order to minimise the + * However, filters and light ranker expect Candidate-typed param to work. In order to minimize the * changes to them, we are doing conversions from/to TweetWithScoreAndSocialProof to/from Candidate * in this method. 
*/ @@ -111,7 +109,6 @@ class UtegTweetCandidateGenerator @Inject() ( candidate.toRankedCandidate(score) } ) - } def fetchCandidates( @@ -136,7 +133,7 @@ class UtegTweetCandidateGenerator @Inject() ( ): Future[Seq[InitialCandidate]] = { val tweetIds = candidates.map(_.tweetId).toSet Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos => - /** * + /** * If tweetInfo does not exist, we will filter out this tweet candidate. */ candidates.collect { @@ -172,7 +169,7 @@ class UtegTweetCandidateGenerator @Inject() ( candidate.predictionScore, tweet.socialProofByType ) - // The exception should never be thrown + // The exception should never be thrown. }.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets")) } } diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala index dbf3ad6fd..f3cdc218a 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/SimClustersANNConfig.scala @@ -41,12 +41,11 @@ object SimClustersANNConfig { annAlgorithm = ScoringAlgorithm.CosineSimilarity, ) - /* - SimClustersANNConfigId: String - Format: Prod - “EmbeddingType_ModelVersion_Default” - Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. Date : YYYYMMDD + /** + * SimClustersANNConfigId: String + * Format: Prod - “EmbeddingType_ModelVersion_Default” + * Format: Experiment - “EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number”. 
Date : YYYYMMDD */ - private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy() // Chunnan's exp on maxTweetCandidateAgeDays 2 @@ -142,12 +141,14 @@ object SimClustersANNConfig { candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet, maxTweetCandidateAge = 1.hours ) + // SANN-4 config private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 = LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy( candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, maxTweetCandidateAge = 48.hours ) + private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy() // Chunnan's exp on maxTweetCandidateAgeDays 2 @@ -199,6 +200,7 @@ object SimClustersANNConfig { candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, maxTweetCandidateAge = 48.hours ) + private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy() // Chunnan's exp on maxTweetCandidateAgeDays 2 @@ -302,6 +304,7 @@ object SimClustersANNConfig { candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, maxTweetCandidateAge = 48.hours ) + private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy() // Chunnan's exp on maxTweetCandidateAgeDays 2 @@ -353,7 +356,8 @@ object SimClustersANNConfig { candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, maxTweetCandidateAge = 48.hours ) - // Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow + + // Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow. 
private val FollowBasedProducer_Model20m145k2020_Default = FavBasedProducer_Model20m145k2020_Default.copy() @@ -400,6 +404,7 @@ object SimClustersANNConfig { candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet, maxTweetCandidateAge = 48.hours ) + val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map( "FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default, "FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06, diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala index 46e32990b..3a78a8088 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/config/TimeoutConfig.scala @@ -3,22 +3,22 @@ package com.twitter.cr_mixer.config import com.twitter.util.Duration case class TimeoutConfig( - /* Default timeouts for candidate generator */ + // Default timeouts for candidate generator. serviceTimeout: Duration, signalFetchTimeout: Duration, similarityEngineTimeout: Duration, annServiceClientTimeout: Duration, - /* For Uteg Candidate Generator */ + // For Uteg Candidate Generator. utegSimilarityEngineTimeout: Duration, - /* For User State Store */ + // For User State Store userStateUnderlyingStoreTimeout: Duration, userStateStoreTimeout: Duration, - /* For FRS based tweets */ - // Timeout passed to EarlyBird server + // For FRS based tweets . + // Timeout passed to EarlyBird server. earlybirdServerTimeout: Duration, // Timeout set on CrMixer side earlybirdSimilarityEngineTimeout: Duration, frsBasedTweetEndpointTimeout: Duration, topicTweetEndpointTimeout: Duration, - // Timeout Settings for Navi gRPC Client + // Timeout Settings for Navi gRPC Client. 
naviRequestTimeout: Duration) diff --git a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala index c16d76de8..fe07d5f90 100644 --- a/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala +++ b/cr-mixer/server/src/main/scala/com/twitter/cr_mixer/controller/CrMixerThriftController.scala @@ -101,7 +101,7 @@ class CrMixerThriftController @Inject() ( ExceptionUtils.getStackTrace(e) ).mkString("\n") - /** * + /** * We chose logger.info() here to print message instead of logger.error since that * logger.error sometimes suppresses detailed stacktrace. */ @@ -109,8 +109,7 @@ class CrMixerThriftController @Inject() ( } private def generateRequestUUID(): Long = { - - /** * + /** * We generate unique UUID via bitwise operations. See the below link for more: * https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid */ @@ -119,7 +118,6 @@ class CrMixerThriftController @Inject() ( handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args => val endpointName = "getTweetRecommendations" - val requestUUID = generateRequestUUID() val startTime = Time.now.inMilliseconds val userId = args.request.clientContext.userId.getOrElse( @@ -168,10 +166,9 @@ class CrMixerThriftController @Inject() ( Future(CrMixerTweetResponse(Seq.empty)) } } - } - /** * + /** * GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor are essentially * doing very similar things, except that one passes in TweetId which calls TweetBased engine, * and the other passes in AuthorId which calls ProducerBased engine. 
@@ -221,7 +218,6 @@ class CrMixerThriftController @Inject() ( Future(RelatedTweetResponse(Seq.empty)) } } - } private def getRelatedVideoTweets( @@ -330,7 +326,6 @@ class CrMixerThriftController @Inject() ( Future(AdsResponse(Seq.empty)) } } - } private def buildCrCandidateGeneratorQuery( @@ -338,7 +333,6 @@ class CrMixerThriftController @Inject() ( requestUUID: Long, userId: Long ): Future[CrCandidateGeneratorQuery] = { - val product = thriftRequest.product val productContext = thriftRequest.productContext val scopedStats = statsReceiver @@ -357,7 +351,7 @@ class CrMixerThriftController @Inject() ( userState ) - // Specify product-specific behavior mapping here + // Specify product-specific behavior mapping here. val maxNumResults = (product, productContext) match { case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) => homeContext.maxResults.getOrElse(9999) @@ -392,7 +386,6 @@ class CrMixerThriftController @Inject() ( thriftRequest: RelatedTweetRequest, requestUUID: Long ): Future[RelatedTweetCandidateGeneratorQuery] = { - val product = thriftRequest.product val scopedStats = statsReceiver .scope(product.toString).scope("RelatedTweetRequest") @@ -409,8 +402,8 @@ class CrMixerThriftController @Inject() ( thriftRequest.product, userState) - // Specify product-specific behavior mapping here - // Currently, Home takes 10, and RUX takes 100 + // Specify product-specific behavior mapping here. + // Currently, Home takes 10, and RUX takes 100. 
val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam) RelatedTweetCandidateGeneratorQuery( @@ -458,7 +451,6 @@ class CrMixerThriftController @Inject() ( thriftRequest: RelatedVideoTweetRequest, requestUUID: Long ): Future[RelatedVideoTweetCandidateGeneratorQuery] = { - val product = thriftRequest.product val scopedStats = statsReceiver .scope(product.toString).scope("RelatedVideoTweetRequest") @@ -487,14 +479,12 @@ class CrMixerThriftController @Inject() ( requestUUID = requestUUID ) } - } private def buildUtegTweetQuery( thriftRequest: UtegTweetRequest, requestUUID: Long ): Future[UtegTweetCandidateGeneratorQuery] = { - val userId = thriftRequest.clientContext.userId.getOrElse( throw new IllegalArgumentException("userId must be present in the Thrift clientContext") ) @@ -536,7 +526,6 @@ class CrMixerThriftController @Inject() ( requestUUID = requestUUID ) } - } private def buildTopicTweetQuery( @@ -550,7 +539,7 @@ class CrMixerThriftController @Inject() ( val product = thriftRequest.product val productContext = thriftRequest.productContext - // Specify product-specific behavior mapping here + // Specify product-specific behavior mapping here. val isVideoOnly = (product, productContext) match { case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) => context.isVideoOnly @@ -646,7 +635,6 @@ class CrMixerThriftController @Inject() ( private def buildThriftResponse( candidates: Seq[RankedCandidate] ): CrMixerTweetResponse = { - val tweets = candidates.map { candidate => TweetRecommendation( tweetId = candidate.tweetId, @@ -663,7 +651,7 @@ class CrMixerThriftController @Inject() ( private def scribeTweetScoreFunnelSeries( candidates: Seq[RankedCandidate] ): Seq[RankedCandidate] = { - // 202210210901 is a random number for code search of Lensview + // 202210210901 is a random number for code search of Lensview. 
tweetScoreFunnelSeries.startNewSpan( name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType", codePtr = 202210210901L) { @@ -734,7 +722,6 @@ class CrMixerThriftController @Inject() ( request: CrMixerTweetRequest, response: Future[CrMixerTweetResponse] ): Unit = { - val userId = request.clientContext.userId.getOrElse( throw new IllegalArgumentException( "userId must be present in getTweetRecommendations() Thrift clientContext"))