Fix formatting and comments

This commit is contained in:
petemihaylov 2023-04-01 16:34:49 +02:00
parent 76ace4fea3
commit cbbf9e0a01
20 changed files with 92 additions and 118 deletions

View File

@ -26,18 +26,17 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) {
def blend(
inputCandidates: Seq[Seq[InitialAdsCandidate]],
): Future[Seq[BlendedAdsCandidate]] = {
// Filter out empty candidate sequence
// Filter out empty candidate sequence.
val candidates = inputCandidates.filter(_.nonEmpty)
val (interestedInCandidates, twistlyCandidates) =
candidates.partition(_.head.candidateGenerationInfo.sourceInfoOpt.isEmpty)
// First interleave twistly candidates
// Interleave twistly candidates.
val interleavedTwistlyCandidates = InterleaveUtil.interleave(twistlyCandidates)
val twistlyAndInterestedInCandidates =
Seq(interestedInCandidates.flatten, interleavedTwistlyCandidates)
// then interleave twistly candidates with interested in to make them even
// Interleave twistly candidates with interested-in candidates to make them even.
val interleavedCandidates = InterleaveUtil.interleave(twistlyAndInterestedInCandidates)
stats.stat("candidates").add(interleavedCandidates.size)
@ -45,6 +44,7 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) {
val blendedCandidates = buildBlendedAdsCandidate(inputCandidates, interleavedCandidates)
Future.value(blendedCandidates)
}
private def buildBlendedAdsCandidate(
inputCandidates: Seq[Seq[InitialAdsCandidate]],
interleavedCandidates: Seq[InitialAdsCandidate]
@ -73,5 +73,4 @@ case class AdsBlender @Inject() (globalStats: StatsReceiver) {
}
tweetIdMap.toMap
}
}

View File

@ -9,7 +9,7 @@ import scala.collection.mutable
object BlendedCandidatesBuilder {
/**
* @param inputCandidates input candidate prior to interleaving
* @param inputCandidates input candidate prior to interleaving.
* @param interleavedCandidates after interleaving. These tweets are de-duplicated.
*/
def build(
@ -23,8 +23,8 @@ object BlendedCandidatesBuilder {
}
/**
* This function tells you which CandidateGenerationInfo generated a given tweet.
* The same tweet can be generated by different sources.
* This function tells you which CandidateGenerationInfo generated a given tweet
*/
private def buildCandidateToCGInfosMap(
candidateSeq: Seq[Seq[InitialCandidate]],
@ -44,5 +44,4 @@ object BlendedCandidatesBuilder {
}
tweetIdMap.toMap
}
}

View File

@ -17,7 +17,7 @@ case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {
private val stats: StatsReceiver = globalStats.scope(name)
/**
* Exposes multiple types of sorting relying only on Content Based signals
* Exposes multiple types of sorting relying only on Content Based signals.
* Candidate Recency, Random, FavoriteCount and finally Standardized, which standardizes the scores
* that come from the active SimilarityEngine and then sort on the standardized scores.
*/
@ -25,7 +25,7 @@ case class ContentSignalBlender @Inject() (globalStats: StatsReceiver) {
params: Params,
inputCandidates: Seq[Seq[InitialCandidate]],
): Future[Seq[BlendedCandidate]] = {
// Filter out empty candidate sequence
// Filter out empty candidate sequence.
val candidates = inputCandidates.filter(_.nonEmpty)
val sortedCandidates = params(BlenderParams.ContentBlenderTypeSortingAlgorithmParam) match {
case BlenderParams.ContentBasedSortingAlgorithmEnum.CandidateRecency =>

View File

@ -70,9 +70,9 @@ object CountWeightedInterleaveBlender {
* We pass two parameters to the weighted interleaver:
* @param rankerWeightShrinkage shrinkage parameter between [0, 1] that determines how close we
* stay to uniform sampling. The bigger the shrinkage the
* closer we are to uniform round robin
* closer we are to uniform round robin.
* @param maxWeightAdjustments max number of weighted sampling to do prior to defaulting to
* uniform. Set so that we avoid infinite loops (e.g. if weights are
* uniform. Set so that we avoid infinite loops (e.g. if weights are
* 0).
*/
case class WeightedBlenderQuery(

View File

@ -18,22 +18,22 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {
private val stats: StatsReceiver = globalStats.scope(name)
/**
* Partition the candidates based on source type
* Partition the candidates based on source type.
* Interleave the two partitions of candidates separately
* Then append the back fill candidates to the end
* and then append the back fill candidates to the end.
*/
def blend(
params: Params,
inputCandidates: Seq[Seq[InitialCandidate]],
): Future[Seq[BlendedCandidate]] = {
// Filter out empty candidate sequence
// Filter out empty candidate sequence.
val candidates = inputCandidates.filter(_.nonEmpty)
val backFillSourceTypes =
if (params(BlenderParams.SourceTypeBackFillEnableVideoBackFill)) BackFillSourceTypesWithVideo
else BackFillSourceTypes
// partition candidates based on their source types
// Partition candidates based on their source types.
val (backFillCandidates, regularCandidates) =
candidates.partition(
_.head.candidateGenerationInfo.sourceInfoOpt
@ -43,7 +43,7 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {
val interleavedBackFillCandidates =
InterleaveUtil.interleave(backFillCandidates)
stats.stat("backFillCandidates").add(interleavedBackFillCandidates.size)
// Append interleaved backfill candidates to the end
// Append interleaved backfill candidates to the end.
val interleavedCandidates = interleavedRegularCandidates ++ interleavedBackFillCandidates
stats.stat("candidates").add(interleavedCandidates.size)
@ -51,7 +51,6 @@ case class SourceTypeBackFillBlender @Inject() (globalStats: StatsReceiver) {
val blendedCandidates = BlendedCandidatesBuilder.build(inputCandidates, interleavedCandidates)
Future.value(blendedCandidates)
}
}
object ImplicitSignalBackFillBlender {

View File

@ -27,7 +27,7 @@ case class SwitchBlender @Inject() (
userState: UserState,
inputCandidates: Seq[Seq[InitialCandidate]],
): Future[Seq[BlendedCandidate]] = {
// Take out empty seq
// Take out the empty seq.
val nonEmptyCandidates = inputCandidates.collect {
case candidates if candidates.nonEmpty =>
candidates
@ -43,7 +43,7 @@ case class SwitchBlender @Inject() (
}
val candidatesToBlend = nonEmptyCandidates.sortBy(_.head)(innerSignalSorting)
// Blend based on specified blender rules
// Blend based on specified blender rules.
params(BlenderParams.BlendingAlgorithmParam) match {
case BlendingAlgorithmEnum.RoundRobin =>
defaultBlender.blend(candidatesToBlend)

View File

@ -39,24 +39,25 @@ class AdsCandidateGenerator @Inject() (
def get(query: AdsCandidateGeneratorQuery): Future[Seq[RankedAdsCandidate]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
StatsUtil.trackItemsStats(perProductStats) {
for {
// fetch source signals
// Fetch source signals.
sourceSignals <- StatsUtil.trackBlockStats(fetchSourcesStats) {
fetchSources(query)
}
realGraphSeeds <- StatsUtil.trackItemMapStats(fetchRealGraphSeedsStats) {
fetchSeeds(query)
}
// get initial candidates from similarity engines
// hydrate lineItemInfo and filter out non active ads
// Get initial candidates from similarity engines.
// Hydrate lineItemInfo and filter out non active ads.
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesStats) {
fetchCandidates(query, sourceSignals, realGraphSeeds)
}
// blend candidates
// Blend candidates.
blendedCandidates <- StatsUtil.trackItemsStats(interleaveStats) {
interleave(initialCandidates)
}
@ -73,7 +74,6 @@ class AdsCandidateGenerator @Inject() (
}
}
}
}
def fetchSources(
@ -95,7 +95,6 @@ class AdsCandidateGenerator @Inject() (
.fetchCandidates(query.userId, sourceSignals, realGraphSeeds, query.params),
query.params(AdsParams.EnableScribe)
)
}
private def fetchSeeds(
@ -121,7 +120,6 @@ class AdsCandidateGenerator @Inject() (
scoreBoostFactor: Double,
statsReceiver: StatsReceiver,
): Future[Seq[RankedAdsCandidate]] = {
val candidateSize = candidates.size
val rankedCandidates = candidates.zipWithIndex.map {
case (candidate, index) =>

View File

@ -90,7 +90,6 @@ case class AdsCandidateSourcesRouter @Inject() (
realGraphSeeds: Map[UserId, Double],
params: configapi.Params
): Future[Seq[Seq[InitialAdsCandidate]]] = {
val simClustersANN1ConfigId = params(SimClustersANNParams.SimClustersANN1ConfigId)
val tweetBasedSANNMinScore = params(
@ -296,7 +295,7 @@ case class AdsCandidateSourcesRouter @Inject() (
params
)
// dark traffic to simclusters-ann-2
// Dark traffic to simclusters-ann-2.
if (decider.isAvailable(DeciderConstants.enableSimClustersANN2DarkTrafficDeciderKey)) {
val simClustersANN2ConfigId = params(SimClustersANNParams.SimClustersANN2ConfigId)
val sann2Query = SimClustersANNSimilarityEngine.fromParams(
@ -329,7 +328,6 @@ case class AdsCandidateSourcesRouter @Inject() (
sourceInfo: Option[SourceInfo],
params: configapi.Params
) = {
val query = ProducerBasedUserAdGraphSimilarityEngine.fromParams(
sourceInfo.get.internalId,
params
@ -352,7 +350,6 @@ case class AdsCandidateSourcesRouter @Inject() (
sourceInfo: Option[SourceInfo],
params: configapi.Params
) = {
val query = TweetBasedUserAdGraphSimilarityEngine.fromParams(
sourceInfo.get.internalId,
params
@ -375,7 +372,6 @@ case class AdsCandidateSourcesRouter @Inject() (
realGraphSeeds: Map[UserId, Double],
params: configapi.Params
) = {
val query = ConsumersBasedUserAdGraphSimilarityEngine
.fromParams(realGraphSeeds, params)
@ -394,7 +390,7 @@ case class AdsCandidateSourcesRouter @Inject() (
CandidateGenerationInfo(
Some(sourceInfo),
similarityEngineInfo,
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs.
)
)
})
@ -404,7 +400,7 @@ case class AdsCandidateSourcesRouter @Inject() (
similarityEngine: HnswANNSimilarityEngine,
similarityEngineType: SimilarityEngineType,
requestUserId: UserId,
sourceInfo: Option[SourceInfo], // if none, then it's consumer-based similarity engine
sourceInfo: Option[SourceInfo], // If none, then it's consumer-based similarity engine.
model: String
): Future[Seq[TweetWithCandidateGenerationInfo]] = {
val internalId =
@ -455,7 +451,7 @@ case class AdsCandidateSourcesRouter @Inject() (
CandidateGenerationInfo(
None,
similarityEngineInfo,
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs.
)
)
}

View File

@ -96,7 +96,6 @@ case class CandidateSourcesRouter @Inject() (
sourceGraphs: Map[String, Option[GraphSourceInfo]],
params: configapi.Params,
): Future[Seq[Seq[InitialCandidate]]] = {
val tweetBasedCandidatesFuture = getCandidates(
getTweetBasedSourceInfo(sourceSignals),
params,
@ -225,7 +224,7 @@ case class CandidateSourcesRouter @Inject() (
consumersBasedUvgRealGraphInCandidatesFuture,
customizedRetrievalBasedCandidatesFuture
)).map { candidatesList =>
// remove empty innerSeq
// Remove empty innerSeq.
val result = candidatesList.flatten.filter(_.nonEmpty)
stats.stat("numOfSequences").add(result.size)
stats.stat("flattenCandidatesWithDup").add(result.flatten.size)
@ -262,7 +261,7 @@ case class CandidateSourcesRouter @Inject() (
CandidateGenerationInfo(
sourceInfo,
similarityEngineInfo,
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs.
)
)
}
@ -330,7 +329,7 @@ case class CandidateSourcesRouter @Inject() (
CandidateGenerationInfo(
None,
similarityEngineInfo,
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs.
)
)
}
@ -358,7 +357,7 @@ case class CandidateSourcesRouter @Inject() (
engine.getCandidates(EngineQuery(query, params)).map {
_.map {
_.map { tweetWithScore =>
// define filters
// Define filters.
TweetWithCandidateGenerationInfo(
tweetWithScore.tweetId,
CandidateGenerationInfo(
@ -401,7 +400,7 @@ case class CandidateSourcesRouter @Inject() (
CandidateGenerationInfo(
None,
similarityEngineInfo,
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs
Seq.empty // Atomic Similarity Engine. Hence it has no contributing SEs.
)
)
}

View File

@ -48,7 +48,6 @@ class CrCandidateGenerator @Inject() (
timeoutConfig: TimeoutConfig,
globalStats: StatsReceiver) {
private val timer: Timer = new JavaTimer(true)
private val stats: StatsReceiver = globalStats.scope(this.getClass.getCanonicalName)
private val fetchSourcesStats = stats.scope("fetchSources")
@ -78,14 +77,14 @@ class CrCandidateGenerator @Inject() (
fetchSources(query)
}
initialCandidates <- StatsUtil.trackBlockStats(fetchCandidatesAfterFilterStats) {
// find the positive and negative signals
// Find the positive and negative signals.
val (positiveSignals, negativeSignals) = sourceSignals.partition { signal =>
!EnabledNegativeSourceTypes.contains(signal.sourceType)
}
fetchPositiveSourcesStats.stat("size").add(positiveSignals.size)
fetchNegativeSourcesStats.stat("size").add(negativeSignals.size)
// find the positive signals to keep, removing block and muted users
// Find the positive signals to keep, removing blocked and muted users.
val filteredSourceInfo =
if (negativeSignals.nonEmpty && query.params(
RecentNegativeSignalParams.EnableSourceParam)) {
@ -94,7 +93,7 @@ class CrCandidateGenerator @Inject() (
positiveSignals
}
// fetch candidates from the positive signals
// Fetch candidates from the positive signals.
StatsUtil.trackBlockStats(fetchCandidatesStats) {
fetchCandidates(query, filteredSourceInfo, sourceGraphsMap)
}

View File

@ -28,7 +28,7 @@ import javax.inject.Singleton
import scala.collection.mutable.ArrayBuffer
/**
* A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources
* A candidate generator that fetches similar tweets from multiple customized retrieval based candidate sources.
*
* Different from [[TweetBasedCandidateGeneration]], this store returns candidates from different
* similarity engines without blending. In other words, this class shall not be thought of as a

View File

@ -105,7 +105,7 @@ class FrsTweetCandidateGenerator @Inject() (
}
/**
* Fetch recommended seed users from FRS
* Fetch recommended seed users from FRS.
*/
private def fetchSeeds(
userId: UserId,
@ -131,7 +131,7 @@ class FrsTweetCandidateGenerator @Inject() (
}
/**
* Fetch tweet candidates from Earlybird
* Fetch tweet candidates from Earlybird.
*/
private def fetchCandidates(
searcherUserId: UserId,
@ -141,7 +141,7 @@ class FrsTweetCandidateGenerator @Inject() (
params: Params
): Future[Option[Seq[TweetWithAuthor]]] = {
if (seedAuthors.nonEmpty) {
// call earlybird
// Call Earlybird.
val query = EarlybirdSimilarityEngineRouter.queryFromParams(
Some(searcherUserId),
seedAuthors,
@ -154,7 +154,7 @@ class FrsTweetCandidateGenerator @Inject() (
}
/**
* Filter candidates that do not pass visibility filter policy
* Filter candidates that do not pass visibility filter policy.
*/
private def filterCandidates(
candidates: Option[Seq[TweetWithAuthor]],
@ -175,7 +175,7 @@ class FrsTweetCandidateGenerator @Inject() (
}
/**
* Hydrate the candidates with the FRS candidate sources and scores
* Hydrate the candidates with the FRS candidate sources and scores.
*/
private def hydrateCandidates(
frsAuthorWithScores: Option[Map[UserId, FrsQueryResult]],
@ -193,8 +193,8 @@ class FrsTweetCandidateGenerator @Inject() (
frsCandidateSourceScores = frsQueryResult.flatMap { result =>
result.sourceWithScores.map {
_.collect {
// see TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
// see Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
// See TokenStrToAlgorithmMap @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/constants/AlgorithmFeedbackTokens.scala
// See Algorithm @ https://sourcegraph.twitter.biz/git.twitter.biz/source/-/blob/hermit/hermit-core/src/main/scala/com/twitter/hermit/model/Algorithm.scala
case (candidateSourceAlgoStr, score)
if AlgorithmFeedbackTokens.TokenStrToAlgorithmMap.contains(
candidateSourceAlgoStr) =>
@ -210,7 +210,6 @@ class FrsTweetCandidateGenerator @Inject() (
}
}
}
}
object FrsTweetCandidateGenerator {

View File

@ -43,7 +43,6 @@ class RelatedTweetCandidateGenerator @Inject() (
def get(
query: RelatedTweetCandidateGeneratorQuery
): Future[Seq[InitialCandidate]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
@ -90,9 +89,9 @@ class RelatedTweetCandidateGenerator @Inject() (
}
/***
* fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
* and apply VF filter based on TweetInfoStore
* To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]]
* Fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
* and apply VF filter based on TweetInfoStore.
* To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]]
* instead of a Seq[Candidate] even though we only have a Seq in it.
*/
private def getCandidatesFromSimilarityEngine[QueryType](
@ -103,7 +102,7 @@ class RelatedTweetCandidateGenerator @Inject() (
/***
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
* and as a result, it will return Seq[Seq[InitialCandidate]]
* and as a result, it will return Seq[Seq[InitialCandidate]].
*/
val engineQueries =
Seq(fromParamsForRelatedTweet(query.internalId, query.params))
@ -138,7 +137,7 @@ class RelatedTweetCandidateGenerator @Inject() (
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
/***
* If tweetInfo does not exist, we will filter out this tweet candidate.
* This tweetInfo filter also acts as the VF filter
* This tweetInfo filter also acts as the VF filter.
*/
candidates.collect {
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>

View File

@ -36,7 +36,6 @@ class RelatedVideoTweetCandidateGenerator @Inject() (
def get(
query: RelatedVideoTweetCandidateGeneratorQuery
): Future[Seq[InitialCandidate]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
@ -75,8 +74,8 @@ class RelatedVideoTweetCandidateGenerator @Inject() (
}
/***
* fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
* and apply VF filter based on TweetInfoStore
* Fetch Candidates from TweetBased/ProducerBased Unified Similarity Engine,
* and apply VF filter based on TweetInfoStore.
* To align with the downstream processing (filter, rank), we tend to return a Seq[Seq[InitialCandidate]]
* instead of a Seq[Candidate] even though we only have a Seq in it.
*/
@ -88,7 +87,7 @@ class RelatedVideoTweetCandidateGenerator @Inject() (
/***
* We wrap the query to be a Seq of queries for the Sim Engine to ensure evolvability of candidate generation
* and as a result, it will return Seq[Seq[InitialCandidate]]
* and as a result, it will return Seq[Seq[InitialCandidate]].
*/
val engineQueries =
Seq(fromParamsForRelatedVideoTweet(query.internalId, query.params))
@ -121,7 +120,7 @@ class RelatedVideoTweetCandidateGenerator @Inject() (
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
/***
* If tweetInfo does not exist, we will filter out this tweet candidate.
* This tweetInfo filter also acts as the VF filter
* This tweetInfo filter also acts as the VF filter.
*/
candidates.collect {
case candidate if tweetInfos.getOrElse(candidate.tweetId, None).isDefined =>

View File

@ -179,7 +179,7 @@ case class SimClustersInterestedInCandidateGeneration @Inject() (
else
Future.None
// AddressBookInterestedIn Queries
// AddressBookInterestedIn Queries.
val userAddressBookInterestedInCandidateResultFut =
if (query.enableAddressBookNextInterestedIn && query.enableProdSimClustersANNSimilarityEngine)
getInterestedInCandidateResult(
@ -397,7 +397,7 @@ object SimClustersInterestedInCandidateGeneration {
internalId: InternalId,
params: configapi.Params,
): Query = {
// SimClusters common configs
// SimClusters common configs.
val simClustersModelVersion =
ModelVersions.Enum.enumToSimClustersModelVersionMap(params(GlobalParams.ModelVersionParam))
val simClustersANNConfigId = params(SimClustersANNParams.SimClustersANNConfigId)
@ -415,13 +415,13 @@ object SimClustersInterestedInCandidateGeneration {
val simClustersAddressBookInterestedInMinScore = params(
InterestedInParams.MinScoreAddressBookParam)
// InterestedIn embeddings parameters
// InterestedIn embeddings parameters.
val interestedInEmbedding = params(InterestedInParams.InterestedInEmbeddingIdParam)
val nextInterestedInEmbedding = params(InterestedInParams.NextInterestedInEmbeddingIdParam)
val addressbookInterestedInEmbedding = params(
InterestedInParams.AddressBookInterestedInEmbeddingIdParam)
// Prod SimClustersANN Query
// Prod SimClustersANN Query.
val interestedInSimClustersANNQuery =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -446,7 +446,7 @@ object SimClustersInterestedInCandidateGeneration {
simClustersANNConfigId,
params)
// Experimental SANN cluster Query
// Experimental SANN cluster Query.
val interestedInExperimentalSimClustersANNQuery =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -471,7 +471,7 @@ object SimClustersInterestedInCandidateGeneration {
experimentalSimClustersANNConfigId,
params)
// SimClusters ANN cluster 1 Query
// SimClusters ANN cluster 1 Query.
val interestedInSimClustersANN1Query =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -496,7 +496,7 @@ object SimClustersInterestedInCandidateGeneration {
simClustersANN1ConfigId,
params)
// SimClusters ANN cluster 2 Query
// SimClusters ANN cluster 2 Query.
val interestedInSimClustersANN2Query =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -521,7 +521,7 @@ object SimClustersInterestedInCandidateGeneration {
simClustersANN2ConfigId,
params)
// SimClusters ANN cluster 3 Query
// SimClusters ANN cluster 3 Query.
val interestedInSimClustersANN3Query =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -546,7 +546,7 @@ object SimClustersInterestedInCandidateGeneration {
simClustersANN3ConfigId,
params)
// SimClusters ANN cluster 5 Query
// SimClusters ANN cluster 5 Query.
val interestedInSimClustersANN5Query =
SimClustersANNSimilarityEngine.fromParams(
internalId,
@ -554,7 +554,8 @@ object SimClustersInterestedInCandidateGeneration {
simClustersModelVersion,
simClustersANN5ConfigId,
params)
// SimClusters ANN cluster 4 Query
// SimClusters ANN cluster 4 Query.
val interestedInSimClustersANN4Query =
SimClustersANNSimilarityEngine.fromParams(
internalId,

View File

@ -116,7 +116,7 @@ class TopicTweetCandidateGenerator @Inject() (
val tweetIds = candidates.map(_.tweetId).toSet
val numTweetsPreFilter = tweetIds.size
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
/** *
/**
* If tweetInfo does not exist, we will filter out this tweet candidate.
*/
val tweetyPieFilteredInitialCandidates = candidates.collect {
@ -142,7 +142,6 @@ class TopicTweetCandidateGenerator @Inject() (
topicId -> tweetyPieFilteredInitialCandidates
}
}
Future.collect(initialCandidates.toSeq).map(_.toMap)
}
@ -152,7 +151,6 @@ class TopicTweetCandidateGenerator @Inject() (
isVideoOnly: Boolean,
excludeTweetIds: Set[TweetId]
): Future[Map[TopicId, Seq[InitialCandidate]]] = {
val earliestTweetId = SnowflakeId.firstIdFor(Time.now - maxTweetAge)
val filteredResults = topicTweetMap.map {

View File

@ -46,18 +46,16 @@ class UtegTweetCandidateGenerator @Inject() (
def get(
query: UtegTweetCandidateGeneratorQuery
): Future[Seq[TweetWithScoreAndSocialProof]] = {
val allStats = stats.scope("all")
val perProductStats = stats.scope("perProduct", query.product.toString)
StatsUtil.trackItemsStats(allStats) {
StatsUtil.trackItemsStats(perProductStats) {
/**
* The candidate we return in the end needs a social proof field, which isn't
* supported by any existing Candidate type, so we created TweetWithScoreAndSocialProof
* instead.
*
* However, filters and light ranker expect Candidate-typed param to work. In order to minimise the
* However, filters and light ranker expect Candidate-typed param to work. In order to minimize the
* changes to them, we are doing conversions from/to TweetWithScoreAndSocialProof to/from Candidate
* in this method.
*/
@ -111,7 +109,6 @@ class UtegTweetCandidateGenerator @Inject() (
candidate.toRankedCandidate(score)
}
)
}
def fetchCandidates(
@ -136,7 +133,7 @@ class UtegTweetCandidateGenerator @Inject() (
): Future[Seq[InitialCandidate]] = {
val tweetIds = candidates.map(_.tweetId).toSet
Future.collect(tweetInfoStore.multiGet(tweetIds)).map { tweetInfos =>
/** *
/**
* If tweetInfo does not exist, we will filter out this tweet candidate.
*/
candidates.collect {
@ -172,7 +169,7 @@ class UtegTweetCandidateGenerator @Inject() (
candidate.predictionScore,
tweet.socialProofByType
)
// The exception should never be thrown
// The exception should never be thrown.
}.getOrElse(throw new Exception("Cannot find ranked candidate in original UTEG tweets"))
}
}

View File

@ -41,12 +41,11 @@ object SimClustersANNConfig {
annAlgorithm = ScoringAlgorithm.CosineSimilarity,
)
/*
SimClustersANNConfigId: String
Format: Prod - EmbeddingType_ModelVersion_Default
Format: Experiment - EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number. Date : YYYYMMDD
/**
* SimClustersANNConfigId: String
* Format: Prod - EmbeddingType_ModelVersion_Default
* Format: Experiment - EmbeddingType_ModelVersion_Date_Two-Digit-Serial-Number. Date : YYYYMMDD
*/
private val FavBasedProducer_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's exp on maxTweetCandidateAgeDays 2
@ -142,12 +141,14 @@ object SimClustersANNConfig {
candidateEmbeddingType = EmbeddingType.LogFavBasedRealTimeTweet,
maxTweetCandidateAge = 1.hours
)
// SANN-4 config
private val LogFavLongestL2EmbeddingTweet_Model20m145k2020_20221220 =
LogFavLongestL2EmbeddingTweet_Model20m145k2020_Default.copy(
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
private val UnfilteredUserInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's exp on maxTweetCandidateAgeDays 2
@ -199,6 +200,7 @@ object SimClustersANNConfig {
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
private val LogFavBasedUserInterestedInFromAPE_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's exp on maxTweetCandidateAgeDays 2
@ -302,6 +304,7 @@ object SimClustersANNConfig {
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
private val UserNextInterestedIn_Model20m145k2020_Default = DefaultConfig.copy()
// Chunnan's exp on maxTweetCandidateAgeDays 2
@ -353,7 +356,8 @@ object SimClustersANNConfig {
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
// Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow
// Vincent's experiment on using FollowBasedProducer as query embedding type for UserFollow.
private val FollowBasedProducer_Model20m145k2020_Default =
FavBasedProducer_Model20m145k2020_Default.copy()
@ -400,6 +404,7 @@ object SimClustersANNConfig {
candidateEmbeddingType = EmbeddingType.LogFavBasedEvergreenTweet,
maxTweetCandidateAge = 48.hours
)
val DefaultConfigMappings: Map[String, SimClustersANNConfig] = Map(
"FavBasedProducer_Model20m145k2020_Default" -> FavBasedProducer_Model20m145k2020_Default,
"FavBasedProducer_Model20m145k2020_20220617_06" -> FavBasedProducer_Model20m145k2020_20220617_06,

View File

@ -3,22 +3,22 @@ package com.twitter.cr_mixer.config
import com.twitter.util.Duration
case class TimeoutConfig(
/* Default timeouts for candidate generator */
// Default timeouts for candidate generator.
serviceTimeout: Duration,
signalFetchTimeout: Duration,
similarityEngineTimeout: Duration,
annServiceClientTimeout: Duration,
/* For Uteg Candidate Generator */
// For Uteg Candidate Generator.
utegSimilarityEngineTimeout: Duration,
/* For User State Store */
// For User State Store.
userStateUnderlyingStoreTimeout: Duration,
userStateStoreTimeout: Duration,
/* For FRS based tweets */
// Timeout passed to EarlyBird server
// For FRS based tweets.
// Timeout passed to EarlyBird server.
earlybirdServerTimeout: Duration,
// Timeout set on CrMixer side
earlybirdSimilarityEngineTimeout: Duration,
frsBasedTweetEndpointTimeout: Duration,
topicTweetEndpointTimeout: Duration,
// Timeout Settings for Navi gRPC Client
// Timeout Settings for Navi gRPC Client.
naviRequestTimeout: Duration)

View File

@ -101,7 +101,7 @@ class CrMixerThriftController @Inject() (
ExceptionUtils.getStackTrace(e)
).mkString("\n")
/** *
/**
* We chose logger.info() here to print the message instead of logger.error, since
* logger.error sometimes suppresses the detailed stacktrace.
*/
@ -109,8 +109,7 @@ class CrMixerThriftController @Inject() (
}
private def generateRequestUUID(): Long = {
/** *
/**
* We generate a unique UUID via bitwise operations. See the link below for more:
* https://stackoverflow.com/questions/15184820/how-to-generate-unique-positive-long-using-uuid
*/
@ -119,7 +118,6 @@ class CrMixerThriftController @Inject() (
handle(t.CrMixer.GetTweetRecommendations) { args: t.CrMixer.GetTweetRecommendations.Args =>
val endpointName = "getTweetRecommendations"
val requestUUID = generateRequestUUID()
val startTime = Time.now.inMilliseconds
val userId = args.request.clientContext.userId.getOrElse(
@ -168,10 +166,9 @@ class CrMixerThriftController @Inject() (
Future(CrMixerTweetResponse(Seq.empty))
}
}
}
/** *
/**
* GetRelatedTweetsForQueryTweet and GetRelatedTweetsForQueryAuthor are essentially
* doing very similar things, except that one passes in TweetId which calls TweetBased engine,
* and the other passes in AuthorId which calls ProducerBased engine.
@ -221,7 +218,6 @@ class CrMixerThriftController @Inject() (
Future(RelatedTweetResponse(Seq.empty))
}
}
}
private def getRelatedVideoTweets(
@ -330,7 +326,6 @@ class CrMixerThriftController @Inject() (
Future(AdsResponse(Seq.empty))
}
}
}
private def buildCrCandidateGeneratorQuery(
@ -338,7 +333,6 @@ class CrMixerThriftController @Inject() (
requestUUID: Long,
userId: Long
): Future[CrCandidateGeneratorQuery] = {
val product = thriftRequest.product
val productContext = thriftRequest.productContext
val scopedStats = statsReceiver
@ -357,7 +351,7 @@ class CrMixerThriftController @Inject() (
userState
)
// Specify product-specific behavior mapping here
// Specify product-specific behavior mapping here.
val maxNumResults = (product, productContext) match {
case (t.Product.Home, Some(t.ProductContext.HomeContext(homeContext))) =>
homeContext.maxResults.getOrElse(9999)
@ -392,7 +386,6 @@ class CrMixerThriftController @Inject() (
thriftRequest: RelatedTweetRequest,
requestUUID: Long
): Future[RelatedTweetCandidateGeneratorQuery] = {
val product = thriftRequest.product
val scopedStats = statsReceiver
.scope(product.toString).scope("RelatedTweetRequest")
@ -409,8 +402,8 @@ class CrMixerThriftController @Inject() (
thriftRequest.product,
userState)
// Specify product-specific behavior mapping here
// Currently, Home takes 10, and RUX takes 100
// Specify product-specific behavior mapping here.
// Currently, Home takes 10, and RUX takes 100.
val maxNumResults = params(RelatedTweetGlobalParams.MaxCandidatesPerRequestParam)
RelatedTweetCandidateGeneratorQuery(
@ -458,7 +451,6 @@ class CrMixerThriftController @Inject() (
thriftRequest: RelatedVideoTweetRequest,
requestUUID: Long
): Future[RelatedVideoTweetCandidateGeneratorQuery] = {
val product = thriftRequest.product
val scopedStats = statsReceiver
.scope(product.toString).scope("RelatedVideoTweetRequest")
@ -487,14 +479,12 @@ class CrMixerThriftController @Inject() (
requestUUID = requestUUID
)
}
}
private def buildUtegTweetQuery(
thriftRequest: UtegTweetRequest,
requestUUID: Long
): Future[UtegTweetCandidateGeneratorQuery] = {
val userId = thriftRequest.clientContext.userId.getOrElse(
throw new IllegalArgumentException("userId must be present in the Thrift clientContext")
)
@ -536,7 +526,6 @@ class CrMixerThriftController @Inject() (
requestUUID = requestUUID
)
}
}
private def buildTopicTweetQuery(
@ -550,7 +539,7 @@ class CrMixerThriftController @Inject() (
val product = thriftRequest.product
val productContext = thriftRequest.productContext
// Specify product-specific behavior mapping here
// Specify product-specific behavior mapping here.
val isVideoOnly = (product, productContext) match {
case (t.Product.ExploreTopics, Some(t.ProductContext.ExploreContext(context))) =>
context.isVideoOnly
@ -646,7 +635,6 @@ class CrMixerThriftController @Inject() (
private def buildThriftResponse(
candidates: Seq[RankedCandidate]
): CrMixerTweetResponse = {
val tweets = candidates.map { candidate =>
TweetRecommendation(
tweetId = candidate.tweetId,
@ -663,7 +651,7 @@ class CrMixerThriftController @Inject() (
private def scribeTweetScoreFunnelSeries(
candidates: Seq[RankedCandidate]
): Seq[RankedCandidate] = {
// 202210210901 is a random number for code search of Lensview
// 202210210901 is a random number for code search of Lensview.
tweetScoreFunnelSeries.startNewSpan(
name = "GetTweetRecommendationsTopLevelTweetSimilarityEngineType",
codePtr = 202210210901L) {
@ -734,7 +722,6 @@ class CrMixerThriftController @Inject() (
request: CrMixerTweetRequest,
response: Future[CrMixerTweetResponse]
): Unit = {
val userId = request.clientContext.userId.getOrElse(
throw new IllegalArgumentException(
"userId must be present in getTweetRecommendations() Thrift clientContext"))