the-algorithm/follow-recommendations-service/common/src/main/scala/com/twitter/follow_recommendations/common/candidate_sources/base/RealGraphExpansionRepository.scala
twitter-team ef4c5eb65e Twitter Recommendation Algorithm
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
2023-03-31 17:36:31 -05:00

209 lines
7.9 KiB
Scala

package com.twitter.follow_recommendations.common.candidate_sources.base
import com.twitter.conversions.DurationOps._
import com.twitter.finagle.stats.NullStatsReceiver
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.finagle.util.DefaultTimer
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.DefaultScore
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.MaxNumIntermediateNodesToKeep
import com.twitter.follow_recommendations.common.candidate_sources.base.RealGraphExpansionRepository.FirstDegreeCandidatesTimeout
import com.twitter.follow_recommendations.common.models.CandidateUser
import com.twitter.follow_recommendations.common.models._
import com.twitter.onboarding.relevance.features.ymbii.ExpansionCandidateScores
import com.twitter.onboarding.relevance.features.ymbii.RawYMBIICandidateFeatures
import com.twitter.onboarding.relevance.store.thriftscala.CandidatesFollowedV1
import com.twitter.product_mixer.core.functional_component.candidate_source.CandidateSource
import com.twitter.product_mixer.core.model.common.identifier.CandidateSourceIdentifier
import com.twitter.stitch.Stitch
import com.twitter.strato.client.Fetcher
import com.twitter.util.Duration
import scala.collection.immutable
import scala.util.control.NonFatal
/**
 * A reranked expansion candidate produced by
 * `RealGraphExpansionRepository.rerankCandidateExpansions`.
 *
 * @param userID   id of the expansion candidate (taken from the `candidateID` of an entry
 *                 returned by the expansion store)
 * @param score    total score of the candidate: the sum, over every intermediate node that
 *                 led to it, of (underlying candidate score * expansion edge score)
 * @param features raw features built from the number of intermediate nodes and a truncated
 *                 list of the per-intermediate-node scores
 */
private final case class InterestExpansionCandidate(
userID: Long,
score: Double,
features: RawYMBIICandidateFeatures)
/**
 * Candidate source that takes the candidates produced by one of several underlying sources
 * and expands each of them through `realgraphExpansionStore`, returning the expansion
 * candidates reranked by their aggregated score.
 *
 * @param realgraphExpansionStore        fetcher keyed by an underlying candidate id that
 *                                       returns an optional [[CandidatesFollowedV1]]
 * @param identifier                     identifier of this candidate source; also attached to
 *                                       every returned candidate as its primary source
 * @param statsReceiver                  receiver for counters, scoped by the concrete class
 *                                       name and the identifier name
 * @param maxUnderlyingCandidatesToQuery number of top-scored underlying candidates kept
 *                                       before the expansion store is queried
 * @param maxCandidatesToReturn          number of reranked expansion candidates returned
 * @param overrideUnderlyingTimeout      when defined, replaces `FirstDegreeCandidatesTimeout`
 *                                       as the timeout applied to each underlying source
 * @param appendSocialProof              when true, every returned candidate carries a
 *                                       follow-proof reason listing its intermediate node ids
 */
abstract class RealGraphExpansionRepository[Request](
  realgraphExpansionStore: Fetcher[
    Long,
    Unit,
    CandidatesFollowedV1
  ],
  override val identifier: CandidateSourceIdentifier,
  statsReceiver: StatsReceiver = NullStatsReceiver,
  maxUnderlyingCandidatesToQuery: Int = 50,
  maxCandidatesToReturn: Int = 40,
  overrideUnderlyingTimeout: Option[Duration] = None,
  appendSocialProof: Boolean = false)
    extends CandidateSource[
      Request,
      CandidateUser
    ] {

  /**
   * Underlying sources in priority order: the first one that yields a non-empty result is the
   * one whose candidates get expanded.
   */
  val underlyingCandidateSource: Seq[
    CandidateSource[
      Request,
      CandidateUser
    ]
  ]

  private val stats = statsReceiver.scope(this.getClass.getSimpleName).scope(identifier.name)
  private val underlyingCandidateSourceFailureStats =
    stats.scope("underlying_candidate_source_failure")

  /**
   * Queries every underlying source, expands the candidates of the highest-priority non-empty
   * one through the expansion store, and returns the top reranked expansion candidates.
   *
   * @param request the request forwarded unchanged to every underlying source
   * @return at most `maxCandidatesToReturn` candidates, in descending score order
   */
  def apply(
    request: Request,
  ): Stitch[Seq[CandidateUser]] = {
    val sourceTimeout: Duration = overrideUnderlyingTimeout.getOrElse(FirstDegreeCandidatesTimeout)

    // Each source is bounded by the timeout; a non-fatal failure (timeouts included) is
    // counted per source / exception class and replaced by an empty result.
    val resultsPerSourceStitch: Seq[Stitch[Seq[CandidateUser]]] =
      for (source <- underlyingCandidateSource) yield {
        source(request)
          .within(sourceTimeout)(DefaultTimer)
          .handle {
            case NonFatal(error) =>
              underlyingCandidateSourceFailureStats
                .counter(source.identifier.name, error.getClass.getSimpleName).incr()
              Seq.empty
          }
      }

    Stitch.collect(resultsPerSourceStitch).flatMap { resultsPerSource =>
      // Sources are listed in priority order: use the first one that produced any candidate
      // and ignore the rest.
      val chosen = resultsPerSource
        .zip(underlyingCandidateSource)
        .find { case (candidates, _) => candidates.nonEmpty }

      val underlyingAlgorithmUsed: Option[CandidateSourceIdentifier] =
        chosen.map { case (_, source) => source.identifier }

      chosen.foreach {
        case (_, source) =>
          stats
            .scope("underlyingAlgorithmUsedScope").counter(
              source.identifier.name).incr()
      }

      // Only the highest-scored candidates of the chosen source are sent to the store.
      val seeds: Seq[CandidateUser] = chosen
        .map { case (candidates, _) => candidates }
        .getOrElse(Seq.empty)
        .sortBy(_.score.getOrElse(DefaultScore))(Ordering.Double.reverse)
        .take(maxUnderlyingCandidatesToQuery)

      val seedScores: Map[Long, Double] =
        seeds.map(seed => seed.id -> seed.score.getOrElse(DefaultScore)).toMap

      Stitch
        .traverse(seedScores.keySet.toSeq) { seedId =>
          realgraphExpansionStore.fetch(seedId).map(fetched => seedId -> fetched.v)
        }
        .map { fetchedPairs =>
          val expansionsBySeed: Map[Long, Option[CandidatesFollowedV1]] = fetchedPairs.toMap
          rerankCandidateExpansions(seedScores, expansionsBySeed)
            .take(maxCandidatesToReturn)
            .map(candidate => toCandidateUser(candidate, underlyingAlgorithmUsed))
        }
    }
  }

  /**
   * Converts a reranked expansion candidate into the [[CandidateUser]] returned to callers.
   *
   * @param candidate               the reranked expansion candidate
   * @param underlyingAlgorithmUsed identifier of the underlying source that was expanded, if
   *                                any; passed to `addAddressBookMetadataIfAvailable`
   */
  private def toCandidateUser(
    candidate: InterestExpansionCandidate,
    underlyingAlgorithmUsed: Option[CandidateSourceIdentifier]
  ): CandidateUser = {
    val socialProofReason =
      if (!appendSocialProof) {
        None
      } else {
        val intermediateIds = candidate.features.expansionCandidateScores
          .map(_.intermediateCandidateId)
        Some(
          Reason(Some(
            AccountProof(followProof = Some(FollowProof(intermediateIds, intermediateIds.size))))))
      }

    CandidateUser(
      id = candidate.userID,
      score = Some(candidate.score),
      reason = socialProofReason,
      userCandidateSourceDetails = Some(
        UserCandidateSourceDetails(
          primaryCandidateSource = Some(identifier),
          candidateSourceFeatures = Map(identifier -> Seq(candidate.features))
        ))
    ).addAddressBookMetadataIfAvailable(underlyingAlgorithmUsed.toSeq)
  }

  /**
   * Expands underlying candidates, returning them in sorted order.
   *
   * @param underlyingCandidatesMap A map from underlying candidate id to score
   * @param expansionCandidateMap A map from underlying candidate id to optional expansion candidates
   * @return A sequence of expansion candidates sorted by descending total score
   */
  private def rerankCandidateExpansions(
    underlyingCandidatesMap: Map[Long, Double],
    expansionCandidateMap: Map[Long, Option[CandidatesFollowedV1]]
  ): Seq[InterestExpansionCandidate] = {
    // One (expansion candidate id -> edge scores) pair per intermediate node / expansion edge.
    val edges: Seq[(Long, ExpansionCandidateScores)] =
      underlyingCandidatesMap.toSeq.flatMap {
        case (intermediateId, intermediateScore) =>
          val followed = expansionCandidateMap
            .get(intermediateId)
            .flatten
            .map(_.candidatesFollowed)
            .getOrElse(Seq.empty)
          followed.map { expansion =>
            expansion.candidateID -> ExpansionCandidateScores(
              intermediateId,
              Some(intermediateScore),
              Some(expansion.score)
            )
          }
      }

    // Collapse all edges that reach the same expansion candidate, ordered by intermediate id.
    val edgesByCandidate: Seq[(Long, Seq[ExpansionCandidateScores])] =
      edges.groupBy(_._1).toSeq.map {
        case (expansionId, pairs) =>
          expansionId -> pairs.map(_._2).sortBy(_.intermediateCandidateId)
      }

    // Total score = sum over edges of (user -> intermediate) * (intermediate -> expansion);
    // a missing score on either side counts as DefaultScore.
    val scored: Seq[InterestExpansionCandidate] = edgesByCandidate.map {
      case (expansionId, candidateEdges) =>
        val totalScore = candidateEdges.map { edge =>
          edge.scoreFromUserToIntermediateCandidate.getOrElse(DefaultScore) *
            edge.scoreFromIntermediateToExpansionCandidate.getOrElse(DefaultScore)
        }.sum
        InterestExpansionCandidate(
          expansionId,
          totalScore,
          RawYMBIICandidateFeatures(
            candidateEdges.size,
            candidateEdges.take(MaxNumIntermediateNodesToKeep).to[immutable.Seq])
        )
    }

    // Highest total score first.
    scored.sortBy(_.score)(Ordering[Double].reverse)
  }
}
/** Constants shared by all `RealGraphExpansionRepository` instances. */
object RealGraphExpansionRepository {

  /** Score substituted wherever a candidate score or an edge score is absent. */
  private val DefaultScore: Double = 0.0d

  /** Maximum number of per-intermediate-node score entries kept in a candidate's features. */
  private val MaxNumIntermediateNodesToKeep: Int = 20

  /** Timeout applied to each underlying candidate source when no override is supplied. */
  private val FirstDegreeCandidatesTimeout: Duration = 250.milliseconds
}