mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-01 00:38:46 +02:00
b389c3d302
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
308 lines
12 KiB
Scala
308 lines
12 KiB
Scala
package com.twitter.frigate.pushservice.model.candidate
|
|
|
|
import com.twitter.frigate.common.base.FeatureMap
|
|
import com.twitter.frigate.common.rec_types.RecTypes
|
|
import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
|
|
import com.twitter.frigate.pushservice.ml.HydrationContextBuilder
|
|
import com.twitter.frigate.pushservice.ml.PushMLModelScorer
|
|
import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
|
|
import com.twitter.frigate.pushservice.params.PushMLModel
|
|
import com.twitter.frigate.pushservice.params.WeightedOpenOrNtabClickModel
|
|
import com.twitter.nrel.hydration.push.HydrationContext
|
|
import com.twitter.timelines.configapi.FSParam
|
|
import com.twitter.util.Future
|
|
import java.util.concurrent.ConcurrentHashMap
|
|
import scala.collection.concurrent.{Map => CMap}
|
|
import scala.collection.convert.decorateAsScala._
|
|
|
|
/**
 * Mix-in for a [[PushCandidate]] that computes, caches and exposes the candidate's ML model
 * scores, and assembles the score maps that get scribed.
 *
 * Scores are cached per (model, model version) pair so that a prediction is not requested
 * twice for the same candidate.
 */
trait MLScores {

  self: PushCandidate =>

  /** Hydration context of this candidate; built via [[HydrationContextBuilder]] on first access. */
  lazy val candidateHydrationContext: Future[HydrationContext] = HydrationContextBuilder.build(self)

  /** Scorer used by [[getMLModelScore]] when a score is not cached yet. */
  def weightedOpenOrNtabClickModelScorer: PushMLModelScorer

  // Used to store the scores and avoid duplicate prediction
  private val qualityModelScores: CMap[
    (PushMLModel.Value, WeightedOpenOrNtabClickModel.ModelNameType),
    Future[Option[Double]]
  ] =
    new ConcurrentHashMap[(PushMLModel.Value, WeightedOpenOrNtabClickModel.ModelNameType), Future[
      Option[Double]
    ]]().asScala

  /**
   * Stores an externally computed score for the given model and version.
   * An already stored score for the same pair is kept; `prob` is then ignored.
   *
   * @param pushMLModel  model the score belongs to
   * @param modelVersion version of that model
   * @param prob         the score to store
   */
  def populateQualityModelScore(
    pushMLModel: PushMLModel.Value,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType,
    prob: Future[Option[Double]]
  ): Unit = {
    // putIfAbsent is a single atomic operation, unlike a `contains` check followed by an insert
    qualityModelScores.putIfAbsent((pushMLModel, modelVersion), prob)
    ()
  }

  // The ML scores that also depend on other candidates and are only available after all candidates are processed
  // For example, the likelihood info for Importance Sampling
  private lazy val crossCandidateMlScores: CMap[String, Double] =
    new ConcurrentHashMap[String, Double]().asScala

  /**
   * Records a cross-candidate score under `scoreName`.
   *
   * @param scoreName name of the score; may be populated only once
   * @param score     value of the score
   * @throws Exception if a score with the same name has already been populated
   */
  def populateCrossCandidateMlScores(scoreName: String, score: Double): Unit = {
    // Atomic insert: a duplicate cannot slip in between the existence check and the write
    if (crossCandidateMlScores.putIfAbsent(scoreName, score).isDefined) {
      throw new Exception(
        s"$scoreName has been populated in the CrossCandidateMlScores!\n" +
          s"Existing crossCandidateMlScores are ${crossCandidateMlScores}\n"
      )
    }
  }

  /**
   * Returns the score for the given model and version, requesting a prediction from
   * [[weightedOpenOrNtabClickModelScorer]] and caching it when no score is stored yet.
   *
   * @param pushMLModel  model to score with
   * @param modelVersion version of that model
   */
  def getMLModelScore(
    pushMLModel: PushMLModel.Value,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
  ): Future[Option[Double]] = {
    qualityModelScores.getOrElseUpdate(
      (pushMLModel, modelVersion),
      weightedOpenOrNtabClickModelScorer
        .singlePredicationForModelVersion(modelVersion, self, Some(pushMLModel))
    )
  }

  /**
   * Returns the stored score for the given model and version without triggering a prediction;
   * yields `Future.None` when nothing has been stored.
   *
   * @param pushMLModel  model to look up
   * @param modelVersion version of that model
   */
  def getMLModelScoreWithoutUpdate(
    pushMLModel: PushMLModel.Value,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
  ): Future[Option[Double]] = {
    qualityModelScores.getOrElse((pushMLModel, modelVersion), Future.None)
  }

  /**
   * Scores this candidate with the WeightedOpenOrNtabClick model whose version is read from
   * the target's params through `weightedOONCModelParam`.
   */
  def getWeightedOpenOrNtabClickModelScore(
    weightedOONCModelParam: FSParam[WeightedOpenOrNtabClickModel.ModelNameType]
  ): Future[Option[Double]] = {
    getMLModelScore(
      PushMLModel.WeightedOpenOrNtabClickProbability,
      target.params(weightedOONCModelParam)
    )
  }

  /* After we unify the ranking and filtering models, we follow the iteration process below
     When improving the WeightedOONC model,
     1) Run an experiment which only replaces the ranking model
     2) Make decisions according to the experiment results
     3) Use the ranking model for filtering
     4) Adjust percentile thresholds if necessary
   */
  lazy val mrWeightedOpenOrNtabClickRankingProbability: Future[Option[Double]] =
    target.rankingModelParam.flatMap { modelParam =>
      getWeightedOpenOrNtabClickModelScore(modelParam)
    }

  /**
   * Scores this candidate with a BigFiltering model. The ranking score is merged into the
   * candidate's features first; when no ranking score is available the result is `Future.None`.
   *
   * @param pushMLModel  BigFiltering model to score with
   * @param modelVersion version of that model
   */
  def getBigFilteringScore(
    pushMLModel: PushMLModel.Value,
    modelVersion: WeightedOpenOrNtabClickModel.ModelNameType
  ): Future[Option[Double]] = {
    mrWeightedOpenOrNtabClickRankingProbability.flatMap {
      case Some(rankingScore) =>
        // Adds ranking score to feature map (we must ensure the feature key is also in the feature context)
        mergeFeatures(
          FeatureMap(
            numericFeatures = Map("scribe.WeightedOpenOrNtabClickProbability" -> rankingScore)
          )
        )
        getMLModelScore(pushMLModel, modelVersion)
      case _ => Future.None
    }
  }

  /** Turns an optional score into a single-entry map keyed by `scoreName`, or an empty map. */
  private def singleScoreMapForScribing(
    scoreName: String,
    scoreFut: Future[Option[Double]]
  ): Future[Map[String, Double]] =
    scoreFut.map {
      case Some(score) => Map(scoreName -> score)
      case _ => Map.empty[String, Double]
    }

  /**
   * Ranking score for scribing, emitted twice: once keyed by the generic model name and once
   * keyed by the concrete ranking model version read from the target's params.
   */
  def getWeightedOpenOrNtabClickScoreForScribing(): Seq[Future[Map[String, Double]]] = {
    Seq(
      singleScoreMapForScribing(
        PushMLModel.WeightedOpenOrNtabClickProbability.toString,
        mrWeightedOpenOrNtabClickRankingProbability
      ),
      Future
        .join(
          target.rankingModelParam,
          mrWeightedOpenOrNtabClickRankingProbability
        ).map {
          case (rankingModelParam, Some(score)) =>
            Map(target.params(rankingModelParam).toString -> score)
          case _ => Map.empty[String, Double]
        }
    )
  }

  /** NSFW score for scribing; only a previously stored score is used, no prediction is triggered. */
  def getNsfwScoreForScribing(): Seq[Future[Map[String, Double]]] = {
    val nsfwScoreFut = getMLModelScoreWithoutUpdate(
      PushMLModel.HealthNsfwProbability,
      target.params(PushFeatureSwitchParams.BqmlHealthModelTypeParam)
    )
    Seq(singleScoreMapForScribing(PushMLModel.HealthNsfwProbability.toString, nsfwScoreFut))
  }

  /** Supervised BigFiltering scores for scribing; empty unless the feature switch is enabled. */
  def getBigFilteringSupervisedScoresForScribing(): Seq[Future[Map[String, Double]]] = {
    if (target.params(
        PushFeatureSwitchParams.EnableMrRequestScribingBigFilteringSupervisedScores)) {
      Seq(
        singleScoreMapForScribing(
          PushMLModel.BigFilteringSupervisedSendingModel.toString,
          mrBigFilteringSupervisedSendingScore
        ),
        singleScoreMapForScribing(
          PushMLModel.BigFilteringSupervisedWithoutSendingModel.toString,
          mrBigFilteringSupervisedWithoutSendingScore
        )
      )
    } else Seq.empty[Future[Map[String, Double]]]
  }

  /** RL BigFiltering scores for scribing; empty unless the feature switch is enabled. */
  def getBigFilteringRLScoresForScribing(): Seq[Future[Map[String, Double]]] = {
    if (target.params(PushFeatureSwitchParams.EnableMrRequestScribingBigFilteringRLScores)) {
      Seq(
        singleScoreMapForScribing(
          PushMLModel.BigFilteringRLSendingModel.toString,
          mrBigFilteringRLSendingScore
        ),
        singleScoreMapForScribing(
          PushMLModel.BigFilteringRLWithoutSendingModel.toString,
          mrBigFilteringRLWithoutSendingScore
        )
      )
    } else Seq.empty[Future[Map[String, Double]]]
  }

  /** All per-candidate score maps that are scribed, in a fixed order. */
  def buildModelScoresSeqForScribing(): Seq[Future[Map[String, Double]]] = {
    getWeightedOpenOrNtabClickScoreForScribing() ++
      getBigFilteringSupervisedScoresForScribing() ++
      getBigFilteringRLScoresForScribing() ++
      getNsfwScoreForScribing()
  }

  lazy val mrBigFilteringSupervisedSendingScore: Future[Option[Double]] =
    getBigFilteringScore(
      PushMLModel.BigFilteringSupervisedSendingModel,
      target.params(PushFeatureSwitchParams.BigFilteringSupervisedSendingModelParam)
    )

  lazy val mrBigFilteringSupervisedWithoutSendingScore: Future[Option[Double]] =
    getBigFilteringScore(
      PushMLModel.BigFilteringSupervisedWithoutSendingModel,
      target.params(PushFeatureSwitchParams.BigFilteringSupervisedWithoutSendingModelParam)
    )

  lazy val mrBigFilteringRLSendingScore: Future[Option[Double]] =
    getBigFilteringScore(
      PushMLModel.BigFilteringRLSendingModel,
      target.params(PushFeatureSwitchParams.BigFilteringRLSendingModelParam)
    )

  lazy val mrBigFilteringRLWithoutSendingScore: Future[Option[Double]] =
    getBigFilteringScore(
      PushMLModel.BigFilteringRLWithoutSendingModel,
      target.params(PushFeatureSwitchParams.BigFilteringRLWithoutSendingModelParam)
    )

  lazy val mrWeightedOpenOrNtabClickFilteringProbability: Future[Option[Double]] =
    getWeightedOpenOrNtabClickModelScore(
      target.filteringModelParam
    )

  lazy val mrQualityUprankingProbability: Future[Option[Double]] =
    getMLModelScore(
      PushMLModel.FilteringProbability,
      target.params(PushFeatureSwitchParams.QualityUprankingModelTypeParam)
    )

  // Reads only a previously stored NSFW score; never triggers a prediction
  lazy val mrNsfwScore: Future[Option[Double]] =
    getMLModelScoreWithoutUpdate(
      PushMLModel.HealthNsfwProbability,
      target.params(PushFeatureSwitchParams.BqmlHealthModelTypeParam)
    )

  // MR quality upranking param
  private val qualityUprankingBoost: String = "QualityUprankingBoost"
  private val producerQualityUprankingBoost: String = "ProducerQualityUprankingBoost"
  private val qualityUprankingInfo: CMap[String, Double] =
    new ConcurrentHashMap[String, Double]().asScala

  // NOTE(review): being lazy vals, the two accessors below capture the value present on first
  // access; a boost set afterwards is not reflected - confirm this is intended.
  lazy val mrQualityUprankingBoost: Option[Double] =
    qualityUprankingInfo.get(qualityUprankingBoost)
  lazy val mrProducerQualityUprankingBoost: Option[Double] =
    qualityUprankingInfo.get(producerQualityUprankingBoost)

  /** Sets (or overwrites) the quality upranking boost. */
  def setQualityUprankingBoost(boost: Double): Unit =
    qualityUprankingInfo.update(qualityUprankingBoost, boost)

  /** Sets (or overwrites) the producer quality upranking boost. */
  def setProducerQualityUprankingBoost(boost: Double): Unit =
    qualityUprankingInfo.update(producerQualityUprankingBoost, boost)

  // Merged MR model scores; failed score futures are dropped rather than failing the whole map
  private lazy val mrModelScoresFut: Future[Map[String, Double]] = {
    if (self.target.isLoggedOutUser) {
      Future.value(Map.empty[String, Double])
    } else {
      Future
        .collectToTry {
          buildModelScoresSeqForScribing()
        }.map { scoreTrySeq =>
          scoreTrySeq
            .collect {
              case result if result.isReturn => result.get()
            }
            // foldLeft instead of reduce: when every score future failed the collection is
            // empty, and reduce would throw instead of yielding an empty map
            .foldLeft(Map.empty[String, Double])(_ ++ _)
        }
    }
  }

  // Internal model scores (scores that are independent of other candidates) for scribing
  lazy val modelScores: Future[Map[String, Double]] =
    target.dauProbability.flatMap { dauProbabilityOpt =>
      val dauProbScoreMap = dauProbabilityOpt
        .map(_.probability).map { dauProb =>
          PushMLModel.DauProbability.toString -> dauProb
        }.toMap

      // Avoid unnecessary MR model scribing: outside of dark writes, send-handler rec types
      // that do not use the MR model only scribe the DAU probability
      val skipMrModelScores =
        !target.isDarkWrite &&
          RecTypes.isSendHandlerType(commonRecType) &&
          !RecTypes.sendHandlerTypesUsingMrModel(commonRecType)

      if (skipMrModelScores) {
        Future.value(dauProbScoreMap)
      } else {
        mrModelScoresFut.map(dauProbScoreMap ++ _)
      }
    }

  /**
   * Scores to scribe for this candidate: the internal ML scores plus the cross-candidate
   * scores. Empty for logged-out users and for rec types not eligible for score tracking.
   *
   * The returned future fails with an `Exception` when a cross-candidate score uses the same
   * name as an internal score.
   */
  def getModelScoresforScribing(): Future[Map[String, Double]] = {
    if (RecTypes.notEligibleForModelScoreTracking(commonRecType) || self.target.isLoggedOutUser) {
      Future.value(Map.empty[String, Double])
    } else {
      modelScores.map { internalScores =>
        if (internalScores.keySet.intersect(crossCandidateMlScores.keySet).nonEmpty) {
          throw new Exception(
            "crossCandidateMlScores overlap internalModelScores\n" +
              s"internalScores keySet: ${internalScores.keySet}\n" +
              s"crossCandidateScores keySet: ${crossCandidateMlScores.keySet}\n"
          )
        }

        internalScores ++ crossCandidateMlScores
      }
    }
  }
}
|