mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-14 15:18:55 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
204 lines
7.3 KiB
Scala
204 lines
7.3 KiB
Scala
package com.twitter.timelines.data_processing.ad_hoc.earlybird_ranking.model_evaluation
|
|
|
|
import scala.collection.GenTraversableOnce
|
|
|
|
/**
 * A single candidate tweet considered for one request.
 *
 * @param tweetId    identifier of the candidate tweet
 * @param fullScore  score assigned by the "full" prediction model
 * @param earlyScore score assigned by the "light" (Earlybird) model
 * @param served     flag counted by the shown-tweet recall metrics as "this candidate was shown"
 */
case class CandidateRecord(
  tweetId: Long,
  fullScore: Double,
  earlyScore: Double,
  served: Boolean
)
|
|
|
|
/**
 * Base type for metrics that compare the scores produced by a "full" prediction
 * model against those of a "light" (Earlybird) model.
 *
 * Every metric is evaluated over the candidates of a single request.
 */
sealed trait EarlybirdEvaluationMetric {

  /** Identifier of this metric. */
  def name: String

  /**
   * Evaluates the metric for one request.
   *
   * @param candidates all candidates of the request
   * @return the metric value, or `None` when the implementation does not produce a
   *         value for this request
   */
  def apply(candidates: Seq[CandidateRecord]): Option[Double]
}
|
|
|
|
/**
 * Selects the `k` best candidates by light score and the `k` best candidates by
 * full score, and reports which fraction of the latter is also present in the former.
 *
 * Requests with `k` or fewer candidates are handled specially: they are either
 * dropped (`None`, when `filterFewerThanK` is set; this lowers the aggregate recall)
 * or kept with a trivial recall of 1.0.
 *
 * @param k                number of top candidates to compare
 * @param filterFewerThanK whether requests with at most `k` candidates yield `None`
 *                         instead of 1.0
 */
case class TopKRecall(k: Int, filterFewerThanK: Boolean) extends EarlybirdEvaluationMetric {
  override val name: String = s"top_${k}_recall${if (filterFewerThanK) "_filtered" else ""}"

  override def apply(candidates: Seq[CandidateRecord]): Option[Double] =
    if (candidates.size > k) {
      // Negating the score turns the ascending sort into a descending one.
      val fullTopIds = candidates.sortBy(c => -c.fullScore).take(k).map(_.tweetId)
      val lightTopIds = candidates.sortBy(c => -c.earlyScore).take(k).map(_.tweetId)
      val hits = fullTopIds.intersect(lightTopIds).size
      Some(hits.toDouble / k.toDouble)
    } else if (filterFewerThanK) {
      None
    } else {
      Some(1.0)
    }
}
|
|
|
|
/**
 * Estimates the probability that a randomly chosen pair of candidates is ordered the
 * same way by the full model and by the Earlybird model.
 *
 * Every pair is visited in both orientations, so:
 *  - a pair that is tied under one model but not under the other contributes 1,
 *  - a pair that is strictly ordered the same way by both models contributes 2.
 *
 * Consequently a constant model scores 0.5, which is roughly what a random model
 * would achieve.
 */
case object ProbabilityOfCorrectOrdering extends EarlybirdEvaluationMetric {

  /**
   * Computes the fraction of elements of `trav` for which `p` holds.
   *
   * Matches and total are counted together in one `foldLeft`, so `trav` is traversed
   * only once.
   *
   * @param trav elements to inspect
   * @param p    predicate to test on every element
   * @return matches divided by total, or 0.0 when `trav` is empty
   */
  def fractionOf[A](trav: GenTraversableOnce[A])(p: A => Boolean): Double =
    if (trav.isEmpty) 0.0
    else {
      val (hits, total) = trav.foldLeft((0, 0)) { (acc, elem) =>
        val hit = if (p(elem)) 1 else 0
        (acc._1 + hit, acc._2 + 1)
      }
      hits.toDouble / total
    }

  override def name: String = "probability_of_correct_ordering"

  /**
   * @return `None` for fewer than two candidates; otherwise the fraction of ordered
   *         pairs of non-equal records on which both models agree
   */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] =
    if (candidates.size >= 2) {
      // All ordered pairs of records that are not equal to each other.
      val orderedPairs = candidates.iterator.flatMap { left =>
        candidates.iterator
          .filter(right => left != right)
          .map(right => (left, right))
      }

      val agreement = fractionOf(orderedPairs) {
        case (left, right) =>
          val fullPrefersLeft = left.fullScore > right.fullScore
          val lightPrefersLeft = left.earlyScore > right.earlyScore
          fullPrefersLeft == lightPrefersLeft
      }

      Some(agreement)
    } else {
      None
    }
}
|
|
|
|
/**
 * Variant of `TopKRecall` where the number of top candidates is derived from the
 * request size: `k = floor(candidates.size * percent)`.
 *
 * @param percent multiplier applied to the number of candidates to obtain `k`
 */
case class TopNPercentRecall(percent: Double) extends EarlybirdEvaluationMetric {
  override val name: String = s"top_${percent}_pct_recall"

  /** @return `None` when the derived `k` is not positive, otherwise the top-`k` recall */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] = {
    val k = math.floor(percent * candidates.size).toInt
    if (k <= 0) {
      None
    } else {
      // Negating the score turns the ascending sort into a descending one.
      val fullTopIds = candidates.sortBy(c => -c.fullScore).take(k).map(_.tweetId)
      val lightTopIds = candidates.sortBy(c => -c.earlyScore).take(k).map(_.tweetId)
      val hits = fullTopIds.intersect(lightTopIds).size
      Some(hits.toDouble / k.toDouble)
    }
  }
}
|
|
|
|
/**
 * Selects the `k` best candidates by light score and reports which fraction of all
 * served (shown) candidates is contained in that selection.
 *
 * @param k number of top light-scored candidates to select
 */
case class ShownTweetRecall(k: Int) extends EarlybirdEvaluationMetric {
  override val name: String = s"shown_tweet_recall_$k"

  /**
   * @return `None` when there are at most `k` candidates or when no candidate was
   *         served; otherwise served-in-top-`k` divided by served-overall
   */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] =
    if (candidates.size <= k) None
    else {
      val servedTotal = candidates.count(_.served)
      val servedInTopLight =
        candidates.sortBy(c => -c.earlyScore).take(k).count(_.served)
      if (servedTotal > 0) Some(servedInTopLight.toDouble / servedTotal.toDouble)
      else None
    }
}
|
|
|
|
/**
 * Variant of `ShownTweetRecall` where the number of top candidates is derived from
 * the request size: `k = floor(candidates.size * percent)`.
 *
 * @param percent multiplier applied to the number of candidates to obtain `k`
 */
case class ShownTweetPercentRecall(percent: Double) extends EarlybirdEvaluationMetric {
  override val name: String = s"shown_tweet_recall_${percent}_pct"

  /**
   * @return `None` when no candidate was served; otherwise served-in-top-`k` divided
   *         by served-overall. There is no guard on `k`: when it is 0 the selection
   *         is empty and the result is 0.0.
   */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] = {
    val k = math.floor(percent * candidates.size).toInt
    val servedInTopLight =
      candidates.sortBy(c => -c.earlyScore).take(k).count(_.served)
    val servedTotal = candidates.count(_.served)
    if (servedTotal > 0) Some(servedInTopLight.toDouble / servedTotal.toDouble)
    else None
  }
}
|
|
|
|
/**
 * Same computation as `ShownTweetRecall`, but the top `k` candidates are chosen by
 * *full* score. Intended as a sanity metric: the top full-scored candidates are
 * expected to be the ones served, so a value below 1 indicates that the ranked
 * section was smaller than `k`.
 *
 * @param k number of top full-scored candidates to select
 */
case class ShownTweetRecallWithFullScores(k: Int) extends EarlybirdEvaluationMetric {
  override val name: String = s"shown_tweet_recall_with_full_scores_$k"

  /**
   * @return `None` when there are at most `k` candidates or when no candidate was
   *         served; otherwise served-in-top-`k` divided by served-overall
   */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] =
    if (candidates.size <= k) None
    else {
      val servedTotal = candidates.count(_.served)
      val servedInTopFull =
        candidates.sortBy(c => -c.fullScore).take(k).count(_.served)
      if (servedTotal > 0) Some(servedInTopFull.toDouble / servedTotal.toDouble)
      else None
    }
}
|
|
|
|
/**
 * Selects the `k` best candidates by light score and reports the mean of their
 * full scores.
 *
 * @param k number of top light-scored candidates to average over
 */
case class AverageFullScoreForTopLight(k: Int) extends EarlybirdEvaluationMetric {
  override val name: String = s"average_full_score_for_top_light_$k"

  /** @return `None` when there are at most `k` candidates, otherwise the mean full score */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] =
    if (candidates.size > k) {
      val fullScoresOfTopLight =
        candidates.sortBy(c => -c.earlyScore).take(k).map(_.fullScore)
      Some(fullScoresOfTopLight.sum / fullScoresOfTopLight.size)
    } else {
      None
    }
}
|
|
|
|
/**
 * Picks the set of `k` top candidates using the light scores, and calculates
 * sum of full scores for those. Divides that by sum of `k` top full scores,
 * overall, to get a "score recall".
 *
 * @param k number of top candidates to select under each model
 */
case class SumScoreRecallForTopLight(k: Int) extends EarlybirdEvaluationMetric {
  override val name: String = s"sum_score_recall_for_top_light_$k"

  /**
   * @return `None` when there are at most `k` candidates, or when the sum of the top
   *         `k` full scores is exactly zero (the ratio is undefined); otherwise the
   *         ratio of the two sums
   */
  override def apply(candidates: Seq[CandidateRecord]): Option[Double] = {
    if (candidates.size <= k) {
      None
    } else {
      val sumFullScoresForTopLight = candidates.sortBy(-_.earlyScore).take(k).map(_.fullScore).sum
      val sumScoresForTopFull = candidates.sortBy(-_.fullScore).take(k).map(_.fullScore).sum
      // Dividing by a zero sum would yield NaN or +/-Infinity, which would corrupt any
      // aggregate computed over requests. Skip the request instead, like the other
      // recall metrics do when their denominator is zero.
      if (sumScoresForTopFull == 0.0) None
      else Some(sumFullScoresForTopLight / sumScoresForTopFull)
    }
  }
}
|
|
|
|
/**
 * Indicator metric: 1.0 when the request has at most `k` candidates, 0.0 otherwise.
 *
 * The threshold is inclusive (`size <= k`), which is the same condition the top-`k`
 * metrics in this file use to skip a request.
 *
 * @param k candidate-count threshold
 */
case class HasFewerThanKCandidates(k: Int) extends EarlybirdEvaluationMetric {
  override val name: String = s"has_fewer_than_${k}_candidates"

  override def apply(candidates: Seq[CandidateRecord]): Option[Double] = {
    val indicator = if (candidates.size > k) 0.0 else 1.0
    Some(indicator)
  }
}
|
|
|
|
/** Reports the number of candidates of the request as a `Double`; always defined. */
case object NumberOfCandidates extends EarlybirdEvaluationMetric {
  override val name: String = "number_of_candidates"

  override def apply(candidates: Seq[CandidateRecord]): Option[Double] = {
    val count = candidates.length
    Some(count.toDouble)
  }
}
|