the-algorithm/representation-scorer/server/src/main/scala/com/twitter/representationscorer/twistlyfeatures/Scorer.scala

475 lines
17 KiB
Scala

package com.twitter.representationscorer.twistlyfeatures
import com.twitter.finagle.stats.Counter
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.representationscorer.common.TweetId
import com.twitter.representationscorer.common.UserId
import com.twitter.representationscorer.scorestore.ScoreStore
import com.twitter.representationscorer.thriftscala.SimClustersRecentEngagementSimilarities
import com.twitter.simclusters_v2.thriftscala.EmbeddingType
import com.twitter.simclusters_v2.thriftscala.InternalId
import com.twitter.simclusters_v2.thriftscala.ModelVersion
import com.twitter.simclusters_v2.thriftscala.ScoreId
import com.twitter.simclusters_v2.thriftscala.ScoreInternalId
import com.twitter.simclusters_v2.thriftscala.ScoringAlgorithm
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingId
import com.twitter.simclusters_v2.thriftscala.SimClustersEmbeddingPairScoreId
import com.twitter.stitch.Stitch
import javax.inject.Inject
/**
 * Computes similarity features between candidate tweets and a user's recent engagement signals.
 *
 * For each candidate tweet, a score is requested from `scoreStore` against every tweet id and
 * every author id exposed by the user's [[Engagements]]. The scores are then summarised, per
 * signal type and time window, as max/avg values of a
 * [[SimClustersRecentEngagementSimilarities]].
 *
 * @param fetchEngagementsFromUSS returns the engagement signals of the given user id
 * @param scoreStore              store used to resolve a `ScoreId` into a score
 * @param stats                   receiver under which the "score" latency stats and counters
 *                                are registered
 */
class Scorer @Inject() (
  fetchEngagementsFromUSS: Long => Stitch[Engagements],
  scoreStore: ScoreStore,
  stats: StatsReceiver) {

  import Scorer._

  private val scoreStats = stats.scope("score")
  private val scoreCalculationStats = scoreStats.scope("calculation")
  private val scoreResultStats = scoreStats.scope("result")

  // Counters over the whole result (any signal type).
  private val scoresNonEmptyCounter = scoreResultStats.scope("all").counter("nonEmpty")
  private val scoresNonZeroCounter = scoreResultStats.scope("all").counter("nonZero")

  // Latency (in milliseconds) of the score lookups for one candidate tweet.
  private val tweetScoreStats = scoreCalculationStats.scope("tweetScore").stat("latency")
  private val userScoreStats = scoreCalculationStats.scope("userScore").stat("latency")

  // Per-signal-type counters.
  private val favNonZero = scoreResultStats.scope("favs").counter("nonZero")
  private val favNonEmpty = scoreResultStats.scope("favs").counter("nonEmpty")

  private val retweetsNonZero = scoreResultStats.scope("retweets").counter("nonZero")
  private val retweetsNonEmpty = scoreResultStats.scope("retweets").counter("nonEmpty")

  private val followsNonZero = scoreResultStats.scope("follows").counter("nonZero")
  private val followsNonEmpty = scoreResultStats.scope("follows").counter("nonEmpty")

  private val sharesNonZero = scoreResultStats.scope("shares").counter("nonZero")
  private val sharesNonEmpty = scoreResultStats.scope("shares").counter("nonEmpty")

  private val repliesNonZero = scoreResultStats.scope("replies").counter("nonZero")
  private val repliesNonEmpty = scoreResultStats.scope("replies").counter("nonEmpty")

  private val originalTweetsNonZero = scoreResultStats.scope("originalTweets").counter("nonZero")
  private val originalTweetsNonEmpty = scoreResultStats.scope("originalTweets").counter("nonEmpty")

  private val videoViewsNonZero = scoreResultStats.scope("videoViews").counter("nonZero")
  private val videoViewsNonEmpty = scoreResultStats.scope("videoViews").counter("nonEmpty")

  private val blockNonZero = scoreResultStats.scope("block").counter("nonZero")
  private val blockNonEmpty = scoreResultStats.scope("block").counter("nonEmpty")

  private val muteNonZero = scoreResultStats.scope("mute").counter("nonZero")
  private val muteNonEmpty = scoreResultStats.scope("mute").counter("nonEmpty")

  private val reportNonZero = scoreResultStats.scope("report").counter("nonZero")
  private val reportNonEmpty = scoreResultStats.scope("report").counter("nonEmpty")

  private val dontlikeNonZero = scoreResultStats.scope("dontlike").counter("nonZero")
  private val dontlikeNonEmpty = scoreResultStats.scope("dontlike").counter("nonEmpty")

  private val seeFewerNonZero = scoreResultStats.scope("seeFewer").counter("nonZero")
  private val seeFewerNonEmpty = scoreResultStats.scope("seeFewer").counter("nonEmpty")

  /**
   * Scores `candidateTweetId` against every tweet in `sourceTweetIds`.
   *
   * One [[ScoreResult]] is produced per source tweet id; a score that is not found is kept as
   * a result with an empty score instead of failing the whole lookup. The time taken by the
   * batch is recorded in the "tweetScore" latency stat.
   */
  private def getTweetScores(
    candidateTweetId: TweetId,
    sourceTweetIds: Seq[TweetId]
  ): Stitch[Seq[ScoreResult]] = {
    val getScoresStitch = Stitch.traverse(sourceTweetIds) { sourceTweetId =>
      scoreStore
        .uniformScoringStoreStitch(getTweetScoreId(sourceTweetId, candidateTweetId))
        .liftNotFoundToOption
        .map(score => ScoreResult(sourceTweetId, score.map(_.score)))
    }

    Stitch.time(getScoresStitch).flatMap {
      case (tryResult, duration) =>
        tweetScoreStats.add(duration.inMillis)
        // Re-emit the timed outcome unchanged.
        Stitch.const(tryResult)
    }
  }

  /**
   * Scores `tweetId` against every author in `authorIds`.
   *
   * One [[ScoreResult]] is produced per author id; a score that is not found is kept as a
   * result with an empty score instead of failing the whole lookup. The time taken by the
   * batch is recorded in the "userScore" latency stat.
   */
  private def getUserScores(
    tweetId: TweetId,
    authorIds: Seq[UserId]
  ): Stitch[Seq[ScoreResult]] = {
    val getScoresStitch = Stitch.traverse(authorIds) { authorId =>
      scoreStore
        .uniformScoringStoreStitch(getAuthorScoreId(authorId, tweetId))
        .liftNotFoundToOption
        .map(score => ScoreResult(authorId, score.map(_.score)))
    }

    Stitch.time(getScoresStitch).flatMap {
      case (tryResult, duration) =>
        userScoreStats.add(duration.inMillis)
        // Re-emit the timed outcome unchanged.
        Stitch.const(tryResult)
    }
  }

  /**
   * Get the [[SimClustersRecentEngagementSimilarities]] result containing the similarity
   * features for the given userId-TweetId.
   */
  def get(
    userId: UserId,
    tweetId: TweetId
  ): Stitch[SimClustersRecentEngagementSimilarities] = {
    // The batch variant returns one result per requested tweet, so a single-tweet request
    // yields exactly one element.
    get(userId, Seq(tweetId)).map(_.head)
  }

  /**
   * Get a list of [[SimClustersRecentEngagementSimilarities]] results containing the similarity
   * features for the given tweets of the user Id.
   * Guaranteed to be the same number/order as requested.
   */
  def get(
    userId: UserId,
    tweetIds: Seq[TweetId]
  ): Stitch[Seq[SimClustersRecentEngagementSimilarities]] = {
    fetchEngagementsFromUSS(userId).flatMap { engagements =>
      // For each tweet received in the request, compute the similarity scores between it
      // and the user signals fetched from USS.
      Stitch
        .join(
          Stitch.traverse(tweetIds)(id => getTweetScores(id, engagements.tweetIds)),
          Stitch.traverse(tweetIds)(id => getUserScores(id, engagements.authorIds))
        )
        .map {
          case (tweetScoresSeq, userScoreSeq) =>
            // Both sequences hold one entry per requested tweet: a missing score is carried
            // as an empty Option inside its ScoreResult rather than dropped, so zipping them
            // pairs up the tweet-based and author-based scores of the same candidate.
            (tweetScoresSeq, userScoreSeq).zipped.map { (tweetScores, userScores) =>
              computeSimilarityScoresPerTweet(
                engagements,
                tweetScores.groupBy(_.id),
                userScores.groupBy(_.id))
            }
        }
    }
  }

  /**
   * Collects, in the order of `targetIds`, every defined score stored under those ids.
   *
   * Ids without an entry in `scoresById`, and results whose score is empty, contribute nothing.
   */
  private def scoresFor[K](
    targetIds: Traversable[K],
    scoresById: Map[K, Seq[ScoreResult]]
  ): Seq[Double] =
    targetIds.toSeq
      .flatMap(id => scoresById.getOrElse(id, Nil))
      .flatMap(_.score)

  /**
   * Computes the [[SimClustersRecentEngagementSimilarities]] of one candidate tweet from its
   * tweet-tweet scores (`tweetScores`), its author-tweet scores (`authorScores`) and the user
   * signals in [[Engagements]].
   *
   * For every signal type and time window, the scores of the signal's target ids are looked
   * up and reduced to a max and an average; both are empty when no score was found. The
   * per-result counters are updated as a side effect.
   *
   * NOTE(review): if the id lists used to request scores contain duplicates, an id's score
   * appears more than once under its key and is counted more than once in the averages --
   * confirm whether [[Engagements]] de-duplicates its ids.
   */
  private def computeSimilarityScoresPerTweet(
    engagements: Engagements,
    tweetScores: Map[TweetId, Seq[ScoreResult]],
    authorScores: Map[UserId, Seq[ScoreResult]]
  ): SimClustersRecentEngagementSimilarities = {
    def tweetSimilarities(targetIds: Traversable[TweetId]): Seq[Double] =
      scoresFor(targetIds, tweetScores)

    def authorSimilarities(targetIds: Traversable[UserId]): Seq[Double] =
      scoresFor(targetIds, authorScores)

    val favs7d = tweetSimilarities(engagements.favs7d.map(_.targetId))
    val favs1d = tweetSimilarities(engagements.favs1d.map(_.targetId))

    val retweets7d = tweetSimilarities(engagements.retweets7d.map(_.targetId))
    val retweets1d = tweetSimilarities(engagements.retweets1d.map(_.targetId))

    // Follows are the only signals matched against the author-based scores.
    val follows30d = authorSimilarities(engagements.follows30d.map(_.targetId))
    val follows7d = authorSimilarities(engagements.follows7d.map(_.targetId))

    val shares7d = tweetSimilarities(engagements.shares7d.map(_.targetId))
    val shares1d = tweetSimilarities(engagements.shares1d.map(_.targetId))

    val replies7d = tweetSimilarities(engagements.replies7d.map(_.targetId))
    val replies1d = tweetSimilarities(engagements.replies1d.map(_.targetId))

    val originalTweets7d = tweetSimilarities(engagements.originalTweets7d.map(_.targetId))
    val originalTweets1d = tweetSimilarities(engagements.originalTweets1d.map(_.targetId))

    val videoViews7d = tweetSimilarities(engagements.videoPlaybacks7d.map(_.targetId))
    val videoViews1d = tweetSimilarities(engagements.videoPlaybacks1d.map(_.targetId))

    // NOTE(review): block and mute signals are matched against the tweet-based scores. If
    // their target ids are user ids (as for follows) they would need `authorScores` instead
    // -- confirm against Engagements.
    val block30d = tweetSimilarities(engagements.block30d.map(_.targetId))
    val block7d = tweetSimilarities(engagements.block7d.map(_.targetId))
    val block1d = tweetSimilarities(engagements.block1d.map(_.targetId))

    val mute30d = tweetSimilarities(engagements.mute30d.map(_.targetId))
    val mute7d = tweetSimilarities(engagements.mute7d.map(_.targetId))
    val mute1d = tweetSimilarities(engagements.mute1d.map(_.targetId))

    val report30d = tweetSimilarities(engagements.report30d.map(_.targetId))
    val report7d = tweetSimilarities(engagements.report7d.map(_.targetId))
    val report1d = tweetSimilarities(engagements.report1d.map(_.targetId))

    val dontlike30d = tweetSimilarities(engagements.dontlike30d.map(_.targetId))
    val dontlike7d = tweetSimilarities(engagements.dontlike7d.map(_.targetId))
    val dontlike1d = tweetSimilarities(engagements.dontlike1d.map(_.targetId))

    val seeFewer30d = tweetSimilarities(engagements.seeFewer30d.map(_.targetId))
    val seeFewer7d = tweetSimilarities(engagements.seeFewer7d.map(_.targetId))
    val seeFewer1d = tweetSimilarities(engagements.seeFewer1d.map(_.targetId))

    val result = SimClustersRecentEngagementSimilarities(
      fav1dLast10Max = max(favs1d),
      fav1dLast10Avg = avg(favs1d),
      fav7dLast10Max = max(favs7d),
      fav7dLast10Avg = avg(favs7d),
      retweet1dLast10Max = max(retweets1d),
      retweet1dLast10Avg = avg(retweets1d),
      retweet7dLast10Max = max(retweets7d),
      retweet7dLast10Avg = avg(retweets7d),
      follow7dLast10Max = max(follows7d),
      follow7dLast10Avg = avg(follows7d),
      follow30dLast10Max = max(follows30d),
      follow30dLast10Avg = avg(follows30d),
      share1dLast10Max = max(shares1d),
      share1dLast10Avg = avg(shares1d),
      share7dLast10Max = max(shares7d),
      share7dLast10Avg = avg(shares7d),
      reply1dLast10Max = max(replies1d),
      reply1dLast10Avg = avg(replies1d),
      reply7dLast10Max = max(replies7d),
      reply7dLast10Avg = avg(replies7d),
      originalTweet1dLast10Max = max(originalTweets1d),
      originalTweet1dLast10Avg = avg(originalTweets1d),
      originalTweet7dLast10Max = max(originalTweets7d),
      originalTweet7dLast10Avg = avg(originalTweets7d),
      videoPlayback1dLast10Max = max(videoViews1d),
      videoPlayback1dLast10Avg = avg(videoViews1d),
      videoPlayback7dLast10Max = max(videoViews7d),
      videoPlayback7dLast10Avg = avg(videoViews7d),
      block1dLast10Max = max(block1d),
      block1dLast10Avg = avg(block1d),
      block7dLast10Max = max(block7d),
      block7dLast10Avg = avg(block7d),
      block30dLast10Max = max(block30d),
      block30dLast10Avg = avg(block30d),
      mute1dLast10Max = max(mute1d),
      mute1dLast10Avg = avg(mute1d),
      mute7dLast10Max = max(mute7d),
      mute7dLast10Avg = avg(mute7d),
      mute30dLast10Max = max(mute30d),
      mute30dLast10Avg = avg(mute30d),
      report1dLast10Max = max(report1d),
      report1dLast10Avg = avg(report1d),
      report7dLast10Max = max(report7d),
      report7dLast10Avg = avg(report7d),
      report30dLast10Max = max(report30d),
      report30dLast10Avg = avg(report30d),
      dontlike1dLast10Max = max(dontlike1d),
      dontlike1dLast10Avg = avg(dontlike1d),
      dontlike7dLast10Max = max(dontlike7d),
      dontlike7dLast10Avg = avg(dontlike7d),
      dontlike30dLast10Max = max(dontlike30d),
      dontlike30dLast10Avg = avg(dontlike30d),
      seeFewer1dLast10Max = max(seeFewer1d),
      seeFewer1dLast10Avg = avg(seeFewer1d),
      seeFewer7dLast10Max = max(seeFewer7d),
      seeFewer7dLast10Avg = avg(seeFewer7d),
      seeFewer30dLast10Max = max(seeFewer30d),
      seeFewer30dLast10Avg = avg(seeFewer30d)
    )
    trackStats(result)
    result
  }

  /**
   * Updates the result counters for one computed `result`: the "all" counters when any of the
   * tracked max scores is present / positive, then the per-signal-type counters.
   */
  private def trackStats(result: SimClustersRecentEngagementSimilarities): Unit = {
    // share1d is listed next to share7d here; presumably redundant given the window note
    // below, kept as-is.
    val scores = Seq(
      result.fav7dLast10Max,
      result.retweet7dLast10Max,
      result.follow30dLast10Max,
      result.share1dLast10Max,
      result.share7dLast10Max,
      result.reply7dLast10Max,
      result.originalTweet7dLast10Max,
      result.videoPlayback7dLast10Max,
      result.block30dLast10Max,
      result.mute30dLast10Max,
      result.report30dLast10Max,
      result.dontlike30dLast10Max,
      result.seeFewer30dLast10Max
    )

    val nonEmpty = scores.exists(_.isDefined)
    val nonZero = scores.exists(_.exists(_ > 0))

    if (nonEmpty) {
      scoresNonEmptyCounter.incr()
    }

    if (nonZero) {
      scoresNonZeroCounter.incr()
    }

    // We use the largest window of a given type of score,
    // because the largest window is inclusive of smaller windows.
    trackSignalStats(favNonEmpty, favNonZero, result.fav7dLast10Avg)
    trackSignalStats(retweetsNonEmpty, retweetsNonZero, result.retweet7dLast10Avg)
    trackSignalStats(followsNonEmpty, followsNonZero, result.follow30dLast10Avg)
    trackSignalStats(sharesNonEmpty, sharesNonZero, result.share7dLast10Avg)
    trackSignalStats(repliesNonEmpty, repliesNonZero, result.reply7dLast10Avg)
    trackSignalStats(originalTweetsNonEmpty, originalTweetsNonZero, result.originalTweet7dLast10Avg)
    trackSignalStats(videoViewsNonEmpty, videoViewsNonZero, result.videoPlayback7dLast10Avg)
    trackSignalStats(blockNonEmpty, blockNonZero, result.block30dLast10Avg)
    trackSignalStats(muteNonEmpty, muteNonZero, result.mute30dLast10Avg)
    trackSignalStats(reportNonEmpty, reportNonZero, result.report30dLast10Avg)
    trackSignalStats(dontlikeNonEmpty, dontlikeNonZero, result.dontlike30dLast10Avg)
    trackSignalStats(seeFewerNonEmpty, seeFewerNonZero, result.seeFewer30dLast10Avg)
  }

  /**
   * Increments `nonEmpty` when `score` is defined, and additionally `nonZero` when the defined
   * score is strictly positive. Does nothing for an empty score.
   */
  private def trackSignalStats(nonEmpty: Counter, nonZero: Counter, score: Option[Double]): Unit = {
    score.foreach { value =>
      nonEmpty.incr()
      if (value > 0) {
        nonZero.incr()
      }
    }
  }
}
/**
 * Aggregation helpers and `ScoreId` builders used by the [[Scorer]] class.
 */
object Scorer {

  /**
   * Arithmetic mean of `s`.
   *
   * @return `None` when `s` is empty, otherwise the sum of the values divided by their count
   */
  def avg(s: Traversable[Double]): Option[Double] =
    if (s.isEmpty) None else Some(s.sum / s.size)

  /**
   * Largest value of `s`.
   *
   * The reduction starts from the first element rather than from 0.0, so a collection holding
   * only negative values yields its true maximum instead of 0.0.
   *
   * @return `None` when `s` is empty, otherwise the maximum as computed by `math.max`
   */
  def max(s: Traversable[Double]): Option[Double] =
    if (s.isEmpty) None
    else Some(s.reduceLeft[Double]((best, score) => math.max(best, score)))

  /**
   * Builds the cosine-similarity `ScoreId` between `source` and the `LogFavBasedTweet`
   * embedding of `candidateTweetId`, both taken from model version `Model20m145k2020`.
   */
  private def pairCosineScoreId(
    source: SimClustersEmbeddingId,
    candidateTweetId: TweetId
  ): ScoreId = {
    val candidate = SimClustersEmbeddingId(
      internalId = InternalId.TweetId(candidateTweetId),
      modelVersion = ModelVersion.Model20m145k2020,
      embeddingType = EmbeddingType.LogFavBasedTweet
    )

    ScoreId(
      algorithm = ScoringAlgorithm.PairEmbeddingCosineSimilarity,
      internalId = ScoreInternalId.SimClustersEmbeddingPairScoreId(
        SimClustersEmbeddingPairScoreId(source, candidate)
      )
    )
  }

  /**
   * `ScoreId` comparing the `FavBasedProducer` embedding of user `userId` with the
   * `LogFavBasedTweet` embedding of tweet `tweetId`.
   */
  private def getAuthorScoreId(
    userId: UserId,
    tweetId: TweetId
  ): ScoreId =
    pairCosineScoreId(
      SimClustersEmbeddingId(
        internalId = InternalId.UserId(userId),
        modelVersion = ModelVersion.Model20m145k2020,
        embeddingType = EmbeddingType.FavBasedProducer
      ),
      tweetId
    )

  /**
   * `ScoreId` comparing the `LogFavLongestL2EmbeddingTweet` embedding of `sourceTweetId` with
   * the `LogFavBasedTweet` embedding of `candidateTweetId`.
   */
  private def getTweetScoreId(
    sourceTweetId: TweetId,
    candidateTweetId: TweetId
  ): ScoreId =
    pairCosineScoreId(
      SimClustersEmbeddingId(
        internalId = InternalId.TweetId(sourceTweetId),
        modelVersion = ModelVersion.Model20m145k2020,
        embeddingType = EmbeddingType.LogFavLongestL2EmbeddingTweet
      ),
      candidateTweetId
    )
}