mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-13 06:38:52 +02:00
b389c3d302
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
221 lines
12 KiB
Scala
221 lines
12 KiB
Scala
package com.twitter.frigate.pushservice.ml
|
|
|
|
import com.twitter.abuse.detection.scoring.thriftscala.{Model => TweetHealthModel}
|
|
import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest
|
|
import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse
|
|
import com.twitter.frigate.common.base.FeatureMap
|
|
import com.twitter.frigate.common.base.TweetAuthor
|
|
import com.twitter.frigate.common.base.TweetAuthorDetails
|
|
import com.twitter.frigate.common.base.TweetCandidate
|
|
import com.twitter.frigate.common.rec_types.RecTypes
|
|
import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
|
|
import com.twitter.frigate.pushservice.params.PushConstants
|
|
import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
|
|
import com.twitter.frigate.pushservice.predicate.HealthPredicates.userHealthSignalValueToDouble
|
|
import com.twitter.frigate.pushservice.util.CandidateHydrationUtil
|
|
import com.twitter.frigate.pushservice.util.CandidateUtil
|
|
import com.twitter.frigate.pushservice.util.MediaAnnotationsUtil
|
|
import com.twitter.frigate.thriftscala.UserMediaRepresentation
|
|
import com.twitter.hss.api.thriftscala.SignalValue
|
|
import com.twitter.hss.api.thriftscala.UserHealthSignal
|
|
import com.twitter.hss.api.thriftscala.UserHealthSignal.AgathaCalibratedNsfwDouble
|
|
import com.twitter.hss.api.thriftscala.UserHealthSignal.NsfwTextUserScoreDouble
|
|
import com.twitter.hss.api.thriftscala.UserHealthSignalResponse
|
|
import com.twitter.storehaus.ReadableStore
|
|
import com.twitter.util.Future
|
|
import com.twitter.util.Time
|
|
|
|
/**
 * Builds the health-related [[FeatureMap]] for a push candidate by combining author-level
 * health signals, the author's media representation, optional tweet-level health scores and
 * features already hydrated on the candidate.
 */
object HealthFeatureGetter {

  // Fallback values used when the user health signal response lacks the corresponding signal.
  private val DefaultAgathaNsfwScore = 0.5
  private val DefaultUserTextNsfwScore = 0.15

  /** Returns `numerator / denominator`, or 0.0 when the denominator is not strictly positive. */
  private def ratioOrZero(numerator: Double, denominator: Double): Double =
    if (denominator > 0) numerator / denominator else 0.0

  /**
   * Fetches and assembles the health features for `pushCandidate`.
   *
   * Only candidates that are `TweetCandidate with TweetAuthor with TweetAuthorDetails` and that
   * carry an author id produce a populated map; every other candidate yields an empty
   * [[FeatureMap]].
   *
   * @param pushCandidate candidate to compute features for
   * @param producerMediaRepresentationStore store queried with the author id for the author's
   *                                         media representation
   * @param userHealthScoreStore store queried with the author id for user health signals
   * @param tweetHealthScoreStoreOpt optional store queried for two tweet-level health model
   *                                 scores; when absent both scores default to 0.0
   * @return a future of the boolean, numeric and categorical health features
   */
  def getFeatures(
    pushCandidate: PushCandidate,
    producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation],
    userHealthScoreStore: ReadableStore[Long, UserHealthSignalResponse],
    tweetHealthScoreStoreOpt: Option[ReadableStore[TweetScoringRequest, TweetScoringResponse]] =
      None
  ): Future[FeatureMap] = {

    def emptyFeatures: Future[FeatureMap] = Future.value(FeatureMap())

    pushCandidate match {
      case cand: PushCandidate with TweetCandidate with TweetAuthor with TweetAuthorDetails =>
        val mediaNsfwRequest =
          TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore4)
        val tweetTextNsfwRequest =
          TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore1)

        cand.authorId match {
          case Some(authorId) =>
            // Start every lookup before joining so that they are all in flight together.
            val userHealthF = userHealthScoreStore.get(authorId)
            val producerMediaF = producerMediaRepresentationStore.get(authorId)
            val mediaNsfwF =
              tweetHealthScoreStoreOpt.map(_.get(mediaNsfwRequest)).getOrElse(Future.None)
            val tweetTextNsfwF =
              tweetHealthScoreStoreOpt.map(_.get(tweetTextNsfwRequest)).getOrElse(Future.None)
            val authorF = cand.tweetAuthor

            Future
              .join(userHealthF, producerMediaF, mediaNsfwF, tweetTextNsfwF, authorF)
              .map {
                case (userHealthOpt, producerMediaOpt, mediaNsfwOpt, tweetTextNsfwOpt, authorOpt) =>
                  // ---- small lookup helpers over the candidate's hydrated features ----
                  def numeric(key: String): Double = cand.numericFeatures.getOrElse(key, 0.0)
                  def flag(key: String): Boolean = cand.booleanFeatures.getOrElse(key, false)

                  // ---- author-level health signals ----
                  val userSignals = userHealthOpt
                    .map(_.signalValues)
                    .getOrElse(Map.empty[UserHealthSignal, SignalValue])

                  def userSignal(signal: UserHealthSignal, fallback: Double): Double =
                    userHealthSignalValueToDouble(
                      userSignals.getOrElse(signal, SignalValue.DoubleValue(fallback)))

                  val agathaNsfw = userSignal(AgathaCalibratedNsfwDouble, DefaultAgathaNsfwScore)
                  val userTextNsfw = userSignal(NsfwTextUserScoreDouble, DefaultUserTextNsfwScore)

                  // ---- tweet-level health model scores ----
                  val mediaNsfw = mediaNsfwOpt.map(_.score).getOrElse(0.0)
                  val tweetTextNsfw = tweetTextNsfwOpt.map(_.score).getOrElse(0.0)

                  // ---- share of selected categories in the author's media representation ----
                  val mediaRepresentation = producerMediaOpt
                    .map(_.mediaRepresentation)
                    .getOrElse(Map.empty[String, Double])
                  val mediaTotal: Double = mediaRepresentation.values.sum

                  def mediaShare(categoryId: String): Double =
                    ratioOrZero(mediaRepresentation.getOrElse(categoryId, 0.0), mediaTotal)

                  val nudityShare = mediaShare(MediaAnnotationsUtil.nudityCategoryId)
                  val beautyShare = mediaShare(MediaAnnotationsUtil.beautyCategoryId)
                  val singlePersonShare = mediaShare(MediaAnnotationsUtil.singlePersonCategoryId)

                  // ---- tweet-level dislike aggregates ----
                  val tweetDislikes = numeric(
                    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_ntab_disliked.any_feature.Duration.Top.count")
                  val tweetSends = numeric(
                    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_sent.any_feature.Duration.Top.count")
                  val tweetDislikeRate = ratioOrZero(tweetDislikes, tweetSends)

                  // ---- author-level dislike / report aggregates ----
                  val authorDislikes = numeric(
                    "tweet_author_aggregate.pair.label.ntab.isDisliked.any_feature.28.days.count")
                  val authorReports = numeric(
                    "tweet_author_aggregate.pair.label.reportTweetDone.any_feature.28.days.count")
                  val authorSends =
                    numeric("tweet_author_aggregate.pair.any_label.any_feature.28.days.count")
                  val authorDislikeRate = ratioOrZero(authorDislikes, authorSends)
                  val authorReportRate = ratioOrZero(authorReports, authorSends)

                  // ---- author account attributes (false / 0 when the author is unavailable) ----
                  val authorHasNsfwToken = authorOpt.exists { author =>
                    CandidateHydrationUtil.isNsfwAccount(
                      author,
                      cand.target.params(PushFeatureSwitchParams.NsfwTokensParam))
                  }
                  val authorAgeHours = authorOpt
                    .map(author => (Time.now - Time.fromMilliseconds(author.createdAtMsec)).inHours)
                    .getOrElse(0)

                  // ---- semantic core annotations attached to the tweet ----
                  val semanticCoreIds = cand.sparseBinaryFeatures
                    .getOrElse(PushConstants.TweetSemanticCoreIdFeature, Set.empty[String])

                  def taggedWith(id: String): Boolean = semanticCoreIds.contains(id)

                  val recType = cand.commonRecType

                  val numericFeatureMap = Map[String, Double](
                    "agathaNsfwScore" -> agathaNsfw,
                    "textNsfwScore" -> userTextNsfw,
                    "pMediaNsfwScore" -> mediaNsfw,
                    "pTweetTextNsfwScore" -> tweetTextNsfw,
                    "nudityRate" -> nudityShare,
                    "beautyRate" -> beautyShare,
                    "singlePersonRate" -> singlePersonShare,
                    "numSources" -> CandidateUtil.getTagsCRCount(cand),
                    "favCount" -> numeric("tweet.core.tweet_counts.favorite_count"),
                    "activeFollowers" -> numeric("RecTweetAuthor.User.ActiveFollowers"),
                    "favorsRcvd28Days" -> numeric("RecTweetAuthor.User.FavorsRcvd28Days"),
                    "tweets28Days" -> numeric("RecTweetAuthor.User.Tweets28Days"),
                    "dislikeCount" -> tweetDislikes,
                    "dislikeRate" -> tweetDislikeRate,
                    "sentCount" -> tweetSends,
                    "authorDislikeCount" -> authorDislikes,
                    "authorDislikeRate" -> authorDislikeRate,
                    "authorReportCount" -> authorReports,
                    "authorReportRate" -> authorReportRate,
                    "authorSentCount" -> authorSends,
                    "authorAgeInHour" -> authorAgeHours.toDouble
                  )

                  val booleanFeatureMap = Map[String, Boolean](
                    "isSimclusterBased" -> RecTypes.simclusterBasedTweets.contains(recType),
                    "isTopicTweet" -> RecTypes.isTopicTweetType(recType),
                    "isHashSpace" -> RecTypes.tagspaceTypes.contains(recType),
                    "isFRS" -> RecTypes.frsTypes.contains(recType),
                    "isModelingBased" -> RecTypes.mrModelingBasedTypes.contains(recType),
                    "isGeoPop" -> RecTypes.GeoPopTweetTypes.contains(recType),
                    "hasPhoto" -> flag("RecTweet.TweetyPieResult.HasPhoto"),
                    "hasVideo" -> flag("RecTweet.TweetyPieResult.HasVideo"),
                    "hasUrl" -> flag("RecTweet.TweetyPieResult.HasUrl"),
                    "isMrTwistly" -> CandidateUtil.isMrTwistlyCandidate(cand),
                    "abuseStrikeTop2Percent" -> taggedWith(PushConstants.AbuseStrike_Top2Percent_Id),
                    "abuseStrikeTop1Percent" -> taggedWith(PushConstants.AbuseStrike_Top1Percent_Id),
                    "abuseStrikeTop05Percent" -> taggedWith(
                      PushConstants.AbuseStrike_Top05Percent_Id),
                    "abuseStrikeTop025Percent" -> taggedWith(
                      PushConstants.AbuseStrike_Top025Percent_Id),
                    "allSpamReportsPerFavTop1Percent" -> taggedWith(
                      PushConstants.AllSpamReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop1Percent" -> taggedWith(
                      PushConstants.ReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop2Percent" -> taggedWith(
                      PushConstants.ReportsPerFav_Top2Percent_Id),
                    "isNudity" -> taggedWith(PushConstants.MediaUnderstanding_Nudity_Id),
                    "beautyStyleFashion" -> taggedWith(PushConstants.MediaUnderstanding_Beauty_Id),
                    "singlePerson" -> taggedWith(
                      PushConstants.MediaUnderstanding_SinglePerson_Id),
                    "pornList" -> taggedWith(PushConstants.PornList_Id),
                    "pornographyAndNsfwContent" -> taggedWith(
                      PushConstants.PornographyAndNsfwContent_Id),
                    "sexLife" -> taggedWith(PushConstants.SexLife_Id),
                    "sexLifeOrSexualOrientation" -> taggedWith(
                      PushConstants.SexLifeOrSexualOrientation_Id),
                    "profanity" -> taggedWith(PushConstants.ProfanityFilter_Id),
                    "isVerified" -> flag("RecTweetAuthor.User.IsVerified"),
                    "hasNsfwToken" -> authorHasNsfwToken
                  )

                  val categoricalFeatureMap = Map[String, String](
                    "tweetLanguage" -> cand.categoricalFeatures
                      .getOrElse("tweet.core.tweet_text.language", "")
                  )

                  FeatureMap(
                    booleanFeatures = booleanFeatureMap,
                    numericFeatures = numericFeatureMap,
                    categoricalFeatures = categoricalFeatureMap)
              }

          // No author id on the candidate: nothing to look up.
          case _ => emptyFeatures
        }

      // Candidate is not a tweet candidate with author details.
      case _ => emptyFeatures
    }
  }
}
|