the-algorithm/pushservice/src/main/scala/com/twitter/frigate/pushservice/ml/HealthFeatureGetter.scala
twitter-team b389c3d302 Open-sourcing pushservice
Pushservice is the main recommendation service we use to surface recommendations to our users via notifications. It fetches candidates from various sources, ranks them in order of relevance, and applies filters to determine the best one to send.
2023-05-19 16:27:07 -05:00

221 lines
12 KiB
Scala

package com.twitter.frigate.pushservice.ml
import com.twitter.abuse.detection.scoring.thriftscala.{Model => TweetHealthModel}
import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringRequest
import com.twitter.abuse.detection.scoring.thriftscala.TweetScoringResponse
import com.twitter.frigate.common.base.FeatureMap
import com.twitter.frigate.common.base.TweetAuthor
import com.twitter.frigate.common.base.TweetAuthorDetails
import com.twitter.frigate.common.base.TweetCandidate
import com.twitter.frigate.common.rec_types.RecTypes
import com.twitter.frigate.pushservice.model.PushTypes.PushCandidate
import com.twitter.frigate.pushservice.params.PushConstants
import com.twitter.frigate.pushservice.params.PushFeatureSwitchParams
import com.twitter.frigate.pushservice.predicate.HealthPredicates.userHealthSignalValueToDouble
import com.twitter.frigate.pushservice.util.CandidateHydrationUtil
import com.twitter.frigate.pushservice.util.CandidateUtil
import com.twitter.frigate.pushservice.util.MediaAnnotationsUtil
import com.twitter.frigate.thriftscala.UserMediaRepresentation
import com.twitter.hss.api.thriftscala.SignalValue
import com.twitter.hss.api.thriftscala.UserHealthSignal
import com.twitter.hss.api.thriftscala.UserHealthSignal.AgathaCalibratedNsfwDouble
import com.twitter.hss.api.thriftscala.UserHealthSignal.NsfwTextUserScoreDouble
import com.twitter.hss.api.thriftscala.UserHealthSignalResponse
import com.twitter.storehaus.ReadableStore
import com.twitter.util.Future
import com.twitter.util.Time
/**
 * Assembles the health-related [[FeatureMap]] for a push candidate.
 *
 * The features are derived from four lookups (author health signals, the author's media
 * representation, and up to two tweet health scores), from the hydrated tweet author, and from
 * feature values already present on the candidate itself.
 */
object HealthFeatureGetter {

  // Keys of the aggregate counters read from the candidate's numeric features.
  private val TweetDislikeCountFeature =
    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_ntab_disliked.any_feature.Duration.Top.count"
  private val TweetSentCountFeature =
    "tweet.magic_recs_tweet_real_time_aggregates_v2.pair.v2.magicrecs.realtime.is_sent.any_feature.Duration.Top.count"
  private val AuthorDislikeCountFeature =
    "tweet_author_aggregate.pair.label.ntab.isDisliked.any_feature.28.days.count"
  private val AuthorReportCountFeature =
    "tweet_author_aggregate.pair.label.reportTweetDone.any_feature.28.days.count"
  private val AuthorSentCountFeature =
    "tweet_author_aggregate.pair.any_label.any_feature.28.days.count"

  // Values used when the author's health signal response lacks the corresponding signal.
  private val DefaultAgathaNsfwScore = 0.5
  private val DefaultUserTextNsfwScore = 0.15

  /**
   * Returns `numerator / denominator` when `denominator` is strictly positive and `0.0`
   * otherwise, so rates over empty aggregates never become NaN or infinite.
   */
  private def ratio(numerator: Double, denominator: Double): Double =
    if (denominator > 0) numerator / denominator else 0.0

  /**
   * Computes the health features of `pushCandidate`.
   *
   * A default-constructed `FeatureMap()` is returned when the candidate is not a tweet candidate
   * carrying author details, or when it has no author id. Otherwise all lookups are issued
   * together through `Future.join` and the result holds boolean, numeric and categorical features.
   *
   * NOTE(review): `Future.join` presumably fails the whole result if any single lookup fails;
   * confirm that this is the intended behavior for callers.
   *
   * @param pushCandidate candidate to compute features for
   * @param producerMediaRepresentationStore store queried with the tweet author id
   * @param userHealthScoreStore store queried with the tweet author id
   * @param tweetHealthScoreStoreOpt optional tweet scoring store; when absent, both tweet health
   *                                 scores fall back to `0.0`
   * @return the feature map for the candidate
   */
  def getFeatures(
    pushCandidate: PushCandidate,
    producerMediaRepresentationStore: ReadableStore[Long, UserMediaRepresentation],
    userHealthScoreStore: ReadableStore[Long, UserHealthSignalResponse],
    tweetHealthScoreStoreOpt: Option[ReadableStore[TweetScoringRequest, TweetScoringResponse]] =
      None
  ): Future[FeatureMap] = {
    pushCandidate match {
      case cand: PushCandidate with TweetCandidate with TweetAuthor with TweetAuthorDetails =>
        // Queries the optional tweet scoring store, yielding None when no store is configured.
        def tweetHealthScore(
          request: TweetScoringRequest
        ): Future[Option[TweetScoringResponse]] =
          tweetHealthScoreStoreOpt.map(_.get(request)).getOrElse(Future.None)

        // Candidate feature lookups; a missing key yields 0.0 / false.
        def numeric(name: String): Double = cand.numericFeatures.getOrElse(name, 0.0)
        def flag(name: String): Boolean = cand.booleanFeatures.getOrElse(name, false)

        cand.authorId match {
          case Some(authorId) =>
            val pMediaNsfwRequest =
              TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore4)
            val pTweetTextNsfwRequest =
              TweetScoringRequest(cand.tweetId, TweetHealthModel.ExperimentalHealthModelScore1)

            Future
              .join(
                userHealthScoreStore.get(authorId),
                producerMediaRepresentationStore.get(authorId),
                tweetHealthScore(pMediaNsfwRequest),
                tweetHealthScore(pTweetTextNsfwRequest),
                cand.tweetAuthor
              ).map {
                case (
                      healthSignalsResponseOpt,
                      producerMuOpt,
                      pMediaNsfwOpt,
                      pTweetTextNsfwOpt,
                      tweetAuthorOpt) =>
                  // Author-level health signals, with defaults for missing signals.
                  val healthSignalScoreMap = healthSignalsResponseOpt
                    .map(_.signalValues).getOrElse(Map.empty[UserHealthSignal, SignalValue])
                  val agathaNSFWScore = userHealthSignalValueToDouble(
                    healthSignalScoreMap.getOrElse(
                      AgathaCalibratedNsfwDouble,
                      SignalValue.DoubleValue(DefaultAgathaNsfwScore)))
                  val userTextNSFWScore = userHealthSignalValueToDouble(
                    healthSignalScoreMap.getOrElse(
                      NsfwTextUserScoreDouble,
                      SignalValue.DoubleValue(DefaultUserTextNsfwScore)))

                  // Tweet-level health scores; 0.0 when no score was returned.
                  val pMediaNsfwScore = pMediaNsfwOpt.map(_.score).getOrElse(0.0)
                  val pTweetTextNsfwScore = pTweetTextNsfwOpt.map(_.score).getOrElse(0.0)

                  // Each media rate is one category's share of the sum over all categories.
                  val mediaRepresentationMap =
                    producerMuOpt.map(_.mediaRepresentation).getOrElse(Map.empty[String, Double])
                  val sumScore: Double = mediaRepresentationMap.values.sum
                  val nudityRate = ratio(
                    mediaRepresentationMap.getOrElse(MediaAnnotationsUtil.nudityCategoryId, 0.0),
                    sumScore)
                  val beautyRate = ratio(
                    mediaRepresentationMap.getOrElse(MediaAnnotationsUtil.beautyCategoryId, 0.0),
                    sumScore)
                  val singlePersonRate = ratio(
                    mediaRepresentationMap
                      .getOrElse(MediaAnnotationsUtil.singlePersonCategoryId, 0.0),
                    sumScore)

                  // Tweet-level and author-level negative-feedback counters and rates.
                  val dislikeCt = numeric(TweetDislikeCountFeature)
                  val sentCt = numeric(TweetSentCountFeature)
                  val dislikeRate = ratio(dislikeCt, sentCt)
                  val authorDislikeCt = numeric(AuthorDislikeCountFeature)
                  val authorReportCt = numeric(AuthorReportCountFeature)
                  val authorSentCt = numeric(AuthorSentCountFeature)
                  val authorDislikeRate = ratio(authorDislikeCt, authorSentCt)
                  val authorReportRate = ratio(authorReportCt, authorSentCt)

                  // Without a hydrated author: not flagged as NSFW and an account age of 0 hours.
                  val (isNsfwAccount, authorAccountAge) = tweetAuthorOpt match {
                    case Some(tweetAuthor) =>
                      (
                        CandidateHydrationUtil.isNsfwAccount(
                          tweetAuthor,
                          cand.target.params(PushFeatureSwitchParams.NsfwTokensParam)),
                        (Time.now - Time.fromMilliseconds(tweetAuthor.createdAtMsec)).inHours
                      )
                    case _ => (false, 0)
                  }

                  val tweetSemanticCoreIds = cand.sparseBinaryFeatures
                    .getOrElse(PushConstants.TweetSemanticCoreIdFeature, Set.empty[String])
                  val recType = cand.commonRecType

                  val continuousFeatures = Map[String, Double](
                    "agathaNsfwScore" -> agathaNSFWScore,
                    "textNsfwScore" -> userTextNSFWScore,
                    "pMediaNsfwScore" -> pMediaNsfwScore,
                    "pTweetTextNsfwScore" -> pTweetTextNsfwScore,
                    "nudityRate" -> nudityRate,
                    "beautyRate" -> beautyRate,
                    "singlePersonRate" -> singlePersonRate,
                    "numSources" -> CandidateUtil.getTagsCRCount(cand),
                    "favCount" -> numeric("tweet.core.tweet_counts.favorite_count"),
                    "activeFollowers" -> numeric("RecTweetAuthor.User.ActiveFollowers"),
                    "favorsRcvd28Days" -> numeric("RecTweetAuthor.User.FavorsRcvd28Days"),
                    "tweets28Days" -> numeric("RecTweetAuthor.User.Tweets28Days"),
                    "dislikeCount" -> dislikeCt,
                    "dislikeRate" -> dislikeRate,
                    "sentCount" -> sentCt,
                    "authorDislikeCount" -> authorDislikeCt,
                    "authorDislikeRate" -> authorDislikeRate,
                    "authorReportCount" -> authorReportCt,
                    "authorReportRate" -> authorReportRate,
                    "authorSentCount" -> authorSentCt,
                    "authorAgeInHour" -> authorAccountAge.toDouble
                  )

                  val booleanFeatures = Map[String, Boolean](
                    "isSimclusterBased" -> RecTypes.simclusterBasedTweets.contains(recType),
                    "isTopicTweet" -> RecTypes.isTopicTweetType(recType),
                    "isHashSpace" -> RecTypes.tagspaceTypes.contains(recType),
                    "isFRS" -> RecTypes.frsTypes.contains(recType),
                    "isModelingBased" -> RecTypes.mrModelingBasedTypes.contains(recType),
                    "isGeoPop" -> RecTypes.GeoPopTweetTypes.contains(recType),
                    "hasPhoto" -> flag("RecTweet.TweetyPieResult.HasPhoto"),
                    "hasVideo" -> flag("RecTweet.TweetyPieResult.HasVideo"),
                    "hasUrl" -> flag("RecTweet.TweetyPieResult.HasUrl"),
                    "isMrTwistly" -> CandidateUtil.isMrTwistlyCandidate(cand),
                    "abuseStrikeTop2Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top2Percent_Id),
                    "abuseStrikeTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top1Percent_Id),
                    "abuseStrikeTop05Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top05Percent_Id),
                    "abuseStrikeTop025Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AbuseStrike_Top025Percent_Id),
                    "allSpamReportsPerFavTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.AllSpamReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop1Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.ReportsPerFav_Top1Percent_Id),
                    "reportsPerFavTop2Percent" -> tweetSemanticCoreIds.contains(
                      PushConstants.ReportsPerFav_Top2Percent_Id),
                    "isNudity" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_Nudity_Id),
                    "beautyStyleFashion" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_Beauty_Id),
                    "singlePerson" -> tweetSemanticCoreIds.contains(
                      PushConstants.MediaUnderstanding_SinglePerson_Id),
                    "pornList" -> tweetSemanticCoreIds.contains(PushConstants.PornList_Id),
                    "pornographyAndNsfwContent" -> tweetSemanticCoreIds.contains(
                      PushConstants.PornographyAndNsfwContent_Id),
                    "sexLife" -> tweetSemanticCoreIds.contains(PushConstants.SexLife_Id),
                    "sexLifeOrSexualOrientation" -> tweetSemanticCoreIds.contains(
                      PushConstants.SexLifeOrSexualOrientation_Id),
                    "profanity" -> tweetSemanticCoreIds.contains(PushConstants.ProfanityFilter_Id),
                    "isVerified" -> flag("RecTweetAuthor.User.IsVerified"),
                    "hasNsfwToken" -> isNsfwAccount
                  )

                  val stringFeatures = Map[String, String](
                    "tweetLanguage" -> cand.categoricalFeatures
                      .getOrElse("tweet.core.tweet_text.language", "")
                  )

                  FeatureMap(
                    booleanFeatures = booleanFeatures,
                    numericFeatures = continuousFeatures,
                    categoricalFeatures = stringFeatures)
              }
          // No author id: nothing to look up.
          case _ => Future.value(FeatureMap())
        }
      // Candidate does not carry tweet and author details.
      case _ => Future.value(FeatureMap())
    }
  }
}