the-algorithm/timelines/data_processing/ad_hoc/earlybird_ranking/earlybird_ranking/common/EarlybirdTrainingRectweetConfiguration.scala
twitter-team ef4c5eb65e Twitter Recommendation Algorithm
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
2023-03-31 17:36:31 -05:00

101 lines
3.8 KiB
Scala

package com.twitter.timelines.data_processing.ad_hoc.earlybird_ranking.common
import com.twitter.ml.api.DataRecord
import com.twitter.ml.api.Feature
import com.twitter.ml.api.FeatureContext
import com.twitter.ml.api.ITransform
import com.twitter.ml.api.transform.CascadeTransform
import com.twitter.ml.api.util.SRichDataRecord
import com.twitter.search.common.features.SearchResultFeature
import com.twitter.search.common.features.TweetFeature
import com.twitter.timelines.prediction.features.itl.ITLFeatures._
import scala.collection.JavaConverters._
class EarlybirdTrainingRectweetConfiguration extends EarlybirdTrainingConfiguration {
override val labels: Map[String, Feature.Binary] = Map(
"detail_expanded" -> IS_CLICKED,
"favorited" -> IS_FAVORITED,
"open_linked" -> IS_OPEN_LINKED,
"photo_expanded" -> IS_PHOTO_EXPANDED,
"profile_clicked" -> IS_PROFILE_CLICKED,
"replied" -> IS_REPLIED,
"retweeted" -> IS_RETWEETED,
"video_playback50" -> IS_VIDEO_PLAYBACK_50
)
override val PositiveSamplingRate: Double = 0.5
override def featureToSearchResultFeatureMap: Map[Feature[_], SearchResultFeature] =
super.featureToSearchResultFeatureMap ++ Map(
TEXT_SCORE -> TweetFeature.TEXT_SCORE,
REPLY_COUNT -> TweetFeature.REPLY_COUNT,
RETWEET_COUNT -> TweetFeature.RETWEET_COUNT,
FAV_COUNT -> TweetFeature.FAVORITE_COUNT,
HAS_CARD -> TweetFeature.HAS_CARD_FLAG,
HAS_CONSUMER_VIDEO -> TweetFeature.HAS_CONSUMER_VIDEO_FLAG,
HAS_PRO_VIDEO -> TweetFeature.HAS_PRO_VIDEO_FLAG,
HAS_VINE -> TweetFeature.HAS_VINE_FLAG,
HAS_PERISCOPE -> TweetFeature.HAS_PERISCOPE_FLAG,
HAS_NATIVE_IMAGE -> TweetFeature.HAS_NATIVE_IMAGE_FLAG,
HAS_IMAGE -> TweetFeature.HAS_IMAGE_URL_FLAG,
HAS_NEWS -> TweetFeature.HAS_NEWS_URL_FLAG,
HAS_VIDEO -> TweetFeature.HAS_VIDEO_URL_FLAG,
// some features that exist for recap are not available in rectweet
// HAS_TREND
// HAS_MULTIPLE_HASHTAGS_OR_TRENDS
// IS_OFFENSIVE
// IS_REPLY
// IS_RETWEET
IS_AUTHOR_BOT -> TweetFeature.IS_USER_BOT_FLAG,
IS_AUTHOR_SPAM -> TweetFeature.IS_USER_SPAM_FLAG,
IS_AUTHOR_NSFW -> TweetFeature.IS_USER_NSFW_FLAG,
// FROM_VERIFIED_ACCOUNT
USER_REP -> TweetFeature.USER_REPUTATION,
// EMBEDS_IMPRESSION_COUNT
// EMBEDS_URL_COUNT
// VIDEO_VIEW_COUNT
FAV_COUNT_V2 -> TweetFeature.FAVORITE_COUNT_V2,
RETWEET_COUNT_V2 -> TweetFeature.RETWEET_COUNT_V2,
REPLY_COUNT_V2 -> TweetFeature.REPLY_COUNT_V2,
IS_SENSITIVE -> TweetFeature.IS_SENSITIVE_CONTENT,
HAS_MULTIPLE_MEDIA -> TweetFeature.HAS_MULTIPLE_MEDIA_FLAG,
IS_AUTHOR_PROFILE_EGG -> TweetFeature.PROFILE_IS_EGG_FLAG,
IS_AUTHOR_NEW -> TweetFeature.IS_USER_NEW_FLAG,
NUM_MENTIONS -> TweetFeature.NUM_MENTIONS,
NUM_HASHTAGS -> TweetFeature.NUM_HASHTAGS,
HAS_VISIBLE_LINK -> TweetFeature.HAS_VISIBLE_LINK_FLAG,
HAS_LINK -> TweetFeature.HAS_LINK_FLAG
)
override def derivedFeaturesAdder: CascadeTransform = {
// only LINK_LANGUAGE availabe in rectweet. no LANGUAGE feature
val linkLanguageTransform = new ITransform {
private val linkLanguageFeature = new Feature.Continuous(TweetFeature.LINK_LANGUAGE.getName)
override def transformContext(featureContext: FeatureContext): FeatureContext =
featureContext.addFeatures(
linkLanguageFeature
)
override def transform(record: DataRecord): Unit = {
val srecord = SRichDataRecord(record)
srecord.getFeatureValueOpt(LINK_LANGUAGE).map { link_language =>
srecord.setFeatureValue(
linkLanguageFeature,
link_language.toDouble
)
}
}
}
new CascadeTransform(
List(
super.derivedFeaturesAdder,
linkLanguageTransform
).asJava
)
}
}