mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-13 06:38:52 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
101 lines
3.8 KiB
Scala
101 lines
3.8 KiB
Scala
package com.twitter.timelines.data_processing.ad_hoc.earlybird_ranking.common
|
|
|
|
import com.twitter.ml.api.DataRecord
|
|
import com.twitter.ml.api.Feature
|
|
import com.twitter.ml.api.FeatureContext
|
|
import com.twitter.ml.api.ITransform
|
|
import com.twitter.ml.api.transform.CascadeTransform
|
|
import com.twitter.ml.api.util.SRichDataRecord
|
|
import com.twitter.search.common.features.SearchResultFeature
|
|
import com.twitter.search.common.features.TweetFeature
|
|
import com.twitter.timelines.prediction.features.itl.ITLFeatures._
|
|
import scala.collection.JavaConverters._
|
|
|
|
class EarlybirdTrainingRectweetConfiguration extends EarlybirdTrainingConfiguration {
|
|
|
|
override val labels: Map[String, Feature.Binary] = Map(
|
|
"detail_expanded" -> IS_CLICKED,
|
|
"favorited" -> IS_FAVORITED,
|
|
"open_linked" -> IS_OPEN_LINKED,
|
|
"photo_expanded" -> IS_PHOTO_EXPANDED,
|
|
"profile_clicked" -> IS_PROFILE_CLICKED,
|
|
"replied" -> IS_REPLIED,
|
|
"retweeted" -> IS_RETWEETED,
|
|
"video_playback50" -> IS_VIDEO_PLAYBACK_50
|
|
)
|
|
|
|
override val PositiveSamplingRate: Double = 0.5
|
|
|
|
override def featureToSearchResultFeatureMap: Map[Feature[_], SearchResultFeature] =
|
|
super.featureToSearchResultFeatureMap ++ Map(
|
|
TEXT_SCORE -> TweetFeature.TEXT_SCORE,
|
|
REPLY_COUNT -> TweetFeature.REPLY_COUNT,
|
|
RETWEET_COUNT -> TweetFeature.RETWEET_COUNT,
|
|
FAV_COUNT -> TweetFeature.FAVORITE_COUNT,
|
|
HAS_CARD -> TweetFeature.HAS_CARD_FLAG,
|
|
HAS_CONSUMER_VIDEO -> TweetFeature.HAS_CONSUMER_VIDEO_FLAG,
|
|
HAS_PRO_VIDEO -> TweetFeature.HAS_PRO_VIDEO_FLAG,
|
|
HAS_VINE -> TweetFeature.HAS_VINE_FLAG,
|
|
HAS_PERISCOPE -> TweetFeature.HAS_PERISCOPE_FLAG,
|
|
HAS_NATIVE_IMAGE -> TweetFeature.HAS_NATIVE_IMAGE_FLAG,
|
|
HAS_IMAGE -> TweetFeature.HAS_IMAGE_URL_FLAG,
|
|
HAS_NEWS -> TweetFeature.HAS_NEWS_URL_FLAG,
|
|
HAS_VIDEO -> TweetFeature.HAS_VIDEO_URL_FLAG,
|
|
// some features that exist for recap are not available in rectweet
|
|
// HAS_TREND
|
|
// HAS_MULTIPLE_HASHTAGS_OR_TRENDS
|
|
// IS_OFFENSIVE
|
|
// IS_REPLY
|
|
// IS_RETWEET
|
|
IS_AUTHOR_BOT -> TweetFeature.IS_USER_BOT_FLAG,
|
|
IS_AUTHOR_SPAM -> TweetFeature.IS_USER_SPAM_FLAG,
|
|
IS_AUTHOR_NSFW -> TweetFeature.IS_USER_NSFW_FLAG,
|
|
// FROM_VERIFIED_ACCOUNT
|
|
USER_REP -> TweetFeature.USER_REPUTATION,
|
|
// EMBEDS_IMPRESSION_COUNT
|
|
// EMBEDS_URL_COUNT
|
|
// VIDEO_VIEW_COUNT
|
|
FAV_COUNT_V2 -> TweetFeature.FAVORITE_COUNT_V2,
|
|
RETWEET_COUNT_V2 -> TweetFeature.RETWEET_COUNT_V2,
|
|
REPLY_COUNT_V2 -> TweetFeature.REPLY_COUNT_V2,
|
|
IS_SENSITIVE -> TweetFeature.IS_SENSITIVE_CONTENT,
|
|
HAS_MULTIPLE_MEDIA -> TweetFeature.HAS_MULTIPLE_MEDIA_FLAG,
|
|
IS_AUTHOR_PROFILE_EGG -> TweetFeature.PROFILE_IS_EGG_FLAG,
|
|
IS_AUTHOR_NEW -> TweetFeature.IS_USER_NEW_FLAG,
|
|
NUM_MENTIONS -> TweetFeature.NUM_MENTIONS,
|
|
NUM_HASHTAGS -> TweetFeature.NUM_HASHTAGS,
|
|
HAS_VISIBLE_LINK -> TweetFeature.HAS_VISIBLE_LINK_FLAG,
|
|
HAS_LINK -> TweetFeature.HAS_LINK_FLAG
|
|
)
|
|
|
|
override def derivedFeaturesAdder: CascadeTransform = {
|
|
// only LINK_LANGUAGE availabe in rectweet. no LANGUAGE feature
|
|
val linkLanguageTransform = new ITransform {
|
|
private val linkLanguageFeature = new Feature.Continuous(TweetFeature.LINK_LANGUAGE.getName)
|
|
|
|
override def transformContext(featureContext: FeatureContext): FeatureContext =
|
|
featureContext.addFeatures(
|
|
linkLanguageFeature
|
|
)
|
|
|
|
override def transform(record: DataRecord): Unit = {
|
|
val srecord = SRichDataRecord(record)
|
|
|
|
srecord.getFeatureValueOpt(LINK_LANGUAGE).map { link_language =>
|
|
srecord.setFeatureValue(
|
|
linkLanguageFeature,
|
|
link_language.toDouble
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
new CascadeTransform(
|
|
List(
|
|
super.derivedFeaturesAdder,
|
|
linkLanguageTransform
|
|
).asJava
|
|
)
|
|
}
|
|
}
|