// Source: the-algorithm/src/scala/com/twitter/timelines/prediction/features/time_features/TimeDataRecordFeatures.scala
// (112 lines, 4.7 KiB, Scala)

package com.twitter.timelines.prediction.features.time_features
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
import com.twitter.ml.api.Feature._
import scala.collection.JavaConverters._
import com.twitter.util.Duration
import com.twitter.conversions.DurationOps._
/**
 * Feature definitions published under the `time_features.` name prefix.
 *
 * Every member is a `Continuous`, `Binary` or `Discrete` feature constructed from its
 * feature-name string and, for most members, a Java set of personal data types.
 */
object TimeDataRecordFeatures {

  // Request-timing and tweet-age features
  val TIME_BETWEEN_NON_POLLING_REQUESTS_AVG: Continuous =
    new Continuous(
      "time_features.time_between_non_polling_requests_avg",
      Set(PrivateTimestamp).asJava)

  val TIME_SINCE_TWEET_CREATION: Continuous =
    new Continuous("time_features.time_since_tweet_creation")

  val TIME_SINCE_SOURCE_TWEET_CREATION: Continuous =
    new Continuous("time_features.time_since_source_tweet_creation")

  val TIME_SINCE_LAST_NON_POLLING_REQUEST: Continuous =
    new Continuous(
      "time_features.time_since_last_non_polling_request",
      Set(PrivateTimestamp).asJava)

  val NON_POLLING_REQUESTS_SINCE_TWEET_CREATION: Continuous =
    new Continuous(
      "time_features.non_polling_requests_since_tweet_creation",
      Set(PrivateTimestamp).asJava)

  val TWEET_AGE_RATIO: Continuous =
    new Continuous("time_features.tweet_age_ratio")

  val IS_TWEET_RECYCLED: Binary =
    new Binary("time_features.is_tweet_recycled")

  // Last Engagement features
  // NOTE(review): both quote features below are tagged with the retweet-count data types
  // (CountOfPrivateRetweets / CountOfPublicRetweets) -- confirm this is intentional.
  val LAST_FAVORITE_SINCE_CREATION_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.last_favorite_since_creation_hrs",
      Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)

  val LAST_RETWEET_SINCE_CREATION_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.last_retweet_since_creation_hrs",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)

  val LAST_REPLY_SINCE_CREATION_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.last_reply_since_creation_hrs",
      Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)

  val LAST_QUOTE_SINCE_CREATION_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.last_quote_since_creation_hrs",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)

  // The feature-name strings of the four TIME_SINCE_LAST_*_HRS members carry no "_hrs" suffix,
  // unlike the Scala identifiers; the strings are kept exactly as they are.
  val TIME_SINCE_LAST_FAVORITE_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.time_since_last_favorite",
      Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)

  val TIME_SINCE_LAST_RETWEET_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.time_since_last_retweet",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)

  val TIME_SINCE_LAST_REPLY_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.time_since_last_reply",
      Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)

  val TIME_SINCE_LAST_QUOTE_HRS: Continuous =
    new Continuous(
      "time_features.earlybird.time_since_last_quote",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)

  // Viewer-account features
  val TIME_SINCE_VIEWER_ACCOUNT_CREATION_SECS: Continuous =
    new Continuous(
      "time_features.time_since_viewer_account_creation_secs",
      Set(AccountCreationTime, AgeOfAccount).asJava)

  val USER_ID_IS_SNOWFLAKE_ID: Binary =
    new Binary(
      "time_features.time_user_id_is_snowflake_id",
      Set(UserType).asJava)

  val IS_30_DAY_NEW_USER: Binary =
    new Binary(
      "time_features.is_day_30_new_user",
      Set(AccountCreationTime, AgeOfAccount).asJava)

  val IS_12_MONTH_NEW_USER: Binary =
    new Binary(
      "time_features.is_month_12_new_user",
      Set(AccountCreationTime, AgeOfAccount).asJava)

  val ACCOUNT_AGE_INTERVAL: Discrete =
    new Discrete(
      "time_features.account_age_interval",
      Set(AgeOfAccount).asJava)
}
/**
 * Enumeration of account-age buckets; each value is named after the bounds it covers.
 */
object AccountAgeInterval extends Enumeration {
  // Declared one per line, in the same order as before, so the assigned ids are unchanged.
  val LTE_1_DAY = Value
  val GT_1_DAY_LTE_5_DAY = Value
  val GT_5_DAY_LTE_14_DAY = Value
  val GT_14_DAY_LTE_30_DAY = Value

  /**
   * Maps an account age onto its bucket.
   *
   * @param accountAge the account age to classify
   * @return the bucket whose bounds contain `accountAge`, or `None` when no bucket applies
   *         (for example, an age above 30 days)
   */
  def fromDuration(accountAge: Duration): Option[AccountAgeInterval.Value] =
    // The first branch has no lower bound: every age at or below one day lands in LTE_1_DAY.
    if (accountAge <= 1.day) Some(LTE_1_DAY)
    else if (1.day < accountAge && accountAge <= 5.days) Some(GT_1_DAY_LTE_5_DAY)
    else if (5.days < accountAge && accountAge <= 14.days) Some(GT_5_DAY_LTE_14_DAY)
    else if (14.days < accountAge && accountAge <= 30.days) Some(GT_14_DAY_LTE_30_DAY)
    else None
}
/**
 * Bundle of time-related feature values.
 *
 * The first five fields are required plain values. `timeSinceViewerAccountCreationSecs` is also
 * required but is itself an `Option`. Every remaining field is optional and defaults to `None`.
 *
 * NOTE(review): the field names appear to mirror the feature definitions in
 * `TimeDataRecordFeatures` (e.g. `isTweetRecycled` / `IS_TWEET_RECYCLED`); the code that maps
 * one onto the other is not in this file -- confirm against the caller.
 */
case class TimeFeatures(
isTweetRecycled: Boolean,
timeSinceTweetCreation: Double,
isDay30NewUser: Boolean,
isMonth12NewUser: Boolean,
timeSinceSourceTweetCreation: Double, // same as timeSinceTweetCreation for non-retweets
timeSinceViewerAccountCreationSecs: Option[Double],
timeBetweenNonPollingRequestsAvg: Option[Double] = None,
timeSinceLastNonPollingRequest: Option[Double] = None,
nonPollingRequestsSinceTweetCreation: Option[Double] = None,
tweetAgeRatio: Option[Double] = None,
lastFavSinceCreationHrs: Option[Double] = None,
lastRetweetSinceCreationHrs: Option[Double] = None,
lastReplySinceCreationHrs: Option[Double] = None,
lastQuoteSinceCreationHrs: Option[Double] = None,
timeSinceLastFavoriteHrs: Option[Double] = None,
timeSinceLastRetweetHrs: Option[Double] = None,
timeSinceLastReplyHrs: Option[Double] = None,
timeSinceLastQuoteHrs: Option[Double] = None,
accountAgeInterval: Option[AccountAgeInterval.Value] = None)