mirror of
https://github.com/twitter/the-algorithm.git
synced 2025-06-16 12:58:43 +02:00
Open-sourcing Timelines Aggregation Framework
Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models.
This commit is contained in:
@ -0,0 +1,68 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.ml.api.ITransform
|
||||
import com.twitter.ml.api.constant.SharedFeatures
|
||||
import java.lang.{Double => JDouble}
|
||||
|
||||
import com.twitter.timelines.prediction.common.adapters.AdapterConsumer
|
||||
import com.twitter.timelines.prediction.common.adapters.EngagementLabelFeaturesDataRecordUtils
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.timelines.suggests.common.engagement.thriftscala.EngagementType
|
||||
import com.twitter.timelines.suggests.common.engagement.thriftscala.Engagement
|
||||
import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
|
||||
import com.twitter.timelines.prediction.features.common.CombinedFeatures
|
||||
|
||||
/**
 * Transforms BCE (behavior client events) UUA data records that contain only continuous
 * dwell times into data records that also carry the corresponding binary label features.
 *
 * The incoming UUA data records are expected to have USER_ID, SOURCE_TWEET_ID, TIMESTAMP and
 * zero or one of the (TWEET_DETAIL_DWELL_TIME_MS, PROFILE_DWELL_TIME_MS,
 * FULLSCREEN_VIDEO_DWELL_TIME_MS) features.
 *
 * Which dwell-time feature is present tells us which engagement happened; the resulting
 * engagements are then handed to the shared conversion used by EngagementTypeConverter so the
 * binary labels are added to the data record.
 */
object BCELabelTransformFromUUADataRecord extends ITransform {

  /** Each supported dwell-time feature paired with the engagement type it stands for. */
  val dwellTimeFeatureToEngagementMap = Map(
    TimelinesSharedFeatures.TWEET_DETAIL_DWELL_TIME_MS -> EngagementType.TweetDetailDwell,
    TimelinesSharedFeatures.PROFILE_DWELL_TIME_MS -> EngagementType.ProfileDwell,
    TimelinesSharedFeatures.FULLSCREEN_VIDEO_DWELL_TIME_MS -> EngagementType.FullscreenVideoDwell
  )

  /**
   * Builds an [[Engagement]] from a single dwell-time feature of a record.
   *
   * @param rdr              record to read the dwell time and timestamp from
   * @param dwellTimeFeature dwell-time feature to look for on the record
   * @param engagementType   engagement type to emit when the feature is present
   * @return `None` when the record does not have `dwellTimeFeature`; otherwise an engagement
   *         of `engagementType` stamped with the record's TIMESTAMP feature and carrying the
   *         dwell time value as its weight
   */
  def dwellFeatureToEngagement(
    rdr: RichDataRecord,
    dwellTimeFeature: Feature[JDouble],
    engagementType: EngagementType
  ): Option[Engagement] =
    if (!rdr.hasFeature(dwellTimeFeature)) {
      None
    } else {
      Some(
        Engagement(
          engagementType = engagementType,
          // NOTE(review): TIMESTAMP is read unconditionally here - confirm it is always set.
          timestampMs = rdr.getFeatureValue(SharedFeatures.TIMESTAMP),
          weight = Some(rdr.getFeatureValue(dwellTimeFeature))
        )
      )
    }

  /** Registers the dwell engagement label features of all three surfaces on the context. */
  override def transformContext(featureContext: FeatureContext): FeatureContext = {
    val dwellLabelFeatures =
      CombinedFeatures.TweetDetailDwellEngagements ++
        CombinedFeatures.ProfileDwellEngagements ++
        CombinedFeatures.FullscreenVideoDwellEngagements

    featureContext.addFeatures(dwellLabelFeatures.toSeq: _*)
  }

  /**
   * Derives one engagement per dwell-time feature present on `record` and passes them to the
   * shared dwell-time label setter.
   */
  override def transform(record: DataRecord): Unit = {
    val richRecord = new RichDataRecord(record)

    val dwellEngagements = dwellTimeFeatureToEngagementMap.toSeq.flatMap {
      case (dwellTimeFeature, engagementType) =>
        dwellFeatureToEngagement(richRecord, dwellTimeFeature, engagementType)
    }

    // Re-use the BCE (behavior client events) label conversion in EngagementTypeConverter so
    // these labels align with the BCE label generation for offline training data.
    EngagementLabelFeaturesDataRecordUtils.setDwellTimeFeatures(
      richRecord,
      Some(dwellEngagements),
      AdapterConsumer.Combined
    )
  }
}
|
@ -0,0 +1,353 @@
|
||||
create_datasets(
|
||||
base_name = "original_author_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/original_author_aggregates/1556496000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.OriginalAuthor",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "twitter_wide_user_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_aggregates/1556496000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUser",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "twitter_wide_user_author_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_author_aggregates/1556323200000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUserAuthor",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_aggregates/1556150400000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_author_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_author_aggregates/1556064000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserAuthor",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "aggregates_canary",
|
||||
fallback_path = "gs://user.timelines.dp.gcp.twttr.net//canaries/processed/aggregates_v2/user_aggregates/1622851200000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_engager_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_engager_aggregates/1556496000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserEngager",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_original_author_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1556496000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserOriginalAuthor",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "author_topic_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/author_topic_aggregates/1589932800000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.AuthorTopic",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_topic_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_topic_aggregates/1590278400000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserTopic",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_inferred_topic_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_inferred_topic_aggregates/1599696000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserInferredTopic",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_mention_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_mention_aggregates/1556582400000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMention",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_request_dow_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_dow_aggregates/1556236800000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestDow",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_request_hour_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_hour_aggregates/1556150400000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestHour",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_list_aggregates",
|
||||
fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_list_aggregates/1590624000000",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserList",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
create_datasets(
|
||||
base_name = "user_media_understanding_annotation_aggregates",
|
||||
key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
|
||||
platform = "java8",
|
||||
role = "timelines",
|
||||
scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMediaUnderstandingAnnotation",
|
||||
segment_type = "snapshot",
|
||||
tags = ["bazel-compatible"],
|
||||
val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
|
||||
scala_dependencies = [
|
||||
":injections",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
],
|
||||
)
|
||||
|
||||
scala_library(
|
||||
sources = [
|
||||
"BCELabelTransformFromUUADataRecord.scala",
|
||||
"FeatureSelectorConfig.scala",
|
||||
"RecapUserFeatureAggregation.scala",
|
||||
"RectweetUserFeatureAggregation.scala",
|
||||
"TimelinesAggregationConfig.scala",
|
||||
"TimelinesAggregationConfigDetails.scala",
|
||||
"TimelinesAggregationConfigTrait.scala",
|
||||
"TimelinesAggregationSources.scala",
|
||||
],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
":aggregates_canary-scala",
|
||||
":author_topic_aggregates-scala",
|
||||
":original_author_aggregates-scala",
|
||||
":twitter_wide_user_aggregates-scala",
|
||||
":twitter_wide_user_author_aggregates-scala",
|
||||
":user_aggregates-scala",
|
||||
":user_author_aggregates-scala",
|
||||
":user_engager_aggregates-scala",
|
||||
":user_inferred_topic_aggregates-scala",
|
||||
":user_list_aggregates-scala",
|
||||
":user_media_understanding_annotation_aggregates-scala",
|
||||
":user_mention_aggregates-scala",
|
||||
":user_original_author_aggregates-scala",
|
||||
":user_request_dow_aggregates-scala",
|
||||
":user_request_hour_aggregates-scala",
|
||||
":user_topic_aggregates-scala",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/ml/api/constant",
|
||||
"src/java/com/twitter/ml/api/matcher",
|
||||
"src/scala/com/twitter/common/text/util",
|
||||
"src/scala/com/twitter/dal/client/dataset",
|
||||
"src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
|
||||
"src/scala/com/twitter/scalding_internal/multiformat/format",
|
||||
"src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
|
||||
"src/scala/com/twitter/timelines/prediction/features/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/features/common",
|
||||
"src/scala/com/twitter/timelines/prediction/features/engagement_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/escherbird",
|
||||
"src/scala/com/twitter/timelines/prediction/features/itl",
|
||||
"src/scala/com/twitter/timelines/prediction/features/list_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/p_home_latest",
|
||||
"src/scala/com/twitter/timelines/prediction/features/real_graph",
|
||||
"src/scala/com/twitter/timelines/prediction/features/recap",
|
||||
"src/scala/com/twitter/timelines/prediction/features/request_context",
|
||||
"src/scala/com/twitter/timelines/prediction/features/simcluster",
|
||||
"src/scala/com/twitter/timelines/prediction/features/time_features",
|
||||
"src/scala/com/twitter/timelines/prediction/transform/filter",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
|
||||
"timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
|
||||
"util/util-core:scala",
|
||||
],
|
||||
)
|
||||
|
||||
scala_library(
|
||||
name = "injections",
|
||||
sources = [
|
||||
"FeatureSelectorConfig.scala",
|
||||
"RecapUserFeatureAggregation.scala",
|
||||
"RectweetUserFeatureAggregation.scala",
|
||||
"TimelinesAggregationConfigDetails.scala",
|
||||
"TimelinesAggregationConfigTrait.scala",
|
||||
"TimelinesAggregationKeyValInjections.scala",
|
||||
"TimelinesAggregationSources.scala",
|
||||
],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/ml/api/constant",
|
||||
"src/java/com/twitter/ml/api/matcher",
|
||||
"src/scala/com/twitter/common/text/util",
|
||||
"src/scala/com/twitter/dal/client/dataset",
|
||||
"src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
|
||||
"src/scala/com/twitter/scalding_internal/multiformat/format",
|
||||
"src/scala/com/twitter/timelines/prediction/features/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/features/common",
|
||||
"src/scala/com/twitter/timelines/prediction/features/engagement_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/escherbird",
|
||||
"src/scala/com/twitter/timelines/prediction/features/itl",
|
||||
"src/scala/com/twitter/timelines/prediction/features/list_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/p_home_latest",
|
||||
"src/scala/com/twitter/timelines/prediction/features/real_graph",
|
||||
"src/scala/com/twitter/timelines/prediction/features/recap",
|
||||
"src/scala/com/twitter/timelines/prediction/features/request_context",
|
||||
"src/scala/com/twitter/timelines/prediction/features/semantic_core_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/simcluster",
|
||||
"src/scala/com/twitter/timelines/prediction/features/time_features",
|
||||
"src/scala/com/twitter/timelines/prediction/transform/filter",
|
||||
"timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
|
||||
"util/util-core:scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,121 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.matcher.FeatureMatcher
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Feature selection configuration for the timelines aggregates: the (prefix, suffix) pairs and
 * labels that are combined into glob matchers, plus the selectors assembled from those globs.
 */
object FeatureSelectorConfig {

  /**
   * (aggregate prefix, feature suffix glob) pairs shared by the full and the reduced glob sets.
   * Each pair is expanded once per entry of [[LabelsToStore]].
   */
  val BasePairsToStore: Seq[(String, String)] = Seq(
    ("twitter_wide_user_aggregate.pair", "*"),
    ("twitter_wide_user_author_aggregate.pair", "*"),
    ("user_aggregate_v5.continuous.pair", "*"),
    ("user_aggregate_v7.pair", "*"),
    ("user_author_aggregate_v2.pair", "recap.earlybird.*"),
    ("user_author_aggregate_v2.pair", "recap.searchfeature.*"),
    ("user_author_aggregate_v2.pair", "recap.tweetfeature.embeds*"),
    ("user_author_aggregate_v2.pair", "recap.tweetfeature.link_count*"),
    ("user_author_aggregate_v2.pair", "engagement_features.in_network.*"),
    ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_reply.*"),
    ("user_author_aggregate_v2.pair", "recap.tweetfeature.is_retweet.*"),
    ("user_author_aggregate_v2.pair", "recap.tweetfeature.num_mentions.*"),
    ("user_author_aggregate_v5.pair", "*"),
    ("user_author_aggregate_tweetsource_v1.pair", "*"),
    ("user_engager_aggregate.pair", "*"),
    ("user_mention_aggregate.pair", "*"),
    ("user_request_context_aggregate.dow.pair", "*"),
    ("user_request_context_aggregate.hour.pair", "*"),
    ("user_aggregate_v6.pair", "*"),
    ("user_original_author_aggregate_v1.pair", "*"),
    ("user_original_author_aggregate_v2.pair", "*"),
    ("original_author_aggregate_v1.pair", "*"),
    ("original_author_aggregate_v2.pair", "*"),
    ("author_topic_aggregate.pair", "*"),
    ("user_list_aggregate.pair", "*"),
    ("user_topic_aggregate.pair", "*"),
    ("user_topic_aggregate_v2.pair", "*"),
    ("user_inferred_topic_aggregate.pair", "*"),
    ("user_inferred_topic_aggregate_v2.pair", "*"),
    // This pair used to be listed twice, which only produced duplicate globs.
    ("user_media_annotation_aggregate.pair", "*"),
    ("user_author_good_click_aggregate.pair", "*"),
    ("user_engager_good_click_aggregate.pair", "*")
  )

  /** [[BasePairsToStore]] plus the pairs that are only part of the full glob set. */
  val PairsToStore: Seq[(String, String)] = BasePairsToStore ++ Seq(
    ("user_aggregate_v2.pair", "*"),
    ("user_aggregate_v5.boolean.pair", "*"),
    ("user_aggregate_tweetsource_v1.pair", "*")
  )

  /** Label names placed between a pair's prefix and suffix when building globs. */
  val LabelsToStore: Seq[String] = Seq(
    "any_label",
    "recap.engagement.is_favorited",
    "recap.engagement.is_retweeted",
    "recap.engagement.is_replied",
    "recap.engagement.is_open_linked",
    "recap.engagement.is_profile_clicked",
    "recap.engagement.is_clicked",
    "recap.engagement.is_photo_expanded",
    "recap.engagement.is_video_playback_50",
    "recap.engagement.is_video_quality_viewed",
    "recap.engagement.is_replied_reply_impressed_by_author",
    "recap.engagement.is_replied_reply_favorited_by_author",
    "recap.engagement.is_replied_reply_replied_by_author",
    "recap.engagement.is_report_tweet_clicked",
    "recap.engagement.is_block_clicked",
    "recap.engagement.is_mute_clicked",
    "recap.engagement.is_dont_like",
    "recap.engagement.is_good_clicked_convo_desc_favorited_or_replied",
    "recap.engagement.is_good_clicked_convo_desc_v2",
    "itl.engagement.is_favorited",
    "itl.engagement.is_retweeted",
    "itl.engagement.is_replied",
    "itl.engagement.is_open_linked",
    "itl.engagement.is_profile_clicked",
    "itl.engagement.is_clicked",
    "itl.engagement.is_photo_expanded",
    "itl.engagement.is_video_playback_50"
  )

  /**
   * Expands every (prefix, suffix) pair against every label in [[LabelsToStore]] into a glob
   * matcher of the form `<prefix>.<label>.<suffix>`, pairs outermost and labels innermost.
   */
  private def pairGlobs(pairs: Seq[(String, String)]) =
    for {
      (prefix, suffix) <- pairs
      label <- LabelsToStore
    } yield FeatureMatcher.glob(s"$prefix.$label.$suffix")

  /** Globs for every pair in [[PairsToStore]] crossed with every label. */
  val PairGlobsToStore = pairGlobs(PairsToStore)

  /** Matcher for the individually listed features that do not follow the pair/label scheme. */
  val BaseAggregateV2FeatureSelector = FeatureMatcher
    .none()
    .or(
      FeatureMatcher.glob("meta.user_id"),
      FeatureMatcher.glob("meta.author_id"),
      FeatureMatcher.glob("entities.original_author_id"),
      FeatureMatcher.glob("entities.topic_id"),
      FeatureMatcher
        .glob("entities.inferred_topic_ids" + TypedAggregateGroup.SparseFeatureSuffix),
      FeatureMatcher.glob("timelines.meta.list_id"),
      FeatureMatcher.glob("list.id"),
      FeatureMatcher
        .glob("engagement_features.user_ids.public" + TypedAggregateGroup.SparseFeatureSuffix),
      FeatureMatcher
        .glob("entities.users.mentioned_screen_names" + TypedAggregateGroup.SparseFeatureSuffix),
      FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_dont_like.*"),
      FeatureMatcher.glob("user_author_aggregate_v2.pair.any_label.recap.tweetfeature.has_*"),
      FeatureMatcher.glob("request_context.country_code"),
      FeatureMatcher.glob("request_context.timestamp_gmt_dow"),
      FeatureMatcher.glob("request_context.timestamp_gmt_hour"),
      FeatureMatcher.glob(
        "semantic_core.media_understanding.high_recall.non_sensitive.entity_ids" + TypedAggregateGroup.SparseFeatureSuffix)
    )

  /** The base selector extended with every glob in [[PairGlobsToStore]]. */
  val AggregatesV2ProdFeatureSelector = BaseAggregateV2FeatureSelector
    .orList(PairGlobsToStore.asJava)

  /**
   * Reduced glob set: every [[BasePairsToStore]] pair crossed with every label, plus a
   * hand-picked subset of the `user_aggregate_v2` globs.
   */
  val ReducedPairGlobsToStore = pairGlobs(BasePairsToStore) ++ Seq(
    FeatureMatcher.glob("user_aggregate_v2.pair.any_label.*"),
    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_favorited.*"),
    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_photo_expanded.*"),
    FeatureMatcher.glob("user_aggregate_v2.pair.recap.engagement.is_profile_clicked.*")
  )
}
|
@ -0,0 +1,6 @@
|
||||
## Timelines Aggregation Jobs
|
||||
|
||||
This directory contains the specific definition of aggregate jobs that generate features used by the Heavy Ranker.
|
||||
The primary files of interest are [`TimelinesAggregationConfigDetails.scala`](TimelinesAggregationConfigDetails.scala), which contains the definitions for the batch aggregate jobs, and [`real_time/TimelinesOnlineAggregationConfigBase.scala`](real_time/TimelinesOnlineAggregationConfigBase.scala), which contains the definitions for the real time aggregate jobs.
|
||||
|
||||
The aggregation framework that these jobs are based on is [here](../../../../../../../../timelines/data_processing/ml_util/aggregation_framework).
|
@ -0,0 +1,415 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
|
||||
import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
|
||||
import com.twitter.timelines.prediction.features.real_graph.RealGraphDataRecordFeatures
|
||||
import com.twitter.timelines.prediction.features.recap.RecapFeatures
|
||||
import com.twitter.timelines.prediction.features.time_features.TimeDataRecordFeatures
|
||||
|
||||
/**
 * Named sets of features and binary labels built from [[RecapFeatures]],
 * [[RealGraphDataRecordFeatures]], [[TimelinesSharedFeatures]], [[TimeDataRecordFeatures]] and
 * [[EngagementDataRecordFeatures]].
 *
 * NOTE(review): presumably these sets are referenced by the aggregate group definitions in this
 * package - confirm against the aggregation config.
 *
 * Members are plain `val`s initialised in declaration order, so a set may only be derived from
 * sets declared above it.
 */
object RecapUserFeatureAggregation {

  /** Recap and real-graph features; also the base of [[UserFeaturesV2]]. */
  val RecapFeaturesForAggregation: Set[Feature[_]] = Set(
    // RecapFeatures entries
    RecapFeatures.HAS_IMAGE,
    RecapFeatures.HAS_VIDEO,
    RecapFeatures.FROM_MUTUAL_FOLLOW,
    RecapFeatures.HAS_CARD,
    RecapFeatures.HAS_NEWS,
    RecapFeatures.REPLY_COUNT,
    RecapFeatures.FAV_COUNT,
    RecapFeatures.RETWEET_COUNT,
    RecapFeatures.BLENDER_SCORE,
    RecapFeatures.CONVERSATIONAL_COUNT,
    RecapFeatures.IS_BUSINESS_SCORE,
    RecapFeatures.CONTAINS_MEDIA,
    RecapFeatures.RETWEET_SEARCHER,
    RecapFeatures.REPLY_SEARCHER,
    RecapFeatures.MENTION_SEARCHER,
    RecapFeatures.REPLY_OTHER,
    RecapFeatures.RETWEET_OTHER,
    RecapFeatures.MATCH_UI_LANG,
    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
    RecapFeatures.MATCH_SEARCHER_LANGS,
    RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT,
    RecapFeatures.TEXT_SCORE,
    // RealGraphDataRecordFeatures entries
    RealGraphDataRecordFeatures.NUM_RETWEETS_EWMA,
    RealGraphDataRecordFeatures.NUM_RETWEETS_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_RETWEETS_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_RETWEETS_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.NUM_FAVORITES_EWMA,
    RealGraphDataRecordFeatures.NUM_FAVORITES_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_FAVORITES_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_FAVORITES_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.NUM_MENTIONS_EWMA,
    RealGraphDataRecordFeatures.NUM_MENTIONS_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_MENTIONS_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_MENTIONS_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_EWMA,
    RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_TWEET_CLICKS_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_EWMA,
    RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_PROFILE_VIEWS_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_EWMA,
    RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.TOTAL_DWELL_TIME_DAYS_SINCE_LAST,
    RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_EWMA,
    RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_NON_ZERO_DAYS,
    RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_ELAPSED_DAYS,
    RealGraphDataRecordFeatures.NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST
  )

  /** Binary recap labels; also the base of [[LabelsV2]]. */
  val RecapLabelsForAggregation: Set[Feature.Binary] = Set(
    RecapFeatures.IS_FAVORITED,
    RecapFeatures.IS_RETWEETED,
    RecapFeatures.IS_CLICKED,
    RecapFeatures.IS_PROFILE_CLICKED,
    RecapFeatures.IS_OPEN_LINKED
  )

  /** Single-element set holding the dwell-time feature. */
  val DwellDuration: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.DWELL_TIME_MS
  )

  /** [[RecapFeaturesForAggregation]] plus additional recap and in-network engagement features. */
  val UserFeaturesV2: Set[Feature[_]] = RecapFeaturesForAggregation ++ Set(
    RecapFeatures.HAS_VINE,
    RecapFeatures.HAS_PERISCOPE,
    RecapFeatures.HAS_PRO_VIDEO,
    RecapFeatures.HAS_VISIBLE_LINK,
    RecapFeatures.BIDIRECTIONAL_FAV_COUNT,
    RecapFeatures.UNIDIRECTIONAL_FAV_COUNT,
    RecapFeatures.BIDIRECTIONAL_REPLY_COUNT,
    RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT,
    RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT,
    RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT,
    RecapFeatures.EMBEDS_URL_COUNT,
    RecapFeatures.EMBEDS_IMPRESSION_COUNT,
    RecapFeatures.VIDEO_VIEW_COUNT,
    RecapFeatures.IS_RETWEET,
    RecapFeatures.IS_REPLY,
    RecapFeatures.IS_EXTENDED_REPLY,
    RecapFeatures.HAS_LINK,
    RecapFeatures.HAS_TREND,
    RecapFeatures.LINK_LANGUAGE,
    RecapFeatures.NUM_HASHTAGS,
    RecapFeatures.NUM_MENTIONS,
    RecapFeatures.IS_SENSITIVE,
    RecapFeatures.HAS_MULTIPLE_MEDIA,
    RecapFeatures.USER_REP,
    RecapFeatures.FAV_COUNT_V2,
    RecapFeatures.RETWEET_COUNT_V2,
    RecapFeatures.REPLY_COUNT_V2,
    RecapFeatures.LINK_COUNT,
    EngagementDataRecordFeatures.InNetworkFavoritesCount,
    EngagementDataRecordFeatures.InNetworkRetweetsCount,
    EngagementDataRecordFeatures.InNetworkRepliesCount
  )

  /** Explicitly enumerated recap and in-network engagement features (V2, user-author). */
  val UserAuthorFeaturesV2: Set[Feature[_]] = Set(
    RecapFeatures.HAS_IMAGE,
    RecapFeatures.HAS_VINE,
    RecapFeatures.HAS_PERISCOPE,
    RecapFeatures.HAS_PRO_VIDEO,
    RecapFeatures.HAS_VIDEO,
    RecapFeatures.HAS_CARD,
    RecapFeatures.HAS_NEWS,
    RecapFeatures.HAS_VISIBLE_LINK,
    RecapFeatures.REPLY_COUNT,
    RecapFeatures.FAV_COUNT,
    RecapFeatures.RETWEET_COUNT,
    RecapFeatures.BLENDER_SCORE,
    RecapFeatures.CONVERSATIONAL_COUNT,
    RecapFeatures.IS_BUSINESS_SCORE,
    RecapFeatures.CONTAINS_MEDIA,
    RecapFeatures.RETWEET_SEARCHER,
    RecapFeatures.REPLY_SEARCHER,
    RecapFeatures.MENTION_SEARCHER,
    RecapFeatures.REPLY_OTHER,
    RecapFeatures.RETWEET_OTHER,
    RecapFeatures.MATCH_UI_LANG,
    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
    RecapFeatures.MATCH_SEARCHER_LANGS,
    RecapFeatures.TWEET_COUNT_FROM_USER_IN_SNAPSHOT,
    RecapFeatures.TEXT_SCORE,
    RecapFeatures.BIDIRECTIONAL_FAV_COUNT,
    RecapFeatures.UNIDIRECTIONAL_FAV_COUNT,
    RecapFeatures.BIDIRECTIONAL_REPLY_COUNT,
    RecapFeatures.UNIDIRECTIONAL_REPLY_COUNT,
    RecapFeatures.BIDIRECTIONAL_RETWEET_COUNT,
    RecapFeatures.UNIDIRECTIONAL_RETWEET_COUNT,
    RecapFeatures.EMBEDS_URL_COUNT,
    RecapFeatures.EMBEDS_IMPRESSION_COUNT,
    RecapFeatures.VIDEO_VIEW_COUNT,
    RecapFeatures.IS_RETWEET,
    RecapFeatures.IS_REPLY,
    RecapFeatures.HAS_LINK,
    RecapFeatures.HAS_TREND,
    RecapFeatures.LINK_LANGUAGE,
    RecapFeatures.NUM_HASHTAGS,
    RecapFeatures.NUM_MENTIONS,
    RecapFeatures.IS_SENSITIVE,
    RecapFeatures.HAS_MULTIPLE_MEDIA,
    RecapFeatures.FAV_COUNT_V2,
    RecapFeatures.RETWEET_COUNT_V2,
    RecapFeatures.REPLY_COUNT_V2,
    RecapFeatures.LINK_COUNT,
    EngagementDataRecordFeatures.InNetworkFavoritesCount,
    EngagementDataRecordFeatures.InNetworkRetweetsCount,
    EngagementDataRecordFeatures.InNetworkRepliesCount
  )

  /** Explicitly enumerated features for the user-author "count" variant (V2). */
  val UserAuthorFeaturesV2Count: Set[Feature[_]] = Set(
    RecapFeatures.HAS_IMAGE,
    RecapFeatures.HAS_VINE,
    RecapFeatures.HAS_PERISCOPE,
    RecapFeatures.HAS_PRO_VIDEO,
    RecapFeatures.HAS_VIDEO,
    RecapFeatures.HAS_CARD,
    RecapFeatures.HAS_NEWS,
    RecapFeatures.HAS_VISIBLE_LINK,
    RecapFeatures.FAV_COUNT,
    RecapFeatures.CONTAINS_MEDIA,
    RecapFeatures.RETWEET_SEARCHER,
    RecapFeatures.REPLY_SEARCHER,
    RecapFeatures.MENTION_SEARCHER,
    RecapFeatures.REPLY_OTHER,
    RecapFeatures.RETWEET_OTHER,
    RecapFeatures.MATCH_UI_LANG,
    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
    RecapFeatures.MATCH_SEARCHER_LANGS,
    RecapFeatures.IS_RETWEET,
    RecapFeatures.IS_REPLY,
    RecapFeatures.HAS_LINK,
    RecapFeatures.HAS_TREND,
    RecapFeatures.IS_SENSITIVE,
    RecapFeatures.HAS_MULTIPLE_MEDIA,
    EngagementDataRecordFeatures.InNetworkFavoritesCount
  )

  /** Explicitly enumerated features for the user-topic "count" variant (V2). */
  val UserTopicFeaturesV2Count: Set[Feature[_]] = Set(
    // RecapFeatures entries
    RecapFeatures.HAS_IMAGE,
    RecapFeatures.HAS_VIDEO,
    RecapFeatures.HAS_CARD,
    RecapFeatures.HAS_NEWS,
    RecapFeatures.FAV_COUNT,
    RecapFeatures.CONTAINS_MEDIA,
    RecapFeatures.RETWEET_SEARCHER,
    RecapFeatures.REPLY_SEARCHER,
    RecapFeatures.MENTION_SEARCHER,
    RecapFeatures.REPLY_OTHER,
    RecapFeatures.RETWEET_OTHER,
    RecapFeatures.MATCH_UI_LANG,
    RecapFeatures.MATCH_SEARCHER_MAIN_LANG,
    RecapFeatures.MATCH_SEARCHER_LANGS,
    RecapFeatures.IS_RETWEET,
    RecapFeatures.IS_REPLY,
    RecapFeatures.HAS_LINK,
    RecapFeatures.HAS_TREND,
    RecapFeatures.IS_SENSITIVE,
    // EngagementDataRecordFeatures entries
    EngagementDataRecordFeatures.InNetworkFavoritesCount,
    EngagementDataRecordFeatures.InNetworkRetweetsCount,
    // TimelinesSharedFeatures entries
    TimelinesSharedFeatures.NUM_CAPS,
    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
    TimelinesSharedFeatures.NUM_NEWLINES,
    TimelinesSharedFeatures.IS_360,
    TimelinesSharedFeatures.IS_MANAGED,
    TimelinesSharedFeatures.IS_MONETIZABLE,
    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
    TimelinesSharedFeatures.HAS_TITLE,
    TimelinesSharedFeatures.HAS_DESCRIPTION,
    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
  )

  /** Shared and time-based features grouped under the "V5 continuous" name. */
  val UserFeaturesV5Continuous: Set[Feature[_]] = Set(
    // TimelinesSharedFeatures entries
    TimelinesSharedFeatures.QUOTE_COUNT,
    TimelinesSharedFeatures.VISIBLE_TOKEN_RATIO,
    TimelinesSharedFeatures.WEIGHTED_FAV_COUNT,
    TimelinesSharedFeatures.WEIGHTED_RETWEET_COUNT,
    TimelinesSharedFeatures.WEIGHTED_REPLY_COUNT,
    TimelinesSharedFeatures.WEIGHTED_QUOTE_COUNT,
    TimelinesSharedFeatures.EMBEDS_IMPRESSION_COUNT_V2,
    TimelinesSharedFeatures.EMBEDS_URL_COUNT_V2,
    TimelinesSharedFeatures.DECAYED_FAVORITE_COUNT,
    TimelinesSharedFeatures.DECAYED_RETWEET_COUNT,
    TimelinesSharedFeatures.DECAYED_REPLY_COUNT,
    TimelinesSharedFeatures.DECAYED_QUOTE_COUNT,
    TimelinesSharedFeatures.FAKE_FAVORITE_COUNT,
    TimelinesSharedFeatures.FAKE_RETWEET_COUNT,
    TimelinesSharedFeatures.FAKE_REPLY_COUNT,
    TimelinesSharedFeatures.FAKE_QUOTE_COUNT,
    // TimeDataRecordFeatures entries
    TimeDataRecordFeatures.LAST_FAVORITE_SINCE_CREATION_HRS,
    TimeDataRecordFeatures.LAST_RETWEET_SINCE_CREATION_HRS,
    TimeDataRecordFeatures.LAST_REPLY_SINCE_CREATION_HRS,
    TimeDataRecordFeatures.LAST_QUOTE_SINCE_CREATION_HRS,
    TimeDataRecordFeatures.TIME_SINCE_LAST_FAVORITE_HRS,
    TimeDataRecordFeatures.TIME_SINCE_LAST_RETWEET_HRS,
    TimeDataRecordFeatures.TIME_SINCE_LAST_REPLY_HRS,
    TimeDataRecordFeatures.TIME_SINCE_LAST_QUOTE_HRS
  )

  /** Shared flag features grouped under the "V5 boolean" name. */
  val UserFeaturesV5Boolean: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG,
    TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG,
    TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG,
    TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG,
    TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG,
    TimelinesSharedFeatures.LABEL_SPAM_FLAG,
    TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG,
    TimelinesSharedFeatures.PERISCOPE_EXISTS,
    TimelinesSharedFeatures.PERISCOPE_IS_LIVE,
    TimelinesSharedFeatures.PERISCOPE_HAS_BEEN_FEATURED,
    TimelinesSharedFeatures.PERISCOPE_IS_CURRENTLY_FEATURED,
    TimelinesSharedFeatures.PERISCOPE_IS_FROM_QUALITY_SOURCE,
    TimelinesSharedFeatures.HAS_QUOTE
  )

  /** Quote and label-flag features for the user-author V5 group. */
  val UserAuthorFeaturesV5: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.HAS_QUOTE,
    TimelinesSharedFeatures.LABEL_ABUSIVE_FLAG,
    TimelinesSharedFeatures.LABEL_ABUSIVE_HI_RCL_FLAG,
    TimelinesSharedFeatures.LABEL_DUP_CONTENT_FLAG,
    TimelinesSharedFeatures.LABEL_NSFW_HI_PRC_FLAG,
    TimelinesSharedFeatures.LABEL_NSFW_HI_RCL_FLAG,
    TimelinesSharedFeatures.LABEL_SPAM_FLAG,
    TimelinesSharedFeatures.LABEL_SPAM_HI_RCL_FLAG
  )

  /** Tweet-source features grouped under the "V1 continuous" name. */
  val UserTweetSourceFeaturesV1Continuous: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.NUM_CAPS,
    TimelinesSharedFeatures.NUM_WHITESPACES,
    TimelinesSharedFeatures.TWEET_LENGTH,
    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
    TimelinesSharedFeatures.ASPECT_RATIO_NUM,
    TimelinesSharedFeatures.BIT_RATE,
    TimelinesSharedFeatures.HEIGHT_1,
    TimelinesSharedFeatures.HEIGHT_2,
    TimelinesSharedFeatures.HEIGHT_3,
    TimelinesSharedFeatures.HEIGHT_4,
    TimelinesSharedFeatures.VIDEO_DURATION,
    TimelinesSharedFeatures.WIDTH_1,
    TimelinesSharedFeatures.WIDTH_2,
    TimelinesSharedFeatures.WIDTH_3,
    TimelinesSharedFeatures.WIDTH_4,
    TimelinesSharedFeatures.NUM_MEDIA_TAGS
  )

  /** Tweet-source features grouped under the "V1 boolean" name. */
  val UserTweetSourceFeaturesV1Boolean: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.HAS_QUESTION,
    TimelinesSharedFeatures.RESIZE_METHOD_1,
    TimelinesSharedFeatures.RESIZE_METHOD_2,
    TimelinesSharedFeatures.RESIZE_METHOD_3,
    TimelinesSharedFeatures.RESIZE_METHOD_4
  )

  /** Tweet-source features grouped under the "V2 continuous" name. */
  val UserTweetSourceFeaturesV2Continuous: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.NUM_EMOJIS,
    TimelinesSharedFeatures.NUM_EMOTICONS,
    TimelinesSharedFeatures.NUM_NEWLINES,
    TimelinesSharedFeatures.NUM_STICKERS,
    TimelinesSharedFeatures.NUM_FACES,
    TimelinesSharedFeatures.NUM_COLOR_PALLETTE_ITEMS,
    TimelinesSharedFeatures.VIEW_COUNT,
    TimelinesSharedFeatures.TWEET_LENGTH_TYPE
  )

  /** Tweet-source features grouped under the "V2 boolean" name. */
  val UserTweetSourceFeaturesV2Boolean: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.IS_360,
    TimelinesSharedFeatures.IS_MANAGED,
    TimelinesSharedFeatures.IS_MONETIZABLE,
    TimelinesSharedFeatures.IS_EMBEDDABLE,
    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
    TimelinesSharedFeatures.HAS_TITLE,
    TimelinesSharedFeatures.HAS_DESCRIPTION,
    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
  )

  /** Tweet-source features for the user-author V1 group. */
  val UserAuthorTweetSourceFeaturesV1: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.HAS_QUESTION,
    TimelinesSharedFeatures.TWEET_LENGTH,
    TimelinesSharedFeatures.VIDEO_DURATION,
    TimelinesSharedFeatures.NUM_MEDIA_TAGS
  )

  /** Tweet-source features for the user-author V2 group. */
  val UserAuthorTweetSourceFeaturesV2: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.NUM_CAPS,
    TimelinesSharedFeatures.NUM_WHITESPACES,
    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
    TimelinesSharedFeatures.ASPECT_RATIO_NUM,
    TimelinesSharedFeatures.BIT_RATE,
    TimelinesSharedFeatures.TWEET_LENGTH_TYPE,
    TimelinesSharedFeatures.NUM_EMOJIS,
    TimelinesSharedFeatures.NUM_EMOTICONS,
    TimelinesSharedFeatures.NUM_NEWLINES,
    TimelinesSharedFeatures.NUM_STICKERS,
    TimelinesSharedFeatures.NUM_FACES,
    TimelinesSharedFeatures.IS_360,
    TimelinesSharedFeatures.IS_MANAGED,
    TimelinesSharedFeatures.IS_MONETIZABLE,
    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
    TimelinesSharedFeatures.HAS_TITLE,
    TimelinesSharedFeatures.HAS_DESCRIPTION,
    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
  )

  /** Tweet-source features for the user-author V2 "count" variant. */
  val UserAuthorTweetSourceFeaturesV2Count: Set[Feature[_]] = Set(
    TimelinesSharedFeatures.NUM_CAPS,
    TimelinesSharedFeatures.ASPECT_RATIO_DEN,
    TimelinesSharedFeatures.NUM_NEWLINES,
    TimelinesSharedFeatures.IS_360,
    TimelinesSharedFeatures.IS_MANAGED,
    TimelinesSharedFeatures.IS_MONETIZABLE,
    TimelinesSharedFeatures.HAS_SELECTED_PREVIEW_IMAGE,
    TimelinesSharedFeatures.HAS_TITLE,
    TimelinesSharedFeatures.HAS_DESCRIPTION,
    TimelinesSharedFeatures.HAS_VISIT_SITE_CALL_TO_ACTION,
    TimelinesSharedFeatures.HAS_WATCH_NOW_CALL_TO_ACTION
  )

  /** [[RecapLabelsForAggregation]] plus three further binary recap labels. */
  val LabelsV2: Set[Feature.Binary] = RecapLabelsForAggregation ++ Set(
    RecapFeatures.IS_REPLIED,
    RecapFeatures.IS_PHOTO_EXPANDED,
    RecapFeatures.IS_VIDEO_PLAYBACK_50
  )

  /** Features grouped under the "Twitter-wide" name. */
  val TwitterWideFeatures: Set[Feature[_]] = Set(
    RecapFeatures.IS_REPLY,
    TimelinesSharedFeatures.HAS_QUOTE,
    RecapFeatures.HAS_MENTION,
    RecapFeatures.HAS_HASHTAG,
    RecapFeatures.HAS_LINK,
    RecapFeatures.HAS_CARD,
    RecapFeatures.CONTAINS_MEDIA
  )

  /** Binary labels grouped under the "Twitter-wide" name. */
  val TwitterWideLabels: Set[Feature.Binary] = Set(
    RecapFeatures.IS_FAVORITED,
    RecapFeatures.IS_RETWEETED,
    RecapFeatures.IS_REPLIED
  )

  /** Binary `IS_REPLIED_REPLY_*_BY_AUTHOR` labels. */
  val ReciprocalLabels: Set[Feature.Binary] = Set(
    RecapFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR,
    RecapFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR,
    RecapFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR
  )

  /** Binary labels for report / block / mute / don't-like actions. */
  val NegativeEngagementLabels: Set[Feature.Binary] = Set(
    RecapFeatures.IS_REPORT_TWEET_CLICKED,
    RecapFeatures.IS_BLOCK_CLICKED,
    RecapFeatures.IS_MUTE_CLICKED,
    RecapFeatures.IS_DONT_LIKE
  )

  /** Binary "good click" labels (V1 and V2). */
  val GoodClickLabels: Set[Feature.Binary] = Set(
    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1,
    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2
  )
}
|
@ -0,0 +1,52 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
|
||||
import com.twitter.timelines.prediction.features.itl.ITLFeatures
|
||||
|
||||
/**
 * Named sets of features and binary labels built from [[ITLFeatures]] and
 * [[EngagementDataRecordFeatures]].
 *
 * NOTE(review): presumably these sets are referenced by the rectweet aggregate group
 * definitions in this package - confirm against the aggregation config.
 */
object RectweetUserFeatureAggregation {

  /** Binary engagement labels taken from [[ITLFeatures]]. */
  val RectweetLabelsForAggregation: Set[Feature.Binary] = Set(
    ITLFeatures.IS_FAVORITED,
    ITLFeatures.IS_RETWEETED,
    ITLFeatures.IS_REPLIED,
    ITLFeatures.IS_CLICKED,
    ITLFeatures.IS_PROFILE_CLICKED,
    ITLFeatures.IS_OPEN_LINKED,
    ITLFeatures.IS_PHOTO_EXPANDED,
    ITLFeatures.IS_VIDEO_PLAYBACK_50
  )

  /**
   * Tweet-level features taken from [[ITLFeatures]], plus the in-network favorites count.
   *
   * Each feature is listed exactly once. `ITLFeatures.REPLY_COUNT` used to appear twice; the
   * second occurrence had no effect because a `Set` keeps a single copy of equal elements.
   */
  val TweetFeatures: Set[Feature[_]] = Set(
    ITLFeatures.HAS_IMAGE,
    ITLFeatures.HAS_CARD,
    ITLFeatures.HAS_NEWS,
    ITLFeatures.REPLY_COUNT,
    ITLFeatures.FAV_COUNT,
    ITLFeatures.RETWEET_COUNT,
    ITLFeatures.MATCHES_UI_LANG,
    ITLFeatures.MATCHES_SEARCHER_MAIN_LANG,
    ITLFeatures.MATCHES_SEARCHER_LANGS,
    ITLFeatures.TEXT_SCORE,
    ITLFeatures.LINK_LANGUAGE,
    ITLFeatures.NUM_HASHTAGS,
    ITLFeatures.NUM_MENTIONS,
    ITLFeatures.IS_SENSITIVE,
    ITLFeatures.HAS_VIDEO,
    ITLFeatures.HAS_LINK,
    ITLFeatures.HAS_VISIBLE_LINK,
    EngagementDataRecordFeatures.InNetworkFavoritesCount
    // nice to have, but currently not hydrated in the RecommendedTweet payload
    //EngagementDataRecordFeatures.InNetworkRetweetsCount,
    //EngagementDataRecordFeatures.InNetworkRepliesCount
  )

  /** Binary `IS_REPLIED_REPLY_*_BY_AUTHOR` labels taken from [[ITLFeatures]]. */
  val ReciprocalLabels: Set[Feature.Binary] = Set(
    ITLFeatures.IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR,
    ITLFeatures.IS_REPLIED_REPLY_REPLIED_BY_AUTHOR,
    ITLFeatures.IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR,
    ITLFeatures.IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR,
    ITLFeatures.IS_REPLIED_REPLY_QUOTED_BY_AUTHOR
  )
}
|
@ -0,0 +1,80 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.dal.client.dataset.KeyValDALDataset
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.scalding_internal.multiformat.format.keyval
|
||||
import com.twitter.summingbird.batch.BatchID
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsPolicy
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateStore
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateDataRecordStore
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Aggregation config whose output is written under `/user/timelines/processed/aggregates_v2`
 * and whose offline stores are bound to the DAL datasets listed in [[storeToDatasetMap]].
 */
object TimelinesAggregationConfig extends TimelinesAggregationConfigTrait {
  override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2"

  /**
   * Maps each aggregate store name to the DAL dataset backing it.
   *
   * This is a `def`, so the map is rebuilt on every call.
   */
  def storeToDatasetMap: Map[String, KeyValDALDataset[
    keyval.KeyVal[AggregationKey, (BatchID, DataRecord)]
  ]] = Map(
    AuthorTopicAggregateStore -> AuthorTopicAggregatesScalaDataset,
    UserTopicAggregateStore -> UserTopicAggregatesScalaDataset,
    UserInferredTopicAggregateStore -> UserInferredTopicAggregatesScalaDataset,
    UserAggregateStore -> UserAggregatesScalaDataset,
    UserAuthorAggregateStore -> UserAuthorAggregatesScalaDataset,
    UserOriginalAuthorAggregateStore -> UserOriginalAuthorAggregatesScalaDataset,
    OriginalAuthorAggregateStore -> OriginalAuthorAggregatesScalaDataset,
    UserEngagerAggregateStore -> UserEngagerAggregatesScalaDataset,
    UserMentionAggregateStore -> UserMentionAggregatesScalaDataset,
    TwitterWideUserAggregateStore -> TwitterWideUserAggregatesScalaDataset,
    TwitterWideUserAuthorAggregateStore -> TwitterWideUserAuthorAggregatesScalaDataset,
    UserRequestHourAggregateStore -> UserRequestHourAggregatesScalaDataset,
    UserRequestDowAggregateStore -> UserRequestDowAggregatesScalaDataset,
    UserListAggregateStore -> UserListAggregatesScalaDataset,
    UserMediaUnderstandingAnnotationAggregateStore -> UserMediaUnderstandingAnnotationAggregatesScalaDataset
  )

  /**
   * Binds an offline logical store to the DAL dataset registered for its name.
   *
   * @param store the logical store to convert
   * @return the store converted via `toOfflineAggregateDataRecordStoreWithDAL`
   * @throws NoSuchElementException if the store's name has no entry in [[storeToDatasetMap]]
   * @throws IllegalArgumentException if `store` is not an [[OfflineAggregateDataRecordStore]]
   */
  override def mkPhysicalStore(store: AggregateStore): AggregateStore = store match {
    case s: OfflineAggregateDataRecordStore =>
      // Same exception type as a bare Map.apply, but the message names the offending store
      // and the lookup table instead of just "key not found".
      val dataset = storeToDatasetMap.getOrElse(
        s.name,
        throw new NoSuchElementException(
          s"No DAL dataset registered in storeToDatasetMap for aggregate store '${s.name}'")
      )
      s.toOfflineAggregateDataRecordStoreWithDAL(dataset)
    case _ => throw new IllegalArgumentException("Unsupported logical dataset type.")
  }

  /** [[CombineCountsPolicy]] instances, one per aggregate prefix. */
  object CombineCountPolicies {
    val EngagerCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_engager_aggregate")
    val EngagerGoodClickCountsPolicy: CombineCountsPolicy = mkCountsPolicy(
      "user_engager_good_click_aggregate")
    val RectweetEngagerCountsPolicy: CombineCountsPolicy =
      mkCountsPolicy("rectweet_user_engager_aggregate")
    val MentionCountsPolicy: CombineCountsPolicy = mkCountsPolicy("user_mention_aggregate")
    val RectweetSimclustersTweetCountsPolicy: CombineCountsPolicy =
      mkCountsPolicy("rectweet_user_simcluster_tweet_aggregate")
    val UserInferredTopicCountsPolicy: CombineCountsPolicy =
      mkCountsPolicy("user_inferred_topic_aggregate")
    val UserInferredTopicV2CountsPolicy: CombineCountsPolicy =
      mkCountsPolicy("user_inferred_topic_aggregate_v2")
    val UserMediaUnderstandingAnnotationCountsPolicy: CombineCountsPolicy =
      mkCountsPolicy("user_media_annotation_aggregate")

    /**
     * Builds a policy (`topK = 2`, `hardLimit = Some(20)`) whose precompute context holds the
     * output features of every aggregate in `aggregatesToCompute` with the given prefix.
     *
     * @param prefix value compared for equality against each aggregate's `aggregatePrefix`
     */
    private[this] def mkCountsPolicy(prefix: String): CombineCountsPolicy = {
      val features = TimelinesAggregationConfig.aggregatesToCompute
        .filter(_.aggregatePrefix == prefix)
        .flatMap(_.allOutputFeatures)
      CombineCountsPolicy(
        topK = 2,
        aggregateContextToPrecompute = new FeatureContext(features.asJava),
        hardLimit = Some(20)
      )
    }
  }
}
|
||||
|
||||
/**
 * Aggregation config whose output is written under
 * `/user/timelines/canaries/processed/aggregates_v2`; every offline store is bound to the single
 * `AggregatesCanaryScalaDataset`.
 */
object TimelinesAggregationCanaryConfig extends TimelinesAggregationConfigTrait {
  override def outputHdfsPath: String = "/user/timelines/canaries/processed/aggregates_v2"

  /**
   * Binds an offline logical store to the canary DAL dataset.
   *
   * @param store the logical store to convert
   * @return the store converted via `toOfflineAggregateDataRecordStoreWithDAL`
   * @throws IllegalArgumentException if `store` is not an [[OfflineAggregateDataRecordStore]]
   */
  override def mkPhysicalStore(store: AggregateStore): AggregateStore =
    store match {
      case offlineStore: OfflineAggregateDataRecordStore =>
        offlineStore.toOfflineAggregateDataRecordStoreWithDAL(
          dalDataset = AggregatesCanaryScalaDataset
        )
      case _ =>
        throw new IllegalArgumentException("Unsupported logical dataset type.")
    }
}
|
@ -0,0 +1,579 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.ml.api.constant.SharedFeatures.AUTHOR_ID
|
||||
import com.twitter.ml.api.constant.SharedFeatures.USER_ID
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework._
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics._
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveAuthorIdZero
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.RichRemoveUserIdZero
|
||||
import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
|
||||
import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures
|
||||
import com.twitter.timelines.prediction.features.engagement_features.EngagementDataRecordFeatures.RichUnifyPublicEngagersTransform
|
||||
import com.twitter.timelines.prediction.features.list_features.ListFeatures
|
||||
import com.twitter.timelines.prediction.features.recap.RecapFeatures
|
||||
import com.twitter.timelines.prediction.features.request_context.RequestContextFeatures
|
||||
import com.twitter.timelines.prediction.features.semantic_core_features.SemanticCoreFeatures
|
||||
import com.twitter.timelines.prediction.transform.filter.FilterInNetworkTransform
|
||||
import com.twitter.timelines.prediction.transform.filter.FilterImageTweetTransform
|
||||
import com.twitter.timelines.prediction.transform.filter.FilterVideoTweetTransform
|
||||
import com.twitter.timelines.prediction.transform.filter.FilterOutImageVideoTweetTransform
|
||||
import com.twitter.util.Duration
|
||||
|
||||
trait TimelinesAggregationConfigDetails extends Serializable {
|
||||
|
||||
import TimelinesAggregationSources._
|
||||
|
||||
def outputHdfsPath: String
|
||||
|
||||
/**
|
||||
* Converts the given logical store to a physical store. The reason we do not specify the
|
||||
* physical store directly with the [[AggregateGroup]] is because of a cyclic dependency when
|
||||
* creating physical stores that are DalDataset with PersonalDataType annotations derived from
|
||||
* the [[AggregateGroup]].
|
||||
*
|
||||
*/
|
||||
def mkPhysicalStore(store: AggregateStore): AggregateStore
|
||||
|
||||
/** Value passed as maxKvSourceFailures to the offline stores declared below; overridable by subclasses. */
def defaultMaxKvSourceFailures: Int = 100
|
||||
|
||||
/**
 * Common offline-store settings shared by every store declared in this config.
 * The HDFS prefix is read from [[outputHdfsPath]] each time `apply` is invoked.
 */
val timelinesOfflineAggregateSink = new OfflineStoreCommonConfig {
  override def apply(startDate: String) =
    OfflineAggregateStoreCommonConfig(
      outputHdfsPathPrefix = outputHdfsPath,
      dummyAppId = "timelines_aggregates_v2_ro",
      dummyDatasetPrefix = "timelines_aggregates_v2_ro",
      startDate = startDate
    )
}
|
||||
|
||||
// Logical store names. Aggregate groups that use the same name below share one output store.

// User- and author-keyed stores.
val UserAggregateStore = "user_aggregates"
val UserAuthorAggregateStore = "user_author_aggregates"
val UserOriginalAuthorAggregateStore = "user_original_author_aggregates"
val OriginalAuthorAggregateStore = "original_author_aggregates"
val UserEngagerAggregateStore = "user_engager_aggregates"
val UserMentionAggregateStore = "user_mention_aggregates"

// Stores fed by the twitter-wide source.
val TwitterWideUserAggregateStore = "twitter_wide_user_aggregates"
val TwitterWideUserAuthorAggregateStore = "twitter_wide_user_author_aggregates"

// Request-context, list and topic stores.
val UserRequestHourAggregateStore = "user_request_hour_aggregates"
val UserRequestDowAggregateStore = "user_request_dow_aggregates"
val UserListAggregateStore = "user_list_aggregates"
val AuthorTopicAggregateStore = "author_topic_aggregates"
val UserTopicAggregateStore = "user_topic_aggregates"
val UserInferredTopicAggregateStore = "user_inferred_topic_aggregates"
val UserMediaUnderstandingAnnotationAggregateStore =
  "user_media_understanding_annotation_aggregates"

// Country-code stores.
val AuthorCountryCodeAggregateStore = "author_country_code_aggregates"
val OriginalAuthorCountryCodeAggregateStore = "original_author_country_code_aggregates"
|
||||
|
||||
/**
|
||||
* Step 3: Configure all aggregates to compute.
|
||||
* Note that different subsets of aggregates in this list
|
||||
* can be launched by different summingbird job instances.
|
||||
* Any given job can be responsible for a set of AggregateGroup
|
||||
* configs whose outputStores share the same exact startDate.
|
||||
* AggregateGroups that do not share the same inputSource,
|
||||
* outputStore or startDate MUST be launched using different
|
||||
* summingbird jobs and passed in a different --start-time argument
|
||||
* See science/scalding/mesos/timelines/prod.yaml for an example
|
||||
* of how to configure your own job.
|
||||
*/
|
||||
/**
 * Downsampling pre-transform configured with a 0.03 negative sampling rate and
 * RecapUserFeatureAggregation.LabelsV2 as the labels to keep.
 */
val negativeDownsampleTransform = DownsampleTransform(
  negativeSamplingRate = 0.03,
  keepLabels = RecapUserFeatureAggregation.LabelsV2
)
|
||||
/**
 * Same 0.03 negative sampling rate as negativeDownsampleTransform, but keeping
 * RectweetUserFeatureAggregation.RectweetLabelsForAggregation.
 */
val negativeRecTweetDownsampleTransform =
  DownsampleTransform(
    negativeSamplingRate = 0.03,
    keepLabels = RectweetUserFeatureAggregation.RectweetLabelsForAggregation)
|
||||
|
||||
/**
 * Aggregates keyed on USER_ID under the "user_aggregate_v2" prefix: count and sum metrics over
 * UserFeaturesV2 and LabelsV2, 50-day half-life, written to the user_aggregates store.
 */
val userAggregatesV2: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_aggregate_v2",
  preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
  keys = Set(USER_ID),
  features = RecapUserFeatureAggregation.UserFeaturesV2,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric, SumMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * The two "user_author_aggregate_v2" groups keyed on (USER_ID, AUTHOR_ID): one applies
 * SumMetric to UserAuthorFeaturesV2, the other applies CountMetric to
 * UserAuthorFeaturesV2Count. Both write to the user_author_aggregates store.
 */
val userAuthorAggregatesV2: Set[AggregateGroup] = {

  /**
   * NOTE: We need to remove records from out-of-network authors from the recap input
   * records (which now include out-of-network records as well after merging recap and
   * rectweet models) that are used to compute user-author aggregates. This is necessary
   * to limit the growth rate of user-author aggregates.
   */
  val sumGroup = AggregateGroup(
    inputSource = timelinesDailyRecapMinimalSource,
    aggregatePrefix = "user_author_aggregate_v2",
    preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
    keys = Set(USER_ID, AUTHOR_ID),
    features = RecapUserFeatureAggregation.UserAuthorFeaturesV2,
    labels = RecapUserFeatureAggregation.LabelsV2,
    metrics = Set(SumMetric),
    halfLives = Set(50.days),
    outputStore = mkPhysicalStore(
      OfflineAggregateDataRecordStore(
        name = UserAuthorAggregateStore,
        startDate = "2016-07-15 00:00",
        commonConfig = timelinesOfflineAggregateSink,
        maxKvSourceFailures = defaultMaxKvSourceFailures
      )
    )
  )

  // Identical input, keys, labels and store; only the feature set and the metric differ.
  val countGroup = AggregateGroup(
    inputSource = timelinesDailyRecapMinimalSource,
    aggregatePrefix = "user_author_aggregate_v2",
    preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
    keys = Set(USER_ID, AUTHOR_ID),
    features = RecapUserFeatureAggregation.UserAuthorFeaturesV2Count,
    labels = RecapUserFeatureAggregation.LabelsV2,
    metrics = Set(CountMetric),
    halfLives = Set(50.days),
    outputStore = mkPhysicalStore(
      OfflineAggregateDataRecordStore(
        name = UserAuthorAggregateStore,
        startDate = "2016-07-15 00:00",
        commonConfig = timelinesOfflineAggregateSink,
        maxKvSourceFailures = defaultMaxKvSourceFailures
      )
    )
  )

  Set(sumGroup, countGroup)
}
|
||||
|
||||
/**
 * Aggregates keyed on USER_ID under the "user_aggregate_v5.continuous" prefix: count, sum and
 * sum-of-squares metrics over UserFeaturesV5Continuous, written to the user_aggregates store.
 */
val userAggregatesV5Continuous: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_aggregate_v5.continuous",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID),
  features = RecapUserFeatureAggregation.UserFeaturesV5Continuous,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric, SumMetric, SumSqMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Aggregates keyed on (USER_ID, AUTHOR_ID) under the "user_author_aggregate_v5" prefix: count
 * metric over UserAuthorFeaturesV5, written to the user_author_aggregates store.
 */
val userAuthorAggregatesV5: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_author_aggregate_v5",
  preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
  keys = Set(USER_ID, AUTHOR_ID),
  features = RecapUserFeatureAggregation.UserAuthorFeaturesV5,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAuthorAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Aggregates keyed on (USER_ID, AUTHOR_ID) under the "user_author_aggregate_tweetsource_v1"
 * prefix: count and sum metrics over UserAuthorTweetSourceFeaturesV1, written to the
 * user_author_aggregates store.
 */
val tweetSourceUserAuthorAggregatesV1: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_author_aggregate_tweetsource_v1",
  preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
  keys = Set(USER_ID, AUTHOR_ID),
  features = RecapUserFeatureAggregation.UserAuthorTweetSourceFeaturesV1,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric, SumMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAuthorAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, PublicEngagementUserIds) under the
 * "user_engager_aggregate" prefix, written to the user_engager_aggregates store.
 * RichUnifyPublicEngagersTransform runs after the user-id-zero filter.
 */
val userEngagerAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_engager_aggregate",
  keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserEngagerAggregateStore,
      startDate = "2016-09-02 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  preTransforms = Seq(
    RichRemoveUserIdZero,
    RichUnifyPublicEngagersTransform
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, MENTIONED_SCREEN_NAMES) under the
 * "user_mention_aggregate" prefix, written to the user_mention_aggregates store.
 * includeAnyLabel is switched off for this group.
 */
val userMentionAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
  aggregatePrefix = "user_mention_aggregate",
  keys = Set(USER_ID, RecapFeatures.MENTIONED_SCREEN_NAMES),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserMentionAggregateStore,
      startDate = "2017-03-01 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
 * Aggregates keyed on USER_ID read from timelinesDailyTwitterWideSource under the
 * "twitter_wide_user_aggregate" prefix: count and sum metrics over TwitterWideFeatures and
 * TwitterWideLabels, written to the twitter_wide_user_aggregates store.
 */
val twitterWideUserAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyTwitterWideSource,
  preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
  aggregatePrefix = "twitter_wide_user_aggregate",
  keys = Set(USER_ID),
  features = RecapUserFeatureAggregation.TwitterWideFeatures,
  labels = RecapUserFeatureAggregation.TwitterWideLabels,
  metrics = Set(CountMetric, SumMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = TwitterWideUserAggregateStore,
      startDate = "2016-12-28 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Aggregates keyed on (USER_ID, AUTHOR_ID) read from timelinesDailyTwitterWideSource under the
 * "twitter_wide_user_author_aggregate" prefix: count metric over TwitterWideFeatures and
 * TwitterWideLabels, with includeAnyLabel switched off.
 */
val twitterWideUserAuthorAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyTwitterWideSource,
  preTransforms = Seq(RichRemoveUserIdZero), /* Eliminates reducer skew */
  aggregatePrefix = "twitter_wide_user_author_aggregate",
  keys = Set(USER_ID, AUTHOR_ID),
  features = RecapUserFeatureAggregation.TwitterWideFeatures,
  labels = RecapUserFeatureAggregation.TwitterWideLabels,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = TwitterWideUserAuthorAggregateStore,
      startDate = "2016-12-28 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
|
||||
* User-HourOfDay and User-DayOfWeek aggregations, both for recap and rectweet
|
||||
*/
|
||||
/**
 * Label-only counts keyed on (USER_ID, TIMESTAMP_GMT_HOUR) under the
 * "user_request_context_aggregate.hour" prefix. Input is passed through
 * negativeDownsampleTransform after the user-id-zero filter.
 */
val userRequestHourAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_request_context_aggregate.hour",
  preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform),
  keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_HOUR),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserRequestHourAggregateStore,
      startDate = "2017-08-01 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, TIMESTAMP_GMT_DOW) under the
 * "user_request_context_aggregate.dow" prefix. Input is passed through
 * negativeDownsampleTransform after the user-id-zero filter.
 */
val userRequestDowAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_request_context_aggregate.dow",
  preTransforms = Seq(RichRemoveUserIdZero, negativeDownsampleTransform),
  keys = Set(USER_ID, RequestContextFeatures.TIMESTAMP_GMT_DOW),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserRequestDowAggregateStore,
      startDate = "2017-08-01 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (AUTHOR_ID, TOPIC_ID) under the "author_topic_aggregate" prefix,
 * written to the author_topic_aggregates store.
 *
 * NOTE(review): this group is keyed on AUTHOR_ID but only filters zero user ids; other
 * author-keyed groups in this config also apply RichRemoveAuthorIdZero. Confirm this is intended.
 */
val authorTopicAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "author_topic_aggregate",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(AUTHOR_ID, TimelinesSharedFeatures.TOPIC_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = AuthorTopicAggregateStore,
      startDate = "2020-05-19 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, TOPIC_ID) under the "user_topic_aggregate" prefix,
 * written to the user_topic_aggregates store.
 */
val userTopicAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_topic_aggregate",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserTopicAggregateStore,
      startDate = "2020-05-23 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Counts keyed on (USER_ID, TOPIC_ID) under the "user_topic_aggregate_v2" prefix over
 * UserTopicFeaturesV2Count, with both includeAnyFeature and includeAnyLabel switched off.
 * Shares the user_topic_aggregates store with userTopicAggregates.
 */
val userTopicAggregatesV2: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_topic_aggregate_v2",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID, TimelinesSharedFeatures.TOPIC_ID),
  features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count,
  labels = RecapUserFeatureAggregation.LabelsV2,
  includeAnyFeature = false,
  includeAnyLabel = false,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserTopicAggregateStore,
      startDate = "2020-05-23 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, INFERRED_TOPIC_IDS) under the
 * "user_inferred_topic_aggregate" prefix, written to the user_inferred_topic_aggregates store.
 */
val userInferredTopicAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_inferred_topic_aggregate",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID, TimelinesSharedFeatures.INFERRED_TOPIC_IDS),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserInferredTopicAggregateStore,
      startDate = "2020-09-09 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Counts keyed on (USER_ID, INFERRED_TOPIC_IDS) under the "user_inferred_topic_aggregate_v2"
 * prefix over UserTopicFeaturesV2Count, with both includeAnyFeature and includeAnyLabel
 * switched off. Shares its store with userInferredTopicAggregates.
 */
val userInferredTopicAggregatesV2: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_inferred_topic_aggregate_v2",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID, TimelinesSharedFeatures.INFERRED_TOPIC_IDS),
  features = RecapUserFeatureAggregation.UserTopicFeaturesV2Count,
  labels = RecapUserFeatureAggregation.LabelsV2,
  includeAnyFeature = false,
  includeAnyLabel = false,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserInferredTopicAggregateStore,
      startDate = "2020-09-09 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Counts of ReciprocalLabels keyed on USER_ID under the "user_aggregate_v6" prefix, written to
 * the user_aggregates store with includeAnyLabel switched off.
 */
val userReciprocalEngagementAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_aggregate_v6",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys = Set(USER_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.ReciprocalLabels,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
 * Counts of ReciprocalLabels keyed on (USER_ID, ORIGINAL_AUTHOR_ID) under the
 * "user_original_author_aggregate_v1" prefix. Records with a zero user id or zero author id
 * are removed first; includeAnyLabel is switched off.
 */
val userOriginalAuthorReciprocalEngagementAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_original_author_aggregate_v1",
  preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
  keys = Set(USER_ID, TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.ReciprocalLabels,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserOriginalAuthorAggregateStore,
      startDate = "2018-12-26 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
 * Counts of ReciprocalLabels keyed on ORIGINAL_AUTHOR_ID alone under the
 * "original_author_aggregate_v1" prefix, written to the original_author_aggregates store with
 * includeAnyLabel switched off.
 */
val originalAuthorReciprocalEngagementAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "original_author_aggregate_v1",
  preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
  keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.ReciprocalLabels,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = OriginalAuthorAggregateStore,
      startDate = "2023-02-25 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
 * Counts of NegativeEngagementLabels keyed on ORIGINAL_AUTHOR_ID under the
 * "original_author_aggregate_v2" prefix. Shares the original_author_aggregates store with
 * originalAuthorReciprocalEngagementAggregates; includeAnyLabel is switched off.
 */
val originalAuthorNegativeEngagementAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "original_author_aggregate_v2",
  preTransforms = Seq(RichRemoveUserIdZero, RichRemoveAuthorIdZero),
  keys = Set(TimelinesSharedFeatures.ORIGINAL_AUTHOR_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.NegativeEngagementLabels,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = OriginalAuthorAggregateStore,
      startDate = "2023-02-25 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  includeAnyLabel = false
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, LIST_ID) under the "user_list_aggregate" prefix,
 * written to the user_list_aggregates store.
 */
val userListAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_list_aggregate",
  keys = Set(USER_ID, ListFeatures.LIST_ID),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserListAggregateStore,
      startDate = "2020-05-28 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  preTransforms = Seq(RichRemoveUserIdZero)
)
|
||||
|
||||
/**
 * Label-only counts keyed on (USER_ID, mediaUnderstandingHighRecallNonSensitiveEntityIdsFeature)
 * under the "user_media_annotation_aggregate" prefix, written to the
 * user_media_understanding_annotation_aggregates store.
 */
val userMediaUnderstandingAnnotationAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_media_annotation_aggregate",
  preTransforms = Seq(RichRemoveUserIdZero),
  keys =
    Set(USER_ID, SemanticCoreFeatures.mediaUnderstandingHighRecallNonSensitiveEntityIdsFeature),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.LabelsV2,
  metrics = Set(CountMetric),
  halfLives = Set(50.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserMediaUnderstandingAnnotationAggregateStore,
      startDate = "2021-03-20 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      // Every other store in this trait passes the shared failure budget; this one previously
      // fell back to the store's own default. NOTE(review): confirm that was not deliberate.
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Sum metric over UserAuthorFeaturesV2 and GoodClickLabels keyed on (USER_ID, AUTHOR_ID) under
 * the "user_author_good_click_aggregate" prefix. Uses a 14-day half-life, unlike the 50-day
 * groups above, and writes to the user_author_aggregates store.
 */
val userAuthorGoodClickAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_author_good_click_aggregate",
  preTransforms = Seq(FilterInNetworkTransform, RichRemoveUserIdZero),
  keys = Set(USER_ID, AUTHOR_ID),
  features = RecapUserFeatureAggregation.UserAuthorFeaturesV2,
  labels = RecapUserFeatureAggregation.GoodClickLabels,
  metrics = Set(SumMetric),
  halfLives = Set(14.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserAuthorAggregateStore,
      startDate = "2016-07-15 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  )
)
|
||||
|
||||
/**
 * Counts of GoodClickLabels keyed on (USER_ID, PublicEngagementUserIds) under the
 * "user_engager_good_click_aggregate" prefix. Uses a 14-day half-life and writes to the
 * user_engager_aggregates store.
 */
val userEngagerGoodClickAggregates: AggregateGroup = AggregateGroup(
  inputSource = timelinesDailyRecapMinimalSource,
  aggregatePrefix = "user_engager_good_click_aggregate",
  keys = Set(USER_ID, EngagementDataRecordFeatures.PublicEngagementUserIds),
  features = Set.empty,
  labels = RecapUserFeatureAggregation.GoodClickLabels,
  metrics = Set(CountMetric),
  halfLives = Set(14.days),
  outputStore = mkPhysicalStore(
    OfflineAggregateDataRecordStore(
      name = UserEngagerAggregateStore,
      startDate = "2016-09-02 00:00",
      commonConfig = timelinesOfflineAggregateSink,
      maxKvSourceFailures = defaultMaxKvSourceFailures
    )
  ),
  preTransforms = Seq(
    RichRemoveUserIdZero,
    RichUnifyPublicEngagersTransform
  )
)
|
||||
|
||||
}
|
@ -0,0 +1,50 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationConfig
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregateGroup
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
|
||||
/**
 * Selects which aggregate groups from [[TimelinesAggregationConfigDetails]] are computed and
 * exposes them, expanded into typed groups, through [[AggregationConfig]].
 */
trait TimelinesAggregationConfigTrait
    extends TimelinesAggregationConfigDetails
    with AggregationConfig {

  // The groups this config computes. userAuthorAggregatesV2 is already a Set, so it is unioned
  // in rather than listed as an element.
  private val aggregateGroups = Set(
    authorTopicAggregates,
    userTopicAggregates,
    userTopicAggregatesV2,
    userInferredTopicAggregates,
    userInferredTopicAggregatesV2,
    userAggregatesV2,
    userAggregatesV5Continuous,
    userReciprocalEngagementAggregates,
    userAuthorAggregatesV5,
    userOriginalAuthorReciprocalEngagementAggregates,
    originalAuthorReciprocalEngagementAggregates,
    tweetSourceUserAuthorAggregatesV1,
    userEngagerAggregates,
    userMentionAggregates,
    twitterWideUserAggregates,
    twitterWideUserAuthorAggregates,
    userRequestHourAggregates,
    userRequestDowAggregates,
    userListAggregates,
    userMediaUnderstandingAnnotationAggregates
  ) ++ userAuthorAggregatesV2

  /** One list of typed groups per configured aggregate group. */
  val aggregatesToComputeList: Set[List[TypedAggregateGroup[_]]] =
    aggregateGroups.map(group => group.buildTypedAggregateGroups())

  /** All typed groups from every configured aggregate group, flattened into a single set. */
  override val aggregatesToCompute: Set[TypedAggregateGroup[_]] = aggregatesToComputeList.flatten

  /*
   * Feature selection config to save storage space and manhattan query bandwidth.
   * Only the most important features found using offline RCE simulations are used
   * when actually training and serving. This selector is used by
   * [[com.twitter.timelines.data_processing.jobs.timeline_ranking_user_features.TimelineRankingAggregatesV2FeaturesProdJob]]
   * but defined here to keep it in sync with the config that computes the aggregates.
   */
  val AggregatesV2FeatureSelector = FeatureSelectorConfig.AggregatesV2ProdFeatureSelector

  /**
   * Returns the configured aggregate groups whose output store name is in `storeNames`.
   *
   * @param storeNames logical store names to keep
   */
  def filterAggregatesGroups(storeNames: Set[String]): Set[AggregateGroup] =
    aggregateGroups.filter(group => storeNames(group.outputStore.name))
}
|
@ -0,0 +1,48 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.scalding_internal.multiformat.format.keyval.KeyValInjection
|
||||
import com.twitter.summingbird.batch.BatchID
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.{
|
||||
AggregateStore,
|
||||
AggregationKey,
|
||||
OfflineAggregateInjections,
|
||||
TypedAggregateGroup
|
||||
}
|
||||
|
||||
/**
 * One [[KeyValInjection]] per offline aggregate store, each built from the typed aggregate
 * groups in [[TimelinesAggregationConfigTrait]] that write to that store.
 */
object TimelinesAggregationKeyValInjections extends TimelinesAggregationConfigTrait {

  import OfflineAggregateInjections.getInjection

  type KVInjection = KeyValInjection[AggregationKey, (BatchID, DataRecord)]

  val AuthorTopic: KVInjection = getInjection(filter(AuthorTopicAggregateStore))
  val UserTopic: KVInjection = getInjection(filter(UserTopicAggregateStore))
  val UserInferredTopic: KVInjection = getInjection(filter(UserInferredTopicAggregateStore))
  val User: KVInjection = getInjection(filter(UserAggregateStore))
  val UserAuthor: KVInjection = getInjection(filter(UserAuthorAggregateStore))
  val UserOriginalAuthor: KVInjection = getInjection(filter(UserOriginalAuthorAggregateStore))
  val OriginalAuthor: KVInjection = getInjection(filter(OriginalAuthorAggregateStore))
  val UserEngager: KVInjection = getInjection(filter(UserEngagerAggregateStore))
  val UserMention: KVInjection = getInjection(filter(UserMentionAggregateStore))
  val TwitterWideUser: KVInjection = getInjection(filter(TwitterWideUserAggregateStore))
  val TwitterWideUserAuthor: KVInjection = getInjection(filter(TwitterWideUserAuthorAggregateStore))
  val UserRequestHour: KVInjection = getInjection(filter(UserRequestHourAggregateStore))
  val UserRequestDow: KVInjection = getInjection(filter(UserRequestDowAggregateStore))
  val UserList: KVInjection = getInjection(filter(UserListAggregateStore))
  val UserMediaUnderstandingAnnotation: KVInjection = getInjection(
    filter(UserMediaUnderstandingAnnotationAggregateStore))

  /**
   * Returns the typed aggregate groups whose output store is named `storeName`.
   *
   * @throws IllegalArgumentException if no configured group writes to `storeName`; the message
   *                                  names the store so a misconfiguration is easy to locate
   */
  private def filter(storeName: String): Set[TypedAggregateGroup[_]] = {
    val groups = aggregatesToCompute.filter(_.outputStore.name == storeName)
    require(groups.nonEmpty, s"No aggregate groups are configured for store '$storeName'")
    groups
  }

  override def outputHdfsPath: String = "/user/timelines/processed/aggregates_v2"

  // Since this object is not used to execute any online or offline aggregates job, but is meant
  // to store all PDT enabled KeyValInjections, we do not need to construct a physical store.
  // We use the identity operation as a default.
  override def mkPhysicalStore(store: AggregateStore): AggregateStore = store
}
|
@ -0,0 +1,45 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates
|
||||
|
||||
import com.twitter.ml.api.constant.SharedFeatures.TIMESTAMP
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.OfflineAggregateSource
|
||||
import com.twitter.timelines.prediction.features.p_home_latest.HomeLatestUserAggregatesFeatures
|
||||
import timelines.data_processing.ad_hoc.recap.data_record_preparation.RecapDataRecordsAggMinimalJavaDataset
|
||||
|
||||
/**
|
||||
* Any update here should be in sync with [[TimelinesFeatureGroups]] and [[AggMinimalDataRecordGeneratorJob]].
|
||||
*/
|
||||
object TimelinesAggregationSources {

  /**
   * This is the recap data records after post-processing in [[GenerateRecapAggMinimalDataRecordsJob]]
   */
  val timelinesDailyRecapMinimalSource =
    OfflineAggregateSource(
      name = "timelines_daily_recap",
      timestampFeature = TIMESTAMP,
      dalDataSet = Some(RecapDataRecordsAggMinimalJavaDataset),
      scaldingSuffixType = Some("dal"),
      withValidation = true)

  /** Twitter-wide data records, read from an HDFS path with a "daily" suffix type. */
  val timelinesDailyTwitterWideSource =
    OfflineAggregateSource(
      name = "timelines_daily_twitter_wide",
      timestampFeature = TIMESTAMP,
      scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/twitter_wide_data_records"),
      scaldingSuffixType = Some("daily"),
      withValidation = true)

  /**
   * List-timeline data records read from an HDFS path.
   * NOTE(review): the source is named "daily" but uses the "hourly" suffix type; confirm.
   */
  val timelinesDailyListTimelineSource =
    OfflineAggregateSource(
      name = "timelines_daily_list_timeline",
      timestampFeature = TIMESTAMP,
      scaldingHdfsPath = Some("/user/timelines/processed/suggests/recap/all_features/list"),
      scaldingSuffixType = Some("hourly"),
      withValidation = true)

  /**
   * Home-latest user aggregates. Unlike the other sources it takes its timestamp from
   * HomeLatestUserAggregatesFeatures.AGGREGATE_TIMESTAMP_MS and does not pass withValidation.
   */
  val timelinesDailyHomeLatestSource =
    OfflineAggregateSource(
      name = "timelines_daily_home_latest",
      timestampFeature = HomeLatestUserAggregatesFeatures.AGGREGATE_TIMESTAMP_MS,
      scaldingHdfsPath = Some("/user/timelines/processed/p_home_latest/user_aggregates"),
      scaldingSuffixType = Some("daily"))
}
|
@ -0,0 +1,70 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.{DataRecord, Feature, FeatureContext, RichDataRecord}
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.Author
|
||||
import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecord
|
||||
import com.twitter.ml.featurestore.lib.feature.{BoundFeature, BoundFeatureSet}
|
||||
import com.twitter.ml.featurestore.lib.{UserId, Discrete => FSDiscrete}
|
||||
import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
|
||||
import java.lang.{Boolean => JBoolean}
|
||||
import java.util
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Turns the UserState feature bound to the Author entity of a [[PredictionRecord]] into a
 * [[DataRecord]] that has the matching binary "timelines.author.user_state.*" feature set.
 */
object AuthorFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {

  val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] =
    UserActivity.UserState.bind(Author)

  val UserFeaturesSet: BoundFeatureSet = BoundFeatureSet(UserStateBoundFeature)

  /**
   * Boolean features about the author's user state.
   * enum UserState {
   *   NEW = 0,
   *   NEAR_ZERO = 1,
   *   VERY_LIGHT = 2,
   *   LIGHT = 3,
   *   MEDIUM_TWEETER = 4,
   *   MEDIUM_NON_TWEETER = 5,
   *   HEAVY_NON_TWEETER = 6,
   *   HEAVY_TWEETER = 7
   * }(persisted='true')
   */
  val IS_USER_NEW =
    new Binary("timelines.author.user_state.is_user_new", Set(UserState).asJava)
  val IS_USER_LIGHT =
    new Binary("timelines.author.user_state.is_user_light", Set(UserState).asJava)
  val IS_USER_MEDIUM_TWEETER =
    new Binary("timelines.author.user_state.is_user_medium_tweeter", Set(UserState).asJava)
  val IS_USER_MEDIUM_NON_TWEETER =
    new Binary("timelines.author.user_state.is_user_medium_non_tweeter", Set(UserState).asJava)
  val IS_USER_HEAVY_NON_TWEETER =
    new Binary("timelines.author.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava)
  val IS_USER_HEAVY_TWEETER =
    new Binary("timelines.author.user_state.is_user_heavy_tweeter", Set(UserState).asJava)

  // Enum values 1 to 3 (NEAR_ZERO, VERY_LIGHT, LIGHT) all collapse onto the single "light" flag.
  val userStateToFeatureMap: Map[Long, Binary] = Map(
    0L -> IS_USER_NEW,
    1L -> IS_USER_LIGHT,
    2L -> IS_USER_LIGHT,
    3L -> IS_USER_LIGHT,
    4L -> IS_USER_MEDIUM_TWEETER,
    5L -> IS_USER_MEDIUM_NON_TWEETER,
    6L -> IS_USER_HEAVY_NON_TWEETER,
    7L -> IS_USER_HEAVY_TWEETER
  )

  val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet

  private val allFeatures: Seq[Feature[_]] = UserStateBooleanFeatures.toSeq

  override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*)

  override def commonFeatures: Set[Feature[_]] = Set.empty

  /**
   * Builds a single-element list holding a new DataRecord. The record carries the binary flag
   * mapped from the record's user state, or no feature at all when the user state is absent
   * or has no entry in userStateToFeatureMap.
   */
  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
    val richRecord = new RichDataRecord(new DataRecord)

    for {
      userState <- record.getFeatureValue(UserStateBoundFeature)
      stateFlag <- userStateToFeatureMap.get(userState.value)
    } richRecord.setFeatureValue[JBoolean](stateFlag, true)

    List(richRecord.getRecord).asJava
  }
}
|
@ -0,0 +1,199 @@
|
||||
heron_binary(
|
||||
name = "heron-without-jass",
|
||||
main = "com.twitter.timelines.prediction.common.aggregates.real_time.TypeSafeRunner",
|
||||
oss = True,
|
||||
platform = "java8",
|
||||
runtime_platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
":real_time",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-jdk14",
|
||||
],
|
||||
)
|
||||
|
||||
jvm_app(
|
||||
name = "rta_heron",
|
||||
binary = ":heron-without-jass",
|
||||
bundles = [
|
||||
bundle(
|
||||
fileset = ["resources/jaas.conf"],
|
||||
),
|
||||
],
|
||||
tags = [
|
||||
"bazel-compatible",
|
||||
"bazel-only",
|
||||
],
|
||||
)
|
||||
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
":online-configs",
|
||||
"3rdparty/src/jvm/com/twitter/summingbird:storm",
|
||||
"src/java/com/twitter/heron/util",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/ml/api/constant",
|
||||
"src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features",
|
||||
"src/scala/com/twitter/ml/api/util",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache",
|
||||
"src/scala/com/twitter/storehaus_internal/util",
|
||||
"src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
|
||||
"src/scala/com/twitter/summingbird_internal/runner/store_config",
|
||||
"src/scala/com/twitter/summingbird_internal/runner/storm",
|
||||
"src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2",
|
||||
"src/scala/com/twitter/timelines/prediction/adapters/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr",
|
||||
"src/scala/com/twitter/timelines/prediction/features/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/features/common",
|
||||
"src/scala/com/twitter/timelines/prediction/features/list_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/recap",
|
||||
"src/scala/com/twitter/timelines/prediction/features/user_health",
|
||||
"src/thrift/com/twitter/ml/api:data-java",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:record-scala",
|
||||
"timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/heron",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/job",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/metrics",
|
||||
"timelines/data_processing/ml_util/transforms",
|
||||
"timelines/src/main/scala/com/twitter/timelines/clients/memcache_common",
|
||||
"util/util-core:scala",
|
||||
],
|
||||
)
|
||||
|
||||
scala_library(
|
||||
name = "online-configs",
|
||||
sources = [
|
||||
"AuthorFeaturesAdapter.scala",
|
||||
"Event.scala",
|
||||
"FeatureStoreUtils.scala",
|
||||
"StormAggregateSourceUtils.scala",
|
||||
"TimelinesOnlineAggregationConfig.scala",
|
||||
"TimelinesOnlineAggregationConfigBase.scala",
|
||||
"TimelinesOnlineAggregationSources.scala",
|
||||
"TimelinesStormAggregateSource.scala",
|
||||
"TweetFeaturesReadableStore.scala",
|
||||
"UserFeaturesAdapter.scala",
|
||||
"UserFeaturesReadableStore.scala",
|
||||
],
|
||||
platform = "java8",
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
":base-config",
|
||||
"3rdparty/src/jvm/com/twitter/scalding:db",
|
||||
"3rdparty/src/jvm/com/twitter/storehaus:core",
|
||||
"3rdparty/src/jvm/com/twitter/summingbird:core",
|
||||
"3rdparty/src/jvm/com/twitter/summingbird:online",
|
||||
"3rdparty/src/jvm/com/twitter/summingbird:storm",
|
||||
"abuse/detection/src/main/thrift/com/twitter/abuse/detection/mention_interactions:thrift-scala",
|
||||
"snowflake/src/main/scala/com/twitter/snowflake/id",
|
||||
"snowflake/src/main/thrift:thrift-scala",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/ml/api/constant",
|
||||
"src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core:core-features",
|
||||
"src/scala/com/twitter/ml/api/util:datarecord",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/datasets/geo:geo-user-location",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/entities/core",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/core:user",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/geo",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/data",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/dataset/offline",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/export/strato:app-names",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/feature",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/online",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/params",
|
||||
"src/scala/com/twitter/storehaus_internal/util",
|
||||
"src/scala/com/twitter/summingbird_internal/bijection:bijection-implicits",
|
||||
"src/scala/com/twitter/summingbird_internal/runner/store_config",
|
||||
"src/scala/com/twitter/summingbird_internal/runner/storm",
|
||||
"src/scala/com/twitter/summingbird_internal/sources/common",
|
||||
"src/scala/com/twitter/summingbird_internal/sources/common/remote:ClientEventSourceScrooge",
|
||||
"src/scala/com/twitter/summingbird_internal/sources/storm/remote:ClientEventSourceScrooge2",
|
||||
"src/scala/com/twitter/timelines/prediction/adapters/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/adapters/client_log_event_mr",
|
||||
"src/scala/com/twitter/timelines/prediction/common/adapters:base",
|
||||
"src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
|
||||
"src/scala/com/twitter/timelines/prediction/common/aggregates",
|
||||
"src/scala/com/twitter/timelines/prediction/features/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/features/common",
|
||||
"src/scala/com/twitter/timelines/prediction/features/list_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/recap",
|
||||
"src/scala/com/twitter/timelines/prediction/features/user_health",
|
||||
"src/thrift/com/twitter/clientapp/gen:clientapp-scala",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/ml/api:data-java",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:engagement-java",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:record-scala",
|
||||
"src/thrift/com/twitter/timelineservice/injection:thrift-scala",
|
||||
"src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala",
|
||||
"strato/src/main/scala/com/twitter/strato/client",
|
||||
"timelinemixer/common/src/main/scala/com/twitter/timelinemixer/clients/served_features_cache",
|
||||
"timelines/data_processing/ad_hoc/suggests/common:raw_training_data_creator",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/heron:configs",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/metrics",
|
||||
"timelines/data_processing/ml_util/transforms",
|
||||
"timelines/data_processing/util:rich-request",
|
||||
"tweetsource/common/src/main/thrift:thrift-scala",
|
||||
"twitter-server-internal/src/main/scala",
|
||||
"unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/config",
|
||||
"unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/summingbird",
|
||||
"unified_user_actions/thrift/src/main/thrift/com/twitter/unified_user_actions:unified_user_actions-scala",
|
||||
"util/util-core:scala",
|
||||
"util/util-stats/src/main/scala/com/twitter/finagle/stats",
|
||||
],
|
||||
)
|
||||
|
||||
scala_library(
|
||||
name = "base-config",
|
||||
sources = [
|
||||
"AuthorFeaturesAdapter.scala",
|
||||
"TimelinesOnlineAggregationConfigBase.scala",
|
||||
"TweetFeaturesAdapter.scala",
|
||||
"UserFeaturesAdapter.scala",
|
||||
],
|
||||
platform = "java8",
|
||||
strict_deps = True,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/java/com/twitter/ml/api/constant",
|
||||
"src/resources/com/twitter/timelines/prediction/common/aggregates/real_time",
|
||||
"src/scala/com/twitter/ml/api/util:datarecord",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/datasets/magicrecs:user-features",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/entities/core",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/core:user",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/geo",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-activity",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/magicrecs:user-info",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/trends:tweet_trends_scores",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/data",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/feature",
|
||||
"src/scala/com/twitter/timelines/prediction/common/adapters:base",
|
||||
"src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
|
||||
"src/scala/com/twitter/timelines/prediction/common/aggregates",
|
||||
"src/scala/com/twitter/timelines/prediction/features/client_log_event",
|
||||
"src/scala/com/twitter/timelines/prediction/features/common",
|
||||
"src/scala/com/twitter/timelines/prediction/features/list_features",
|
||||
"src/scala/com/twitter/timelines/prediction/features/recap",
|
||||
"src/scala/com/twitter/timelines/prediction/features/user_health",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/ml/api:feature_context-java",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/heron:base-config",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/metrics",
|
||||
"timelines/data_processing/ml_util/transforms",
|
||||
"util/util-core:scala",
|
||||
"util/util-core:util-core-util",
|
||||
],
|
||||
)
|
@ -0,0 +1,11 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
/**
 * Envelope around a stream payload of type `T`. The concrete subclass tags the payload so
 * downstream code can pattern match on it (for example,
 * `StormAggregateSourceUtils.leftJoinDataRecordProducer` only derives a join key for
 * [[HomeEvent]]).
 */
private[real_time] sealed trait Event[T] {

  /** The wrapped payload. */
  def event: T
}

/** Payload tagged as a home event. */
private[real_time] case class HomeEvent[T](event: T) extends Event[T]

/** Payload tagged as a profile event. */
private[real_time] case class ProfileEvent[T](event: T) extends Event[T]

/** Payload tagged as a search event. */
private[real_time] case class SearchEvent[T](event: T) extends Event[T]

/** Payload tagged as a UUA event (presumably Unified User Actions -- confirm with producers). */
private[real_time] case class UuaEvent[T](event: T) extends Event[T]
|
@ -0,0 +1,53 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.ml.featurestore.catalog.datasets.magicrecs.UserFeaturesDataset
|
||||
import com.twitter.ml.featurestore.catalog.datasets.geo.GeoUserLocationDataset
|
||||
import com.twitter.ml.featurestore.lib.dataset.DatasetParams
|
||||
import com.twitter.ml.featurestore.lib.export.strato.FeatureStoreAppNames
|
||||
import com.twitter.ml.featurestore.lib.online.FeatureStoreClient
|
||||
import com.twitter.ml.featurestore.lib.params.FeatureStoreParams
|
||||
import com.twitter.strato.client.{Client, Strato}
|
||||
import com.twitter.strato.opcontext.Attribution.ManhattanAppId
|
||||
import com.twitter.util.Duration
|
||||
|
||||
/**
 * Factory for the [[FeatureStoreClient]] that serves the user, author and tweet feature sets
 * declared by the adapters in this package.
 */
private[real_time] object FeatureStoreUtils {

  /**
   * Builds a Strato client that authenticates with mutual TLS as `serviceIdentifier` and
   * applies a 50 ms request timeout.
   */
  private def mkStratoClient(serviceIdentifier: ServiceIdentifier): Client = {
    val requestTimeout = Duration.fromMilliseconds(50)
    Strato.client
      .withMutualTls(serviceIdentifier)
      .withRequestTimeout(requestTimeout)
      .build()
  }

  /** Per-dataset parameters (Strato suffix and Manhattan attributions) for the client. */
  private val featureStoreParams: FeatureStoreParams = {
    val userFeaturesParams = DatasetParams(
      stratoSuffix = Some(FeatureStoreAppNames.Timelines),
      attributions = Seq(ManhattanAppId("athena", "timelines_aggregates_v2_features_by_user"))
    )
    val geoUserLocationParams = DatasetParams(
      attributions = Seq(ManhattanAppId("starbuck", "timelines_geo_features_by_user"))
    )

    FeatureStoreParams(
      perDataset = Map(
        UserFeaturesDataset.id -> userFeaturesParams,
        GeoUserLocationDataset.id -> geoUserLocationParams
      )
    )
  }

  /**
   * Creates a [[FeatureStoreClient]] over the union of the user, author and tweet bound
   * feature sets.
   *
   * @param serviceIdentifier identity used for mutual TLS on the underlying Strato client
   * @param statsReceiver     stats receiver handed to the client
   */
  def mkFeatureStoreClient(
    serviceIdentifier: ServiceIdentifier,
    statsReceiver: StatsReceiver
  ): FeatureStoreClient = {
    com.twitter.server.Init() // necessary in order to use WilyNS path

    val stratoClient: Client = mkStratoClient(serviceIdentifier)
    val hydratedFeatures =
      UserFeaturesAdapter.UserFeaturesSet ++
        AuthorFeaturesAdapter.UserFeaturesSet ++
        TweetFeaturesAdapter.TweetFeaturesSet

    FeatureStoreClient(
      featureSet = hydratedFeatures,
      client = stratoClient,
      statsReceiver = statsReceiver,
      featureStoreParams = featureStoreParams
    )
  }
}
|
@ -0,0 +1,79 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.storehaus.ReplicatedReadableStore
|
||||
import com.twitter.storehaus.Store
|
||||
import com.twitter.timelines.clients.memcache_common._
|
||||
import com.twitter.timelines.util.FailOpenHandler
|
||||
import com.twitter.util.Future
|
||||
|
||||
object ServedFeaturesMemcacheConfigBuilder {
|
||||
/**
 * Returns the cache destination name for the given cluster.
 *
 * @param cluster cluster name interpolated into the destination
 * @param isProd  selects the production destination when true, the test destination otherwise
 */
def getTwCacheDestination(cluster: String, isProd: Boolean = false): String =
  if (isProd) s"/srv#/prod/$cluster/cache/timelines_served_features"
  else s"/srv#/test/$cluster/cache//twemcache_timelines_served_features_cache"
|
||||
|
||||
/**
 * Builds the memcache client configuration for the served-features cache.
 *
 * @param cluster The DC of the cache that this client will send requests to. This
 *                can be different to the DC where the summingbird job is running in.
 * @param isProd  Whether this client is part of a production summingbird job;
 *                production and test jobs use different access points.
 * @return a read-write config with 200 ms per-request, TCP-connect and
 *         connection-acquisition timeouts, 2 tries, a 400 ms global timeout and at most
 *         1000 pending requests
 */
def build(cluster: String, isProd: Boolean = false): StorehausMemcacheConfig = {
  val destination = getTwCacheDestination(cluster, isProd)
  StorehausMemcacheConfig(
    destName = destination,
    keyPrefix = "",
    requestTimeout = 200.milliseconds,
    numTries = 2,
    globalTimeout = 400.milliseconds,
    tcpConnectTimeout = 200.milliseconds,
    connectionAcquisitionTimeout = 200.milliseconds,
    numPendingRequests = 1000,
    isReadOnly = false
  )
}
|
||||
}
|
||||
|
||||
/**
 * Read-through store backed by a local store and a replicated remote store.
 *
 * If the lookup key does not exist locally, make a call to the replicated store(s).
 * If a value exists remotely, write it to the local store and return it. The whole lookup
 * is wrapped in a [[FailOpenHandler]] whose fallback returns `Future.None`, so that
 * subsequent operations may proceed.
 *
 * Only `get` is overridden here; writes use whatever `Store` provides by default.
 *
 * @param localStore          store consulted first and back-filled on a remote hit
 * @param remoteStore         replicated store consulted on a local miss
 * @param scopedStatsReceiver receiver for the `localFails`, `localWrites` and `remoteFails`
 *                            counters and the `failOpen` scope
 */
class LocallyReplicatedStore[-K, V](
  localStore: Store[K, V],
  remoteStore: ReplicatedReadableStore[K, V],
  scopedStatsReceiver: StatsReceiver)
    extends Store[K, V] {
  private[this] val failOpenHandler = new FailOpenHandler(scopedStatsReceiver.scope("failOpen"))
  // Counts local lookups that returned no value.
  private[this] val localFailsCounter = scopedStatsReceiver.counter("localFails")
  // Counts back-fill writes issued to the local store.
  private[this] val localWritesCounter = scopedStatsReceiver.counter("localWrites")
  // Counts remote lookups that succeeded but returned no value (not failed futures).
  private[this] val remoteFailsCounter = scopedStatsReceiver.counter("remoteFails")

  /**
   * Looks up `k` locally, falling back to the remote store on a local miss.
   *
   * @return the local value if present, otherwise the remote result; `Future.None` from the
   *         fail-open fallback
   */
  override def get(k: K): Future[Option[V]] =
    failOpenHandler {
      localStore
        .get(k)
        .flatMap {
          case localHit @ Some(_) => Future.value(localHit)
          case _ =>
            localFailsCounter.incr()
            val replicatedOptFu = remoteStore.get(k)
            // Async back-fill if the remote result is not empty. The put is fire-and-forget:
            // its resulting Future is intentionally not awaited or inspected.
            replicatedOptFu.onSuccess {
              case Some(v) =>
                localWritesCounter.incr()
                localStore.put((k, Some(v)))
              case _ =>
                // Previously this branch ended in `Unit`, i.e. the Unit companion object
                // rather than the unit value; `incr()` already yields the unit value.
                remoteFailsCounter.incr()
            }
            replicatedOptFu
        }
    } { _: Throwable => Future.None }
}
|
@ -0,0 +1,254 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.finagle.stats.Counter
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.ml.api.constant.SharedFeatures
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.DataRecordMerger
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.Author
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.Tweet
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.User
|
||||
import com.twitter.ml.featurestore.lib.online.FeatureStoreClient
|
||||
import com.twitter.summingbird.Producer
|
||||
import com.twitter.summingbird.storm.Storm
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig
|
||||
import com.twitter.timelines.prediction.features.common.TimelinesSharedFeatures
|
||||
import java.lang.{Long => JLong}
|
||||
|
||||
import com.twitter.unified_user_actions.thriftscala.ActionType
|
||||
import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
|
||||
|
||||
private[real_time] object StormAggregateSourceUtils {
|
||||
type UserId = Long
|
||||
type AuthorId = Long
|
||||
type TweetId = Long
|
||||
|
||||
/**
 * Attaches a [[FeatureStoreClient]] to the underlying [[Producer]]. The FeatureStoreClient
 * hydrates additional user, author and tweet features.
 *
 * Up to three left joins are chained in order -- user (keyed by USER_ID), author (keyed by
 * SOURCE_AUTHOR_ID) and tweet (keyed by SOURCE_TWEET_ID). Each join is applied only when the
 * matching `keyedBy*Enabled` flag of `jobConfig` is set; otherwise that stage passes the
 * previous producer through unchanged.
 *
 * @param underlyingProducer  converts a stream of [[com.twitter.clientapp.thriftscala.LogEvent]]
 *                            to a stream of [[DataRecord]].
 * @param jobConfig           supplies the service identifier and the per-stage enable flags
 * @param scopedStatsReceiver receiver for the per-stage counters and the feature store client
 * @return the producer with every enabled feature join applied
 */
def wrapByFeatureStoreClient(
  underlyingProducer: Producer[Storm, Event[DataRecord]],
  jobConfig: RealTimeAggregatesJobConfig,
  scopedStatsReceiver: StatsReceiver
): Producer[Storm, Event[DataRecord]] = {
  // All counters are lazy and are handed to leftJoinDataRecordProducer as by-name
  // arguments, so each one is only created when first used.

  // User-join counters (historically un-prefixed stat names).
  lazy val userKeyDataRecordCounter = scopedStatsReceiver.counter("keyDataRecord")
  lazy val userKeyFeatureCounter = scopedStatsReceiver.counter("keyFeature")
  lazy val userLeftDataRecordCounter = scopedStatsReceiver.counter("leftDataRecord")
  lazy val userRightDataRecordCounter = scopedStatsReceiver.counter("rightDataRecord")
  lazy val userMergeNumFeaturesCounter = scopedStatsReceiver.counter("mergeNumFeatures")

  // Author-join counters.
  lazy val authorKeyDataRecordCounter = scopedStatsReceiver.counter("authorKeyDataRecord")
  lazy val authorKeyFeatureCounter = scopedStatsReceiver.counter("authorKeyFeature")
  lazy val authorLeftDataRecordCounter = scopedStatsReceiver.counter("authorLeftDataRecord")
  lazy val authorRightDataRecordCounter = scopedStatsReceiver.counter("authorRightDataRecord")
  lazy val authorMergeNumFeaturesCounter = scopedStatsReceiver.counter("authorMergeNumFeatures")

  // Tweet-join counters.
  lazy val tweetKeyDataRecordCounter = scopedStatsReceiver.counter("tweetKeyDataRecord")
  lazy val tweetKeyFeatureCounter = scopedStatsReceiver.counter("tweetKeyFeature")
  lazy val tweetLeftDataRecordCounter = scopedStatsReceiver.counter("tweetLeftDataRecord")
  lazy val tweetRightDataRecordCounter = scopedStatsReceiver.counter("tweetRightDataRecord")
  lazy val tweetMergeNumFeaturesCounter = scopedStatsReceiver.counter("tweetMergeNumFeatures")

  // Shared by all three readable stores; built on first use.
  @transient lazy val featureStoreClient: FeatureStoreClient =
    FeatureStoreUtils.mkFeatureStoreClient(
      serviceIdentifier = jobConfig.serviceIdentifier,
      statsReceiver = scopedStatsReceiver
    )

  // Stage 1: user features.
  lazy val withUserFeatures =
    if (jobConfig.keyedByUserEnabled) {
      lazy val userFeaturesService: Storm#Service[Set[UserId], DataRecord] =
        Storm.service(
          new UserFeaturesReadableStore(
            featureStoreClient = featureStoreClient,
            userEntity = User,
            userFeaturesAdapter = UserFeaturesAdapter
          )
        )

      leftJoinDataRecordProducer(
        keyFeature = SharedFeatures.USER_ID,
        leftDataRecordProducer = underlyingProducer,
        rightStormService = userFeaturesService,
        keyDataRecordCounter = userKeyDataRecordCounter,
        keyFeatureCounter = userKeyFeatureCounter,
        leftDataRecordCounter = userLeftDataRecordCounter,
        rightDataRecordCounter = userRightDataRecordCounter,
        mergeNumFeaturesCounter = userMergeNumFeaturesCounter
      )
    } else {
      underlyingProducer
    }

  // Stage 2: author features, joined on top of stage 1.
  lazy val withAuthorFeatures =
    if (jobConfig.keyedByAuthorEnabled) {
      lazy val authorFeaturesService: Storm#Service[Set[AuthorId], DataRecord] =
        Storm.service(
          new UserFeaturesReadableStore(
            featureStoreClient = featureStoreClient,
            userEntity = Author,
            userFeaturesAdapter = AuthorFeaturesAdapter
          )
        )

      leftJoinDataRecordProducer(
        keyFeature = TimelinesSharedFeatures.SOURCE_AUTHOR_ID,
        leftDataRecordProducer = withUserFeatures,
        rightStormService = authorFeaturesService,
        keyDataRecordCounter = authorKeyDataRecordCounter,
        keyFeatureCounter = authorKeyFeatureCounter,
        leftDataRecordCounter = authorLeftDataRecordCounter,
        rightDataRecordCounter = authorRightDataRecordCounter,
        mergeNumFeaturesCounter = authorMergeNumFeaturesCounter
      )
    } else {
      withUserFeatures
    }

  // Stage 3: tweet features, joined on top of stage 2; this is the method's result.
  if (jobConfig.keyedByTweetEnabled) {
    lazy val tweetFeaturesService: Storm#Service[Set[TweetId], DataRecord] =
      Storm.service(
        new TweetFeaturesReadableStore(
          featureStoreClient = featureStoreClient,
          tweetEntity = Tweet,
          tweetFeaturesAdapter = TweetFeaturesAdapter
        )
      )

    leftJoinDataRecordProducer(
      keyFeature = TimelinesSharedFeatures.SOURCE_TWEET_ID,
      leftDataRecordProducer = withAuthorFeatures,
      rightStormService = tweetFeaturesService,
      keyDataRecordCounter = tweetKeyDataRecordCounter,
      keyFeatureCounter = tweetKeyFeatureCounter,
      leftDataRecordCounter = tweetLeftDataRecordCounter,
      rightDataRecordCounter = tweetRightDataRecordCounter,
      mergeNumFeaturesCounter = tweetMergeNumFeaturesCounter
    )
  } else {
    withAuthorFeatures
  }
}
|
||||
|
||||
private[this] lazy val DataRecordMerger = new DataRecordMerger
|
||||
|
||||
/**
 * Builds the join key for a client event data record.
 *
 * @param keyFeature           Feature to extract join key value: USER_ID, SOURCE_TWEET_ID, etc.
 * @param record               DataRecord containing client engagement and basic tweet-side features
 * @param keyDataRecordCounter incremented once for every record inspected
 * @param keyFeatureCounter    incremented only when `record` carries `keyFeature`
 * @return a single-element set holding the key value when `record` has `keyFeature`,
 *         otherwise the empty set; the caller pairs it with the record for the subsequent
 *         leftJoin operation.
 */
private[this] def mkKey(
  keyFeature: Feature[JLong],
  record: DataRecord,
  keyDataRecordCounter: Counter,
  keyFeatureCounter: Counter
): Set[Long] = {
  keyDataRecordCounter.incr()
  val rich = new RichDataRecord(record)
  if (!rich.hasFeature(keyFeature)) {
    Set.empty[Long]
  } else {
    keyFeatureCounter.incr()
    Set(rich.getFeatureValue(keyFeature).toLong)
  }
}
|
||||
|
||||
/**
 * After the leftJoin, merge the joined data record (if any) into the client event data
 * record, producing the single data record used for further aggregation.
 *
 * The merge is applied to `leftRecord.event` itself and the same `leftRecord` wrapper is
 * returned.
 *
 * @param leftRecord              event wrapping the client event data record
 * @param rightRecordOpt          joined data record, if the join found one
 * @param leftDataRecordCounter   incremented for every call
 * @param rightDataRecordCounter  incremented when a joined record is present
 * @param mergeNumFeaturesCounter incremented by the feature count of the merged record
 */
private[this] def mergeDataRecord(
  leftRecord: Event[DataRecord],
  rightRecordOpt: Option[DataRecord],
  leftDataRecordCounter: Counter,
  rightDataRecordCounter: Counter,
  mergeNumFeaturesCounter: Counter
): Event[DataRecord] = {
  leftDataRecordCounter.incr()
  rightRecordOpt match {
    case Some(rightRecord) =>
      rightDataRecordCounter.incr()
      DataRecordMerger.merge(leftRecord.event, rightRecord)
      mergeNumFeaturesCounter.incr(new RichDataRecord(leftRecord.event).numFeatures())
    case None =>
      ()
  }
  leftRecord
}
|
||||
|
||||
/**
 * Left-joins a stream of events against a feature service and merges any joined record
 * into the event's data record.
 *
 * Only [[HomeEvent]]s get a real join key (extracted via `mkKey`); profile, search and UUA
 * events are keyed by the empty set.
 *
 * The counters are by-name parameters, so they are evaluated where they are used rather
 * than at the call site.
 *
 * @param keyFeature             feature whose value is the join key
 * @param leftDataRecordProducer stream of events to enrich
 * @param rightStormService      service supplying the data record to merge for a key set
 */
private[this] def leftJoinDataRecordProducer(
  keyFeature: Feature[JLong],
  leftDataRecordProducer: Producer[Storm, Event[DataRecord]],
  rightStormService: Storm#Service[Set[Long], DataRecord],
  keyDataRecordCounter: => Counter,
  keyFeatureCounter: => Counter,
  leftDataRecordCounter: => Counter,
  rightDataRecordCounter: => Counter,
  mergeNumFeaturesCounter: => Counter
): Producer[Storm, Event[DataRecord]] = {
  val keyedEvents: Producer[Storm, (Set[Long], Event[DataRecord])] =
    leftDataRecordProducer.map {
      case homeEvent: HomeEvent[DataRecord] =>
        val joinKey = mkKey(
          keyFeature = keyFeature,
          record = homeEvent.event,
          keyDataRecordCounter = keyDataRecordCounter,
          keyFeatureCounter = keyFeatureCounter
        )
        (joinKey, homeEvent)
      case unkeyedEvent @ (_: ProfileEvent[_] | _: SearchEvent[_] | _: UuaEvent[_]) =>
        (Set.empty[Long], unkeyedEvent)
    }

  keyedEvents
    .leftJoin(rightStormService)
    .map {
      case (_, (leftEvent, joinedRecordOpt)) =>
        mergeDataRecord(
          leftRecord = leftEvent,
          rightRecordOpt = joinedRecordOpt,
          leftDataRecordCounter = leftDataRecordCounter,
          rightDataRecordCounter = rightDataRecordCounter,
          mergeNumFeaturesCounter = mergeNumFeaturesCounter
        )
    }
}
|
||||
|
||||
/**
 * Filters Unified User Actions events to include only actions whose breadcrumb views show
 * a home timeline visit prior to landing on the page.
 *
 * @param event the unified user action to inspect
 * @return true for tweet and profile V2 impressions whose breadcrumb views contain "home",
 *         and for fullscreen-video V2 impressions whose breadcrumb views contain both
 *         "home" and "video"; false for every other action type or when the event has no
 *         breadcrumb views
 */
def isUuaBCEEventsFromHome(event: UnifiedUserAction): Boolean = {
  // Missing breadcrumb views count as "does not contain".
  def breadcrumbViewsContain(view: String): Boolean =
    event.eventMetadata.breadcrumbViews.exists(_.contains(view))

  event.actionType match {
    case ActionType.ClientTweetV2Impression if breadcrumbViewsContain("home") =>
      true
    case ActionType.ClientTweetVideoFullscreenV2Impression
        if breadcrumbViewsContain("home") && breadcrumbViewsContain("video") =>
      true
    case ActionType.ClientProfileV2Impression if breadcrumbViewsContain("home") =>
      true
    case _ => false
  }
}
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.{
|
||||
OnlineAggregationStoresTrait,
|
||||
RealTimeAggregateStore
|
||||
}
|
||||
|
||||
/**
 * Concrete online aggregation config for Timelines: binds the aggregate definitions to the
 * production and staging memcache-backed stores and to the Storm input source.
 */
object TimelinesOnlineAggregationConfig
    extends TimelinesOnlineAggregationDefinitionsTrait
    with OnlineAggregationStoresTrait {

  /** Source of the records to aggregate. */
  override lazy val inputSource = TimelinesOnlineAggregationSources.timelinesOnlineAggregateSource

  /** Store for the production job; entries are cached for 5 days. */
  override lazy val ProductionStore = RealTimeAggregateStore(
    memcacheDataSet = "timelines_real_time_aggregates",
    isProd = true,
    cacheTTL = 5.days
  )

  /** Store for the staging job; entries are cached for 5 days. */
  override lazy val StagingStore = RealTimeAggregateStore(
    memcacheDataSet = "twemcache_timelines_real_time_aggregates",
    isProd = false,
    cacheTTL = 5.days
  )

  /**
   * AggregatesToCompute: This defines the complete set of aggregates to be
   * computed by the aggregation job and to be stored in memcache.
   */
  override lazy val AggregatesToCompute = ProdAggregates ++ StagingAggregates
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,5 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
/** Holds the input source shared by the Timelines online aggregation configs. */
object TimelinesOnlineAggregationSources {

  /** The single [[TimelinesStormAggregateSource]] instance used as the aggregation input. */
  val timelinesOnlineAggregateSource: TimelinesStormAggregateSource =
    new TimelinesStormAggregateSource
}
|
@ -0,0 +1,182 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.DefaultStatsReceiver
|
||||
import com.twitter.summingbird.Options
|
||||
import com.twitter.summingbird.online.option.FlatMapParallelism
|
||||
import com.twitter.summingbird.online.option.SourceParallelism
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron._
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.DownsampleTransform
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.UserDownsampleTransform
|
||||
|
||||
import com.twitter.timelines.prediction.common.aggregates.BCELabelTransformFromUUADataRecord
|
||||
|
||||
/**
|
||||
* Sets up relevant topology parameters. Our primary goal is to handle the
|
||||
* LogEvent stream and aggregate (sum) on the parsed DataRecords without falling
|
||||
* behind. Our constraint is the resulting write (and read) QPS to the backing
|
||||
* memcache store.
|
||||
*
|
||||
* If the job is falling behind, add more flatMappers and/or Summers after
|
||||
* inspecting the viz panels for the respective job (go/heron-ui). An increase in
|
||||
* Summers (and/or aggregation keys and features in the config) results in an
|
||||
* increase in memcache QPS (go/cb and search for our cache). Adjust with CacheSize
|
||||
* settings until QPS is well-controlled.
|
||||
*
|
||||
*/
|
||||
object TimelinesRealTimeAggregatesJobConfigs extends RealTimeAggregatesJobConfigs {
  import TimelinesOnlineAggregationUtils._

  /**
   * Drops every input record that carries no label/engagement from AllTweetLabels (explicit user
   * engagements, including public, private and impression events), and keeps all records that do.
   *
   * Because records without engagements are never ingested, adding a new spout to the input
   * aggregate sources does not shift the distribution of the computed aggregate features.
   * Counterfactual signal is still available since we aggregate on explicit dwell engagements.
   */
  val NegativeDownsampleTransform =
    DownsampleTransform(
      negativeSamplingRate = 0.0,
      keepLabels = AllTweetLabels,
      positiveSamplingRate = 1.0
    )

  /**
   * User-consistent downsampling for the devel topology, aiming for the equivalent of 10% of prod
   * traffic. Sampling whole users (rather than individual records) more closely approximates prod
   * query patterns. Records without explicit labels are removed afterwards by
   * [[NegativeDownsampleTransform]].
   */
  val StagingUserBasedDownsampleTransform =
    UserDownsampleTransform(availability = 1000, featureName = "rta_devel")

  /** Production topology configuration. */
  override val Prod = RealTimeAggregatesJobConfig(
    appId = "summingbird_timelines_rta",
    topologyWorkers = 1450,
    sourceCount = 120,
    flatMapCount = 1800,
    summerCount = 3850,
    cacheSize = 200,
    containerRamGigaBytes = 54,
    name = "timelines_real_time_aggregates",
    teamName = "timelines",
    teamEmail = "",
    // When a component hits its GC limit in prod, tune componentToMetaSpaceSizeMap.
    // Source bolts are the exception: tune componentToRamGigaBytesMap for those instead.
    componentToMetaSpaceSizeMap = Map(
      "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M",
      "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M"
    ),
    // When a component hits its memory limit in prod, give it more memory: either raise the
    // container total (containerRamGigaBytes), or shift memory towards that component while
    // keeping the container total unchanged.
    componentToRamGigaBytesMap = Map(
      "Tail-FlatMap-Source" -> 3, // Home source
      "Tail-FlatMap-Source.2" -> 3, // Profile source
      "Tail-FlatMap-Source.3" -> 3, // Search source
      "Tail-FlatMap-Source.4" -> 3, // UUA source
      "Tail-FlatMap" -> 8
      // Tail uses whatever memory is left over in the container. Tune topologyWorkers and
      // containerRamGigaBytes so that the leftover stays above 10 GB.
    ),
    topologyNamedOptions = Map(
      "TL_EVENTS_SOURCE" -> Options().set(SourceParallelism(120)),
      "PROFILE_EVENTS_SOURCE" -> Options().set(SourceParallelism(30)),
      "SEARCH_EVENTS_SOURCE" -> Options().set(SourceParallelism(10)),
      "UUA_EVENTS_SOURCE" -> Options().set(SourceParallelism(10)),
      "COMBINED_PRODUCER" -> Options().set(FlatMapParallelism(1800))
    ),
    // UUA data records for BCE events arrive without binary labels populated.
    // BCELabelTransformFromUUADataRecord sets the binary BCE dwell label features based on the
    // corresponding dwell_time_ms. It must run before NegativeDownsampleTransform, because
    // NegativeDownsampleTransform removes data records that contain no feature from AllTweetLabels.
    onlinePreTransforms = Seq(
      RichITransform(BCELabelTransformFromUUADataRecord),
      NegativeDownsampleTransform
    )
  )

  /**
   * Devel topology configuration.
   *
   * Devel input is first downsampled per user by [[StagingUserBasedDownsampleTransform]].
   * To better test scalability of the topology, the flatMap and summer counts below are scaled
   * well below their Prod values, while the per-source parallelism is kept the same as Prod.
   */
  override val Devel = RealTimeAggregatesJobConfig(
    appId = "summingbird_timelines_rta_devel",
    topologyWorkers = 120,
    sourceCount = 120,
    flatMapCount = 150,
    summerCount = 300,
    cacheSize = 200,
    containerRamGigaBytes = 54,
    name = "timelines_real_time_aggregates_devel",
    teamName = "timelines",
    teamEmail = "",
    // When a component hits its GC limit, tune componentToMetaSpaceSizeMap.
    // Source bolts are the exception: tune componentToRamGigaBytesMap for those instead.
    componentToMetaSpaceSizeMap = Map(
      "Tail-FlatMap" -> "-XX:MaxMetaspaceSize=1024M -XX:MetaspaceSize=1024M",
      "Tail" -> "-XX:MaxMetaspaceSize=2560M -XX:MetaspaceSize=2560M"
    ),
    // When a component hits its memory limit, give it more memory: either raise the container
    // total (containerRamGigaBytes), or shift memory towards that component while keeping the
    // container total unchanged.
    componentToRamGigaBytesMap = Map(
      "Tail-FlatMap-Source" -> 3, // Home source
      "Tail-FlatMap-Source.2" -> 3, // Profile source
      "Tail-FlatMap-Source.3" -> 3, // Search source
      "Tail-FlatMap-Source.4" -> 3, // UUA source
      "Tail-FlatMap" -> 8
      // Tail uses whatever memory is left over in the container. Tune topologyWorkers and
      // containerRamGigaBytes so that the leftover stays above 10 GB.
    ),
    topologyNamedOptions = Map(
      "TL_EVENTS_SOURCE" -> Options().set(SourceParallelism(120)),
      "PROFILE_EVENTS_SOURCE" -> Options().set(SourceParallelism(30)),
      "SEARCH_EVENTS_SOURCE" -> Options().set(SourceParallelism(10)),
      "UUA_EVENTS_SOURCE" -> Options().set(SourceParallelism(10)),
      "COMBINED_PRODUCER" -> Options().set(FlatMapParallelism(150))
    ),
    // BCELabelTransformFromUUADataRecord must run before NegativeDownsampleTransform
    // (see the same ordering in Prod).
    onlinePreTransforms = Seq(
      StagingUserBasedDownsampleTransform,
      RichITransform(BCELabelTransformFromUUADataRecord),
      NegativeDownsampleTransform
    ),
    enableUserReindexingNighthawkBtreeStore = true,
    enableUserReindexingNighthawkHashStore = true,
    userReindexingNighthawkBtreeStoreConfig = NighthawkUnderlyingStoreConfig(
      serversetPath =
        "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_btree_test_api",
      // NOTE: table names are prefixed to every pkey, so keep them short.
      tableName = "u_r_v1", // (u)ser_(r)eindexing_v1
      // Keep ttl <= 1 day: the store is keyed on user, and hit rates beyond 1 day are limited.
      cacheTTL = 1.day
    ),
    userReindexingNighthawkHashStoreConfig = NighthawkUnderlyingStoreConfig(
      // For prod: "/s/cache-user/nighthawk_timelines_real_time_aggregates_hash_api",
      serversetPath =
        "/twitter/service/cache-user/test/nighthawk_timelines_real_time_aggregates_hash_test_api",
      // NOTE: table names are prefixed to every pkey, so keep them short.
      tableName = "u_r_v1", // (u)ser_(r)eindexing_v1
      // Keep ttl <= 1 day: the store is keyed on user, and hit rates beyond 1 day are limited.
      cacheTTL = 1.day
    )
  )
}
|
||||
|
||||
/**
 * Entry-point job object for the Timelines real-time aggregates topology. It wires the stats
 * scope, the per-environment job configs and the aggregate definitions into
 * RealTimeAggregatesJobBase.
 */
object TimelinesRealTimeAggregatesJob extends RealTimeAggregatesJobBase {

  /** Stats for this job are reported under the "timelines_real_time_aggregates" scope. */
  override lazy val statsReceiver =
    DefaultStatsReceiver.scope("timelines_real_time_aggregates")

  /** Prod / Devel topology settings. */
  override lazy val jobConfigs =
    TimelinesRealTimeAggregatesJobConfigs

  /** Aggregate definitions computed by this job. */
  override lazy val aggregatesToCompute =
    TimelinesOnlineAggregationConfig.AggregatesToCompute
}
|
@ -0,0 +1,185 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.clientapp.thriftscala.LogEvent
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.Counter
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.constant.SharedFeatures
|
||||
import com.twitter.snowflake.id.SnowflakeId
|
||||
import com.twitter.summingbird._
|
||||
import com.twitter.summingbird.storm.Storm
|
||||
import com.twitter.summingbird_internal.sources.AppId
|
||||
import com.twitter.summingbird_internal.sources.storm.remote.ClientEventSourceScrooge2
|
||||
import com.twitter.timelines.data_processing.ad_hoc.suggests.common.AllScribeProcessor
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.RealTimeAggregatesJobConfig
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.heron.StormAggregateSource
|
||||
import com.twitter.timelines.prediction.adapters.client_log_event.ClientLogEventAdapter
|
||||
import com.twitter.timelines.prediction.adapters.client_log_event.ProfileClientLogEventAdapter
|
||||
import com.twitter.timelines.prediction.adapters.client_log_event.SearchClientLogEventAdapter
|
||||
import com.twitter.timelines.prediction.adapters.client_log_event.UuaEventAdapter
|
||||
import com.twitter.unified_user_actions.client.config.KafkaConfigs
|
||||
import com.twitter.unified_user_actions.client.summingbird.UnifiedUserActionsSourceScrooge
|
||||
import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
|
||||
* Storm Producer for client events generated on Home, Profile, and Search, plus Unified User Actions (UUA) events
|
||||
*/
|
||||
class TimelinesStormAggregateSource extends StormAggregateSource {

  override val name = "timelines_rta"
  override val timestampFeature = SharedFeatures.TIMESTAMP

  // Names attached to the producers built in `build`.
  private lazy val TimelinesClientEventSourceName = "TL_EVENTS_SOURCE"
  private lazy val ProfileClientEventSourceName = "PROFILE_EVENTS_SOURCE"
  private lazy val SearchClientEventSourceName = "SEARCH_EVENTS_SOURCE"
  private lazy val UuaEventSourceName = "UUA_EVENTS_SOURCE"
  private lazy val CombinedProducerName = "COMBINED_PRODUCER"
  private lazy val FeatureStoreProducerName = "FEATURE_STORE_PRODUCER"

  /**
   * True when the snowflake timestamp extracted from the event's user id is less than 30 days
   * in the past. False when the event has no log base, no user id, or the id carries no time.
   * Not referenced by any other member of this class.
   */
  private def isNewUserEvent(event: LogEvent): Boolean = {
    val userIdTime = event.logBase.flatMap(_.userId).flatMap(SnowflakeId.timeFromIdOpt)
    userIdTime.exists(_.untilNow < 30.days)
  }

  /** Bumps `counter` by the number of records and hands the records back unchanged. */
  private def counted(records: Seq[DataRecord], counter: Counter): Seq[DataRecord] = {
    counter.incr(records.size)
    records
  }

  /**
   * Home timeline: adapts a client LogEvent to data records. Events that are not valid
   * suggest-tweet events produce no records.
   */
  private def mkDataRecords(event: LogEvent, dataRecordCounter: Counter): Seq[DataRecord] = {
    val adapted: Seq[DataRecord] =
      if (!AllScribeProcessor.isValidSuggestTweetEvent(event)) Seq.empty[DataRecord]
      else ClientLogEventAdapter.adaptToDataRecords(event).asScala
    counted(adapted, dataRecordCounter)
  }

  /** Profile: adapts a client LogEvent to data records and counts them. */
  private def mkProfileDataRecords(
    event: LogEvent,
    dataRecordCounter: Counter
  ): Seq[DataRecord] =
    counted(ProfileClientLogEventAdapter.adaptToDataRecords(event).asScala, dataRecordCounter)

  /** Search: adapts a client LogEvent to data records and counts them. */
  private def mkSearchDataRecords(
    event: LogEvent,
    dataRecordCounter: Counter
  ): Seq[DataRecord] =
    counted(SearchClientLogEventAdapter.adaptToDataRecords(event).asScala, dataRecordCounter)

  /** UUA: adapts a UnifiedUserAction to data records and counts them. */
  private def mkUuaDataRecords(
    event: UnifiedUserAction,
    dataRecordCounter: Counter
  ): Seq[DataRecord] =
    counted(UuaEventAdapter.adaptToDataRecords(event).asScala, dataRecordCounter)

  /**
   * Assembles the Storm producer of data records:
   *  1. one source per surface (Home, Profile, Search, UUA), each tagged with its event wrapper;
   *  2. the sources are merged, adapted to data records, and run through
   *     `jobConfig.sequentiallyTransform`;
   *  3. the result is wrapped by the feature store client and unwrapped to plain data records.
   *
   * @param statsReceiver parent stats receiver; scoped by this class's simple name
   * @param jobConfig     supplies the app id and the transforms applied to every record
   */
  override def build(
    statsReceiver: StatsReceiver,
    jobConfig: RealTimeAggregatesJobConfig
  ): Producer[Storm, DataRecord] = {
    lazy val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
    lazy val dataRecordCounter = scopedStatsReceiver.counter("dataRecord")

    // Step 1 (Home timeline engagements): topic => LogEvent
    lazy val clientEventProducer: Producer[Storm, HomeEvent[LogEvent]] =
      ClientEventSourceScrooge2(
        appId = AppId(jobConfig.appId),
        topic = "julep_client_event_suggests",
        resumeAtLastReadOffset = false,
        enableTls = true
      ).source
        .map(HomeEvent[LogEvent])
        .name(TimelinesClientEventSourceName)

    // Step 1 (Profile engagements): topic => LogEvent
    lazy val profileClientEventProducer: Producer[Storm, ProfileEvent[LogEvent]] =
      ClientEventSourceScrooge2(
        appId = AppId(jobConfig.appId),
        topic = "julep_client_event_profile_real_time_engagement_metrics",
        resumeAtLastReadOffset = false,
        enableTls = true
      ).source
        .map(ProfileEvent[LogEvent])
        .name(ProfileClientEventSourceName)

    // Step 1 (Search engagements): topic => LogEvent
    // No per-user filter is applied on this source; every event read from the topic is kept.
    lazy val searchClientEventProducer: Producer[Storm, SearchEvent[LogEvent]] =
      ClientEventSourceScrooge2(
        appId = AppId(jobConfig.appId),
        topic = "julep_client_event_search_real_time_engagement_metrics",
        resumeAtLastReadOffset = false,
        enableTls = true
      ).source
        .map(SearchEvent[LogEvent])
        .name(SearchClientEventSourceName)

    // Step 1 (Unified User Actions, which include Home and other product surfaces).
    // Only events accepted by isUuaBCEEventsFromHome are kept.
    lazy val uuaEventProducer: Producer[Storm, UuaEvent[UnifiedUserAction]] =
      UnifiedUserActionsSourceScrooge(
        appId = AppId(jobConfig.appId),
        parallelism = 10,
        kafkaConfig = KafkaConfigs.ProdUnifiedUserActionsEngagementOnly
      ).source
        .filter(StormAggregateSourceUtils.isUuaBCEEventsFromHome(_))
        .map(UuaEvent[UnifiedUserAction])
        .name(UuaEventSourceName)

    // Step 2:
    //  (a) merge all sources
    //  (b) raw event => Seq[DataRecord]
    //  (c) apply the job's configured transforms (sampler)
    lazy val combinedClientEventDataRecordProducer: Producer[Storm, Event[DataRecord]] =
      profileClientEventProducer // This becomes the bottom branch
        .merge(clientEventProducer) // This becomes the middle branch
        .merge(searchClientEventProducer)
        .merge(uuaEventProducer) // This becomes the top
        .flatMap { // raw event => Seq[DataRecord]
          case home: HomeEvent[LogEvent] =>
            mkDataRecords(home.event, dataRecordCounter).map(HomeEvent[DataRecord])
          case profile: ProfileEvent[LogEvent] =>
            mkProfileDataRecords(profile.event, dataRecordCounter).map(ProfileEvent[DataRecord])
          case search: SearchEvent[LogEvent] =>
            mkSearchDataRecords(search.event, dataRecordCounter).map(SearchEvent[DataRecord])
          case uua: UuaEvent[UnifiedUserAction] =>
            mkUuaDataRecords(uua.event, dataRecordCounter).map(UuaEvent[DataRecord])
        }
        .flatMap { // apply sampler, keeping each record in its original event wrapper
          case home: HomeEvent[DataRecord] =>
            jobConfig.sequentiallyTransform(home.event).map(HomeEvent[DataRecord])
          case profile: ProfileEvent[DataRecord] =>
            jobConfig.sequentiallyTransform(profile.event).map(ProfileEvent[DataRecord])
          case search: SearchEvent[DataRecord] =>
            jobConfig.sequentiallyTransform(search.event).map(SearchEvent[DataRecord])
          case uua: UuaEvent[DataRecord] =>
            jobConfig.sequentiallyTransform(uua.event).map(UuaEvent[DataRecord])
        }
        .name(CombinedProducerName)

    // Step 3: join with Feature Store features, then drop the event wrapper.
    lazy val featureStoreDataRecordProducer: Producer[Storm, DataRecord] =
      StormAggregateSourceUtils
        .wrapByFeatureStoreClient(
          underlyingProducer = combinedClientEventDataRecordProducer,
          jobConfig = jobConfig,
          scopedStatsReceiver = scopedStatsReceiver
        )
        .map(_.event)
        .name(FeatureStoreProducerName)

    featureStoreDataRecordProducer
  }
}
|
@ -0,0 +1,35 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.Tweet
|
||||
import com.twitter.ml.featurestore.catalog.features.trends.TweetTrendsScores
|
||||
import com.twitter.ml.featurestore.lib.TweetId
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecord
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecordAdapter
|
||||
import com.twitter.ml.featurestore.lib.feature.BoundFeature
|
||||
import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
|
||||
import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
|
||||
import java.util
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Adapts feature store PredictionRecords for tweets into DataRecords.
 * The bound-feature map is currently empty, so the feature set and feature context are empty too.
 */
object TweetFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {

  // Bound feature store feature -> ML API feature it is exposed as. No entries at present.
  private val ContinuousFeatureMap: Map[BoundFeature[TweetId, Double], Feature.Continuous] =
    Map()

  /** Bound features to request from the feature store (the keys of the map above). */
  val TweetFeaturesSet: BoundFeatureSet =
    new BoundFeatureSet(ContinuousFeatureMap.keys.toSet)

  /** ML API features this adapter can emit (the values of the map above). */
  val AllFeatures: Seq[Feature[_]] = ContinuousFeatureMap.values.toSeq

  private val adapter = PredictionRecordAdapter.oneToOne(TweetFeaturesSet)

  override def getFeatureContext: FeatureContext = new FeatureContext(AllFeatures: _*)

  override def commonFeatures: Set[Feature[_]] = Set.empty

  /** Converts one PredictionRecord into a single-element list holding its DataRecord. */
  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
    val dataRecord = adapter.adaptToDataRecord(record)
    List(dataRecord).asJava
  }
}
|
@ -0,0 +1,53 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.featurestore.lib.TweetId
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecord
|
||||
import com.twitter.ml.featurestore.lib.entity.Entity
|
||||
import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
|
||||
import com.twitter.util.Future
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * ReadableStore keyed on sets of tweet ids that resolves each key to a DataRecord by querying
 * the feature store and adapting the returned PredictionRecord.
 *
 * @param featureStoreClient   client used to fetch PredictionRecords
 * @param tweetEntity          entity used to turn raw tweet ids into feature store entity ids
 * @param tweetFeaturesAdapter converts a PredictionRecord into DataRecords
 */
class TweetFeaturesReadableStore(
  featureStoreClient: FeatureStoreClient,
  tweetEntity: Entity[TweetId],
  tweetFeaturesAdapter: TimelinesAdapterBase[PredictionRecord])
    extends ReadableStore[Set[Long], DataRecord] {

  /**
   * Issues one batched feature store call with one request per key, then pairs each key with
   * the record found at the same position in the response.
   */
  override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = {
    // Fix an iteration order so request i and response i refer to the same key.
    val orderedKeys: Seq[K] = keys.toSeq
    val predictionRecordsFut: Future[Seq[PredictionRecord]] =
      featureStoreClient(toFeatureStoreRequests(orderedKeys))

    dataRecordsByKey(orderedKeys, predictionRecordsFut)
  }

  /** Builds one request per key, containing an entity id for every tweet id in that key. */
  private def toFeatureStoreRequests[K <: Set[Long]](
    orderedKeys: Seq[K]
  ): Seq[FeatureStoreRequest] =
    orderedKeys.map { key: Set[Long] =>
      val entityIds = key.map { tweetId => tweetEntity.withId(TweetId(tweetId)) }.toSeq
      FeatureStoreRequest(entityIds = entityIds)
    }

  /**
   * Maps every key to the first DataRecord adapted from the response at its index.
   * A key resolves to None when the response has no entry at that index or the adapter
   * returns no records.
   */
  private def dataRecordsByKey[K <: Set[Long]](
    orderedKeys: Seq[K],
    predictionRecordsFut: Future[Seq[PredictionRecord]]
  ): Map[K, Future[Option[DataRecord]]] = {
    val keyed = orderedKeys.zipWithIndex.map {
      case (tweetIdSet, position) =>
        val recordFut: Future[Option[DataRecord]] =
          predictionRecordsFut.map { predictionRecords =>
            predictionRecords.lift(position).flatMap { predictionRecord: PredictionRecord =>
              tweetFeaturesAdapter.adaptToDataRecords(predictionRecord).asScala.headOption
            }
          }
        tweetIdSet -> recordFut
    }
    keyed.toMap
  }
}
|
@ -0,0 +1,7 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.summingbird_internal.runner.storm.GenericRunner
|
||||
|
||||
/** Command-line entry point that hands the Timelines real-time aggregates job to GenericRunner. */
object TypeSafeRunner {

  /** Delegates to GenericRunner with the raw arguments and the job factory. */
  def main(args: Array[String]): Unit = {
    GenericRunner(args, arg => TimelinesRealTimeAggregatesJob(arg))
  }
}
|
@ -0,0 +1,108 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType.InferredGender
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType.UserState
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Text
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.ml.api.RichDataRecord
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.User
|
||||
import com.twitter.ml.featurestore.catalog.features.core.UserAccount
|
||||
import com.twitter.ml.featurestore.catalog.features.geo.UserLocation
|
||||
import com.twitter.ml.featurestore.catalog.features.magicrecs.UserActivity
|
||||
import com.twitter.ml.featurestore.lib.EntityId
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecord
|
||||
import com.twitter.ml.featurestore.lib.feature.BoundFeature
|
||||
import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
|
||||
import com.twitter.ml.featurestore.lib.UserId
|
||||
import com.twitter.ml.featurestore.lib.{Discrete => FSDiscrete}
|
||||
import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
|
||||
import com.twitter.timelines.prediction.features.user_health.UserHealthFeatures
|
||||
import java.lang.{Boolean => JBoolean}
|
||||
import java.lang.{String => JString}
|
||||
import java.util
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Adapts a user's feature store PredictionRecord into a DataRecord carrying:
 *  - one binary user-state feature,
 *  - the user's country code,
 *  - a binary "verified by any badge" feature.
 */
object UserFeaturesAdapter extends TimelinesAdapterBase[PredictionRecord] {

  val UserStateBoundFeature: BoundFeature[UserId, FSDiscrete] = UserActivity.UserState.bind(User)

  /**
   * Binary features describing the viewer's user state. The discrete state values are:
   * enum UserState {
   *   NEW = 0,
   *   NEAR_ZERO = 1,
   *   VERY_LIGHT = 2,
   *   LIGHT = 3,
   *   MEDIUM_TWEETER = 4,
   *   MEDIUM_NON_TWEETER = 5,
   *   HEAVY_NON_TWEETER = 6,
   *   HEAVY_TWEETER = 7
   * }(persisted='true')
   */
  val IS_USER_NEW =
    new Binary("timelines.user_state.is_user_new", Set(UserState).asJava)
  val IS_USER_LIGHT =
    new Binary("timelines.user_state.is_user_light", Set(UserState).asJava)
  val IS_USER_MEDIUM_TWEETER =
    new Binary("timelines.user_state.is_user_medium_tweeter", Set(UserState).asJava)
  val IS_USER_MEDIUM_NON_TWEETER =
    new Binary("timelines.user_state.is_user_medium_non_tweeter", Set(UserState).asJava)
  val IS_USER_HEAVY_NON_TWEETER =
    new Binary("timelines.user_state.is_user_heavy_non_tweeter", Set(UserState).asJava)
  val IS_USER_HEAVY_TWEETER =
    new Binary("timelines.user_state.is_user_heavy_tweeter", Set(UserState).asJava)

  /** Discrete user-state value -> binary feature. States 1, 2 and 3 all map to IS_USER_LIGHT. */
  val userStateToFeatureMap: Map[Long, Binary] = Map(
    0L -> IS_USER_NEW,
    1L -> IS_USER_LIGHT,
    2L -> IS_USER_LIGHT,
    3L -> IS_USER_LIGHT,
    4L -> IS_USER_MEDIUM_TWEETER,
    5L -> IS_USER_MEDIUM_NON_TWEETER,
    6L -> IS_USER_HEAVY_NON_TWEETER,
    7L -> IS_USER_HEAVY_TWEETER
  )

  val UserStateBooleanFeatures: Set[Feature[_]] = userStateToFeatureMap.values.toSet

  val USER_COUNTRY_ID = new Text("geo.user_location.country_code")
  val UserCountryCodeFeature: BoundFeature[UserId, String] =
    UserLocation.CountryCodeAlpha2.bind(User)
  val UserLocationFeatures: Set[Feature[_]] = Set(USER_COUNTRY_ID)

  // Individual verification flags; they are OR-ed together in adaptToDataRecords.
  private val UserVerifiedFeaturesSet = Set(
    UserAccount.IsUserVerified.bind(User),
    UserAccount.IsUserBlueVerified.bind(User),
    UserAccount.IsUserGoldVerified.bind(User),
    UserAccount.IsUserGrayVerified.bind(User)
  )

  /** Every bound feature requested from the feature store for a user. */
  val UserFeaturesSet: BoundFeatureSet =
    BoundFeatureSet(UserStateBoundFeature, UserCountryCodeFeature) ++
      BoundFeatureSet(UserVerifiedFeaturesSet.asInstanceOf[Set[BoundFeature[_ <: EntityId, _]]])

  // NOTE(review): GenderBooleanFeatures is not defined anywhere in this object as visible here;
  // presumably it is supplied from elsewhere in the package -- confirm it resolves.
  private val allFeatures: Seq[Feature[_]] =
    UserStateBooleanFeatures.toSeq ++ GenderBooleanFeatures.toSeq ++
      UserLocationFeatures.toSeq ++ Seq(UserHealthFeatures.IsUserVerifiedUnion)

  override def getFeatureContext: FeatureContext = new FeatureContext(allFeatures: _*)

  override def commonFeatures: Set[Feature[_]] = Set.empty

  /**
   * Builds a single DataRecord from the given PredictionRecord.
   *
   * The user-state and country features are set only when the corresponding value is present
   * (and, for user state, has an entry in userStateToFeatureMap). IsUserVerifiedUnion is always
   * set: true when any verification flag is present and true, false otherwise.
   */
  override def adaptToDataRecords(record: PredictionRecord): util.List[DataRecord] = {
    val newRecord = new RichDataRecord(new DataRecord)

    for {
      userState <- record.getFeatureValue(UserStateBoundFeature)
      booleanFeature <- userStateToFeatureMap.get(userState.value)
    } newRecord.setFeatureValue[JBoolean](booleanFeature, true)

    for (countryCodeFeatureValue <- record.getFeatureValue(UserCountryCodeFeature)) {
      newRecord.setFeatureValue[JString](USER_COUNTRY_ID, countryCodeFeatureValue)
    }

    val isUserVerifiedUnion =
      UserVerifiedFeaturesSet.exists(feature => record.getFeatureValue(feature).getOrElse(false))
    newRecord.setFeatureValue[JBoolean](UserHealthFeatures.IsUserVerifiedUnion, isUserVerifiedUnion)

    List(newRecord.getRecord).asJava
  }
}
|
@ -0,0 +1,37 @@
|
||||
package com.twitter.timelines.prediction.common.aggregates.real_time
|
||||
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.featurestore.lib.UserId
|
||||
import com.twitter.ml.featurestore.lib.data.PredictionRecord
|
||||
import com.twitter.ml.featurestore.lib.entity.Entity
|
||||
import com.twitter.ml.featurestore.lib.online.{FeatureStoreClient, FeatureStoreRequest}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.timelines.prediction.common.adapters.TimelinesAdapterBase
|
||||
import com.twitter.util.Future
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * ReadableStore keyed on sets of user ids that resolves each key to a DataRecord by querying
 * the feature store and adapting the returned PredictionRecord.
 *
 * @param featureStoreClient  client used to fetch PredictionRecords
 * @param userEntity          entity used to turn raw user ids into feature store entity ids
 * @param userFeaturesAdapter converts a PredictionRecord into DataRecords
 */
class UserFeaturesReadableStore(
  featureStoreClient: FeatureStoreClient,
  userEntity: Entity[UserId],
  userFeaturesAdapter: TimelinesAdapterBase[PredictionRecord])
    extends ReadableStore[Set[Long], DataRecord] {

  /**
   * Issues one batched feature store call with one request per key, then pairs each key with
   * the first DataRecord adapted from the response at the same position.
   *
   * A key resolves to None when the response has no entry at its position or the adapter
   * returns no records. The response is indexed with `lift` so that a response shorter than
   * the request list yields None for the affected keys instead of failing their futures with
   * an IndexOutOfBoundsException; this matches TweetFeaturesReadableStore.
   */
  override def multiGet[K <: Set[Long]](keys: Set[K]): Map[K, Future[Option[DataRecord]]] = {
    // Fix an iteration order so request i and response i refer to the same key.
    val orderedKeys: Seq[K] = keys.toSeq
    val featureStoreRequests: Seq[FeatureStoreRequest] = orderedKeys.map { key: Set[Long] =>
      FeatureStoreRequest(
        entityIds = key.map(userId => userEntity.withId(UserId(userId))).toSeq
      )
    }
    val predictionRecordsFut: Future[Seq[PredictionRecord]] = featureStoreClient(
      featureStoreRequests)

    orderedKeys.zipWithIndex.map {
      case (userIdSet, index) =>
        val dataRecordFutOpt: Future[Option[DataRecord]] = predictionRecordsFut.map {
          predictionRecords =>
            predictionRecords.lift(index).flatMap { predictionRecordAtIndex: PredictionRecord =>
              userFeaturesAdapter.adaptToDataRecords(predictionRecordAtIndex).asScala.headOption
            }
        }
        (userIdSet, dataRecordFutOpt)
    }.toMap
  }
}
|
@ -0,0 +1,6 @@
|
||||
## Prediction Features
|
||||
|
||||
This directory contains a collection of `Features` (`com.twitter.ml.api.Feature`) which are definitions of feature names and datatypes which allow the features to be efficiently processed and passed to the different ranking models.
|
||||
Because the features are predefined with their names and datatypes, they can be identified by only a hash of their name when they are generated, scribed, or used for scoring.
|
||||
|
||||
Not all of these features are used in the model; many are experimental or deprecated.
|
@ -0,0 +1,11 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/scala/com/twitter/suggests/controller_data",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/timelineservice/server/suggests/logging:thrift-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,169 @@
|
||||
package com.twitter.timelines.prediction.features.client_log_event
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.Discrete
|
||||
import scala.collection.JavaConverters._
|
||||
import com.twitter.timelineservice.suggests.logging.candidate_tweet_source_id.thriftscala.CandidateTweetSourceId
|
||||
|
||||
object ClientLogEventDataRecordFeatures {
|
||||
// ---------------------------------------------------------------------------
// Features declared with personal-data-type annotations (the second
// constructor argument, converted to a Java set with `.asJava`).
// ---------------------------------------------------------------------------
val HasConsumerVideo: Binary = new Binary(
  "client_log_event.tweet.has_consumer_video",
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val PhotoCount: Continuous = new Continuous(
  "client_log_event.tweet.photo_count",
  Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava
)
val HasImage: Binary = new Binary(
  "client_log_event.tweet.has_image",
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val IsReply: Binary = new Binary(
  "client_log_event.tweet.is_reply",
  Set(PublicReplies, PrivateReplies).asJava
)
val IsRetweet: Binary = new Binary(
  "client_log_event.tweet.is_retweet",
  Set(PublicRetweets, PrivateRetweets).asJava
)
val IsPromoted: Binary = new Binary(
  "client_log_event.tweet.is_promoted",
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val HasVisibleLink: Binary = new Binary(
  "client_log_event.tweet.has_visible_link",
  Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val HasHashtag: Binary = new Binary(
  "client_log_event.tweet.has_hashtag",
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)

// ---------------------------------------------------------------------------
// Features declared by name only (no personal-data-type annotation).
// ---------------------------------------------------------------------------

// Network relationship flags.
val FromMutualFollow: Binary = new Binary("client_log_event.tweet.from_mutual_follow")
val IsInNetwork: Binary = new Binary("client_log_event.tweet.is_in_network")
val IsNotInNetwork: Binary = new Binary("client_log_event.tweet.is_not_in_network")

// Candidate source flags (the `from_*` names).
val FromRecap: Binary = new Binary("client_log_event.tweet.from_recap")
val FromRecycled: Binary = new Binary("client_log_event.tweet.from_recycled")
val FromActivity: Binary = new Binary("client_log_event.tweet.from_activity")
val FromSimcluster: Binary = new Binary("client_log_event.tweet.from_simcluster")
val FromErg: Binary = new Binary("client_log_event.tweet.from_erg")
val FromCroon: Binary = new Binary("client_log_event.tweet.from_croon")
val FromList: Binary = new Binary("client_log_event.tweet.from_list")
val FromRecTopic: Binary = new Binary("client_log_event.tweet.from_rec_topic")

// The only Discrete feature in this group; its name is camelCase, unlike its siblings.
val InjectedPosition: Discrete = new Discrete("client_log_event.tweet.injectedPosition")
val TextOnly: Binary = new Binary("client_log_event.tweet.text_only")

// Social context and topic flags.
val HasLikedBySocialContext: Binary =
  new Binary("client_log_event.tweet.has_liked_by_social_context")
val HasFollowedBySocialContext: Binary =
  new Binary("client_log_event.tweet.has_followed_by_social_context")
val HasTopicSocialContext: Binary =
  new Binary("client_log_event.tweet.has_topic_social_context")
val IsFollowedTopicTweet: Binary =
  new Binary("client_log_event.tweet.is_followed_topic_tweet")
val IsRecommendedTopicTweet: Binary =
  new Binary("client_log_event.tweet.is_recommended_topic_tweet")

// Tweet age buckets.
val IsTweetAgeLessThan15Seconds: Binary =
  new Binary("client_log_event.tweet.tweet_age_less_than_15_seconds")
val IsTweetAgeLessThanOrEqualTo30Minutes: Binary =
  new Binary("client_log_event.tweet.tweet_age_lte_30_minutes")
val IsTweetAgeLessThanOrEqualTo1Hour: Binary =
  new Binary("client_log_event.tweet.tweet_age_lte_1_hour")
val IsTweetAgeLessThanOrEqualTo6Hours: Binary =
  new Binary("client_log_event.tweet.tweet_age_lte_6_hours")
val IsTweetAgeLessThanOrEqualTo12Hours: Binary =
  new Binary("client_log_event.tweet.tweet_age_lte_12_hours")
val IsTweetAgeGreaterThanOrEqualTo24Hours: Binary =
  new Binary("client_log_event.tweet.tweet_age_gte_24_hours")

// Favorite and retweet count thresholds.
val HasGreaterThanOrEqualTo100Favs: Binary =
  new Binary("client_log_event.tweet.has_gte_100_favs")
val HasGreaterThanOrEqualTo1KFavs: Binary =
  new Binary("client_log_event.tweet.has_gte_1k_favs")
val HasGreaterThanOrEqualTo10KFavs: Binary =
  new Binary("client_log_event.tweet.has_gte_10k_favs")
val HasGreaterThanOrEqualTo100KFavs: Binary =
  new Binary("client_log_event.tweet.has_gte_100k_favs")
val HasGreaterThanOrEqualTo10Retweets: Binary =
  new Binary("client_log_event.tweet.has_gte_10_retweets")
val HasGreaterThanOrEqualTo100Retweets: Binary =
  new Binary("client_log_event.tweet.has_gte_100_retweets")
val HasGreaterThanOrEqualTo1KRetweets: Binary =
  new Binary("client_log_event.tweet.has_gte_1k_retweets")
|
||||
|
||||
/** Lookup from a tweet-type string key to the Binary feature registered for it. */
val TweetTypeToFeatureMap: Map[String, Binary] = Map(
  // content and network
  "link" -> HasVisibleLink,
  "hashtag" -> HasHashtag,
  "mutual_follow" -> FromMutualFollow,
  "in_network" -> IsInNetwork,
  "text_only" -> TextOnly,
  // social context and topics
  "has_liked_by_social_context" -> HasLikedBySocialContext,
  "has_followed_by_social_context" -> HasFollowedBySocialContext,
  "has_topic_social_context" -> HasTopicSocialContext,
  "is_followed_topic_tweet" -> IsFollowedTopicTweet,
  "is_recommended_topic_tweet" -> IsRecommendedTopicTweet,
  // tweet age buckets
  "tweet_age_less_than_15_seconds" -> IsTweetAgeLessThan15Seconds,
  "tweet_age_lte_30_minutes" -> IsTweetAgeLessThanOrEqualTo30Minutes,
  "tweet_age_lte_1_hour" -> IsTweetAgeLessThanOrEqualTo1Hour,
  "tweet_age_lte_6_hours" -> IsTweetAgeLessThanOrEqualTo6Hours,
  "tweet_age_lte_12_hours" -> IsTweetAgeLessThanOrEqualTo12Hours,
  "tweet_age_gte_24_hours" -> IsTweetAgeGreaterThanOrEqualTo24Hours,
  // favorite and retweet thresholds
  "has_gte_100_favs" -> HasGreaterThanOrEqualTo100Favs,
  "has_gte_1k_favs" -> HasGreaterThanOrEqualTo1KFavs,
  "has_gte_10k_favs" -> HasGreaterThanOrEqualTo10KFavs,
  "has_gte_100k_favs" -> HasGreaterThanOrEqualTo100KFavs,
  "has_gte_10_retweets" -> HasGreaterThanOrEqualTo10Retweets,
  "has_gte_100_retweets" -> HasGreaterThanOrEqualTo100Retweets,
  "has_gte_1k_retweets" -> HasGreaterThanOrEqualTo1KRetweets
)
|
||||
|
||||
/**
 * Lookup from the integer `value` of a [[CandidateTweetSourceId]] to the `From*`
 * Binary feature for that source. Two source ids (`CroonTopicTweet` and
 * `CroonTweet`) share the `FromCroon` feature.
 */
val CandidateTweetSourceIdFeatureMap: Map[Int, Binary] = Map(
  CandidateTweetSourceId.RecapTweet.value -> FromRecap,
  CandidateTweetSourceId.RecycledTweet.value -> FromRecycled,
  CandidateTweetSourceId.RecommendedTweet.value -> FromActivity,
  CandidateTweetSourceId.Simcluster.value -> FromSimcluster,
  CandidateTweetSourceId.ErgTweet.value -> FromErg,
  CandidateTweetSourceId.CroonTopicTweet.value -> FromCroon,
  CandidateTweetSourceId.CroonTweet.value -> FromCroon,
  CandidateTweetSourceId.ListTweet.value -> FromList,
  CandidateTweetSourceId.RecommendedTopicTweet.value -> FromRecTopic
)
|
||||
|
||||
/** The "V2" tweet feature group: media/type flags, link/hashtag flags and network flags. */
val TweetFeaturesV2: Set[Feature[_]] = Set(
  HasImage, IsReply, IsRetweet,
  HasVisibleLink, HasHashtag,
  FromMutualFollow, IsInNetwork
)
|
||||
|
||||
/**
 * Tweet-type features grouped under "content".
 *
 * The original element list named `HasVisibleLink` twice. A `Set` collapses
 * duplicates, so dropping the repeated entry leaves the resulting set unchanged.
 * NOTE(review): the second occurrence may have been intended to be a different
 * feature - confirm with the feature owners before adding one.
 */
val ContentTweetTypeFeatures: Set[Feature[_]] = Set(
  HasImage,
  HasVisibleLink,
  HasHashtag,
  TextOnly
)
|
||||
|
||||
/** Tweet-type features for the tweet age buckets. */
val FreshnessTweetTypeFeatures: Set[Feature[_]] = Set(
  IsTweetAgeLessThan15Seconds,
  IsTweetAgeLessThanOrEqualTo30Minutes,
  IsTweetAgeLessThanOrEqualTo1Hour,
  IsTweetAgeLessThanOrEqualTo6Hours,
  IsTweetAgeLessThanOrEqualTo12Hours,
  IsTweetAgeGreaterThanOrEqualTo24Hours
)

/** Tweet-type features for the social context flags. */
val SocialProofTweetTypeFeatures: Set[Feature[_]] =
  Set(HasLikedBySocialContext, HasFollowedBySocialContext, HasTopicSocialContext)

/** Tweet-type features for followed / recommended topic tweets. */
val TopicTweetPreferenceTweetTypeFeatures: Set[Feature[_]] =
  Set(IsFollowedTopicTweet, IsRecommendedTopicTweet)

/** Tweet-type features for the favorite and retweet count thresholds. */
val TweetPopularityTweetTypeFeatures: Set[Feature[_]] = Set(
  HasGreaterThanOrEqualTo100Favs,
  HasGreaterThanOrEqualTo1KFavs,
  HasGreaterThanOrEqualTo10KFavs,
  HasGreaterThanOrEqualTo100KFavs,
  HasGreaterThanOrEqualTo10Retweets,
  HasGreaterThanOrEqualTo100Retweets,
  HasGreaterThanOrEqualTo1KRetweets
)

/** Tweet-type features for network relationship, plus the promoted flag. */
val UserGraphInteractionTweetTypeFeatures: Set[Feature[_]] =
  Set(IsInNetwork, FromMutualFollow, IsNotInNetwork, IsPromoted)
|
||||
|
||||
/** Union of all six `*TweetTypeFeatures` groups defined in this object. */
val UserContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
  ContentTweetTypeFeatures ++
    FreshnessTweetTypeFeatures ++
    SocialProofTweetTypeFeatures ++
    TopicTweetPreferenceTweetTypeFeatures ++
    TweetPopularityTweetTypeFeatures ++
    UserGraphInteractionTweetTypeFeatures

/** The three network relationship flags combined with the content tweet-type group. */
val AuthorContentPreferenceTweetTypeFeatures: Set[Feature[_]] =
  Set(IsInNetwork, FromMutualFollow, IsNotInNetwork) ++ ContentTweetTypeFeatures
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
# Library target compiled from every Scala source matched by the `sources` glob.
scala_library(
    sources = ["*.scala"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "src/java/com/twitter/ml/api:api-base",
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
        "src/thrift/com/twitter/ml/api:data-java",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
|
@ -0,0 +1,536 @@
|
||||
package com.twitter.timelines.prediction.features.common
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.FeatureType
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import java.lang.{Boolean => JBoolean}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
object CombinedFeatures {
|
||||
// ---------------------------------------------------------------------------
// Basic engagement labels. Each is a Binary feature constructed from a feature
// name and a Java set of personal data types.
// ---------------------------------------------------------------------------
val IS_CLICKED: Binary = new Binary(
  "timelines.engagement.is_clicked",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_DWELLED: Binary = new Binary(
  "timelines.engagement.is_dwelled",
  Set(TweetsViewed, EngagementsPrivate).asJava
)
val IS_DWELLED_IN_BOUNDS_V1: Binary = new Binary(
  "timelines.engagement.is_dwelled_in_bounds_v1",
  Set(TweetsViewed, EngagementsPrivate).asJava
)
val IS_FAVORITED: Binary = new Binary(
  "timelines.engagement.is_favorited",
  Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_FOLLOWED: Binary = new Binary(
  "timelines.engagement.is_followed",
  Set(EngagementsPrivate, EngagementsPublic, Follow).asJava
)
val IS_IMPRESSED: Binary = new Binary(
  "timelines.engagement.is_impressed",
  Set(TweetsViewed, EngagementsPrivate).asJava
)
val IS_OPEN_LINKED: Binary = new Binary(
  "timelines.engagement.is_open_linked",
  Set(EngagementsPrivate, LinksClickedOn).asJava
)
val IS_PHOTO_EXPANDED: Binary = new Binary(
  "timelines.engagement.is_photo_expanded",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_PROFILE_CLICKED: Binary = new Binary(
  "timelines.engagement.is_profile_clicked",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_QUOTED: Binary = new Binary(
  "timelines.engagement.is_quoted",
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_REPLIED: Binary = new Binary(
  "timelines.engagement.is_replied",
  Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_RETWEETED: Binary = new Binary(
  "timelines.engagement.is_retweeted",
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
)
// NOTE(review): the name below is spelled "enagagement". It is reproduced exactly
// because the string is the feature's registered name; correcting it would
// presumably change the feature's identity for existing consumers - confirm first.
val IS_RETWEETED_WITHOUT_QUOTE: Binary = new Binary(
  "timelines.enagagement.is_retweeted_without_quote",
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_SHARE_DM_CLICKED: Binary = new Binary(
  "timelines.engagement.is_tweet_share_dm_clicked",
  Set(EngagementsPrivate).asJava
)
val IS_SHARE_DM_SENT: Binary = new Binary(
  "timelines.engagement.is_tweet_share_dm_sent",
  Set(EngagementsPrivate).asJava
)

// Video playback and view labels.
val IS_VIDEO_PLAYBACK_25: Binary = new Binary(
  "timelines.engagement.is_video_playback_25",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_PLAYBACK_50: Binary = new Binary(
  "timelines.engagement.is_video_playback_50",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_PLAYBACK_75: Binary = new Binary(
  "timelines.engagement.is_video_playback_75",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_PLAYBACK_95: Binary = new Binary(
  "timelines.engagement.is_video_playback_95",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_PLAYBACK_COMPLETE: Binary = new Binary(
  "timelines.engagement.is_video_playback_complete",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_PLAYBACK_START: Binary = new Binary(
  "timelines.engagement.is_video_playback_start",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_VIEWED: Binary = new Binary(
  "timelines.engagement.is_video_viewed",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
val IS_VIDEO_QUALITY_VIEWED: Binary = new Binary(
  "timelines.engagement.is_video_quality_viewed",
  Set(MediaEngagementActivities, EngagementsPrivate).asJava
)
|
||||
// ---------------------------------------------------------------------------
// "Good click" labels.
// ---------------------------------------------------------------------------

// v1: post click engagements: fav, reply
val IS_GOOD_CLICKED_CONVO_DESC_V1: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied",
  Set(
    TweetsClicked,
    PublicLikes,
    PrivateLikes,
    PublicReplies,
    PrivateReplies,
    EngagementsPrivate,
    EngagementsPublic
  ).asJava
)
// v2: post click engagements: click
val IS_GOOD_CLICKED_CONVO_DESC_V2: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_v2",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs",
  Set(
    TweetsClicked,
    PublicLikes,
    PrivateLikes,
    PublicReplies,
    PrivateReplies,
    EngagementsPrivate,
    EngagementsPublic
  ).asJava
)

// One label per individual post-click engagement.
val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_favorited",
  Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_REPLIED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_replied",
  Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_retweeted",
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_CLICKED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_clicked",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_followed",
  Set(EngagementsPrivate).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_share_dm_clicked",
  Set(EngagementsPrivate).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_profile_clicked",
  Set(EngagementsPrivate).asJava
)

// The `uam_gt_N` family, N = 0..3.
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_uam_gt_0",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_uam_gt_1",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_uam_gt_2",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3: Binary = new Binary(
  "timelines.engagement.is_good_clicked_convo_desc_uam_gt_3",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
|
||||
|
||||
// ---------------------------------------------------------------------------
// Dwell labels, grouped by the name prefix they share.
// ---------------------------------------------------------------------------

// Tweet detail dwell.
val IS_TWEET_DETAIL_DWELLED: Binary = new Binary(
  "timelines.engagement.is_tweet_detail_dwelled",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_TWEET_DETAIL_DWELLED_8_SEC: Binary = new Binary(
  "timelines.engagement.is_tweet_detail_dwelled_8_sec",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_TWEET_DETAIL_DWELLED_15_SEC: Binary = new Binary(
  "timelines.engagement.is_tweet_detail_dwelled_15_sec",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_TWEET_DETAIL_DWELLED_25_SEC: Binary = new Binary(
  "timelines.engagement.is_tweet_detail_dwelled_25_sec",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_TWEET_DETAIL_DWELLED_30_SEC: Binary = new Binary(
  "timelines.engagement.is_tweet_detail_dwelled_30_sec",
  Set(TweetsClicked, EngagementsPrivate).asJava
)

// Profile dwell.
val IS_PROFILE_DWELLED: Binary = new Binary(
  "timelines.engagement.is_profile_dwelled",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_DWELLED_10_SEC: Binary = new Binary(
  "timelines.engagement.is_profile_dwelled_10_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_DWELLED_20_SEC: Binary = new Binary(
  "timelines.engagement.is_profile_dwelled_20_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_DWELLED_30_SEC: Binary = new Binary(
  "timelines.engagement.is_profile_dwelled_30_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)

// Fullscreen video dwell.
val IS_FULLSCREEN_VIDEO_DWELLED: Binary = new Binary(
  "timelines.engagement.is_fullscreen_video_dwelled",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC: Binary = new Binary(
  "timelines.engagement.is_fullscreen_video_dwelled_5_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC: Binary = new Binary(
  "timelines.engagement.is_fullscreen_video_dwelled_10_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC: Binary = new Binary(
  "timelines.engagement.is_fullscreen_video_dwelled_20_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC: Binary = new Binary(
  "timelines.engagement.is_fullscreen_video_dwelled_30_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)

// Link dwell.
val IS_LINK_DWELLED_15_SEC: Binary = new Binary(
  "timelines.engagement.is_link_dwelled_15_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_LINK_DWELLED_30_SEC: Binary = new Binary(
  "timelines.engagement.is_link_dwelled_30_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)
val IS_LINK_DWELLED_60_SEC: Binary = new Binary(
  "timelines.engagement.is_link_dwelled_60_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
)

// ---------------------------------------------------------------------------
// Home-latest visit, bookmark and share labels.
// ---------------------------------------------------------------------------
val IS_HOME_LATEST_VISITED: Binary = new Binary(
  "timelines.engagement.is_home_latest_visited",
  Set(EngagementsPrivate).asJava
)
val IS_BOOKMARKED: Binary = new Binary(
  "timelines.engagement.is_bookmarked",
  Set(EngagementsPrivate).asJava
)
val IS_SHARED: Binary = new Binary(
  "timelines.engagement.is_shared",
  Set(EngagementsPrivate).asJava
)
val IS_SHARE_MENU_CLICKED: Binary = new Binary(
  "timelines.engagement.is_share_menu_clicked",
  Set(EngagementsPrivate).asJava
)
|
||||
|
||||
// ---------------------------------------------------------------------------
// Negative engagements
// ---------------------------------------------------------------------------
val IS_DONT_LIKE: Binary = new Binary(
  "timelines.engagement.is_dont_like",
  Set(EngagementsPrivate).asJava
)
val IS_BLOCK_CLICKED: Binary = new Binary(
  "timelines.engagement.is_block_clicked",
  Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_BLOCK_DIALOG_BLOCKED: Binary = new Binary(
  "timelines.engagement.is_block_dialog_blocked",
  Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_MUTE_CLICKED: Binary = new Binary(
  "timelines.engagement.is_mute_clicked",
  Set(Mutes, TweetsClicked, EngagementsPrivate).asJava
)
val IS_MUTE_DIALOG_MUTED: Binary = new Binary(
  "timelines.engagement.is_mute_dialog_muted",
  Set(Mutes, EngagementsPrivate).asJava
)
val IS_REPORT_TWEET_CLICKED: Binary = new Binary(
  "timelines.engagement.is_report_tweet_clicked",
  Set(TweetsClicked, EngagementsPrivate).asJava
)
val IS_CARET_CLICKED: Binary = new Binary(
  "timelines.engagement.is_caret_clicked",
  Set(EngagementsPrivate).asJava
)
val IS_NOT_ABOUT_TOPIC: Binary = new Binary(
  "timelines.engagement.is_not_about_topic",
  Set(EngagementsPrivate).asJava
)
val IS_NOT_RECENT: Binary = new Binary(
  "timelines.engagement.is_not_recent",
  Set(EngagementsPrivate).asJava
)
val IS_NOT_RELEVANT: Binary = new Binary(
  "timelines.engagement.is_not_relevant",
  Set(EngagementsPrivate).asJava
)
val IS_SEE_FEWER: Binary = new Binary(
  "timelines.engagement.is_see_fewer",
  Set(EngagementsPrivate).asJava
)
val IS_UNFOLLOW_TOPIC: Binary = new Binary(
  "timelines.engagement.is_unfollow_topic",
  Set(EngagementsPrivate).asJava
)
val IS_FOLLOW_TOPIC: Binary = new Binary(
  "timelines.engagement.is_follow_topic",
  Set(EngagementsPrivate).asJava
)
val IS_NOT_INTERESTED_IN_TOPIC: Binary = new Binary(
  "timelines.engagement.is_not_interested_in_topic",
  Set(EngagementsPrivate).asJava
)
val IS_NEGATIVE_FEEDBACK: Binary = new Binary(
  "timelines.engagement.is_negative_feedback",
  Set(EngagementsPrivate).asJava
)

// Labels whose names end in `_union`.
val IS_IMPLICIT_POSITIVE_FEEDBACK_UNION: Binary = new Binary(
  "timelines.engagement.is_implicit_positive_feedback_union",
  Set(EngagementsPrivate).asJava
)
val IS_EXPLICIT_POSITIVE_FEEDBACK_UNION: Binary = new Binary(
  "timelines.engagement.is_explicit_positive_feedback_union",
  Set(EngagementsPrivate).asJava
)
val IS_ALL_NEGATIVE_FEEDBACK_UNION: Binary = new Binary(
  "timelines.engagement.is_all_negative_feedback_union",
  Set(EngagementsPrivate).asJava
)
|
||||
// ---------------------------------------------------------------------------
// Reciprocal engagements for reply forward engagement
// ---------------------------------------------------------------------------
val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_impressed_by_author",
  Set(EngagementsPrivate).asJava
)
val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_favorited_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
)
val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_quoted_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
)
val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_replied_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
)
val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_retweeted_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
)
val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_blocked_by_author",
  Set(Blocks, EngagementsPrivate, EngagementsPublic).asJava
)
val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_followed_by_author",
  Set(EngagementsPrivate, EngagementsPublic, Follow).asJava
)
val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_unfollowed_by_author",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
val IS_REPLIED_REPLY_MUTED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_muted_by_author",
  Set(Mutes, EngagementsPrivate).asJava
)
val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_replied_reply_reported_by_author",
  Set(EngagementsPrivate).asJava
)

// ---------------------------------------------------------------------------
// Reciprocal engagements for fav forward engagement
// ---------------------------------------------------------------------------
val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_favorited_fav_favorited_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
)
val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_favorited_fav_replied_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
)
val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_favorited_fav_retweeted_by_author",
  Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
)
val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR: Binary = new Binary(
  "timelines.engagement.is_favorited_fav_followed_by_author",
  Set(EngagementsPrivate, EngagementsPublic).asJava
)
|
||||
|
||||
// ---------------------------------------------------------------------------
// Good profile click: a profile click considered together with a following
// engagement (follow, fav, reply, retweet, etc.) at the profile page.
// ---------------------------------------------------------------------------
val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_follow",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_FAV: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_fav",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_REPLY: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_reply",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_retweet",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateRetweets, PublicRetweets).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_tweet_click",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_share_dm_click",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
// This derived label is the union of all binary features above
val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_engaged",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava
)

// ---------------------------------------------------------------------------
// Bad profile click: a profile click considered together with a following
// engagement (user report, tweet report, mute, block, etc.) at the profile page.
// ---------------------------------------------------------------------------
val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_user_report_click",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_tweet_report_click",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_MUTE: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_mute",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK: Binary = new Binary(
  "timelines.engagement.is_profile_clicked_and_profile_block",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
// This derived label is the union of bad profile click engagements and existing negative feedback
val IS_NEGATIVE_FEEDBACK_V2: Binary = new Binary(
  "timelines.engagement.is_negative_feedback_v2",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
val IS_NEGATIVE_FEEDBACK_UNION: Binary = new Binary(
  "timelines.engagement.is_negative_feedback_union",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
// don't like, mute or profile page -> mute
val IS_WEAK_NEGATIVE_FEEDBACK: Binary = new Binary(
  "timelines.engagement.is_weak_negative_feedback",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)
// report, block or profile page -> report, block
val IS_STRONG_NEGATIVE_FEEDBACK: Binary = new Binary(
  "timelines.engagement.is_strong_negative_feedback",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
)

// engagement for following user from any surface area
val IS_FOLLOWED_FROM_ANY_SURFACE_AREA: Binary = new Binary(
  "timelines.engagement.is_followed_from_any_surface_area",
  Set(EngagementsPublic, EngagementsPrivate).asJava
)
val IS_RELEVANCE_PROMPT_YES_CLICKED: Binary = new Binary(
  "timelines.engagement.is_relevance_prompt_yes_clicked",
  Set(EngagementsPublic, EngagementsPrivate).asJava
)

// Reply downvote engagements
val IS_REPLY_DOWNVOTED: Binary = new Binary(
  "timelines.engagement.is_reply_downvoted",
  Set(EngagementsPrivate).asJava
)
val IS_REPLY_DOWNVOTE_REMOVED: Binary = new Binary(
  "timelines.engagement.is_reply_downvote_removed",
  Set(EngagementsPrivate).asJava
)
|
||||
|
||||
/**
|
||||
* Contains all engagements that are used/consumed by real-time
|
||||
* aggregates summingbird jobs. These engagements need to be
|
||||
* extractable from [[ClientEvent]].
|
||||
*/
|
||||
val EngagementsRealTime: Set[Feature[JBoolean]] = Set(
|
||||
IS_CLICKED,
|
||||
IS_DWELLED,
|
||||
IS_FAVORITED,
|
||||
IS_FOLLOWED,
|
||||
IS_OPEN_LINKED,
|
||||
IS_PHOTO_EXPANDED,
|
||||
IS_PROFILE_CLICKED,
|
||||
IS_QUOTED,
|
||||
IS_REPLIED,
|
||||
IS_RETWEETED,
|
||||
IS_RETWEETED_WITHOUT_QUOTE,
|
||||
IS_SHARE_DM_CLICKED,
|
||||
IS_SHARE_DM_SENT,
|
||||
IS_VIDEO_PLAYBACK_50,
|
||||
IS_VIDEO_VIEWED,
|
||||
IS_VIDEO_QUALITY_VIEWED
|
||||
)
|
||||
|
||||
val NegativeEngagementsRealTime: Set[Feature[JBoolean]] = Set(
|
||||
IS_REPORT_TWEET_CLICKED,
|
||||
IS_BLOCK_CLICKED,
|
||||
IS_MUTE_CLICKED
|
||||
)
|
||||
|
||||
val NegativeEngagementsRealTimeDontLike: Set[Feature[JBoolean]] = Set(
|
||||
IS_DONT_LIKE
|
||||
)
|
||||
|
||||
val NegativeEngagementsSecondary: Set[Feature[JBoolean]] = Set(
|
||||
IS_NOT_INTERESTED_IN_TOPIC,
|
||||
IS_NOT_ABOUT_TOPIC,
|
||||
IS_NOT_RECENT,
|
||||
IS_NOT_RELEVANT,
|
||||
IS_SEE_FEWER,
|
||||
IS_UNFOLLOW_TOPIC
|
||||
)
|
||||
|
||||
val PrivateEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_CLICKED,
|
||||
IS_DWELLED,
|
||||
IS_OPEN_LINKED,
|
||||
IS_PHOTO_EXPANDED,
|
||||
IS_PROFILE_CLICKED,
|
||||
IS_QUOTED,
|
||||
IS_VIDEO_PLAYBACK_50,
|
||||
IS_VIDEO_QUALITY_VIEWED
|
||||
)
|
||||
|
||||
val ImpressedEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_IMPRESSED
|
||||
)
|
||||
|
||||
val PrivateEngagementsV2: Set[Feature[JBoolean]] = Set(
|
||||
IS_CLICKED,
|
||||
IS_OPEN_LINKED,
|
||||
IS_PHOTO_EXPANDED,
|
||||
IS_PROFILE_CLICKED,
|
||||
IS_VIDEO_PLAYBACK_50,
|
||||
IS_VIDEO_QUALITY_VIEWED
|
||||
) ++ ImpressedEngagements
|
||||
|
||||
val CoreEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_FAVORITED,
|
||||
IS_REPLIED,
|
||||
IS_RETWEETED
|
||||
)
|
||||
|
||||
val DwellEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_DWELLED
|
||||
)
|
||||
|
||||
val PrivateCoreEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_CLICKED,
|
||||
IS_OPEN_LINKED,
|
||||
IS_PHOTO_EXPANDED,
|
||||
IS_VIDEO_PLAYBACK_50,
|
||||
IS_VIDEO_QUALITY_VIEWED
|
||||
)
|
||||
|
||||
val ConditionalEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_GOOD_CLICKED_CONVO_DESC_V1,
|
||||
IS_GOOD_CLICKED_CONVO_DESC_V2,
|
||||
IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S
|
||||
)
|
||||
|
||||
val ShareEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_SHARED,
|
||||
IS_SHARE_MENU_CLICKED
|
||||
)
|
||||
|
||||
val BookmarkEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_BOOKMARKED
|
||||
)
|
||||
|
||||
val TweetDetailDwellEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_TWEET_DETAIL_DWELLED,
|
||||
IS_TWEET_DETAIL_DWELLED_8_SEC,
|
||||
IS_TWEET_DETAIL_DWELLED_15_SEC,
|
||||
IS_TWEET_DETAIL_DWELLED_25_SEC,
|
||||
IS_TWEET_DETAIL_DWELLED_30_SEC
|
||||
)
|
||||
|
||||
val ProfileDwellEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_PROFILE_DWELLED,
|
||||
IS_PROFILE_DWELLED_10_SEC,
|
||||
IS_PROFILE_DWELLED_20_SEC,
|
||||
IS_PROFILE_DWELLED_30_SEC
|
||||
)
|
||||
|
||||
val FullscreenVideoDwellEngagements: Set[Feature[JBoolean]] = Set(
|
||||
IS_FULLSCREEN_VIDEO_DWELLED,
|
||||
IS_FULLSCREEN_VIDEO_DWELLED_5_SEC,
|
||||
IS_FULLSCREEN_VIDEO_DWELLED_10_SEC,
|
||||
IS_FULLSCREEN_VIDEO_DWELLED_20_SEC,
|
||||
IS_FULLSCREEN_VIDEO_DWELLED_30_SEC
|
||||
)
|
||||
|
||||
// CAPACITY WARNING: estimate the impact on capacity requirements before adding
// any new engagement to this set. User-author real-time aggregates have a very
// large key space.
//
// Contents: CoreEngagements and DwellEngagements plus the labels listed below.
val UserAuthorEngagements: Set[Feature[JBoolean]] =
  CoreEngagements ++ DwellEngagements ++ Set(
    IS_CLICKED,
    IS_PROFILE_CLICKED,
    IS_PHOTO_EXPANDED,
    IS_VIDEO_PLAYBACK_50,
    IS_NEGATIVE_FEEDBACK_UNION
  )
|
||||
|
||||
/**
 * Labels grouped as implicit positive engagements: clicks, dwell, link opens,
 * quotes, video views, good clicks and share actions.
 */
val ImplicitPositiveEngagements: Set[Feature[JBoolean]] =
  Set(
    IS_CLICKED,
    IS_DWELLED,
    IS_OPEN_LINKED,
    IS_PROFILE_CLICKED,
    IS_QUOTED,
    IS_VIDEO_PLAYBACK_50,
    IS_VIDEO_QUALITY_VIEWED,
    IS_TWEET_DETAIL_DWELLED,
    IS_GOOD_CLICKED_CONVO_DESC_V1,
    IS_GOOD_CLICKED_CONVO_DESC_V2,
    IS_SHARED,
    IS_SHARE_MENU_CLICKED,
    IS_SHARE_DM_SENT,
    IS_SHARE_DM_CLICKED
  )
|
||||
|
||||
/**
 * CoreEngagements plus the follow and quote labels.
 *
 * NOTE(review): IS_QUOTED is also a member of ImplicitPositiveEngagements;
 * confirm the overlap between the two sets is intended.
 */
val ExplicitPositiveEngagements: Set[Feature[JBoolean]] =
  CoreEngagements ++ Set(IS_FOLLOWED, IS_QUOTED)
|
||||
|
||||
/**
 * Union of NegativeEngagementsRealTime, NegativeEngagementsRealTimeDontLike and
 * the not-recent / not-relevant / see-fewer labels.
 */
val AllNegativeEngagements: Set[Feature[JBoolean]] =
  NegativeEngagementsRealTime ++ NegativeEngagementsRealTimeDontLike ++
    Set(IS_NOT_RECENT, IS_NOT_RELEVANT, IS_SEE_FEWER)
|
||||
}
|
@ -0,0 +1,97 @@
|
||||
package com.twitter.timelines.prediction.features.common
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import java.lang.{Boolean => JBoolean}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Binary engagement label features whose names start with "profile.engagement.",
 * together with two groupings of those labels (core and negative).
 */
object ProfileLabelFeatures {
  // Must stay first: the feature names below interpolate it during initialization.
  private val prefix = "profile"

  // Positive engagements
  val IS_CLICKED: Binary = new Binary(
    s"$prefix.engagement.is_clicked",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_DWELLED: Binary = new Binary(
    s"$prefix.engagement.is_dwelled",
    Set(TweetsViewed, EngagementsPrivate).asJava
  )
  val IS_FAVORITED: Binary = new Binary(
    s"$prefix.engagement.is_favorited",
    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED: Binary = new Binary(
    s"$prefix.engagement.is_replied",
    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_RETWEETED: Binary = new Binary(
    s"$prefix.engagement.is_retweeted",
    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
  )

  // Negative engagements
  val IS_DONT_LIKE: Binary = new Binary(
    s"$prefix.engagement.is_dont_like",
    Set(EngagementsPrivate).asJava
  )
  val IS_BLOCK_CLICKED: Binary = new Binary(
    s"$prefix.engagement.is_block_clicked",
    Set(Blocks, TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_MUTE_CLICKED: Binary = new Binary(
    s"$prefix.engagement.is_mute_clicked",
    Set(Mutes, TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_REPORT_TWEET_CLICKED: Binary = new Binary(
    s"$prefix.engagement.is_report_tweet_clicked",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )

  // Defined here but not a member of either grouping below.
  val IS_NEGATIVE_FEEDBACK_UNION: Binary = new Binary(
    s"$prefix.engagement.is_negative_feedback_union",
    Set(EngagementsPrivate, Blocks, Mutes, TweetsClicked, EngagementsPublic).asJava
  )

  /** Click, dwell, favorite, reply and retweet labels. */
  val CoreEngagements: Set[Feature[JBoolean]] =
    Set(IS_CLICKED, IS_DWELLED, IS_FAVORITED, IS_REPLIED, IS_RETWEETED)

  /** Don't-like, block, mute and report labels. */
  val NegativeEngagements: Set[Feature[JBoolean]] =
    Set(IS_DONT_LIKE, IS_BLOCK_CLICKED, IS_MUTE_CLICKED, IS_REPORT_TWEET_CLICKED)

}
|
||||
|
||||
/**
 * Binary engagement label features whose names start with "search.engagement.",
 * together with a grouping of all of them.
 */
object SearchLabelFeatures {
  // Must stay first: the feature names below interpolate it during initialization.
  private val prefix = "search"

  // Tweet engagements
  val IS_CLICKED: Binary = new Binary(
    s"$prefix.engagement.is_clicked",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_DWELLED: Binary = new Binary(
    s"$prefix.engagement.is_dwelled",
    Set(TweetsViewed, EngagementsPrivate).asJava
  )
  val IS_FAVORITED: Binary = new Binary(
    s"$prefix.engagement.is_favorited",
    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED: Binary = new Binary(
    s"$prefix.engagement.is_replied",
    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_RETWEETED: Binary = new Binary(
    s"$prefix.engagement.is_retweeted",
    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
  )

  // Profile clicks, split by the surface named in each feature
  val IS_PROFILE_CLICKED_SEARCH_RESULT_USER: Binary = new Binary(
    s"$prefix.engagement.is_profile_clicked_search_result_user",
    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava
  )
  val IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET: Binary = new Binary(
    s"$prefix.engagement.is_profile_clicked_search_result_tweet",
    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava
  )
  val IS_PROFILE_CLICKED_TYPEAHEAD_USER: Binary = new Binary(
    s"$prefix.engagement.is_profile_clicked_typeahead_user",
    Set(ProfilesClicked, ProfilesViewed, EngagementsPrivate).asJava
  )

  /** Every label defined in this object. */
  val CoreEngagements: Set[Feature[JBoolean]] =
    Set(
      IS_CLICKED,
      IS_DWELLED,
      IS_FAVORITED,
      IS_REPLIED,
      IS_RETWEETED,
      IS_PROFILE_CLICKED_SEARCH_RESULT_USER,
      IS_PROFILE_CLICKED_SEARCH_RESULT_TWEET,
      IS_PROFILE_CLICKED_TYPEAHEAD_USER
    )
}
|
||||
// Add Tweet Detail labels later
|
@ -0,0 +1,759 @@
|
||||
package com.twitter.timelines.prediction.features.common
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.Discrete
|
||||
import com.twitter.ml.api.Feature.SparseBinary
|
||||
import com.twitter.ml.api.Feature.SparseContinuous
|
||||
import com.twitter.ml.api.Feature.Text
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
// Shared features built with an empty prefix, so names passed through `name(...)` are used as-is.
object TimelinesSharedFeatures extends TimelinesSharedFeatures("")
|
||||
// Shared features built with the "in_reply_to_tweet" prefix, which `name(...)` prepends to feature names.
object InReplyToTweetTimelinesSharedFeatures extends TimelinesSharedFeatures("in_reply_to_tweet")
|
||||
|
||||
/**
|
||||
* Defines shared features
|
||||
*/
|
||||
class TimelinesSharedFeatures(prefix: String) {
|
||||
/**
 * Qualifies a feature name with this instance's prefix.
 *
 * @param featureName unqualified feature name
 * @return `featureName` unchanged when the prefix is empty, otherwise `prefix.featureName`
 */
private def name(featureName: String): String =
  if (prefix.isEmpty) featureName else s"$prefix.$featureName"
|
||||
|
||||
// ---- meta ----
val EXPERIMENT_META: SparseBinary = new SparseBinary(
  name("timelines.meta.experiment_meta"),
  Set(ExperimentId, ExperimentName).asJava
)

// Historically used by the "combined models" to tell in-network tweets from
// out-of-network ones. It now records which adapter (recap or rectweet) generated
// the data record, and the data collection pipeline uses it to split training data.
val INJECTION_TYPE: Discrete = new Discrete(name("timelines.meta.injection_type"))

// Identifies which injection module this is.
val INJECTION_MODULE_NAME: Text = new Text(name("timelines.meta.injection_module_name"))

val LIST_ID: Discrete = new Discrete(name("timelines.meta.list_id"))
val LIST_IS_PINNED: Binary = new Binary(name("timelines.meta.list_is_pinned"))

// Internal id per PS request; mainly used to join common features back to
// candidate features later.
val PREDICTION_REQUEST_ID: Discrete = new Discrete(name("timelines.meta.prediction_request_id"))
// Internal id per TLM request; mainly used to deduplicate re-served cached tweets in logging.
val SERVED_REQUEST_ID: Discrete = new Discrete(name("timelines.meta.served_request_id"))
// Internal id used as the join key in kafka logging: equal to servedRequestId when
// the tweet is cached, otherwise equal to predictionRequestId.
val SERVED_ID: Discrete = new Discrete(name("timelines.meta.served_id"))
val REQUEST_JOIN_ID: Discrete = new Discrete(name("timelines.meta.request_join_id"))

// Internal per-tweet boolean flag: whether the tweet was served from
// RankedTweetsCache (TQ-14050). Do not train on this feature; it is blacklisted
// in feature_config (D838346).
val IS_READ_FROM_CACHE: Binary = new Binary(name("timelines.meta.is_read_from_cache"))
|
||||
|
||||
// ---- model score discounts ----
val PHOTO_DISCOUNT: Continuous =
  new Continuous(name("timelines.score_discounts.photo"))
val VIDEO_DISCOUNT: Continuous =
  new Continuous(name("timelines.score_discounts.video"))
val TWEET_HEIGHT_DISCOUNT: Continuous =
  new Continuous(name("timelines.score_discounts.tweet_height"))
val TOXICITY_DISCOUNT: Continuous =
  new Continuous(name("timelines.score_discounts.toxicity"))
|
||||
|
||||
// ---- engagements ----
val ENGAGEMENT_TYPE: Discrete = new Discrete(name("timelines.engagement.type"))

// Predicted engagement features; each one is annotated with EngagementScore.
val PREDICTED_IS_FAVORITED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_favorited"), Set(EngagementScore).asJava)
val PREDICTED_IS_RETWEETED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava)
val PREDICTED_IS_QUOTED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_quoted"), Set(EngagementScore).asJava)
val PREDICTED_IS_REPLIED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_replied"), Set(EngagementScore).asJava)
val PREDICTED_IS_OPEN_LINKED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_open_linked"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_GOOD_OPEN_LINK: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_good_open_link"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_PROFILE_CLICKED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_profile_clicked"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_profile_clicked_and_profile_engaged"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_CLICKED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_clicked"), Set(EngagementScore).asJava)
val PREDICTED_IS_PHOTO_EXPANDED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_photo_expanded"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_FOLLOWED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_followed"), Set(EngagementScore).asJava)
val PREDICTED_IS_DONT_LIKE: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava)
val PREDICTED_IS_VIDEO_PLAYBACK_50: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_video_playback_50"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_VIDEO_QUALITY_VIEWED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_video_quality_viewed"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_GOOD_CLICKED_V1: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_GOOD_CLICKED_V2: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_good_clicked_convo_desc_v2"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_tweet_detail_dwelled_8_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_tweet_detail_dwelled_15_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_tweet_detail_dwelled_25_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_tweet_detail_dwelled_30_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S: Continuous = new Continuous(
  name(
    "timelines.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_favorited_fav_engaged_by_author"),
  Set(EngagementScore).asJava
)
|
||||
|
||||
// Predicted report-click and negative-feedback features.
val PREDICTED_IS_REPORT_TWEET_CLICKED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_report_tweet_clicked"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_NEGATIVE_FEEDBACK: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_negative_feedback"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_NEGATIVE_FEEDBACK_V2: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_negative_feedback_v2"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_weak_negative_feedback"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_strong_negative_feedback"),
  Set(EngagementScore).asJava
)
|
||||
|
||||
// Predicted dwell features.
val PREDICTED_IS_DWELLED_IN_BOUNDS_V1: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_dwelled_in_bounds_v1"),
  Set(EngagementScore).asJava
)
val PREDICTED_DWELL_NORMALIZED_OVERALL: Continuous = new Continuous(
  name("timelines.engagement_predicted.dwell_normalized_overall"),
  Set(EngagementScore).asJava
)
val PREDICTED_DWELL_CDF: Continuous =
  new Continuous(name("timelines.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava)
val PREDICTED_DWELL_CDF_OVERALL: Continuous = new Continuous(
  name("timelines.engagement_predicted.dwell_cdf_overall"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_DWELLED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava)
|
||||
|
||||
// Predicted home-latest visit, bookmark, share and dwell-threshold features.
val PREDICTED_IS_HOME_LATEST_VISITED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_home_latest_visited"),
  Set(EngagementScore).asJava
)

val PREDICTED_IS_BOOKMARKED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_bookmarked"), Set(EngagementScore).asJava)

val PREDICTED_IS_SHARED: Continuous =
  new Continuous(name("timelines.engagement_predicted.is_shared"), Set(EngagementScore).asJava)
val PREDICTED_IS_SHARE_MENU_CLICKED: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_share_menu_clicked"),
  Set(EngagementScore).asJava
)

val PREDICTED_IS_PROFILE_DWELLED_20_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_profile_dwelled_20_sec"),
  Set(EngagementScore).asJava
)

val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_fullscreen_video_dwelled_5_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_fullscreen_video_dwelled_10_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_fullscreen_video_dwelled_20_sec"),
  Set(EngagementScore).asJava
)
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC: Continuous = new Continuous(
  name("timelines.engagement_predicted.is_fullscreen_video_dwelled_30_sec"),
  Set(EngagementScore).asJava
)
|
||||
|
||||
// Use this feature, not `meta.timestamp`, for the actual served timestamp.
// NOTE(review): the name is not passed through name(...), so the class prefix is
// not applied here; confirm that is intended.
val SERVED_TIMESTAMP: Discrete = new Discrete(
  "timelines.meta.timestamp.served",
  Set(PrivateTimestamp).asJava
)
|
||||
|
||||
// Timestamps of when each engagement occurred. Do not train on these features.
// NOTE(review): these names are not passed through name(...), so the class prefix
// is not applied; confirm that is intended.
val TIMESTAMP_FAVORITED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.favorited", Set(PublicTimestamp).asJava)
val TIMESTAMP_RETWEETED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.retweeted", Set(PublicTimestamp).asJava)
val TIMESTAMP_REPLIED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.replied", Set(PublicTimestamp).asJava)
val TIMESTAMP_PROFILE_CLICKED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.profile_clicked", Set(PrivateTimestamp).asJava)
val TIMESTAMP_CLICKED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.clicked", Set(PrivateTimestamp).asJava)
val TIMESTAMP_PHOTO_EXPANDED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.photo_expanded", Set(PrivateTimestamp).asJava)
val TIMESTAMP_DWELLED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.dwelled", Set(PrivateTimestamp).asJava)
val TIMESTAMP_VIDEO_PLAYBACK_50: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.video_playback_50",
  Set(PrivateTimestamp).asJava
)
|
||||
// ---- reply engaged by author ----
val TIMESTAMP_REPLY_FAVORITED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.reply_favorited_by_author",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_REPLY_REPLIED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.reply_replied_by_author",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_REPLY_RETWEETED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.reply_retweeted_by_author",
  Set(PublicTimestamp).asJava
)

// ---- fav engaged by author ----
val TIMESTAMP_FAV_FAVORITED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.fav_favorited_by_author",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_FAV_REPLIED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.fav_replied_by_author",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_FAV_RETWEETED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.fav_retweeted_by_author",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_FAV_FOLLOWED_BY_AUTHOR: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.fav_followed_by_author",
  Set(PublicTimestamp).asJava
)
|
||||
// ---- good click ----
val TIMESTAMP_GOOD_CLICK_CONVO_DESC_FAVORITED: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.good_click_convo_desc_favorited",
  Set(PrivateTimestamp).asJava
)
// NOTE(review): the identifier is spelled "REPLIIED"; it is kept because it is
// part of the public interface.
val TIMESTAMP_GOOD_CLICK_CONVO_DESC_REPLIIED: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.good_click_convo_desc_replied",
  Set(PrivateTimestamp).asJava
)
// NOTE(review): the feature name is spelled "profiile"; it is kept because it is a
// runtime identifier. Confirm before correcting.
val TIMESTAMP_GOOD_CLICK_CONVO_DESC_PROFILE_CLICKED: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.good_click_convo_desc_profiile_clicked",
  Set(PrivateTimestamp).asJava
)

// ---- negative feedback and report ----
val TIMESTAMP_NEGATIVE_FEEDBACK: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.negative_feedback",
  Set(PrivateTimestamp).asJava
)
val TIMESTAMP_REPORT_TWEET_CLICK: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.report_tweet_click",
  Set(PrivateTimestamp).asJava
)

// ---- impression and dwell ----
val TIMESTAMP_IMPRESSED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.impressed", Set(PublicTimestamp).asJava)
val TIMESTAMP_TWEET_DETAIL_DWELLED: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.tweet_detail_dwelled",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_PROFILE_DWELLED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.profile_dwelled", Set(PublicTimestamp).asJava)
val TIMESTAMP_FULLSCREEN_VIDEO_DWELLED: Discrete = new Discrete(
  "timelines.meta.timestamp.engagement.fullscreen_video_dwelled",
  Set(PublicTimestamp).asJava
)
val TIMESTAMP_LINK_DWELLED: Discrete =
  new Discrete("timelines.meta.timestamp.engagement.link_dwelled", Set(PublicTimestamp).asJava)
|
||||
|
||||
// Flags used to duplicate and split the negative instances during streaming
// processing (kafka).
// NOTE(review): these names are not passed through name(...), so the class prefix
// is not applied; confirm that is intended.
val TRAINING_FOR_FAVORITED: Binary =
  new Binary("timelines.meta.training_data.for_favorited", Set(EngagementId).asJava)
val TRAINING_FOR_RETWEETED: Binary =
  new Binary("timelines.meta.training_data.for_retweeted", Set(EngagementId).asJava)
val TRAINING_FOR_REPLIED: Binary =
  new Binary("timelines.meta.training_data.for_replied", Set(EngagementId).asJava)
val TRAINING_FOR_PROFILE_CLICKED: Binary =
  new Binary("timelines.meta.training_data.for_profile_clicked", Set(EngagementId).asJava)
val TRAINING_FOR_CLICKED: Binary =
  new Binary("timelines.meta.training_data.for_clicked", Set(EngagementId).asJava)
val TRAINING_FOR_PHOTO_EXPANDED: Binary =
  new Binary("timelines.meta.training_data.for_photo_expanded", Set(EngagementId).asJava)
val TRAINING_FOR_VIDEO_PLAYBACK_50: Binary =
  new Binary("timelines.meta.training_data.for_video_playback_50", Set(EngagementId).asJava)
val TRAINING_FOR_NEGATIVE_FEEDBACK: Binary =
  new Binary("timelines.meta.training_data.for_negative_feedback", Set(EngagementId).asJava)
val TRAINING_FOR_REPORTED: Binary =
  new Binary("timelines.meta.training_data.for_reported", Set(EngagementId).asJava)
val TRAINING_FOR_DWELLED: Binary =
  new Binary("timelines.meta.training_data.for_dwelled", Set(EngagementId).asJava)
val TRAINING_FOR_SHARED: Binary =
  new Binary("timelines.meta.training_data.for_shared", Set(EngagementId).asJava)
val TRAINING_FOR_SHARE_MENU_CLICKED: Binary =
  new Binary("timelines.meta.training_data.for_share_menu_clicked", Set(EngagementId).asJava)
|
||||
|
||||
// WARNING: do not train on any of the score features below.
val PREDICTED_SCORE: Continuous =
  new Continuous(name("timelines.score"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_FAV: Continuous =
  new Continuous(name("timelines.score.fav"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_RETWEET: Continuous =
  new Continuous(name("timelines.score.retweet"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_REPLY: Continuous =
  new Continuous(name("timelines.score.reply"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_OPEN_LINK: Continuous =
  new Continuous(name("timelines.score.open_link"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_GOOD_OPEN_LINK: Continuous =
  new Continuous(name("timelines.score.good_open_link"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_PROFILE_CLICK: Continuous =
  new Continuous(name("timelines.score.profile_click"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_DETAIL_EXPAND: Continuous =
  new Continuous(name("timelines.score.detail_expand"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_PHOTO_EXPAND: Continuous =
  new Continuous(name("timelines.score.photo_expand"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_PLAYBACK_50: Continuous =
  new Continuous(name("timelines.score.playback_50"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_VIDEO_QUALITY_VIEW: Continuous =
  new Continuous(name("timelines.score.video_quality_view"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_DONT_LIKE: Continuous =
  new Continuous(name("timelines.score.dont_like"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_PROFILE_CLICKED_AND_PROFILE_ENGAGED: Continuous = new Continuous(
  name("timelines.score.profile_clicked_and_profile_engaged"),
  Set(EngagementScore).asJava
)
val PREDICTED_SCORE_GOOD_CLICKED_V1: Continuous =
  new Continuous(name("timelines.score.good_clicked_v1"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_GOOD_CLICKED_V2: Continuous =
  new Continuous(name("timelines.score.good_clicked_v2"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_DWELL: Continuous =
  new Continuous(name("timelines.score.dwell"), Set(EngagementScore).asJava)
// NOTE(review): the next two feature names are spelled "cfd" while the identifiers
// say CDF; the strings are kept because they are runtime identifiers. Confirm
// before correcting.
val PREDICTED_SCORE_DWELL_CDF: Continuous =
  new Continuous(name("timelines.score.dwell_cfd"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_DWELL_CDF_OVERALL: Continuous =
  new Continuous(name("timelines.score.dwell_cfd_overall"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_DWELL_NORMALIZED_OVERALL: Continuous =
  new Continuous(name("timelines.score.dwell_normalized_overall"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_NEGATIVE_FEEDBACK: Continuous =
  new Continuous(name("timelines.score.negative_feedback"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_NEGATIVE_FEEDBACK_V2: Continuous =
  new Continuous(name("timelines.score.negative_feedback_v2"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_WEAK_NEGATIVE_FEEDBACK: Continuous =
  new Continuous(name("timelines.score.weak_negative_feedback"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_STRONG_NEGATIVE_FEEDBACK: Continuous =
  new Continuous(name("timelines.score.strong_negative_feedback"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_REPORT_TWEET_CLICKED: Continuous =
  new Continuous(name("timelines.score.report_tweet_clicked"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_UNFOLLOW_TOPIC: Continuous =
  new Continuous(name("timelines.score.unfollow_topic"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_FOLLOW: Continuous =
  new Continuous(name("timelines.score.follow"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_RELEVANCE_PROMPT_YES_CLICKED: Continuous = new Continuous(
  name("timelines.score.relevance_prompt_yes_clicked"),
  Set(EngagementScore).asJava
)
val PREDICTED_SCORE_BOOKMARK: Continuous =
  new Continuous(name("timelines.score.bookmark"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_SHARE: Continuous =
  new Continuous(name("timelines.score.share"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_SHARE_MENU_CLICK: Continuous =
  new Continuous(name("timelines.score.share_menu_click"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_PROFILE_DWELLED: Continuous =
  new Continuous(name("timelines.score.good_profile_dwelled"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_TWEET_DETAIL_DWELLED: Continuous =
  new Continuous(name("timelines.score.tweet_detail_dwelled"), Set(EngagementScore).asJava)
val PREDICTED_SCORE_FULLSCREEN_VIDEO_DWELL: Continuous =
  new Continuous(name("timelines.score.fullscreen_video_dwell"), Set(EngagementScore).asJava)
|
||||
|
||||
// Hydrated in TimelinesSharedFeaturesAdapter, which the recap adapter calls.
val ORIGINAL_AUTHOR_ID: Discrete =
  new Discrete(name("entities.original_author_id"), Set(UserId).asJava)
val SOURCE_AUTHOR_ID: Discrete =
  new Discrete(name("entities.source_author_id"), Set(UserId).asJava)
val SOURCE_TWEET_ID: Discrete =
  new Discrete(name("entities.source_tweet_id"), Set(TweetId).asJava)
val TOPIC_ID: Discrete =
  new Discrete(name("entities.topic_id"), Set(SemanticcoreClassification).asJava)
val INFERRED_TOPIC_IDS: SparseBinary =
  new SparseBinary(name("entities.inferred_topic_ids"), Set(SemanticcoreClassification).asJava)
// Derived from INFERRED_TOPIC_IDS via TypedAggregateGroup.sparseFeature, so it must
// stay declared after that val.
val INFERRED_TOPIC_ID = TypedAggregateGroup.sparseFeature(INFERRED_TOPIC_IDS)
|
||||
|
||||
// ---- earlybird engagement counts ----
// Weighted counts
val WEIGHTED_FAV_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.weighted_fav_count"),
  Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
)
val WEIGHTED_RETWEET_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.weighted_retweet_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)
val WEIGHTED_REPLY_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.weighted_reply_count"),
  Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
)
val WEIGHTED_QUOTE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.weighted_quote_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)

// Embed counts
val EMBEDS_IMPRESSION_COUNT_V2: Continuous = new Continuous(
  name("timelines.earlybird.embeds_impression_count_v2"),
  Set(CountOfImpression).asJava
)
val EMBEDS_URL_COUNT_V2: Continuous = new Continuous(
  name("timelines.earlybird.embeds_url_count_v2"),
  Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava
)

// Decayed counts
val DECAYED_FAVORITE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.decayed_favorite_count"),
  Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
)
val DECAYED_RETWEET_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.decayed_retweet_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)
val DECAYED_REPLY_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.decayed_reply_count"),
  Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
)
val DECAYED_QUOTE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.decayed_quote_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)

// Fake counts
val FAKE_FAVORITE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.fake_favorite_count"),
  Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
)
val FAKE_RETWEET_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.fake_retweet_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)
val FAKE_REPLY_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.fake_reply_count"),
  Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
)
val FAKE_QUOTE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.fake_quote_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)

// Plain quote count
val QUOTE_COUNT: Continuous = new Continuous(
  name("timelines.earlybird.quote_count"),
  Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
)
|
||||
|
||||
// ---- Safety features (each annotated with TweetSafetyLabels) ----
val LABEL_ABUSIVE_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_abusive_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_ABUSIVE_HI_RCL_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_abusive_hi_rcl_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_DUP_CONTENT_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_dup_content_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_NSFW_HI_PRC_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_nsfw_hi_prc_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_NSFW_HI_RCL_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_nsfw_hi_rcl_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_SPAM_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_spam_flag"),
  Set(TweetSafetyLabels).asJava
)
val LABEL_SPAM_HI_RCL_FLAG: Binary = new Binary(
  name("timelines.earlybird.label_spam_hi_rcl_flag"),
  Set(TweetSafetyLabels).asJava
)
|
||||
|
||||
// Periscope features. PERISCOPE_EXISTS is tagged with the tweet
// entities/metadata PDTs; the remaining flags use the broadcast-metrics PDTs.
val PERISCOPE_EXISTS =
  new Binary(
    name("timelines.earlybird.periscope_exists"),
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
val PERISCOPE_IS_LIVE =
  new Binary(
    name("timelines.earlybird.periscope_is_live"),
    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
  )
val PERISCOPE_HAS_BEEN_FEATURED =
  new Binary(
    name("timelines.earlybird.periscope_has_been_featured"),
    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
  )
val PERISCOPE_IS_CURRENTLY_FEATURED =
  new Binary(
    name("timelines.earlybird.periscope_is_currently_featured"),
    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
  )
val PERISCOPE_IS_FROM_QUALITY_SOURCE =
  new Binary(
    name("timelines.earlybird.periscope_is_from_quality_source"),
    Set(PrivateBroadcastMetrics, PublicBroadcastMetrics).asJava
  )
|
||||
|
||||
// Declared without any personal data types.
val VISIBLE_TOKEN_RATIO =
  new Continuous(name("timelines.earlybird.visible_token_ratio"))

val HAS_QUOTE =
  new Binary(
    name("timelines.earlybird.has_quote"),
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
val IS_COMPOSER_SOURCE_CAMERA =
  new Binary(
    name("timelines.earlybird.is_composer_source_camera"),
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )

// Deliberately named "timelines.earlybird_score" (underscore, not dot) to keep
// it separate from the rest of the "timelines.earlybird." namespace.
val EARLYBIRD_SCORE =
  new Continuous(
    name("timelines.earlybird_score"),
    Set(EngagementScore).asJava
  )
|
||||
|
||||
// Dwell-time features (names end in "_ms") under "timelines.engagement.".
// All five share the same three personal data types.
val DWELL_TIME_MS =
  new Continuous(
    name("timelines.engagement.dwell_time_ms"),
    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava
  )

val TWEET_DETAIL_DWELL_TIME_MS =
  new Continuous(
    name("timelines.engagement.tweet_detail_dwell_time_ms"),
    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava
  )

val PROFILE_DWELL_TIME_MS =
  new Continuous(
    name("timelines.engagement.profile_dwell_time_ms"),
    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava
  )

val FULLSCREEN_VIDEO_DWELL_TIME_MS =
  new Continuous(
    name("timelines.engagement.fullscreen_video_dwell_time_ms"),
    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava
  )

val LINK_DWELL_TIME_MS =
  new Continuous(
    name("timelines.engagement.link_dwell_time_ms"),
    Set(EngagementDurationAndTimestamp, ImpressionMetadata, PrivateTimestamp).asJava
  )
|
||||
|
||||
// Media features in the "tweetsource.tweet.media." namespace.
// File-level properties are tagged MediaFile + MediaProcessingInformation;
// tag/sticker features use the tweet entities/metadata PDTs instead.
// Declaration order is kept exactly as it was (HEIGHT_2 precedes HEIGHT_1).
val ASPECT_RATIO_DEN = new Continuous(
  name("tweetsource.tweet.media.aspect_ratio_den"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val ASPECT_RATIO_NUM = new Continuous(
  name("tweetsource.tweet.media.aspect_ratio_num"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val BIT_RATE = new Continuous(
  name("tweetsource.tweet.media.bit_rate"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val HEIGHT_2 = new Continuous(
  name("tweetsource.tweet.media.height_2"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val HEIGHT_1 = new Continuous(
  name("tweetsource.tweet.media.height_1"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val HEIGHT_3 = new Continuous(
  name("tweetsource.tweet.media.height_3"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val HEIGHT_4 = new Continuous(
  name("tweetsource.tweet.media.height_4"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val RESIZE_METHOD_1 = new Discrete(
  name("tweetsource.tweet.media.resize_method_1"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val RESIZE_METHOD_2 = new Discrete(
  name("tweetsource.tweet.media.resize_method_2"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val RESIZE_METHOD_3 = new Discrete(
  name("tweetsource.tweet.media.resize_method_3"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val RESIZE_METHOD_4 = new Discrete(
  name("tweetsource.tweet.media.resize_method_4"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val VIDEO_DURATION = new Continuous(
  name("tweetsource.tweet.media.video_duration"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val WIDTH_1 = new Continuous(
  name("tweetsource.tweet.media.width_1"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val WIDTH_2 = new Continuous(
  name("tweetsource.tweet.media.width_2"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val WIDTH_3 = new Continuous(
  name("tweetsource.tweet.media.width_3"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val WIDTH_4 = new Continuous(
  name("tweetsource.tweet.media.width_4"),
  Set(MediaFile, MediaProcessingInformation).asJava
)
val NUM_MEDIA_TAGS = new Continuous(
  name("tweetsource.tweet.media.num_tags"),
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val MEDIA_TAG_SCREEN_NAMES = new SparseBinary(
  name("tweetsource.tweet.media.tag_screen_names"),
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
val STICKER_IDS = new SparseBinary(
  name("tweetsource.tweet.media.sticker_ids"),
  Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
)
|
||||
|
||||
// Media features in the "tweetsource.v2.tweet.media." namespace (part 1):
// colour palette, providers, view count and media-level flags.
// The "pallette" spelling is part of the persisted feature name and is kept.
val NUM_COLOR_PALLETTE_ITEMS =
  new Continuous(
    name("tweetsource.v2.tweet.media.num_color_pallette_items"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val COLOR_1_RED =
  new Continuous(
    name("tweetsource.v2.tweet.media.color_1_red"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val COLOR_1_BLUE =
  new Continuous(
    name("tweetsource.v2.tweet.media.color_1_blue"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val COLOR_1_GREEN =
  new Continuous(
    name("tweetsource.v2.tweet.media.color_1_green"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val COLOR_1_PERCENTAGE =
  new Continuous(
    name("tweetsource.v2.tweet.media.color_1_percentage"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val MEDIA_PROVIDERS =
  new SparseBinary(
    name("tweetsource.v2.tweet.media.providers"),
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
val IS_360 =
  new Binary(
    name("tweetsource.v2.tweet.media.is_360"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
// The only feature in this group tagged with MediaContentMetrics.
val VIEW_COUNT =
  new Continuous(
    name("tweetsource.v2.tweet.media.view_count"),
    Set(MediaContentMetrics).asJava
  )
val IS_MANAGED =
  new Binary(
    name("tweetsource.v2.tweet.media.is_managed"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val IS_MONETIZABLE =
  new Binary(
    name("tweetsource.v2.tweet.media.is_monetizable"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val IS_EMBEDDABLE =
  new Binary(
    name("tweetsource.v2.tweet.media.is_embeddable"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val CLASSIFICATION_LABELS =
  new SparseContinuous(
    name("tweetsource.v2.tweet.media.classification_labels"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
|
||||
|
||||
// Media features in the "tweetsource.v2.tweet.media." namespace (part 2):
// stickers, faces, and presence flags for preview image / title / description
// / call-to-action.
val NUM_STICKERS =
  new Continuous(
    name("tweetsource.v2.tweet.media.num_stickers"),
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
val NUM_FACES =
  new Continuous(
    name("tweetsource.v2.tweet.media.num_faces"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val FACE_AREAS =
  new Continuous(
    name("tweetsource.v2.tweet.media.face_areas"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_SELECTED_PREVIEW_IMAGE =
  new Binary(
    name("tweetsource.v2.tweet.media.has_selected_preview_image"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_TITLE =
  new Binary(
    name("tweetsource.v2.tweet.media.has_title"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_DESCRIPTION =
  new Binary(
    name("tweetsource.v2.tweet.media.has_description"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_VISIT_SITE_CALL_TO_ACTION =
  new Binary(
    name("tweetsource.v2.tweet.media.has_visit_site_call_to_action"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_APP_INSTALL_CALL_TO_ACTION =
  new Binary(
    name("tweetsource.v2.tweet.media.has_app_install_call_to_action"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
val HAS_WATCH_NOW_CALL_TO_ACTION =
  new Binary(
    name("tweetsource.v2.tweet.media.has_watch_now_call_to_action"),
    Set(MediaFile, MediaProcessingInformation).asJava
  )
|
||||
|
||||
// Tweet text features. Every feature here is tagged PublicTweets + PrivateTweets.
// Names span three namespaces: "tweetsource.tweet.text.",
// "tweetsource.v3.tweet.text." and "tweetsource.v4.tweet.text.".
val NUM_CAPS = new Continuous(
  name("tweetsource.tweet.text.num_caps"),
  Set(PublicTweets, PrivateTweets).asJava
)
val TWEET_LENGTH = new Continuous(
  name("tweetsource.tweet.text.length"),
  Set(PublicTweets, PrivateTweets).asJava
)
val TWEET_LENGTH_TYPE = new Discrete(
  name("tweetsource.tweet.text.length_type"),
  Set(PublicTweets, PrivateTweets).asJava
)
val NUM_WHITESPACES = new Continuous(
  name("tweetsource.tweet.text.num_whitespaces"),
  Set(PublicTweets, PrivateTweets).asJava
)
val HAS_QUESTION = new Binary(
  name("tweetsource.tweet.text.has_question"),
  Set(PublicTweets, PrivateTweets).asJava
)
val NUM_NEWLINES = new Continuous(
  name("tweetsource.tweet.text.num_newlines"),
  Set(PublicTweets, PrivateTweets).asJava
)
val EMOJI_TOKENS = new SparseBinary(
  name("tweetsource.v3.tweet.text.emoji_tokens"),
  Set(PublicTweets, PrivateTweets).asJava
)
val EMOTICON_TOKENS = new SparseBinary(
  name("tweetsource.v3.tweet.text.emoticon_tokens"),
  Set(PublicTweets, PrivateTweets).asJava
)
val NUM_EMOJIS = new Continuous(
  name("tweetsource.v3.tweet.text.num_emojis"),
  Set(PublicTweets, PrivateTweets).asJava
)
val NUM_EMOTICONS = new Continuous(
  name("tweetsource.v3.tweet.text.num_emoticons"),
  Set(PublicTweets, PrivateTweets).asJava
)
val POS_UNIGRAMS = new SparseBinary(
  name("tweetsource.v3.tweet.text.pos_unigrams"),
  Set(PublicTweets, PrivateTweets).asJava
)
val POS_BIGRAMS = new SparseBinary(
  name("tweetsource.v3.tweet.text.pos_bigrams"),
  Set(PublicTweets, PrivateTweets).asJava
)
val TEXT_TOKENS = new SparseBinary(
  name("tweetsource.v4.tweet.text.tokens"),
  Set(PublicTweets, PrivateTweets).asJava
)
|
||||
|
||||
// Health model scores (see go/toxicity, go/pblock, go/pspammytweet).
// All are continuous and tagged with the TweetSafetyScores personal data type.
val PBLOCK_SCORE = new Continuous(
  name("timelines.earlybird.pblock_score"),
  Set(TweetSafetyScores).asJava
)
val TOXICITY_SCORE = new Continuous(
  name("timelines.earlybird.toxicity_score"),
  Set(TweetSafetyScores).asJava
)
val EXPERIMENTAL_HEALTH_MODEL_SCORE_1 = new Continuous(
  name("timelines.earlybird.experimental_health_model_score_1"),
  Set(TweetSafetyScores).asJava
)
val EXPERIMENTAL_HEALTH_MODEL_SCORE_2 = new Continuous(
  name("timelines.earlybird.experimental_health_model_score_2"),
  Set(TweetSafetyScores).asJava
)
val EXPERIMENTAL_HEALTH_MODEL_SCORE_3 = new Continuous(
  name("timelines.earlybird.experimental_health_model_score_3"),
  Set(TweetSafetyScores).asJava
)
val EXPERIMENTAL_HEALTH_MODEL_SCORE_4 = new Continuous(
  name("timelines.earlybird.experimental_health_model_score_4"),
  Set(TweetSafetyScores).asJava
)
val PSPAMMY_TWEET_SCORE = new Continuous(
  name("timelines.earlybird.pspammy_tweet_score"),
  Set(TweetSafetyScores).asJava
)
val PREPORTED_TWEET_SCORE = new Continuous(
  name("timelines.earlybird.preported_tweet_score"),
  Set(TweetSafetyScores).asJava
)
|
||||
|
||||
// Where the record was displayed, e.g. recap vs. ranked timeline vs. recycled.
// Do NOT use this for model training or prediction: it is set post-scoring.
//
// How it differs from related features:
//  - TimelinesSharedFeatures.INJECTION_TYPE is only ever set to Recap or
//    Rectweet and is available pre-scoring.
//  - TimeFeatures.IS_TWEET_RECYCLED is set pre-scoring and says whether a tweet
//    is being *considered* for recycling, whereas
//    DISPLAY_SUGGEST_TYPE == RecycledTweet means the tweet was actually served
//    in a recycled tweet module.
// The two should currently have the same value but need not in future, so use
// IS_TWEET_RECYCLED / CANDIDATE_TWEET_SOURCE_ID for training models and use
// DISPLAY_SUGGEST_TYPE only for offline analysis of tweets actually served in
// recycled modules.
val DISPLAY_SUGGEST_TYPE =
  new Discrete(name("recap.display.suggest_type"))

// Candidate tweet source id. Related to DISPLAY_SUGGEST_TYPE, but it is a
// property of the candidate rather than of the display location, so unlike
// DISPLAY_SUGGEST_TYPE it is safe to use in model training.
val CANDIDATE_TWEET_SOURCE_ID =
  new Discrete(
    name("timelines.meta.candidate_tweet_source_id"),
    Set(TweetId).asJava
  )

// True when at least 50% of the tweet was in the user's viewport for at least
// 500 ms, OR the user engaged with the tweet publicly or privately.
val IS_LINGER_IMPRESSION =
  new Binary(
    name("timelines.engagement.is_linger_impression"),
    Set(EngagementsPrivate).asJava
  )

// Features to create rollups
val LANGUAGE_GROUP =
  new Discrete(name("timelines.tweet.text.language_group"))

// Final position index of the tweet being trained on in the timeline served
// from TLM (it could still change later in TLS-API), as recorded by
// PositionIndexLoggingEnvelopeTransform.
val FINAL_POSITION_INDEX =
  new Discrete(name("timelines.display.final_position_index"))

// The traceId of the timeline request; can be used to group tweets that were
// part of the same response.
val TRACE_ID =
  new Discrete(
    name("timelines.display.trace_id"),
    Set(TfeTransactionId).asJava
  )

// Whether this tweet was randomly injected into the timeline for exploration
// purposes.
val IS_RANDOM_TWEET =
  new Binary(name("timelines.display.is_random_tweet"))

// Whether this tweet was reordered with softmax ranking for explore/exploit
// and therefore needs to be excluded from the exploit-only holdback.
val IS_SOFTMAX_RANKING_TWEET =
  new Binary(name("timelines.display.is_softmax_ranking_tweet"))

// Whether the user viewing the tweet has disabled ranked timeline.
val IS_RANKED_TIMELINE_DISABLER =
  new Binary(
    name("timelines.user_features.is_ranked_timeline_disabler"),
    Set(AnnotationValue, GeneralSettings).asJava
  )

// Whether the user viewing the tweet was one of those released from the
// DDG 4205 control as part of the http://go/shrink-4205 process to shrink the
// quality features holdback.
val IS_USER_RELEASED_FROM_QUALITY_HOLDBACK =
  new Binary(
    name("timelines.user_features.is_released_from_quality_holdback"),
    Set(ExperimentId, ExperimentName).asJava
  )
|
||||
|
||||
// "timelines.initial_prediction.*" features, one per engagement type.
// All are continuous and tagged with the EngagementScore personal data type.
val INITIAL_PREDICTION_FAV = new Continuous(
  name("timelines.initial_prediction.fav"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_RETWEET = new Continuous(
  name("timelines.initial_prediction.retweet"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_REPLY = new Continuous(
  name("timelines.initial_prediction.reply"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_OPEN_LINK = new Continuous(
  name("timelines.initial_prediction.open_link"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_PROFILE_CLICK = new Continuous(
  name("timelines.initial_prediction.profile_click"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_VIDEO_PLAYBACK_50 = new Continuous(
  name("timelines.initial_prediction.video_playback_50"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_DETAIL_EXPAND = new Continuous(
  name("timelines.initial_prediction.detail_expand"),
  Set(EngagementScore).asJava
)
val INITIAL_PREDICTION_PHOTO_EXPAND = new Continuous(
  name("timelines.initial_prediction.photo_expand"),
  Set(EngagementScore).asJava
)
|
||||
|
||||
// Tagged with the Follow personal data type.
val VIEWER_FOLLOWS_ORIGINAL_AUTHOR = new Binary(
  name("timelines.viewer_follows_original_author"),
  Set(Follow).asJava
)

// Position features under "timelines.position."; none carries personal data
// types. The positional vs. named (`featureName =`) call forms are kept as they
// were, since the signature of `name` is not visible here.
val IS_TOP_ONE = new Binary(
  name("timelines.position.is_top_one")
)
val IS_TOP_FIVE = new Binary(
  name(featureName = "timelines.position.is_top_five")
)
val IS_TOP_TEN = new Binary(
  name(featureName = "timelines.position.is_top_ten")
)

// NOTE(review): the "log_10" suffix suggests a base-10 log of the position -
// confirm against the code that populates it.
val LOG_POSITION = new Continuous(
  name(featureName = "timelines.position.log_10")
)
|
||||
|
||||
}
|
@ -0,0 +1,12 @@
|
||||
# Scala library built from every *.scala source in this directory.
scala_library(
    sources = ["*.scala"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        # ML feature / DataRecord API.
        "src/java/com/twitter/ml/api:api-base",
        # Personal data type annotations attached to features.
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
        # Thrift definitions of the engagement features.
        "src/thrift/com/twitter/timelineservice/server/suggests/features/engagement_features:thrift-scala",
        # Aggregation framework types and record transforms.
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
        "timelines/data_processing/ml_util/transforms",
    ],
)
|
@ -0,0 +1,246 @@
|
||||
package com.twitter.timelines.prediction.features.engagement_features
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.SparseBinary
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.OneToSomeTransform
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.RichITransform
|
||||
import com.twitter.timelines.data_processing.ml_util.transforms.SparseBinaryUnion
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
import com.twitter.timelineservice.suggests.features.engagement_features.thriftscala.{
|
||||
EngagementFeatures => ThriftEngagementFeatures
|
||||
}
|
||||
import com.twitter.timelineservice.suggests.features.engagement_features.v1.thriftscala.{
|
||||
EngagementFeatures => ThriftEngagementFeaturesV1
|
||||
}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Companion of the [[EngagementFeatures]] case class: holds the
 * [[EngagementFeatures.EngagementFeature]] keys, the thrift decoder and the
 * shared empty instance.
 */
object EngagementFeatures {
  private[this] val logger = Logger.get(getClass.getSimpleName)

  /** Closed set of keys identifying one engagement feature within a feature group. */
  sealed trait EngagementFeature
  case object Count extends EngagementFeature
  case object RealGraphWeightAverage extends EngagementFeature
  case object RealGraphWeightMax extends EngagementFeature
  case object RealGraphWeightMin extends EngagementFeature
  case object RealGraphWeightMissing extends EngagementFeature
  case object RealGraphWeightVariance extends EngagementFeature
  case object UserIds extends EngagementFeature

  /**
   * Decodes the thrift union into the in-memory model.
   *
   * Only the V1 variant is understood; its three engager lists are copied over
   * and `realGraphWeightByUser` keeps its default value.
   *
   * @param thriftEngagementFeatures thrift union to decode
   * @return `Some` for a V1 payload; `None` (after logging an error) for any
   *         other variant
   */
  def fromThrift(thriftEngagementFeatures: ThriftEngagementFeatures): Option[EngagementFeatures] =
    thriftEngagementFeatures match {
      case versioned: ThriftEngagementFeatures.V1 =>
        val payload = versioned.v1
        Some(
          EngagementFeatures(
            favoritedBy = payload.favoritedBy,
            retweetedBy = payload.retweetedBy,
            repliedBy = payload.repliedBy
          )
        )
      case _ =>
        logger.error("Unexpected EngagementFeatures version found.")
        None
    }

  /** Instance with every field at its default value. */
  val empty: EngagementFeatures = EngagementFeatures()
}
|
||||
|
||||
/**
 * User IDs that have engaged with a target entity (such as a Tweet), plus any
 * additional data needed to compute derived features.
 *
 * @param favoritedBy           IDs of users who favorited the entity
 * @param retweetedBy           IDs of users who retweeted the entity
 * @param repliedBy             IDs of users who replied to the entity
 * @param realGraphWeightByUser Real Graph weight keyed by user ID; it is not
 *                              consulted by `isEmpty`/`nonEmpty` and is not
 *                              written by `toLogThrift`
 */
case class EngagementFeatures(
  favoritedBy: Seq[Long] = Nil,
  retweetedBy: Seq[Long] = Nil,
  repliedBy: Seq[Long] = Nil,
  realGraphWeightByUser: Map[Long, Double] = Map.empty
) {

  /** True when all three engager lists are empty. */
  def isEmpty: Boolean = !(favoritedBy.nonEmpty || retweetedBy.nonEmpty || repliedBy.nonEmpty)

  /** True when at least one engager list has an entry. */
  def nonEmpty: Boolean = !isEmpty

  /** Wraps the three engager lists in the V1 variant of the thrift union. */
  def toLogThrift: ThriftEngagementFeatures.V1 = {
    val v1Payload = ThriftEngagementFeaturesV1(
      favoritedBy = favoritedBy,
      retweetedBy = retweetedBy,
      repliedBy = repliedBy
    )
    ThriftEngagementFeatures.V1(v1Payload)
  }
}
|
||||
|
||||
/**
 * Engagement features derived from the Real Graph weight.
 *
 * The perspective is that of the source user (the one viewing their timeline)
 * towards the destination user or users who created the engagements.
 *
 * @param count    number of engagements present
 * @param max      max score of the engaging users
 * @param mean     average score of the engaging users
 * @param min      minimum score of the engaging users
 * @param missing  among the engagements present, how many had no Real Graph score
 * @param variance variance of the scores of the engaging users
 */
case class RealGraphDerivedEngagementFeatures(
  count: Int,
  max: Double,
  mean: Double,
  min: Double,
  missing: Int,
  variance: Double
)
|
||||
|
||||
/**
 * DataRecord feature definitions for engagement data: engager user-id sets,
 * engagement counts, Real Graph weight statistics, per-engagement-type feature
 * groups, and a transform that unions the three engager sets into one feature.
 */
object EngagementDataRecordFeatures {
  import EngagementFeatures._

  // ---- Engager user-id sets, one per engagement type ----
  val FavoritedByUserIds = new SparseBinary(
    "engagement_features.user_ids.favorited_by",
    Set(UserId, PrivateLikes, PublicLikes).asJava
  )
  val RetweetedByUserIds = new SparseBinary(
    "engagement_features.user_ids.retweeted_by",
    Set(UserId, PrivateRetweets, PublicRetweets).asJava
  )
  val RepliedByUserIds = new SparseBinary(
    "engagement_features.user_ids.replied_by",
    Set(UserId, PrivateReplies, PublicReplies).asJava
  )

  // ---- Engagement counts ----
  val InNetworkFavoritesCount = new Continuous(
    "engagement_features.in_network.favorites.count",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  )
  val InNetworkRetweetsCount = new Continuous(
    "engagement_features.in_network.retweets.count",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  )
  val InNetworkRepliesCount = new Continuous(
    "engagement_features.in_network.replies.count",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  )

  // ---- Real Graph derived features ----
  // avg/max/min carry the count PDTs of their engagement type; the "missing"
  // and "weight_variance" features are declared without personal data types.

  // Favorites
  val InNetworkFavoritesAvgRealGraphWeight = new Continuous(
    "engagement_features.real_graph.favorites.avg_weight",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
  val InNetworkFavoritesMaxRealGraphWeight = new Continuous(
    "engagement_features.real_graph.favorites.max_weight",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
  val InNetworkFavoritesMinRealGraphWeight = new Continuous(
    "engagement_features.real_graph.favorites.min_weight",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
  val InNetworkFavoritesRealGraphWeightMissing =
    new Continuous("engagement_features.real_graph.favorites.missing")
  val InNetworkFavoritesRealGraphWeightVariance =
    new Continuous("engagement_features.real_graph.favorites.weight_variance")

  // Retweets
  val InNetworkRetweetsMaxRealGraphWeight = new Continuous(
    "engagement_features.real_graph.retweets.max_weight",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
  val InNetworkRetweetsMinRealGraphWeight = new Continuous(
    "engagement_features.real_graph.retweets.min_weight",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
  val InNetworkRetweetsAvgRealGraphWeight = new Continuous(
    "engagement_features.real_graph.retweets.avg_weight",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
  val InNetworkRetweetsRealGraphWeightMissing =
    new Continuous("engagement_features.real_graph.retweets.missing")
  val InNetworkRetweetsRealGraphWeightVariance =
    new Continuous("engagement_features.real_graph.retweets.weight_variance")

  // Replies
  val InNetworkRepliesMaxRealGraphWeight = new Continuous(
    "engagement_features.real_graph.replies.max_weight",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
  val InNetworkRepliesMinRealGraphWeight = new Continuous(
    "engagement_features.real_graph.replies.min_weight",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
  val InNetworkRepliesAvgRealGraphWeight = new Continuous(
    "engagement_features.real_graph.replies.avg_weight",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
  val InNetworkRepliesRealGraphWeightMissing =
    new Continuous("engagement_features.real_graph.replies.missing")
  val InNetworkRepliesRealGraphWeightVariance =
    new Continuous("engagement_features.real_graph.replies.weight_variance")

  /**
   * The features belonging to one engagement type, keyed by the
   * [[EngagementFeatures.EngagementFeature]] they represent.
   */
  sealed trait FeatureGroup {
    def continuousFeatures: Map[EngagementFeature, Continuous]
    def sparseBinaryFeatures: Map[EngagementFeature, SparseBinary]

    /** Continuous features followed by sparse-binary features. */
    def allFeatures: Seq[Feature[_]] = {
      val everyFeature = continuousFeatures.values ++ sparseBinaryFeatures.values
      everyFeature.toSeq
    }
  }

  case object Favorites extends FeatureGroup {
    override val continuousFeatures: Map[EngagementFeature, Continuous] = Map(
      Count -> InNetworkFavoritesCount,
      RealGraphWeightAverage -> InNetworkFavoritesAvgRealGraphWeight,
      RealGraphWeightMax -> InNetworkFavoritesMaxRealGraphWeight,
      RealGraphWeightMin -> InNetworkFavoritesMinRealGraphWeight,
      RealGraphWeightMissing -> InNetworkFavoritesRealGraphWeightMissing,
      RealGraphWeightVariance -> InNetworkFavoritesRealGraphWeightVariance
    )

    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] = Map(
      UserIds -> FavoritedByUserIds
    )
  }

  case object Retweets extends FeatureGroup {
    override val continuousFeatures: Map[EngagementFeature, Continuous] = Map(
      Count -> InNetworkRetweetsCount,
      RealGraphWeightAverage -> InNetworkRetweetsAvgRealGraphWeight,
      RealGraphWeightMax -> InNetworkRetweetsMaxRealGraphWeight,
      RealGraphWeightMin -> InNetworkRetweetsMinRealGraphWeight,
      RealGraphWeightMissing -> InNetworkRetweetsRealGraphWeightMissing,
      RealGraphWeightVariance -> InNetworkRetweetsRealGraphWeightVariance
    )

    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] = Map(
      UserIds -> RetweetedByUserIds
    )
  }

  case object Replies extends FeatureGroup {
    override val continuousFeatures: Map[EngagementFeature, Continuous] = Map(
      Count -> InNetworkRepliesCount,
      RealGraphWeightAverage -> InNetworkRepliesAvgRealGraphWeight,
      RealGraphWeightMax -> InNetworkRepliesMaxRealGraphWeight,
      RealGraphWeightMin -> InNetworkRepliesMinRealGraphWeight,
      RealGraphWeightMissing -> InNetworkRepliesRealGraphWeightMissing,
      RealGraphWeightVariance -> InNetworkRepliesRealGraphWeightVariance
    )

    override val sparseBinaryFeatures: Map[EngagementFeature, SparseBinary] = Map(
      UserIds -> RepliedByUserIds
    )
  }

  // ---- Union of all public engagers ----
  // The three per-type engager sets that get merged into PublicEngagementUserIds.
  val PublicEngagerSets = Set(FavoritedByUserIds, RetweetedByUserIds, RepliedByUserIds)
  val PublicEngagementUserIds = new SparseBinary(
    "engagement_features.user_ids.public",
    Set(UserId, EngagementsPublic).asJava)
  val ENGAGER_ID = TypedAggregateGroup.sparseFeature(PublicEngagementUserIds)

  val UnifyPublicEngagersTransform = SparseBinaryUnion(
    featuresToUnify = PublicEngagerSets,
    outputFeature = PublicEngagementUserIds)

  /** [[OneToSomeTransform]] adapter around `UnifyPublicEngagersTransform`. */
  object RichUnifyPublicEngagersTransform extends OneToSomeTransform {
    override def apply(dataRecord: DataRecord): Option[DataRecord] =
      RichITransform(UnifyPublicEngagersTransform)(dataRecord)

    override def featuresToTransform: Set[Feature[_]] =
      UnifyPublicEngagersTransform.featuresToUnify.toSet
  }
}
|
@ -0,0 +1,19 @@
|
||||
# Default target: every *.scala source in this directory.
# NOTE(review): the glob presumably also matches EscherbirdFeatures.scala, which
# the `escherbird-features` target lists explicitly and which imports
# PersonalDataType. This target does not declare the personal_data dependency
# directly - confirm it is reachable transitively.
scala_library(
    sources = ["*.scala"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        # ML feature API.
        "src/java/com/twitter/ml/api:api-base",
        # Tweetypie thrift definitions.
        "src/thrift/com/twitter/tweetypie:tweet-scala",
    ],
)
|
||||
|
||||
# Narrow target containing only EscherbirdFeatures.scala; it has no Tweetypie
# thrift dependency.
scala_library(
    name = "escherbird-features",
    sources = ["EscherbirdFeatures.scala"],
    tags = ["bazel-only"],
    dependencies = [
        # ML feature API.
        "src/java/com/twitter/ml/api:api-base",
        # Personal data type annotations attached to features.
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
    ],
)
|
@ -0,0 +1,19 @@
|
||||
package com.twitter.timelines.prediction.features.escherbird
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature
|
||||
import java.util.{Set => JSet}
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/** Sparse-binary DataRecord features for Escherbird group, domain and entity ids. */
object EscherbirdFeatures {

  /** Declared without personal data types. */
  val TweetGroupIds: Feature.SparseBinary =
    new Feature.SparseBinary("escherbird.tweet_group_ids")

  /** Tagged with the `DomainId` personal data type. */
  val TweetDomainIds: Feature.SparseBinary =
    new Feature.SparseBinary(
      "escherbird.tweet_domain_ids",
      Set(DomainId).asJava
    )

  /** Tagged with the `SemanticcoreClassification` personal data type. */
  val TweetEntityIds: Feature.SparseBinary =
    new Feature.SparseBinary(
      "escherbird.tweet_entity_ids",
      Set(SemanticcoreClassification).asJava
    )
}
|
||||
|
||||
/**
 * Escherbird annotation ids attached to a single tweet, held as Java sets of
 * strings.
 *
 * @param tweetId        id of the annotated tweet
 * @param tweetGroupIds  group ids, as strings
 * @param tweetDomainIds domain ids, as strings
 * @param tweetEntityIds entity ids, as strings
 */
case class EscherbirdFeatures(
  tweetId: Long,
  tweetGroupIds: JSet[String],
  tweetDomainIds: JSet[String],
  tweetEntityIds: JSet[String]
)
|
@ -0,0 +1,19 @@
|
||||
package com.twitter.timelines.prediction.features.escherbird
|
||||
|
||||
import com.twitter.tweetypie.thriftscala.Tweet
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/** Builds [[EscherbirdFeatures]] from the Escherbird entity annotations on a tweet. */
object EscherbirdFeaturesConverter {

  /** Domain ids whose annotations are dropped before any ids are extracted. */
  val DeprecatedOrTestDomains = Set(1L, 5L, 7L, 9L, 14L, 19L, 20L, 31L)

  /**
   * Extracts the distinct group, domain and entity ids annotated on `tweet`.
   *
   * @param tweet tweet whose `escherbirdEntityAnnotations` are read
   * @return `None` when the tweet carries no Escherbird annotations field;
   *         otherwise the ids of every annotation whose domain is not in
   *         [[DeprecatedOrTestDomains]] (the sets may be empty)
   */
  def fromTweet(tweet: Tweet): Option[EscherbirdFeatures] =
    for (annotationsField <- tweet.escherbirdEntityAnnotations) yield {
      val kept = annotationsField.entityAnnotations.filterNot { candidate =>
        DeprecatedOrTestDomains.contains(candidate.domainId)
      }
      val groupIds = kept.map(_.groupId.toString).toSet.asJava
      val domainIds = kept.map(_.domainId.toString).toSet.asJava
      // An entity id is only unique within its domain, so the domain id is
      // prefixed to keep entities from different domains apart.
      val entityIds = kept.map(a => s"${a.domainId}.${a.entityId}").toSet.asJava
      EscherbirdFeatures(tweet.id, groupIds, domainIds, entityIds)
    }
}
|
@ -0,0 +1,7 @@
|
||||
# Scala library built from every *.scala source in this directory.
scala_library(
    sources = ["*.scala"],
    dependencies = [
        # ML feature API.
        "src/java/com/twitter/ml/api:api-base",
        # Personal data type annotations attached to features.
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
    ],
)
|
@ -0,0 +1,53 @@
|
||||
package com.twitter.timelines.prediction.features.followsource
|
||||
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Feature definitions under the `follow_source.*` namespace.
 *
 * Each feature is declared with its name and, where given, the set of personal data types
 * it is annotated with.
 */
object FollowSourceFeatures {

  /** Corresponds to an algorithm constant from com.twitter.hermit.profile.HermitProfileConstants. */
  val FollowSourceAlgorithm = new Feature.Text("follow_source.algorithm")

  /** Follow action type: "unfollow", "follow", "follow_back", "follow_many" or "follow_all". */
  val FollowAction =
    new Feature.Text(
      "follow_source.action",
      Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava
    )

  /** Time at which the follow occurred, as a millisecond timestamp. */
  val FollowTimestamp = new Feature.Discrete(
    "follow_source.follow_timestamp",
    Set(Follow, PrivateTimestamp).asJava
  )

  /** Age of the follow, in minutes. */
  val FollowAgeMinutes = new Feature.Continuous(
    "follow_source.follow_age_minutes",
    Set(Follow).asJava
  )

  /** Id of the tweet whose details page the follow happened from, when applicable. */
  val FollowCauseTweetId =
    new Feature.Discrete("follow_source.cause_tweet_id", Set(TweetId).asJava)

  /**
   * String form of the follow client (android, web, iphone, etc). Derived from the "client"
   * portion of the client event namespace.
   */
  val FollowClientId =
    new Feature.Text("follow_source.client_id", Set(ClientType).asJava)

  /**
   * When the follow happens via a profile's Following or Followers, the id of that
   * profile's owner.
   */
  val FollowAssociationId = new Feature.Discrete(
    "follow_source.association_id",
    Set(Follow, UserId).asJava
  )

  /**
   * "Friendly name" computed using FollowSourceUtil.getSource. It groups a few client events
   * by where the event occurred. For example, events on the tweet details page are grouped
   * under "tweetDetails":
   *   case (Some("web"), Some("permalink"), _, _, _) => "tweetDetails"
   *   case (Some("iphone"), Some("tweet"), _, _, _) => "tweetDetails"
   *   case (Some("android"), Some("tweet"), _, _, _) => "tweetDetails"
   */
  val FollowSourceFriendlyName =
    new Feature.Text("follow_source.friendly_name", Set(Follow).asJava)

  // Up to two sources and actions that preceded the follow (for example, a profile visit
  // through a mention click, which itself was on a tweet detail page reached through a tweet
  // click in the Home tab). See go/followsource for more details and examples.
  // The "source" values are computed using FollowSourceUtil.getSource.
  val PreFollowAction1 =
    new Feature.Text("follow_source.pre_follow_action_1", Set(Follow).asJava)
  val PreFollowAction2 =
    new Feature.Text("follow_source.pre_follow_action_2", Set(Follow).asJava)
  val PreFollowSource1 =
    new Feature.Text("follow_source.pre_follow_source_1", Set(Follow).asJava)
  val PreFollowSource2 =
    new Feature.Text("follow_source.pre_follow_source_2", Set(Follow).asJava)
}
|
@ -0,0 +1,9 @@
|
||||
# Scala library built from every *.scala file in this directory, targeting Java 8.
scala_library(
    sources = ["*.scala"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        # ML feature API (com.twitter.ml.api).
        "src/java/com/twitter/ml/api:api-base",
        # Java thrift bindings for the personal-data type annotations.
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
    ],
)
|
@ -0,0 +1,575 @@
|
||||
package com.twitter.timelines.prediction.features.itl
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.Discrete
|
||||
import com.twitter.ml.api.Feature.SparseBinary
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Feature definitions under the `itl.*` namespace, grouped into engagement labels,
 * features taken from RecommendedTweet and features taken from HydratedTweetFeatures.
 *
 * Every feature is declared with its name and, where given, the set of personal data
 * types it is annotated with. Feature names are runtime keys and must not be edited.
 */
object ITLFeatures {

  // ---------------------------------------------------------------------------------------
  // Engagement labels
  // ---------------------------------------------------------------------------------------
  val IS_RETWEETED =
    new Binary("itl.engagement.is_retweeted", Set(PublicRetweets, PrivateRetweets).asJava)
  val IS_FAVORITED =
    new Binary("itl.engagement.is_favorited", Set(PublicLikes, PrivateLikes).asJava)
  val IS_REPLIED =
    new Binary("itl.engagement.is_replied", Set(PublicReplies, PrivateReplies).asJava)

  // v1 of the "good click" label: post-click engagements are fav and reply.
  val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_favorited_or_replied",
    Set(
      PublicLikes,
      PrivateLikes,
      PublicReplies,
      PrivateReplies,
      EngagementsPrivate,
      EngagementsPublic
    ).asJava
  )
  // v2 of the "good click" label: post-click engagement is a click.
  val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_v2",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )

  val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_favorited",
    Set(PublicLikes, PrivateLikes).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_replied",
    Set(PublicReplies, PrivateReplies).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_retweeted",
    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_clicked",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED =
    new Binary("itl.engagement.is_good_clicked_convo_desc_followed", Set(EngagementsPrivate).asJava)
  val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_share_dm_clicked",
    Set(EngagementsPrivate).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_profile_clicked",
    Set(EngagementsPrivate).asJava
  )

  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_uam_gt_0",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_uam_gt_1",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_uam_gt_2",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary(
    "itl.engagement.is_good_clicked_convo_desc_uam_gt_3",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )

  // Tweet-detail dwell labels.
  val IS_TWEET_DETAIL_DWELLED = new Binary(
    "itl.engagement.is_tweet_detail_dwelled",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )

  val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary(
    "itl.engagement.is_tweet_detail_dwelled_8_sec",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary(
    "itl.engagement.is_tweet_detail_dwelled_15_sec",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary(
    "itl.engagement.is_tweet_detail_dwelled_25_sec",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary(
    "itl.engagement.is_tweet_detail_dwelled_30_sec",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )

  // Profile dwell labels.
  val IS_PROFILE_DWELLED = new Binary(
    "itl.engagement.is_profile_dwelled",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_DWELLED_10_SEC = new Binary(
    "itl.engagement.is_profile_dwelled_10_sec",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_DWELLED_20_SEC = new Binary(
    "itl.engagement.is_profile_dwelled_20_sec",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_DWELLED_30_SEC = new Binary(
    "itl.engagement.is_profile_dwelled_30_sec",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )

  // Fullscreen-video dwell labels.
  val IS_FULLSCREEN_VIDEO_DWELLED = new Binary(
    "itl.engagement.is_fullscreen_video_dwelled",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary(
    "itl.engagement.is_fullscreen_video_dwelled_5_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary(
    "itl.engagement.is_fullscreen_video_dwelled_10_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary(
    "itl.engagement.is_fullscreen_video_dwelled_20_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary(
    "itl.engagement.is_fullscreen_video_dwelled_30_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  // Link dwell labels.
  val IS_LINK_DWELLED_15_SEC = new Binary(
    "itl.engagement.is_link_dwelled_15_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_LINK_DWELLED_30_SEC = new Binary(
    "itl.engagement.is_link_dwelled_30_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_LINK_DWELLED_60_SEC = new Binary(
    "itl.engagement.is_link_dwelled_60_sec",
    Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava
  )

  val IS_QUOTED =
    new Binary("itl.engagement.is_quoted", Set(PublicRetweets, PrivateRetweets).asJava)
  val IS_RETWEETED_WITHOUT_QUOTE = new Binary(
    "itl.engagement.is_retweeted_without_quote",
    Set(PublicRetweets, PrivateRetweets).asJava
  )
  val IS_CLICKED = new Binary(
    "itl.engagement.is_clicked",
    Set(EngagementsPrivate, TweetsClicked, LinksClickedOn).asJava
  )
  val IS_PROFILE_CLICKED = new Binary(
    "itl.engagement.is_profile_clicked",
    Set(EngagementsPrivate, TweetsClicked, ProfilesViewed, ProfilesClicked).asJava
  )
  val IS_DWELLED = new Binary("itl.engagement.is_dwelled", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_IN_BOUNDS_V1 =
    new Binary("itl.engagement.is_dwelled_in_bounds_v1", Set(EngagementsPrivate).asJava)
  val DWELL_NORMALIZED_OVERALL =
    new Continuous("itl.engagement.dwell_normalized_overall", Set(EngagementsPrivate).asJava)
  val DWELL_CDF_OVERALL =
    new Continuous("itl.engagement.dwell_cdf_overall", Set(EngagementsPrivate).asJava)
  val DWELL_CDF = new Continuous("itl.engagement.dwell_cdf", Set(EngagementsPrivate).asJava)

  // Per-second dwell labels, 1s through 10s.
  val IS_DWELLED_1S = new Binary("itl.engagement.is_dwelled_1s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_2S = new Binary("itl.engagement.is_dwelled_2s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_3S = new Binary("itl.engagement.is_dwelled_3s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_4S = new Binary("itl.engagement.is_dwelled_4s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_5S = new Binary("itl.engagement.is_dwelled_5s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_6S = new Binary("itl.engagement.is_dwelled_6s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_7S = new Binary("itl.engagement.is_dwelled_7s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_8S = new Binary("itl.engagement.is_dwelled_8s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_9S = new Binary("itl.engagement.is_dwelled_9s", Set(EngagementsPrivate).asJava)
  val IS_DWELLED_10S = new Binary("itl.engagement.is_dwelled_10s", Set(EngagementsPrivate).asJava)

  // Per-second skip labels, 1s through 10s.
  val IS_SKIPPED_1S = new Binary("itl.engagement.is_skipped_1s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_2S = new Binary("itl.engagement.is_skipped_2s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_3S = new Binary("itl.engagement.is_skipped_3s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_4S = new Binary("itl.engagement.is_skipped_4s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_5S = new Binary("itl.engagement.is_skipped_5s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_6S = new Binary("itl.engagement.is_skipped_6s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_7S = new Binary("itl.engagement.is_skipped_7s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_8S = new Binary("itl.engagement.is_skipped_8s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_9S = new Binary("itl.engagement.is_skipped_9s", Set(EngagementsPrivate).asJava)
  val IS_SKIPPED_10S = new Binary("itl.engagement.is_skipped_10s", Set(EngagementsPrivate).asJava)

  val IS_FOLLOWED =
    new Binary("itl.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava)
  val IS_IMPRESSED = new Binary("itl.engagement.is_impressed", Set(EngagementsPrivate).asJava)
  val IS_OPEN_LINKED =
    new Binary("itl.engagement.is_open_linked", Set(EngagementsPrivate, LinksClickedOn).asJava)
  val IS_PHOTO_EXPANDED = new Binary(
    "itl.engagement.is_photo_expanded",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_VIDEO_VIEWED =
    new Binary("itl.engagement.is_video_viewed", Set(EngagementsPrivate, EngagementsPublic).asJava)
  val IS_VIDEO_PLAYBACK_50 = new Binary(
    "itl.engagement.is_video_playback_50",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_VIDEO_QUALITY_VIEWED = new Binary(
    "itl.engagement.is_video_quality_viewed",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_BOOKMARKED =
    new Binary("itl.engagement.is_bookmarked", Set(EngagementsPrivate).asJava)
  val IS_SHARED =
    new Binary("itl.engagement.is_shared", Set(EngagementsPrivate).asJava)
  val IS_SHARE_MENU_CLICKED =
    new Binary("itl.engagement.is_share_menu_clicked", Set(EngagementsPrivate).asJava)

  // ---------------------------------------------------------------------------------------
  // Negative engagements
  // ---------------------------------------------------------------------------------------
  val IS_DONT_LIKE =
    new Binary("itl.engagement.is_dont_like", Set(EngagementsPrivate, EngagementsPublic).asJava)
  val IS_BLOCK_CLICKED = new Binary(
    "itl.engagement.is_block_clicked",
    Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_BLOCK_DIALOG_BLOCKED = new Binary(
    "itl.engagement.is_block_dialog_blocked",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_MUTE_CLICKED =
    new Binary("itl.engagement.is_mute_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
  val IS_MUTE_DIALOG_MUTED =
    new Binary("itl.engagement.is_mute_dialog_muted", Set(EngagementsPrivate).asJava)
  val IS_REPORT_TWEET_CLICKED = new Binary(
    "itl.engagement.is_report_tweet_clicked",
    Set(TweetsClicked, EngagementsPrivate).asJava
  )
  val IS_CARET_CLICKED =
    new Binary("itl.engagement.is_caret_clicked", Set(TweetsClicked, EngagementsPrivate).asJava)
  val IS_NOT_ABOUT_TOPIC =
    new Binary("itl.engagement.is_not_about_topic", Set(EngagementsPrivate).asJava)
  val IS_NOT_RECENT =
    new Binary("itl.engagement.is_not_recent", Set(EngagementsPrivate).asJava)
  val IS_NOT_RELEVANT =
    new Binary("itl.engagement.is_not_relevant", Set(EngagementsPrivate).asJava)
  val IS_SEE_FEWER =
    new Binary("itl.engagement.is_see_fewer", Set(EngagementsPrivate).asJava)
  val IS_UNFOLLOW_TOPIC =
    new Binary("itl.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava)
  val IS_FOLLOW_TOPIC =
    new Binary("itl.engagement.is_follow_topic", Set(EngagementsPrivate).asJava)
  val IS_NOT_INTERESTED_IN_TOPIC =
    new Binary("itl.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava)
  val IS_HOME_LATEST_VISITED =
    new Binary("itl.engagement.is_home_latest_visited", Set(EngagementsPrivate).asJava)

  // Derived label: logical OR of IS_DONT_LIKE, IS_BLOCK_CLICKED, IS_MUTE_CLICKED and
  // IS_REPORT_TWEET_CLICKED.
  val IS_NEGATIVE_FEEDBACK =
    new Binary("itl.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava)

  // ---------------------------------------------------------------------------------------
  // Reciprocal engagements for reply forward engagement
  // ---------------------------------------------------------------------------------------
  val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_impressed_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_favorited_by_author",
    Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_quoted_by_author",
    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_replied_by_author",
    Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_retweeted_by_author",
    Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_blocked_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_followed_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_unfollowed_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_muted_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_reported_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )

  // Derived label: logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED.
  val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary(
    "itl.engagement.is_replied_reply_engaged_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )

  // ---------------------------------------------------------------------------------------
  // Reciprocal engagements for fav forward engagement
  // ---------------------------------------------------------------------------------------
  val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary(
    "itl.engagement.is_favorited_fav_favorited_by_author",
    Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
  )
  val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary(
    "itl.engagement.is_favorited_fav_replied_by_author",
    Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
  )
  val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary(
    "itl.engagement.is_favorited_fav_retweeted_by_author",
    Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
  )
  val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary(
    "itl.engagement.is_favorited_fav_followed_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )
  // Derived label: logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED.
  val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary(
    "itl.engagement.is_favorited_fav_engaged_by_author",
    Set(EngagementsPrivate, EngagementsPublic).asJava
  )

  // ---------------------------------------------------------------------------------------
  // "Good" profile clicks: a profile click followed by an engagement (follow, fav, reply,
  // retweet, etc.) on the profile page.
  // ---------------------------------------------------------------------------------------
  val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_follow",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_fav",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_reply",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_retweet",
    Set(
      ProfilesViewed,
      ProfilesClicked,
      EngagementsPrivate,
      PrivateRetweets,
      PublicRetweets
    ).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_tweet_click",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_share_dm_click",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  // Derived label: union of all the binary profile-click features above.
  val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_engaged",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava
  )

  // ---------------------------------------------------------------------------------------
  // "Bad" profile clicks: a profile click followed by a negative engagement (user report,
  // tweet report, mute, block, etc.) on the profile page.
  // ---------------------------------------------------------------------------------------
  val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_user_report_click",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_tweet_report_click",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_mute",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary(
    "itl.engagement.is_profile_clicked_and_profile_block",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  // Derived label: union of bad profile click engagements and existing negative feedback.
  val IS_NEGATIVE_FEEDBACK_V2 = new Binary(
    "itl.engagement.is_negative_feedback_v2",
    Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava
  )
  // Engagement for following a user from any surface area.
  val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary(
    "itl.engagement.is_followed_from_any_surface_area",
    Set(EngagementsPublic, EngagementsPrivate).asJava
  )

  // Relevance prompt tweet engagements.
  val IS_RELEVANCE_PROMPT_YES_CLICKED =
    new Binary("itl.engagement.is_relevance_prompt_yes_clicked", Set(EngagementsPrivate).asJava)

  // Reply downvote engagements.
  val IS_REPLY_DOWNVOTED =
    new Binary("itl.engagement.is_reply_downvoted", Set(EngagementsPrivate).asJava)
  val IS_REPLY_DOWNVOTE_REMOVED =
    new Binary("itl.engagement.is_reply_downvote_removed", Set(EngagementsPrivate).asJava)

  // ---------------------------------------------------------------------------------------
  // Features from RecommendedTweet
  // ---------------------------------------------------------------------------------------
  val RECTWEET_SCORE = new Continuous("itl.recommended_tweet_features.rectweet_score")
  val NUM_FAVORITING_USERS = new Continuous("itl.recommended_tweet_features.num_favoriting_users")
  val NUM_FOLLOWING_USERS = new Continuous("itl.recommended_tweet_features.num_following_users")
  val CONTENT_SOURCE_TYPE = new Discrete("itl.recommended_tweet_features.content_source_type")

  val RECOS_SCORE = new Continuous(
    "itl.recommended_tweet_features.recos_score",
    Set(EngagementScore, UsersRealGraphScore, UsersSalsaScore).asJava
  )
  val AUTHOR_REALGRAPH_SCORE = new Continuous(
    "itl.recommended_tweet_features.realgraph_score",
    Set(UsersRealGraphScore).asJava
  )
  val AUTHOR_SARUS_SCORE = new Continuous(
    "itl.recommended_tweet_features.sarus_score",
    Set(EngagementScore, UsersSalsaScore).asJava
  )

  val NUM_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.num_interacting_users",
    Set(EngagementScore).asJava
  )
  val MAX_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.max_realgraph_score_of_interacting_users",
    Set(UsersRealGraphScore, EngagementScore).asJava
  )
  val SUM_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.sum_realgraph_score_of_interacting_users",
    Set(UsersRealGraphScore, EngagementScore).asJava
  )
  val AVG_REALGRAPH_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.avg_realgraph_score_of_interacting_users",
    Set(UsersRealGraphScore, EngagementScore).asJava
  )
  val MAX_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.max_sarus_score_of_interacting_users",
    Set(EngagementScore, UsersSalsaScore).asJava
  )
  val SUM_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.sum_sarus_score_of_interacting_users",
    Set(EngagementScore, UsersSalsaScore).asJava
  )
  val AVG_SARUS_SCORE_OF_INTERACTING_USERS = new Continuous(
    "itl.recommended_tweet_features.avg_sarus_score_of_interacting_users",
    Set(EngagementScore, UsersSalsaScore).asJava
  )

  val NUM_INTERACTING_FOLLOWINGS = new Continuous(
    "itl.recommended_tweet_features.num_interacting_followings",
    Set(EngagementScore).asJava
  )

  // ---------------------------------------------------------------------------------------
  // Features from HydratedTweetFeatures
  // ---------------------------------------------------------------------------------------
  val REAL_GRAPH_WEIGHT =
    new Continuous("itl.hydrated_tweet_features.real_graph_weight", Set(UsersRealGraphScore).asJava)
  val SARUS_GRAPH_WEIGHT = new Continuous("itl.hydrated_tweet_features.sarus_graph_weight")
  val FROM_TOP_ENGAGED_USER = new Binary("itl.hydrated_tweet_features.from_top_engaged_user")
  val FROM_TOP_INFLUENCER = new Binary("itl.hydrated_tweet_features.from_top_influencer")
  // NOTE(review): the identifier is spelled "INTERSTED" while the feature name says
  // "interested"; the identifier is kept because renaming it would break callers.
  val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous(
    "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_author_known_for"
  )
  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous(
    "itl.hydrated_tweet_features.topic_sim_searcher_author_both_interested_in"
  )
  val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous(
    "itl.hydrated_tweet_features.topic_sim_searcher_author_both_known_for"
  )
  val USER_REP = new Continuous("itl.hydrated_tweet_features.user_rep")
  val NORMALIZED_PARUS_SCORE = new Continuous("itl.hydrated_tweet_features.normalized_parus_score")
  val CONTAINS_MEDIA = new Binary("itl.hydrated_tweet_features.contains_media")
  val FROM_NEARBY = new Binary("itl.hydrated_tweet_features.from_nearby")
  val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous(
    "itl.hydrated_tweet_features.topic_sim_searcher_interested_in_tweet"
  )
  val MATCHES_UI_LANG = new Binary(
    "itl.hydrated_tweet_features.matches_ui_lang",
    Set(ProvidedLanguage, InferredLanguage).asJava
  )
  val MATCHES_SEARCHER_MAIN_LANG = new Binary(
    "itl.hydrated_tweet_features.matches_searcher_main_lang",
    Set(ProvidedLanguage, InferredLanguage).asJava
  )
  val MATCHES_SEARCHER_LANGS = new Binary(
    "itl.hydrated_tweet_features.matches_searcher_langs",
    Set(ProvidedLanguage, InferredLanguage).asJava
  )
  val HAS_CARD = new Binary(
    "itl.hydrated_tweet_features.has_card",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_IMAGE = new Binary(
    "itl.hydrated_tweet_features.has_image",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_NATIVE_IMAGE = new Binary(
    "itl.hydrated_tweet_features.has_native_image",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_VIDEO = new Binary("itl.hydrated_tweet_features.has_video")
  val HAS_CONSUMER_VIDEO = new Binary(
    "itl.hydrated_tweet_features.has_consumer_video",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_PRO_VIDEO = new Binary(
    "itl.hydrated_tweet_features.has_pro_video",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_PERISCOPE = new Binary(
    "itl.hydrated_tweet_features.has_periscope",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_VINE = new Binary(
    "itl.hydrated_tweet_features.has_vine",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_NATIVE_VIDEO = new Binary(
    "itl.hydrated_tweet_features.has_native_video",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_LINK = new Binary(
    "itl.hydrated_tweet_features.has_link",
    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val LINK_COUNT = new Continuous(
    "itl.hydrated_tweet_features.link_count",
    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava
  )
  val URL_DOMAINS = new SparseBinary(
    "itl.hydrated_tweet_features.url_domains",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_VISIBLE_LINK = new Binary(
    "itl.hydrated_tweet_features.has_visible_link",
    Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_NEWS = new Binary(
    "itl.hydrated_tweet_features.has_news",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_TREND = new Binary(
    "itl.hydrated_tweet_features.has_trend",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val BLENDER_SCORE =
    new Continuous("itl.hydrated_tweet_features.blender_score", Set(EngagementScore).asJava)
  val PARUS_SCORE =
    new Continuous("itl.hydrated_tweet_features.parus_score", Set(EngagementScore).asJava)
  val TEXT_SCORE =
    new Continuous("itl.hydrated_tweet_features.text_score", Set(EngagementScore).asJava)
  val BIDIRECTIONAL_REPLY_COUNT = new Continuous(
    "itl.hydrated_tweet_features.bidirectional_reply_count",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  )
  val UNIDIRECTIONAL_REPLY_COUNT = new Continuous(
    "itl.hydrated_tweet_features.unidirectional_reply_count",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  )
  val BIDIRECTIONAL_RETWEET_COUNT = new Continuous(
    "itl.hydrated_tweet_features.bidirectional_retweet_count",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  )
  val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous(
    "itl.hydrated_tweet_features.unidirectional_retweet_count",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  )
  val BIDIRECTIONAL_FAV_COUNT = new Continuous(
    "itl.hydrated_tweet_features.bidirectional_fav_count",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  )
  val UNIDIRECTIONAL_FAV_COUNT = new Continuous(
    "itl.hydrated_tweet_features.unidirectional_fav_count",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  )
  val CONVERSATION_COUNT = new Continuous("itl.hydrated_tweet_features.conversation_count")
  val FAV_COUNT = new Continuous(
    "itl.hydrated_tweet_features.fav_count",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  )
  val REPLY_COUNT = new Continuous(
    "itl.hydrated_tweet_features.reply_count",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  )
  val RETWEET_COUNT = new Continuous(
    "itl.hydrated_tweet_features.retweet_count",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  )
  // NOTE(review): the feature name ends in "enagagement" (sic). It is a runtime key, so it
  // is reproduced exactly.
  val PREV_USER_TWEET_ENGAGEMENT = new Continuous(
    "itl.hydrated_tweet_features.prev_user_tweet_enagagement",
    Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava
  )
  val IS_SENSITIVE = new Binary("itl.hydrated_tweet_features.is_sensitive")
  val HAS_MULTIPLE_MEDIA = new Binary(
    "itl.hydrated_tweet_features.has_multiple_media",
    Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava
  )
  val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary(
    "itl.hydrated_tweet_features.has_multiple_hashtag_or_trend",
    Set(
      UserVisibleFlag,
      CountOfPrivateTweetEntitiesAndMetadata,
      CountOfPublicTweetEntitiesAndMetadata
    ).asJava
  )
  val IS_AUTHOR_PROFILE_EGG =
    new Binary("itl.hydrated_tweet_features.is_author_profile_egg", Set(ProfileImage).asJava)
  val IS_AUTHOR_NEW =
    new Binary("itl.hydrated_tweet_features.is_author_new", Set(UserType, UserState).asJava)
  val NUM_MENTIONS = new Continuous(
    "itl.hydrated_tweet_features.num_mentions",
    Set(
      UserVisibleFlag,
      CountOfPrivateTweetEntitiesAndMetadata,
      CountOfPublicTweetEntitiesAndMetadata
    ).asJava
  )
  val NUM_HASHTAGS = new Continuous(
    "itl.hydrated_tweet_features.num_hashtags",
    Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava
  )
  val LANGUAGE = new Discrete(
    "itl.hydrated_tweet_features.language",
    Set(ProvidedLanguage, InferredLanguage).asJava
  )
  // NOTE(review): declared Continuous although LANGUAGE above is Discrete. Confirm this is
  // intended before relying on the type.
  val LINK_LANGUAGE = new Continuous(
    "itl.hydrated_tweet_features.link_language",
    Set(ProvidedLanguage, InferredLanguage).asJava
  )
  val IS_AUTHOR_NSFW =
    new Binary("itl.hydrated_tweet_features.is_author_nsfw", Set(UserType).asJava)
  val IS_AUTHOR_SPAM =
    new Binary("itl.hydrated_tweet_features.is_author_spam", Set(UserType).asJava)
  val IS_AUTHOR_BOT = new Binary("itl.hydrated_tweet_features.is_author_bot", Set(UserType).asJava)
  val IS_OFFENSIVE = new Binary("itl.hydrated_tweet_features.is_offensive")
  val FROM_VERIFIED_ACCOUNT =
    new Binary("itl.hydrated_tweet_features.from_verified_account", Set(UserVerifiedFlag).asJava)
  val EMBEDS_IMPRESSION_COUNT = new Continuous(
    "itl.hydrated_tweet_features.embeds_impression_count",
    Set(CountOfImpression).asJava
  )
  val EMBEDS_URL_COUNT =
    new Continuous("itl.hydrated_tweet_features.embeds_url_count", Set(UrlFoundFlag).asJava)

  // NOTE(review): the three *_V2 counts use the "recap.earlybird." name prefix instead of
  // "itl."; presumably deliberate. Verify before changing.
  val FAV_COUNT_V2 = new Continuous(
    "recap.earlybird.fav_count_v2",
    Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  )
  val RETWEET_COUNT_V2 = new Continuous(
    "recap.earlybird.retweet_count_v2",
    Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  )
  val REPLY_COUNT_V2 = new Continuous(
    "recap.earlybird.reply_count_v2",
    Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  )
}
|
@ -0,0 +1,9 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
],
|
||||
)
|
@ -0,0 +1,24 @@
|
||||
package com.twitter.timelines.prediction.features.list_features
|
||||
|
||||
import com.twitter.ml.api.Feature.{Binary, Discrete}
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * DataRecord features under the `list.*` namespace, together with a
 * `FeatureContext` that bundles all of them.
 */
object ListFeatures {

  // `list.id` is used for list tweet injections in home;
  // `timelines.meta.list_id` is used for list tweets in the list timeline.
  val LIST_ID: Discrete = new Discrete("list.id")

  // The only feature in this object that is annotated with personal data types.
  val VIEWER_IS_OWNER: Binary = new Binary(
    "list.viewer.is_owner",
    Set(ListsNonpublicList, ListsPublicList).asJava)

  val VIEWER_IS_SUBSCRIBER: Binary = new Binary("list.viewer.is_subscriber")

  val IS_PINNED_LIST: Binary = new Binary("list.is_pinned")

  // Bundles every feature declared above.
  val featureContext: FeatureContext =
    new FeatureContext(LIST_ID, VIEWER_IS_OWNER, VIEWER_IS_SUBSCRIBER, IS_PINNED_LIST)
}
|
@ -0,0 +1,9 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
],
|
||||
)
|
@ -0,0 +1,49 @@
|
||||
package com.twitter.timelines.prediction.features.p_home_latest
|
||||
|
||||
import com.twitter.ml.api.Feature.{Continuous, Discrete}
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * DataRecord feature definitions under the `home_latest.user_feature` namespace
 * that are not aggregates.
 */
object HomeLatestUserFeatures {
  // Discrete feature tagged with the `PrivateTimestamp` personal data type.
  val LAST_LOGIN_TIMESTAMP_MS: Discrete = new Discrete(
    "home_latest.user_feature.last_login_timestamp_ms",
    Set(PrivateTimestamp).asJava)
}
|
||||
|
||||
/**
 * Aggregate DataRecord feature definitions under the `home_latest.user_feature`
 * namespace. Timestamp features are `Discrete` and tagged `PrivateTimestamp`;
 * impression features are `Continuous` and tagged `CountOfImpression`.
 */
object HomeLatestUserAggregatesFeatures {

  /**
   * Used as `timestampFeature` in `OfflineAggregateSource` required by feature aggregations, set to
   * the `dateRange` end timestamp by default
   */
  val AGGREGATE_TIMESTAMP_MS: Discrete = new Discrete(
    "home_latest.user_feature.aggregate_timestamp_ms",
    Set(PrivateTimestamp).asJava)

  // Impression features.
  val HOME_TOP_IMPRESSIONS: Continuous = new Continuous(
    "home_latest.user_feature.home_top_impressions",
    Set(CountOfImpression).asJava)

  val HOME_LATEST_IMPRESSIONS: Continuous = new Continuous(
    "home_latest.user_feature.home_latest_impressions",
    Set(CountOfImpression).asJava)

  // Timestamp features.
  val HOME_TOP_LAST_LOGIN_TIMESTAMP_MS: Discrete = new Discrete(
    "home_latest.user_feature.home_top_last_login_timestamp_ms",
    Set(PrivateTimestamp).asJava)

  val HOME_LATEST_LAST_LOGIN_TIMESTAMP_MS: Discrete = new Discrete(
    "home_latest.user_feature.home_latest_last_login_timestamp_ms",
    Set(PrivateTimestamp).asJava)

  val HOME_LATEST_MOST_RECENT_CLICK_TIMESTAMP_MS: Discrete = new Discrete(
    "home_latest.user_feature.home_latest_most_recent_click_timestamp_ms",
    Set(PrivateTimestamp).asJava)
}
|
||||
|
||||
/**
 * Plain value holder pairing a user id with a last-login timestamp.
 *
 * @param userId               id of the user the record belongs to
 * @param lastLoginTimestampMs last login time; the name indicates epoch
 *                             milliseconds (NOTE(review): confirm with producer)
 */
case class HomeLatestUserFeatures(
  userId: Long,
  lastLoginTimestampMs: Long)
|
||||
|
||||
/**
 * Plain value holder for one user's Home Latest aggregate values. Only the user
 * id and the aggregate timestamp are mandatory; every other value is optional.
 *
 * NOTE(review): field names mirror the feature constants declared in the
 * `HomeLatestUserAggregatesFeatures` object; the actual mapping is done by
 * code outside this file, so confirm there before relying on it.
 *
 * @param userId                               id of the user the record belongs to
 * @param aggregateTimestampMs                 timestamp attached to the aggregate record
 * @param homeTopImpressions                   optional impression value for Home Top
 * @param homeLatestImpressions                optional impression value for Home Latest
 * @param homeTopLastLoginTimestampMs          optional last-login timestamp for Home Top
 * @param homeLatestLastLoginTimestampMs       optional last-login timestamp for Home Latest
 * @param homeLatestMostRecentClickTimestampMs optional most-recent-click timestamp for Home Latest
 */
case class HomeLatestUserAggregatesFeatures(
  userId: Long,
  aggregateTimestampMs: Long,
  homeTopImpressions: Option[Double],
  homeLatestImpressions: Option[Double],
  homeTopLastLoginTimestampMs: Option[Long],
  homeLatestLastLoginTimestampMs: Option[Long],
  homeLatestMostRecentClickTimestampMs: Option[Long]
)
|
@ -0,0 +1,8 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
],
|
||||
)
|
@ -0,0 +1,7 @@
|
||||
package com.twitter.timelines.prediction.features.ppmi
|
||||
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
|
||||
/** DataRecord feature definitions under the `ppmi.*` namespace. */
object PpmiDataRecordFeatures {
  // Continuous feature; declared without any personal data type annotation.
  val PPMI_SCORE: Continuous = new Continuous("ppmi.source_author.score")
}
|
@ -0,0 +1,15 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/entities/core",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/entities/timelines",
|
||||
"src/scala/com/twitter/ml/featurestore/catalog/features/timelines:realgraph",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/entity",
|
||||
"src/scala/com/twitter/ml/featurestore/lib/feature",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/timelines/real_graph:real_graph-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,232 @@
|
||||
package com.twitter.timelines.prediction.features.real_graph
|
||||
|
||||
import com.twitter.ml.featurestore.catalog.entities.core.UserAuthor
|
||||
import com.twitter.ml.featurestore.catalog.features.timelines.RealGraph
|
||||
import com.twitter.ml.featurestore.lib.EdgeEntityId
|
||||
import com.twitter.ml.featurestore.lib.UserId
|
||||
import com.twitter.ml.featurestore.lib.feature.BoundFeatureSet
|
||||
import com.twitter.ml.featurestore.lib.feature.Feature
|
||||
import com.twitter.ml.featurestore.lib.feature.FeatureSet
|
||||
|
||||
/**
 * Feature Store bindings for the RealGraph edge features, plus helpers that
 * group a subset of those features by value type and map them to legacy
 * string names.
 */
object RealGraphDataRecordFeatureStoreFeatures {

  /**
   * Every RealGraph feature listed below, bound to the `UserAuthor` entity.
   *
   * This list is wider than the `htl*` sets and the legacy-name mapping defined
   * further down: it also carries `DestId`, the per-edge `IsMissing` flags and
   * the `NumTweetQuotes` edge feature.
   */
  val boundUserAuthorfeatureSet: BoundFeatureSet = FeatureSet(
    RealGraph.DestId,
    // AddressBookEmail
    RealGraph.AddressBookEmail.DaysSinceLast,
    RealGraph.AddressBookEmail.ElapsedDays,
    RealGraph.AddressBookEmail.Ewma,
    RealGraph.AddressBookEmail.IsMissing,
    RealGraph.AddressBookEmail.Mean,
    RealGraph.AddressBookEmail.NonZeroDays,
    RealGraph.AddressBookEmail.Variance,
    // AddressBookInBoth
    RealGraph.AddressBookInBoth.DaysSinceLast,
    RealGraph.AddressBookInBoth.ElapsedDays,
    RealGraph.AddressBookInBoth.Ewma,
    RealGraph.AddressBookInBoth.IsMissing,
    RealGraph.AddressBookInBoth.Mean,
    RealGraph.AddressBookInBoth.NonZeroDays,
    RealGraph.AddressBookInBoth.Variance,
    // AddressBookMutualEdgeEmail
    RealGraph.AddressBookMutualEdgeEmail.DaysSinceLast,
    RealGraph.AddressBookMutualEdgeEmail.ElapsedDays,
    RealGraph.AddressBookMutualEdgeEmail.Ewma,
    RealGraph.AddressBookMutualEdgeEmail.IsMissing,
    RealGraph.AddressBookMutualEdgeEmail.Mean,
    RealGraph.AddressBookMutualEdgeEmail.NonZeroDays,
    RealGraph.AddressBookMutualEdgeEmail.Variance,
    // AddressBookMutualEdgeInBoth
    RealGraph.AddressBookMutualEdgeInBoth.DaysSinceLast,
    RealGraph.AddressBookMutualEdgeInBoth.ElapsedDays,
    RealGraph.AddressBookMutualEdgeInBoth.Ewma,
    RealGraph.AddressBookMutualEdgeInBoth.IsMissing,
    RealGraph.AddressBookMutualEdgeInBoth.Mean,
    RealGraph.AddressBookMutualEdgeInBoth.NonZeroDays,
    RealGraph.AddressBookMutualEdgeInBoth.Variance,
    // AddressBookMutualEdgePhone
    RealGraph.AddressBookMutualEdgePhone.DaysSinceLast,
    RealGraph.AddressBookMutualEdgePhone.ElapsedDays,
    RealGraph.AddressBookMutualEdgePhone.Ewma,
    RealGraph.AddressBookMutualEdgePhone.IsMissing,
    RealGraph.AddressBookMutualEdgePhone.Mean,
    RealGraph.AddressBookMutualEdgePhone.NonZeroDays,
    RealGraph.AddressBookMutualEdgePhone.Variance,
    // AddressBookPhone
    RealGraph.AddressBookPhone.DaysSinceLast,
    RealGraph.AddressBookPhone.ElapsedDays,
    RealGraph.AddressBookPhone.Ewma,
    RealGraph.AddressBookPhone.IsMissing,
    RealGraph.AddressBookPhone.Mean,
    RealGraph.AddressBookPhone.NonZeroDays,
    RealGraph.AddressBookPhone.Variance,
    // DirectMessages
    RealGraph.DirectMessages.DaysSinceLast,
    RealGraph.DirectMessages.ElapsedDays,
    RealGraph.DirectMessages.Ewma,
    RealGraph.DirectMessages.IsMissing,
    RealGraph.DirectMessages.Mean,
    RealGraph.DirectMessages.NonZeroDays,
    RealGraph.DirectMessages.Variance,
    // DwellTime
    RealGraph.DwellTime.DaysSinceLast,
    RealGraph.DwellTime.ElapsedDays,
    RealGraph.DwellTime.Ewma,
    RealGraph.DwellTime.IsMissing,
    RealGraph.DwellTime.Mean,
    RealGraph.DwellTime.NonZeroDays,
    RealGraph.DwellTime.Variance,
    // Follow
    RealGraph.Follow.DaysSinceLast,
    RealGraph.Follow.ElapsedDays,
    RealGraph.Follow.Ewma,
    RealGraph.Follow.IsMissing,
    RealGraph.Follow.Mean,
    RealGraph.Follow.NonZeroDays,
    RealGraph.Follow.Variance,
    // InspectedStatuses
    RealGraph.InspectedStatuses.DaysSinceLast,
    RealGraph.InspectedStatuses.ElapsedDays,
    RealGraph.InspectedStatuses.Ewma,
    RealGraph.InspectedStatuses.IsMissing,
    RealGraph.InspectedStatuses.Mean,
    RealGraph.InspectedStatuses.NonZeroDays,
    RealGraph.InspectedStatuses.Variance,
    // Likes
    RealGraph.Likes.DaysSinceLast,
    RealGraph.Likes.ElapsedDays,
    RealGraph.Likes.Ewma,
    RealGraph.Likes.IsMissing,
    RealGraph.Likes.Mean,
    RealGraph.Likes.NonZeroDays,
    RealGraph.Likes.Variance,
    // LinkClicks
    RealGraph.LinkClicks.DaysSinceLast,
    RealGraph.LinkClicks.ElapsedDays,
    RealGraph.LinkClicks.Ewma,
    RealGraph.LinkClicks.IsMissing,
    RealGraph.LinkClicks.Mean,
    RealGraph.LinkClicks.NonZeroDays,
    RealGraph.LinkClicks.Variance,
    // Mentions
    RealGraph.Mentions.DaysSinceLast,
    RealGraph.Mentions.ElapsedDays,
    RealGraph.Mentions.Ewma,
    RealGraph.Mentions.IsMissing,
    RealGraph.Mentions.Mean,
    RealGraph.Mentions.NonZeroDays,
    RealGraph.Mentions.Variance,
    // MutualFollow
    RealGraph.MutualFollow.DaysSinceLast,
    RealGraph.MutualFollow.ElapsedDays,
    RealGraph.MutualFollow.Ewma,
    RealGraph.MutualFollow.IsMissing,
    RealGraph.MutualFollow.Mean,
    RealGraph.MutualFollow.NonZeroDays,
    RealGraph.MutualFollow.Variance,
    // NumTweetQuotes
    RealGraph.NumTweetQuotes.DaysSinceLast,
    RealGraph.NumTweetQuotes.ElapsedDays,
    RealGraph.NumTweetQuotes.Ewma,
    RealGraph.NumTweetQuotes.IsMissing,
    RealGraph.NumTweetQuotes.Mean,
    RealGraph.NumTweetQuotes.NonZeroDays,
    RealGraph.NumTweetQuotes.Variance,
    // PhotoTags
    RealGraph.PhotoTags.DaysSinceLast,
    RealGraph.PhotoTags.ElapsedDays,
    RealGraph.PhotoTags.Ewma,
    RealGraph.PhotoTags.IsMissing,
    RealGraph.PhotoTags.Mean,
    RealGraph.PhotoTags.NonZeroDays,
    RealGraph.PhotoTags.Variance,
    // ProfileViews
    RealGraph.ProfileViews.DaysSinceLast,
    RealGraph.ProfileViews.ElapsedDays,
    RealGraph.ProfileViews.Ewma,
    RealGraph.ProfileViews.IsMissing,
    RealGraph.ProfileViews.Mean,
    RealGraph.ProfileViews.NonZeroDays,
    RealGraph.ProfileViews.Variance,
    // Retweets
    RealGraph.Retweets.DaysSinceLast,
    RealGraph.Retweets.ElapsedDays,
    RealGraph.Retweets.Ewma,
    RealGraph.Retweets.IsMissing,
    RealGraph.Retweets.Mean,
    RealGraph.Retweets.NonZeroDays,
    RealGraph.Retweets.Variance,
    // SmsFollow
    RealGraph.SmsFollow.DaysSinceLast,
    RealGraph.SmsFollow.ElapsedDays,
    RealGraph.SmsFollow.Ewma,
    RealGraph.SmsFollow.IsMissing,
    RealGraph.SmsFollow.Mean,
    RealGraph.SmsFollow.NonZeroDays,
    RealGraph.SmsFollow.Variance,
    // TweetClicks
    RealGraph.TweetClicks.DaysSinceLast,
    RealGraph.TweetClicks.ElapsedDays,
    RealGraph.TweetClicks.Ewma,
    RealGraph.TweetClicks.IsMissing,
    RealGraph.TweetClicks.Mean,
    RealGraph.TweetClicks.NonZeroDays,
    RealGraph.TweetClicks.Variance,
    RealGraph.Weight
  ).bind(UserAuthor)

  // Edge features that feed the `htl*` sets below. `NumTweetQuotes` is bound in
  // the feature set above but is not part of this list.
  private[this] val edgeFeatures: Seq[RealGraph.EdgeFeature] = Seq(
    RealGraph.AddressBookEmail,
    RealGraph.AddressBookInBoth,
    RealGraph.AddressBookMutualEdgeEmail,
    RealGraph.AddressBookMutualEdgeInBoth,
    RealGraph.AddressBookMutualEdgePhone,
    RealGraph.AddressBookPhone,
    RealGraph.DirectMessages,
    RealGraph.DwellTime,
    RealGraph.Follow,
    RealGraph.InspectedStatuses,
    RealGraph.Likes,
    RealGraph.LinkClicks,
    RealGraph.Mentions,
    RealGraph.MutualFollow,
    RealGraph.PhotoTags,
    RealGraph.ProfileViews,
    RealGraph.Retweets,
    RealGraph.SmsFollow,
    RealGraph.TweetClicks
  )

  /**
   * The `Double`-valued features: `Ewma`, `Mean` and `Variance` of every entry in
   * `edgeFeatures`, plus the overall `RealGraph.Weight`.
   */
  val htlDoubleFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Double]] = {
    val perEdgeStats = edgeFeatures.flatMap(edge => Seq(edge.Ewma, edge.Mean, edge.Variance))
    val allDoubleFeatures = perEdgeStats ++ Seq(RealGraph.Weight)
    allDoubleFeatures.toSet
  }

  /**
   * The `Long`-valued features: `DaysSinceLast`, `ElapsedDays` and `NonZeroDays`
   * of every entry in `edgeFeatures`.
   */
  val htlLongFeatures: Set[Feature[EdgeEntityId[UserId, UserId], Long]] =
    edgeFeatures
      .flatMap(edge => Seq(edge.DaysSinceLast, edge.ElapsedDays, edge.NonZeroDays))
      .toSet

  // Legacy name segment for each edge feature; consumed by
  // `convertFeatureToLegacyName` to build the full legacy feature names.
  private val edgeFeatureToLegacyName = Map(
    RealGraph.AddressBookEmail -> "num_address_book_email",
    RealGraph.AddressBookInBoth -> "num_address_book_in_both",
    RealGraph.AddressBookMutualEdgeEmail -> "num_address_book_mutual_edge_email",
    RealGraph.AddressBookMutualEdgeInBoth -> "num_address_book_mutual_edge_in_both",
    RealGraph.AddressBookMutualEdgePhone -> "num_address_book_mutual_edge_phone",
    RealGraph.AddressBookPhone -> "num_address_book_phone",
    RealGraph.DirectMessages -> "direct_messages",
    RealGraph.DwellTime -> "total_dwell_time",
    RealGraph.Follow -> "num_follow",
    RealGraph.InspectedStatuses -> "num_inspected_tweets",
    RealGraph.Likes -> "num_favorites",
    RealGraph.LinkClicks -> "num_link_clicks",
    RealGraph.Mentions -> "num_mentions",
    RealGraph.MutualFollow -> "num_mutual_follow",
    RealGraph.PhotoTags -> "num_photo_tags",
    RealGraph.ProfileViews -> "num_profile_views",
    RealGraph.Retweets -> "num_retweets",
    RealGraph.SmsFollow -> "num_sms_follow",
    RealGraph.TweetClicks -> "num_tweet_clicks"
  )

  /**
   * Builds the mapping from Feature Store features to legacy feature names.
   *
   * For every edge feature in `edgeFeatureToLegacyName`, the six statistics
   * `NonZeroDays`, `DaysSinceLast`, `ElapsedDays`, `Ewma`, `Mean` and `Variance`
   * are named `<prefix>.<legacy edge name>.<statistic suffix>`. `RealGraph.Weight`
   * is named `<prefix>.weight`.
   *
   * @param prefix   leading segment of every generated name
   * @param variance suffix used for the `Variance` statistic; defaults to "variance"
   * @return map from each feature to its legacy name
   */
  def convertFeatureToLegacyName(
    prefix: String,
    variance: String = "variance"
  ): Map[Feature[EdgeEntityId[UserId, UserId], _ >: Long with Double <: AnyVal], String] =
    edgeFeatureToLegacyName.flatMap {
      case (edge, legacyName) =>
        // Shared "<prefix>.<legacy edge name>" stem for all statistics of this edge.
        val stem = s"${prefix}.${legacyName}"
        Seq(
          edge.NonZeroDays -> s"${stem}.non_zero_days",
          edge.DaysSinceLast -> s"${stem}.days_since_last",
          edge.ElapsedDays -> s"${stem}.elapsed_days",
          edge.Ewma -> s"${stem}.ewma",
          edge.Mean -> s"${stem}.mean",
          edge.Variance -> s"${stem}.${variance}"
        )
    } ++ Map(
      RealGraph.Weight -> s"${prefix}.weight"
    )
}
|
@ -0,0 +1,534 @@
|
||||
package com.twitter.timelines.prediction.features.real_graph
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature._
|
||||
import com.twitter.timelines.real_graph.v1.thriftscala.RealGraphEdgeFeature
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
|
||||
object RealGraphDataRecordFeatures {
|
||||
// the source user id
|
||||
val SRC_ID = new Discrete("realgraph.src_id", Set(UserId).asJava)
|
||||
// the destination user id
|
||||
val DST_ID = new Discrete("realgraph.dst_id", Set(UserId).asJava)
|
||||
// real graph weight
|
||||
val WEIGHT = new Continuous("realgraph.weight", Set(UsersRealGraphScore).asJava)
|
||||
// the number of retweets that the source user sent to the destination user
|
||||
val NUM_RETWEETS_MEAN =
|
||||
new Continuous("realgraph.num_retweets.mean", Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_EWMA =
|
||||
new Continuous("realgraph.num_retweets.ewma", Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_VARIANCE =
|
||||
new Continuous("realgraph.num_retweets.variance", Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_retweets.non_zero_days",
|
||||
Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_retweets.elapsed_days",
|
||||
Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_retweets.days_since_last",
|
||||
Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
val NUM_RETWEETS_IS_MISSING =
|
||||
new Binary("realgraph.num_retweets.is_missing", Set(PrivateRetweets, PublicRetweets).asJava)
|
||||
  // the number of favorites that the source user sent to the destination user
|
||||
val NUM_FAVORITES_MEAN =
|
||||
new Continuous("realgraph.num_favorites.mean", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_EWMA =
|
||||
new Continuous("realgraph.num_favorites.ewma", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_VARIANCE =
|
||||
new Continuous("realgraph.num_favorites.variance", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_favorites.non_zero_days", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_favorites.elapsed_days", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_DAYS_SINCE_LAST =
|
||||
new Continuous("realgraph.num_favorites.days_since_last", Set(PublicLikes, PrivateLikes).asJava)
|
||||
val NUM_FAVORITES_IS_MISSING =
|
||||
new Binary("realgraph.num_favorites.is_missing", Set(PublicLikes, PrivateLikes).asJava)
|
||||
// the number of mentions that the source user sent to the destination user
|
||||
val NUM_MENTIONS_MEAN =
|
||||
new Continuous("realgraph.num_mentions.mean", Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_EWMA =
|
||||
new Continuous("realgraph.num_mentions.ewma", Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_VARIANCE = new Continuous(
|
||||
"realgraph.num_mentions.variance",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_mentions.non_zero_days",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_mentions.elapsed_days",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_mentions.days_since_last",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_MENTIONS_IS_MISSING = new Binary(
|
||||
"realgraph.num_mentions.is_missing",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
// the number of direct messages that the source user sent to the destination user
|
||||
val NUM_DIRECT_MESSAGES_MEAN = new Continuous(
|
||||
"realgraph.num_direct_messages.mean",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava)
|
||||
val NUM_DIRECT_MESSAGES_EWMA = new Continuous(
|
||||
"realgraph.num_direct_messages.ewma",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava)
|
||||
val NUM_DIRECT_MESSAGES_VARIANCE = new Continuous(
|
||||
"realgraph.num_direct_messages.variance",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava)
|
||||
val NUM_DIRECT_MESSAGES_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_direct_messages.non_zero_days",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava
|
||||
)
|
||||
val NUM_DIRECT_MESSAGES_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_direct_messages.elapsed_days",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava
|
||||
)
|
||||
val NUM_DIRECT_MESSAGES_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_direct_messages.days_since_last",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava
|
||||
)
|
||||
val NUM_DIRECT_MESSAGES_IS_MISSING = new Binary(
|
||||
"realgraph.num_direct_messages.is_missing",
|
||||
Set(DmEntitiesAndMetadata, CountOfDms).asJava)
|
||||
// the number of tweet clicks that the source user sent to the destination user
|
||||
val NUM_TWEET_CLICKS_MEAN =
|
||||
new Continuous("realgraph.num_tweet_clicks.mean", Set(TweetsClicked).asJava)
|
||||
val NUM_TWEET_CLICKS_EWMA =
|
||||
new Continuous("realgraph.num_tweet_clicks.ewma", Set(TweetsClicked).asJava)
|
||||
val NUM_TWEET_CLICKS_VARIANCE =
|
||||
new Continuous("realgraph.num_tweet_clicks.variance", Set(TweetsClicked).asJava)
|
||||
val NUM_TWEET_CLICKS_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_tweet_clicks.non_zero_days", Set(TweetsClicked).asJava)
|
||||
val NUM_TWEET_CLICKS_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_tweet_clicks.elapsed_days", Set(TweetsClicked).asJava)
|
||||
val NUM_TWEET_CLICKS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_tweet_clicks.days_since_last",
|
||||
Set(TweetsClicked).asJava
|
||||
)
|
||||
val NUM_TWEET_CLICKS_IS_MISSING =
|
||||
new Binary("realgraph.num_tweet_clicks.is_missing", Set(TweetsClicked).asJava)
|
||||
// the number of link clicks that the source user sent to the destination user
|
||||
val NUM_LINK_CLICKS_MEAN =
|
||||
new Continuous("realgraph.num_link_clicks.mean", Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_EWMA =
|
||||
new Continuous("realgraph.num_link_clicks.ewma", Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_VARIANCE =
|
||||
new Continuous("realgraph.num_link_clicks.variance", Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_link_clicks.non_zero_days",
|
||||
Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_link_clicks.elapsed_days",
|
||||
Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_link_clicks.days_since_last",
|
||||
Set(CountOfTweetEntitiesClicked).asJava)
|
||||
val NUM_LINK_CLICKS_IS_MISSING =
|
||||
new Binary("realgraph.num_link_clicks.is_missing", Set(CountOfTweetEntitiesClicked).asJava)
|
||||
// the number of profile views that the source user sent to the destination user
|
||||
val NUM_PROFILE_VIEWS_MEAN =
|
||||
new Continuous("realgraph.num_profile_views.mean", Set(ProfilesViewed).asJava)
|
||||
val NUM_PROFILE_VIEWS_EWMA =
|
||||
new Continuous("realgraph.num_profile_views.ewma", Set(ProfilesViewed).asJava)
|
||||
val NUM_PROFILE_VIEWS_VARIANCE =
|
||||
new Continuous("realgraph.num_profile_views.variance", Set(ProfilesViewed).asJava)
|
||||
val NUM_PROFILE_VIEWS_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_profile_views.non_zero_days", Set(ProfilesViewed).asJava)
|
||||
val NUM_PROFILE_VIEWS_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_profile_views.elapsed_days", Set(ProfilesViewed).asJava)
|
||||
val NUM_PROFILE_VIEWS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_profile_views.days_since_last",
|
||||
Set(ProfilesViewed).asJava
|
||||
)
|
||||
val NUM_PROFILE_VIEWS_IS_MISSING =
|
||||
new Binary("realgraph.num_profile_views.is_missing", Set(ProfilesViewed).asJava)
|
||||
// the total dwell time the source user spends on the target user's tweets
|
||||
val TOTAL_DWELL_TIME_MEAN =
|
||||
new Continuous("realgraph.total_dwell_time.mean", Set(CountOfImpression).asJava)
|
||||
val TOTAL_DWELL_TIME_EWMA =
|
||||
new Continuous("realgraph.total_dwell_time.ewma", Set(CountOfImpression).asJava)
|
||||
val TOTAL_DWELL_TIME_VARIANCE =
|
||||
new Continuous("realgraph.total_dwell_time.variance", Set(CountOfImpression).asJava)
|
||||
val TOTAL_DWELL_TIME_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.total_dwell_time.non_zero_days", Set(CountOfImpression).asJava)
|
||||
val TOTAL_DWELL_TIME_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.total_dwell_time.elapsed_days", Set(CountOfImpression).asJava)
|
||||
val TOTAL_DWELL_TIME_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.total_dwell_time.days_since_last",
|
||||
Set(CountOfImpression).asJava
|
||||
)
|
||||
val TOTAL_DWELL_TIME_IS_MISSING =
|
||||
new Binary("realgraph.total_dwell_time.is_missing", Set(CountOfImpression).asJava)
|
||||
// the number of the target user's tweets that the source user has inspected
|
||||
val NUM_INSPECTED_TWEETS_MEAN =
|
||||
new Continuous("realgraph.num_inspected_tweets.mean", Set(CountOfImpression).asJava)
|
||||
val NUM_INSPECTED_TWEETS_EWMA =
|
||||
new Continuous("realgraph.num_inspected_tweets.ewma", Set(CountOfImpression).asJava)
|
||||
val NUM_INSPECTED_TWEETS_VARIANCE =
|
||||
new Continuous("realgraph.num_inspected_tweets.variance", Set(CountOfImpression).asJava)
|
||||
val NUM_INSPECTED_TWEETS_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_inspected_tweets.non_zero_days",
|
||||
Set(CountOfImpression).asJava
|
||||
)
|
||||
val NUM_INSPECTED_TWEETS_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_inspected_tweets.elapsed_days",
|
||||
Set(CountOfImpression).asJava
|
||||
)
|
||||
val NUM_INSPECTED_TWEETS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_inspected_tweets.days_since_last",
|
||||
Set(CountOfImpression).asJava
|
||||
)
|
||||
val NUM_INSPECTED_TWEETS_IS_MISSING =
|
||||
new Binary("realgraph.num_inspected_tweets.is_missing", Set(CountOfImpression).asJava)
|
||||
// the number of photos in which the source user has tagged the target user
|
||||
val NUM_PHOTO_TAGS_MEAN = new Continuous(
|
||||
"realgraph.num_photo_tags.mean",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_EWMA = new Continuous(
|
||||
"realgraph.num_photo_tags.ewma",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_VARIANCE = new Continuous(
|
||||
"realgraph.num_photo_tags.variance",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_photo_tags.non_zero_days",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_photo_tags.elapsed_days",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_photo_tags.days_since_last",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val NUM_PHOTO_TAGS_IS_MISSING = new Binary(
|
||||
"realgraph.num_photo_tags.is_missing",
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
|
||||
val NUM_FOLLOW_MEAN = new Continuous(
|
||||
"realgraph.num_follow.mean",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_EWMA = new Continuous(
|
||||
"realgraph.num_follow.ewma",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_VARIANCE = new Continuous(
|
||||
"realgraph.num_follow.variance",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_NON_ZERO_DAYS = new Continuous(
|
||||
"realgraph.num_follow.non_zero_days",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_ELAPSED_DAYS = new Continuous(
|
||||
"realgraph.num_follow.elapsed_days",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_DAYS_SINCE_LAST = new Continuous(
|
||||
"realgraph.num_follow.days_since_last",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
val NUM_FOLLOW_IS_MISSING = new Binary(
|
||||
"realgraph.num_follow.is_missing",
|
||||
Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
// the number of blocks that the source user sent to the destination user
|
||||
val NUM_BLOCKS_MEAN =
|
||||
new Continuous("realgraph.num_blocks.mean", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_EWMA =
|
||||
new Continuous("realgraph.num_blocks.ewma", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_VARIANCE =
|
||||
new Continuous("realgraph.num_blocks.variance", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_blocks.non_zero_days", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_blocks.elapsed_days", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_DAYS_SINCE_LAST =
|
||||
new Continuous("realgraph.num_blocks.days_since_last", Set(CountOfBlocks).asJava)
|
||||
val NUM_BLOCKS_IS_MISSING =
|
||||
new Binary("realgraph.num_blocks.is_missing", Set(CountOfBlocks).asJava)
|
||||
// the number of mutes that the source user sent to the destination user
|
||||
val NUM_MUTES_MEAN =
|
||||
new Continuous("realgraph.num_mutes.mean", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_EWMA =
|
||||
new Continuous("realgraph.num_mutes.ewma", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_VARIANCE =
|
||||
new Continuous("realgraph.num_mutes.variance", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_mutes.non_zero_days", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_mutes.elapsed_days", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_DAYS_SINCE_LAST =
|
||||
new Continuous("realgraph.num_mutes.days_since_last", Set(CountOfMutes).asJava)
|
||||
val NUM_MUTES_IS_MISSING =
|
||||
new Binary("realgraph.num_mutes.is_missing", Set(CountOfMutes).asJava)
|
||||
// the number of report as abuses that the source user sent to the destination user
|
||||
val NUM_REPORTS_AS_ABUSES_MEAN =
|
||||
new Continuous("realgraph.num_report_as_abuses.mean", Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_EWMA =
|
||||
new Continuous("realgraph.num_report_as_abuses.ewma", Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_VARIANCE =
|
||||
new Continuous("realgraph.num_report_as_abuses.variance", Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_NON_ZERO_DAYS =
|
||||
new Continuous("realgraph.num_report_as_abuses.non_zero_days", Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_ELAPSED_DAYS =
|
||||
new Continuous("realgraph.num_report_as_abuses.elapsed_days", Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_DAYS_SINCE_LAST =
|
||||
new Continuous(
|
||||
"realgraph.num_report_as_abuses.days_since_last",
|
||||
Set(CountOfAbuseReports).asJava)
|
||||
val NUM_REPORTS_AS_ABUSES_IS_MISSING =
|
||||
new Binary("realgraph.num_report_as_abuses.is_missing", Set(CountOfAbuseReports).asJava)
|
||||
// The number of report-as-spam actions that the source user sent to the destination user.
// All features in this family are annotated with both CountOfAbuseReports and
// SafetyRelationships.
val NUM_REPORTS_AS_SPAMS_MEAN =
  new Continuous(
    "realgraph.num_report_as_spams.mean",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_EWMA =
  new Continuous(
    "realgraph.num_report_as_spams.ewma",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_VARIANCE =
  new Continuous(
    "realgraph.num_report_as_spams.variance",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_NON_ZERO_DAYS =
  new Continuous(
    "realgraph.num_report_as_spams.non_zero_days",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_ELAPSED_DAYS =
  new Continuous(
    "realgraph.num_report_as_spams.elapsed_days",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_DAYS_SINCE_LAST =
  new Continuous(
    "realgraph.num_report_as_spams.days_since_last",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
val NUM_REPORTS_AS_SPAMS_IS_MISSING =
  new Binary(
    "realgraph.num_report_as_spams.is_missing",
    Set(CountOfAbuseReports, SafetyRelationships).asJava)
|
||||
|
||||
// Mutual-follow edge features ("realgraph.num_mutual_follow.*").
// Every feature in this family carries the same five follow-related annotations:
// Follow, Private/PublicAccountsFollowedBy and Private/PublicAccountsFollowing.
val NUM_MUTUAL_FOLLOW_MEAN = new Continuous(
  "realgraph.num_mutual_follow.mean",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_EWMA = new Continuous(
  "realgraph.num_mutual_follow.ewma",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_VARIANCE = new Continuous(
  "realgraph.num_mutual_follow.variance",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_mutual_follow.non_zero_days",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_ELAPSED_DAYS = new Continuous(
  "realgraph.num_mutual_follow.elapsed_days",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_mutual_follow.days_since_last",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
val NUM_MUTUAL_FOLLOW_IS_MISSING = new Binary(
  "realgraph.num_mutual_follow.is_missing",
  Set(
    Follow,
    PrivateAccountsFollowedBy,
    PublicAccountsFollowedBy,
    PrivateAccountsFollowing,
    PublicAccountsFollowing).asJava
)
|
||||
|
||||
// SMS-follow edge features ("realgraph.num_sms_follow.*"), each annotated with
// Follow, PrivateAccountsFollowedBy and PublicAccountsFollowedBy.
val NUM_SMS_FOLLOW_MEAN = new Continuous(
  "realgraph.num_sms_follow.mean",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_EWMA = new Continuous(
  "realgraph.num_sms_follow.ewma",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_VARIANCE = new Continuous(
  "realgraph.num_sms_follow.variance",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_sms_follow.non_zero_days",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_ELAPSED_DAYS = new Continuous(
  "realgraph.num_sms_follow.elapsed_days",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_sms_follow.days_since_last",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
val NUM_SMS_FOLLOW_IS_MISSING = new Binary(
  "realgraph.num_sms_follow.is_missing",
  Set(Follow, PrivateAccountsFollowedBy, PublicAccountsFollowedBy).asJava)
|
||||
|
||||
// Address-book (email) edge features ("realgraph.num_address_book_email.*"),
// each annotated with AddressBook.
val NUM_ADDRESS_BOOK_EMAIL_MEAN =
  new Continuous("realgraph.num_address_book_email.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_EMAIL_EWMA =
  new Continuous("realgraph.num_address_book_email.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_EMAIL_VARIANCE =
  new Continuous("realgraph.num_address_book_email.variance", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_EMAIL_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_email.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_EMAIL_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_email.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_EMAIL_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_email.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_EMAIL_IS_MISSING =
  new Binary("realgraph.num_address_book_email.is_missing", Set(AddressBook).asJava)
|
||||
|
||||
// Address-book (in both) edge features ("realgraph.num_address_book_in_both.*"),
// each annotated with AddressBook.
val NUM_ADDRESS_BOOK_IN_BOTH_MEAN =
  new Continuous("realgraph.num_address_book_in_both.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_IN_BOTH_EWMA =
  new Continuous("realgraph.num_address_book_in_both.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_IN_BOTH_VARIANCE = new Continuous(
  "realgraph.num_address_book_in_both.variance",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_IN_BOTH_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_in_both.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_IN_BOTH_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_in_both.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_IN_BOTH_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_in_both.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_IN_BOTH_IS_MISSING = new Binary(
  "realgraph.num_address_book_in_both.is_missing",
  Set(AddressBook).asJava
)
|
||||
|
||||
// Address-book (phone) edge features ("realgraph.num_address_book_phone.*"),
// each annotated with AddressBook.
val NUM_ADDRESS_BOOK_PHONE_MEAN =
  new Continuous("realgraph.num_address_book_phone.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_PHONE_EWMA =
  new Continuous("realgraph.num_address_book_phone.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_PHONE_VARIANCE =
  new Continuous("realgraph.num_address_book_phone.variance", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_PHONE_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_phone.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_PHONE_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_phone.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_PHONE_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_phone.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_PHONE_IS_MISSING =
  new Binary("realgraph.num_address_book_phone.is_missing", Set(AddressBook).asJava)
|
||||
|
||||
// Address-book mutual-edge (email) features
// ("realgraph.num_address_book_mutual_edge_email.*"), each annotated with AddressBook.
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_MEAN =
  new Continuous("realgraph.num_address_book_mutual_edge_email.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_EWMA =
  new Continuous("realgraph.num_address_book_mutual_edge_email.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_VARIANCE =
  new Continuous("realgraph.num_address_book_mutual_edge_email.variance", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_email.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_email.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_mutual_edge_email.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_EMAIL_IS_MISSING =
  new Binary("realgraph.num_address_book_mutual_edge_email.is_missing", Set(AddressBook).asJava)
|
||||
|
||||
// Address-book mutual-edge (in both) features
// ("realgraph.num_address_book_mutual_edge_in_both.*"), each annotated with AddressBook.
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_MEAN =
  new Continuous("realgraph.num_address_book_mutual_edge_in_both.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_EWMA =
  new Continuous("realgraph.num_address_book_mutual_edge_in_both.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_VARIANCE = new Continuous(
  "realgraph.num_address_book_mutual_edge_in_both.variance",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_in_both.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_in_both.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_mutual_edge_in_both.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_IN_BOTH_IS_MISSING = new Binary(
  "realgraph.num_address_book_mutual_edge_in_both.is_missing",
  Set(AddressBook).asJava
)
|
||||
|
||||
// Address-book mutual-edge (phone) features
// ("realgraph.num_address_book_mutual_edge_phone.*"), each annotated with AddressBook.
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_MEAN =
  new Continuous("realgraph.num_address_book_mutual_edge_phone.mean", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_EWMA =
  new Continuous("realgraph.num_address_book_mutual_edge_phone.ewma", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_VARIANCE =
  new Continuous("realgraph.num_address_book_mutual_edge_phone.variance", Set(AddressBook).asJava)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_NON_ZERO_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_phone.non_zero_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_ELAPSED_DAYS = new Continuous(
  "realgraph.num_address_book_mutual_edge_phone.elapsed_days",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_DAYS_SINCE_LAST = new Continuous(
  "realgraph.num_address_book_mutual_edge_phone.days_since_last",
  Set(AddressBook).asJava
)
val NUM_ADDRESS_BOOK_MUTUAL_EDGE_PHONE_IS_MISSING =
  new Binary("realgraph.num_address_book_mutual_edge_phone.is_missing", Set(AddressBook).asJava)
|
||||
}
|
||||
|
||||
/**
 * Bundles an optional real-graph edge feature with the seven data-record feature
 * definitions (six continuous, one binary) that belong to the same feature family.
 *
 * NOTE(review): how consumers map `edgeFeatureOpt` onto these definitions is not visible
 * here; the parameter descriptions below follow the parameter names only.
 *
 * @param edgeFeatureOpt the edge feature, if one is available
 * @param meanFeature definition for the family's mean feature
 * @param ewmaFeature definition for the family's ewma feature
 * @param varianceFeature definition for the family's variance feature
 * @param nonZeroDaysFeature definition for the family's non-zero-days feature
 * @param elapsedDaysFeature definition for the family's elapsed-days feature
 * @param daysSinceLastFeature definition for the family's days-since-last feature
 * @param isMissingFeature definition for the family's binary is-missing feature
 */
case class RealGraphEdgeDataRecordFeatures(
  edgeFeatureOpt: Option[RealGraphEdgeFeature],
  meanFeature: Continuous,
  ewmaFeature: Continuous,
  varianceFeature: Continuous,
  nonZeroDaysFeature: Continuous,
  elapsedDaysFeature: Continuous,
  daysSinceLastFeature: Continuous,
  isMissingFeature: Binary)
|
@ -0,0 +1,9 @@
|
||||
# Scala library target built from every *.scala source in this directory.
scala_library(
    sources = ["*.scala"],
    platform = "java8",
    tags = ["bazel-compatible"],
    dependencies = [
        "src/java/com/twitter/ml/api:api-base",
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
    ],
)
|
@ -0,0 +1,967 @@
|
||||
package com.twitter.timelines.prediction.features.recap
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.Discrete
|
||||
import com.twitter.ml.api.Feature.SparseBinary
|
||||
import com.twitter.ml.api.Feature.Text
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/** Recap feature definitions built with an empty prefix, i.e. un-prefixed feature names. */
object RecapFeatures extends RecapFeatures("")

/**
 * Recap feature definitions built with the `in_reply_to_tweet` prefix; names that go through
 * the class's `name(...)` helper become `in_reply_to_tweet.<featureName>`.
 */
object InReplyToRecapFeatures extends RecapFeatures("in_reply_to_tweet")
|
||||
|
||||
class RecapFeatures(prefix: String) {
|
||||
/**
 * Builds the full feature name for this feature set.
 *
 * @param featureName the un-prefixed feature name
 * @return `prefix.featureName` when the class prefix is non-empty, otherwise `featureName`
 *         unchanged
 */
private def name(featureName: String): String =
  if (prefix.nonEmpty) s"$prefix.$featureName" else featureName
|
||||
|
||||
// Binary client flags, all annotated with ClientType.
val IS_IPAD_CLIENT = new Binary(name("recap.client.is_ipad"), Set(ClientType).asJava)
val IS_WEB_CLIENT = new Binary(name("recap.client.is_web"), Set(ClientType).asJava)
// NOTE(review): the identifier says IPHONE but the feature name is "is_phone"; the name is
// kept as-is because consumers may depend on it.
val IS_IPHONE_CLIENT = new Binary(name("recap.client.is_phone"), Set(ClientType).asJava)
val IS_ANDROID_CLIENT = new Binary(name("recap.client.is_android"), Set(ClientType).asJava)
val IS_ANDROID_TABLET_CLIENT =
  new Binary(name("recap.client.is_android_tablet"), Set(ClientType).asJava)
|
||||
|
||||
// Features from userAgent.
// NOTE(review): FROM_DOG_FOOD is named under "recap.meta" while its neighbours use
// "recap.user_agent"; kept as-is.
val CLIENT_NAME = new Text(name("recap.user_agent.client_name"), Set(ClientType).asJava)
val CLIENT_SOURCE = new Discrete(name("recap.user_agent.client_source"), Set(ClientType).asJava)
val CLIENT_VERSION = new Text(name("recap.user_agent.client_version"), Set(ClientVersion).asJava)
val CLIENT_VERSION_CODE =
  new Text(name("recap.user_agent.client_version_code"), Set(ClientVersion).asJava)
val DEVICE = new Text(name("recap.user_agent.device"), Set(DeviceType).asJava)
val FROM_DOG_FOOD = new Binary(name("recap.meta.from_dog_food"), Set(UserAgent).asJava)
val FROM_TWITTER_CLIENT =
  new Binary(name("recap.user_agent.from_twitter_client"), Set(UserAgent).asJava)
val MANUFACTURER = new Text(name("recap.user_agent.manufacturer"), Set(UserAgent).asJava)
val MODEL = new Text(name("recap.user_agent.model"), Set(UserAgent).asJava)
val NETWORK_CONNECTION =
  new Discrete(name("recap.user_agent.network_connection"), Set(UserAgent).asJava)
val SDK_VERSION = new Text(name("recap.user_agent.sdk_version"), Set(AppId, UserAgent).asJava)
|
||||
|
||||
// Engagement: binary retweet / favorite / reply features.
val IS_RETWEETED = new Binary(
  name("recap.engagement.is_retweeted"),
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
val IS_FAVORITED = new Binary(
  name("recap.engagement.is_favorited"),
  Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
val IS_REPLIED = new Binary(
  name("recap.engagement.is_replied"),
  Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
|
||||
// "Good clicked conversation descendant" binary features.
// v1: post-click engagements: fav, reply.
val IS_GOOD_CLICKED_CONVO_DESC_V1 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_favorited_or_replied"),
  Set(
    PublicLikes,
    PrivateLikes,
    PublicReplies,
    PrivateReplies,
    EngagementsPrivate,
    EngagementsPublic).asJava)
// v2: post-click engagements: click.
val IS_GOOD_CLICKED_CONVO_DESC_V2 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_v2"),
  Set(TweetsClicked, EngagementsPrivate).asJava)

// One feature per individual post-click engagement type.
val IS_GOOD_CLICKED_CONVO_DESC_FAVORITED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_favorited"),
  Set(PublicLikes, PrivateLikes, EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_REPLIED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_replied"),
  Set(PublicReplies, PrivateReplies, EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_RETWEETED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_retweeted"),
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_CLICKED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_clicked"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_FOLLOWED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_followed"),
  Set(EngagementsPrivate).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_SHARE_DM_CLICKED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_share_dm_clicked"),
  Set(EngagementsPrivate).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_PROFILE_CLICKED = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_profile_clicked"),
  Set(EngagementsPrivate).asJava)

// "uam_gt_N" variants for N = 0..3.
// NOTE(review): the meaning of "uam" is not visible in this file — confirm with the producer.
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_0 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_uam_gt_0"),
  Set(EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_1 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_uam_gt_1"),
  Set(EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_2 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_uam_gt_2"),
  Set(EngagementsPrivate, EngagementsPublic).asJava)
val IS_GOOD_CLICKED_CONVO_DESC_UAM_GT_3 = new Binary(
  name("recap.engagement.is_good_clicked_convo_desc_uam_gt_3"),
  Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
|
||||
// Tweet-detail dwell binary features: one without a threshold suffix plus 8/15/25/30-second
// variants, all annotated with TweetsClicked and EngagementsPrivate.
val IS_TWEET_DETAIL_DWELLED = new Binary(
  name("recap.engagement.is_tweet_detail_dwelled"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_TWEET_DETAIL_DWELLED_8_SEC = new Binary(
  name("recap.engagement.is_tweet_detail_dwelled_8_sec"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_TWEET_DETAIL_DWELLED_15_SEC = new Binary(
  name("recap.engagement.is_tweet_detail_dwelled_15_sec"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_TWEET_DETAIL_DWELLED_25_SEC = new Binary(
  name("recap.engagement.is_tweet_detail_dwelled_25_sec"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_TWEET_DETAIL_DWELLED_30_SEC = new Binary(
  name("recap.engagement.is_tweet_detail_dwelled_30_sec"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
|
||||
|
||||
// Profile dwell binary features: one without a threshold suffix plus 10/20/30-second variants.
// NOTE(review): these names are string literals that are not passed through name(...), so
// they do not pick up the class prefix — confirm this is intentional.
val IS_PROFILE_DWELLED = new Binary(
  "recap.engagement.is_profile_dwelled",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
val IS_PROFILE_DWELLED_10_SEC = new Binary(
  "recap.engagement.is_profile_dwelled_10_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
val IS_PROFILE_DWELLED_20_SEC = new Binary(
  "recap.engagement.is_profile_dwelled_20_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
val IS_PROFILE_DWELLED_30_SEC = new Binary(
  "recap.engagement.is_profile_dwelled_30_sec",
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
|
||||
// Fullscreen-video dwell binary features: one without a threshold suffix plus
// 5/10/20/30-second variants.
// NOTE(review): these names are string literals that are not passed through name(...), so
// they do not pick up the class prefix — confirm this is intentional.
val IS_FULLSCREEN_VIDEO_DWELLED = new Binary(
  "recap.engagement.is_fullscreen_video_dwelled",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Binary(
  "recap.engagement.is_fullscreen_video_dwelled_5_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Binary(
  "recap.engagement.is_fullscreen_video_dwelled_10_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Binary(
  "recap.engagement.is_fullscreen_video_dwelled_20_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Binary(
  "recap.engagement.is_fullscreen_video_dwelled_30_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
|
||||
|
||||
// Link dwell binary features with 15/30/60-second variants.
// NOTE(review): these names are string literals that are not passed through name(...), so
// they do not pick up the class prefix — confirm this is intentional.
val IS_LINK_DWELLED_15_SEC = new Binary(
  "recap.engagement.is_link_dwelled_15_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_LINK_DWELLED_30_SEC = new Binary(
  "recap.engagement.is_link_dwelled_30_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)

val IS_LINK_DWELLED_60_SEC = new Binary(
  "recap.engagement.is_link_dwelled_60_sec",
  Set(MediaEngagementActivities, EngagementTypePrivate, EngagementsPrivate).asJava)
|
||||
|
||||
// Quote / retweet-without-quote / click features, followed by the timeline dwell features
// (two binary flags and three continuous dwell values).
val IS_QUOTED = new Binary(
  name("recap.engagement.is_quoted"),
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
val IS_RETWEETED_WITHOUT_QUOTE = new Binary(
  name("recap.engagement.is_retweeted_without_quote"),
  Set(PublicRetweets, PrivateRetweets, EngagementsPrivate, EngagementsPublic).asJava)
val IS_CLICKED =
  new Binary(name("recap.engagement.is_clicked"), Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_DWELLED = new Binary(name("recap.engagement.is_dwelled"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_IN_BOUNDS_V1 =
  new Binary(name("recap.engagement.is_dwelled_in_bounds_v1"), Set(EngagementsPrivate).asJava)
val DWELL_NORMALIZED_OVERALL = new Continuous(
  name("recap.engagement.dwell_normalized_overall"),
  Set(EngagementsPrivate).asJava)
val DWELL_CDF_OVERALL =
  new Continuous(name("recap.engagement.dwell_cdf_overall"), Set(EngagementsPrivate).asJava)
val DWELL_CDF = new Continuous(name("recap.engagement.dwell_cdf"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Binary dwell features with 1s..10s name suffixes, all annotated with EngagementsPrivate.
val IS_DWELLED_1S =
  new Binary(name("recap.engagement.is_dwelled_1s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_2S =
  new Binary(name("recap.engagement.is_dwelled_2s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_3S =
  new Binary(name("recap.engagement.is_dwelled_3s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_4S =
  new Binary(name("recap.engagement.is_dwelled_4s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_5S =
  new Binary(name("recap.engagement.is_dwelled_5s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_6S =
  new Binary(name("recap.engagement.is_dwelled_6s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_7S =
  new Binary(name("recap.engagement.is_dwelled_7s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_8S =
  new Binary(name("recap.engagement.is_dwelled_8s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_9S =
  new Binary(name("recap.engagement.is_dwelled_9s"), Set(EngagementsPrivate).asJava)
val IS_DWELLED_10S =
  new Binary(name("recap.engagement.is_dwelled_10s"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Binary skip features with 1s..10s name suffixes, all annotated with EngagementsPrivate.
val IS_SKIPPED_1S =
  new Binary(name("recap.engagement.is_skipped_1s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_2S =
  new Binary(name("recap.engagement.is_skipped_2s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_3S =
  new Binary(name("recap.engagement.is_skipped_3s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_4S =
  new Binary(name("recap.engagement.is_skipped_4s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_5S =
  new Binary(name("recap.engagement.is_skipped_5s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_6S =
  new Binary(name("recap.engagement.is_skipped_6s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_7S =
  new Binary(name("recap.engagement.is_skipped_7s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_8S =
  new Binary(name("recap.engagement.is_skipped_8s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_9S =
  new Binary(name("recap.engagement.is_skipped_9s"), Set(EngagementsPrivate).asJava)
val IS_SKIPPED_10S =
  new Binary(name("recap.engagement.is_skipped_10s"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Impression, follow, profile/link click, photo, video-playback and share binary features.
val IS_IMPRESSED =
  new Binary(name("recap.engagement.is_impressed"), Set(EngagementsPrivate).asJava)
// NOTE(review): this name is a literal that is not passed through name(...), so it does not
// pick up the class prefix — confirm this is intentional.
val IS_FOLLOWED =
  new Binary("recap.engagement.is_followed", Set(EngagementsPrivate, EngagementsPublic).asJava)
val IS_PROFILE_CLICKED = new Binary(
  name("recap.engagement.is_profile_clicked"),
  Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
val IS_OPEN_LINKED = new Binary(
  name("recap.engagement.is_open_linked"),
  Set(EngagementsPrivate, LinksClickedOn).asJava)
val IS_PHOTO_EXPANDED =
  new Binary(name("recap.engagement.is_photo_expanded"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_VIEWED =
  new Binary(name("recap.engagement.is_video_viewed"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_START =
  new Binary(name("recap.engagement.is_video_playback_start"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_25 =
  new Binary(name("recap.engagement.is_video_playback_25"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_50 =
  new Binary(name("recap.engagement.is_video_playback_50"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_75 =
  new Binary(name("recap.engagement.is_video_playback_75"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_95 =
  new Binary(name("recap.engagement.is_video_playback_95"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_PLAYBACK_COMPLETE =
  new Binary(name("recap.engagement.is_video_playback_complete"), Set(EngagementsPrivate).asJava)
val IS_VIDEO_VIEWED_AND_PLAYBACK_50 = new Binary(
  name("recap.engagement.is_video_viewed_and_playback_50"),
  Set(EngagementsPrivate).asJava)
val IS_VIDEO_QUALITY_VIEWED = new Binary(
  name("recap.engagement.is_video_quality_viewed"),
  Set(EngagementsPrivate).asJava
)
val IS_TWEET_SHARE_DM_CLICKED =
  new Binary(name("recap.engagement.is_tweet_share_dm_clicked"), Set(EngagementsPrivate).asJava)
val IS_TWEET_SHARE_DM_SENT =
  new Binary(name("recap.engagement.is_tweet_share_dm_sent"), Set(EngagementsPrivate).asJava)
val IS_BOOKMARKED =
  new Binary(name("recap.engagement.is_bookmarked"), Set(EngagementsPrivate).asJava)
val IS_SHARED =
  new Binary(name("recap.engagement.is_shared"), Set(EngagementsPrivate).asJava)
val IS_SHARE_MENU_CLICKED =
  new Binary(name("recap.engagement.is_share_menu_clicked"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Negative engagements.
// NOTE(review): several names in this group (is_negative_feedback, is_topic_spec_neg_engagement,
// is_unfollow_topic*, is_not_interested_in*, is_follow_topic) are string literals that are not
// passed through name(...), so they do not pick up the class prefix — confirm this is intentional.
val IS_DONT_LIKE =
  new Binary(name("recap.engagement.is_dont_like"), Set(EngagementsPrivate).asJava)
val IS_BLOCK_CLICKED = new Binary(
  name("recap.engagement.is_block_clicked"),
  Set(TweetsClicked, EngagementsPrivate, EngagementsPublic).asJava)
val IS_BLOCK_DIALOG_BLOCKED = new Binary(
  name("recap.engagement.is_block_dialog_blocked"),
  Set(EngagementsPrivate, EngagementsPublic).asJava)
val IS_MUTE_CLICKED = new Binary(
  name("recap.engagement.is_mute_clicked"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_MUTE_DIALOG_MUTED =
  new Binary(name("recap.engagement.is_mute_dialog_muted"), Set(EngagementsPrivate).asJava)
val IS_REPORT_TWEET_CLICKED = new Binary(
  name("recap.engagement.is_report_tweet_clicked"),
  Set(TweetsClicked, EngagementsPrivate).asJava)
val IS_NEGATIVE_FEEDBACK =
  new Binary("recap.engagement.is_negative_feedback", Set(EngagementsPrivate).asJava)
val IS_NOT_ABOUT_TOPIC =
  new Binary(name("recap.engagement.is_not_about_topic"), Set(EngagementsPrivate).asJava)
val IS_NOT_RECENT =
  new Binary(name("recap.engagement.is_not_recent"), Set(EngagementsPrivate).asJava)
val IS_NOT_RELEVANT =
  new Binary(name("recap.engagement.is_not_relevant"), Set(EngagementsPrivate).asJava)
val IS_SEE_FEWER =
  new Binary(name("recap.engagement.is_see_fewer"), Set(EngagementsPrivate).asJava)
val IS_TOPIC_SPEC_NEG_ENGAGEMENT =
  new Binary("recap.engagement.is_topic_spec_neg_engagement", Set(EngagementsPrivate).asJava)
val IS_UNFOLLOW_TOPIC =
  new Binary("recap.engagement.is_unfollow_topic", Set(EngagementsPrivate).asJava)
val IS_UNFOLLOW_TOPIC_EXPLICIT_POSITIVE_LABEL =
  new Binary(
    "recap.engagement.is_unfollow_topic_explicit_positive_label",
    Set(EngagementsPrivate).asJava)
val IS_UNFOLLOW_TOPIC_IMPLICIT_POSITIVE_LABEL =
  new Binary(
    "recap.engagement.is_unfollow_topic_implicit_positive_label",
    Set(EngagementsPrivate).asJava)
val IS_UNFOLLOW_TOPIC_STRONG_EXPLICIT_NEGATIVE_LABEL =
  new Binary(
    "recap.engagement.is_unfollow_topic_strong_explicit_negative_label",
    Set(EngagementsPrivate).asJava)
val IS_UNFOLLOW_TOPIC_EXPLICIT_NEGATIVE_LABEL =
  new Binary(
    "recap.engagement.is_unfollow_topic_explicit_negative_label",
    Set(EngagementsPrivate).asJava)
val IS_NOT_INTERESTED_IN =
  new Binary("recap.engagement.is_not_interested_in", Set(EngagementsPrivate).asJava)
val IS_NOT_INTERESTED_IN_EXPLICIT_POSITIVE_LABEL =
  new Binary(
    "recap.engagement.is_not_interested_in_explicit_positive_label",
    Set(EngagementsPrivate).asJava)
val IS_NOT_INTERESTED_IN_EXPLICIT_NEGATIVE_LABEL =
  new Binary(
    "recap.engagement.is_not_interested_in_explicit_negative_label",
    Set(EngagementsPrivate).asJava)
val IS_CARET_CLICKED =
  new Binary(name("recap.engagement.is_caret_clicked"), Set(EngagementsPrivate).asJava)
val IS_FOLLOW_TOPIC =
  new Binary("recap.engagement.is_follow_topic", Set(EngagementsPrivate).asJava)
val IS_NOT_INTERESTED_IN_TOPIC =
  new Binary("recap.engagement.is_not_interested_in_topic", Set(EngagementsPrivate).asJava)
val IS_HOME_LATEST_VISITED =
  new Binary(name("recap.engagement.is_home_latest_visited"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Relevance prompt tweet engagements: yes-clicked, no-clicked and impressed flags,
// all annotated with EngagementsPrivate.
val IS_RELEVANCE_PROMPT_YES_CLICKED = new Binary(
  name("recap.engagement.is_relevance_prompt_yes_clicked"),
  Set(EngagementsPrivate).asJava)
val IS_RELEVANCE_PROMPT_NO_CLICKED = new Binary(
  name("recap.engagement.is_relevance_prompt_no_clicked"),
  Set(EngagementsPrivate).asJava)
val IS_RELEVANCE_PROMPT_IMPRESSED = new Binary(
  name("recap.engagement.is_relevance_prompt_impressed"),
  Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Reciprocal engagements for reply forward engagement
|
||||
val IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_impressed_by_author"),
|
||||
Set(EngagementsPrivate).asJava)
|
||||
val IS_REPLIED_REPLY_FAVORITED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_favorited_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava)
|
||||
val IS_REPLIED_REPLY_QUOTED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_quoted_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
|
||||
val IS_REPLIED_REPLY_REPLIED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_replied_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava)
|
||||
val IS_REPLIED_REPLY_RETWEETED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_retweeted_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava)
|
||||
val IS_REPLIED_REPLY_BLOCKED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_blocked_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_followed_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, Follow).asJava)
|
||||
val IS_REPLIED_REPLY_UNFOLLOWED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_unfollowed_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val IS_REPLIED_REPLY_MUTED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_muted_by_author"),
|
||||
Set(EngagementsPrivate).asJava)
|
||||
val IS_REPLIED_REPLY_REPORTED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_reported_by_author"),
|
||||
Set(EngagementsPrivate).asJava)
|
||||
|
||||
// This derived label is the logical OR of REPLY_REPLIED, REPLY_FAVORITED, REPLY_RETWEETED
|
||||
val IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_replied_reply_engaged_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
|
||||
// Reciprocal engagements for fav forward engagement
|
||||
val IS_FAVORITED_FAV_FAVORITED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_favorited_fav_favorited_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateLikes, PublicLikes).asJava
|
||||
)
|
||||
val IS_FAVORITED_FAV_REPLIED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_favorited_fav_replied_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateReplies, PublicReplies).asJava
|
||||
)
|
||||
val IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_favorited_fav_retweeted_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic, PrivateRetweets, PublicRetweets).asJava
|
||||
)
|
||||
// Reciprocal "fav" engagement label for a follow by the author.
// Annotated with the Follow personal-data type rather than the retweet types (which were
// a copy of IS_FAVORITED_FAV_RETWEETED_BY_AUTHOR), consistent with the parallel
// IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR label.
val IS_FAVORITED_FAV_FOLLOWED_BY_AUTHOR = new Binary(
  name("recap.engagement.is_favorited_fav_followed_by_author"),
  Set(EngagementsPrivate, EngagementsPublic, Follow).asJava
)
|
||||
// This derived label is the logical OR of FAV_REPLIED, FAV_FAVORITED, FAV_RETWEETED, FAV_FOLLOWED
|
||||
val IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Binary(
|
||||
name("recap.engagement.is_favorited_fav_engaged_by_author"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava)
|
||||
|
||||
// Define a good profile click by considering the following engagements (follow, fav, reply, retweet, etc.) on the profile page
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_FOLLOW = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_follow"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, Follow).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_FAV = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_fav"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateLikes, PublicLikes).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_REPLY = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_reply"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, PrivateReplies, PublicReplies).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_RETWEET = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_retweet"),
|
||||
Set(
|
||||
ProfilesViewed,
|
||||
ProfilesClicked,
|
||||
EngagementsPrivate,
|
||||
PrivateRetweets,
|
||||
PublicRetweets).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_CLICK = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_tweet_click"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, TweetsClicked).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_SHARE_DM_CLICK = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_share_dm_click"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
// This derived label is the union (logical OR) of the IS_PROFILE_CLICKED_AND_PROFILE_* binary labels above
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_engaged"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate, EngagementsPublic).asJava)
|
||||
|
||||
// Define a bad profile click by considering the following engagements (user report, tweet report, mute, block, etc.) on the profile page
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_USER_REPORT_CLICK = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_user_report_click"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_TWEET_REPORT_CLICK = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_tweet_report_click"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_MUTE = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_mute"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
val IS_PROFILE_CLICKED_AND_PROFILE_BLOCK = new Binary(
|
||||
name("recap.engagement.is_profile_clicked_and_profile_block"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
// This derived label is the union of bad profile click engagements and existing negative feedback
|
||||
val IS_NEGATIVE_FEEDBACK_V2 = new Binary(
|
||||
name("recap.engagement.is_negative_feedback_v2"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
val IS_STRONG_NEGATIVE_FEEDBACK = new Binary(
|
||||
name("recap.engagement.is_strong_negative_feedback"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
val IS_WEAK_NEGATIVE_FEEDBACK = new Binary(
|
||||
name("recap.engagement.is_weak_negative_feedback"),
|
||||
Set(ProfilesViewed, ProfilesClicked, EngagementsPrivate).asJava)
|
||||
// engagement for following user from any surface area
|
||||
// Binary label registered under the literal feature name below.
// NOTE(review): unlike most neighbouring labels, the name is passed as a raw string rather
// than through name(...), and the Follow personal-data type is not listed (compare
// IS_REPLIED_REPLY_FOLLOWED_BY_AUTHOR) — confirm both are intentional.
val IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Binary(
|
||||
"recap.engagement.is_followed_from_any_surface_area",
|
||||
Set(EngagementsPublic, EngagementsPrivate).asJava)
|
||||
|
||||
// Reply downvote engagements
|
||||
val IS_REPLY_DOWNVOTED =
|
||||
new Binary(name("recap.engagement.is_reply_downvoted"), Set(EngagementsPrivate).asJava)
|
||||
val IS_REPLY_DOWNVOTE_REMOVED =
|
||||
new Binary(name("recap.engagement.is_reply_downvote_removed"), Set(EngagementsPrivate).asJava)
|
||||
|
||||
// Other engagements
|
||||
val IS_GOOD_OPEN_LINK = new Binary(
|
||||
name("recap.engagement.is_good_open_link"),
|
||||
Set(EngagementsPrivate, LinksClickedOn).asJava)
|
||||
val IS_ENGAGED = new Binary(
|
||||
name("recap.engagement.any"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava
|
||||
) // Deprecated - to be removed shortly
|
||||
val IS_EARLYBIRD_UNIFIED_ENGAGEMENT = new Binary(
|
||||
name("recap.engagement.is_unified_engagement"),
|
||||
Set(EngagementsPrivate, EngagementsPublic).asJava
|
||||
) // A subset of IS_ENGAGED specifically intended for use in earlybird models
|
||||
|
||||
// features from ThriftTweetFeatures
|
||||
// Continuous feature annotated as an engagement score.
// NOTE(review): the feature-name string spells "enagagement" (sic), unlike the val name.
// It is a runtime string handed to name(...), so it is deliberately left untouched here;
// presumably existing consumers key on this exact spelling — confirm before correcting it.
val PREV_USER_TWEET_ENGAGEMENT = new Continuous(
|
||||
name("recap.tweetfeature.prev_user_tweet_enagagement"),
|
||||
Set(EngagementScore, EngagementsPrivate, EngagementsPublic).asJava)
|
||||
val IS_SENSITIVE = new Binary(name("recap.tweetfeature.is_sensitive"))
|
||||
val HAS_MULTIPLE_MEDIA = new Binary(
|
||||
name("recap.tweetfeature.has_multiple_media"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val IS_AUTHOR_PROFILE_EGG = new Binary(name("recap.tweetfeature.is_author_profile_egg"))
|
||||
val IS_AUTHOR_NEW =
|
||||
new Binary(name("recap.tweetfeature.is_author_new"), Set(UserState, UserType).asJava)
|
||||
val NUM_MENTIONS = new Continuous(
|
||||
name("recap.tweetfeature.num_mentions"),
|
||||
Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_MENTION = new Binary(name("recap.tweetfeature.has_mention"), Set(UserVisibleFlag).asJava)
|
||||
val NUM_HASHTAGS = new Continuous(
|
||||
name("recap.tweetfeature.num_hashtags"),
|
||||
Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_HASHTAG = new Binary(
|
||||
name("recap.tweetfeature.has_hashtag"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
// Language feature for links, annotated with ProvidedLanguage/InferredLanguage.
// NOTE(review): declared as Continuous, whereas the sibling LANGUAGE feature (same
// annotations) is Discrete — confirm the value encoding is intended.
val LINK_LANGUAGE = new Continuous(
|
||||
name("recap.tweetfeature.link_language"),
|
||||
Set(ProvidedLanguage, InferredLanguage).asJava)
|
||||
val IS_AUTHOR_NSFW =
|
||||
new Binary(name("recap.tweetfeature.is_author_nsfw"), Set(UserSafetyLabels, UserType).asJava)
|
||||
val IS_AUTHOR_SPAM =
|
||||
new Binary(name("recap.tweetfeature.is_author_spam"), Set(UserSafetyLabels, UserType).asJava)
|
||||
val IS_AUTHOR_BOT =
|
||||
new Binary(name("recap.tweetfeature.is_author_bot"), Set(UserSafetyLabels, UserType).asJava)
|
||||
val SIGNATURE =
|
||||
new Discrete(name("recap.tweetfeature.signature"), Set(DigitalSignatureNonrepudiation).asJava)
|
||||
val LANGUAGE = new Discrete(
|
||||
name("recap.tweetfeature.language"),
|
||||
Set(ProvidedLanguage, InferredLanguage).asJava)
|
||||
val FROM_INACTIVE_USER =
|
||||
new Binary(name("recap.tweetfeature.from_inactive_user"), Set(UserActiveFlag).asJava)
|
||||
// Binary feature with no personal-data annotation. Note the feature name differs from the
// val name: it lives under "recap.v3.tweetfeature" and ends in "probably_from_follow".
val PROBABLY_FROM_FOLLOWED_AUTHOR = new Binary(name("recap.v3.tweetfeature.probably_from_follow"))
|
||||
val FROM_MUTUAL_FOLLOW = new Binary(name("recap.tweetfeature.from_mutual_follow"))
|
||||
val USER_REP = new Continuous(name("recap.tweetfeature.user_rep"))
|
||||
val FROM_VERIFIED_ACCOUNT =
|
||||
new Binary(name("recap.tweetfeature.from_verified_account"), Set(UserVerifiedFlag).asJava)
|
||||
val IS_BUSINESS_SCORE = new Continuous(name("recap.tweetfeature.is_business_score"))
|
||||
val HAS_CONSUMER_VIDEO = new Binary(
|
||||
name("recap.tweetfeature.has_consumer_video"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_PRO_VIDEO = new Binary(
|
||||
name("recap.tweetfeature.has_pro_video"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_VINE = new Binary(
|
||||
name("recap.tweetfeature.has_vine"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_PERISCOPE = new Binary(
|
||||
name("recap.tweetfeature.has_periscope"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_NATIVE_VIDEO = new Binary(
|
||||
name("recap.tweetfeature.has_native_video"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_NATIVE_IMAGE = new Binary(
|
||||
name("recap.tweetfeature.has_native_image"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_CARD = new Binary(
|
||||
name("recap.tweetfeature.has_card"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_IMAGE = new Binary(
|
||||
name("recap.tweetfeature.has_image"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_NEWS = new Binary(
|
||||
name("recap.tweetfeature.has_news"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_VIDEO = new Binary(
|
||||
name("recap.tweetfeature.has_video"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_VISIBLE_LINK = new Binary(
|
||||
name("recap.tweetfeature.has_visible_link"),
|
||||
Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val LINK_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.link_count"),
|
||||
Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_LINK = new Binary(
|
||||
name("recap.tweetfeature.has_link"),
|
||||
Set(UrlFoundFlag, PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val IS_OFFENSIVE = new Binary(name("recap.tweetfeature.is_offensive"))
|
||||
val HAS_TREND = new Binary(
|
||||
name("recap.tweetfeature.has_trend"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val HAS_MULTIPLE_HASHTAGS_OR_TRENDS = new Binary(
|
||||
name("recap.tweetfeature.has_multiple_hashtag_or_trend"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val URL_DOMAINS = new SparseBinary(
|
||||
name("recap.tweetfeature.url_domains"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val CONTAINS_MEDIA = new Binary(
|
||||
name("recap.tweetfeature.contains_media"),
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val RETWEET_SEARCHER = new Binary(name("recap.tweetfeature.retweet_searcher"))
|
||||
val REPLY_SEARCHER = new Binary(name("recap.tweetfeature.reply_searcher"))
|
||||
val MENTION_SEARCHER =
|
||||
new Binary(name("recap.tweetfeature.mention_searcher"), Set(UserVisibleFlag).asJava)
|
||||
val REPLY_OTHER =
|
||||
new Binary(name("recap.tweetfeature.reply_other"), Set(PublicReplies, PrivateReplies).asJava)
|
||||
val RETWEET_OTHER = new Binary(
|
||||
name("recap.tweetfeature.retweet_other"),
|
||||
Set(PublicRetweets, PrivateRetweets).asJava)
|
||||
val IS_REPLY =
|
||||
new Binary(name("recap.tweetfeature.is_reply"), Set(PublicReplies, PrivateReplies).asJava)
|
||||
val IS_RETWEET =
|
||||
new Binary(name("recap.tweetfeature.is_retweet"), Set(PublicRetweets, PrivateRetweets).asJava)
|
||||
val IS_EXTENDED_REPLY = new Binary(
|
||||
name("recap.tweetfeature.is_extended_reply"),
|
||||
Set(PublicReplies, PrivateReplies).asJava)
|
||||
val MATCH_UI_LANG = new Binary(
|
||||
name("recap.tweetfeature.match_ui_lang"),
|
||||
Set(ProvidedLanguage, InferredLanguage).asJava)
|
||||
val MATCH_SEARCHER_MAIN_LANG = new Binary(
|
||||
name("recap.tweetfeature.match_searcher_main_lang"),
|
||||
Set(ProvidedLanguage, InferredLanguage).asJava)
|
||||
val MATCH_SEARCHER_LANGS = new Binary(
|
||||
name("recap.tweetfeature.match_searcher_langs"),
|
||||
Set(ProvidedLanguage, InferredLanguage).asJava)
|
||||
val BIDIRECTIONAL_REPLY_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.bidirectional_reply_count"),
|
||||
Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
|
||||
val UNIDIRECTIONAL_REPLY_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.unidirectional_reply_count"),
|
||||
Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
|
||||
val BIDIRECTIONAL_RETWEET_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.bidirectional_retweet_count"),
|
||||
Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
|
||||
val UNIDIRECTIONAL_RETWEET_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.unidirectional_retweet_count"),
|
||||
Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
|
||||
val BIDIRECTIONAL_FAV_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.bidirectional_fav_count"),
|
||||
Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
|
||||
// Continuous count feature annotated with the private/public like-count types.
// NOTE(review): the feature-name string is spelled "unidirectiona_fav_count" (missing "l"),
// unlike the sibling "unidirectional_reply_count"/"unidirectional_retweet_count" names.
// It is a runtime string, so it is left as-is — confirm with consumers before correcting.
val UNIDIRECTIONAL_FAV_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.unidirectiona_fav_count"),
|
||||
Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
|
||||
val CONVERSATIONAL_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.conversational_count"),
|
||||
Set(CountOfPrivateTweets, CountOfPublicTweets).asJava)
|
||||
// tweet impressions on an embedded tweet
|
||||
val EMBEDS_IMPRESSION_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.embeds_impression_count"),
|
||||
Set(CountOfImpression).asJava)
|
||||
// number of URLs that embed the tweet
|
||||
val EMBEDS_URL_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.embeds_url_count"),
|
||||
Set(CountOfPrivateTweetEntitiesAndMetadata, CountOfPublicTweetEntitiesAndMetadata).asJava)
|
||||
// currently only counts views on Snappy and Amplify pro videos. Counts for other videos forthcoming
|
||||
val VIDEO_VIEW_COUNT = new Continuous(
|
||||
name("recap.tweetfeature.video_view_count"),
|
||||
Set(
|
||||
CountOfTweetEntitiesClicked,
|
||||
CountOfPrivateTweetEntitiesAndMetadata,
|
||||
CountOfPublicTweetEntitiesAndMetadata,
|
||||
EngagementsPrivate,
|
||||
EngagementsPublic).asJava
|
||||
)
|
||||
val TWEET_COUNT_FROM_USER_IN_SNAPSHOT = new Continuous(
|
||||
name("recap.tweetfeature.tweet_count_from_user_in_snapshot"),
|
||||
Set(CountOfPrivateTweets, CountOfPublicTweets).asJava)
|
||||
val NORMALIZED_PARUS_SCORE =
|
||||
new Continuous("recap.tweetfeature.normalized_parus_score", Set(EngagementScore).asJava)
|
||||
val PARUS_SCORE = new Continuous("recap.tweetfeature.parus_score", Set(EngagementScore).asJava)
|
||||
val REAL_GRAPH_WEIGHT =
|
||||
new Continuous("recap.tweetfeature.real_graph_weight", Set(UsersRealGraphScore).asJava)
|
||||
val SARUS_GRAPH_WEIGHT = new Continuous("recap.tweetfeature.sarus_graph_weight")
|
||||
// Continuous feature registered under the literal name below (no name(...) wrapper, no
// personal-data annotation).
// NOTE(review): the val identifier is misspelled ("INTERSTED") while the feature-name
// string is spelled correctly ("interested"). The identifier is part of the public
// interface, so renaming it would require updating every caller.
val TOPIC_SIM_SEARCHER_INTERSTED_IN_AUTHOR_KNOWN_FOR = new Continuous(
|
||||
"recap.tweetfeature.topic_sim_searcher_interested_in_author_known_for")
|
||||
val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_INTERESTED_IN = new Continuous(
|
||||
"recap.tweetfeature.topic_sim_searcher_author_both_interested_in")
|
||||
val TOPIC_SIM_SEARCHER_AUTHOR_BOTH_KNOWN_FOR = new Continuous(
|
||||
"recap.tweetfeature.topic_sim_searcher_author_both_known_for")
|
||||
val TOPIC_SIM_SEARCHER_INTERESTED_IN_TWEET = new Continuous(
|
||||
"recap.tweetfeature.topic_sim_searcher_interested_in_tweet")
|
||||
val IS_RETWEETER_PROFILE_EGG =
|
||||
new Binary(name("recap.v2.tweetfeature.is_retweeter_profile_egg"), Set(UserType).asJava)
|
||||
val IS_RETWEETER_NEW =
|
||||
new Binary(name("recap.v2.tweetfeature.is_retweeter_new"), Set(UserType, UserState).asJava)
|
||||
val IS_RETWEETER_BOT =
|
||||
new Binary(
|
||||
name("recap.v2.tweetfeature.is_retweeter_bot"),
|
||||
Set(UserType, UserSafetyLabels).asJava)
|
||||
val IS_RETWEETER_NSFW =
|
||||
new Binary(
|
||||
name("recap.v2.tweetfeature.is_retweeter_nsfw"),
|
||||
Set(UserType, UserSafetyLabels).asJava)
|
||||
val IS_RETWEETER_SPAM =
|
||||
new Binary(
|
||||
name("recap.v2.tweetfeature.is_retweeter_spam"),
|
||||
Set(UserType, UserSafetyLabels).asJava)
|
||||
val RETWEET_OF_MUTUAL_FOLLOW = new Binary(
|
||||
name("recap.v2.tweetfeature.retweet_of_mutual_follow"),
|
||||
Set(PublicRetweets, PrivateRetweets).asJava)
|
||||
val SOURCE_AUTHOR_REP = new Continuous(name("recap.v2.tweetfeature.source_author_rep"))
|
||||
val IS_RETWEET_OF_REPLY = new Binary(
|
||||
name("recap.v2.tweetfeature.is_retweet_of_reply"),
|
||||
Set(PublicRetweets, PrivateRetweets).asJava)
|
||||
val RETWEET_DIRECTED_AT_USER_IN_FIRST_DEGREE = new Binary(
|
||||
name("recap.v2.tweetfeature.is_retweet_directed_at_user_in_first_degree"),
|
||||
Set(PublicRetweets, PrivateRetweets, Follow).asJava)
|
||||
val MENTIONED_SCREEN_NAMES = new SparseBinary(
|
||||
"entities.users.mentioned_screen_names",
|
||||
Set(DisplayName, UserVisibleFlag).asJava)
|
||||
val MENTIONED_SCREEN_NAME = new Text(
|
||||
"entities.users.mentioned_screen_names.member",
|
||||
Set(DisplayName, UserVisibleFlag).asJava)
|
||||
val HASHTAGS = new SparseBinary(
|
||||
"entities.hashtags",
|
||||
Set(PublicTweetEntitiesAndMetadata, PrivateTweetEntitiesAndMetadata).asJava)
|
||||
val URL_SLUGS = new SparseBinary(name("recap.linkfeature.url_slugs"), Set(UrlFoundFlag).asJava)
|
||||
|
||||
// features from ThriftSearchResultMetadata
|
||||
val REPLY_COUNT = new Continuous(
|
||||
name("recap.searchfeature.reply_count"),
|
||||
Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
|
||||
val RETWEET_COUNT = new Continuous(
|
||||
name("recap.searchfeature.retweet_count"),
|
||||
Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
|
||||
val FAV_COUNT = new Continuous(
|
||||
name("recap.searchfeature.fav_count"),
|
||||
Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
|
||||
val BLENDER_SCORE = new Continuous(name("recap.searchfeature.blender_score"))
|
||||
val TEXT_SCORE = new Continuous(name("recap.searchfeature.text_score"))
|
||||
|
||||
// features related to content source
|
||||
val SOURCE_TYPE = new Discrete(name("recap.source.type"))
|
||||
|
||||
// features from addressbook
|
||||
// the author is in the user's email addressbook
|
||||
val USER_TO_AUTHOR_EMAIL_REACHABLE =
|
||||
new Binary(name("recap.addressbook.user_to_author_email_reachable"), Set(AddressBook).asJava)
|
||||
// the author is in the user's phone addressbook
|
||||
val USER_TO_AUTHOR_PHONE_REACHABLE =
|
||||
new Binary(name("recap.addressbook.user_to_author_phone_reachable"), Set(AddressBook).asJava)
|
||||
// the user is in the author's email addressbook
|
||||
val AUTHOR_TO_USER_EMAIL_REACHABLE =
|
||||
new Binary(name("recap.addressbook.author_to_user_email_reachable"), Set(AddressBook).asJava)
|
||||
// the user is in the author's phone addressbook
|
||||
val AUTHOR_TO_USER_PHONE_REACHABLE =
|
||||
new Binary(name("recap.addressbook.author_to_user_phone_reachable"), Set(AddressBook).asJava)
|
||||
|
||||
// predicted engagement (these features are used by prediction service to return the predicted engagement probability)
|
||||
// these should match the names in engagement_to_score_feature_mapping
|
||||
val PREDICTED_IS_FAVORITED =
|
||||
new Continuous(name("recap.engagement_predicted.is_favorited"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_RETWEETED =
|
||||
new Continuous(name("recap.engagement_predicted.is_retweeted"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_QUOTED =
|
||||
new Continuous(name("recap.engagement_predicted.is_quoted"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_REPLIED =
|
||||
new Continuous(name("recap.engagement_predicted.is_replied"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_GOOD_OPEN_LINK = new Continuous(
|
||||
name("recap.engagement_predicted.is_good_open_link"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_PROFILE_CLICKED = new Continuous(
|
||||
name("recap.engagement_predicted.is_profile_clicked"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED = new Continuous(
|
||||
name("recap.engagement_predicted.is_profile_clicked_and_profile_engaged"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_CLICKED =
|
||||
new Continuous(name("recap.engagement_predicted.is_clicked"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_PHOTO_EXPANDED = new Continuous(
|
||||
name("recap.engagement_predicted.is_photo_expanded"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DONT_LIKE =
|
||||
new Continuous(name("recap.engagement_predicted.is_dont_like"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_VIDEO_PLAYBACK_50 = new Continuous(
|
||||
name("recap.engagement_predicted.is_video_playback_50"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_VIDEO_QUALITY_VIEWED = new Continuous(
|
||||
name("recap.engagement_predicted.is_video_quality_viewed"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_BOOKMARKED =
|
||||
new Continuous(name("recap.engagement_predicted.is_bookmarked"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SHARED =
|
||||
new Continuous(name("recap.engagement_predicted.is_shared"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SHARE_MENU_CLICKED =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_share_menu_clicked"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_PROFILE_DWELLED_20_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_profile_dwelled_20_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_5_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_fullscreen_video_dwelled_5_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_10_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_fullscreen_video_dwelled_10_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_20_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_fullscreen_video_dwelled_20_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_FULLSCREEN_VIDEO_DWELLED_30_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_fullscreen_video_dwelled_30_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_UNIFIED_ENGAGEMENT = new Continuous(
|
||||
name("recap.engagement_predicted.is_unified_engagement"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_COMPOSE_TRIGGERED = new Continuous(
|
||||
name("recap.engagement_predicted.is_compose_triggered"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_REPLIED_REPLY_IMPRESSED_BY_AUTHOR = new Continuous(
|
||||
name("recap.engagement_predicted.is_replied_reply_impressed_by_author"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR = new Continuous(
|
||||
name("recap.engagement_predicted.is_replied_reply_engaged_by_author"),
|
||||
Set(EngagementScore).asJava)
|
||||
// Predicted-engagement score (EngagementScore). "V1" here corresponds to the feature name
// "is_good_clicked_convo_desc_favorited_or_replied".
val PREDICTED_IS_GOOD_CLICKED_V1 = new Continuous(
|
||||
name("recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied"),
|
||||
Set(EngagementScore).asJava)
|
||||
// Predicted-engagement score (EngagementScore). "V2" here corresponds to the feature name
// "is_good_clicked_convo_desc_v2".
val PREDICTED_IS_GOOD_CLICKED_V2 = new Continuous(
|
||||
name("recap.engagement_predicted.is_good_clicked_convo_desc_v2"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_TWEET_DETAIL_DWELLED_8_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_tweet_detail_dwelled_8_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_TWEET_DETAIL_DWELLED_15_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_tweet_detail_dwelled_15_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_TWEET_DETAIL_DWELLED_25_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_tweet_detail_dwelled_25_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_TWEET_DETAIL_DWELLED_30_SEC = new Continuous(
|
||||
name("recap.engagement_predicted.is_tweet_detail_dwelled_30_sec"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_FAVORITED_FAV_ENGAGED_BY_AUTHOR = new Continuous(
|
||||
name("recap.engagement_predicted.is_favorited_fav_engaged_by_author"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_GOOD_CLICKED_WITH_DWELL_SUM_GTE_60S = new Continuous(
|
||||
name(
|
||||
"recap.engagement_predicted.is_good_clicked_convo_desc_favorited_or_replied_or_dwell_sum_gte_60_secs"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_IN_BOUNDS_V1 = new Continuous(
|
||||
name("recap.engagement_predicted.is_dwelled_in_bounds_v1"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_DWELL_NORMALIZED_OVERALL = new Continuous(
|
||||
name("recap.engagement_predicted.dwell_normalized_overall"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_DWELL_CDF =
|
||||
new Continuous(name("recap.engagement_predicted.dwell_cdf"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_DWELL_CDF_OVERALL = new Continuous(
|
||||
name("recap.engagement_predicted.dwell_cdf_overall"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled"), Set(EngagementScore).asJava)
|
||||
|
||||
val PREDICTED_IS_DWELLED_1S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_1s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_2S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_2s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_3S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_3s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_4S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_4s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_5S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_5s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_6S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_6s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_7S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_7s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_8S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_8s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_9S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_9s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_DWELLED_10S =
|
||||
new Continuous(name("recap.engagement_predicted.is_dwelled_10s"), Set(EngagementScore).asJava)
|
||||
|
||||
val PREDICTED_IS_SKIPPED_1S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_1s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_2S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_2s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_3S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_3s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_4S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_4s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_5S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_5s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_6S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_6s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_7S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_7s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_8S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_8s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_9S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_9s"), Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_SKIPPED_10S =
|
||||
new Continuous(name("recap.engagement_predicted.is_skipped_10s"), Set(EngagementScore).asJava)
|
||||
|
||||
val PREDICTED_IS_HOME_LATEST_VISITED = new Continuous(
|
||||
name("recap.engagement_predicted.is_home_latest_visited"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_NEGATIVE_FEEDBACK =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_negative_feedback"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_NEGATIVE_FEEDBACK_V2 =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_negative_feedback_v2"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_WEAK_NEGATIVE_FEEDBACK =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_weak_negative_feedback"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_STRONG_NEGATIVE_FEEDBACK =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_strong_negative_feedback"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_REPORT_TWEET_CLICKED =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_report_tweet_clicked"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_UNFOLLOW_TOPIC =
|
||||
new Continuous(
|
||||
name("recap.engagement_predicted.is_unfollow_topic"),
|
||||
Set(EngagementScore).asJava)
|
||||
val PREDICTED_IS_RELEVANCE_PROMPT_YES_CLICKED = new Continuous(
|
||||
name("recap.engagement_predicted.is_relevance_prompt_yes_clicked"),
|
||||
Set(EngagementScore).asJava)
|
||||
|
||||
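// Engagement for following a user from any surface area.
// NOTE(review): unlike the neighbouring PREDICTED_* features, this feature name is passed as a raw
// string rather than through name(...) - confirm whether that is intentional.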
|
||||
val PREDICTED_IS_FOLLOWED_FROM_ANY_SURFACE_AREA = new Continuous(
|
||||
"recap.engagement_predicted.is_followed_from_any_surface_area",
|
||||
Set(EngagementScore).asJava)
|
||||
|
||||
|
||||
// These are global engagement counts for the Tweets.
|
||||
val FAV_COUNT_V2 = new Continuous(
|
||||
name("recap.earlybird.fav_count_v2"),
|
||||
Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
|
||||
val RETWEET_COUNT_V2 = new Continuous(
|
||||
name("recap.earlybird.retweet_count_v2"),
|
||||
Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
|
||||
val REPLY_COUNT_V2 = new Continuous(
|
||||
name("recap.earlybird.reply_count_v2"),
|
||||
Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION = new Binary(
|
||||
name("recap.has_us_political_annotation"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ALL_GROUPS_ANNOTATION = new Binary(
|
||||
name("recap.has_us_political_all_groups_annotation"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL = new Binary(
|
||||
name("recap.has_us_political_annotation_high_recall"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V2 = new Binary(
|
||||
name("recap.has_us_political_annotation_high_recall_v2"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V0 = new Binary(
|
||||
name("recap.has_us_political_annotation_high_precision_v0"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_BALANCED_PRECISION_RECALL_V0 = new Binary(
|
||||
name("recap.has_us_political_annotation_balanced_precision_recall_v0"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_HIGH_RECALL_V3 = new Binary(
|
||||
name("recap.has_us_political_annotation_high_recall_v3"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_HIGH_PRECISION_V3 = new Binary(
|
||||
name("recap.has_us_political_annotation_high_precision_v3"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
val HAS_US_POLITICAL_ANNOTATION_BALANCED_V3 = new Binary(
|
||||
name("recap.has_us_political_annotation_balanced_v3"),
|
||||
Set(SemanticcoreClassification).asJava
|
||||
)
|
||||
|
||||
}
|
@ -0,0 +1,29 @@
|
||||
package com.twitter.timelines.prediction.features.recap
|
||||
|
||||
/**
 * Lookup tables that pair each engagement label feature in RecapFeatures with the feature that
 * holds the corresponding predicted engagement score.
 */
object RecapFeaturesUtils {
  private type AnyFeature = com.twitter.ml.api.Feature[_]

  // Single source of truth for the label -> predicted-score pairing. Both public maps below are
  // derived from this list, so the name-keyed and id-keyed views cannot drift apart.
  // This needs to be updated if an engagement model is added or removed from prediction service.
  private val labelToScoreFeatures: Seq[(AnyFeature, AnyFeature)] = Seq(
    RecapFeatures.IS_FAVORITED -> RecapFeatures.PREDICTED_IS_FAVORITED,
    RecapFeatures.IS_REPLIED -> RecapFeatures.PREDICTED_IS_REPLIED,
    RecapFeatures.IS_RETWEETED -> RecapFeatures.PREDICTED_IS_RETWEETED,
    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V1 -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V1,
    RecapFeatures.IS_GOOD_CLICKED_CONVO_DESC_V2 -> RecapFeatures.PREDICTED_IS_GOOD_CLICKED_V2,
    // RecapFeatures.IS_NEGATIVE_FEEDBACK_V2 -> RecapFeatures.PREDICTED_IS_NEGATIVE_FEEDBACK_V2,
    RecapFeatures.IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED ->
      RecapFeatures.PREDICTED_IS_PROFILE_CLICKED_AND_PROFILE_ENGAGED,
    RecapFeatures.IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR ->
      RecapFeatures.PREDICTED_IS_REPLIED_REPLY_ENGAGED_BY_AUTHOR
  )

  /** Label feature name -> feature id of the matching predicted-score feature. */
  val scoreFeatureIdsMap: Map[String, Long] =
    labelToScoreFeatures.map {
      case (label, score) => (label.getFeatureName, score.getFeatureId: Long)
    }.toMap

  /** Label feature id -> feature id of the matching predicted-score feature. */
  val labelFeatureIdToScoreFeatureIdsMap: Map[Long, Long] =
    labelToScoreFeatures.map {
      case (label, score) => (label.getFeatureId: Long, score.getFeatureId: Long)
    }.toMap

  /** Names of all label features that have a predicted-score counterpart. */
  val labelFeatureNames: Seq[String] = scoreFeatureIdsMap.keys.toSeq
}
|
@ -0,0 +1,9 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
],
|
||||
)
|
@ -0,0 +1,57 @@
|
||||
package com.twitter.timelines.prediction.features.request_context
|
||||
|
||||
import com.twitter.ml.api.FeatureContext
|
||||
import com.twitter.ml.api.Feature._
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Feature definitions under the `request_context.` name prefix, together with a
 * [[FeatureContext]] that lists all of them.
 */
object RequestContextFeatures {
  val COUNTRY_CODE: Text = new Text(
    "request_context.country_code",
    Set(PrivateCountryOrRegion, InferredCountry).asJava
  )
  val LANGUAGE_CODE: Text = new Text(
    "request_context.language_code",
    Set(GeneralSettings, ProvidedLanguage, InferredLanguage).asJava
  )
  val REQUEST_PROVENANCE: Text = new Text(
    "request_context.request_provenance",
    Set(AppUsage).asJava
  )
  val DISPLAY_WIDTH: Continuous = new Continuous(
    "request_context.display_width",
    Set(OtherDeviceInfo).asJava
  )
  val DISPLAY_HEIGHT: Continuous = new Continuous(
    "request_context.display_height",
    Set(OtherDeviceInfo).asJava
  )
  val DISPLAY_DPI: Continuous = new Continuous(
    "request_context.display_dpi",
    Set(OtherDeviceInfo).asJava
  )

  // The following features are deliberately not Continuous: a continuous encoding cannot express,
  // for example, that hour 23 is adjacent to hour 0. Instead, each hour/day slice is treated
  // independently, like a set of sparse binary features.
  val TIMESTAMP_GMT_HOUR: Discrete = new Discrete(
    "request_context.timestamp_gmt_hour",
    Set(PrivateTimestamp).asJava
  )
  val TIMESTAMP_GMT_DOW: Discrete = new Discrete(
    "request_context.timestamp_gmt_dow",
    Set(PrivateTimestamp).asJava
  )

  val IS_GET_INITIAL: Binary = new Binary("request_context.is_get_initial")
  val IS_GET_MIDDLE: Binary = new Binary("request_context.is_get_middle")
  val IS_GET_NEWER: Binary = new Binary("request_context.is_get_newer")
  val IS_GET_OLDER: Binary = new Binary("request_context.is_get_older")

  // The following features are deliberately not Binary: the source field is Option[Boolean] and
  // we want to distinguish Some(false) from None. None will be converted to -1.
  val IS_POLLING: Discrete = new Discrete("request_context.is_polling")
  val IS_SESSION_START: Discrete = new Discrete("request_context.is_session_start")

  // Helps distinguish requests from "home" vs "home_latest" (reverse chron home view).
  val TIMELINE_KIND: Text = new Text("request_context.timeline_kind")

  /** Every feature declared above, in declaration order. */
  val featureContext: FeatureContext = new FeatureContext(
    COUNTRY_CODE,
    LANGUAGE_CODE,
    REQUEST_PROVENANCE,
    DISPLAY_WIDTH,
    DISPLAY_HEIGHT,
    DISPLAY_DPI,
    TIMESTAMP_GMT_HOUR,
    TIMESTAMP_GMT_DOW,
    IS_GET_INITIAL,
    IS_GET_MIDDLE,
    IS_GET_NEWER,
    IS_GET_OLDER,
    IS_POLLING,
    IS_SESSION_START,
    TIMELINE_KIND
  )
}
|
@ -0,0 +1,13 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/simclusters_v2:simclusters_v2-thrift-scala",
|
||||
"src/thrift/com/twitter/timelines/suggests/common:record-scala",
|
||||
"timelines/data_processing/ml_util/aggregation_framework:common_types",
|
||||
"timelines/data_processing/ml_util/aggregation_framework/conversion:for-timelines",
|
||||
],
|
||||
)
|
@ -0,0 +1,61 @@
|
||||
package com.twitter.timelines.prediction.features.simcluster
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.ml.api.Feature._
|
||||
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Converts a (userId, interested-in clusters) pair into [[SimclusterFeatures]], and counts the
 * inputs whose model version differs from the hardcoded SIMCLUSTER_MODEL_VERSION.
 *
 * @param statsReceiver receiver under which this helper's counter is scoped (by class name)
 */
class SimclusterFeaturesHelper(statsReceiver: StatsReceiver) {
  import SimclusterFeatures._

  private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
  private[this] val invalidSimclusterModelVersion =
    scopedStatsReceiver.counter("invalidSimclusterModelVersion")

  /**
   * @param userInterestClustersPair the user id paired with that user's interested-in clusters
   * @return the user's per-cluster fav scores (clusters without a fav score are omitted), or
   *         None when the clusters were produced by a different model version
   */
  def fromUserClusterInterestsPair(
    userInterestClustersPair: (Long, ClustersUserIsInterestedIn)
  ): Option[SimclusterFeatures] = {
    val (userId, clusters) = userInterestClustersPair
    val modelVersion = clusters.knownForModelVersion

    if (modelVersion != SIMCLUSTER_MODEL_VERSION) {
      // We maintain this counter to make sure that the hardcoded modelVersion we are using is correct.
      invalidSimclusterModelVersion.incr()
      None
    } else {
      val favScoreByClusterId = clusters.clusterIdToScores.flatMap {
        case (clusterId, scores) =>
          scores.favScore.map(favScore => (clusterId.toString, favScore))
      }
      Some(SimclusterFeatures(userId, modelVersion, favScoreByClusterId.toMap))
    }
  }
}
|
||||
|
||||
/** SimCluster model-version constant and the user-level feature definitions derived from it. */
object SimclusterFeatures {
  // Check http://go/simclustersv2runbook for production versions.
  // Our models are trained for this specific model version only.
  val SIMCLUSTER_MODEL_VERSION: String = "20M_145K_dec11"

  // Common name prefix for every SimCluster feature; it embeds the model version.
  val prefix: String = "simcluster.v2." + SIMCLUSTER_MODEL_VERSION

  val SIMCLUSTER_USER_INTEREST_CLUSTER_SCORES: SparseContinuous =
    new SparseContinuous(
      prefix + ".user_interest_cluster_scores",
      Set(EngagementScore, InferredInterests).asJava
    )

  val SIMCLUSTER_USER_INTEREST_CLUSTER_IDS: SparseBinary =
    new SparseBinary(
      prefix + ".user_interest_cluster_ids",
      Set(InferredInterests).asJava
    )

  val SIMCLUSTER_MODEL_VERSION_METADATA: Text = new Text("meta.simcluster_version")
}
|
||||
|
||||
/**
 * A user's SimCluster scores for one model version.
 *
 * @param userId                   id of the user the scores belong to
 * @param modelVersion             model version that produced the clusters
 * @param interestClusterScoresMap score per cluster id; SimclusterFeaturesHelper fills it with
 *                                 stringified cluster ids mapped to fav scores
 */
case class SimclusterFeatures(
  userId: Long,
  modelVersion: String,
  interestClusterScoresMap: Map[String, Double]
)
|
@ -0,0 +1,150 @@
|
||||
package com.twitter.timelines.prediction.features.simcluster
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.ml.api.{Feature, FeatureContext}
|
||||
import com.twitter.ml.api.Feature.{Continuous, SparseBinary, SparseContinuous}
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion._
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
|
||||
import com.twitter.timelines.suggests.common.record.thriftscala.SuggestionRecord
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Produces per-feature score lists from the SimClusters shared by a user (interested-in
 * clusters) and a tweet (top-K clusters), and filters cluster maps by the hardcoded model version.
 *
 * @param statsReceiver receiver under which this class's counters are scoped (by class name)
 */
class SimclusterTweetFeatures(statsReceiver: StatsReceiver) extends CombineCountsBase {
  import SimclusterTweetFeatures._

  private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)
  private[this] val invalidSimclusterModelVersion = scopedStatsReceiver
    .counter("invalidSimclusterModelVersion")
  private[this] val getFeaturesFromOverlappingSimclusterIdsCount = scopedStatsReceiver
    .counter("getFeaturesFromOverlappingSimclusterIdsCount")
  private[this] val emptySimclusterMaps = scopedStatsReceiver
    .counter("emptySimclusterMaps")
  private[this] val nonOverlappingSimclusterMaps = scopedStatsReceiver
    .counter("nonOverlappingSimclusterMaps")

  // Parameters required by CombineCountsBase
  override val topK: Int = 5
  override val hardLimit: Option[Int] = None
  override val precomputedCountFeatures: Seq[Feature[_]] = Seq(
    SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE,
    SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE
  )

  /**
   * For every cluster id present in both maps, emits the tweet score and the combined score
   * (user score * tweet score).
   *
   * @return one score list per feature, with one entry per overlapping cluster; empty when
   *         either map is empty or the maps share no cluster id
   */
  private def getFeaturesFromOverlappingSimclusterIds(
    userSimclustersInterestedInMap: Map[String, Double],
    tweetSimclustersTopKMap: Map[String, Double]
  ): Map[Feature[_], List[Double]] = {
    getFeaturesFromOverlappingSimclusterIdsCount.incr()
    if (userSimclustersInterestedInMap.isEmpty || tweetSimclustersTopKMap.isEmpty) {
      emptySimclusterMaps.incr()
      Map.empty
    } else {
      // Materialise the ids as a List before mapping. Mapping/unzipping the Set directly would
      // produce Sets of scores, silently collapsing clusters that happen to share a score and
      // letting the two score lists end up with different lengths.
      val overlappingSimclusterIds =
        (userSimclustersInterestedInMap.keySet intersect tweetSimclustersTopKMap.keySet).toList
      if (overlappingSimclusterIds.isEmpty) {
        nonOverlappingSimclusterMaps.incr()
        Map.empty
      } else {
        val (combinedScores, tweetScores) = overlappingSimclusterIds.map { id =>
          val tweetScore = tweetSimclustersTopKMap.getOrElse(id, 0.0)
          val combinedScore = userSimclustersInterestedInMap.getOrElse(id, 0.0) * tweetScore
          (combinedScore, tweetScore)
        }.unzip
        Map(
          SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE -> combinedScores,
          SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE -> tweetScores
        )
      }
    }
  }

  /**
   * Formats the user's and the tweet's cluster maps into integer-cluster-id keyed maps and
   * returns the overlap features computed from them.
   *
   * @param suggestionRecord        record carrying the user's interested-in SimClusters
   * @param simclustersTweetTopKMap the tweet's top-K cluster scores, keyed by full cluster id
   */
  def getCountFeaturesValuesMap(
    suggestionRecord: SuggestionRecord,
    simclustersTweetTopKMap: Map[String, Double]
  ): Map[Feature[_], List[Double]] = {
    val userSimclustersInterestedInMap = formatUserSimclustersInterestedIn(suggestionRecord)

    val tweetSimclustersTopKMap = formatTweetSimclustersTopK(simclustersTweetTopKMap)

    getFeaturesFromOverlappingSimclusterIds(userSimclustersInterestedInMap, tweetSimclustersTopKMap)
  }

  /**
   * Keeps only the entries whose cluster id contains the hardcoded model version.
   *
   * @return the filtered map, or None when the input is None or nothing survives the filter
   */
  def filterByModelVersion(
    simclustersMapOpt: Option[Map[String, Double]]
  ): Option[Map[String, Double]] = {
    simclustersMapOpt.flatMap { simclustersMap =>
      val filteredSimclustersMap = simclustersMap.filter {
        case (clusterId, _) =>
          // The clusterId format is ModelVersion.IntegerClusterId.ScoreType as specified at
          // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters
          clusterId.contains(SimclusterFeatures.SIMCLUSTER_MODEL_VERSION)
      }

      // The assumption is that the simclustersMap will contain clusterIds with the same modelVersion.
      // We maintain this counter to make sure that the hardcoded modelVersion we are using is correct.
      if (simclustersMap.size > filteredSimclustersMap.size) {
        invalidSimclusterModelVersion.incr()
      }

      if (filteredSimclustersMap.nonEmpty) Some(filteredSimclustersMap) else None
    }
  }

  val allFeatures: Seq[Feature[_]] = outputFeaturesPostMerge.toSeq ++ Seq(
    SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS,
    SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES)
  val featureContext = new FeatureContext(allFeatures: _*)
}
|
||||
|
||||
/** Tweet-level SimCluster feature definitions and helpers that normalise cluster-score maps. */
object SimclusterTweetFeatures {
  val SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS = new SparseBinary(
    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_ids",
    Set(InferredInterests).asJava
  )
  val SIMCLUSTER_TWEET_TOPK_CLUSTER_SCORES = new SparseContinuous(
    s"${SimclusterFeatures.prefix}.tweet_topk_cluster_scores",
    Set(EngagementScore, InferredInterests).asJava
  )

  val SIMCLUSTER_TWEET_TOPK_CLUSTER_ID =
    TypedAggregateGroup.sparseFeature(SIMCLUSTER_TWEET_TOPK_CLUSTER_IDS)

  val SIMCLUSTER_TWEET_TOPK_SORT_BY_TWEET_SCORE = new Continuous(
    s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_tweet_score",
    Set(EngagementScore, InferredInterests).asJava
  )

  val SIMCLUSTER_TWEET_TOPK_SORT_BY_COMBINED_SCORE = new Continuous(
    s"${SimclusterFeatures.prefix}.tweet_topk_sort_by_combined_score",
    Set(EngagementScore, InferredInterests).asJava
  )

  /**
   * Extracts the user's fav score per cluster from the record.
   *
   * @return stringified cluster id -> fav score, omitting clusters without a fav score; empty
   *         when the record has no interested-in clusters or they belong to a model version
   *         other than SimclusterFeatures.SIMCLUSTER_MODEL_VERSION
   */
  def formatUserSimclustersInterestedIn(suggestionRecord: SuggestionRecord): Map[String, Double] = {
    suggestionRecord.userSimclustersInterestedIn
      .filter(_.knownForModelVersion == SimclusterFeatures.SIMCLUSTER_MODEL_VERSION)
      .map { clustersUserIsInterestedIn =>
        val favScores = for {
          (clusterId, scores) <- clustersUserIsInterestedIn.clusterIdToScores
          favScore <- scores.favScore
        } yield (clusterId.toString, favScore)
        favScores.toMap
      }
      .getOrElse(Map.empty[String, Double])
  }

  /**
   * Re-keys the tweet's top-K cluster scores by integer cluster id.
   *
   * Keys that do not have at least two dot-separated segments are skipped rather than raising
   * an index error. Entries that resolve to the same integer cluster id collapse into a single
   * entry (the last one iterated wins).
   *
   * @param simclustersTweetTopKMap scores keyed by the full cluster id
   * @return integer cluster id -> score
   */
  def formatTweetSimclustersTopK(
    simclustersTweetTopKMap: Map[String, Double]
  ): Map[String, Double] = {
    // The clusterId format is <ModelVersion.IntegerClusterId.ScoreType> as specified at
    // com.twitter.ml.featurestore.catalog.features.recommendations.SimClustersV2TweetTopClusters
    // and we want to extract the IntegerClusterId, which is at position 1.
    // The split function takes a regex; therefore, we need to escape . and we also need to escape
    // \ since they are both special characters. Hence, the double \\.
    val scoresByIntegerClusterId = for {
      (clusterId, score) <- simclustersTweetTopKMap
      integerClusterId <- clusterId.split("\\.").lift(1)
    } yield (integerClusterId, score)
    scoresByIntegerClusterId.toMap
  }
}
|
@ -0,0 +1,43 @@
|
||||
package com.twitter.timelines.prediction.features.simcluster
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType.SemanticcoreClassification
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion.CombineCountsBase
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Continuous features under the `recommendations.sim_clusters_scores` name prefix, exposed to
 * CombineCountsBase as its precomputed count features.
 */
object SimclustersScoresFeatures extends CombineCountsBase {
  override def topK: Int = 2

  override def hardLimit: Option[Int] = Some(20)

  val prefix = "recommendations.sim_clusters_scores"

  // Builds one Continuous feature named "<prefix>.<suffix>" with the shared personal-data type.
  private def simClustersScoreFeature(suffix: String): Continuous =
    new Continuous(prefix + "." + suffix, Set(SemanticcoreClassification).asJava)

  val TOPIC_CONSUMER_TWEET_EMBEDDING_Cs =
    simClustersScoreFeature("localized_topic_consumer_tweet_embedding_cosine_similarity")
  val TOPIC_PRODUCER_TWEET_EMBEDDING_Cs =
    simClustersScoreFeature("topic_producer_tweet_embedding_cosine_similarity")
  val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM =
    simClustersScoreFeature(
      "user_interested_in_localized_topic_consumer_embedding_cosine_similarity")
  val USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT =
    simClustersScoreFeature("user_interested_in_localized_topic_consumer_embedding_dot_product")
  val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM =
    simClustersScoreFeature(
      "user_interested_in_localized_topic_producer_embedding_cosine_similarity")
  val USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT =
    simClustersScoreFeature("user_interested_in_localized_topic_producer_embedding_dot_product")

  override def precomputedCountFeatures: Seq[Feature[_]] = Seq(
    TOPIC_CONSUMER_TWEET_EMBEDDING_Cs,
    TOPIC_PRODUCER_TWEET_EMBEDDING_Cs,
    USER_TOPIC_CONSUMER_TWEET_EMBEDDING_COSINE_SIM,
    USER_TOPIC_CONSUMER_TWEET_EMBEDDING_DOT_PRODUCT,
    USER_TOPIC_PRODUCER_TWEET_EMBEDDING_COSINE_SIM,
    USER_TOPIC_PRODUCER_TWEET_EMBEDDING_DOT_PRODUCT
  )
}
|
@ -0,0 +1,15 @@
|
||||
scala_library(
|
||||
name = "socialproof_features",
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/ibm/icu:icu4j",
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/scala/com/twitter/ml/api/util",
|
||||
"src/scala/com/twitter/timelines/util",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/ml/api:data-java",
|
||||
"src/thrift/com/twitter/timelines/socialproof:socialproof-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,172 @@
|
||||
package com.twitter.timelines.prediction.features.socialproof
|
||||
|
||||
import com.twitter.ml.api.DataRecord
|
||||
import com.twitter.ml.api.Feature.Binary
|
||||
import com.twitter.ml.api.Feature.Continuous
|
||||
import com.twitter.ml.api.Feature.SparseBinary
|
||||
import com.twitter.ml.api.util.FDsl._
|
||||
import com.twitter.timelines.prediction.features.socialproof.SocialProofDataRecordFeatures._
|
||||
import com.twitter.timelines.socialproof.thriftscala.SocialProof
|
||||
import com.twitter.timelines.socialproof.v1.thriftscala.SocialProofType
|
||||
import com.twitter.timelines.util.CommonTypes.UserId
|
||||
import scala.collection.JavaConverters._
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
|
||||
/**
 * Base for one kind of social proof: the user ids that are displayed plus the total count.
 * Subclasses choose which features the values are written to.
 *
 * @param userIds the displayed user ids
 * @param count   total number of users; must be at least `userIds.size`
 */
abstract class SocialProofUserGroundTruth(userIds: Seq[UserId], count: Int) {
  private[this] val numDisplayedUsers: Int = userIds.size

  require(
    numDisplayedUsers <= count,
    "count must be equal to or greater than the number of entries in userIds"
  )

  // The counts are exposed as Double so that they can be used directly as ML feature values.
  val displayedUserCount: Double = numDisplayedUsers.toDouble
  val undisplayedUserCount: Double = count - displayedUserCount
  val totalCount: Double = count.toDouble

  def featureDisplayedUsers: SparseBinary
  def featureDisplayedUserCount: Continuous
  def featureUndisplayedUserCount: Continuous
  def featureTotalUserCount: Continuous

  /** Writes the displayed user ids and the three counts into `rec`. */
  def setFeatures(rec: DataRecord): Unit = {
    rec.setFeatureValue(featureDisplayedUsers, toStringSet(userIds))
    rec.setFeatureValue(featureDisplayedUserCount, displayedUserCount)
    rec.setFeatureValue(featureUndisplayedUserCount, undisplayedUserCount)
    rec.setFeatureValue(featureTotalUserCount, totalCount)
  }

  /** Stringifies each id and collects the results into a Set. */
  protected def toStringSet(value: Seq[Long]): Set[String] =
    value.iterator.map(id => id.toString).toSet
}
|
||||
|
||||
/** Social proof ground truth bound to the "favorited by" features. */
case class FavoritedBySocialProofUserGroundTruth(
  userIds: Seq[UserId] = Seq.empty,
  count: Int = 0)
    extends SocialProofUserGroundTruth(userIds, count) {

  override val featureDisplayedUsers: SparseBinary =
    SocialProofDisplayedFavoritedByUsers
  override val featureDisplayedUserCount: Continuous =
    SocialProofDisplayedFavoritedByUserCount
  override val featureUndisplayedUserCount: Continuous =
    SocialProofUndisplayedFavoritedByUserCount
  override val featureTotalUserCount: Continuous =
    SocialProofTotalFavoritedByUserCount
}
|
||||
|
||||
/** Social proof ground truth bound to the "retweeted by" features. */
case class RetweetedBySocialProofUserGroundTruth(
  userIds: Seq[UserId] = Seq.empty,
  count: Int = 0)
    extends SocialProofUserGroundTruth(userIds, count) {

  override val featureDisplayedUsers: SparseBinary =
    SocialProofDisplayedRetweetedByUsers
  override val featureDisplayedUserCount: Continuous =
    SocialProofDisplayedRetweetedByUserCount
  override val featureUndisplayedUserCount: Continuous =
    SocialProofUndisplayedRetweetedByUserCount
  override val featureTotalUserCount: Continuous =
    SocialProofTotalRetweetedByUserCount
}
|
||||
|
||||
/** Social proof ground truth bound to the "replied by" features. */
case class RepliedBySocialProofUserGroundTruth(
  userIds: Seq[UserId] = Seq.empty,
  count: Int = 0)
    extends SocialProofUserGroundTruth(userIds, count) {

  override val featureDisplayedUsers: SparseBinary =
    SocialProofDisplayedRepliedByUsers
  override val featureDisplayedUserCount: Continuous =
    SocialProofDisplayedRepliedByUserCount
  override val featureUndisplayedUserCount: Continuous =
    SocialProofUndisplayedRepliedByUserCount
  override val featureTotalUserCount: Continuous =
    SocialProofTotalRepliedByUserCount
}
|
||||
|
||||
/**
 * The social proof attached to one item, split by kind.
 *
 * @param hasSocialProof whether any social proof is present; nothing is written when false
 * @param favoritedBy    "favorited by" ground truth (empty by default)
 * @param retweetedBy    "retweeted by" ground truth (empty by default)
 * @param repliedBy      "replied by" ground truth (empty by default)
 */
case class SocialProofFeatures(
  hasSocialProof: Boolean,
  favoritedBy: FavoritedBySocialProofUserGroundTruth = FavoritedBySocialProofUserGroundTruth(),
  retweetedBy: RetweetedBySocialProofUserGroundTruth = RetweetedBySocialProofUserGroundTruth(),
  repliedBy: RepliedBySocialProofUserGroundTruth = RepliedBySocialProofUserGroundTruth()) {

  /** Writes the has-social-proof flag and all three ground truths into `dataRecord`. */
  def setFeatures(dataRecord: DataRecord): Unit = {
    if (hasSocialProof) {
      dataRecord.setFeatureValue(HasSocialProof, hasSocialProof)
      // Same order as the constructor parameters: favorites, then retweets, then replies.
      Seq(favoritedBy, retweetedBy, repliedBy).foreach { groundTruth =>
        groundTruth.setFeatures(dataRecord)
      }
    }
  }
}
|
||||
|
||||
object SocialProofFeatures {

  /**
   * Folds a list of thrift social proofs into a single [[SocialProofFeatures]].
   *
   * When the same social proof type occurs more than once, the last occurrence wins. Types other
   * than FavoritedBy, RetweetedBy and RepliedBy are ignored.
   */
  def apply(socialProofs: Seq[SocialProof]): SocialProofFeatures = {
    val initial = SocialProofFeatures(hasSocialProof = socialProofs.nonEmpty)

    socialProofs.foldLeft(initial) { (accumulated, socialProof) =>
      val proof = socialProof.v1
      val userIds = proof.userIds
      val count = proof.count

      proof.socialProofType match {
        case SocialProofType.FavoritedBy =>
          accumulated.copy(favoritedBy = FavoritedBySocialProofUserGroundTruth(userIds, count))
        case SocialProofType.RetweetedBy =>
          accumulated.copy(retweetedBy = RetweetedBySocialProofUserGroundTruth(userIds, count))
        case SocialProofType.RepliedBy =>
          accumulated.copy(repliedBy = RepliedBySocialProofUserGroundTruth(userIds, count))
        case _ =>
          // skip silently instead of breaking jobs, since this isn't used yet
          accumulated
      }
    }
  }
}
|
||||
|
||||
/**
 * Feature definitions for social proof: a presence flag plus, for each of favorited-by,
 * retweeted-by and replied-by, the displayed user list and the displayed / undisplayed / total
 * user counts.
 */
object SocialProofDataRecordFeatures {
  val HasSocialProof: Binary = new Binary("recap.social_proof.has_social_proof")

  // Favorited by
  val SocialProofDisplayedFavoritedByUsers: SparseBinary =
    new SparseBinary(
      "recap.social_proof.list.displayed.favorited_by",
      Set(UserId, PublicLikes, PrivateLikes).asJava)
  val SocialProofDisplayedFavoritedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.displayed.favorited_by",
      Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
  val SocialProofUndisplayedFavoritedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.undisplayed.favorited_by",
      Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)
  val SocialProofTotalFavoritedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.total.favorited_by",
      Set(CountOfPrivateLikes, CountOfPublicLikes).asJava)

  // Retweeted by
  val SocialProofDisplayedRetweetedByUsers: SparseBinary =
    new SparseBinary(
      "recap.social_proof.list.displayed.retweeted_by",
      Set(UserId, PublicRetweets, PrivateRetweets).asJava)
  val SocialProofDisplayedRetweetedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.displayed.retweeted_by",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
  val SocialProofUndisplayedRetweetedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.undisplayed.retweeted_by",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)
  val SocialProofTotalRetweetedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.total.retweeted_by",
      Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava)

  // Replied by
  val SocialProofDisplayedRepliedByUsers: SparseBinary =
    new SparseBinary(
      "recap.social_proof.list.displayed.replied_by",
      Set(UserId, PublicReplies, PrivateReplies).asJava)
  val SocialProofDisplayedRepliedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.displayed.replied_by",
      Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
  val SocialProofUndisplayedRepliedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.undisplayed.replied_by",
      Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)
  val SocialProofTotalRepliedByUserCount: Continuous =
    new Continuous(
      "recap.social_proof.count.total.replied_by",
      Set(CountOfPrivateReplies, CountOfPublicReplies).asJava)

  // Every feature declared above, in declaration order.
  val AllFeatures = Seq(
    HasSocialProof,
    SocialProofDisplayedFavoritedByUsers,
    SocialProofDisplayedFavoritedByUserCount,
    SocialProofUndisplayedFavoritedByUserCount,
    SocialProofTotalFavoritedByUserCount,
    SocialProofDisplayedRetweetedByUsers,
    SocialProofDisplayedRetweetedByUserCount,
    SocialProofUndisplayedRetweetedByUserCount,
    SocialProofTotalRetweetedByUserCount,
    SocialProofDisplayedRepliedByUsers,
    SocialProofDisplayedRepliedByUserCount,
    SocialProofUndisplayedRepliedByUserCount,
    SocialProofTotalRepliedByUserCount
  )
}
|
@ -0,0 +1,10 @@
|
||||
scala_library(
|
||||
sources = ["*.scala"],
|
||||
platform = "java8",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"src/java/com/twitter/ml/api:api-base",
|
||||
"src/thrift/com/twitter/dal/personal_data:personal_data-java",
|
||||
"src/thrift/com/twitter/timelines/time_features:time_features-scala",
|
||||
],
|
||||
)
|
@ -0,0 +1,111 @@
|
||||
package com.twitter.timelines.prediction.features.time_features
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import com.twitter.ml.api.Feature._
|
||||
import scala.collection.JavaConverters._
|
||||
import com.twitter.util.Duration
|
||||
import com.twitter.conversions.DurationOps._
|
||||
|
||||
/**
 * Feature definitions published under the `time_features.` name prefix.
 *
 * Each member is a `Continuous`, `Binary` or `Discrete` feature handle. Where a second
 * constructor argument is given it is the set of personal data types the feature is
 * annotated with; features declared with a name only carry no such annotation here.
 */
object TimeDataRecordFeatures {
  // Personal-data-type sets used by more than one feature below. They are built once and
  // shared; each is a read-only Java view over an immutable Scala set.
  private val TimestampTypes = Set(PrivateTimestamp).asJava
  private val LikeCountTypes = Set(CountOfPrivateLikes, CountOfPublicLikes).asJava
  private val RetweetCountTypes = Set(CountOfPrivateRetweets, CountOfPublicRetweets).asJava
  private val ReplyCountTypes = Set(CountOfPrivateReplies, CountOfPublicReplies).asJava
  private val AccountAgeTypes = Set(AccountCreationTime, AgeOfAccount).asJava

  // Request / tweet timing features
  val TIME_BETWEEN_NON_POLLING_REQUESTS_AVG: Continuous =
    new Continuous("time_features.time_between_non_polling_requests_avg", TimestampTypes)
  val TIME_SINCE_TWEET_CREATION: Continuous =
    new Continuous("time_features.time_since_tweet_creation")
  val TIME_SINCE_SOURCE_TWEET_CREATION: Continuous =
    new Continuous("time_features.time_since_source_tweet_creation")
  val TIME_SINCE_LAST_NON_POLLING_REQUEST: Continuous =
    new Continuous("time_features.time_since_last_non_polling_request", TimestampTypes)
  val NON_POLLING_REQUESTS_SINCE_TWEET_CREATION: Continuous =
    new Continuous("time_features.non_polling_requests_since_tweet_creation", TimestampTypes)
  val TWEET_AGE_RATIO: Continuous = new Continuous("time_features.tweet_age_ratio")
  val IS_TWEET_RECYCLED: Binary = new Binary("time_features.is_tweet_recycled")

  // Last Engagement features
  // The quote features are annotated with the retweet count types, exactly as declared
  // before this restyle.
  val LAST_FAVORITE_SINCE_CREATION_HRS: Continuous =
    new Continuous("time_features.earlybird.last_favorite_since_creation_hrs", LikeCountTypes)
  val LAST_RETWEET_SINCE_CREATION_HRS: Continuous =
    new Continuous("time_features.earlybird.last_retweet_since_creation_hrs", RetweetCountTypes)
  val LAST_REPLY_SINCE_CREATION_HRS: Continuous =
    new Continuous("time_features.earlybird.last_reply_since_creation_hrs", ReplyCountTypes)
  val LAST_QUOTE_SINCE_CREATION_HRS: Continuous =
    new Continuous("time_features.earlybird.last_quote_since_creation_hrs", RetweetCountTypes)
  val TIME_SINCE_LAST_FAVORITE_HRS: Continuous =
    new Continuous("time_features.earlybird.time_since_last_favorite", LikeCountTypes)
  val TIME_SINCE_LAST_RETWEET_HRS: Continuous =
    new Continuous("time_features.earlybird.time_since_last_retweet", RetweetCountTypes)
  val TIME_SINCE_LAST_REPLY_HRS: Continuous =
    new Continuous("time_features.earlybird.time_since_last_reply", ReplyCountTypes)
  val TIME_SINCE_LAST_QUOTE_HRS: Continuous =
    new Continuous("time_features.earlybird.time_since_last_quote", RetweetCountTypes)

  // Viewer account features
  val TIME_SINCE_VIEWER_ACCOUNT_CREATION_SECS: Continuous =
    new Continuous("time_features.time_since_viewer_account_creation_secs", AccountAgeTypes)

  val USER_ID_IS_SNOWFLAKE_ID: Binary =
    new Binary("time_features.time_user_id_is_snowflake_id", Set(UserType).asJava)

  val IS_30_DAY_NEW_USER: Binary =
    new Binary("time_features.is_day_30_new_user", AccountAgeTypes)
  val IS_12_MONTH_NEW_USER: Binary =
    new Binary("time_features.is_month_12_new_user", AccountAgeTypes)
  val ACCOUNT_AGE_INTERVAL: Discrete =
    new Discrete("time_features.account_age_interval", Set(AgeOfAccount).asJava)
}
|
||||
|
||||
/**
 * Buckets an account age into one of four intervals covering ages up to 30 days.
 */
object AccountAgeInterval extends Enumeration {
  val LTE_1_DAY, GT_1_DAY_LTE_5_DAY, GT_5_DAY_LTE_14_DAY, GT_14_DAY_LTE_30_DAY = Value

  /**
   * Maps an account age to its interval.
   *
   * The upper bounds are tested in ascending order, so reaching a branch already means
   * the age exceeded every smaller bound; the lower-bound checks are therefore implicit.
   *
   * @param accountAge age of the account
   * @return the matching interval, or `None` when the age is above 30 days
   */
  def fromDuration(accountAge: Duration): Option[AccountAgeInterval.Value] =
    if (accountAge <= 1.day) Some(LTE_1_DAY)
    else if (accountAge <= 5.days) Some(GT_1_DAY_LTE_5_DAY)
    else if (accountAge <= 14.days) Some(GT_5_DAY_LTE_14_DAY)
    else if (accountAge <= 30.days) Some(GT_14_DAY_LTE_30_DAY)
    else None
}
|
||||
|
||||
/**
 * Plain value holder for time-related feature values.
 *
 * The first six fields must always be supplied; every remaining field is optional and
 * defaults to `None`. Units are not enforced by the types. The `Hrs` / `Secs` suffixes
 * presumably denote hours / seconds -- TODO confirm against the code that populates them.
 *
 * @param timeSinceSourceTweetCreation same as `timeSinceTweetCreation` for non-retweets
 * @param accountAgeInterval account-age bucket, when one has been computed
 */
case class TimeFeatures(
  // Required fields
  isTweetRecycled: Boolean,
  timeSinceTweetCreation: Double,
  isDay30NewUser: Boolean,
  isMonth12NewUser: Boolean,
  timeSinceSourceTweetCreation: Double,
  timeSinceViewerAccountCreationSecs: Option[Double],
  // Request-timing fields
  timeBetweenNonPollingRequestsAvg: Option[Double] = None,
  timeSinceLastNonPollingRequest: Option[Double] = None,
  nonPollingRequestsSinceTweetCreation: Option[Double] = None,
  tweetAgeRatio: Option[Double] = None,
  // Last-engagement fields, relative to tweet creation
  lastFavSinceCreationHrs: Option[Double] = None,
  lastRetweetSinceCreationHrs: Option[Double] = None,
  lastReplySinceCreationHrs: Option[Double] = None,
  lastQuoteSinceCreationHrs: Option[Double] = None,
  // Time elapsed since the last engagement of each kind
  timeSinceLastFavoriteHrs: Option[Double] = None,
  timeSinceLastRetweetHrs: Option[Double] = None,
  timeSinceLastReplyHrs: Option[Double] = None,
  timeSinceLastQuoteHrs: Option[Double] = None,
  accountAgeInterval: Option[AccountAgeInterval.Value] = None
)
|
@ -0,0 +1,10 @@
|
||||
# Scala library target built from every `*.scala` source matched next to this BUILD file.
scala_library(
    sources = ["*.scala"],
    tags = ["bazel-compatible"],
    platform = "java8",
    # Targets this library is built against.
    dependencies = [
        "graph-feature-service/src/main/thrift/com/twitter/graph_feature_service:graph_feature_service_thrift-scala",
        "src/java/com/twitter/ml/api:api-base",
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
    ],
)
|
@ -0,0 +1,93 @@
|
||||
package com.twitter.timelines.prediction.features.two_hop_features
|
||||
|
||||
import com.twitter.graph_feature_service.thriftscala.EdgeType
|
||||
import com.twitter.ml.api.Feature._
|
||||
import scala.collection.JavaConverters._
|
||||
import TwoHopFeaturesConfig.personalDataTypesMap
|
||||
|
||||
/**
 * Naming constants, map type aliases and the factory for [[TwoHopFeaturesDescriptor]].
 */
object TwoHopFeaturesDescriptor {

  /** Features keyed by a (left, right) edge-type pair. */
  type TwoHopFeatureMap = Map[(EdgeType, EdgeType), Continuous]

  /** Node-degree features keyed by a single edge type. */
  type TwoHopFeatureNodeDegreeMap = Map[EdgeType, Continuous]

  // Dot-separated components of the generated feature names.
  val prefix: String = "two_hop"
  val normalizedPostfix: String = "normalized"
  val leftNodeDegreePostfix: String = "left_degree"
  val rightNodeDegreePostfix: String = "right_degree"

  /** Creates a descriptor for the given (left, right) edge-type pairs. */
  def apply(edgeTypePairs: Seq[(EdgeType, EdgeType)]): TwoHopFeaturesDescriptor =
    new TwoHopFeaturesDescriptor(edgeTypePairs)
}
|
||||
|
||||
/**
 * Builds the `Continuous` feature definitions for a list of (left, right) edge-type pairs.
 *
 * Four maps are produced at construction time: a raw feature and a normalized feature per
 * pair, and a node-degree feature per distinct left and per distinct right edge type.
 * Feature names are assembled from the lower-cased `originalName` of each edge type, and
 * each feature is annotated with the personal data types that
 * `TwoHopFeaturesConfig.personalDataTypesMap` holds for the edge types involved (none when
 * an edge type has no entry).
 *
 * @param edgeTypePairs the (left, right) edge-type pairs to describe
 */
class TwoHopFeaturesDescriptor(edgeTypePairs: Seq[(EdgeType, EdgeType)]) {
  import TwoHopFeaturesDescriptor._

  /** Left edge type of the pair. */
  def getLeftEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = edgeTypePair._1

  /** Lower-cased original name of the pair's left edge type. */
  def getLeftEdgeName(edgeTypePair: (EdgeType, EdgeType)): String =
    getLeftEdge(edgeTypePair).originalName.toLowerCase

  /** Right edge type of the pair. */
  def getRightEdge(edgeTypePair: (EdgeType, EdgeType)): EdgeType = edgeTypePair._2

  /** Lower-cased original name of the pair's right edge type. */
  def getRightEdgeName(edgeTypePair: (EdgeType, EdgeType)): String =
    getRightEdge(edgeTypePair).originalName.toLowerCase

  // One feature per pair, named "<prefix>.<left>.<right><nameSuffix>" and annotated with the
  // union of both edge types' personal data types.
  private def pairFeatures(nameSuffix: String): TwoHopFeatureMap =
    edgeTypePairs.map { pair =>
      val left = getLeftEdge(pair)
      val leftName = getLeftEdgeName(pair)
      val right = getRightEdge(pair)
      val rightName = getRightEdgeName(pair)
      val dataTypes = (
        personalDataTypesMap.getOrElse(left, Set.empty) ++
          personalDataTypesMap.getOrElse(right, Set.empty)
      ).asJava
      pair -> new Continuous(s"$prefix.$leftName.$rightName$nameSuffix", dataTypes)
    }.toMap

  // One feature per edge type selected by `edgeOf`, named "<prefix>.<edge>.<postfix>".
  // Every pair is visited, so an edge type that occurs in several pairs is rebuilt each
  // time and the last occurrence is the one kept in the map.
  private def degreeFeatures(
    edgeOf: ((EdgeType, EdgeType)) => EdgeType,
    nameOf: ((EdgeType, EdgeType)) => String,
    postfix: String
  ): TwoHopFeatureNodeDegreeMap =
    edgeTypePairs.map { pair =>
      val edge = edgeOf(pair)
      val edgeName = nameOf(pair)
      val dataTypes = personalDataTypesMap.getOrElse(edge, Set.empty).asJava
      edge -> new Continuous(s"$prefix.$edgeName.$postfix", dataTypes)
    }.toMap

  val rawFeaturesMap: TwoHopFeatureMap = pairFeatures("")

  val leftNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap =
    degreeFeatures(getLeftEdge, getLeftEdgeName, leftNodeDegreePostfix)

  val rightNodeDegreeFeaturesMap: TwoHopFeatureNodeDegreeMap =
    degreeFeatures(getRightEdge, getRightEdgeName, rightNodeDegreePostfix)

  val normalizedFeaturesMap: TwoHopFeatureMap = pairFeatures(s".$normalizedPostfix")

  /** All features, in the order: raw, left-degree, right-degree, normalized. */
  val featuresSeq: Seq[Continuous] =
    rawFeaturesMap.values.toSeq ++
      leftNodeDegreeFeaturesMap.values.toSeq ++
      rightNodeDegreeFeaturesMap.values.toSeq ++
      normalizedFeaturesMap.values.toSeq
}
|
@ -0,0 +1,30 @@
|
||||
package com.twitter.timelines.prediction.features.two_hop_features
|
||||
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType
|
||||
import com.twitter.graph_feature_service.thriftscala.{EdgeType, FeatureType}
|
||||
|
||||
/**
 * Static configuration for the two-hop features: which edge types may appear on each
 * side, the resulting pairs and feature types, and the personal data types per edge type.
 */
object TwoHopFeaturesConfig {
  val leftEdgeTypes = Seq(EdgeType.Following, EdgeType.Favorite, EdgeType.MutualFollow)

  val rightEdgeTypes = Seq(
    EdgeType.FollowedBy,
    EdgeType.FavoritedBy,
    EdgeType.RetweetedBy,
    EdgeType.MentionedBy,
    EdgeType.MutualFollow
  )

  /** Every (left, right) combination, left-major: all rights for the first left, and so on. */
  val edgeTypePairs: Seq[(EdgeType, EdgeType)] =
    leftEdgeTypes.flatMap { left =>
      rightEdgeTypes.map(right => (left, right))
    }

  /** One `FeatureType` per entry of `edgeTypePairs`, in the same order. */
  val featureTypes: Seq[FeatureType] =
    edgeTypePairs.map { case (left, right) => FeatureType(left, right) }

  // Annotation shared by the three follow-graph edge types below.
  private val followGraphTypes: Set[PersonalDataType] =
    Set(PersonalDataType.CountOfFollowersAndFollowees)

  /**
   * Personal data types attached to features derived from each edge type.
   *
   * NOTE(review): `FavoritedBy`, `RetweetedBy` and `MentionedBy` appear in `rightEdgeTypes`
   * but have no entry here -- confirm whether that omission is intentional.
   */
  val personalDataTypesMap: Map[EdgeType, Set[PersonalDataType]] = Map(
    EdgeType.Following -> followGraphTypes,
    EdgeType.Favorite -> Set(
      PersonalDataType.CountOfPrivateLikes,
      PersonalDataType.CountOfPublicLikes
    ),
    EdgeType.MutualFollow -> followGraphTypes,
    EdgeType.FollowedBy -> followGraphTypes
  )
}
|
@ -0,0 +1,10 @@
|
||||
# Scala library target built from every `*.scala` source matched next to this BUILD file.
scala_library(
    sources = ["*.scala"],
    tags = ["bazel-compatible"],
    platform = "java8",
    # Targets this library is built against.
    dependencies = [
        "src/java/com/twitter/ml/api:api-base",
        "src/thrift/com/twitter/dal/personal_data:personal_data-java",
        "src/thrift/com/twitter/timelines/author_features/user_health:thrift-scala",
    ],
)
|
@ -0,0 +1,23 @@
|
||||
package com.twitter.timelines.prediction.features.user_health
|
||||
|
||||
import com.twitter.ml.api.Feature
|
||||
import com.twitter.timelines.author_features.user_health.thriftscala.UserState
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType.{UserState => UserStatePDT}
|
||||
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
|
||||
import scala.collection.JavaConverters._
|
||||
|
||||
/**
 * Feature definitions in the `user_health.`, `author_health.` and `user_account.` name
 * groups, with their personal-data-type annotations where declared.
 */
object UserHealthFeatures {
  // Annotation shared by the three state-derived features below.
  private val userStateTypes = Set(UserStatePDT, UserType).asJava

  val UserState: Feature.Discrete =
    new Feature.Discrete("user_health.user_state", userStateTypes)
  val IsLightMinusUser: Feature.Binary =
    new Feature.Binary("user_health.is_light_minus_user", userStateTypes)
  val AuthorState: Feature.Discrete =
    new Feature.Discrete("user_health.author_state", userStateTypes)

  val NumAuthorFollowers: Feature.Continuous =
    new Feature.Continuous("author_health.num_followers", Set(CountOfFollowersAndFollowees).asJava)
  val NumAuthorConnectDays: Feature.Continuous =
    new Feature.Continuous("author_health.num_connect_days")
  val NumAuthorConnect: Feature.Continuous =
    new Feature.Continuous("author_health.num_connect")

  val IsUserVerifiedUnion: Feature.Binary =
    new Feature.Binary("user_account.is_user_verified_union")
}
|
||||
|
||||
/**
 * Pairs an id with an optional user state.
 *
 * @param id           numeric identifier; presumably a user id -- TODO confirm with callers
 * @param userStateOpt the user's state, when one is available
 */
case class UserHealthFeatures(
  id: Long,
  userStateOpt: Option[UserState]
)
|
Reference in New Issue
Block a user