# the-algorithm/src/scala/com/twitter/timelines/prediction/common/aggregates/BUILD
#
# 354 lines
# 17 KiB
# Python

# Dataset declaration for `original_author_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:original_author_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "original_author_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.OriginalAuthor",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/original_author_aggregates/1556496000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `twitter_wide_user_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:twitter_wide_user_aggregates-scala` target that the unnamed scala_library
# in this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "twitter_wide_user_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUser",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_aggregates/1556496000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `twitter_wide_user_author_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:twitter_wide_user_author_aggregates-scala` target that the unnamed
# scala_library in this file depends on — confirm against create_datasets.
create_datasets(
    # Identity and ownership.
    base_name = "twitter_wide_user_author_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.TwitterWideUserAuthor",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/twitter_wide_user_author_aggregates/1556323200000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# The `aggregates_canary` dataset in this file uses the same `User` schema.
# NOTE(review): presumably this call also yields the `:user_aggregates-scala`
# target that the unnamed scala_library in this file depends on — confirm
# against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_aggregates/1556150400000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_author_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_author_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_author_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserAuthor",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_author_aggregates/1556064000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `aggregates_canary`.
# Uses the same `User` schema as `user_aggregates`, and its fallback path ends
# in a `user_aggregates` directory under `canaries/` on a gs:// bucket rather
# than on viewfs.
# NOTE(review): presumably this call also yields the `:aggregates_canary-scala`
# target that the unnamed scala_library in this file depends on — confirm
# against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "aggregates_canary",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.User",
    # NOTE(review): the path has a double slash right after the bucket name;
    # it is kept verbatim here — confirm whether that is intentional.
    fallback_path = "gs://user.timelines.dp.gcp.twttr.net//canaries/processed/aggregates_v2/user_aggregates/1622851200000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_engager_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_engager_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_engager_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserEngager",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_engager_aggregates/1556496000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_original_author_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_original_author_aggregates-scala` target that the unnamed
# scala_library in this file depends on — confirm against create_datasets.
create_datasets(
    # Identity and ownership.
    base_name = "user_original_author_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserOriginalAuthor",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_original_author_aggregates/1556496000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `author_topic_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:author_topic_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "author_topic_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.AuthorTopic",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/author_topic_aggregates/1589932800000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_topic_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_topic_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_topic_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserTopic",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_topic_aggregates/1590278400000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_inferred_topic_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_inferred_topic_aggregates-scala` target that the unnamed
# scala_library in this file depends on — confirm against create_datasets.
create_datasets(
    # Identity and ownership.
    base_name = "user_inferred_topic_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserInferredTopic",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_inferred_topic_aggregates/1599696000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_mention_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_mention_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_mention_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMention",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_mention_aggregates/1556582400000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_request_dow_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): "dow" presumably stands for day-of-week — confirm.
# NOTE(review): presumably this call also yields the
# `:user_request_dow_aggregates-scala` target that the unnamed scala_library
# in this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_request_dow_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestDow",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_dow_aggregates/1556236800000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_request_hour_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_request_hour_aggregates-scala` target that the unnamed scala_library
# in this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_request_hour_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserRequestHour",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_request_hour_aggregates/1556150400000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_list_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): presumably this call also yields the
# `:user_list_aggregates-scala` target that the unnamed scala_library in
# this file depends on — confirm against the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_list_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserList",
    # viewfs location supplied as the fallback path.
    fallback_path = "viewfs://hadoop-proc2-nn.atla.twitter.com/user/timelines/processed/aggregates_v2/user_list_aggregates/1590624000000",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Dataset declaration for `user_media_understanding_annotation_aggregates`.
# Keys are AggregationKey; each value is a (BatchID, DataRecord) pair.
# NOTE(review): unlike every other create_datasets call in this file, this one
# passes no `fallback_path` — confirm the omission is intentional.
# NOTE(review): presumably this call also yields the
# `:user_media_understanding_annotation_aggregates-scala` target that the
# unnamed scala_library in this file depends on — confirm against
# the create_datasets definition.
create_datasets(
    # Identity and ownership.
    base_name = "user_media_understanding_annotation_aggregates",
    role = "timelines",
    segment_type = "snapshot",
    # Key/value types and the injection used as the Scala schema.
    key_type = "com.twitter.timelines.data_processing.ml_util.aggregation_framework.AggregationKey",
    val_type = "(com.twitter.summingbird.batch.BatchID, com.twitter.ml.api.DataRecord)",
    scala_schema = "com.twitter.timelines.prediction.common.aggregates.TimelinesAggregationKeyValInjections.UserMediaUnderstandingAnnotation",
    # Build settings.
    platform = "java8",
    tags = ["bazel-compatible"],
    scala_dependencies = [
        ":injections",
        "timelines/data_processing/ml_util/aggregation_framework:common_types",
    ],
)
# Scala library for the aggregation configuration in this directory.
# No explicit `name` is passed.
# NOTE(review): presumably the build tool then falls back to a default target
# name (e.g. the directory name) — confirm.
# Six of the eight sources are also compiled into the `injections` library in
# this file; only BCELabelTransformFromUUADataRecord.scala and
# TimelinesAggregationConfig.scala are unique to this target.
# NOTE(review): `injections` depends on
# `.../prediction/features/semantic_core_features` but this target does not,
# despite the shared sources — confirm that is intentional.
scala_library(
    platform = "java8",
    tags = ["bazel-compatible"],
    sources = [
        "BCELabelTransformFromUUADataRecord.scala",
        "FeatureSelectorConfig.scala",
        "RecapUserFeatureAggregation.scala",
        "RectweetUserFeatureAggregation.scala",
        "TimelinesAggregationConfig.scala",
        "TimelinesAggregationConfigDetails.scala",
        "TimelinesAggregationConfigTrait.scala",
        "TimelinesAggregationSources.scala",
    ],
    dependencies = [
        # Local `-scala` targets, one per create_datasets call in this file.
        ":aggregates_canary-scala",
        ":author_topic_aggregates-scala",
        ":original_author_aggregates-scala",
        ":twitter_wide_user_aggregates-scala",
        ":twitter_wide_user_author_aggregates-scala",
        ":user_aggregates-scala",
        ":user_author_aggregates-scala",
        ":user_engager_aggregates-scala",
        ":user_inferred_topic_aggregates-scala",
        ":user_list_aggregates-scala",
        ":user_media_understanding_annotation_aggregates-scala",
        ":user_mention_aggregates-scala",
        ":user_original_author_aggregates-scala",
        ":user_request_dow_aggregates-scala",
        ":user_request_hour_aggregates-scala",
        ":user_topic_aggregates-scala",
        # Targets outside this directory.
        "src/java/com/twitter/ml/api:api-base",
        "src/java/com/twitter/ml/api/constant",
        "src/java/com/twitter/ml/api/matcher",
        "src/scala/com/twitter/common/text/util",
        "src/scala/com/twitter/dal/client/dataset",
        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
        "src/scala/com/twitter/scalding_internal/multiformat/format",
        "src/scala/com/twitter/timelines/prediction/common/adapters:engagement-converter",
        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
        "src/scala/com/twitter/timelines/prediction/features/common",
        "src/scala/com/twitter/timelines/prediction/features/engagement_features",
        "src/scala/com/twitter/timelines/prediction/features/escherbird",
        "src/scala/com/twitter/timelines/prediction/features/itl",
        "src/scala/com/twitter/timelines/prediction/features/list_features",
        "src/scala/com/twitter/timelines/prediction/features/p_home_latest",
        "src/scala/com/twitter/timelines/prediction/features/real_graph",
        "src/scala/com/twitter/timelines/prediction/features/recap",
        "src/scala/com/twitter/timelines/prediction/features/request_context",
        "src/scala/com/twitter/timelines/prediction/features/simcluster",
        "src/scala/com/twitter/timelines/prediction/features/time_features",
        "src/scala/com/twitter/timelines/prediction/transform/filter",
        "src/thrift/com/twitter/timelines/suggests/common:engagement-scala",
        "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
        "util/util-core:scala",
    ],
)
# `injections` library: the target every create_datasets call in this file
# lists in `scala_dependencies` as ":injections".
# It compiles TimelinesAggregationKeyValInjections.scala together with six
# sources that the unnamed scala_library in this file also compiles.
# It depends on none of the local `-scala` dataset targets.
scala_library(
    name = "injections",
    platform = "java8",
    tags = ["bazel-compatible"],
    sources = [
        "FeatureSelectorConfig.scala",
        "RecapUserFeatureAggregation.scala",
        "RectweetUserFeatureAggregation.scala",
        "TimelinesAggregationConfigDetails.scala",
        "TimelinesAggregationConfigTrait.scala",
        "TimelinesAggregationKeyValInjections.scala",
        "TimelinesAggregationSources.scala",
    ],
    dependencies = [
        # ML API targets.
        "src/java/com/twitter/ml/api:api-base",
        "src/java/com/twitter/ml/api/constant",
        "src/java/com/twitter/ml/api/matcher",
        # Other Scala targets outside the timelines prediction tree.
        "src/scala/com/twitter/common/text/util",
        "src/scala/com/twitter/dal/client/dataset",
        "src/scala/com/twitter/frigate/data_pipeline/features_aggregated/core",
        "src/scala/com/twitter/scalding_internal/multiformat/format",
        # Timelines prediction feature and transform targets.
        "src/scala/com/twitter/timelines/prediction/features/client_log_event",
        "src/scala/com/twitter/timelines/prediction/features/common",
        "src/scala/com/twitter/timelines/prediction/features/engagement_features",
        "src/scala/com/twitter/timelines/prediction/features/escherbird",
        "src/scala/com/twitter/timelines/prediction/features/itl",
        "src/scala/com/twitter/timelines/prediction/features/list_features",
        "src/scala/com/twitter/timelines/prediction/features/p_home_latest",
        "src/scala/com/twitter/timelines/prediction/features/real_graph",
        "src/scala/com/twitter/timelines/prediction/features/recap",
        "src/scala/com/twitter/timelines/prediction/features/request_context",
        "src/scala/com/twitter/timelines/prediction/features/semantic_core_features",
        "src/scala/com/twitter/timelines/prediction/features/simcluster",
        "src/scala/com/twitter/timelines/prediction/features/time_features",
        "src/scala/com/twitter/timelines/prediction/transform/filter",
        # Remaining targets.
        "timelines/data_processing/ad_hoc/recap/data_record_preparation:recap_data_records_agg_minimal-java",
        "util/util-core:scala",
    ],
)